symbian-qemu-0.9.1-12/qemu-symbian-svp/block-qcow.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /*
       
     2  * Block driver for the QCOW format
       
     3  *
       
     4  * Copyright (c) 2004-2006 Fabrice Bellard
       
     5  *
       
     6  * Permission is hereby granted, free of charge, to any person obtaining a copy
       
     7  * of this software and associated documentation files (the "Software"), to deal
       
     8  * in the Software without restriction, including without limitation the rights
       
     9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
       
    10  * copies of the Software, and to permit persons to whom the Software is
       
    11  * furnished to do so, subject to the following conditions:
       
    12  *
       
    13  * The above copyright notice and this permission notice shall be included in
       
    14  * all copies or substantial portions of the Software.
       
    15  *
       
    16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
       
    17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
       
    18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
       
    19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
       
    20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
       
    21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
       
    22  * THE SOFTWARE.
       
    23  */
       
    24 #include "qemu-common.h"
       
    25 #include "block_int.h"
       
    26 #include <zlib.h>
       
    27 #include "aes.h"
       
    28 
       
    29 /**************************************************************/
       
    30 /* QEMU COW block driver with compression and encryption support */
       
    31 
       
    32 #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
       
    33 #define QCOW_VERSION 1
       
    34 
       
    35 #define QCOW_CRYPT_NONE 0
       
    36 #define QCOW_CRYPT_AES  1
       
    37 
       
    38 #define QCOW_OFLAG_COMPRESSED (1LL << 63)
       
    39 
       
    40 typedef struct QCowHeader {
       
    41     uint32_t magic;
       
    42     uint32_t version;
       
    43     uint64_t backing_file_offset;
       
    44     uint32_t backing_file_size;
       
    45     uint32_t mtime;
       
    46     uint64_t size; /* in bytes */
       
    47     uint8_t cluster_bits;
       
    48     uint8_t l2_bits;
       
    49     uint32_t crypt_method;
       
    50     uint64_t l1_table_offset;
       
    51 } QCowHeader;
       
    52 
       
    53 #define L2_CACHE_SIZE 16
       
    54 
       
    55 typedef struct BDRVQcowState {
       
    56     BlockDriverState *hd;
       
    57     int cluster_bits;
       
    58     int cluster_size;
       
    59     int cluster_sectors;
       
    60     int l2_bits;
       
    61     int l2_size;
       
    62     int l1_size;
       
    63     uint64_t cluster_offset_mask;
       
    64     uint64_t l1_table_offset;
       
    65     uint64_t *l1_table;
       
    66     uint64_t *l2_cache;
       
    67     uint64_t l2_cache_offsets[L2_CACHE_SIZE];
       
    68     uint32_t l2_cache_counts[L2_CACHE_SIZE];
       
    69     uint8_t *cluster_cache;
       
    70     uint8_t *cluster_data;
       
    71     uint64_t cluster_cache_offset;
       
    72     uint32_t crypt_method; /* current crypt method, 0 if no key yet */
       
    73     uint32_t crypt_method_header;
       
    74     AES_KEY aes_encrypt_key;
       
    75     AES_KEY aes_decrypt_key;
       
    76 } BDRVQcowState;
       
    77 
       
    78 static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
       
    79 
       
    80 static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
       
    81 {
       
    82     const QCowHeader *cow_header = (const void *)buf;
       
    83 
       
    84     if (buf_size >= sizeof(QCowHeader) &&
       
    85         be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
       
    86         be32_to_cpu(cow_header->version) == QCOW_VERSION)
       
    87         return 100;
       
    88     else
       
    89         return 0;
       
    90 }
       
    91 
       
    92 static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
       
    93 {
       
    94     BDRVQcowState *s = bs->opaque;
       
    95     int len, i, shift, ret;
       
    96     QCowHeader header;
       
    97 
       
    98     ret = bdrv_file_open(&s->hd, filename, flags);
       
    99     if (ret < 0)
       
   100         return ret;
       
   101     if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
       
   102         goto fail;
       
   103     be32_to_cpus(&header.magic);
       
   104     be32_to_cpus(&header.version);
       
   105     be64_to_cpus(&header.backing_file_offset);
       
   106     be32_to_cpus(&header.backing_file_size);
       
   107     be32_to_cpus(&header.mtime);
       
   108     be64_to_cpus(&header.size);
       
   109     be32_to_cpus(&header.crypt_method);
       
   110     be64_to_cpus(&header.l1_table_offset);
       
   111 
       
   112     if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
       
   113         goto fail;
       
   114     if (header.size <= 1 || header.cluster_bits < 9)
       
   115         goto fail;
       
   116     if (header.crypt_method > QCOW_CRYPT_AES)
       
   117         goto fail;
       
   118     s->crypt_method_header = header.crypt_method;
       
   119     if (s->crypt_method_header)
       
   120         bs->encrypted = 1;
       
   121     s->cluster_bits = header.cluster_bits;
       
   122     s->cluster_size = 1 << s->cluster_bits;
       
   123     s->cluster_sectors = 1 << (s->cluster_bits - 9);
       
   124     s->l2_bits = header.l2_bits;
       
   125     s->l2_size = 1 << s->l2_bits;
       
   126     bs->total_sectors = header.size / 512;
       
   127     s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
       
   128 
       
   129     /* read the level 1 table */
       
   130     shift = s->cluster_bits + s->l2_bits;
       
   131     s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
       
   132 
       
   133     s->l1_table_offset = header.l1_table_offset;
       
   134     s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
       
   135     if (!s->l1_table)
       
   136         goto fail;
       
   137     if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
       
   138         s->l1_size * sizeof(uint64_t))
       
   139         goto fail;
       
   140     for(i = 0;i < s->l1_size; i++) {
       
   141         be64_to_cpus(&s->l1_table[i]);
       
   142     }
       
   143     /* alloc L2 cache */
       
   144     s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
       
   145     if (!s->l2_cache)
       
   146         goto fail;
       
   147     s->cluster_cache = qemu_malloc(s->cluster_size);
       
   148     if (!s->cluster_cache)
       
   149         goto fail;
       
   150     s->cluster_data = qemu_malloc(s->cluster_size);
       
   151     if (!s->cluster_data)
       
   152         goto fail;
       
   153     s->cluster_cache_offset = -1;
       
   154 
       
   155     /* read the backing file name */
       
   156     if (header.backing_file_offset != 0) {
       
   157         len = header.backing_file_size;
       
   158         if (len > 1023)
       
   159             len = 1023;
       
   160         if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
       
   161             goto fail;
       
   162         bs->backing_file[len] = '\0';
       
   163     }
       
   164     return 0;
       
   165 
       
   166  fail:
       
   167     qemu_free(s->l1_table);
       
   168     qemu_free(s->l2_cache);
       
   169     qemu_free(s->cluster_cache);
       
   170     qemu_free(s->cluster_data);
       
   171     bdrv_delete(s->hd);
       
   172     return -1;
       
   173 }
       
   174 
       
   175 static int qcow_set_key(BlockDriverState *bs, const char *key)
       
   176 {
       
   177     BDRVQcowState *s = bs->opaque;
       
   178     uint8_t keybuf[16];
       
   179     int len, i;
       
   180 
       
   181     memset(keybuf, 0, 16);
       
   182     len = strlen(key);
       
   183     if (len > 16)
       
   184         len = 16;
       
   185     /* XXX: we could compress the chars to 7 bits to increase
       
   186        entropy */
       
   187     for(i = 0;i < len;i++) {
       
   188         keybuf[i] = key[i];
       
   189     }
       
   190     s->crypt_method = s->crypt_method_header;
       
   191 
       
   192     if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
       
   193         return -1;
       
   194     if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
       
   195         return -1;
       
   196 #if 0
       
   197     /* test */
       
   198     {
       
   199         uint8_t in[16];
       
   200         uint8_t out[16];
       
   201         uint8_t tmp[16];
       
   202         for(i=0;i<16;i++)
       
   203             in[i] = i;
       
   204         AES_encrypt(in, tmp, &s->aes_encrypt_key);
       
   205         AES_decrypt(tmp, out, &s->aes_decrypt_key);
       
   206         for(i = 0; i < 16; i++)
       
   207             printf(" %02x", tmp[i]);
       
   208         printf("\n");
       
   209         for(i = 0; i < 16; i++)
       
   210             printf(" %02x", out[i]);
       
   211         printf("\n");
       
   212     }
       
   213 #endif
       
   214     return 0;
       
   215 }
       
   216 
       
   217 /* The crypt function is compatible with the linux cryptoloop
       
   218    algorithm for < 4 GB images. NOTE: out_buf == in_buf is
       
   219    supported */
       
   220 static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
       
   221                             uint8_t *out_buf, const uint8_t *in_buf,
       
   222                             int nb_sectors, int enc,
       
   223                             const AES_KEY *key)
       
   224 {
       
   225     union {
       
   226         uint64_t ll[2];
       
   227         uint8_t b[16];
       
   228     } ivec;
       
   229     int i;
       
   230 
       
   231     for(i = 0; i < nb_sectors; i++) {
       
   232         ivec.ll[0] = cpu_to_le64(sector_num);
       
   233         ivec.ll[1] = 0;
       
   234         AES_cbc_encrypt(in_buf, out_buf, 512, key,
       
   235                         ivec.b, enc);
       
   236         sector_num++;
       
   237         in_buf += 512;
       
   238         out_buf += 512;
       
   239     }
       
   240 }
       
   241 
       
   242 /* 'allocate' is:
       
   243  *
       
   244  * 0 to not allocate.
       
   245  *
       
   246  * 1 to allocate a normal cluster (for sector indexes 'n_start' to
       
   247  * 'n_end')
       
   248  *
       
   249  * 2 to allocate a compressed cluster of size
       
   250  * 'compressed_size'. 'compressed_size' must be > 0 and <
       
   251  * cluster_size
       
   252  *
       
   253  * return 0 if not allocated.
       
   254  */
       
   255 static uint64_t get_cluster_offset(BlockDriverState *bs,
       
   256                                    uint64_t offset, int allocate,
       
   257                                    int compressed_size,
       
   258                                    int n_start, int n_end)
       
   259 {
       
   260     BDRVQcowState *s = bs->opaque;
       
   261     int min_index, i, j, l1_index, l2_index;
       
   262     uint64_t l2_offset, *l2_table, cluster_offset, tmp;
       
   263     uint32_t min_count;
       
   264     int new_l2_table;
       
   265 
       
   266     l1_index = offset >> (s->l2_bits + s->cluster_bits);
       
   267     l2_offset = s->l1_table[l1_index];
       
   268     new_l2_table = 0;
       
   269     if (!l2_offset) {
       
   270         if (!allocate)
       
   271             return 0;
       
   272         /* allocate a new l2 entry */
       
   273         l2_offset = bdrv_getlength(s->hd);
       
   274         /* round to cluster size */
       
   275         l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
       
   276         /* update the L1 entry */
       
   277         s->l1_table[l1_index] = l2_offset;
       
   278         tmp = cpu_to_be64(l2_offset);
       
   279         if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp),
       
   280                         &tmp, sizeof(tmp)) != sizeof(tmp))
       
   281             return 0;
       
   282         new_l2_table = 1;
       
   283     }
       
   284     for(i = 0; i < L2_CACHE_SIZE; i++) {
       
   285         if (l2_offset == s->l2_cache_offsets[i]) {
       
   286             /* increment the hit count */
       
   287             if (++s->l2_cache_counts[i] == 0xffffffff) {
       
   288                 for(j = 0; j < L2_CACHE_SIZE; j++) {
       
   289                     s->l2_cache_counts[j] >>= 1;
       
   290                 }
       
   291             }
       
   292             l2_table = s->l2_cache + (i << s->l2_bits);
       
   293             goto found;
       
   294         }
       
   295     }
       
   296     /* not found: load a new entry in the least used one */
       
   297     min_index = 0;
       
   298     min_count = 0xffffffff;
       
   299     for(i = 0; i < L2_CACHE_SIZE; i++) {
       
   300         if (s->l2_cache_counts[i] < min_count) {
       
   301             min_count = s->l2_cache_counts[i];
       
   302             min_index = i;
       
   303         }
       
   304     }
       
   305     l2_table = s->l2_cache + (min_index << s->l2_bits);
       
   306     if (new_l2_table) {
       
   307         memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
       
   308         if (bdrv_pwrite(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
       
   309             s->l2_size * sizeof(uint64_t))
       
   310             return 0;
       
   311     } else {
       
   312         if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
       
   313             s->l2_size * sizeof(uint64_t))
       
   314             return 0;
       
   315     }
       
   316     s->l2_cache_offsets[min_index] = l2_offset;
       
   317     s->l2_cache_counts[min_index] = 1;
       
   318  found:
       
   319     l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
       
   320     cluster_offset = be64_to_cpu(l2_table[l2_index]);
       
   321     if (!cluster_offset ||
       
   322         ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
       
   323         if (!allocate)
       
   324             return 0;
       
   325         /* allocate a new cluster */
       
   326         if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
       
   327             (n_end - n_start) < s->cluster_sectors) {
       
   328             /* if the cluster is already compressed, we must
       
   329                decompress it in the case it is not completely
       
   330                overwritten */
       
   331             if (decompress_cluster(s, cluster_offset) < 0)
       
   332                 return 0;
       
   333             cluster_offset = bdrv_getlength(s->hd);
       
   334             cluster_offset = (cluster_offset + s->cluster_size - 1) &
       
   335                 ~(s->cluster_size - 1);
       
   336             /* write the cluster content */
       
   337             if (bdrv_pwrite(s->hd, cluster_offset, s->cluster_cache, s->cluster_size) !=
       
   338                 s->cluster_size)
       
   339                 return -1;
       
   340         } else {
       
   341             cluster_offset = bdrv_getlength(s->hd);
       
   342             /* round to cluster size */
       
   343             cluster_offset = (cluster_offset + s->cluster_size - 1) &
       
   344                 ~(s->cluster_size - 1);
       
   345             bdrv_truncate(s->hd, cluster_offset + s->cluster_size);
       
   346             /* if encrypted, we must initialize the cluster
       
   347                content which won't be written */
       
   348             if (s->crypt_method &&
       
   349                 (n_end - n_start) < s->cluster_sectors) {
       
   350                 uint64_t start_sect;
       
   351                 start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
       
   352                 memset(s->cluster_data + 512, 0x00, 512);
       
   353                 for(i = 0; i < s->cluster_sectors; i++) {
       
   354                     if (i < n_start || i >= n_end) {
       
   355                         encrypt_sectors(s, start_sect + i,
       
   356                                         s->cluster_data,
       
   357                                         s->cluster_data + 512, 1, 1,
       
   358                                         &s->aes_encrypt_key);
       
   359                         if (bdrv_pwrite(s->hd, cluster_offset + i * 512,
       
   360                                         s->cluster_data, 512) != 512)
       
   361                             return -1;
       
   362                     }
       
   363                 }
       
   364             }
       
   365         }
       
   366         /* update L2 table */
       
   367         tmp = cpu_to_be64(cluster_offset);
       
   368         l2_table[l2_index] = tmp;
       
   369         if (bdrv_pwrite(s->hd,
       
   370                         l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp))
       
   371             return 0;
       
   372     }
       
   373     return cluster_offset;
       
   374 }
       
   375 
       
   376 static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
       
   377                              int nb_sectors, int *pnum)
       
   378 {
       
   379     BDRVQcowState *s = bs->opaque;
       
   380     int index_in_cluster, n;
       
   381     uint64_t cluster_offset;
       
   382 
       
   383     cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
       
   384     index_in_cluster = sector_num & (s->cluster_sectors - 1);
       
   385     n = s->cluster_sectors - index_in_cluster;
       
   386     if (n > nb_sectors)
       
   387         n = nb_sectors;
       
   388     *pnum = n;
       
   389     return (cluster_offset != 0);
       
   390 }
       
   391 
       
   392 static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
       
   393                              const uint8_t *buf, int buf_size)
       
   394 {
       
   395     z_stream strm1, *strm = &strm1;
       
   396     int ret, out_len;
       
   397 
       
   398     memset(strm, 0, sizeof(*strm));
       
   399 
       
   400     strm->next_in = (uint8_t *)buf;
       
   401     strm->avail_in = buf_size;
       
   402     strm->next_out = out_buf;
       
   403     strm->avail_out = out_buf_size;
       
   404 
       
   405     ret = inflateInit2(strm, -12);
       
   406     if (ret != Z_OK)
       
   407         return -1;
       
   408     ret = inflate(strm, Z_FINISH);
       
   409     out_len = strm->next_out - out_buf;
       
   410     if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
       
   411         out_len != out_buf_size) {
       
   412         inflateEnd(strm);
       
   413         return -1;
       
   414     }
       
   415     inflateEnd(strm);
       
   416     return 0;
       
   417 }
       
   418 
       
   419 static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
       
   420 {
       
   421     int ret, csize;
       
   422     uint64_t coffset;
       
   423 
       
   424     coffset = cluster_offset & s->cluster_offset_mask;
       
   425     if (s->cluster_cache_offset != coffset) {
       
   426         csize = cluster_offset >> (63 - s->cluster_bits);
       
   427         csize &= (s->cluster_size - 1);
       
   428         ret = bdrv_pread(s->hd, coffset, s->cluster_data, csize);
       
   429         if (ret != csize)
       
   430             return -1;
       
   431         if (decompress_buffer(s->cluster_cache, s->cluster_size,
       
   432                               s->cluster_data, csize) < 0) {
       
   433             return -1;
       
   434         }
       
   435         s->cluster_cache_offset = coffset;
       
   436     }
       
   437     return 0;
       
   438 }
       
   439 
       
   440 #if 0
       
   441 
       
   442 static int qcow_read(BlockDriverState *bs, int64_t sector_num,
       
   443                      uint8_t *buf, int nb_sectors)
       
   444 {
       
   445     BDRVQcowState *s = bs->opaque;
       
   446     int ret, index_in_cluster, n;
       
   447     uint64_t cluster_offset;
       
   448 
       
   449     while (nb_sectors > 0) {
       
   450         cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
       
   451         index_in_cluster = sector_num & (s->cluster_sectors - 1);
       
   452         n = s->cluster_sectors - index_in_cluster;
       
   453         if (n > nb_sectors)
       
   454             n = nb_sectors;
       
   455         if (!cluster_offset) {
       
   456             if (bs->backing_hd) {
       
   457                 /* read from the base image */
       
   458                 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
       
   459                 if (ret < 0)
       
   460                     return -1;
       
   461             } else {
       
   462                 memset(buf, 0, 512 * n);
       
   463             }
       
   464         } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
       
   465             if (decompress_cluster(s, cluster_offset) < 0)
       
   466                 return -1;
       
   467             memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
       
   468         } else {
       
   469             ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
       
   470             if (ret != n * 512)
       
   471                 return -1;
       
   472             if (s->crypt_method) {
       
   473                 encrypt_sectors(s, sector_num, buf, buf, n, 0,
       
   474                                 &s->aes_decrypt_key);
       
   475             }
       
   476         }
       
   477         nb_sectors -= n;
       
   478         sector_num += n;
       
   479         buf += n * 512;
       
   480     }
       
   481     return 0;
       
   482 }
       
   483 #endif
       
   484 
       
   485 static int qcow_write(BlockDriverState *bs, int64_t sector_num,
       
   486                      const uint8_t *buf, int nb_sectors)
       
   487 {
       
   488     BDRVQcowState *s = bs->opaque;
       
   489     int ret, index_in_cluster, n;
       
   490     uint64_t cluster_offset;
       
   491 
       
   492     while (nb_sectors > 0) {
       
   493         index_in_cluster = sector_num & (s->cluster_sectors - 1);
       
   494         n = s->cluster_sectors - index_in_cluster;
       
   495         if (n > nb_sectors)
       
   496             n = nb_sectors;
       
   497         cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
       
   498                                             index_in_cluster,
       
   499                                             index_in_cluster + n);
       
   500         if (!cluster_offset)
       
   501             return -1;
       
   502         if (s->crypt_method) {
       
   503             encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1,
       
   504                             &s->aes_encrypt_key);
       
   505             ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512,
       
   506                               s->cluster_data, n * 512);
       
   507         } else {
       
   508             ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
       
   509         }
       
   510         if (ret != n * 512)
       
   511             return -1;
       
   512         nb_sectors -= n;
       
   513         sector_num += n;
       
   514         buf += n * 512;
       
   515     }
       
   516     s->cluster_cache_offset = -1; /* disable compressed cache */
       
   517     return 0;
       
   518 }
       
   519 
       
   520 typedef struct QCowAIOCB {
       
   521     BlockDriverAIOCB common;
       
   522     int64_t sector_num;
       
   523     uint8_t *buf;
       
   524     int nb_sectors;
       
   525     int n;
       
   526     uint64_t cluster_offset;
       
   527     uint8_t *cluster_data;
       
   528     BlockDriverAIOCB *hd_aiocb;
       
   529 } QCowAIOCB;
       
   530 
       
   531 static void qcow_aio_read_cb(void *opaque, int ret)
       
   532 {
       
   533     QCowAIOCB *acb = opaque;
       
   534     BlockDriverState *bs = acb->common.bs;
       
   535     BDRVQcowState *s = bs->opaque;
       
   536     int index_in_cluster;
       
   537 
       
   538     acb->hd_aiocb = NULL;
       
   539     if (ret < 0) {
       
   540     fail:
       
   541         acb->common.cb(acb->common.opaque, ret);
       
   542         qemu_aio_release(acb);
       
   543         return;
       
   544     }
       
   545 
       
   546  redo:
       
   547     /* post process the read buffer */
       
   548     if (!acb->cluster_offset) {
       
   549         /* nothing to do */
       
   550     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
       
   551         /* nothing to do */
       
   552     } else {
       
   553         if (s->crypt_method) {
       
   554             encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
       
   555                             acb->n, 0,
       
   556                             &s->aes_decrypt_key);
       
   557         }
       
   558     }
       
   559 
       
   560     acb->nb_sectors -= acb->n;
       
   561     acb->sector_num += acb->n;
       
   562     acb->buf += acb->n * 512;
       
   563 
       
   564     if (acb->nb_sectors == 0) {
       
   565         /* request completed */
       
   566         acb->common.cb(acb->common.opaque, 0);
       
   567         qemu_aio_release(acb);
       
   568         return;
       
   569     }
       
   570 
       
   571     /* prepare next AIO request */
       
   572     acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9,
       
   573                                              0, 0, 0, 0);
       
   574     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
       
   575     acb->n = s->cluster_sectors - index_in_cluster;
       
   576     if (acb->n > acb->nb_sectors)
       
   577         acb->n = acb->nb_sectors;
       
   578 
       
   579     if (!acb->cluster_offset) {
       
   580         if (bs->backing_hd) {
       
   581             /* read from the base image */
       
   582             acb->hd_aiocb = bdrv_aio_read(bs->backing_hd,
       
   583                 acb->sector_num, acb->buf, acb->n, qcow_aio_read_cb, acb);
       
   584             if (acb->hd_aiocb == NULL)
       
   585                 goto fail;
       
   586         } else {
       
   587             /* Note: in this case, no need to wait */
       
   588             memset(acb->buf, 0, 512 * acb->n);
       
   589             goto redo;
       
   590         }
       
   591     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
       
   592         /* add AIO support for compressed blocks ? */
       
   593         if (decompress_cluster(s, acb->cluster_offset) < 0)
       
   594             goto fail;
       
   595         memcpy(acb->buf,
       
   596                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
       
   597         goto redo;
       
   598     } else {
       
   599         if ((acb->cluster_offset & 511) != 0) {
       
   600             ret = -EIO;
       
   601             goto fail;
       
   602         }
       
   603         acb->hd_aiocb = bdrv_aio_read(s->hd,
       
   604                             (acb->cluster_offset >> 9) + index_in_cluster,
       
   605                             acb->buf, acb->n, qcow_aio_read_cb, acb);
       
   606         if (acb->hd_aiocb == NULL)
       
   607             goto fail;
       
   608     }
       
   609 }
       
   610 
       
   611 static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
       
   612         int64_t sector_num, uint8_t *buf, int nb_sectors,
       
   613         BlockDriverCompletionFunc *cb, void *opaque)
       
   614 {
       
   615     QCowAIOCB *acb;
       
   616 
       
   617     acb = qemu_aio_get(bs, cb, opaque);
       
   618     if (!acb)
       
   619         return NULL;
       
   620     acb->hd_aiocb = NULL;
       
   621     acb->sector_num = sector_num;
       
   622     acb->buf = buf;
       
   623     acb->nb_sectors = nb_sectors;
       
   624     acb->n = 0;
       
   625     acb->cluster_offset = 0;
       
   626 
       
   627     qcow_aio_read_cb(acb, 0);
       
   628     return &acb->common;
       
   629 }
       
   630 
       
   631 static void qcow_aio_write_cb(void *opaque, int ret)
       
   632 {
       
   633     QCowAIOCB *acb = opaque;
       
   634     BlockDriverState *bs = acb->common.bs;
       
   635     BDRVQcowState *s = bs->opaque;
       
   636     int index_in_cluster;
       
   637     uint64_t cluster_offset;
       
   638     const uint8_t *src_buf;
       
   639 
       
   640     acb->hd_aiocb = NULL;
       
   641 
       
   642     if (ret < 0) {
       
   643     fail:
       
   644         acb->common.cb(acb->common.opaque, ret);
       
   645         qemu_aio_release(acb);
       
   646         return;
       
   647     }
       
   648 
       
   649     acb->nb_sectors -= acb->n;
       
   650     acb->sector_num += acb->n;
       
   651     acb->buf += acb->n * 512;
       
   652 
       
   653     if (acb->nb_sectors == 0) {
       
   654         /* request completed */
       
   655         acb->common.cb(acb->common.opaque, 0);
       
   656         qemu_aio_release(acb);
       
   657         return;
       
   658     }
       
   659 
       
   660     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
       
   661     acb->n = s->cluster_sectors - index_in_cluster;
       
   662     if (acb->n > acb->nb_sectors)
       
   663         acb->n = acb->nb_sectors;
       
   664     cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 1, 0,
       
   665                                         index_in_cluster,
       
   666                                         index_in_cluster + acb->n);
       
   667     if (!cluster_offset || (cluster_offset & 511) != 0) {
       
   668         ret = -EIO;
       
   669         goto fail;
       
   670     }
       
   671     if (s->crypt_method) {
       
   672         if (!acb->cluster_data) {
       
   673             acb->cluster_data = qemu_mallocz(s->cluster_size);
       
   674             if (!acb->cluster_data) {
       
   675                 ret = -ENOMEM;
       
   676                 goto fail;
       
   677             }
       
   678         }
       
   679         encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
       
   680                         acb->n, 1, &s->aes_encrypt_key);
       
   681         src_buf = acb->cluster_data;
       
   682     } else {
       
   683         src_buf = acb->buf;
       
   684     }
       
   685     acb->hd_aiocb = bdrv_aio_write(s->hd,
       
   686                                    (cluster_offset >> 9) + index_in_cluster,
       
   687                                    src_buf, acb->n,
       
   688                                    qcow_aio_write_cb, acb);
       
   689     if (acb->hd_aiocb == NULL)
       
   690         goto fail;
       
   691 }
       
   692 
       
   693 static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
       
   694         int64_t sector_num, const uint8_t *buf, int nb_sectors,
       
   695         BlockDriverCompletionFunc *cb, void *opaque)
       
   696 {
       
   697     BDRVQcowState *s = bs->opaque;
       
   698     QCowAIOCB *acb;
       
   699 
       
   700     s->cluster_cache_offset = -1; /* disable compressed cache */
       
   701 
       
   702     acb = qemu_aio_get(bs, cb, opaque);
       
   703     if (!acb)
       
   704         return NULL;
       
   705     acb->hd_aiocb = NULL;
       
   706     acb->sector_num = sector_num;
       
   707     acb->buf = (uint8_t *)buf;
       
   708     acb->nb_sectors = nb_sectors;
       
   709     acb->n = 0;
       
   710 
       
   711     qcow_aio_write_cb(acb, 0);
       
   712     return &acb->common;
       
   713 }
       
   714 
       
   715 static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
       
   716 {
       
   717     QCowAIOCB *acb = (QCowAIOCB *)blockacb;
       
   718     if (acb->hd_aiocb)
       
   719         bdrv_aio_cancel(acb->hd_aiocb);
       
   720     qemu_aio_release(acb);
       
   721 }
       
   722 
       
   723 static void qcow_close(BlockDriverState *bs)
       
   724 {
       
   725     BDRVQcowState *s = bs->opaque;
       
   726     qemu_free(s->l1_table);
       
   727     qemu_free(s->l2_cache);
       
   728     qemu_free(s->cluster_cache);
       
   729     qemu_free(s->cluster_data);
       
   730     bdrv_delete(s->hd);
       
   731 }
       
   732 
       
   733 static int qcow_create(const char *filename, int64_t total_size,
       
   734                       const char *backing_file, int flags)
       
   735 {
       
   736     int fd, header_size, backing_filename_len, l1_size, i, shift;
       
   737     QCowHeader header;
       
   738     uint64_t tmp;
       
   739 
       
   740     fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
       
   741     if (fd < 0)
       
   742         return -1;
       
   743     memset(&header, 0, sizeof(header));
       
   744     header.magic = cpu_to_be32(QCOW_MAGIC);
       
   745     header.version = cpu_to_be32(QCOW_VERSION);
       
   746     header.size = cpu_to_be64(total_size * 512);
       
   747     header_size = sizeof(header);
       
   748     backing_filename_len = 0;
       
   749     if (backing_file) {
       
   750         if (strcmp(backing_file, "fat:")) {
       
   751             header.backing_file_offset = cpu_to_be64(header_size);
       
   752             backing_filename_len = strlen(backing_file);
       
   753             header.backing_file_size = cpu_to_be32(backing_filename_len);
       
   754             header_size += backing_filename_len;
       
   755         } else {
       
   756             /* special backing file for vvfat */
       
   757             backing_file = NULL;
       
   758         }
       
   759         header.cluster_bits = 9; /* 512 byte cluster to avoid copying
       
   760                                     unmodifyed sectors */
       
   761         header.l2_bits = 12; /* 32 KB L2 tables */
       
   762     } else {
       
   763         header.cluster_bits = 12; /* 4 KB clusters */
       
   764         header.l2_bits = 9; /* 4 KB L2 tables */
       
   765     }
       
   766     header_size = (header_size + 7) & ~7;
       
   767     shift = header.cluster_bits + header.l2_bits;
       
   768     l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
       
   769 
       
   770     header.l1_table_offset = cpu_to_be64(header_size);
       
   771     if (flags & BLOCK_FLAG_ENCRYPT) {
       
   772         header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
       
   773     } else {
       
   774         header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
       
   775     }
       
   776 
       
   777     /* write all the data */
       
   778     write(fd, &header, sizeof(header));
       
   779     if (backing_file) {
       
   780         write(fd, backing_file, backing_filename_len);
       
   781     }
       
   782     lseek(fd, header_size, SEEK_SET);
       
   783     tmp = 0;
       
   784     for(i = 0;i < l1_size; i++) {
       
   785         write(fd, &tmp, sizeof(tmp));
       
   786     }
       
   787     close(fd);
       
   788     return 0;
       
   789 }
       
   790 
       
   791 static int qcow_make_empty(BlockDriverState *bs)
       
   792 {
       
   793     BDRVQcowState *s = bs->opaque;
       
   794     uint32_t l1_length = s->l1_size * sizeof(uint64_t);
       
   795     int ret;
       
   796 
       
   797     memset(s->l1_table, 0, l1_length);
       
   798     if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
       
   799 	return -1;
       
   800     ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
       
   801     if (ret < 0)
       
   802         return ret;
       
   803 
       
   804     memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
       
   805     memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
       
   806     memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
       
   807 
       
   808     return 0;
       
   809 }
       
   810 
       
   811 /* XXX: put compressed sectors first, then all the cluster aligned
       
   812    tables to avoid losing bytes in alignment */
       
   813 static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
       
   814                                  const uint8_t *buf, int nb_sectors)
       
   815 {
       
   816     BDRVQcowState *s = bs->opaque;
       
   817     z_stream strm;
       
   818     int ret, out_len;
       
   819     uint8_t *out_buf;
       
   820     uint64_t cluster_offset;
       
   821 
       
   822     if (nb_sectors != s->cluster_sectors)
       
   823         return -EINVAL;
       
   824 
       
   825     out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
       
   826     if (!out_buf)
       
   827         return -1;
       
   828 
       
   829     /* best compression, small window, no zlib header */
       
   830     memset(&strm, 0, sizeof(strm));
       
   831     ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
       
   832                        Z_DEFLATED, -12,
       
   833                        9, Z_DEFAULT_STRATEGY);
       
   834     if (ret != 0) {
       
   835         qemu_free(out_buf);
       
   836         return -1;
       
   837     }
       
   838 
       
   839     strm.avail_in = s->cluster_size;
       
   840     strm.next_in = (uint8_t *)buf;
       
   841     strm.avail_out = s->cluster_size;
       
   842     strm.next_out = out_buf;
       
   843 
       
   844     ret = deflate(&strm, Z_FINISH);
       
   845     if (ret != Z_STREAM_END && ret != Z_OK) {
       
   846         qemu_free(out_buf);
       
   847         deflateEnd(&strm);
       
   848         return -1;
       
   849     }
       
   850     out_len = strm.next_out - out_buf;
       
   851 
       
   852     deflateEnd(&strm);
       
   853 
       
   854     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
       
   855         /* could not compress: write normal cluster */
       
   856         qcow_write(bs, sector_num, buf, s->cluster_sectors);
       
   857     } else {
       
   858         cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
       
   859                                             out_len, 0, 0);
       
   860         cluster_offset &= s->cluster_offset_mask;
       
   861         if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) {
       
   862             qemu_free(out_buf);
       
   863             return -1;
       
   864         }
       
   865     }
       
   866 
       
   867     qemu_free(out_buf);
       
   868     return 0;
       
   869 }
       
   870 
       
   871 static void qcow_flush(BlockDriverState *bs)
       
   872 {
       
   873     BDRVQcowState *s = bs->opaque;
       
   874     bdrv_flush(s->hd);
       
   875 }
       
   876 
       
   877 static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
       
   878 {
       
   879     BDRVQcowState *s = bs->opaque;
       
   880     bdi->cluster_size = s->cluster_size;
       
   881     return 0;
       
   882 }
       
   883 
       
   884 BlockDriver bdrv_qcow = {
       
   885     "qcow",
       
   886     sizeof(BDRVQcowState),
       
   887     qcow_probe,
       
   888     qcow_open,
       
   889     NULL,
       
   890     NULL,
       
   891     qcow_close,
       
   892     qcow_create,
       
   893     qcow_flush,
       
   894     qcow_is_allocated,
       
   895     qcow_set_key,
       
   896     qcow_make_empty,
       
   897 
       
   898     .bdrv_aio_read = qcow_aio_read,
       
   899     .bdrv_aio_write = qcow_aio_write,
       
   900     .bdrv_aio_cancel = qcow_aio_cancel,
       
   901     .aiocb_size = sizeof(QCowAIOCB),
       
   902     .bdrv_write_compressed = qcow_write_compressed,
       
   903     .bdrv_get_info = qcow_get_info,
       
   904 };