GRUB2/MOD_SRC/grub-2.04/grub-core/ventoy/lzx.c

   1 /*
   2  * Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public License as
   6  * published by the Free Software Foundation; either version 2 of the
   7  * License, or (at your option) any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful, but
  10  * WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17  * 02110-1301, USA.
  18  */
  19
  20 /**
  21  * @file
  22  *
  23  * LZX decompression
  24  *
  25  * This algorithm is derived jointly from the document "[MS-PATCH]:
  26  * LZX DELTA Compression and Decompression", available from
  27  *
  28  *     http://msdn.microsoft.com/en-us/library/cc483133.aspx
  29  *
  30  * and from the file lzx-decompress.c in the wimlib source code.
  31  *
  32  */
  33
  34 #include "wimboot.h"
  35 #include "huffman.h"
  36 #include "lzx.h"
  37
  38 /** Base positions, indexed by position slot */
  39 static unsigned int lzx_position_base[LZX_POSITION_SLOTS];
  40
  41 /**
  42  * Attempt to accumulate bits from LZX bitstream
  43  *
  44  * @v lzx               Decompressor
  45  * @v bits              Number of bits to accumulate
  46  * @v norm_value        Accumulated value (normalised to 16 bits)
  47  *
  48  * Note that there may not be sufficient accumulated bits in the
  49  * bitstream; callers must check that sufficient bits are available
  50  * before using the value.
  51  */
  52 static int lzx_accumulate ( struct lzx *lzx, unsigned int bits ) {
  53         const uint16_t *src16;
  54
  55         /* Accumulate more bits if required */
  56         if ( ( lzx->bits < bits ) &&
  57              ( lzx->input.offset < lzx->input.len ) ) {
  58                 src16 = (const uint16_t *)( ( char * ) lzx->input.data + lzx->input.offset );
  59                 lzx->input.offset += sizeof ( *src16 );
  60                 lzx->accumulator |= ( *src16 << ( 16 - lzx->bits ) );
  61                 lzx->bits += 16;
  62         }
  63
  64         return ( lzx->accumulator >> 16 );
  65 }
  66
  67 /**
  68  * Consume accumulated bits from LZX bitstream
  69  *
  70  * @v lzx               Decompressor
  71  * @v bits              Number of bits to consume
  72  * @ret rc              Return status code
  73  */
  74 static int lzx_consume ( struct lzx *lzx, unsigned int bits ) {
  75
  76         /* Fail if insufficient bits are available */
  77         if ( lzx->bits < bits ) {
  78                 DBG ( "LZX input overrun in %#zx/%#zx out %#zx)\n",
  79                       lzx->input.offset, lzx->input.len, lzx->output.offset );
  80                 return -1;
  81         }
  82
  83         /* Consume bits */
  84         lzx->accumulator <<= bits;
  85         lzx->bits -= bits;
  86
  87         return 0;
  88 }
  89
  90 /**
  91  * Get bits from LZX bitstream
  92  *
  93  * @v lzx               Decompressor
  94  * @v bits              Number of bits to fetch
  95  * @ret value           Value, or negative error
  96  */
  97 static int lzx_getbits ( struct lzx *lzx, unsigned int bits ) {
  98         int norm_value;
  99         int rc;
 100
 101         /* Accumulate more bits if required */
 102         norm_value = lzx_accumulate ( lzx, bits );
 103
 104         /* Consume bits */
 105         if ( ( rc = lzx_consume ( lzx, bits ) ) != 0 )
 106                 return rc;
 107
 108         return ( norm_value >> ( 16 - bits ) );
 109 }
 110
 111 /**
 112  * Align LZX bitstream for byte access
 113  *
 114  * @v lzx               Decompressor
 115  * @v bits              Minimum number of padding bits
 116  * @ret rc              Return status code
 117  */
 118 static int lzx_align ( struct lzx *lzx, unsigned int bits ) {
 119         int pad;
 120
 121         /* Get padding bits */
 122         pad = lzx_getbits ( lzx, bits );
 123         if ( pad < 0 )
 124                 return pad;
 125
 126         /* Consume all accumulated bits */
 127         lzx_consume ( lzx, lzx->bits );
 128
 129         return 0;
 130 }
 131
 132 /**
 133  * Get bytes from LZX bitstream
 134  *
 135  * @v lzx               Decompressor
 136  * @v data              Data buffer, or NULL
 137  * @v len               Length of data buffer
 138  * @ret rc              Return status code
 139  */
 140 static int lzx_getbytes ( struct lzx *lzx, void *data, size_t len ) {
 141
 142         /* Sanity check */
 143         if ( ( lzx->input.offset + len ) > lzx->input.len ) {
 144                 DBG ( "LZX input overrun in %#zx/%#zx out %#zx)\n",
 145                       lzx->input.offset, lzx->input.len, lzx->output.offset );
 146                 return -1;
 147         }
 148
 149         /* Copy data */
 150         if ( data )
 151                 memcpy ( data, ( lzx->input.data + lzx->input.offset ), len );
 152         lzx->input.offset += len;
 153
 154         return 0;
 155 }
 156
 157 /**
 158  * Decode LZX Huffman-coded symbol
 159  *
 160  * @v lzx               Decompressor
 161  * @v alphabet          Huffman alphabet
 162  * @ret raw             Raw symbol, or negative error
 163  */
 164 static int lzx_decode ( struct lzx *lzx, struct huffman_alphabet *alphabet ) {
 165         struct huffman_symbols *sym;
 166         int huf;
 167         int rc;
 168
 169         /* Accumulate sufficient bits */
 170         huf = lzx_accumulate ( lzx, HUFFMAN_BITS );
 171         if ( huf < 0 )
 172                 return huf;
 173
 174         /* Decode symbol */
 175         sym = huffman_sym ( alphabet, huf );
 176
 177         /* Consume bits */
 178         if ( ( rc = lzx_consume ( lzx, huffman_len ( sym ) ) ) != 0 )
 179                 return rc;
 180
 181         return huffman_raw ( sym, huf );
 182 }
 183
 184 /**
 185  * Generate Huffman alphabet from raw length table
 186  *
 187  * @v lzx               Decompressor
 188  * @v count             Number of symbols
 189  * @v bits              Length of each length (in bits)
 190  * @v lengths           Lengths table to fill in
 191  * @v alphabet          Huffman alphabet to fill in
 192  * @ret rc              Return status code
 193  */
 194 static int lzx_raw_alphabet ( struct lzx *lzx, unsigned int count,
 195                               unsigned int bits, uint8_t *lengths,
 196                               struct huffman_alphabet *alphabet ) {
 197         unsigned int i;
 198         int len;
 199         int rc;
 200
 201         /* Read lengths */
 202         for ( i = 0 ; i < count ; i++ ) {
 203                 len = lzx_getbits ( lzx, bits );
 204                 if ( len < 0 )
 205                         return len;
 206                 lengths[i] = len;
 207         }
 208
 209         /* Generate Huffman alphabet */
 210         if ( ( rc = huffman_alphabet ( alphabet, lengths, count ) ) != 0 )
 211                 return rc;
 212
 213         return 0;
 214 }
 215
 216 /**
 217  * Generate pretree
 218  *
 219  * @v lzx               Decompressor
 220  * @v count             Number of symbols
 221  * @v lengths           Lengths table to fill in
 222  * @ret rc              Return status code
 223  */
 224 static int lzx_pretree ( struct lzx *lzx, unsigned int count,
 225                          uint8_t *lengths ) {
 226         unsigned int i;
 227         unsigned int length;
 228         int dup = 0;
 229         int code;
 230         int rc;
 231
 232         /* Generate pretree alphabet */
 233         if ( ( rc =  lzx_raw_alphabet ( lzx, LZX_PRETREE_CODES,
 234                                         LZX_PRETREE_BITS, lzx->pretree_lengths,
 235                                         &lzx->pretree ) ) != 0 )
 236                 return rc;
 237
 238         /* Read lengths */
 239         for ( i = 0 ; i < count ; i++ ) {
 240
 241                 if ( dup ) {
 242
 243                         /* Duplicate previous length */
 244                         lengths[i] = lengths[ i - 1 ];
 245                         dup--;
 246
 247                 } else {
 248
 249                         /* Get next code */
 250                         code = lzx_decode ( lzx, &lzx->pretree );
 251                         if ( code < 0 )
 252                                 return code;
 253
 254                         /* Interpret code */
 255                         if ( code <= 16 ) {
 256                                 length = ( ( lengths[i] - code + 17 ) % 17 );
 257                         } else if ( code == 17 ) {
 258                                 length = 0;
 259                                 dup = lzx_getbits ( lzx, 4 );
 260                                 if ( dup < 0 )
 261                                         return dup;
 262                                 dup += 3;
 263                         } else if ( code == 18 ) {
 264                                 length = 0;
 265                                 dup = lzx_getbits ( lzx, 5 );
 266                                 if ( dup < 0 )
 267                                         return dup;
 268                                 dup += 19;
 269                         } else if ( code == 19 ) {
 270                                 length = 0;
 271                                 dup = lzx_getbits ( lzx, 1 );
 272                                 if ( dup < 0 )
 273                                         return dup;
 274                                 dup += 3;
 275                                 code = lzx_decode ( lzx, &lzx->pretree );
 276                                 if ( code < 0 )
 277                                         return code;
 278                                 length = ( ( lengths[i] - code + 17 ) % 17 );
 279                         } else {
 280                                 DBG ( "Unrecognised pretree code %d\n", code );
 281                                 return -1;
 282                         }
 283                         lengths[i] = length;
 284                 }
 285         }
 286
 287         /* Sanity check */
 288         if ( dup ) {
 289                 DBG ( "Pretree duplicate overrun\n" );
 290                 return -1;
 291         }
 292
 293         return 0;
 294 }
 295
 296 /**
 297  * Generate aligned offset Huffman alphabet
 298  *
 299  * @v lzx               Decompressor
 300  * @ret rc              Return status code
 301  */
 302 static int lzx_alignoffset_alphabet ( struct lzx *lzx ) {
 303         int rc;
 304
 305         /* Generate aligned offset alphabet */
 306         if ( ( rc = lzx_raw_alphabet ( lzx, LZX_ALIGNOFFSET_CODES,
 307                                        LZX_ALIGNOFFSET_BITS,
 308                                        lzx->alignoffset_lengths,
 309                                        &lzx->alignoffset ) ) != 0 )
 310                 return rc;
 311
 312         return 0;
 313 }
 314
 315 /**
 316  * Generate main Huffman alphabet
 317  *
 318  * @v lzx               Decompressor
 319  * @ret rc              Return status code
 320  */
 321 static int lzx_main_alphabet ( struct lzx *lzx ) {
 322         int rc;
 323
 324         /* Generate literal symbols pretree */
 325         if ( ( rc = lzx_pretree ( lzx, LZX_MAIN_LIT_CODES,
 326                                   lzx->main_lengths.literals ) ) != 0 ) {
 327                 DBG ( "Could not construct main literal pretree\n" );
 328                 return rc;
 329         }
 330
 331         /* Generate remaining symbols pretree */
 332         if ( ( rc = lzx_pretree ( lzx, ( LZX_MAIN_CODES - LZX_MAIN_LIT_CODES ),
 333                                   lzx->main_lengths.remainder ) ) != 0 ) {
 334                 DBG ( "Could not construct main remainder pretree\n" );
 335                 return rc;
 336         }
 337
 338         /* Generate Huffman alphabet */
 339         if ( ( rc = huffman_alphabet ( &lzx->main, lzx->main_lengths.literals,
 340                                        LZX_MAIN_CODES ) ) != 0 ) {
 341                 DBG ( "Could not generate main alphabet\n" );
 342                 return rc;
 343         }
 344
 345         return 0;
 346 }
 347
 348 /**
 349  * Generate length Huffman alphabet
 350  *
 351  * @v lzx               Decompressor
 352  * @ret rc              Return status code
 353  */
 354 static int lzx_length_alphabet ( struct lzx *lzx ) {
 355         int rc;
 356
 357         /* Generate pretree */
 358         if ( ( rc = lzx_pretree ( lzx, LZX_LENGTH_CODES,
 359                                   lzx->length_lengths ) ) != 0 ) {
 360                 DBG ( "Could not generate length pretree\n" );
 361                 return rc;
 362         }
 363
 364         /* Generate Huffman alphabet */
 365         if ( ( rc = huffman_alphabet ( &lzx->length, lzx->length_lengths,
 366                                        LZX_LENGTH_CODES ) ) != 0 ) {
 367                 DBG ( "Could not generate length alphabet\n" );
 368                 return rc;
 369         }
 370
 371         return 0;
 372 }
 373
 374 /**
 375  * Process LZX block header
 376  *
 377  * @v lzx               Decompressor
 378  * @ret rc              Return status code
 379  */
 380 static int lzx_block_header ( struct lzx *lzx ) {
 381         size_t block_len;
 382         int block_type;
 383         int default_len;
 384         int len_high;
 385         int len_low;
 386         int rc;
 387
 388         /* Get block type */
 389         block_type = lzx_getbits ( lzx, LZX_BLOCK_TYPE_BITS );
 390         if ( block_type < 0 )
 391                 return block_type;
 392         lzx->block_type = block_type;
 393
 394         /* Check block length */
 395         default_len = lzx_getbits ( lzx, 1 );
 396         if ( default_len < 0 )
 397                 return default_len;
 398         if ( default_len ) {
 399                 block_len = LZX_DEFAULT_BLOCK_LEN;
 400         } else {
 401                 len_high = lzx_getbits ( lzx, 8 );
 402                 if ( len_high < 0 )
 403                         return len_high;
 404                 len_low = lzx_getbits ( lzx, 8 );
 405                 if ( len_low < 0 )
 406                         return len_low;
 407                 block_len = ( ( len_high << 8 ) | len_low );
 408         }
 409         lzx->output.threshold = ( lzx->output.offset + block_len );
 410
 411         /* Handle block type */
 412         switch ( block_type ) {
 413         case LZX_BLOCK_ALIGNOFFSET :
 414                 /* Generated aligned offset alphabet */
 415                 if ( ( rc = lzx_alignoffset_alphabet ( lzx ) ) != 0 )
 416                         return rc;
 417                 /* Fall through */
 418         case LZX_BLOCK_VERBATIM :
 419                 /* Generate main alphabet */
 420                 if ( ( rc = lzx_main_alphabet ( lzx ) ) != 0 )
 421                         return rc;
 422                 /* Generate lengths alphabet */
 423                 if ( ( rc = lzx_length_alphabet ( lzx ) ) != 0 )
 424                         return rc;
 425                 break;
 426         case LZX_BLOCK_UNCOMPRESSED :
 427                 /* Align input stream */
 428                 if ( ( rc = lzx_align ( lzx, 1 ) ) != 0 )
 429                         return rc;
 430                 /* Read new repeated offsets */
 431                 if ( ( rc = lzx_getbytes ( lzx, &lzx->repeated_offset,
 432                                            sizeof ( lzx->repeated_offset )))!=0)
 433                         return rc;
 434                 break;
 435         default:
 436                 DBG ( "Unrecognised block type %d\n", block_type );
 437                 return -1;
 438         }
 439
 440         return 0;
 441 }
 442
 443 /**
 444  * Process uncompressed data
 445  *
 446  * @v lzx               Decompressor
 447  * @ret rc              Return status code
 448  */
 449 static int lzx_uncompressed ( struct lzx *lzx ) {
 450         void *data;
 451         size_t len;
 452         int rc;
 453
 454         /* Copy bytes */
 455         data = ( lzx->output.data ?
 456                  ( lzx->output.data + lzx->output.offset ) : NULL );
 457         len = ( lzx->output.threshold - lzx->output.offset );
 458         if ( ( rc = lzx_getbytes ( lzx, data, len ) ) != 0 )
 459                 return rc;
 460
 461         /* Align input stream */
 462         if ( len % 2 )
 463                 lzx->input.offset++;
 464
 465         return 0;
 466 }
 467
 468 /**
 469  * Process an LZX token
 470  *
 471  * @v lzx               Decompressor
 472  * @ret rc              Return status code
 473  *
 474  * Variable names are chosen to match the LZX specification
 475  * pseudo-code.
 476  */
 477 static int lzx_token ( struct lzx *lzx ) {
 478         unsigned int length_header;
 479         unsigned int position_slot;
 480         unsigned int offset_bits;
 481         unsigned int i;
 482         size_t match_offset;
 483         size_t match_length;
 484         int verbatim_bits;
 485         int aligned_bits;
 486         int maindata;
 487         int length;
 488         uint8_t *copy;
 489
 490         /* Get maindata symelse*/
 491         maindata = lzx_decode ( lzx, &lzx->main );
 492         if ( maindata < 0 )
 493                 return maindata;
 494
 495         /* Check for literals */
 496         if ( maindata < LZX_MAIN_LIT_CODES ) {
 497                 if ( lzx->output.data )
 498                         lzx->output.data[lzx->output.offset] = maindata;
 499                 lzx->output.offset++;
 500                 return 0;
 501         }
 502         maindata -= LZX_MAIN_LIT_CODES;
 503
 504         /* Calculate the match length */
 505         length_header = ( maindata & 7 );
 506         if ( length_header == 7 ) {
 507                 length = lzx_decode ( lzx, &lzx->length );
 508                 if ( length < 0 )
 509                         return length;
 510         } else {
 511                 length = 0;
 512         }
 513         match_length = ( length_header + 2 + length );
 514
 515         /* Calculate the position slot */
 516         position_slot = ( maindata >> 3 );
 517         if ( position_slot < LZX_REPEATED_OFFSETS ) {
 518
 519                 /* Repeated offset */
 520                 match_offset = lzx->repeated_offset[position_slot];
 521                 lzx->repeated_offset[position_slot] = lzx->repeated_offset[0];
 522                 lzx->repeated_offset[0] = match_offset;
 523
 524         } else {
 525
 526                 /* Non-repeated offset */
 527                 offset_bits = lzx_footer_bits ( position_slot );
 528                 if ( ( lzx->block_type == LZX_BLOCK_ALIGNOFFSET ) &&
 529                      ( offset_bits >= 3 ) ) {
 530                         verbatim_bits = lzx_getbits ( lzx, ( offset_bits - 3 ));
 531                         if ( verbatim_bits < 0 )
 532                                 return verbatim_bits;
 533                         verbatim_bits <<= 3;
 534                         aligned_bits = lzx_decode ( lzx, &lzx->alignoffset );
 535                         if ( aligned_bits < 0 )
 536                                 return aligned_bits;
 537                 } else {
 538                         verbatim_bits = lzx_getbits ( lzx, offset_bits );
 539                         if ( verbatim_bits < 0 )
 540                                 return verbatim_bits;
 541                         aligned_bits = 0;
 542                 }
 543                 match_offset = ( lzx_position_base[position_slot] +
 544                                  verbatim_bits + aligned_bits - 2 );
 545
 546                 /* Update repeated offset list */
 547                 for ( i = ( LZX_REPEATED_OFFSETS - 1 ) ; i > 0 ; i-- )
 548                         lzx->repeated_offset[i] = lzx->repeated_offset[ i - 1 ];
 549                 lzx->repeated_offset[0] = match_offset;
 550         }
 551
 552         /* Copy data */
 553         if ( match_offset > lzx->output.offset ) {
 554                 DBG ( "LZX match underrun out 0x%x offset 0x%x len 0x%x\n",
 555                       lzx->output.offset, match_offset, match_length );
 556                 return -1;
 557         }
 558         if ( lzx->output.data ) {
 559                 copy = &lzx->output.data[lzx->output.offset];
 560                 for ( i = 0 ; i < match_length ; i++ )
 561                         copy[i] = copy[ i - match_offset ];
 562         }
 563         lzx->output.offset += match_length;
 564
 565         return 0;
 566 }
 567
 568 /**
 569  * Translate E8 jump addresses
 570  *
 571  * @v lzx               Decompressor
 572  */
 573 static void lzx_translate_jumps ( struct lzx *lzx ) {
 574         size_t offset;
 575         int32_t *target;
 576
 577         /* Sanity check */
 578         if ( lzx->output.offset < 10 )
 579                 return;
 580
 581         /* Scan for jump instructions */
 582         for ( offset = 0 ; offset < ( lzx->output.offset - 10 ) ; offset++ ) {
 583
 584                 /* Check for jump instruction */
 585                 if ( lzx->output.data[offset] != 0xe8 )
 586                         continue;
 587
 588                 /* Translate jump target */
 589                 target = ( ( int32_t * ) &lzx->output.data[ offset + 1 ] );
 590                 if ( *target >= 0 ) {
 591                         if ( *target < LZX_WIM_MAGIC_FILESIZE )
 592                                 *target -= offset;
 593                 } else {
 594                         if ( *target >= -( ( int32_t ) offset ) )
 595                                 *target += LZX_WIM_MAGIC_FILESIZE;
 596                 }
 597                 offset += sizeof ( *target );
 598         }
 599 }
 600
 601 /**
 602  * Decompress LZX-compressed data
 603  *
 604  * @v data              Compressed data
 605  * @v len               Length of compressed data
 606  * @v buf               Decompression buffer, or NULL
 607  * @ret out_len         Length of decompressed data, or negative error
 608  */
 609 ssize_t lzx_decompress ( const void *data, size_t len, void *buf ) {
 610         struct lzx lzx;
 611         unsigned int i;
 612         int rc;
 613
 614         /* Sanity check */
 615         if ( len % 2 ) {
 616                 DBG ( "LZX cannot handle odd-length input data\n" );
 617                 return -1;
 618         }
 619
 620         /* Initialise global state, if required */
 621         if ( ! lzx_position_base[ LZX_POSITION_SLOTS - 1 ] ) {
 622                 for ( i = 1 ; i < LZX_POSITION_SLOTS ; i++ ) {
 623                         lzx_position_base[i] =
 624                                 ( lzx_position_base[i-1] +
 625                                   ( 1 << lzx_footer_bits ( i - 1 ) ) );
 626                 }
 627         }
 628
 629         /* Initialise decompressor */
 630         memset ( &lzx, 0, sizeof ( lzx ) );
 631         lzx.input.data = data;
 632         lzx.input.len = len;
 633         lzx.output.data = buf;
 634         for ( i = 0 ; i < LZX_REPEATED_OFFSETS ; i++ )
 635                 lzx.repeated_offset[i] = 1;
 636
 637         /* Process blocks */
 638         while ( lzx.input.offset < lzx.input.len ) {
 639
 640                 /* Process block header */
 641                 if ( ( rc = lzx_block_header ( &lzx ) ) != 0 )
 642                         return rc;
 643
 644                 /* Process block contents */
 645                 if ( lzx.block_type == LZX_BLOCK_UNCOMPRESSED ) {
 646
 647                         /* Copy uncompressed data */
 648                         if ( ( rc = lzx_uncompressed ( &lzx ) ) != 0 )
 649                                 return rc;
 650
 651                 } else {
 652
 653                         /* Process token stream */
 654                         while ( lzx.output.offset < lzx.output.threshold ) {
 655                                 if ( ( rc = lzx_token ( &lzx ) ) != 0 )
 656                                         return rc;
 657                         }
 658                 }
 659         }
 660
 661         /* Postprocess to undo E8 jump compression */
 662         if ( lzx.output.data )
 663                 lzx_translate_jumps ( &lzx );
 664
 665         return lzx.output.offset;
 666 }