]>
glassweightruler.freedombox.rocks Git - Ventoy.git/blob - LZIP/smallz4cat.c
1 // //////////////////////////////////////////////////////////
3 // Copyright (c) 2016-2019 Stephan Brumme. All rights reserved.
4 // see https://create.stephan-brumme.com/smallz4/
7 // Permission is hereby granted, free of charge, to any person obtaining a copy
8 // of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the Software
12 // is furnished to do so, subject to the following conditions:
14 // The above copyright notice and this permission notice shall be included
15 // in all copies or substantial portions of the Software.
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
18 // INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
19 // PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 // This program is a shorter, more readable, albeit slower re-implementation of lz4cat ( https://github.com/lz4/lz4 )
26 // compile: gcc smallz4cat.c -O3 -o smallz4cat -Wall -pedantic -std=c99 -s
27 // The static 8k binary was compiled using Clang and dietlibc (see https://www.fefe.de/dietlibc/ )
30 // - skippable frames are not implemented (and most likely never will be); legacy frames are accepted
31 // - checksums are not verified (see https://create.stephan-brumme.com/xxhash/ for a simple implementation)
33 // Replace getByteFromIn() and sendBytesToOut() with your own code if you need in-memory LZ4 decompression.
34 // Corrupted data causes a call to unlz4error().
36 // suppress warnings when compiled by Visual C++
37 #define _CRT_SECURE_NO_WARNINGS
39 #include <stdio.h> // stdin/stdout/stderr, fopen, ...
40 #include <stdlib.h> // exit()
41 #include <string.h> // memcpy
49 static void unlz4error(const char* msg
)
51 // smaller static binary than fprintf(stderr, "ERROR: %s\n", msg);
52 fputs("ERROR: ", stderr
);
59 // ==================== I/O INTERFACE ====================
62 // read one byte from input, see getByteFromIn() for a basic implementation
63 typedef unsigned char (*GET_BYTE
) (void* userPtr
);
64 // write several bytes, see sendBytesToOut() for a basic implementation
65 typedef void (*SEND_BYTES
)(const unsigned char*, unsigned int, void* userPtr
);
72 // modify the input buffer size as you like ... for most use cases, bigger buffers aren't any faster - and even reducing it to 1 byte works !
73 #define READ_BUFFER_SIZE 4*1024
74 unsigned char readBuffer
[READ_BUFFER_SIZE
];
76 unsigned int available
;
79 /// read a single byte (with simple buffering)
80 static unsigned char getByteFromIn(void* userPtr
) // "userPtr" must point to the caller's struct UserPtr
82 /// cast user-specific data
83 struct UserPtr
* user
= (struct UserPtr
*)userPtr
;
86 if (user
->pos
== user
->available
)
89 user
->available
= fread(user
->readBuffer
, 1, READ_BUFFER_SIZE
, user
->in
);
90 if (user
->available
== 0)
91 unlz4error("out of data");
95 return user
->readBuffer
[user
->pos
++];
98 /// write a block of bytes
99 static void sendBytesToOut(const unsigned char* data
, unsigned int numBytes
, void* userPtr
)
101 /// cast user-specific data
102 struct UserPtr
* user
= (struct UserPtr
*)userPtr
;
103 if (data
!= NULL
&& numBytes
> 0)
104 fwrite(data
, 1, numBytes
, user
->out
);
108 // ==================== LZ4 DECOMPRESSOR ====================
111 /// decompress everything in input stream (accessed via getByte) and write to output stream (via sendBytes)
112 void unlz4_userPtr(GET_BYTE getByte
, SEND_BYTES sendBytes
, const char* dictionary
, void* userPtr
)
115 unsigned char signature1
= getByte(userPtr
);
116 unsigned char signature2
= getByte(userPtr
);
117 unsigned char signature3
= getByte(userPtr
);
118 unsigned char signature4
= getByte(userPtr
);
119 unsigned int signature
= (signature4
<< 24) | (signature3
<< 16) | (signature2
<< 8) | signature1
;
120 unsigned char isModern
= (signature
== 0x184D2204);
121 unsigned char isLegacy
= (signature
== 0x184C2102);
122 if (!isModern
&& !isLegacy
)
123 unlz4error("invalid signature");
125 unsigned char hasBlockChecksum
= FALSE
;
126 unsigned char hasContentSize
= FALSE
;
127 unsigned char hasContentChecksum
= FALSE
;
128 unsigned char hasDictionaryID
= FALSE
;
132 unsigned char flags
= getByte(userPtr
);
133 hasBlockChecksum
= flags
& 16;
134 hasContentSize
= flags
& 8;
135 hasContentChecksum
= flags
& 4;
136 hasDictionaryID
= flags
& 1;
138 // only version 1 file format
139 unsigned char version
= flags
>> 6;
141 unlz4error("only LZ4 file format version 1 supported");
146 // ignore, skip 8 bytes
149 // ignore, skip 4 bytes
153 // ignore header checksum (xxhash32 of everything up to this point & 0xFF)
156 // skip all those ignored bytes
161 // don't lower this value, back-references can point up to 64 KB back
162 #define HISTORY_SIZE 64*1024
163 // contains the latest decoded data
164 unsigned char history
[HISTORY_SIZE
];
165 // next free position in history[]
166 unsigned int pos
= 0;
168 // dictionary compression is a recently introduced feature, just move its contents to the buffer
169 if (dictionary
!= NULL
)
172 FILE* dict
= fopen(dictionary
, "rb");
174 unlz4error("cannot open dictionary");
176 // get dictionary's filesize
177 fseek(dict
, 0, SEEK_END
);
178 long dictSize
= ftell(dict
);
179 // only the last 64k are relevant
180 long relevant
= dictSize
< 65536 ? 0 : dictSize
- 65536;
181 fseek(dict
, relevant
, SEEK_SET
);
182 if (dictSize
> 65536)
184 // read it and store it at the end of the buffer
185 fread(history
+ HISTORY_SIZE
- dictSize
, 1, dictSize
, dict
);
189 // parse all blocks until blockSize == 0
193 unsigned int blockSize
= getByte(userPtr
);
194 blockSize
|= (unsigned int)getByte(userPtr
) << 8;
195 blockSize
|= (unsigned int)getByte(userPtr
) << 16;
196 blockSize
|= (unsigned int)getByte(userPtr
) << 24;
199 unsigned char isCompressed
= isLegacy
|| (blockSize
& 0x80000000) == 0;
201 blockSize
&= 0x7FFFFFFF;
203 // stop after last block
210 unsigned int blockOffset
= 0;
211 unsigned int numWritten
= 0;
212 while (blockOffset
< blockSize
)
215 unsigned char token
= getByte(userPtr
);
218 // determine number of literals
219 unsigned int numLiterals
= token
>> 4;
220 if (numLiterals
== 15)
222 // the number of literals is encoded in more than 1 byte
223 unsigned char current
;
226 current
= getByte(userPtr
);
227 numLiterals
+= current
;
229 } while (current
== 255);
232 blockOffset
+= numLiterals
;
234 // copy all those literals
235 if (pos
+ numLiterals
< HISTORY_SIZE
)
238 while (numLiterals
-- > 0)
239 history
[pos
++] = getByte(userPtr
);
244 while (numLiterals
-- > 0)
246 history
[pos
++] = getByte(userPtr
);
248 // flush output buffer
249 if (pos
== HISTORY_SIZE
)
251 sendBytes(history
, HISTORY_SIZE
, userPtr
);
252 numWritten
+= HISTORY_SIZE
;
258 // last token has only literals
259 if (blockOffset
== blockSize
)
262 // match distance is encoded in two bytes (little endian)
263 unsigned int delta
= getByte(userPtr
);
264 delta
|= (unsigned int)getByte(userPtr
) << 8;
265 // zero isn't allowed
267 unlz4error("invalid offset");
270 // match length (always >= 4, therefore length is stored minus 4)
271 unsigned int matchLength
= 4 + (token
& 0x0F);
272 if (matchLength
== 4 + 0x0F)
274 unsigned char current
;
275 do // match length encoded in more than 1 byte
277 current
= getByte(userPtr
);
278 matchLength
+= current
;
280 } while (current
== 255);
284 unsigned int referencePos
= (pos
>= delta
) ? (pos
- delta
) : (HISTORY_SIZE
+ pos
- delta
);
285 // start and end within the current 64k block ?
286 if (pos
+ matchLength
< HISTORY_SIZE
&& referencePos
+ matchLength
< HISTORY_SIZE
)
288 // read/write continuous block (no wrap-around at the end of history[])
290 if (pos
>= referencePos
+ matchLength
|| referencePos
>= pos
+ matchLength
)
293 memcpy(history
+ pos
, history
+ referencePos
, matchLength
);
298 // overlapping, slower byte-wise copy
299 while (matchLength
-- > 0)
300 history
[pos
++] = history
[referencePos
++];
305 // either read or write wraps around at the end of history[]
306 while (matchLength
-- > 0)
309 history
[pos
++] = history
[referencePos
++];
311 // cannot write anymore ? => wrap around
312 if (pos
== HISTORY_SIZE
)
314 // flush output buffer
315 sendBytes(history
, HISTORY_SIZE
, userPtr
);
316 numWritten
+= HISTORY_SIZE
;
319 // wrap-around of read location
320 referencePos
%= HISTORY_SIZE
;
325 // all legacy blocks must be completely filled - except for the last one
326 if (isLegacy
&& numWritten
+ pos
< 8*1024*1024)
331 // copy uncompressed data and add to history, too (if next block is compressed and some matches refer to this block)
332 while (blockSize
-- > 0)
335 history
[pos
++] = getByte(userPtr
);
336 // ... until buffer is full => send to output
337 if (pos
== HISTORY_SIZE
)
339 sendBytes(history
, HISTORY_SIZE
, userPtr
);
345 if (hasBlockChecksum
)
347 // ignore checksum, skip 4 bytes
348 getByte(userPtr
); getByte(userPtr
); getByte(userPtr
); getByte(userPtr
);
352 if (hasContentChecksum
)
354 // ignore checksum, skip 4 bytes
355 getByte(userPtr
); getByte(userPtr
); getByte(userPtr
); getByte(userPtr
);
358 // flush output buffer
359 sendBytes(history
, pos
, userPtr
);
362 /// old interface where getByte and sendBytes use global file handles
363 void unlz4(GET_BYTE getByte
, SEND_BYTES sendBytes
, const char* dictionary
)
365 unlz4_userPtr(getByte
, sendBytes
, dictionary
, NULL
);
369 // ==================== COMMAND-LINE HANDLING ====================
372 /// parse command-line
373 int main(int argc
, const char* argv
[])
375 // default input/output streams
376 struct UserPtr user
=
380 .pos
= 0, // initial input buffer is empty
384 const char* dictionary
= NULL
;
386 // first command-line parameter is our input filename / but ignore "-" which stands for STDIN
388 for (parameter
= 1; parameter
< argc
; parameter
++)
390 const char* current
= argv
[parameter
];
392 if (current
[0] == '-' && current
[1] == 'D')
394 if (parameter
+ 1 >= argc
)
395 unlz4error("no dictionary filename found");
396 dictionary
= argv
[++parameter
];
401 // read from STDIN, default behavior
402 if (current
[0] != '-' && current
[1] != '\0')
404 // already have a filename - at most one filename is allowed (except for dictionary) ?
405 if (user
.in
!= stdin
)
406 unlz4error("can only decompress one file at a time");
408 user
.in
= fopen(argv
[1], "rb");
410 unlz4error("file not found");
415 unlz4_userPtr(getByteFromIn
, sendBytesToOut
, dictionary
, &user
);