]> glassweightruler.freedombox.rocks Git - Ventoy.git/blob - LZIP/smallz4cat.c
1.1.06 release
[Ventoy.git] / LZIP / smallz4cat.c
1 // //////////////////////////////////////////////////////////
2 // smallz4cat.c
3 // Copyright (c) 2016-2019 Stephan Brumme. All rights reserved.
4 // see https://create.stephan-brumme.com/smallz4/
5 //
6 // "MIT License":
7 // Permission is hereby granted, free of charge, to any person obtaining a copy
8 // of this software and associated documentation files (the "Software"),
9 // to deal in the Software without restriction, including without limitation
10 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 // and/or sell copies of the Software, and to permit persons to whom the Software
12 // is furnished to do so, subject to the following conditions:
13 //
14 // The above copyright notice and this permission notice shall be included
15 // in all copies or substantial portions of the Software.
16 //
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
18 // INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
19 // PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 // This program is a shorter, more readable, albeit slower re-implementation of lz4cat ( https://github.com/lz4/lz4 )
25
26 // compile: gcc smallz4cat.c -O3 -o smallz4cat -Wall -pedantic -std=c99 -s
27 // The static 8k binary was compiled using Clang and dietlibc (see https://www.fefe.de/dietlibc/ )
28
29 // Limitations:
30 // - skippable frames are not implemented (and most likely never will be); legacy frames are supported
31 // - checksums are not verified (see https://create.stephan-brumme.com/xxhash/ for a simple implementation)
32
33 // Replace getByteFromIn() and sendBytesToOut() by your own code if you need in-memory LZ4 decompression.
34 // Corrupted data causes a call to unlz4error().
35
36 // suppress warnings when compiled by Visual C++
37 #define _CRT_SECURE_NO_WARNINGS
38
39 #include <stdio.h> // stdin/stdout/stderr, fopen, ...
40 #include <stdlib.h> // exit()
41 #include <string.h> // memcpy
42
43 #ifndef FALSE
44 #define FALSE 0
45 #define TRUE 1
46 #endif
47
/// error handler: writes "ERROR: ", the message and a newline to stderr, then ends the program with exit status 1
static void unlz4error(const char* msg)
{
  // plain stream writes instead of fprintf(stderr, "ERROR: %s\n", msg) - the original author
  // notes that this gives a smaller static binary
  FILE* const errorStream = stderr;
  fputs("ERROR: ", errorStream);
  fputs(msg,       errorStream);
  fputs("\n",      errorStream);

  // never returns to the caller
  exit(1);
}
57
58
59 // ==================== I/O INTERFACE ====================
60
61
// callback that returns the next input byte, see getByteFromIn() for a basic implementation
typedef unsigned char (*GET_BYTE)(void* userPtr);
// callback that receives numBytes output bytes starting at data, see sendBytesToOut() for a basic implementation
typedef void (*SEND_BYTES)(const unsigned char* data, unsigned int numBytes, void* userPtr);
66
// modify input buffer size as you like ... for most use cases, bigger buffers aren't faster anymore - and even reducing to 1 byte works !
// (parenthesized so that the macro also expands correctly inside larger expressions such as x / READ_BUFFER_SIZE)
#define READ_BUFFER_SIZE (4*1024)

/// state handed to the I/O callbacks getByteFromIn() and sendBytesToOut() through their userPtr parameter
struct UserPtr
{
  // file handles
  FILE* in;
  FILE* out;
  // buffered input
  unsigned char readBuffer[READ_BUFFER_SIZE];
  // index of the next byte of readBuffer that getByteFromIn() returns
  unsigned int  pos;
  // number of valid bytes in readBuffer (result of the most recent fread), buffer is used up when pos == available
  unsigned int  available;
};
78
79 /// read a single byte (with simple buffering)
80 static unsigned char getByteFromIn(void* userPtr) // parameter "userPtr" not needed
81 {
82 /// cast user-specific data
83 struct UserPtr* user = (struct UserPtr*)userPtr;
84
85 // refill buffer
86 if (user->pos == user->available)
87 {
88 user->pos = 0;
89 user->available = fread(user->readBuffer, 1, READ_BUFFER_SIZE, user->in);
90 if (user->available == 0)
91 unlz4error("out of data");
92 }
93
94 // return a byte
95 return user->readBuffer[user->pos++];
96 }
97
98 /// write a block of bytes
99 static void sendBytesToOut(const unsigned char* data, unsigned int numBytes, void* userPtr)
100 {
101 /// cast user-specific data
102 struct UserPtr* user = (struct UserPtr*)userPtr;
103 if (data != NULL && numBytes > 0)
104 fwrite(data, 1, numBytes, user->out);
105 }
106
107
108 // ==================== LZ4 DECOMPRESSOR ====================
109
110
111 /// decompress everything in input stream (accessed via getByte) and write to output stream (via sendBytes)
112 void unlz4_userPtr(GET_BYTE getByte, SEND_BYTES sendBytes, const char* dictionary, void* userPtr)
113 {
114 // signature
115 unsigned char signature1 = getByte(userPtr);
116 unsigned char signature2 = getByte(userPtr);
117 unsigned char signature3 = getByte(userPtr);
118 unsigned char signature4 = getByte(userPtr);
119 unsigned int signature = (signature4 << 24) | (signature3 << 16) | (signature2 << 8) | signature1;
120 unsigned char isModern = (signature == 0x184D2204);
121 unsigned char isLegacy = (signature == 0x184C2102);
122 if (!isModern && !isLegacy)
123 unlz4error("invalid signature");
124
125 unsigned char hasBlockChecksum = FALSE;
126 unsigned char hasContentSize = FALSE;
127 unsigned char hasContentChecksum = FALSE;
128 unsigned char hasDictionaryID = FALSE;
129 if (isModern)
130 {
131 // flags
132 unsigned char flags = getByte(userPtr);
133 hasBlockChecksum = flags & 16;
134 hasContentSize = flags & 8;
135 hasContentChecksum = flags & 4;
136 hasDictionaryID = flags & 1;
137
138 // only version 1 file format
139 unsigned char version = flags >> 6;
140 if (version != 1)
141 unlz4error("only LZ4 file format version 1 supported");
142
143 // ignore blocksize
144 char numIgnore = 1;
145
146 // ignore, skip 8 bytes
147 if (hasContentSize)
148 numIgnore += 8;
149 // ignore, skip 4 bytes
150 if (hasDictionaryID)
151 numIgnore += 4;
152
153 // ignore header checksum (xxhash32 of everything up this point & 0xFF)
154 numIgnore++;
155
156 // skip all those ignored bytes
157 while (numIgnore--)
158 getByte(userPtr);
159 }
160
161 // don't lower this value, backreferences can be 64kb far away
162 #define HISTORY_SIZE 64*1024
163 // contains the latest decoded data
164 unsigned char history[HISTORY_SIZE];
165 // next free position in history[]
166 unsigned int pos = 0;
167
168 // dictionary compression is a recently introduced feature, just move its contents to the buffer
169 if (dictionary != NULL)
170 {
171 // open dictionary
172 FILE* dict = fopen(dictionary, "rb");
173 if (!dict)
174 unlz4error("cannot open dictionary");
175
176 // get dictionary's filesize
177 fseek(dict, 0, SEEK_END);
178 long dictSize = ftell(dict);
179 // only the last 64k are relevant
180 long relevant = dictSize < 65536 ? 0 : dictSize - 65536;
181 fseek(dict, relevant, SEEK_SET);
182 if (dictSize > 65536)
183 dictSize = 65536;
184 // read it and store it at the end of the buffer
185 fread(history + HISTORY_SIZE - dictSize, 1, dictSize, dict);
186 fclose(dict);
187 }
188
189 // parse all blocks until blockSize == 0
190 while (1)
191 {
192 // block size
193 unsigned int blockSize = getByte(userPtr);
194 blockSize |= (unsigned int)getByte(userPtr) << 8;
195 blockSize |= (unsigned int)getByte(userPtr) << 16;
196 blockSize |= (unsigned int)getByte(userPtr) << 24;
197
198 // highest bit set ?
199 unsigned char isCompressed = isLegacy || (blockSize & 0x80000000) == 0;
200 if (isModern)
201 blockSize &= 0x7FFFFFFF;
202
203 // stop after last block
204 if (blockSize == 0)
205 break;
206
207 if (isCompressed)
208 {
209 // decompress block
210 unsigned int blockOffset = 0;
211 unsigned int numWritten = 0;
212 while (blockOffset < blockSize)
213 {
214 // get a token
215 unsigned char token = getByte(userPtr);
216 blockOffset++;
217
218 // determine number of literals
219 unsigned int numLiterals = token >> 4;
220 if (numLiterals == 15)
221 {
222 // number of literals length encoded in more than 1 byte
223 unsigned char current;
224 do
225 {
226 current = getByte(userPtr);
227 numLiterals += current;
228 blockOffset++;
229 } while (current == 255);
230 }
231
232 blockOffset += numLiterals;
233
234 // copy all those literals
235 if (pos + numLiterals < HISTORY_SIZE)
236 {
237 // fast loop
238 while (numLiterals-- > 0)
239 history[pos++] = getByte(userPtr);
240 }
241 else
242 {
243 // slow loop
244 while (numLiterals-- > 0)
245 {
246 history[pos++] = getByte(userPtr);
247
248 // flush output buffer
249 if (pos == HISTORY_SIZE)
250 {
251 sendBytes(history, HISTORY_SIZE, userPtr);
252 numWritten += HISTORY_SIZE;
253 pos = 0;
254 }
255 }
256 }
257
258 // last token has only literals
259 if (blockOffset == blockSize)
260 break;
261
262 // match distance is encoded in two bytes (little endian)
263 unsigned int delta = getByte(userPtr);
264 delta |= (unsigned int)getByte(userPtr) << 8;
265 // zero isn't allowed
266 if (delta == 0)
267 unlz4error("invalid offset");
268 blockOffset += 2;
269
270 // match length (always >= 4, therefore length is stored minus 4)
271 unsigned int matchLength = 4 + (token & 0x0F);
272 if (matchLength == 4 + 0x0F)
273 {
274 unsigned char current;
275 do // match length encoded in more than 1 byte
276 {
277 current = getByte(userPtr);
278 matchLength += current;
279 blockOffset++;
280 } while (current == 255);
281 }
282
283 // copy match
284 unsigned int referencePos = (pos >= delta) ? (pos - delta) : (HISTORY_SIZE + pos - delta);
285 // start and end within the current 64k block ?
286 if (pos + matchLength < HISTORY_SIZE && referencePos + matchLength < HISTORY_SIZE)
287 {
288 // read/write continuous block (no wrap-around at the end of history[])
289 // fast copy
290 if (pos >= referencePos + matchLength || referencePos >= pos + matchLength)
291 {
292 // non-overlapping
293 memcpy(history + pos, history + referencePos, matchLength);
294 pos += matchLength;
295 }
296 else
297 {
298 // overlapping, slower byte-wise copy
299 while (matchLength-- > 0)
300 history[pos++] = history[referencePos++];
301 }
302 }
303 else
304 {
305 // either read or write wraps around at the end of history[]
306 while (matchLength-- > 0)
307 {
308 // copy single byte
309 history[pos++] = history[referencePos++];
310
311 // cannot write anymore ? => wrap around
312 if (pos == HISTORY_SIZE)
313 {
314 // flush output buffer
315 sendBytes(history, HISTORY_SIZE, userPtr);
316 numWritten += HISTORY_SIZE;
317 pos = 0;
318 }
319 // wrap-around of read location
320 referencePos %= HISTORY_SIZE;
321 }
322 }
323 }
324
325 // all legacy blocks must be completely filled - except for the last one
326 if (isLegacy && numWritten + pos < 8*1024*1024)
327 break;
328 }
329 else
330 {
331 // copy uncompressed data and add to history, too (if next block is compressed and some matches refer to this block)
332 while (blockSize-- > 0)
333 {
334 // copy a byte ...
335 history[pos++] = getByte(userPtr);
336 // ... until buffer is full => send to output
337 if (pos == HISTORY_SIZE)
338 {
339 sendBytes(history, HISTORY_SIZE, userPtr);
340 pos = 0;
341 }
342 }
343 }
344
345 if (hasBlockChecksum)
346 {
347 // ignore checksum, skip 4 bytes
348 getByte(userPtr); getByte(userPtr); getByte(userPtr); getByte(userPtr);
349 }
350 }
351
352 if (hasContentChecksum)
353 {
354 // ignore checksum, skip 4 bytes
355 getByte(userPtr); getByte(userPtr); getByte(userPtr); getByte(userPtr);
356 }
357
358 // flush output buffer
359 sendBytes(history, pos, userPtr);
360 }
361
362 /// old interface where getByte and sendBytes use global file handles
363 void unlz4(GET_BYTE getByte, SEND_BYTES sendBytes, const char* dictionary)
364 {
365 unlz4_userPtr(getByte, sendBytes, dictionary, NULL);
366 }
367
368
369 // ==================== COMMAND-LINE HANDLING ====================
370
371
372 /// parse command-line
373 int main(int argc, const char* argv[])
374 {
375 // default input/output streams
376 struct UserPtr user =
377 {
378 .in = stdin,
379 .out = stdout,
380 .pos = 0, // initial input buffer is empty
381 .available = 0
382 };
383
384 const char* dictionary = NULL;
385
386 // first command-line parameter is our input filename / but ignore "-" which stands for STDIN
387 int parameter;
388 for (parameter = 1; parameter < argc; parameter++)
389 {
390 const char* current = argv[parameter];
391 // dictionary
392 if (current[0] == '-' && current[1] == 'D')
393 {
394 if (parameter + 1 >= argc)
395 unlz4error("no dictionary filename found");
396 dictionary = argv[++parameter];
397 continue;
398 }
399
400 // filename
401 // read from STDIN, default behavior
402 if (current[0] != '-' && current[1] != '\0')
403 {
404 // already have a filename - at most one filename is allowed (except for dictionary) ?
405 if (user.in != stdin)
406 unlz4error("can only decompress one file at a time");
407 // get handle
408 user.in = fopen(argv[1], "rb");
409 if (!user.in)
410 unlz4error("file not found");
411 }
412 }
413
414 // and go !
415 unlz4_userPtr(getByteFromIn, sendBytesToOut, dictionary, &user);
416 return 0;
417 }