X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=cmd%2Fzstd%2Funzstd.c;h=faeda1559fa5c03ac740a177279fe4bf540537cc;hb=HEAD;hp=caddee5f6e4aebbef6822a77b42fe66f16bc6ac5;hpb=44f2c8e3e96dfc6c076f9503a89967a502b5bad7;p=tofuproxy.git diff --git a/cmd/zstd/unzstd.c b/cmd/zstd/unzstd.c index caddee5..d3708f3 100644 --- a/cmd/zstd/unzstd.c +++ b/cmd/zstd/unzstd.c @@ -1,19 +1,17 @@ -/* -unzstd -- .warc.zst decompressor -Copyright (C) 2021 Sergey Matveev - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ +// unzstd -- .warc.zst decompressor +// Copyright (C) 2021-2024 Sergey Matveev +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 3 of the License. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . // https://iipc.github.io/warc-specifications/specifications/warc-zstd/ @@ -23,7 +21,6 @@ along with this program. If not, see . #include #include #include -#include #include @@ -34,6 +31,13 @@ along with this program. If not, see . #include #endif // __FreeBSD__ +static uint32_t +le32dec(const char buf[4]) +{ + return (uint32_t)(buf[3]) << 24 | (uint32_t)(buf[2]) << 16 | + (uint32_t)(buf[1]) << 8 | (uint32_t)(buf[0]); +} + int main(int argc, char **argv) { @@ -41,7 +45,7 @@ main(int argc, char **argv) #ifdef __FreeBSD__ if ((fdOff != NULL) && (caph_limit_stream(3, CAPH_WRITE)) != 0) { errx(EX_OSERR, "can not caph_limit_stream(3)"); - }; + } capsicum_start(); #endif // __FreeBSD__ @@ -49,35 +53,35 @@ main(int argc, char **argv) if (ctx == NULL) { fputs("can not initialize ZSTD_DCtx\n", stderr); return 1; - }; - int rc = EXIT_FAILURE; - uint8_t *bufIn = NULL; - uint8_t *bufOut = NULL; + } + int rc = EXIT_FAILURE; + char *bufIn = NULL; + char *bufOut = NULL; const size_t bufInSize = ZSTD_DStreamInSize(); - bufIn = malloc(bufInSize); + bufIn = malloc(bufInSize); if (bufIn == NULL) { fputs("no memory\n", stderr); goto Exit; - }; + } const size_t bufOutSize = ZSTD_DStreamOutSize(); - bufOut = malloc(bufOutSize); + bufOut = malloc(bufOutSize); if (bufOut == NULL) { fputs("no memory\n", stderr); goto Exit; - }; + } unsigned long long bufSize = 0; - ZSTD_inBuffer bIn = {bufIn, 0, 0}; + ZSTD_inBuffer bIn = {bufIn, 0, 0}; ZSTD_outBuffer bOut = {bufOut, 0, 0}; - bool isEmpty = true; - bool lastBlock = false; - size_t n = 0; - size_t written = 0; - size_t offset = 0; + bool isEmpty = true; + bool lastBlock = false; + size_t n = 0; + size_t written = 0; + size_t offset = 0; size_t offsetPrev = 0; - size_t zCode = 0; + size_t zCode = 0; ReadAgain: for (;;) { n = fread(bufIn, 1, bufInSize, stdin); @@ -87,30 +91,30 @@ ReadAgain: } else { perror("can not fread(FILE)"); goto Exit; - }; - }; + } + } if (n >= 8 && le32dec(bufIn) == 0x184D2A5D) { // dictionary frame size_t dictSize = (size_t)le32dec(bufIn + 4); - uint8_t *dict = malloc(dictSize); + char *dict = malloc(dictSize); if (dict == NULL) { fprintf(stderr, "insufficient memory for dictionary: %zu\n", dictSize); goto Exit; - }; + } const size_t alreadyRead = n - 8; memcpy(dict, bufIn + 8, alreadyRead); errno = 0; - n = fread(dict + alreadyRead, 1, dictSize - alreadyRead, stdin); + n = fread(dict + alreadyRead, 1, dictSize - alreadyRead, stdin); if (n != dictSize - alreadyRead) { perror("can not read dictionary data"); free(dict); goto Exit; - }; - offset = dictSize + 8; + } + offset = dictSize + 8; offsetPrev = offset; if (fdOff != NULL) { fprintf(fdOff, "%zu\t0\n", offset); - }; + } uint32_t hdr = le32dec(dict); switch (hdr) { case ZSTD_MAGIC_DICTIONARY: @@ -122,9 +126,8 @@ ReadAgain: "can not load dictionary: %s\n", ZSTD_getErrorName(zCode)); goto Exit; - }; + } goto ReadAgain; - break; case ZSTD_MAGICNUMBER: bufSize = ZSTD_getFrameContentSize(dict, dictSize); switch (bufSize) { @@ -133,14 +136,14 @@ ReadAgain: fprintf(stderr, "can not determine dictionary's size\n"); free(dict); goto Exit; - }; - uint8_t *buf = malloc(bufSize); + } + char *buf = malloc(bufSize); if (buf == NULL) { fprintf( stderr, "insufficient memory for dictionary: %llu\n", bufSize); free(dict); goto Exit; - }; + } zCode = ZSTD_decompress(buf, bufSize, dict, dictSize); free(dict); if (ZSTD_isError(zCode)) { @@ -150,7 +153,7 @@ ReadAgain: ZSTD_getErrorName(zCode)); free(buf); goto Exit; - }; + } zCode = ZSTD_DCtx_loadDictionary(ctx, buf, zCode); free(buf); if ((zCode != 0) && (ZSTD_isError(zCode))) { @@ -159,68 +162,67 @@ ReadAgain: "can not load dictionary: %s\n", ZSTD_getErrorName(zCode)); goto Exit; - }; + } goto ReadAgain; - break; default: fprintf(stderr, "unknown dictionary header\n"); free(dict); goto Exit; - }; - }; - isEmpty = false; + } + } + isEmpty = false; bIn.size = n; - bIn.pos = 0; + bIn.pos = 0; while (bIn.pos < bIn.size) { bOut.size = bufOutSize; - bOut.pos = 0; - zCode = ZSTD_decompressStream(ctx, &bOut, &bIn); + bOut.pos = 0; + zCode = ZSTD_decompressStream(ctx, &bOut, &bIn); if ((zCode != 0) && (ZSTD_isError(zCode))) { fprintf(stderr, "can not decompress: %s\n", ZSTD_getErrorName(zCode)); goto Exit; - }; + } n = fwrite(bufOut, 1, bOut.pos, stdout); if (n != bOut.pos) { perror("can not fwrite(stdout)"); goto Exit; - }; + } written += n; if (zCode == 0) { offset += bIn.pos; if (fdOff != NULL) { fprintf(fdOff, "%zu\t%zu\n", offset - offsetPrev, written); - }; + } offsetPrev = offset + bIn.pos; - written = 0; - }; - }; + written = 0; + } + } if (lastBlock) { break; - }; + } offset += bIn.pos; - }; + } if (isEmpty) { fputs("empty input\n", stderr); goto Exit; - }; + } if (zCode != 0) { fprintf(stderr, "unfinished decompression: %s\n", ZSTD_getErrorName(zCode)); goto Exit; - }; + } rc = EXIT_SUCCESS; Exit: if (bufOut != NULL) { free(bufOut); - }; + } if (bufIn != NULL) { free(bufIn); - }; + } ZSTD_freeDCtx(ctx); if ((fdOff != NULL) && (fclose(fdOff) != 0)) { perror("can not fclose(4)"); return EXIT_FAILURE; - }; + } return rc; -}; +}