-/*
-unzstd -- .warc.zst decompressor
-Copyright (C) 2021 Sergey Matveev <stargrave@stargrave.org>
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, version 3 of the License.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+// unzstd -- .warc.zst decompressor
+// Copyright (C) 2021-2024 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
// https://iipc.github.io/warc-specifications/specifications/warc-zstd/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/endian.h>
#include <zstd.h>
+#ifdef __FreeBSD__
+#include "capsicum.c.in"
+#include <capsicum_helpers.h>
+#include <err.h>
+#include <sysexits.h>
+#endif // __FreeBSD__
+
+// Decode a 32-bit little-endian unsigned integer from the first four
+// bytes of buf.
+//
+// The bytes are read through an unsigned char pointer before widening:
+// plain char may be signed, and a byte >= 0x80 would then sign-extend to
+// 0xFFFFFFxx when converted to uint32_t, setting every higher bit of the
+// result once OR-ed in.
+static uint32_t
+le32dec(const char buf[4])
+{
+	const unsigned char *p = (const unsigned char *)buf;
+	return (uint32_t)p[3] << 24 | (uint32_t)p[2] << 16 |
+	    (uint32_t)p[1] << 8 | (uint32_t)p[0];
+}
+// Entry point: reads stdin, handles an optional leading dictionary frame, writes decompressed data to stdout and, when fd 3 is usable, prints a tab-separated record to it each time ZSTD_decompressStream returns 0.
int
main(int argc, char **argv)
{
+ FILE *fdOff = fdopen(3, "wb"); // optional offsets output; stays NULL if fd 3 cannot be wrapped, and every use below checks for that
+#ifdef __FreeBSD__
+ if ((fdOff != NULL) && (caph_limit_stream(3, CAPH_WRITE)) != 0) { // limit fd 3 to CAPH_WRITE before capsicum_start()
+ errx(EX_OSERR, "can not caph_limit_stream(3)");
+ }
+ capsicum_start(); // NOTE(review): defined in capsicum.c.in, not visible here; presumably enters capability mode, confirm
+#endif // __FreeBSD__
+
ZSTD_DCtx *ctx = ZSTD_createDCtx();
if (ctx == NULL) {
fputs("can not initialize ZSTD_DCtx\n", stderr);
return 1;
- };
- FILE *fdOff = fdopen(3, "wb");
- int rc = EXIT_FAILURE;
- uint8_t *bufIn = NULL;
- uint8_t *bufOut = NULL;
+ }
+ int rc = EXIT_FAILURE;
+ char *bufIn = NULL;
+ char *bufOut = NULL;
const size_t bufInSize = ZSTD_DStreamInSize();
- bufIn = malloc(bufInSize);
+ bufIn = malloc(bufInSize);
if (bufIn == NULL) {
fputs("no memory\n", stderr);
goto Exit;
- };
+ }
const size_t bufOutSize = ZSTD_DStreamOutSize();
- bufOut = malloc(bufOutSize);
+ bufOut = malloc(bufOutSize);
if (bufOut == NULL) {
fputs("no memory\n", stderr);
goto Exit;
- };
+ }
unsigned long long bufSize = 0;
- ZSTD_inBuffer bIn = {bufIn, 0, 0};
+ ZSTD_inBuffer bIn = {bufIn, 0, 0};
ZSTD_outBuffer bOut = {bufOut, 0, 0};
- bool isEmpty = true;
- bool lastBlock = false;
- size_t n = 0;
- size_t written = 0;
- size_t offset = 0;
+ bool isEmpty = true;
+ bool lastBlock = false;
+ size_t n = 0;
+ size_t written = 0; // output bytes written to stdout since ZSTD_decompressStream last returned 0
+ size_t offset = 0;
size_t offsetPrev = 0;
- size_t zCode = 0;
+ size_t zCode = 0;
ReadAgain:
for (;;) {
n = fread(bufIn, 1, bufInSize, stdin);
} else {
perror("can not fread(FILE)");
goto Exit;
- };
- };
+ }
+ }
if (n >= 8 && le32dec(bufIn) == 0x184D2A5D) {
// dictionary frame
size_t dictSize = (size_t)le32dec(bufIn + 4);
- uint8_t *dict = malloc(dictSize);
+ char *dict = malloc(dictSize); // NOTE(review): dictSize comes from the input; no check is visible here that it covers the n - 8 bytes copied into dict below
if (dict == NULL) {
fprintf(stderr, "insufficient memory for dictionary: %zu\n", dictSize);
goto Exit;
- };
+ }
const size_t alreadyRead = n - 8;
memcpy(dict, bufIn + 8, alreadyRead);
errno = 0;
- n = fread(dict + alreadyRead, 1, dictSize - alreadyRead, stdin);
+ n = fread(dict + alreadyRead, 1, dictSize - alreadyRead, stdin);
if (n != dictSize - alreadyRead) {
perror("can not read dictionary data");
free(dict);
goto Exit;
- };
- offset = dictSize + 8;
+ }
+ offset = dictSize + 8; // 8 = the two 32-bit words decoded from bufIn and bufIn + 4 above
offsetPrev = offset;
if (fdOff != NULL) {
fprintf(fdOff, "%zu\t0\n", offset);
- };
+ }
uint32_t hdr = le32dec(dict);
switch (hdr) {
case ZSTD_MAGIC_DICTIONARY:
"can not load dictionary: %s\n",
ZSTD_getErrorName(zCode));
goto Exit;
- };
+ }
goto ReadAgain;
- break;
case ZSTD_MAGICNUMBER:
bufSize = ZSTD_getFrameContentSize(dict, dictSize);
switch (bufSize) {
fprintf(stderr, "can not determine dictionary's size\n");
free(dict);
goto Exit;
- };
- uint8_t *buf = malloc(bufSize);
+ }
+ char *buf = malloc(bufSize);
if (buf == NULL) {
fprintf(
stderr, "insufficient memory for dictionary: %llu\n", bufSize);
free(dict);
goto Exit;
- };
+ }
zCode = ZSTD_decompress(buf, bufSize, dict, dictSize);
free(dict);
if (ZSTD_isError(zCode)) {
ZSTD_getErrorName(zCode));
free(buf);
goto Exit;
- };
+ }
zCode = ZSTD_DCtx_loadDictionary(ctx, buf, zCode);
free(buf);
if ((zCode != 0) && (ZSTD_isError(zCode))) {
"can not load dictionary: %s\n",
ZSTD_getErrorName(zCode));
goto Exit;
- };
+ }
goto ReadAgain;
- break;
default:
fprintf(stderr, "unknown dictionary header\n");
free(dict);
goto Exit;
- };
- };
- isEmpty = false;
+ }
+ }
+ isEmpty = false;
bIn.size = n;
- bIn.pos = 0;
+ bIn.pos = 0;
while (bIn.pos < bIn.size) {
bOut.size = bufOutSize;
- bOut.pos = 0;
- zCode = ZSTD_decompressStream(ctx, &bOut, &bIn);
+ bOut.pos = 0;
+ zCode = ZSTD_decompressStream(ctx, &bOut, &bIn);
if ((zCode != 0) && (ZSTD_isError(zCode))) {
fprintf(stderr, "can not decompress: %s\n", ZSTD_getErrorName(zCode));
goto Exit;
- };
+ }
n = fwrite(bufOut, 1, bOut.pos, stdout);
if (n != bOut.pos) {
perror("can not fwrite(stdout)");
goto Exit;
- };
+ }
written += n;
if (zCode == 0) {
offset += bIn.pos;
if (fdOff != NULL) {
fprintf(fdOff, "%zu\t%zu\n", offset - offsetPrev, written);
- };
+ }
offsetPrev = offset + bIn.pos;
- written = 0;
- };
- };
+ written = 0; // begin a fresh output count for the next record
+ }
+ }
if (lastBlock) {
break;
- };
+ }
offset += bIn.pos;
- };
+ }
if (isEmpty) {
fputs("empty input\n", stderr);
goto Exit;
- };
+ }
if (zCode != 0) {
fprintf(stderr, "unfinished decompression: %s\n", ZSTD_getErrorName(zCode));
goto Exit;
- };
+ }
rc = EXIT_SUCCESS;
Exit:
if (bufOut != NULL) {
free(bufOut);
- };
+ }
if (bufIn != NULL) {
free(bufIn);
- };
+ }
ZSTD_freeDCtx(ctx);
if ((fdOff != NULL) && (fclose(fdOff) != 0)) {
perror("can not fclose(4)");
return EXIT_FAILURE;
- };
+ } // NOTE(review): the perror text above says fclose(4), but fdOff wraps descriptor 3
return rc;
-};
+}