func main() {
uri := flag.String("uri", "", "URI to extract, if specified")
hdr := flag.Bool("hdr", false, "Also extract WARC's header")
- idx := flag.Bool("idx", false, "Save WARC indexes")
+ idx := flag.Bool("idx", false, "Save WARC indices")
recompress := flag.Bool("for-enzstd", false, "Output for enzstd utility")
flag.Parse()
log.SetFlags(log.Lshortfile)
}
log.Println("added", p, len(warc.WARCs[p]), "URIs")
if *idx {
- if err := warc.SaveIndexes(); err != nil {
+ if err := warc.SaveIndices(); err != nil {
log.Fatalln(err)
}
}
flexible HTTP/HTTPS proxy server, TLS terminator, X.509 TOFU manager,
@url{https://en.wikipedia.org/wiki/Web_ARChive, WARC} and
@url{https://en.wikipedia.org/wiki/Gemini_(protocol), geminispace}
-browser, written on @url{https://golang.org/, Go} with following
+browser, written in @url{https://go.dev/, Go} with the following
capabilities:
@itemize
@code{redo warc-extract.cmd} utility uses exactly the same code for
parsing WARCs. It can be used to check if WARCs can be successfully
loaded, to list all URIs after, to extract some specified URI and to
-pre-generate @file{.idx.gob} indexes.
+pre-generate @file{.idx.gob} indices.
@example
$ warc-extract.cmd -idx \
for {
for _, warcPath := range readLinesFromFIFO(p) {
if warcPath == "SAVE" {
- if err := warc.SaveIndexes(); err != nil {
+ if err := warc.SaveIndices(); err != nil {
log.Printf("%s: can not save index %s: %+v\n", p, warcPath, err)
}
continue
if strings.Contains(req.Header.Get("User-Agent"), "newsboat/") {
return true
}
- if strings.Contains(req.Header.Get("User-Agent"), "go.stargrave.org-feeder/") {
+ if strings.Contains(req.Header.Get("User-Agent"), "stargrave.org-feeder/") {
return true
}
return false
return nil
}
-func SaveIndexes() error {
+func SaveIndices() error {
WARCsM.RLock()
defer WARCsM.RUnlock()
for warcPath, uris := range WARCs {