From f7491326a7640c1a590c2d6aa153ed29d7b74014 Mon Sep 17 00:00:00 2001
From: Sergey Matveev <stargrave@stargrave.org>
Date: Fri, 11 Mar 2022 17:09:31 +0300
Subject: [PATCH 1/1] Initial commit

---
 README             |  56 ++++++++++++++++++
 cmd/index/main.go  | 138 +++++++++++++++++++++++++++++++++++++++++++++
 cmd/search/main.go |  85 ++++++++++++++++++++++++++++
 cmd/show/main.go   |  20 +++++++
 go.mod             |   5 ++
 go.sum             |   2 +
 torrent.go         |  29 ++++++++++
 7 files changed, 335 insertions(+)
 create mode 100644 README
 create mode 100644 cmd/index/main.go
 create mode 100644 cmd/search/main.go
 create mode 100644 cmd/show/main.go
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 torrent.go

diff --git a/README b/README
new file mode 100644
index 0000000..fae6d14
--- /dev/null
+++ b/README
@@ -0,0 +1,56 @@
+rutrackerer -- index and search through Rutracker's XML
+
+Rutracker is now located behind Cloudflare, so you can no longer use it
+without compromising your computer by running the JavaScript programs
+it requires you to download.
+
+But you can get its full contents (without comments) in XML form from
+https://rutracker.org/forum/viewtopic.php?t=5591249
+
+    $ curl "http://api.rutracker.org/v1/get_tor_hash?by=topic_id&val=5591249"
+    {"result":{"5591249":"FE63F7FA266E8F3F812FF637EB9025413CB58D51"}}
+    $ aria2c "magnet:?xt=urn:btih:FE63F7FA266E8F3F812FF637EB9025413CB58D51"
+
+It is recommended to recompress it with Zstandard, to save space and to
+feed it quickly into further commands. That XML is huge, so searching in
+it quickly can be troublesome. So here is the indexer, which extracts
+titles, sizes, hashes and XML offsets:
+
+    $ cmd/index < rutracker-XXX.xml > rutracker.gob
+
+On my computer it runs for several minutes, which is pretty fast.
+
+After that, you can search for the desired title like this:
+
+    $ cmd/search -i "borknagar .* origin" < rutracker.gob
+    (Avantgarde/Folk/Black Metal) Borknagar - Коллекция (6 Original CD) [...]
+            2011-06-13T12:05:00 2.1 GiB 10625776348
+            https://rutracker.org/forum/viewtopic.php?t=3571053
+            magnet:?xt=urn:btih:69A167FC69640304D0EF410ADDFBED80C1156989
+    (Rock, Acoustic) [LP] [24/96] Borknagar - Origin - 2018 (2006), FLAC
+            2021-04-14T10:25:00 762 MiB 22050441855
+            https://rutracker.org/forum/viewtopic.php?t=6040594
+            magnet:?xt=urn:btih:E562633975C28E3CC385F7AD7446D53B3DD883D1
+
+Titles are fed into the stdin of an external "grep" command, and all
+command line arguments are passed on to it as is. GNU Grep is really
+pretty fast at that job.
+
+Optionally you can view corresponding contents from that XML, by
+providing the offset:
+
+    $ zstd -d < rutracker-XXX.xml.zst | cmd/show 22050441855
+     <title>(Rock, Acoustic) [LP] [24/96] Borknagar – Origin - 2018 (2006) [...]
+     <torrent hash="E562633975C28E3CC385F7AD7446D53B3DD883D1" tracker_id="2"/>
+     <forum id="1756">Hi-Res форматы, оцифровки - Оцифровки с аналоговых [...]
+    [b]Жанр[/b]: Rock, Acoustic
+    [b]Носитель[/b]: LP
+    [b]Год выпуска[/b]: 2018 (2006)
+    [b]Лейбл[/b]: Cosmic Key Creations &#8206;– CKC041
+    [b]Страна-производитель[/b]: Netherlands
+    [b]Аудио кодек[/b]: FLAC
+    [b]Тип рипа[/b]: tracks
+    [b]Формат записи[/b]: 24/96
+    [b]Формат раздачи[/b]: 24/96
+    [b]Продолжительность[/b]: 00:35:32
+    [...]
diff --git a/cmd/index/main.go b/cmd/index/main.go
new file mode 100644
index 0000000..f221dd7
--- /dev/null
+++ b/cmd/index/main.go
@@ -0,0 +1,138 @@
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"crypto/sha1"
+	"encoding/gob"
+	"encoding/hex"
+	"encoding/xml"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+	"time"
+
+	"go.stargrave.org/rutrackerer"
+)
+
+// main reads the Rutracker XML dump from stdin and writes one record per
+// torrent to stdout: a gob stream by default, tab-separated text with -csv.
+func main() {
+	doCSV := flag.Bool("csv", false, "Output CSV instead of gob")
+	flag.Parse()
+	d := xml.NewDecoder(bufio.NewReader(os.Stdin))
+	// Skip everything up to the opening <torrents> element.
+	for {
+		t, err := d.Token()
+		if err != nil {
+			panic(err)
+		}
+		if e, ok := t.(xml.StartElement); ok && e.Name.Local == "torrents" {
+			break
+		}
+	}
+	bufStdout := bufio.NewWriter(os.Stdout)
+	gobEnc := gob.NewEncoder(bufStdout)
+	emptyHash := make([]byte, sha1.Size)
+	var torrent *rutrackerer.Torrent
+	// emit validates the record collected so far, if any, and writes it out.
+	// It runs when the next record starts and once more at EOF, so that the
+	// last record of the dump is not lost.
+	emit := func() {
+		if torrent == nil {
+			return
+		}
+		if torrent.Title == "" {
+			panic("empty title")
+		}
+		if torrent.Id == 0 {
+			panic("empty id")
+		}
+		if torrent.Size == 0 {
+			panic("empty size")
+		}
+		if bytes.Equal(torrent.Hash[:], emptyHash) {
+			panic("empty hash")
+		}
+		if *doCSV {
+			fmt.Println(torrent.CSV())
+		} else if err := gobEnc.Encode(torrent); err != nil {
+			panic(err)
+		}
+	}
+	for {
+		t, err := d.Token()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			panic(err)
+		}
+		e, ok := t.(xml.StartElement)
+		if !ok {
+			continue
+		}
+		switch e.Name.Local {
+		case "title":
+			// The title text is taken from the single token following <title>.
+			t, err = d.Token()
+			if err != nil {
+				panic(err)
+			}
+			c, isText := t.(xml.CharData)
+			if !isText {
+				panic("non-character data after title")
+			}
+			torrent.Title = string(c)
+		case "torrent":
+			// A <torrent> with fewer than three attributes is the nested hash
+			// holder of the current record; any other one starts a new record.
+			if len(e.Attr) < 3 {
+				for _, attr := range e.Attr {
+					if attr.Name.Local != "hash" {
+						continue
+					}
+					if len(attr.Value) != sha1.Size*2 {
+						panic("bad hash size")
+					}
+					_, err = hex.Decode(torrent.Hash[:], []byte(attr.Value))
+					if err != nil {
+						panic(err)
+					}
+				}
+				continue
+			}
+			emit()
+			torrent = new(rutrackerer.Torrent)
+			for _, attr := range e.Attr {
+				switch attr.Name.Local {
+				case "id":
+					torrent.Id, err = strconv.ParseInt(attr.Value, 10, 64)
+					if err != nil {
+						panic(err)
+					}
+					// Decoder position right after this opening <torrent> tag.
+					torrent.Offset = d.InputOffset()
+				case "registred_at":
+					torrent.Registered, err = time.Parse(
+						"2006.01.02 15:04:05", attr.Value,
+					)
+					if err != nil {
+						panic(err)
+					}
+				case "size":
+					torrent.Size, err = strconv.ParseInt(attr.Value, 10, 64)
+					if err != nil {
+						panic(err)
+					}
+				}
+			}
+		}
+	}
+	emit()
+	if err := bufStdout.Flush(); err != nil {
+		panic(err)
+	}
+}
diff --git a/cmd/search/main.go b/cmd/search/main.go
new file mode 100644
index 0000000..0645be0
--- /dev/null
+++ b/cmd/search/main.go
@@ -0,0 +1,85 @@
+package main
+
+import (
+	"bufio"
+	"encoding/gob"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+
+	"github.com/dustin/go-humanize"
+	"go.stargrave.org/rutrackerer"
+)
+
+func main() {
+	cmd := exec.Command("grep", append([]string{"-n"}, os.Args[1:]...)...)
+	grepStdin, err := cmd.StdinPipe()
+	if err != nil {
+		panic(err)
+	}
+	grepStdout, err := cmd.StdoutPipe()
+	if err != nil {
+		panic(err)
+	}
+	if err = cmd.Start(); err != nil {
+		panic(err)
+	}
+	torrents := make([]*rutrackerer.Torrent, 0, 1<<20)
+	printer := make(chan struct{})
+	go func() {
+		scanner := bufio.NewScanner(grepStdout)
+		var t string
+		var i int
+		var err error
+		var torrent *rutrackerer.Torrent
+		for scanner.Scan() {
+			t = scanner.Text()
+			i = strings.IndexByte(t, ':')
+			i, err = strconv.Atoi(t[:i])
+			if err != nil {
+				panic(err)
+			}
+			torrent = torrents[i-1]
+			fmt.Printf(
+				`%s
+	%s %s %d
+	https://rutracker.org/forum/viewtopic.php?t=%d
+	magnet:?xt=urn:btih:%s
+`,
+				torrent.Title,
+				torrent.Registered.Format("2006-01-02T15:04:05"),
+				humanize.IBytes(uint64(torrent.Size)),
+				torrent.Offset,
+				torrent.Id,
+				strings.ToUpper(hex.EncodeToString(torrent.Hash[:])),
+			)
+		}
+		if err = scanner.Err(); err != nil {
+			panic(err)
+		}
+		close(printer)
+	}()
+	gobDec := gob.NewDecoder(bufio.NewReader(os.Stdin))
+	for {
+		var torrent rutrackerer.Torrent
+		if err = gobDec.Decode(&torrent); err != nil {
+			if err == io.EOF {
+				break
+			}
+			panic(err)
+		}
+		torrents = append(torrents, &torrent)
+		if _, err = grepStdin.Write([]byte(torrent.Title + "\n")); err != nil {
+			panic(err)
+		}
+	}
+	grepStdin.Close()
+	<-printer
+	if err = cmd.Wait(); err != nil {
+		panic(err)
+	}
+}
diff --git a/cmd/show/main.go b/cmd/show/main.go
new file mode 100644
index 0000000..afffe12
--- /dev/null
+++ b/cmd/show/main.go
@@ -0,0 +1,20 @@
+package main
+
+import (
+	"bufio"
+	"io"
+	"os"
+	"strconv"
+)
+
+func main() {
+	skip, err := strconv.ParseUint(os.Args[1], 10, 63) // bytes of stdin to skip; may exceed a 32-bit int
+	if err != nil {
+		panic(err)
+	}
+	br := bufio.NewReader(os.Stdin)
+	if _, err = io.CopyN(io.Discard, br, int64(skip)); err != nil {
+		panic(err)
+	}
+	io.Copy(os.Stdout, br) // the rest of stdin goes out as is; NOTE(review): copy errors are not reported
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..930aabb
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module go.stargrave.org/rutrackerer
+
+go 1.17
+
+require github.com/dustin/go-humanize v1.0.0
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..4d39dd1
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
+github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
diff --git a/torrent.go b/torrent.go
new file mode 100644
index 0000000..2d93c7e
--- /dev/null
+++ b/torrent.go
@@ -0,0 +1,29 @@
+package rutrackerer
+
+import (
+	"crypto/sha1"
+	"encoding/hex"
+	"strconv"
+	"strings"
+	"time"
+)
+
+type Torrent struct {
+	Offset     int64           // byte offset into the XML dump, as taken by cmd/index
+	Id         int64           // "id" attribute; also the viewtopic.php?t= number
+	Size       int64           // "size" attribute; cmd/search shows it as a byte count
+	Title      string          // character data of the <title> element
+	Hash       [sha1.Size]byte // hex-decoded "hash" attribute, used as the magnet btih
+	Registered time.Time       // "registred_at" attribute (sic), parsed without a zone
+}
+
+// CSV returns offset, id, size, hex hash, time and title joined by tabs.
+func (t *Torrent) CSV() string {
+	num := func(v int64) string { return strconv.FormatInt(v, 10) }
+	return strings.Join([]string{
+		num(t.Offset), num(t.Id), num(t.Size),
+		hex.EncodeToString(t.Hash[:]),
+		t.Registered.Format("2006-01-02T15:04:05"),
+		t.Title,
+	}, "\t")
+}
-- 
2.51.0