]> Sergey Matveev's repositories - rutrackerer.git/commitdiff
Initial commit
authorSergey Matveev <stargrave@stargrave.org>
Fri, 11 Mar 2022 14:09:31 +0000 (17:09 +0300)
committerSergey Matveev <stargrave@stargrave.org>
Fri, 11 Mar 2022 14:55:07 +0000 (17:55 +0300)
README [new file with mode: 0644]
cmd/index/main.go [new file with mode: 0644]
cmd/search/main.go [new file with mode: 0644]
cmd/show/main.go [new file with mode: 0644]
go.mod [new file with mode: 0644]
go.sum [new file with mode: 0644]
torrent.go [new file with mode: 0644]

diff --git a/README b/README
new file mode 100644 (file)
index 0000000..fae6d14
--- /dev/null
+++ b/README
@@ -0,0 +1,56 @@
+rutrackerer -- index and search through Rutracker's XML
+
+Rutracker now sits behind Cloudflare, so you cannot use it anymore
+without compromising your computer by running the JavaScript programs
+it requires you to download.
+
+But you can get its full contents (without comments) in XML form from
+https://rutracker.org/forum/viewtopic.php?t=5591249
+
+    $ curl "http://api.rutracker.org/v1/get_tor_hash?by=topic_id&val=5591249"
+    {"result":{"5591249":"FE63F7FA266E8F3F812FF637EB9025413CB58D51"}}
+    $ aria2c "magnet:?xt=urn:btih:FE63F7FA266E8F3F812FF637EB9025413CB58D51"
+
+It is recommended to recompress it with Zstandard, to save space and to
+feed it quickly into further commands. That XML is huge, so searching
+through it directly is slow. So here is the indexer, which extracts
+titles, sizes, hashes and XML offsets:
+
+    $ cmd/index < rutracker-XXX.xml > rutracker.gob
+
+On my computer it runs for several minutes, which is pretty fast.
+
+After that, you can search for the desired title like this:
+
+    $ cmd/search -i "borknagar .* origin" < rutracker.gob
+    (Avantgarde/Folk/Black Metal) Borknagar - Коллекция (6 Original CD) [...]
+            2011-06-13T12:05:00 2.1 GiB 10625776348
+            https://rutracker.org/forum/viewtopic.php?t=3571053
+            magnet:?xt=urn:btih:69A167FC69640304D0EF410ADDFBED80C1156989
+    (Rock, Acoustic) [LP] [24/96] Borknagar - Origin - 2018 (2006), FLAC
+            2021-04-14T10:25:00 762 MiB 22050441855
+            https://rutracker.org/forum/viewtopic.php?t=6040594
+            magnet:?xt=urn:btih:E562633975C28E3CC385F7AD7446D53B3DD883D1
+
+Titles are fed to the stdin of an external "grep" command, and all
+command line arguments are passed on to it as is. GNU Grep is really
+pretty fast at that job.
+
+Optionally, you can view the corresponding contents of that XML by
+providing the offset:
+
+    $ zstd -d < rutracker-XXX.xml.zst | cmd/show 22050441855
+     <title>(Rock, Acoustic) [LP] [24/96] Borknagar – Origin - 2018 (2006) [...]
+     <torrent hash="E562633975C28E3CC385F7AD7446D53B3DD883D1" tracker_id="2"/>
+     <forum id="1756">Hi-Res форматы, оцифровки - Оцифровки с аналоговых [...]
+    [b]Жанр[/b]: Rock, Acoustic
+    [b]Носитель[/b]: LP
+    [b]Год выпуска[/b]: 2018 (2006)
+    [b]Лейбл[/b]: Cosmic Key Creations &#8206;– CKC041
+    [b]Страна-производитель[/b]: Netherlands
+    [b]Аудио кодек[/b]: FLAC
+    [b]Тип рипа[/b]: tracks
+    [b]Формат записи[/b]: 24/96
+    [b]Формат раздачи[/b]: 24/96
+    [b]Продолжительность[/b]: 00:35:32
+    [...]
diff --git a/cmd/index/main.go b/cmd/index/main.go
new file mode 100644 (file)
index 0000000..f221dd7
--- /dev/null
@@ -0,0 +1,138 @@
+package main
+
+import (
+       "bufio"
+       "bytes"
+       "crypto/sha1"
+       "encoding/gob"
+       "encoding/hex"
+       "encoding/xml"
+       "flag"
+       "fmt"
+       "io"
+       "os"
+       "strconv"
+       "time"
+
+       "go.stargrave.org/rutrackerer"
+)
+
+// main reads the Rutracker XML dump from stdin and writes one index record
+// per torrent to stdout: gob-encoded by default, or as tab-separated text
+// lines when -csv is given.
+//
+// A <torrent> element with at least three attributes starts a new record
+// (id, registred_at, size); a <torrent> element with fewer attributes is the
+// nested one carrying the hash. Malformed or incomplete input aborts the
+// program with a panic.
+func main() {
+       doCSV := flag.Bool("csv", false, "Output CSV instead of gob")
+       flag.Parse()
+       br := bufio.NewReader(os.Stdin)
+       d := xml.NewDecoder(br)
+       var t xml.Token
+       var err error
+       var e xml.StartElement
+       var ok bool
+       // Skip everything up to the opening <torrents> element.
+       for {
+               t, err = d.Token()
+               if err != nil {
+                       panic(err)
+               }
+               e, ok = t.(xml.StartElement)
+               if ok && e.Name.Local == "torrents" {
+                       break
+               }
+       }
+       bufStdout := bufio.NewWriter(os.Stdout)
+       var gobEnc *gob.Encoder
+       if !*doCSV {
+               gobEnc = gob.NewEncoder(bufStdout)
+       }
+       emptyHash := make([]byte, sha1.Size)
+       var torrent *rutrackerer.Torrent
+       // emit validates the record collected so far and writes it out.
+       // It does nothing until the first record has been started.
+       emit := func() {
+               if torrent == nil {
+                       return
+               }
+               if torrent.Title == "" {
+                       panic("empty title")
+               }
+               if torrent.Id == 0 {
+                       panic("empty id")
+               }
+               if torrent.Size == 0 {
+                       panic("empty size")
+               }
+               if bytes.Equal(torrent.Hash[:], emptyHash) {
+                       panic("empty hash")
+               }
+               if *doCSV {
+                       if _, err := fmt.Fprintln(bufStdout, torrent.CSV()); err != nil {
+                               panic(err)
+                       }
+                       return
+               }
+               if err := gobEnc.Encode(torrent); err != nil {
+                       panic(err)
+               }
+       }
+       var c xml.CharData
+       var attr xml.Attr
+       for {
+               t, err = d.Token()
+               if err != nil {
+                       if err == io.EOF {
+                               break
+                       }
+                       panic(err)
+               }
+               e, ok = t.(xml.StartElement)
+               if !ok {
+                       continue
+               }
+               switch e.Name.Local {
+               case "title":
+                       if torrent == nil {
+                               panic("title outside of torrent")
+                       }
+                       t, err = d.Token()
+                       if err != nil {
+                               panic(err)
+                       }
+                       c, ok = t.(xml.CharData)
+                       if !ok {
+                               panic("non-character data after title")
+                       }
+                       torrent.Title = string(c)
+               case "torrent":
+                       if len(e.Attr) < 3 {
+                               // Nested element: only its hash attribute is of interest.
+                               for _, attr = range e.Attr {
+                                       if attr.Name.Local != "hash" {
+                                               continue
+                                       }
+                                       if torrent == nil {
+                                               panic("hash outside of torrent")
+                                       }
+                                       if len(attr.Value) != sha1.Size*2 {
+                                               panic("bad hash size")
+                                       }
+                                       _, err = hex.Decode(torrent.Hash[:], []byte(attr.Value))
+                                       if err != nil {
+                                               panic(err)
+                                       }
+                               }
+                               continue
+                       }
+                       // A new record begins: flush the previous one first.
+                       emit()
+                       torrent = new(rutrackerer.Torrent)
+                       for _, attr = range e.Attr {
+                               switch attr.Name.Local {
+                               case "id":
+                                       torrent.Id, err = strconv.ParseInt(attr.Value, 10, 64)
+                                       if err != nil {
+                                               panic(err)
+                                       }
+                                       torrent.Offset = d.InputOffset()
+                               case "registred_at":
+                                       torrent.Registered, err = time.Parse(
+                                               "2006.01.02 15:04:05", attr.Value,
+                                       )
+                                       if err != nil {
+                                               panic(err)
+                                       }
+                               case "size":
+                                       torrent.Size, err = strconv.ParseInt(attr.Value, 10, 64)
+                                       if err != nil {
+                                               panic(err)
+                                       }
+                               }
+                       }
+               }
+       }
+       // The final record has no following <torrent> to trigger its output,
+       // so it must be written explicitly once the input is exhausted.
+       emit()
+       if err = bufStdout.Flush(); err != nil {
+               panic(err)
+       }
+}
diff --git a/cmd/search/main.go b/cmd/search/main.go
new file mode 100644 (file)
index 0000000..0645be0
--- /dev/null
@@ -0,0 +1,85 @@
+package main
+
+import (
+       "bufio"
+       "encoding/gob"
+       "encoding/hex"
+       "fmt"
+       "io"
+       "os"
+       "os/exec"
+       "strconv"
+       "strings"
+       "sync"
+
+       "github.com/dustin/go-humanize"
+       "go.stargrave.org/rutrackerer"
+)
+
+// main feeds every indexed title to an external grep and prints the index
+// records whose titles grep selects.
+//
+// The gob index is read from stdin. All command line arguments are handed
+// to grep after a leading -n, so each line of grep's output starts with the
+// 1-based number of the title it matched, which is used to look the record
+// up again. When grep selects nothing the program exits with status 1, like
+// grep itself; any other failure panics.
+func main() {
+       cmd := exec.Command("grep", append([]string{"-n"}, os.Args[1:]...)...)
+       // Let grep's own diagnostics (e.g. a bad pattern) reach the user.
+       cmd.Stderr = os.Stderr
+       grepStdin, err := cmd.StdinPipe()
+       if err != nil {
+               panic(err)
+       }
+       grepStdout, err := cmd.StdoutPipe()
+       if err != nil {
+               panic(err)
+       }
+       if err = cmd.Start(); err != nil {
+               panic(err)
+       }
+       // torrents is appended to below while the printer goroutine reads it,
+       // so every access goes through mu.
+       var mu sync.Mutex
+       torrents := make([]*rutrackerer.Torrent, 0, 1<<20)
+       printer := make(chan struct{})
+       go func() {
+               scanner := bufio.NewScanner(grepStdout)
+               for scanner.Scan() {
+                       line := scanner.Text()
+                       colon := strings.IndexByte(line, ':')
+                       if colon < 0 {
+                               panic("grep output without line number: " + line)
+                       }
+                       n, err := strconv.Atoi(line[:colon])
+                       if err != nil {
+                               panic(err)
+                       }
+                       mu.Lock()
+                       if n < 1 || n > len(torrents) {
+                               mu.Unlock()
+                               panic("grep line number out of range: " + line)
+                       }
+                       torrent := torrents[n-1]
+                       mu.Unlock()
+                       fmt.Printf(
+                               `%s
+       %s %s %d
+       https://rutracker.org/forum/viewtopic.php?t=%d
+       magnet:?xt=urn:btih:%s
+`,
+                               torrent.Title,
+                               torrent.Registered.Format("2006-01-02T15:04:05"),
+                               humanize.IBytes(uint64(torrent.Size)),
+                               torrent.Offset,
+                               torrent.Id,
+                               strings.ToUpper(hex.EncodeToString(torrent.Hash[:])),
+                       )
+               }
+               if err := scanner.Err(); err != nil {
+                       panic(err)
+               }
+               close(printer)
+       }()
+       gobDec := gob.NewDecoder(bufio.NewReader(os.Stdin))
+       // Buffer the titles: one pipe write per title would cost a system
+       // call for each record of the index.
+       grepIn := bufio.NewWriter(grepStdin)
+       for {
+               var torrent rutrackerer.Torrent
+               if err = gobDec.Decode(&torrent); err != nil {
+                       if err == io.EOF {
+                               break
+                       }
+                       panic(err)
+               }
+               // Record the torrent before grep can see its title, so the
+               // printer always finds the entry a line number refers to.
+               mu.Lock()
+               torrents = append(torrents, &torrent)
+               mu.Unlock()
+               if _, err = grepIn.WriteString(torrent.Title); err != nil {
+                       panic(err)
+               }
+               if err = grepIn.WriteByte('\n'); err != nil {
+                       panic(err)
+               }
+       }
+       if err = grepIn.Flush(); err != nil {
+               panic(err)
+       }
+       if err = grepStdin.Close(); err != nil {
+               panic(err)
+       }
+       // All of grep's output must be consumed before Wait is called.
+       <-printer
+       if err = cmd.Wait(); err != nil {
+               // grep reports "no lines selected" with exit status 1; that is a
+               // normal outcome, so mirror the status instead of panicking.
+               if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
+                       os.Exit(1)
+               }
+               panic(err)
+       }
+}
diff --git a/cmd/show/main.go b/cmd/show/main.go
new file mode 100644 (file)
index 0000000..afffe12
--- /dev/null
@@ -0,0 +1,20 @@
+package main
+
+import (
+       "bufio"
+       "io"
+       "os"
+       "strconv"
+)
+
+func main() {
+       skip, err := strconv.Atoi(os.Args[1])
+       if err != nil {
+               panic(err)
+       }
+       br := bufio.NewReader(os.Stdin)
+       if _, err = br.Discard(skip); err != nil {
+               panic(err)
+       }
+       io.Copy(os.Stdout, br)
+}
diff --git a/go.mod b/go.mod
new file mode 100644 (file)
index 0000000..930aabb
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module go.stargrave.org/rutrackerer
+
+go 1.17
+
+require github.com/dustin/go-humanize v1.0.0
diff --git a/go.sum b/go.sum
new file mode 100644 (file)
index 0000000..4d39dd1
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
+github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
diff --git a/torrent.go b/torrent.go
new file mode 100644 (file)
index 0000000..2d93c7e
--- /dev/null
@@ -0,0 +1,29 @@
+package rutrackerer
+
+import (
+       "crypto/sha1"
+       "encoding/hex"
+       "strconv"
+       "strings"
+       "time"
+)
+
+// Torrent is one index record: the metadata of a single torrent entry of
+// the XML dump, as collected by cmd/index and consumed by cmd/search.
+type Torrent struct {
+       // Offset is the XML decoder's input offset recorded when the entry's
+       // id attribute was processed; cmd/show takes it as its argument.
+       Offset int64
+       // Id is the value of the entry's "id" attribute; cmd/search uses it
+       // as the t= parameter of the topic URL.
+       Id int64
+       // Size is the value of the entry's "size" attribute; cmd/search
+       // displays it as a byte count.
+       Size int64
+       // Title is the character data of the entry's <title> element.
+       Title string
+       // Hash is the decoded form of the hex "hash" attribute.
+       Hash [sha1.Size]byte
+       // Registered is parsed from the entry's "registred_at" attribute.
+       Registered time.Time
+}
+
+func (t *Torrent) CSV() string {
+       return strings.Join([]string{
+               strconv.FormatInt(t.Offset, 10),
+               strconv.FormatInt(t.Id, 10),
+               strconv.FormatInt(t.Size, 10),
+               hex.EncodeToString(t.Hash[:]),
+               t.Registered.Format("2006-01-02T15:04:05"),
+               t.Title,
+       }, "\t")
+}