From f7491326a7640c1a590c2d6aa153ed29d7b74014 Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Fri, 11 Mar 2022 17:09:31 +0300 Subject: [PATCH] Initial commit --- README | 56 ++++++++++++++++++ cmd/index/main.go | 138 +++++++++++++++++++++++++++++++++++++++++++++ cmd/search/main.go | 85 ++++++++++++++++++++++++++++ cmd/show/main.go | 20 +++++++ go.mod | 5 ++ go.sum | 2 + torrent.go | 29 ++++++++++ 7 files changed, 335 insertions(+) create mode 100644 README create mode 100644 cmd/index/main.go create mode 100644 cmd/search/main.go create mode 100644 cmd/show/main.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 torrent.go diff --git a/README b/README new file mode 100644 index 0000000..fae6d14 --- /dev/null +++ b/README @@ -0,0 +1,56 @@ +rutrackerer -- index and search through Rutracker's XML + +Rutracker now sits behind Cloudflare, so you cannot use it +anymore without compromising your computer with required downloadable +JavaScript programs. + +But you can get its full contents (without comments) in XML form from +https://rutracker.org/forum/viewtopic.php?t=5591249 + + $ curl "http://api.rutracker.org/v1/get_tor_hash?by=topic_id&val=5591249" + {"result":{"5591249":"FE63F7FA266E8F3F812FF637EB9025413CB58D51"}} + $ aria2c "magnet:?xt=urn:btih:FE63F7FA266E8F3F812FF637EB9025413CB58D51" + +It is recommended to recompress it with Zstandard, to save space and +quickly feed it to further commands. That XML is huge, so you may have +trouble searching it quickly. So here is the indexer, which extracts +titles, sizes, hashes and XML offsets: + + $ cmd/index < rutracker-XXX.xml > rutracker.gob + +On my computer it runs for several minutes, which is pretty fast. + +After that, you can search for the desired title like this: + + $ cmd/search -i "borknagar .* origin" < rutracker.gob + (Avantgarde/Folk/Black Metal) Borknagar - Коллекция (6 Original CD) [...] 
+ 2011-06-13T12:05:00 2.1 GiB 10625776348 + https://rutracker.org/forum/viewtopic.php?t=3571053 + magnet:?xt=urn:btih:69A167FC69640304D0EF410ADDFBED80C1156989 + (Rock, Acoustic) [LP] [24/96] Borknagar - Origin - 2018 (2006), FLAC + 2021-04-14T10:25:00 762 MiB 22050441855 + https://rutracker.org/forum/viewtopic.php?t=6040594 + magnet:?xt=urn:btih:E562633975C28E3CC385F7AD7446D53B3DD883D1 + +Titles are fed into external "grep" command's stdin and all command +line arguments are also proxied as is. GNU Grep is really pretty fast at +that job. + +Optionally you can view corresponding contents from that XML, by +providing the offset: + + $ zstd -d < rutracker-XXX.xml.zst | cmd/show 22050441855 + (Rock, Acoustic) [LP] [24/96] Borknagar – Origin - 2018 (2006) [...] + <torrent hash="E562633975C28E3CC385F7AD7446D53B3DD883D1" tracker_id="2"/> + <forum id="1756">Hi-Res форматы, оцифровки - Оцифровки с аналоговых [...] + [b]Жанр[/b]: Rock, Acoustic + [b]Носитель[/b]: LP + [b]Год выпуска[/b]: 2018 (2006) + [b]Лейбл[/b]: Cosmic Key Creations ‎– CKC041 + [b]Страна-производитель[/b]: Netherlands + [b]Аудио кодек[/b]: FLAC + [b]Тип рипа[/b]: tracks + [b]Формат записи[/b]: 24/96 + [b]Формат раздачи[/b]: 24/96 + [b]Продолжительность[/b]: 00:35:32 + [...] 
diff --git a/cmd/index/main.go b/cmd/index/main.go
new file mode 100644
index 0000000..f221dd7
--- /dev/null
+++ b/cmd/index/main.go
@@ -0,0 +1,150 @@
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"crypto/sha1"
+	"encoding/gob"
+	"encoding/hex"
+	"encoding/xml"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+	"time"
+
+	"go.stargrave.org/rutrackerer"
+)
+
+// main reads the Rutracker XML dump from stdin and writes one index record
+// per top-level <torrent> element to stdout: gob-encoded by default, or as
+// text lines produced by Torrent.CSV when -csv is given.
+func main() {
+	doCSV := flag.Bool("csv", false, "Output CSV instead of gob")
+	flag.Parse()
+	d := xml.NewDecoder(bufio.NewReader(os.Stdin))
+	// Skip everything up to the root <torrents> element.
+	for {
+		t, err := d.Token()
+		if err != nil {
+			panic(err)
+		}
+		if e, ok := t.(xml.StartElement); ok && e.Name.Local == "torrents" {
+			break
+		}
+	}
+	bufStdout := bufio.NewWriter(os.Stdout)
+	var gobEnc *gob.Encoder
+	if !*doCSV {
+		gobEnc = gob.NewEncoder(bufStdout)
+	}
+	emptyHash := make([]byte, sha1.Size)
+	// emit validates a completed record and writes it to bufStdout.
+	emit := func(torrent *rutrackerer.Torrent) {
+		if torrent.Title == "" {
+			panic("empty title")
+		}
+		if torrent.Id == 0 {
+			panic("empty id")
+		}
+		if torrent.Size == 0 {
+			panic("empty size")
+		}
+		if bytes.Equal(torrent.Hash[:], emptyHash) {
+			panic("empty hash")
+		}
+		if *doCSV {
+			if _, err := fmt.Fprintln(bufStdout, torrent.CSV()); err != nil {
+				panic(err)
+			}
+			return
+		}
+		if err := gobEnc.Encode(torrent); err != nil {
+			panic(err)
+		}
+	}
+	var torrent *rutrackerer.Torrent
+	for {
+		t, err := d.Token()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			panic(err)
+		}
+		e, ok := t.(xml.StartElement)
+		if !ok {
+			continue
+		}
+		switch e.Name.Local {
+		case "title":
+			if torrent == nil {
+				panic("title outside of torrent")
+			}
+			t, err = d.Token()
+			if err != nil {
+				panic(err)
+			}
+			c, ok := t.(xml.CharData)
+			if !ok {
+				panic("non-character data after title")
+			}
+			torrent.Title = string(c)
+		case "torrent":
+			if len(e.Attr) < 3 {
+				// A <torrent> element with fewer than three attributes is
+				// the nested one; only its hash attribute is used.
+				if torrent == nil {
+					panic("hash outside of torrent")
+				}
+				for _, attr := range e.Attr {
+					if attr.Name.Local != "hash" {
+						continue
+					}
+					if len(attr.Value) != sha1.Size*2 {
+						panic("bad hash size")
+					}
+					if _, err = hex.Decode(torrent.Hash[:], []byte(attr.Value)); err != nil {
+						panic(err)
+					}
+				}
+				continue
+			}
+			// A new top-level record starts: write out the previous one.
+			if torrent != nil {
+				emit(torrent)
+			}
+			torrent = new(rutrackerer.Torrent)
+			for _, attr := range e.Attr {
+				switch attr.Name.Local {
+				case "id":
+					torrent.Id, err = strconv.ParseInt(attr.Value, 10, 64)
+					if err != nil {
+						panic(err)
+					}
+					torrent.Offset = d.InputOffset()
+				case "registred_at":
+					torrent.Registered, err = time.Parse(
+						"2006.01.02 15:04:05", attr.Value,
+					)
+					if err != nil {
+						panic(err)
+					}
+				case "size":
+					torrent.Size, err = strconv.ParseInt(attr.Value, 10, 64)
+					if err != nil {
+						panic(err)
+					}
+				}
+			}
+		}
+	}
+	// The last record has no following <torrent> to trigger its emission.
+	if torrent != nil {
+		emit(torrent)
+	}
+	if err := bufStdout.Flush(); err != nil {
+		panic(err)
+	}
+}
diff --git a/cmd/search/main.go b/cmd/search/main.go
new file mode 100644
index 0000000..0645be0
--- /dev/null
+++ b/cmd/search/main.go
@@ -0,0 +1,103 @@
+package main
+
+import (
+	"bufio"
+	"encoding/gob"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/dustin/go-humanize"
+	"go.stargrave.org/rutrackerer"
+)
+
+// main decodes the gob index from stdin, feeds every title to "grep -n"
+// (with the command line arguments passed through) and prints the index
+// records whose line numbers grep reports.
+func main() {
+	cmd := exec.Command("grep", append([]string{"-n"}, os.Args[1:]...)...)
+	// Let grep report its own diagnostics (bad pattern, unknown option).
+	cmd.Stderr = os.Stderr
+	grepStdin, err := cmd.StdinPipe()
+	if err != nil {
+		panic(err)
+	}
+	grepStdout, err := cmd.StdoutPipe()
+	if err != nil {
+		panic(err)
+	}
+	if err = cmd.Start(); err != nil {
+		panic(err)
+	}
+	// torrents grows in this goroutine while the printer goroutine indexes
+	// into it, so every access is guarded by mu.
+	var mu sync.Mutex
+	torrents := make([]*rutrackerer.Torrent, 0, 1<<20)
+	printer := make(chan struct{})
+	go func() {
+		defer close(printer)
+		scanner := bufio.NewScanner(grepStdout)
+		for scanner.Scan() {
+			line := scanner.Text()
+			// "grep -n" prefixes each match with "LINENO:".
+			sep := strings.IndexByte(line, ':')
+			if sep < 0 {
+				panic("unexpected grep output: " + line)
+			}
+			n, err := strconv.Atoi(line[:sep])
+			if err != nil {
+				panic(err)
+			}
+			mu.Lock()
+			torrent := torrents[n-1]
+			mu.Unlock()
+			fmt.Printf(
+				`%s
+	%s %s %d
+	https://rutracker.org/forum/viewtopic.php?t=%d
+	magnet:?xt=urn:btih:%s
+`,
+				torrent.Title,
+				torrent.Registered.Format("2006-01-02T15:04:05"),
+				humanize.IBytes(uint64(torrent.Size)),
+				torrent.Offset,
+				torrent.Id,
+				strings.ToUpper(hex.EncodeToString(torrent.Hash[:])),
+			)
+		}
+		if err := scanner.Err(); err != nil {
+			panic(err)
+		}
+	}()
+	gobDec := gob.NewDecoder(bufio.NewReader(os.Stdin))
+	for {
+		var torrent rutrackerer.Torrent
+		if err = gobDec.Decode(&torrent); err != nil {
+			if err == io.EOF {
+				break
+			}
+			panic(err)
+		}
+		mu.Lock()
+		torrents = append(torrents, &torrent)
+		mu.Unlock()
+		if _, err = grepStdin.Write([]byte(torrent.Title + "\n")); err != nil {
+			panic(err)
+		}
+	}
+	grepStdin.Close()
+	<-printer
+	if err = cmd.Wait(); err != nil {
+		// grep exits with status 1 when nothing matched: pass that on
+		// instead of crashing with a stack trace.
+		if ee, ok := err.(*exec.ExitError); ok && ee.ExitCode() == 1 {
+			os.Exit(1)
+		}
+		panic(err)
+	}
+}
diff --git a/cmd/show/main.go b/cmd/show/main.go
new file mode 100644
index 0000000..afffe12
--- /dev/null
+++ b/cmd/show/main.go
@@ -0,0 +1,33 @@
+package main
+
+import (
+	"bufio"
+	"io"
+	"os"
+	"strconv"
+)
+
+// main skips the number of bytes given as the only argument on stdin and
+// copies everything after it to stdout.
+func main() {
+	if len(os.Args) != 2 {
+		os.Stderr.WriteString("Usage: show OFFSET < rutracker.xml\n")
+		os.Exit(1)
+	}
+	// Offsets are int64 in the index and can exceed 32 bits, so do not
+	// squeeze them through int.
+	skip, err := strconv.ParseInt(os.Args[1], 10, 64)
+	if err != nil {
+		panic(err)
+	}
+	if skip < 0 {
+		panic("negative offset")
+	}
+	br := bufio.NewReader(os.Stdin)
+	if _, err = io.CopyN(io.Discard, br, skip); err != nil {
+		panic(err)
+	}
+	if _, err = io.Copy(os.Stdout, br); err != nil {
+		panic(err)
+	}
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..930aabb
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module go.stargrave.org/rutrackerer
+
+go 1.17
+
+require github.com/dustin/go-humanize v1.0.0
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..4d39dd1
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
+github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
diff --git a/torrent.go b/torrent.go
new file mode 100644
index 0000000..2d93c7e
--- /dev/null
+++ b/torrent.go
@@ -0,0 +1,37 @@
+// Package rutrackerer holds the index record type shared by the
+// rutrackerer commands.
+package rutrackerer
+
+import (
+	"crypto/sha1"
+	"encoding/hex"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// Torrent is a single index record.
+type Torrent struct {
+	Offset     int64
+	Id         int64
+	Size       int64
+	Title      string
+	Hash       [sha1.Size]byte
+	Registered time.Time
+}
+
+// CSV renders the record as tab-separated fields, in order: offset, id,
+// size, lowercase hex hash, registration time and title.
+func (t *Torrent) CSV() string {
+	fields := make([]string, 0, 6)
+	for _, n := range []int64{t.Offset, t.Id, t.Size} {
+		fields = append(fields, strconv.FormatInt(n, 10))
+	}
+	fields = append(
+		fields,
+		hex.EncodeToString(t.Hash[:]),
+		t.Registered.Format("2006-01-02T15:04:05"),
+		t.Title,
+	)
+	return strings.Join(fields, "\t")
+}
-- 
2.44.0