Skip to content
This repository was archived by the owner on Jan 23, 2022. It is now read-only.

Commit 31a1610

Browse files
committed
Add Yandex Radar top10k
Closes #15
1 parent 611c1db commit 31a1610

File tree

3 files changed

+109
-20
lines changed

3 files changed

+109
-20
lines changed

README.md

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Fast HTTP service which shows a specified domain rank from following providers:
1111
- [Open PageRank](https://www.domcop.com/top-10-million-domains)
1212
- [Tranco List](https://tranco-list.eu/)
1313
- [Quantcast](https://www.quantcast.com/top-sites/)
14+
- [YandexRadar](https://radar.yandex.ru/)
1415

1516
Can be used as a base for a domain categorization / network filters /
1617
suspicious domain detection.
@@ -67,47 +68,54 @@ $: curl 127.0.0.1:8080/rank/github.com
6768
"ranks": [
6869
{
6970
"domain": "github.com",
70-
"rank": 40,
71-
"date": "2019-06-01T00:00:00Z",
72-
"source": "tranco",
71+
"rank": 2698,
72+
"date": "2019-09-07T00:00:00Z",
73+
"source": "umbrella",
7374
"raw": ""
7475
},
7576
{
7677
"domain": "github.com",
77-
"rank": 123,
78-
"date": "2019-06-01T00:00:00Z",
79-
"source": "quantcast",
80-
"raw": ""
78+
"rank": 29,
79+
"date": "2019-09-07T00:00:00Z",
80+
"source": "majestic",
81+
"raw": "29,24,github.com,com,176946,489686,github.com,com,29,24,176096,487221"
8182
},
8283
{
8384
"domain": "github.com",
84-
"rank": 2555,
85-
"date": "2019-06-01T00:00:00Z",
86-
"source": "umbrella",
85+
"rank": 26,
86+
"date": "2019-09-07T00:00:00Z",
87+
"source": "pagerank",
8788
"raw": ""
8889
},
8990
{
9091
"domain": "github.com",
91-
"rank": 45,
92-
"date": "2019-06-01T00:00:00Z",
92+
"rank": 32,
93+
"date": "2019-09-07T00:00:00Z",
9394
"source": "alexa",
9495
"raw": ""
9596
},
9697
{
9798
"domain": "github.com",
98-
"rank": 29,
99-
"date": "2019-06-01T00:00:00Z",
100-
"source": "majestic",
101-
"raw": "29,23,github.com,com,179597,516432,github.com,com,29,23,179410,516889"
99+
"rank": 467,
100+
"date": "2019-09-07T00:00:00Z",
101+
"source": "yandex-radar",
102+
"raw": "The world’s leading software development platform · GitHub,github.com,,Сервисы,,,1520000,2340000,,,"
102103
},
103104
{
104105
"domain": "github.com",
105-
"rank": 39,
106-
"date": "2019-06-01T00:00:00Z",
107-
"source": "pagerank",
106+
"rank": 43,
107+
"date": "2019-09-07T00:00:00Z",
108+
"source": "tranco",
109+
"raw": ""
110+
},
111+
{
112+
"domain": "github.com",
113+
"rank": 168,
114+
"date": "2019-09-07T00:00:00Z",
115+
"source": "quantcast",
108116
"raw": ""
109117
}
110118
],
111-
"timestamp": "2019-06-01T22:06:12.662492307Z"
119+
"timestamp": "2019-09-07T14:32:32.9725943Z"
112120
}
113121
```

ingester.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ var ingesters = []Ingester{
1313
NewPageRank(),
1414
NewTranco(),
1515
NewQuantcast(),
16+
NewYandexRadar(),
1617
}
1718

1819
// IngesterConf represents a top popular domains provider configuration.

yandex_radar.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package dor
2+
3+
import (
4+
"encoding/csv"
5+
"io"
6+
"log"
7+
"net/http"
8+
"strings"
9+
"time"
10+
)
11+
12+
// yandexRadarTop10k is the Yandex Radar export endpoint fetched by the
// ingester. Its URL-encoded "options" query parameter requests the
// "csv" export format, and "mode" selects the top-sites listing.
const yandexRadarTop10k = "https://radar.yandex.ru/export?options=%7B%22title%22%3A%22%D0%AF%D0%BD%D0%B4%D0%B5%D0%BA%D1%81.%D0%A0%D0%B0%D0%B4%D0%B0%D1%80%2B-%2B%D0%A2%D0%BE%D0%BF%2B%D0%98%D0%BD%D1%82%D0%B5%D1%80%D0%BD%D0%B5%D1%82-%D0%BF%D1%80%D0%BE%D0%B5%D0%BA%D1%82%D0%BE%D0%B2%22%2C%22exportFormat%22%3A%22csv%22%2C%22limit%22%3A0%2C%22offset%22%3A1000000%7D&mode=top-sites"
15+
16+
// YandexRadarIngester represents Ingester implementation for Yandex Radar.
17+
type YandexRadarIngester struct {
18+
IngesterConf
19+
}
20+
21+
// NewYandexRadar bootstraps YandexRadarIngester.
22+
func NewYandexRadar() *YandexRadarIngester {
23+
return &YandexRadarIngester{
24+
IngesterConf: IngesterConf{
25+
Description: "yandex-radar",
26+
},
27+
}
28+
29+
}
30+
31+
// Do implements Ingester Do func with the data.
32+
func (in *YandexRadarIngester) Do() (chan *Entry, error) {
33+
in.Timestamp = time.Now().UTC()
34+
ch := make(chan *Entry)
35+
36+
go func() {
37+
defer close(ch)
38+
resp, err := http.Get(yandexRadarTop10k)
39+
if err != nil {
40+
log.Println(err)
41+
return
42+
}
43+
44+
log.Printf("%s: %s downloaded successfully", in.Description, yandexRadarTop10k)
45+
defer resp.Body.Close()
46+
47+
r := csv.NewReader(resp.Body)
48+
r.LazyQuotes = true
49+
// read the header
50+
// "Название ресурса","URL-адрес ресурса","Тематики ресурса","Тип ресурса","Медиахолдинг","Данные Метрики","Посетители (кросс-девайс)","Посетители (браузер)","Среднее время","Доля пользователей приложения","Дневная аудитория"
51+
_, err = r.Read()
52+
if err != nil {
53+
log.Println(err)
54+
return
55+
}
56+
var i uint32
57+
now := time.Now()
58+
for {
59+
i++
60+
record, err := r.Read()
61+
if err == io.EOF {
62+
break
63+
}
64+
if err != nil {
65+
log.Println(err)
66+
return
67+
}
68+
69+
ch <- &Entry{
70+
Domain: record[1],
71+
Rank: i,
72+
Date: now,
73+
Source: in.Description,
74+
RawData: strings.Join(record, ","),
75+
}
76+
}
77+
}()
78+
79+
return ch, nil
80+
}

0 commit comments

Comments
 (0)