Skip to content

Commit 0b90de7

Browse files
authored
Add ads crawl subcommand (#135)
* Add `ads crawl` subcommand: crawl advertisements from latest to earliest from a specified publisher, printing information about each advertisement.
1 parent b469476 commit 0b90de7

File tree

5 files changed

+374
-5
lines changed

5 files changed

+374
-5
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Here are a few examples that use the following commands:
3535
- `ads` Show advertisements on a chain from a specified publisher
3636
- `get` Show information about an advertisement from a specified publisher
3737
- `list` List advertisements from latest to earliest from a specified publisher
38+
- `crawl` Crawl a publisher's advertisements and show information for each advertisement
3839
- `dist` Determine the distance between two advertisements in a chain
3940
- `find` Find value by CID or multihash in indexer
4041
- `provider` Show information about providers known to an indexer
@@ -69,6 +70,12 @@ cat ad-cids-list.txt | ipni ads get /dns4/ads.example.com/tcp/24001/p2p/<publish
6970
ipni ads list -n 10 --ai=/ip4/38.70.220.112/tcp/10201/p2p/12D3KooWEAcRJ5fYjuavKgAhu79juR7mgaznSZxsm2RRUBiWurv9
7071
```
7172

73+
### `ads crawl`
74+
- Crawl advertisements from a provider and stop after reading 1000 multihashes:
75+
```sh
76+
ipni ads crawl --stop-mhs 1000 --ai=/ip4/38.70.220.112/tcp/10201/p2p/12D3KooWEAcRJ5fYjuavKgAhu79juR7mgaznSZxsm2RRUBiWurv9
77+
```
78+
7279
### `ads dist`
7380
- Get distance from an advertisement to the head of the advertisement chain:
7481
```sh

pkg/adpub/client.go

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ type Client interface {
2424
GetAdvertisement(context.Context, cid.Cid) (*Advertisement, error)
2525
Close() error
2626
List(context.Context, cid.Cid, int, io.Writer) error
27+
Crawl(context.Context, cid.Cid, int, chan<- *Advertisement) error
2728
SyncEntriesWithRetry(context.Context, cid.Cid) error
2829
}
2930

@@ -103,6 +104,61 @@ func (c *client) List(ctx context.Context, latestCid cid.Cid, n int, w io.Writer
103104
return c.store.list(ctx, latestCid, n, w)
104105
}
105106

107+
func (c *client) Crawl(ctx context.Context, latestCid cid.Cid, n int, ads chan<- *Advertisement) error {
108+
const batchSize = 10
109+
110+
var opts []dagsync.SyncOption
111+
if n > syncSegmentSize {
112+
prevAdCid := func(adCid cid.Cid) (cid.Cid, error) {
113+
ad, err := c.store.loadAd(ctx, adCid)
114+
if err != nil {
115+
return cid.Undef, err
116+
}
117+
return ad.PreviousCid(), nil
118+
}
119+
opts = append(opts, dagsync.ScopedSegmentDepthLimit(syncSegmentSize))
120+
opts = append(opts, dagsync.ScopedBlockHook(dagsync.MakeGeneralBlockHook(prevAdCid)))
121+
}
122+
origOptsLen := len(opts)
123+
124+
if n == 0 {
125+
n = -1
126+
}
127+
batch := batchSize
128+
for n != 0 {
129+
if n != -1 {
130+
if n < batchSize {
131+
batch = n
132+
}
133+
n -= batch
134+
}
135+
136+
opts = opts[:origOptsLen]
137+
opts = append(opts, dagsync.WithHeadAdCid(latestCid), dagsync.ScopedDepthLimit(int64(batch)))
138+
139+
var err error
140+
latestCid, err = c.sub.SyncAdChain(ctx, c.publisher, opts...)
141+
if err != nil {
142+
if errors.Is(err, context.Canceled) {
143+
return nil
144+
}
145+
return err
146+
}
147+
148+
latestCid, err = c.store.crawl(ctx, latestCid, batch, ads)
149+
if err != nil {
150+
if errors.Is(err, context.Canceled) {
151+
return nil
152+
}
153+
return err
154+
}
155+
if latestCid == cid.Undef {
156+
break
157+
}
158+
}
159+
return nil
160+
}
161+
106162
func (c *client) GetAdvertisement(ctx context.Context, adCid cid.Cid) (*Advertisement, error) {
107163
// Sync the advertisement without entries first.
108164
adCid, err := c.syncAdWithRetry(ctx, adCid, c.sub)
@@ -116,12 +172,8 @@ func (c *client) GetAdvertisement(ctx context.Context, adCid cid.Cid) (*Advertis
116172
return nil, err
117173
}
118174

119-
if ad.IsRemove {
120-
return ad, nil
121-
}
122-
123175
// Return the partially synced advertisement useful for output to client.
124-
return ad, err
176+
return ad, nil
125177
}
126178

127179
func (c *client) syncAdWithRetry(ctx context.Context, adCid cid.Cid, sub *dagsync.Subscriber) (cid.Cid, error) {

pkg/adpub/client_store.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,3 +192,70 @@ func (s *ClientStore) list(ctx context.Context, nextCid cid.Cid, n int, w io.Wri
192192
}
193193
return nil
194194
}
195+
196+
func (s *ClientStore) crawl(ctx context.Context, nextCid cid.Cid, n int, ads chan<- *Advertisement) (cid.Cid, error) {
197+
for i := 0; i < n; i++ {
198+
val, err := s.Batching.Get(ctx, datastore.NewKey(nextCid.String()))
199+
if err != nil {
200+
return cid.Undef, err
201+
}
202+
203+
nb := schema.AdvertisementPrototype.NewBuilder()
204+
decoder, err := multicodec.LookupDecoder(nextCid.Prefix().Codec)
205+
if err != nil {
206+
return cid.Undef, err
207+
}
208+
209+
err = decoder(nb, bytes.NewBuffer(val))
210+
if err != nil {
211+
return cid.Undef, err
212+
}
213+
node := nb.Build()
214+
215+
ad, err := schema.UnwrapAdvertisement(node)
216+
if err != nil {
217+
return cid.Undef, err
218+
}
219+
220+
dprovid, err := peer.Decode(ad.Provider)
221+
if err != nil {
222+
return cid.Undef, err
223+
}
224+
225+
a := &Advertisement{
226+
ID: nextCid,
227+
ProviderID: dprovid,
228+
ContextID: ad.ContextID,
229+
Metadata: ad.Metadata,
230+
Addresses: ad.Addresses,
231+
PreviousID: ad.PreviousCid(),
232+
IsRemove: ad.IsRm,
233+
ExtendedProvider: ad.ExtendedProvider,
234+
}
235+
236+
if ad.Entries != nil {
237+
entriesCid := ad.Entries.(cidlink.Link).Cid
238+
if entriesCid != cid.Undef {
239+
a.Entries = &EntriesIterator{
240+
root: entriesCid,
241+
next: entriesCid,
242+
ctx: ctx,
243+
store: s,
244+
}
245+
}
246+
}
247+
248+
select {
249+
case ads <- a:
250+
case <-ctx.Done():
251+
return cid.Undef, nil
252+
}
253+
254+
if ad.PreviousID == nil {
255+
return cid.Undef, nil
256+
}
257+
258+
nextCid = ad.PreviousID.(cidlink.Link).Cid
259+
}
260+
return nextCid, nil
261+
}

pkg/ads/ads.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ var AdsCmd = &cli.Command{
1010
Subcommands: []*cli.Command{
1111
adsGetSubCmd,
1212
adsListSubCmd,
13+
adsCrawlSubCmd,
1314
adsDistSubCmd,
1415
},
1516
}

0 commit comments

Comments
 (0)