Skip to content

Commit c710340

Browse files
committed
Add searchable record and search 🔍
Records are added with simhashed tokens for later searching
1 parent 49c4c8e commit c710340

File tree

8 files changed

+516
-1
lines changed

8 files changed

+516
-1
lines changed

index.js

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ const connect = require('./lib/connect')
1313
const { FIREWALL, BOOTSTRAP_NODES, KNOWN_NODES, COMMANDS } = require('./lib/constants')
1414
const { hash, createKeyPair } = require('./lib/crypto')
1515
const { decode } = require('hypercore-id-encoding')
16+
const { hammingDistance } = require('@holepunchto/searchable-record-cache')
1617
const RawStreamSet = require('./lib/raw-stream-set')
1718
const ConnectionPool = require('./lib/connection-pool')
1819
const { STREAM_NOT_CONNECTED } = require('./lib/errors')
@@ -52,6 +53,7 @@ class HyperDHT extends DHT {
5253
this._randomPunchInterval = opts.randomPunchInterval || 20000 // min 20s between random punches...
5354
this._randomPunches = 0
5455
this._randomPunchLimit = 1 // set to one for extra safety for now
56+
this._simhash = opts.simhash
5557

5658
this.once('persistent', () => {
5759
this._persistent = new Persistent(this, persistent)
@@ -184,6 +186,68 @@ class HyperDHT extends DHT {
184186
return this.query({ target, command: COMMANDS.LOOKUP, value: null }, opts)
185187
}
186188

189+
async searchableRecordPut(tokens, value, opts = {}) {
190+
if (!this._simhash) return
191+
192+
const target = this._simhash.hash(tokens)
193+
const query = this.query({ target, command: COMMANDS.SEARCH, value: null }, opts)
194+
await query.finished()
195+
196+
for (const closest of query.closestReplies) {
197+
await this.request(
198+
{
199+
target,
200+
command: COMMANDS.SEARCHABLE_RECORD_PUT,
201+
value: c.encode(m.searchableRecord, { value, key: target })
202+
},
203+
closest.from
204+
)
205+
}
206+
207+
return target
208+
}
209+
210+
async search(tokens, opts = {}) {
211+
if (!this._simhash) return
212+
213+
const target = this._simhash.hash(tokens)
214+
215+
const query = this.query(
216+
{
217+
target,
218+
command: COMMANDS.SEARCH,
219+
value: c.encode(m.searchOptions, {
220+
closest: opts.closest || 5,
221+
values: opts.values || 5
222+
})
223+
},
224+
opts
225+
)
226+
227+
const results = []
228+
const seen = new Set()
229+
230+
for await (const reply of query) {
231+
if (reply.value) {
232+
const res = c.decode(m.searchResponse, reply.value)
233+
234+
for (const r of res) {
235+
const key = r.key.toString('hex')
236+
if (seen.has(key)) continue
237+
238+
const distance = hammingDistance(r.key, target)
239+
seen.add(key)
240+
results.push({ ...r, distance, from: reply.from })
241+
}
242+
}
243+
}
244+
245+
results.sort((a, b) => a.distance - b.distance)
246+
while (results.length > 5) results.pop()
247+
248+
return results
249+
}
250+
187251
lookupAndUnannounce(target, keyPair, opts = {}) {
188252
const unannounces = []
189253
const dht = this
@@ -426,6 +490,14 @@ class HyperDHT extends DHT {
426490
this._persistent.onimmutableget(req)
427491
return true
428492
}
493+
case COMMANDS.SEARCH: {
494+
this._persistent.onsearch(req)
495+
return true
496+
}
497+
case COMMANDS.SEARCHABLE_RECORD_PUT: {
498+
this._persistent.onsearchablerecordput(req)
499+
return true
500+
}
429501
}
430502

431503
return false
@@ -604,6 +676,7 @@ function defaultCacheOpts(opts) {
604676
maxSize: (maxSize / 2) | 0,
605677
maxAge: opts.maxAge || 48 * 60 * 60 * 1000 // 48 hours
606678
},
679+
searchableRecords: { maxSize: Infinity, maxAge: Infinity },
607680
bumps: { maxSize, maxAge }
608681
}
609682
}

lib/constants.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ const COMMANDS = (exports.COMMANDS = {
1010
MUTABLE_PUT: 6,
1111
MUTABLE_GET: 7,
1212
IMMUTABLE_PUT: 8,
13-
IMMUTABLE_GET: 9
13+
IMMUTABLE_GET: 9,
14+
SEARCHABLE_RECORD_PUT: 10,
15+
SEARCH: 11
1416
})
1517

1618
exports.BOOTSTRAP_NODES = global.Pear?.config.dht?.bootstrap || [

lib/messages.js

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,3 +420,56 @@ exports.mutableGetResponse = {
420420
}
421421
}
422422
}
423+
424+
exports.searchOptions = {
425+
preencode(state, m) {
426+
c.uint.preencode(state, m.closest)
427+
c.uint.preencode(state, m.values)
428+
},
429+
encode(state, m) {
430+
c.uint.encode(state, m.closest)
431+
c.uint.encode(state, m.values)
432+
},
433+
decode(state) {
434+
return {
435+
closest: c.uint.decode(state),
436+
values: c.uint.decode(state)
437+
}
438+
}
439+
}
440+
441+
exports.searchableRecord = {
442+
preencode(state, m) {
443+
c.fixed32.preencode(state, m.key)
444+
c.fixed32.preencode(state, m.value)
445+
},
446+
encode(state, m) {
447+
c.fixed32.encode(state, m.key)
448+
c.fixed32.encode(state, m.value)
449+
},
450+
decode(state) {
451+
return {
452+
key: c.fixed32.decode(state),
453+
value: c.fixed32.decode(state)
454+
}
455+
}
456+
}
457+
458+
const searchResponseItem = {
459+
preencode(state, m) {
460+
c.fixed32.preencode(state, m.key)
461+
c.array(c.fixed32).preencode(state, m.values)
462+
},
463+
encode(state, m) {
464+
c.fixed32.encode(state, m.key)
465+
c.array(c.fixed32).encode(state, m.values)
466+
},
467+
decode(state) {
468+
return {
469+
key: c.fixed32.decode(state),
470+
values: c.array(c.fixed32).decode(state)
471+
}
472+
}
473+
}
474+
475+
exports.searchResponse = c.array(searchResponseItem)

lib/persistent.js

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
const c = require('compact-encoding')
22
const sodium = require('sodium-universal')
33
const RecordCache = require('record-cache')
4+
const { SearchableRecordCache } = require('@holepunchto/searchable-record-cache')
45
const Cache = require('xache')
56
const b4a = require('b4a')
67
const unslab = require('unslab')
@@ -21,6 +22,7 @@ module.exports = class Persistent {
2122
this.refreshes = new Cache(opts.refreshes)
2223
this.mutables = new Cache(opts.mutables)
2324
this.immutables = new Cache(opts.immutables)
25+
this.searchableRecords = new SearchableRecordCache(opts.searchableRecords)
2426
}
2527

2628
onlookup(req) {
@@ -226,11 +228,47 @@ module.exports = class Persistent {
226228
req.reply(null)
227229
}
228230

231+
async onsearchablerecordput(req) {
232+
if (!req.target) return
233+
234+
const doc = c.decode(m.searchableRecord, req.value)
235+
const { key, value } = doc
236+
237+
this.searchableRecords.add(key, value)
238+
239+
req.reply(null)
240+
}
241+
242+
onsearch(req) {
243+
if (!req.target || !req.value) {
244+
// allow searchable record put to do an empty search
245+
req.reply(null)
246+
return
247+
}
248+
249+
const opts = c.decode(m.searchOptions, req.value)
250+
const results = []
251+
252+
const res = this.searchableRecords.search(req.target, opts)
253+
254+
for (const r of res) {
255+
results.push({
256+
values: r.values,
257+
key: r.key,
258+
distance: r.distance
259+
})
260+
}
261+
262+
results.sort((a, b) => a.distance - b.distance)
263+
req.reply(c.encode(m.searchResponse, results.slice(0, 12))) // max we can fit
264+
}
265+
229266
destroy() {
230267
this.records.destroy()
231268
this.refreshes.destroy()
232269
this.mutables.destroy()
233270
this.immutables.destroy()
271+
this.searchableRecords.destroy()
234272
}
235273

236274
static signMutable(seq, value, keyPair) {

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
}
2626
},
2727
"dependencies": {
28+
"@holepunchto/searchable-record-cache": "^0.0.6",
29+
"@holepunchto/simhash": "^0.0.6",
2830
"@hyperswarm/secret-stream": "^6.6.2",
2931
"b4a": "^1.3.1",
3032
"bare-events": "^2.2.0",

test/all.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ async function runTests() {
1616
await import('./noncustodial.js')
1717
await import('./pool.js')
1818
await import('./relaying.js')
19+
await import('./search.js')
1920
await import('./storing.js')
2021

2122
test.resume()

test/messages.js

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,3 +440,26 @@ test('announce with refresh', function (t) {
440440

441441
t.alike(d, ann)
442442
})
443+
444+
test('search', function (t) {
445+
const state = { start: 0, end: 0, buffer: null }
446+
447+
const searchResponse = []
448+
for (let i = 0; i < 12; i++) {
449+
searchResponse.push({
450+
key: Buffer.alloc(32).fill(i),
451+
values: [Buffer.alloc(32).fill(i)]
452+
})
453+
}
454+
455+
m.searchResponse.preencode(state, searchResponse)
456+
state.buffer = b4a.allocUnsafe(state.end)
457+
m.searchResponse.encode(state, searchResponse)
458+
459+
t.is(state.end, state.start, 'fully encoded')
460+
461+
state.start = 0
462+
const d = m.searchResponse.decode(state)
463+
464+
t.alike(d, searchResponse)
465+
})

0 commit comments

Comments
 (0)