Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion src/DBQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,44 @@

#include "Subscription.h"
#include "filters.h"
#include "HyperLogLog.h"
#include "events.h"


// Returns HLL offset (8–23) from a filter, or -1 if not eligible for HLL.
inline int computeHllOffset(const NostrFilter &filter) {
if (filter.tags.size() != 1) return -1;

auto it = filter.tags.begin();
char tagChar = it->first;
const auto &filterSet = it->second;
if (filterSet.size() != 1) return -1;

std::string val = filterSet.at(0);

int offset;

if (tagChar == 'e' || tagChar == 'p') {
// Stored as 32 raw bytes (hex-decoded)
if (val.size() != 32) return -1;
offset = (((uint8_t)val[16]) >> 4) + 8;
} else {
// Stored as string — must be 64-char hex to derive offset
if (val.size() != 64) return -1;
char c = val[32];
int nibble;
if (c >= '0' && c <= '9') nibble = c - '0';
else if (c >= 'a' && c <= 'f') nibble = c - 'a' + 10;
else if (c >= 'A' && c <= 'F') nibble = c - 'A' + 10;
else return -1;
offset = nibble + 8;
}

if (offset < 8 || offset > 23) return -1;
return offset;
}


struct DBScan : NonCopyable {
struct CandidateEvent {
private:
Expand Down Expand Up @@ -295,7 +330,13 @@ struct DBQuery : NonCopyable {
uint64_t totalTime = 0;
uint64_t totalWork = 0;

DBQuery(Subscription &sub) : sub(std::move(sub)) {}
int hllOffset = -1;
HyperLogLog hll;

DBQuery(Subscription &sub) : sub(std::move(sub)) {
if (this->sub.countOnly && this->sub.filterGroup.size() == 1)
hllOffset = computeHllOffset(this->sub.filterGroup.filters[0]);
}
DBQuery(const tao::json::value &filter, uint64_t maxLimit = MAX_U64) : sub(Subscription(1, ".", NostrFilterGroup::unwrapped(filter, maxLimit))) {}

// If scan is complete, returns true
Expand All @@ -316,6 +357,11 @@ struct DBQuery : NonCopyable {
if (sentEventsFull.find(levId) == sentEventsFull.end()) {
sentEventsFull.insert(levId);
cb(sub, levId);

if (hllOffset >= 0) {
auto view = env.lookup_Event(txn, levId);
if (view) hll.addPubkeyBytes((const uint8_t *)PackedEventView(view->buf).pubkey().data(), hllOffset);
}
}

sentEventsCurr.insert(levId);
Expand Down
46 changes: 46 additions & 0 deletions src/HyperLogLog.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#pragma once

#include <cstdint>
#include <cstring>
#include <string>
#include <algorithm>


struct HyperLogLog {
uint8_t registers[256] = {};

void addPubkeyBytes(const uint8_t *pubkey, int offset) {
const uint8_t *x = pubkey + offset;
uint8_t ri = x[0]; // register index

// Build big-endian uint64 via explicit shifting (no endian-dependent casts)
uint64_t w = (uint64_t)x[0] << 56 | (uint64_t)x[1] << 48 | (uint64_t)x[2] << 40 | (uint64_t)x[3] << 32
| (uint64_t)x[4] << 24 | (uint64_t)x[5] << 16 | (uint64_t)x[6] << 8 | (uint64_t)x[7];

uint8_t zeroBits = clz56(w) + 1;

if (zeroBits > registers[ri]) {
registers[ri] = zeroBits;
}
}

std::string encodeHex() const {
static const char hexChars[] = "0123456789abcdef";
std::string out;
out.resize(512);
for (int i = 0; i < 256; i++) {
out[i * 2] = hexChars[registers[i] >> 4];
out[i * 2 + 1] = hexChars[registers[i] & 0x0F];
}
return out;
}

private:
static uint8_t clz56(uint64_t x) {
uint8_t c = 0;
for (uint64_t m = uint64_t(1) << 55; (m & x) == 0 && m != 0; m >>= 1) {
c++;
}
return c;
}
};
7 changes: 5 additions & 2 deletions src/QueryScheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
struct QueryScheduler : NonCopyable {
std::function<void(lmdb::txn &txn, const Subscription &sub, uint64_t levId, std::string_view eventPayload)> onEvent;
std::function<void(lmdb::txn &txn, const Subscription &sub, const std::vector<uint64_t> &levIds)> onEventBatch;
std::function<void(lmdb::txn &txn, Subscription &sub, uint64_t total)> onComplete;
std::function<void(lmdb::txn &txn, Subscription &sub, uint64_t total, std::string hllHex)> onComplete;

// If false, then levIds returned to above callbacks can be stale (because they were deleted)
// If false, then onEvent's eventPayload will always be ""
Expand Down Expand Up @@ -101,7 +101,10 @@ struct QueryScheduler : NonCopyable {
auto connId = q->sub.connId;
removeSub(connId, q->sub.subId);

if (onComplete) onComplete(txn, q->sub, q->sentEventsFull.size());
std::string hllHex;
if (q->hllOffset >= 0) hllHex = q->hll.encodeHex();

if (onComplete) onComplete(txn, q->sub, q->sentEventsFull.size(), std::move(hllHex));

delete q;
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/apps/relay/RelayNegentropy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ void RelayServer::runNegentropy(ThreadPool<MsgNegentropy>::Thread &thr) {
}
};

queries.onComplete = [&](lmdb::txn &txn, Subscription &sub, uint64_t){
queries.onComplete = [&](lmdb::txn &txn, Subscription &sub, uint64_t, std::string){
auto *userView = views.findView(sub.connId, sub.subId);
if (!userView) return;

Expand Down
3 changes: 2 additions & 1 deletion src/apps/relay/RelayReqWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ void RelayServer::runReqWorker(ThreadPool<MsgReqWorker>::Thread &thr) {
sendEvent(sub.connId, sub.subId, decodeEventPayload(txn, decomp, eventPayload, nullptr, nullptr));
};

queries.onComplete = [&](lmdb::txn &, Subscription &sub, uint64_t total){
queries.onComplete = [&](lmdb::txn &, Subscription &sub, uint64_t total, std::string hllHex){
if (sub.countOnly) {
bool limited = false;

Expand All @@ -25,6 +25,7 @@ void RelayServer::runReqWorker(ThreadPool<MsgReqWorker>::Thread &thr) {
});

if (limited) countBody["limited"] = true;
if (hllHex.size()) countBody["hll"] = std::move(hllHex);

sendToConn(sub.connId, tao::json::to_string(tao::json::value::array({ "COUNT", sub.subId.str(), countBody })));
} else {
Expand Down
9 changes: 9 additions & 0 deletions test/cfgs/nip45HllTest.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
db = "./strfry-db-nip45-hll-test/"

events {
rejectEventsOlderThanSeconds = 9999999999
}

relay {
port = 40563
}
82 changes: 82 additions & 0 deletions test/hll_reference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""
HyperLogLog reference implementation — exact port of go-nostr nip45/hyperloglog.

Used as oracle for C++ unit tests and E2E test vector generation.
"""

import struct


def clz56(x: int) -> int:
"""Count leading zeros in the lower 56 bits of x (direct port of go-nostr helpers.go)."""
c = 0
m = 1 << 55
while (m & x) == 0 and m != 0:
c += 1
m >>= 1
return c


def hll_add(registers: bytearray, pubkey_bytes: bytes, offset: int):
"""Update HLL registers from a 32-byte pubkey at the given offset (port of AddBytes)."""
x = pubkey_bytes[offset:offset + 8]
j = x[0] # register index
w = struct.unpack('>Q', x)[0] # big-endian uint64
zero_bits = clz56(w) + 1
if zero_bits > registers[j]:
registers[j] = zero_bits


def hll_encode(registers: bytearray) -> str:
"""Encode 256 registers as 512-char lowercase hex string."""
return registers.hex()


def compute_offset_from_hex(tag_value_hex: str) -> int:
"""Compute HLL offset from a 64-char hex tag value (e.g. for #e/#p filters)."""
return int(tag_value_hex[32], 16) + 8


def compute_offset_from_bytes(raw_bytes: bytes) -> int:
"""Compute HLL offset from 32 raw bytes (strfry internal for #e/#p tags)."""
return ((raw_bytes[16] >> 4) & 0xF) + 8


if __name__ == "__main__":
# Generate test vectors for C++ unit test

# Known pubkeys (32 bytes each)
pubkeys_hex = [
"79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798",
"c6047f9441ed7d6d3045406e95c07cd85c778e4b8cef3ca7abac09b95c709ee5",
"f9308a019258c31049344f85f89d5229b531c845836f99b08601f113bce036f9",
"e493dbf1c10d80f3581e4904930b1404cc6c13900ee0758474fa94abe8c4cd13",
]
pubkeys = [bytes.fromhex(h) for h in pubkeys_hex]

offset = 12 # arbitrary test offset

print(f"Test offset: {offset}")
print(f"Number of pubkeys: {len(pubkeys)}")
print()

registers = bytearray(256)
for i, pk in enumerate(pubkeys):
hll_add(registers, pk, offset)
encoded = hll_encode(registers)
print(f"After adding pubkey[{i}] ({pubkeys_hex[i][:16]}...):")
print(f" hex = {encoded}")

print()
print(f"Final hex ({len(hll_encode(registers))} chars): {hll_encode(registers)}")

# Also test offset computation
test_tag = "79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798"
off = compute_offset_from_hex(test_tag)
raw = bytes.fromhex(test_tag)
off2 = compute_offset_from_bytes(raw)
print(f"\nOffset from hex tag '{test_tag[32]}' (pos 32): {off}")
print(f"Offset from raw byte[16]=0x{raw[16]:02x}: {off2}")
assert off == off2, "Offset mismatch!"
print("Offset computation consistent.")
90 changes: 90 additions & 0 deletions test/hll_unit_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>

#include "../src/HyperLogLog.h"

static uint8_t hexVal(char c) {
if (c >= '0' && c <= '9') return c - '0';
if (c >= 'a' && c <= 'f') return c - 'a' + 10;
if (c >= 'A' && c <= 'F') return c - 'A' + 10;
return 0;
}

static void hexToBytes(const char *hex, uint8_t *out, size_t outLen) {
for (size_t i = 0; i < outLen; i++) {
out[i] = (hexVal(hex[i * 2]) << 4) | hexVal(hex[i * 2 + 1]);
}
}

int main() {
// Same test vectors as test/hll_reference.py
const char *pubkeysHex[] = {
"79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798",
"c6047f9441ed7d6d3045406e95c07cd85c778e4b8cef3ca7abac09b95c709ee5",
"f9308a019258c31049344f85f89d5229b531c845836f99b08601f113bce036f9",
"e493dbf1c10d80f3581e4904930b1404cc6c13900ee0758474fa94abe8c4cd13",
};

// Expected hex output after adding each pubkey incrementally (from Python oracle, offset=12)
const char *expectedHex[] = {
"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000",
"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000",
};

int offset = 12;
int passed = 0;
int failed = 0;

HyperLogLog hll;

for (int i = 0; i < 4; i++) {
uint8_t pubkey[32];
hexToBytes(pubkeysHex[i], pubkey, 32);

hll.addPubkeyBytes(pubkey, offset);
std::string hex = hll.encodeHex();

if (hex == expectedHex[i]) {
printf(" PASS after pubkey[%d]\n", i);
passed++;
} else {
printf(" FAIL after pubkey[%d]\n", i);
printf(" expected: %.64s...\n", expectedHex[i]);
printf(" got: %.64s...\n", hex.c_str());
failed++;
}
}

// Test that all-zero registers produce all-zero hex
{
HyperLogLog empty;
std::string hex = empty.encodeHex();
std::string expected(512, '0');
if (hex == expected) {
printf(" PASS empty registers\n");
passed++;
} else {
printf(" FAIL empty registers\n");
failed++;
}
}

// Test encodeHex length
{
std::string hex = hll.encodeHex();
if (hex.size() == 512) {
printf(" PASS encodeHex length == 512\n");
passed++;
} else {
printf(" FAIL encodeHex length == %zu\n", hex.size());
failed++;
}
}

printf("\n%d passed, %d failed\n", passed, failed);
return failed == 0 ? 0 : 1;
}
Loading
Loading