Skip to content

HNSW poc #3516

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 30 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
2fafc3d
save point -- in the middle of just mess
normen662 Jul 24, 2025
0b3fee0
save point -- in the middle of just mess
normen662 Jul 24, 2025
8bdf410
save point -- in the middle of just mess
normen662 Jul 26, 2025
5d07bce
save point -- in the middle of just mess
normen662 Jul 26, 2025
d9078e2
save point -- in the middle of just mess and mess
normen662 Jul 26, 2025
3e63761
save point -- read path almost done
normen662 Jul 27, 2025
102c4ad
save point -- read path done
normen662 Jul 27, 2025
eb7e54d
some renamings
normen662 Jul 27, 2025
66547ca
started on the writing path
normen662 Jul 28, 2025
d1388b8
save point
normen662 Jul 28, 2025
871d92c
insert almost works
normen662 Jul 29, 2025
dae05d5
insert almost works
normen662 Jul 30, 2025
3445c3d
read and insert path both code complete
normen662 Jul 30, 2025
0a4b282
added logging
normen662 Jul 31, 2025
687f562
added some testing
normen662 Aug 1, 2025
f1cda10
inserts and searches work; logic to dump layers works
normen662 Aug 1, 2025
8232270
doing some performance tests
normen662 Aug 2, 2025
2d88cc9
better test helpers
normen662 Aug 5, 2025
71c583b
fix for bad bug that didn't wait for a future
normen662 Aug 5, 2025
bf6a02a
batch insert
normen662 Aug 6, 2025
5d61de3
adding new distance rank comparison and some index helpers
normen662 Aug 12, 2025
b19042c
index maintenance code complete
normen662 Aug 13, 2025
4bf0083
index maintenance code complete
normen662 Aug 13, 2025
d4b8cb5
- Support Vector datatype.
hatyo Aug 8, 2025
108f88d
- ddl support for tables with HNSW.
hatyo Aug 13, 2025
f31e00c
- fixes and cleanups.
hatyo Aug 13, 2025
8de8097
Merge pull request #16 from hatyo/hnsw-sql
normen662 Aug 13, 2025
976abda
get the build to build
normen662 Aug 13, 2025
ef5dc3b
adding some test cases
normen662 Aug 14, 2025
27e58cd
adding a scan with prefix
normen662 Aug 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions fdb-extensions/fdb-extensions.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dependencies {
}
api(libs.fdbJava)
implementation(libs.guava)
implementation(libs.half4j)
implementation(libs.slf4j.api)
compileOnly(libs.jsr305)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@
import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.util.LoggableException;
import com.google.common.base.Suppliers;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
Expand All @@ -42,9 +44,13 @@
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.IntPredicate;
import java.util.function.IntUnaryOperator;
import java.util.function.Predicate;
import java.util.function.Supplier;

Expand Down Expand Up @@ -1051,6 +1057,64 @@ public static CompletableFuture<Void> swallowException(@Nonnull CompletableFutur
return result;
}

@Nonnull
public static <U> CompletableFuture<U> forLoop(final int startI, @Nullable final U startU,
@Nonnull final IntPredicate conditionPredicate,
@Nonnull final IntUnaryOperator stepFunction,
@Nonnull final BiFunction<Integer, U, CompletableFuture<U>> body,
@Nonnull final Executor executor) {
final AtomicInteger loopVariableAtomic = new AtomicInteger(startI);
final AtomicReference<U> lastResultAtomic = new AtomicReference<>(startU);
return whileTrue(() -> {
final int loopVariable = loopVariableAtomic.get();
if (!conditionPredicate.test(loopVariable)) {
return AsyncUtil.READY_FALSE;
}
return body.apply(loopVariable, lastResultAtomic.get())
.thenApply(result -> {
loopVariableAtomic.set(stepFunction.applyAsInt(loopVariable));
lastResultAtomic.set(result);
return true;
});
}, executor).thenApply(ignored -> lastResultAtomic.get());
}

@SuppressWarnings("unchecked")
public static <T, U> CompletableFuture<List<U>> forEach(@Nonnull final Iterable<T> items,
@Nonnull final Function<T, CompletableFuture<U>> body,
final int parallelism,
@Nonnull final Executor executor) {
// this deque is only modified by once upon creation
final ArrayDeque<T> toBeProcessed = new ArrayDeque<>();
for (final T item : items) {
toBeProcessed.addLast(item);
}

final List<CompletableFuture<Void>> working = Lists.newArrayList();
final AtomicInteger indexAtomic = new AtomicInteger(0);
final Object[] resultArray = new Object[toBeProcessed.size()];

return whileTrue(() -> {
working.removeIf(CompletableFuture::isDone);

while (working.size() <= parallelism) {
final T currentItem = toBeProcessed.pollFirst();
if (currentItem == null) {
break;
}

final int index = indexAtomic.getAndIncrement();
working.add(body.apply(currentItem)
.thenAccept(result -> resultArray[index] = result));
}

if (working.isEmpty()) {
return AsyncUtil.READY_FALSE;
}
return whenAny(working).thenApply(ignored -> true);
}, executor).thenApply(ignored -> Arrays.asList((U[])resultArray));
}

/**
* A {@code Boolean} function that is always true.
* @param <T> the type of the (ignored) argument to the function
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* AbstractNode.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2023 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.async.hnsw;

import com.apple.foundationdb.tuple.Tuple;
import com.google.common.collect.ImmutableList;

import javax.annotation.Nonnull;
import java.util.List;

/**
* TODO.
* @param <N> node type class.
*/
abstract class AbstractNode<N extends NodeReference> implements Node<N> {
@Nonnull
private final Tuple primaryKey;

@Nonnull
private final List<N> neighbors;

protected AbstractNode(@Nonnull final Tuple primaryKey,
@Nonnull final List<N> neighbors) {
this.primaryKey = primaryKey;
this.neighbors = ImmutableList.copyOf(neighbors);
}

@Nonnull
@Override
public Tuple getPrimaryKey() {
return primaryKey;
}

@Nonnull
@Override
public List<N> getNeighbors() {
return neighbors;
}

@Nonnull
@Override
public N getNeighbor(final int index) {
return neighbors.get(index);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* AbstractStorageAdapter.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2023 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.async.hnsw;

import com.apple.foundationdb.ReadTransaction;
import com.apple.foundationdb.Transaction;
import com.apple.foundationdb.subspace.Subspace;
import com.apple.foundationdb.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.concurrent.CompletableFuture;

/**
* Implementations and attributes common to all concrete implementations of {@link StorageAdapter}.
*/
abstract class AbstractStorageAdapter<N extends NodeReference> implements StorageAdapter<N> {
@Nonnull
private static final Logger logger = LoggerFactory.getLogger(AbstractStorageAdapter.class);

@Nonnull
private final HNSW.Config config;
@Nonnull
private final NodeFactory<N> nodeFactory;
@Nonnull
private final Subspace subspace;
@Nonnull
private final OnWriteListener onWriteListener;
@Nonnull
private final OnReadListener onReadListener;

private final Subspace dataSubspace;

protected AbstractStorageAdapter(@Nonnull final HNSW.Config config, @Nonnull final NodeFactory<N> nodeFactory,
@Nonnull final Subspace subspace,
@Nonnull final OnWriteListener onWriteListener,
@Nonnull final OnReadListener onReadListener) {
this.config = config;
this.nodeFactory = nodeFactory;
this.subspace = subspace;
this.onWriteListener = onWriteListener;
this.onReadListener = onReadListener;
this.dataSubspace = subspace.subspace(Tuple.from(SUBSPACE_PREFIX_DATA));
}

@Override
@Nonnull
public HNSW.Config getConfig() {
return config;
}

@Nonnull
@Override
public NodeFactory<N> getNodeFactory() {
return nodeFactory;
}

@Nonnull
@Override
public NodeKind getNodeKind() {
return getNodeFactory().getNodeKind();
}

@Override
@Nonnull
public Subspace getSubspace() {
return subspace;
}

@Override
@Nonnull
public Subspace getDataSubspace() {
return dataSubspace;
}

@Override
@Nonnull
public OnWriteListener getOnWriteListener() {
return onWriteListener;
}

@Override
@Nonnull
public OnReadListener getOnReadListener() {
return onReadListener;
}

@Nonnull
@Override
public CompletableFuture<Node<N>> fetchNode(@Nonnull final ReadTransaction readTransaction,
int layer, @Nonnull Tuple primaryKey) {
return fetchNodeInternal(readTransaction, layer, primaryKey).thenApply(this::checkNode);
}

@Nonnull
protected abstract CompletableFuture<Node<N>> fetchNodeInternal(@Nonnull ReadTransaction readTransaction,
int layer, @Nonnull Tuple primaryKey);

/**
* Method to perform basic invariant check(s) on a newly-fetched node.
*
* @param node the node to check
* was passed in
*
* @return the node that was passed in
*/
@Nullable
private Node<N> checkNode(@Nullable final Node<N> node) {
return node;
}

@Override
public void writeNode(@Nonnull Transaction transaction, @Nonnull Node<N> node, int layer,
@Nonnull NeighborsChangeSet<N> changeSet) {
writeNodeInternal(transaction, node, layer, changeSet);
if (logger.isDebugEnabled()) {
logger.debug("written node with key={} at layer={}", node.getPrimaryKey(), layer);
}
}

protected abstract void writeNodeInternal(@Nonnull Transaction transaction, @Nonnull Node<N> node, int layer,
@Nonnull NeighborsChangeSet<N> changeSet);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* InliningNode.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2023 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.async.hnsw;

import com.apple.foundationdb.Transaction;
import com.apple.foundationdb.tuple.Tuple;
import com.google.common.collect.ImmutableList;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.List;
import java.util.function.Predicate;

/**
* TODO.
*/
class BaseNeighborsChangeSet<N extends NodeReference> implements NeighborsChangeSet<N> {
@Nonnull
private final List<N> neighbors;

public BaseNeighborsChangeSet(@Nonnull final List<N> neighbors) {
this.neighbors = ImmutableList.copyOf(neighbors);
}

@Nullable
@Override
public BaseNeighborsChangeSet<N> getParent() {
return null;
}

@Nonnull
@Override
public List<N> merge() {
return neighbors;
}

@Override
public void writeDelta(@Nonnull final InliningStorageAdapter storageAdapter, @Nonnull final Transaction transaction,
final int layer, @Nonnull final Node<N> node,
@Nonnull final Predicate<Tuple> primaryKeyPredicate) {
// nothing to be written
}
}
Loading