Skip to content

Commit af0b03b

Browse files
authored
Update type-erased code to keep Ctx alive when calling into C++ to fix Vamana ingestion crash (#347)
1 parent eada21d commit af0b03b

File tree

3 files changed

+68
-21
lines changed

3 files changed

+68
-21
lines changed

apis/python/src/tiledb/vector_search/ingestion.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,7 +1506,6 @@ def ingest_flat(
15061506
ids_array.close()
15071507

15081508
def ingest_vamana(
1509-
ctx,
15101509
index_group_uri: str,
15111510
source_uri: str,
15121511
source_type: str,
@@ -1626,14 +1625,15 @@ def ingest_vamana(
16261625
parts_array.close()
16271626
ids_array.close()
16281627

1629-
# Now that we've ingested the vectors and their IDs, train the index with the data.
1630-
from tiledb.vector_search import _tiledbvspy as vspy
1628+
# Now that we've ingested the vectors and their IDs, train the index with the data.
1629+
from tiledb.vector_search import _tiledbvspy as vspy
16311630

1632-
index = vspy.IndexVamana(ctx, index_group_uri)
1633-
data = vspy.FeatureVectorArray(ctx, parts_array_uri, ids_array_uri)
1634-
index.train(data)
1635-
index.add(data)
1636-
index.write_index(ctx, index_group_uri, index_timestamp)
1631+
ctx = vspy.Ctx(config)
1632+
index = vspy.IndexVamana(ctx, index_group_uri)
1633+
data = vspy.FeatureVectorArray(ctx, parts_array_uri, ids_array_uri)
1634+
index.train(data)
1635+
index.add(data)
1636+
index.write_index(ctx, index_group_uri, index_timestamp)
16371637

16381638
def write_centroids(
16391639
centroids: np.ndarray,
@@ -2183,12 +2183,8 @@ def create_ingestion_dag(
21832183
)
21842184
return d
21852185
elif index_type == "VAMANA":
2186-
from tiledb.vector_search import _tiledbvspy as vspy
2187-
2188-
ctx = vspy.Ctx(config)
21892186
ingest_node = submit(
21902187
ingest_vamana,
2191-
ctx=ctx,
21922188
index_group_uri=index_group_uri,
21932189
source_uri=source_uri,
21942190
source_type=source_type,

apis/python/src/tiledb/vector_search/type_erased_module.cc

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,10 @@ void init_type_erased_module(py::module_& m) {
122122
;
123123
#endif
124124
py::class_<FeatureVector>(m, "FeatureVector", py::buffer_protocol())
125-
.def(py::init<const tiledb::Context&, const std::string&>())
125+
.def(
126+
py::init<const tiledb::Context&, const std::string&>(),
127+
py::keep_alive<1, 2>() // FeatureVector should keep ctx alive.
128+
)
126129
.def(py::init<size_t, const std::string&>())
127130
.def(py::init<size_t, void*, const std::string&>())
128131
.def("dimension", &FeatureVector::dimension)
@@ -165,11 +168,17 @@ void init_type_erased_module(py::module_& m) {
165168
}));
166169

167170
py::class_<FeatureVectorArray>(m, "FeatureVectorArray", py::buffer_protocol())
168-
.def(py::init<const tiledb::Context&, const std::string&>())
169-
.def(py::init<
170-
const tiledb::Context&,
171-
const std::string&,
172-
const std::string&>())
171+
.def(
172+
py::init<const tiledb::Context&, const std::string&>(),
173+
py::keep_alive<1, 2>() // FeatureVectorArray should keep ctx alive.
174+
)
175+
.def(
176+
py::init<
177+
const tiledb::Context&,
178+
const std::string&,
179+
const std::string&>(),
180+
py::keep_alive<1, 2>() // FeatureVectorArray should keep ctx alive.
181+
)
173182
.def(py::init<size_t, size_t, const std::string&, const std::string&>())
174183
.def("dimension", &FeatureVectorArray::dimension)
175184
.def("num_vectors", &FeatureVectorArray::num_vectors)
@@ -222,7 +231,10 @@ void init_type_erased_module(py::module_& m) {
222231
}));
223232

224233
py::class_<IndexFlatL2>(m, "IndexFlatL2")
225-
.def(py::init<const tiledb::Context&, const std::string&>())
234+
.def(
235+
py::init<const tiledb::Context&, const std::string&>(),
236+
py::keep_alive<1, 2>() // IndexFlatL2 should keep ctx alive.
237+
)
226238
.def("add", &IndexFlatL2::add)
227239
.def("add_with_ids", &IndexFlatL2::add_with_ids)
228240
.def("train", &IndexFlatL2::train)
@@ -248,7 +260,10 @@ void init_type_erased_module(py::module_& m) {
248260
}));
249261

250262
py::class_<IndexVamana>(m, "IndexVamana")
251-
.def(py::init<const tiledb::Context&, const std::string&>())
263+
.def(
264+
py::init<const tiledb::Context&, const std::string&>(),
265+
py::keep_alive<1, 2>() // IndexVamana should keep ctx alive.
266+
)
252267
.def(
253268
"__init__",
254269
[](IndexVamana& instance, py::kwargs kwargs) {
@@ -288,6 +303,7 @@ void init_type_erased_module(py::module_& m) {
288303
const std::string& storage_version) {
289304
index.write_index(ctx, group_uri, timestamp, storage_version);
290305
},
306+
py::keep_alive<1, 2>(), // IndexVamana should keep ctx alive.
291307
py::arg("ctx"),
292308
py::arg("group_uri"),
293309
py::arg("timestamp") = py::none(),
@@ -300,7 +316,10 @@ void init_type_erased_module(py::module_& m) {
300316
.def("dimension", &IndexVamana::dimension);
301317

302318
py::class_<IndexIVFFlat>(m, "IndexIVFFlat")
303-
.def(py::init<const tiledb::Context&, const std::string&>())
319+
.def(
320+
py::init<const tiledb::Context&, const std::string&>(),
321+
py::keep_alive<1, 2>() // IndexIVFFlat should keep ctx alive.
322+
)
304323
.def(
305324
"__init__",
306325
[](IndexIVFFlat& instance, py::kwargs kwargs) {

documentation/Building.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,35 @@ pre-commit run --all-files
130130
# Run on single file and make formatting changes:
131131
pre-commit run --files path/to/file.py
132132
```
133+
134+
# Debugging C++ crashes in Python
135+
136+
If you are seeing C++ crashes that only occur when running Python, you can debug them by running `lldb -f python -- apis/python/test/crash.py` and then entering `run` at the `(lldb)` prompt. By default `_tiledbvspy` does not have debug symbols, so you'll see a stack trace with unnamed symbols like:
137+
138+
```bash
139+
(lldb) bt
140+
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x30001356a8f20)
141+
* frame #0: 0x0000000124a8d7c0 _tiledbvspy.cpython-39-darwin.so`___lldb_unnamed_symbol832 + 88
142+
frame #1: 0x0000000124a8d954 _tiledbvspy.cpython-39-darwin.so`___lldb_unnamed_symbol834 + 32
143+
frame #2: 0x0000000124ac0c64 _tiledbvspy.cpython-39-darwin.so`___lldb_unnamed_symbol1139 + 104
144+
frame #3: 0x0000000124b6ff00 _tiledbvspy.cpython-39-darwin.so`___lldb_unnamed_symbol2869 + 32
145+
frame #4: 0x0000000124b20150 _tiledbvspy.cpython-39-darwin.so`___lldb_unnamed_symbol1879 + 52
146+
frame #5: 0x0000000124c47354 _tiledbvspy.cpython-39-darwin.so`___lldb_unnamed_symbol3648 + 100
147+
frame #6: 0x000000012073b4d0 cc.cpython-39-darwin.so`pybind11::detail::clear_instance(_object*) + 392
148+
frame #7: 0x000000012073b1c8 cc.cpython-39-darwin.so`pybind11_object_dealloc + 44
149+
frame #8: 0x000000010007feb0 python`frame_dealloc + 840
150+
frame #9: 0x000000010005d260 python`_PyFunction_Vectorcall + 476
151+
frame #10: 0x000000010016ba6c python`_PyEval_EvalFrameDefault + 27688
152+
frame #11: 0x0000000100162dfc python`_PyEval_EvalCode + 700
153+
frame #12: 0x00000001001d7e04 python`run_mod + 188
154+
frame #13: 0x00000001001d6674 python`pyrun_file + 176
155+
frame #14: 0x00000001001d60e4 python`pyrun_simple_file + 352
156+
frame #15: 0x00000001001d5f44 python`PyRun_SimpleFileExFlags + 64
157+
frame #16: 0x00000001001f883c python`pymain_run_file + 264
158+
frame #17: 0x00000001001f7e84 python`pymain_run_python + 360
159+
frame #18: 0x00000001001f7cc4 python`Py_RunMain + 40
160+
frame #19: 0x0000000100006a20 python`main + 56
161+
frame #20: 0x00000001832510e0 dyld`start + 2360
162+
```
163+
164+
To enable debug symbols, edit `src/CMakeLists.txt` and add `set(CMAKE_BUILD_TYPE "Debug")` after the existing `set(CMAKE_BUILD_TYPE ...)` line.

0 commit comments

Comments
 (0)