Skip to content

Commit 337237d

Browse files
Hotfix permission batch (#165)
* test: print generate_batches error messages
* model

---------

Co-authored-by: LennartSchmidtKern <[email protected]>
1 parent 554bb79 commit 337237d

File tree

2 files changed

+24
-3
lines changed

2 files changed

+24
-3
lines changed

controller.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,29 @@ def generate_batches(
7777
embedding_batches = embedder.transform(document_batches, as_generator=True)
7878
else:
7979
embedding_batches = embedder.fit_transform(document_batches, as_generator=True)
80-
for record_batch in record_batches:
81-
yield {"record_ids": record_batch, "embeddings": next(embedding_batches)}
80+
for i, record_batch in enumerate(record_batches):
81+
try:
82+
yield {"record_ids": record_batch, "embeddings": next(embedding_batches)}
83+
except StopIteration as e:
84+
print(
85+
f"Number of record batches ({len(record_batches)}) "
86+
f"exceeds number of embedding batches ({i+1}). This should never happen.",
87+
flush=True,
88+
)
89+
print(
90+
f"Aborting embedding creation for `{attribute_name}` (for_delta={for_delta}):",
91+
flush=True,
92+
)
93+
print("- project ID:", project_id, flush=True)
94+
print("- record IDs:", record_batch, flush=True)
95+
batch_start = i * embedder.batch_size
96+
batch_stop = batch_start + (embedder.batch_size - 1)
97+
print(
98+
"- document batch:",
99+
document_batches[batch_start:batch_stop],
100+
flush=True,
101+
)
102+
raise e
82103

83104

84105
def get_docbins(

0 commit comments

Comments
 (0)