datastax
diff --git a/‎examples/notebooks/conftest.py
Lines changed: 7 additions & 5 deletions b/‎examples/notebooks/conftest.py
Lines changed: 7 additions & 5 deletions
diff --git a/‎libs/colbert/ragstack_colbert/cassandra_database.py
Lines changed: 12 additions & 9 deletions b/‎libs/colbert/ragstack_colbert/cassandra_database.py
Lines changed: 12 additions & 9 deletions
diff --git a/‎libs/colbert/ragstack_colbert/colbert_retriever.py
Lines changed: 17 additions & 11 deletions b/‎libs/colbert/ragstack_colbert/colbert_retriever.py
Lines changed: 17 additions & 11 deletions
diff --git a/‎libs/colbert/ragstack_colbert/text_encoder.py
Lines changed: 3 additions & 3 deletions b/‎libs/colbert/ragstack_colbert/text_encoder.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎libs/colbert/tests/integration_tests/test_embedding_retrieval.py
Lines changed: 4 additions & 2 deletions b/‎libs/colbert/tests/integration_tests/test_embedding_retrieval.py
Lines changed: 4 additions & 2 deletions
diff --git a/‎libs/colbert/tests/unit_tests/test_colbert_baseline_embeddings.py
Lines changed: 2 additions & 2 deletions b/‎libs/colbert/tests/unit_tests/test_colbert_baseline_embeddings.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎libs/e2e-tests/e2e_tests/conftest.py
Lines changed: 3 additions & 3 deletions b/‎libs/e2e-tests/e2e_tests/conftest.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎libs/e2e-tests/e2e_tests/langchain/rag_application.py
Lines changed: 4 additions & 4 deletions b/‎libs/e2e-tests/e2e_tests/langchain/rag_application.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎libs/e2e-tests/e2e_tests/langchain/test_compatibility_rag.py
Lines changed: 1 addition & 1 deletion b/‎libs/e2e-tests/e2e_tests/langchain/test_compatibility_rag.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎libs/e2e-tests/e2e_tests/llama_index/test_astra.py
Lines changed: 1 addition & 1 deletion b/‎libs/e2e-tests/e2e_tests/llama_index/test_astra.py
Lines changed: 1 addition & 1 deletion
@@ -28,21 +28,23 @@ def get_required_env(name) -> str:
 
 def try_delete_with_backoff(collection: str, sleep=1, max_tries=2):
     try:
-        logging.info(f"deleting collection {collection}")
+        logging.info("deleting collection %s", collection)
         response = client.delete_collection(collection)
-        logging.info(f"delete collection {collection} response: {response!s}")
-    except Exception as e:
+        logging.info("delete collection %s response: %s", collection, response)
+    except Exception:
         max_tries -= 1
         if max_tries < 0:
             raise
 
-        logging.warning(f"An exception occurred deleting collection {collection}: {e}")
+        logging.warning(
+            "An exception occurred deleting collection %s: ", collection, exc_info=True
+        )
         time.sleep(sleep)
         try_delete_with_backoff(collection, sleep * 2, max_tries)
 
 
 def before_notebook():
     collections = client.get_collections().get("status").get("collections")
-    logging.info(f"Existing collections: {collections}")
+    logging.info("Existing collections: %s", collections)
     for collection in collections:
         try_delete_with_backoff(collection)
@@ -95,8 +95,8 @@ def _initialize(
             is_astra = False
 
         logging.info(
-            f"Cassandra store is running on "
-            f"{'AstraDB' if is_astra else 'Apache Cassandra'}."
+            "Cassandra store is running on %s.",
+            "AstraDB" if is_astra else "Apache Cassandra",
         )
 
         self._table = ClusteredMetadataVectorCassandraTable(
@@ -114,12 +114,15 @@ def _log_insert_error(
     ):
         if embedding_id == -1:
             logging.error(
-                f"issue inserting document data: {doc_id} chunk: {chunk_id}: {exp}"
+                "issue inserting document data: %s chunk: %s: %s", doc_id, chunk_id, exp
             )
         else:
             logging.error(
-                f"issue inserting document embedding: {doc_id} chunk: {chunk_id} "
-                f"embedding: {embedding_id}: {exp}"
+                "issue inserting document embedding: %s chunk: %s embedding: %s: %s",
+                doc_id,
+                chunk_id,
+                embedding_id,
+                exp,
             )
 
     def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
@@ -298,13 +301,13 @@ def delete_chunks(self, doc_ids: List[str]) -> bool:
             try:
                 self._table.delete_partition(partition_id=doc_id)
             except Exception:
-                logging.exception(f"issue on delete of document: {doc_id}")
+                logging.exception("issue on delete of document: %s", doc_id)
                 failed_docs.append(doc_id)
 
         if len(failed_docs) > 0:
             raise CassandraDatabaseError(
-                f"delete failed for these docs: {failed_docs}. "
-                f"See error logs for more info."
+                "delete failed for these docs: %s. See error logs for more info."
+                % (failed_docs,)
             )
 
         return True
@@ -353,7 +356,7 @@ async def adelete_chunks(
 
         for doc_id, exp in results:
             if exp is not None:
-                logging.error(f"issue deleting document: {doc_id}: {exp}")
+                logging.error("issue deleting document: %s: %s", doc_id, exp)
                 success = False
                 failed_docs.append(doc_id)
 
 
@@ -40,9 +40,10 @@ def all_gpus_support_fp16(is_cuda: Optional[bool] = False):
             compute_capability[0] == 5 and compute_capability[1] < 3
         ):
             logging.info(
-                f"Device {device_id} with compute capability {compute_capability} "
-                f"does not support FP16 (half-precision) operations. "
-                f"Using FP32 (full-precision) operations."
+                "Device %s with compute capability %s does not support FP16 "
+                "(half-precision) operations. Using FP32 (full-precision) operations.",
+                device_id,
+                compute_capability,
             )
             return False
 
@@ -186,8 +187,9 @@ async def _query_relevant_chunks(
         for result in results:
             if isinstance(result, Exception):
                 logging.error(
-                    f"Issue on database.get_relevant_chunks(): "
-                    f"{result} at {get_trace(result)}"
+                    "Issue on database.get_relevant_chunks(): %s at %s",
+                    result,
+                    get_trace(result),
                 )
             else:
                 chunks.update(result)
@@ -209,8 +211,9 @@ async def _get_chunk_embeddings(self, chunks: Set[Chunk]) -> List[Chunk]:
         for result in results:
             if isinstance(result, Exception):
                 logging.error(
-                    f"Issue on database.get_chunk_embeddings(): "
-                    f"{result} at {get_trace(result)}"
+                    "Issue on database.get_chunk_embeddings(): %s at %s",
+                    result,
+                    get_trace(result),
                 )
 
         return results
@@ -261,8 +264,9 @@ async def _get_chunk_data(
         for result in results:
             if isinstance(result, Exception):
                 logging.error(
-                    f"Issue on database.get_chunk_data(): "
-                    f"{result} at {get_trace(result)}"
+                    "Issue on database.get_chunk_data(): %s at %s",
+                    result,
+                    get_trace(result),
                 )
 
         return results
@@ -335,8 +339,10 @@ async def aembedding_search(
 
         top_k = max(math.floor(len(query_embedding) / 2), 16)
         logging.debug(
-            f"based on query length of {len(query_embedding)} tokens, "
-            f"retrieving {top_k} results per token-embedding"
+            "based on query length of %s tokens, retrieving %s results per "
+            "token-embedding",
+            len(query_embedding),
+            top_k,
         )
 
         # search for relevant chunks (only with `doc_id` and `chunk_id` set)
 
@@ -57,7 +57,7 @@ def __init__(self, config: ColBERTConfig, verbose: Optional[int] = 3) -> None:
             verbose (int): The level of logging to use
         """
 
-        logging.info(f"Cuda enabled GPU available: {torch.cuda.is_available()}")
+        logging.info("Cuda enabled GPU available: %s", torch.cuda.is_available())
 
         self._checkpoint = Checkpoint(
             config.checkpoint, colbert_config=config, verbose=verbose
@@ -79,7 +79,7 @@ def encode_chunks(self, chunks: List[Chunk], batch_size: int = 640) -> List[Chun
                 document lengths.
         """
 
-        logging.debug(f"#> Encoding {len(chunks)} chunks..")
+        logging.debug("#> Encoding %s chunks..", len(chunks))
 
         embedded_chunks: List[Chunk] = []
 
@@ -115,7 +115,7 @@ def encode_query(
         if query_maxlen < 0:
             tokens = self._checkpoint.query_tokenizer.tokenize([text])
             query_maxlen = calculate_query_maxlen(tokens)
-            logging.debug(f"Calculated dynamic query_maxlen of {query_maxlen}")
+            logging.debug("Calculated dynamic query_maxlen of %s", query_maxlen)
 
         prev_query_maxlen = self._checkpoint.query_tokenizer.query_maxlen
         self._checkpoint.query_tokenizer.query_maxlen = query_maxlen
 
@@ -78,10 +78,12 @@ def chunk_texts(text, chunk_size, overlap_size):
     )
     assert len(chunk_scores) == 5
     for chunk, score in chunk_scores:
-        logging.info(f"got chunk_id {chunk.chunk_id} with score {score}")
+        logging.info("got chunk_id %s with score %s", chunk.chunk_id, score)
 
     best_chunk = chunk_scores[0][0]
     assert len(best_chunk.text) > 0
     logging.info(
-        f"Highest scoring chunk_id: {best_chunk.chunk_id} with text: {best_chunk.text}"
+        "Highest scoring chunk_id: %s with text: %s",
+        best_chunk.chunk_id,
+        best_chunk.text,
     )
@@ -95,7 +95,7 @@ def test_embeddings_with_baseline():
             assert similarity.shape == torch.Size([1])  # this has to be scalar
             # debug code to identify which token deviates
             if similarity.item() < 0.99:
-                logging.warning(f"n = {n}, similarity = {similarity.item()}")
+                logging.warning("n = %s, similarity = %s", n, similarity.item())
             assert similarity.item() > 0.99
             n = n + 1
 
@@ -131,7 +131,7 @@ def test_colbert_embedding_against_vanilla_impl():
 
 
 def model_embedding(model: str):
-    logging.info(f"test model compatibility {model}")
+    logging.info("test model compatibility %s", model)
     colbert_svc = ColbertEmbeddingModel(
         checkpoint=model,
         query_maxlen=32,
 
@@ -100,7 +100,7 @@ def pytest_runtest_makereport(item, call):
         if not info:
             test_path = pathlib.PurePath(item.path)
             info = test_path.parent.name + "::" + test_path.name + "::" + item.name
-        logging.info(f"Test {info} took: {total_time} seconds")
+        logging.info("Test %s took: %s seconds", info, total_time)
         paths = str(item.path).split(os.sep)
         is_langchain = False
         is_llamaindex = False
@@ -127,7 +127,7 @@ def pytest_runtest_makereport(item, call):
             or "unconditional skip" in result
         )
         if not skip_report_line:
-            logging.info("Test report line: " + report_line)
+            logging.info("Test report line: %s", report_line)
             if rep.outcome != "passed":
                 # also keep skipped tests in the report
                 failed_report_lines.append(report_line)
@@ -149,7 +149,7 @@ def pytest_runtest_makereport(item, call):
             elif is_llamaindex:
                 llamaindex_report_lines.append(report_line)
         else:
-            logging.info("Skipping test report line: " + result)
+            logging.info("Skipping test report line: %s", result)
         os.environ["RAGSTACK_E2E_TESTS_TEST_INFO"] = ""
 
     if rep.when == "call":
 
@@ -196,7 +196,7 @@ def run_rag_custom_chain(
         run_id = cb.traced_runs[0].id
         record_langsmith_sharelink(run_id, record_property)
 
-    logging.info("Got response: " + response)
+    logging.info("Got response: %s", response)
     assert "2020" in response, f"Expected 2020 in the answer but got: {response}"
 
 
@@ -209,7 +209,7 @@ def run_conversational_rag(
     logging.info("Starting to add texts to vector store")
     start = time.perf_counter_ns()
     vector_store.add_texts(SAMPLE_DATA)
-    logging.info(f"Added texts in {(time.perf_counter_ns() - start) / 1e9} seconds")
+    logging.info("Added texts in %s seconds", (time.perf_counter_ns() - start) / 1e9)
     retriever = vector_store.as_retriever()
     memory = ConversationSummaryMemory(
         llm=llm,
@@ -229,13 +229,13 @@ def run_conversational_rag(
         result = conversation.invoke({"question": "what is MyFakeProductForTesting?"})
         run_id = cb.traced_runs[0].id
         record_langsmith_sharelink(run_id, record_property)
-        logging.info("First result: " + str(result))
+        logging.info("First result: %s", result)
 
     with callbacks.collect_runs() as cb:
         result = conversation.invoke({"question": "and when was it released?"})
         run_id = cb.traced_runs[0].id
         record_langsmith_sharelink(run_id, record_property)
-        logging.info("Second result: " + str(result))
+        logging.info("Second result: %s", result)
 
     answer = result["answer"]
     assert "2020" in answer, f"Expected 2020 in the answer but got: {answer}"
@@ -465,7 +465,7 @@ def embed_query(self, text: str) -> List[float]:
         f"Tell me which one of these products it is part of. "
         f"Only include product from the ones below: {docs_str}."
     )
-    logging.info(f"Prompt: {prompt}")
+    logging.info("Prompt: %s", prompt)
 
     text_message = {
         "type": "text",
 
@@ -231,5 +231,5 @@ def _get_text_embedding(self, text: str) -> List[float]:
     @staticmethod
     def mock_embedding(text: str):
         res = [len(text) / 2, len(text) / 5, len(text) / 10]
-        logging.debug("mock_embedding for " + text + " : " + str(res))
+        logging.debug("mock_embedding for %s : %s", text, res)
         return res
Original file line number	Diff line number	Diff line change
`@@ -78,10 +78,12 @@ def chunk_texts(text, chunk_size, overlap_size):`
`78`	`78`	`)`
`79`	`79`	`assert len(chunk_scores) == 5`
`80`	`80`	`for chunk, score in chunk_scores:`
`81`		`- logging.info(f"got chunk_id {chunk.chunk_id} with score {score}")`
	`81`	`+ logging.info("got chunk_id %s with score %s", chunk.chunk_id, score)`
`82`	`82`
`83`	`83`	`best_chunk = chunk_scores[0][0]`
`84`	`84`	`assert len(best_chunk.text) > 0`
`85`	`85`	`logging.info(`
`86`		`- f"Highest scoring chunk_id: {best_chunk.chunk_id} with text: {best_chunk.text}"`
	`86`	`+ "Highest scoring chunk_id: %s with text: %s",`
	`87`	`+ best_chunk.chunk_id,`
	`88`	`+ best_chunk.text,`
`87`	`89`	`)`
Original file line number	Diff line number	Diff line change
`@@ -465,7 +465,7 @@ def embed_query(self, text: str) -> List[float]:`
`465`	`465`	`f"Tell me which one of these products it is part of. "`
`466`	`466`	`f"Only include product from the ones below: {docs_str}."`
`467`	`467`	`)`
`468`		`- logging.info(f"Prompt: {prompt}")`
	`468`	`+ logging.info("Prompt: %s", prompt)`
`469`	`469`
`470`	`470`	`text_message = {`
`471`	`471`	`"type": "text",`