
Commit da82787

RAG Stop Ignoring/Fix mypy Errors. (#34200)
* first set: remove ignores
* added aupdate acs test
* rerun flaky tests
1 parent e480f75 commit da82787

4 files changed: 17 additions & 19 deletions

sdk/ai/azure-ai-generative/azure/ai/generative/index/_documents/cracking.py

Lines changed: 7 additions & 8 deletions
```diff
@@ -128,8 +128,7 @@ def __init__(self, file: IO, document_source: DocumentSource, metadata: dict, mo
         """Initialize a text file loader."""
         self.metadata = metadata
         self.document_source = document_source
-        super().__init__(file=file, mode=mode, **unstructured_kwargs)  # type: ignore[call-arg]
-        # TODO: Bug 2878420
+        super().__init__(file=file, **unstructured_kwargs)
 
     def load(self) -> List[Document]:
         """Load file contents into Documents."""
@@ -343,24 +342,24 @@ def crack_documents(sources: Iterator[DocumentSource], file_extension_loaders=fi
     log_batch_size = 100
     for i, source in enumerate(sources):
         file_start_time = time.time()
-        # TODO: Bug 2878422 for all type: ignore in this method
-        files_by_extension[source.path.suffix.lower()] += 1  # type: ignore[union-attr]
-        loader_cls = file_extension_loaders.get(source.path.suffix.lower())  # type: ignore[union-attr]
+        assert isinstance(source.path, Path)
+        files_by_extension[source.path.suffix.lower()] += 1
+        loader_cls = file_extension_loaders.get(source.path.suffix.lower())
         if i % log_batch_size == 0:
             for ext in files_by_extension:
                 if files_by_extension[ext] > 0:
                     safe_mlflow_log_metric(ext, files_by_extension[ext], logger=logger, step=int(time.time() * 1000))
         mode = "r"
         if loader_cls is None:
-            raise RuntimeError(f"Unsupported file extension '{source.path.suffix}': {source.filename}")  # type: ignore[union-attr]
+            raise RuntimeError(f"Unsupported file extension '{source.path.suffix}': {source.filename}")
 
         if hasattr(loader_cls, "file_io_mode"):
             mode = loader_cls.file_io_mode()
         elif loader_cls is TikaLoader or loader_cls is PDFFileLoader or loader_cls is TextFileIOLoader:
             mode = "rb"
 
         try:
-            with open(source.path, mode=mode) as f:  # type: ignore[arg-type]
+            with open(source.path, mode=mode) as f:
                 loader = loader_cls(**{
                     "file": f,
                     "document_source": source,
@@ -373,7 +372,7 @@ def crack_documents(sources: Iterator[DocumentSource], file_extension_loaders=fi
             # if loader_cls has a fallback_loader, try that
             if hasattr(loader_cls, "fallback_loader"):
                 fallback_loader_cls = loader_cls.fallback_loader()
-                with open(source.path, mode=mode) as f:  # type: ignore[arg-type]
+                with open(source.path, mode=mode) as f:
                     loader = fallback_loader_cls(**{
                         "file": f,
                         "document_source": source,
```

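The `assert isinstance(source.path, Path)` line above is the pattern this commit applies throughout: instead of suppressing mypy's `[union-attr]` complaints, the code narrows the type at runtime so the checker can follow along. A minimal standalone sketch of why this works (illustrative names, not the SDK's code):

```python
from pathlib import Path
from typing import Optional, Union


def suffix_of(path: Optional[Union[str, Path]]) -> str:
    # Without narrowing, mypy flags `path.suffix` with [union-attr],
    # since `path` may still be None or str at this point.
    assert isinstance(path, Path)
    # After the assert, mypy has narrowed `path` to Path, so the old
    # `# type: ignore[union-attr]` comments are no longer needed.
    return path.suffix.lower()


print(suffix_of(Path("report.PDF")))  # prints ".pdf"
```

The assert also turns a silent assumption into a loud failure if a non-`Path` value ever reaches this code, rather than deferring the error to the first attribute access.
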
sdk/ai/azure-ai-generative/azure/ai/generative/index/_embeddings/__init__.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -271,12 +271,13 @@ class ReferenceEmbeddedDocument(EmbeddedDocument):
 
     _last_opened_embeddings: Optional[Tuple[str, object]] = None
 
-    def __init__(self, document_id: str, mtime, document_hash: str, path_to_data: str, index, embeddings_container_path: str, metadata: dict):
+    def __init__(self, document_id: str, mtime, document_hash: str, path_to_data: str, index, embeddings_container_path: str, metadata: dict, is_local: bool = False):
         """Initialize the document."""
         super().__init__(document_id, mtime, document_hash, metadata)
         self.path_to_data = path_to_data
         self.embeddings_container_path = embeddings_container_path
         self.index = index
+        self.is_local = is_local
 
     def get_data(self) -> str:
         """Get the data of the document."""
@@ -619,8 +620,7 @@ def load_v2(self, dir_name: str, embeddings_container_path):
                 doc_id,
                 mtime,
                 document_hash,
-                path_to_data=None,  # type: ignore[arg-type]
-                #TODO: Bug 2879181
+                path_to_data="",
                 index=None,
                 embeddings_container_path=embeddings_container_path,
                 metadata=metadata
@@ -898,9 +898,9 @@ def _get_embeddings_internal(self, input_documents: Union[Iterator[Document], Ba
            raise ValueError("No embed function provided.")
 
        if hasattr(input_documents, "__module__") and "langchain" in input_documents.__module__ and "document_loaders" in input_documents.__module__:
+           assert isinstance(input_documents, BaseLoader)
            input_documents = iter([WrappedLangChainDocument(d)
-                                   for d in input_documents.load()])  # type: ignore[union-attr]
-           # TODO: Bug 2879186
+                                   for d in input_documents.load()])
        elif isinstance(input_documents, DocumentChunksIterator):
            flattened_docs: List = []
            for chunked_doc in input_documents:
```
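
Two different error classes are retired here: `path_to_data=None` violated the parameter's `str` annotation (`[arg-type]`), so the call now passes `""`, and `input_documents.load()` needed narrowing because the parameter is a `Union[Iterator[Document], BaseLoader]`. A hedged sketch of that narrowing, with stand-in classes rather than the SDK's or LangChain's types:

```python
from typing import Iterator, List, Union


class Doc:
    """Stand-in for the SDK's Document type."""

    def __init__(self, text: str) -> None:
        self.text = text


class Loader:
    """Stand-in for langchain's BaseLoader."""

    def load(self) -> List[Doc]:
        return [Doc("example")]


def as_doc_iterator(docs: Union[Iterator[Doc], Loader]) -> Iterator[Doc]:
    if isinstance(docs, Loader):
        # In the narrowed branch mypy accepts .load() without any
        # `# type: ignore[union-attr]`; the commit gets the same effect
        # with `assert isinstance(...)` after its duck-typed module check.
        return iter(docs.load())
    return docs


print(next(as_doc_iterator(Loader())).text)  # prints "example"
```

The SDK keys off `__module__` to avoid importing the concrete class at the check site, which is why it needs the explicit assert: `hasattr` alone does not narrow the union for mypy.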

sdk/ai/azure-ai-generative/azure/ai/generative/index/_mlindex.py

Lines changed: 1 addition & 2 deletions
```diff
@@ -469,8 +469,7 @@ def override_connections(
             from azure.ai.resources._index._utils.connections import get_connection_by_id_v2
             index_connection = get_connection_by_id_v2(index_connection, credential=credential)
         self.index_config["connection"] = {"id": get_id_from_connection(index_connection)}
-        self.save(just_config=True)  # type: ignore[call-arg]
-        # TODO: Bug 2877747
+        self.save(self.base_uri, just_config=True)
         return self
 
     def set_embeddings_connection(
```
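
The `[call-arg]` ignore was hiding a genuinely wrong call: `save` requires an output path as its first positional argument, and the fix supplies `self.base_uri` rather than silencing mypy. A minimal sketch of this error class, with an illustrative signature rather than `MLIndex.save`'s actual one:

```python
def save(output_uri: str, just_config: bool = False) -> None:
    # Illustrative stand-in for the method whose call site was fixed.
    what = "config only" if just_config else "everything"
    print(f"Saving {what} to {output_uri}")


save("azureml://my-index", just_config=True)   # type-checks
# save(just_config=True)  # mypy: Missing positional argument "output_uri"  [call-arg]
```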

sdk/ai/azure-ai-generative/azure/ai/generative/index/_tasks/update_acs.py

Lines changed: 4 additions & 4 deletions
```diff
@@ -94,14 +94,14 @@ def create_search_index_sdk(acs_config: dict, credential, embeddings: Optional[E
         elif field_type == "metadata":
             fields.append(SimpleField(name=field_name, type=SearchFieldDataType.String))
         elif field_type == "embedding":
-            # TODO: Bug 2878424 to address type: ignore in this section
+            assert isinstance(embeddings, EmbeddingsContainer)
             if current_version >= pkg_version.parse("11.4.0b11"):
                 fields.append(
                     SearchField(
                         name=field_name,
                         type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                         searchable=True,
-                        vector_search_dimensions=embeddings.get_embedding_dimensions(),  # type: ignore[union-attr]
+                        vector_search_dimensions=embeddings.get_embedding_dimensions(),
                         vector_search_profile=f"{field_name}_config",
                     )
                 )
@@ -111,7 +111,7 @@
                         name=field_name,
                         type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                         searchable=True,
-                        vector_search_dimensions=embeddings.get_embedding_dimensions(),  # type: ignore[union-attr]
+                        vector_search_dimensions=embeddings.get_embedding_dimensions(),
                         vector_search_configuration=f"{field_name}_config",
                     )
                 )
@@ -369,7 +369,7 @@ def batched_docs_to_delete(embeddings_container) -> Iterator[List[Dict[str, str]
                 # was generated for this snapshot and needs to pushed to the index.
 
                 # TODO: Bug 2878426
-                if syncing_index and isinstance(emb_doc, ReferenceEmbeddedDocument) and not emb_doc.is_local:  # type: ignore[attr-defined]
+                if syncing_index and isinstance(emb_doc, ReferenceEmbeddedDocument) and not emb_doc.is_local:
                     skipped_prefix_documents += 1
                     num_source_docs += 1
                     if verbosity > 2:
```
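
The `[attr-defined]` ignore could be dropped because `is_local` is now declared in `ReferenceEmbeddedDocument.__init__` (see the `_embeddings` diff above); mypy only knows about attributes it can see assigned on the class. A standalone sketch with simplified stand-ins for the SDK's classes:

```python
class EmbeddedDocument:
    """Stand-in for the SDK's base document class."""

    def __init__(self, document_id: str) -> None:
        self.document_id = document_id


class ReferenceEmbeddedDocument(EmbeddedDocument):
    def __init__(self, document_id: str, is_local: bool = False) -> None:
        super().__init__(document_id)
        # Declared in __init__, so mypy knows the attribute exists and
        # `doc.is_local` no longer needs `# type: ignore[attr-defined]`.
        self.is_local = is_local


doc = ReferenceEmbeddedDocument("doc-1", is_local=True)
print(doc.is_local)  # prints True
```

The default of `False` also keeps every existing call site valid, which is why the constructor change needed no further edits.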
