
Commit da82787

RAG Stop Ignoring/Fix mypy Errors. (#34200)
* first set: remove ignores
* added aupdate acs test
* rerun flaky tests
1 parent e480f75 commit da82787

4 files changed: 17 additions & 19 deletions

sdk/ai/azure-ai-generative/azure/ai/generative/index/_documents/cracking.py

Lines changed: 7 additions & 8 deletions
```diff
@@ -128,8 +128,7 @@ def __init__(self, file: IO, document_source: DocumentSource, metadata: dict, mo
         """Initialize a text file loader."""
         self.metadata = metadata
         self.document_source = document_source
-        super().__init__(file=file, mode=mode, **unstructured_kwargs)  # type: ignore[call-arg]
-        # TODO: Bug 2878420
+        super().__init__(file=file, **unstructured_kwargs)
 
     def load(self) -> List[Document]:
         """Load file contents into Documents."""
@@ -343,24 +342,24 @@ def crack_documents(sources: Iterator[DocumentSource], file_extension_loaders=fi
     log_batch_size = 100
     for i, source in enumerate(sources):
         file_start_time = time.time()
-        # TODO: Bug 2878422 for all type: ignore in this method
-        files_by_extension[source.path.suffix.lower()] += 1  # type: ignore[union-attr]
-        loader_cls = file_extension_loaders.get(source.path.suffix.lower())  # type: ignore[union-attr]
+        assert isinstance(source.path, Path)
+        files_by_extension[source.path.suffix.lower()] += 1
+        loader_cls = file_extension_loaders.get(source.path.suffix.lower())
         if i % log_batch_size == 0:
             for ext in files_by_extension:
                 if files_by_extension[ext] > 0:
                     safe_mlflow_log_metric(ext, files_by_extension[ext], logger=logger, step=int(time.time() * 1000))
         mode = "r"
         if loader_cls is None:
-            raise RuntimeError(f"Unsupported file extension '{source.path.suffix}': {source.filename}")  # type: ignore[union-attr]
+            raise RuntimeError(f"Unsupported file extension '{source.path.suffix}': {source.filename}")
 
         if hasattr(loader_cls, "file_io_mode"):
             mode = loader_cls.file_io_mode()
         elif loader_cls is TikaLoader or loader_cls is PDFFileLoader or loader_cls is TextFileIOLoader:
             mode = "rb"
 
         try:
-            with open(source.path, mode=mode) as f:  # type: ignore[arg-type]
+            with open(source.path, mode=mode) as f:
                 loader = loader_cls(**{
                     "file": f,
                     "document_source": source,
@@ -373,7 +372,7 @@ def crack_documents(sources: Iterator[DocumentSource], file_extension_loaders=fi
             # if loader_cls has a fallback_loader, try that
             if hasattr(loader_cls, "fallback_loader"):
                 fallback_loader_cls = loader_cls.fallback_loader()
-                with open(source.path, mode=mode) as f:  # type: ignore[arg-type]
+                with open(source.path, mode=mode) as f:
                     loader = fallback_loader_cls(**{
                         "file": f,
                         "document_source": source,
```

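The `assert isinstance(source.path, Path)` line above is the pattern this commit applies throughout: instead of suppressing mypy's `[union-attr]` complaints, the code narrows the type at runtime so the checker can follow along. A minimal standalone sketch of why this works (illustrative names, not the SDK's code):

```python
from pathlib import Path
from typing import Optional, Union


def suffix_of(path: Optional[Union[str, Path]]) -> str:
    # Without narrowing, mypy flags `path.suffix` with [union-attr],
    # since `path` may still be None or str at this point.
    assert isinstance(path, Path)
    # After the assert, mypy has narrowed `path` to Path, so the old
    # `# type: ignore[union-attr]` comments are no longer needed.
    return path.suffix.lower()


print(suffix_of(Path("report.PDF")))  # prints ".pdf"
```

The assert also turns a silent assumption into a loud failure if a non-`Path` value ever reaches this code, rather than deferring the error to the first attribute access.
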
sdk/ai/azure-ai-generative/azure/ai/generative/index/_embeddings/__init__.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -271,12 +271,13 @@ class ReferenceEmbeddedDocument(EmbeddedDocument):
 
     _last_opened_embeddings: Optional[Tuple[str, object]] = None
 
-    def __init__(self, document_id: str, mtime, document_hash: str, path_to_data: str, index, embeddings_container_path: str, metadata: dict):
+    def __init__(self, document_id: str, mtime, document_hash: str, path_to_data: str, index, embeddings_container_path: str, metadata: dict, is_local: bool = False):
         """Initialize the document."""
         super().__init__(document_id, mtime, document_hash, metadata)
         self.path_to_data = path_to_data
         self.embeddings_container_path = embeddings_container_path
         self.index = index
+        self.is_local = is_local
 
     def get_data(self) -> str:
         """Get the data of the document."""
@@ -619,8 +620,7 @@ def load_v2(self, dir_name: str, embeddings_container_path):
                 doc_id,
                 mtime,
                 document_hash,
-                path_to_data=None,  # type: ignore[arg-type]
-                #TODO: Bug 2879181
+                path_to_data="",
                 index=None,
                 embeddings_container_path=embeddings_container_path,
                 metadata=metadata
@@ -898,9 +898,9 @@ def _get_embeddings_internal(self, input_documents: Union[Iterator[Document], Ba
            raise ValueError("No embed function provided.")
 
        if hasattr(input_documents, "__module__") and "langchain" in input_documents.__module__ and "document_loaders" in input_documents.__module__:
+           assert isinstance(input_documents, BaseLoader)
            input_documents = iter([WrappedLangChainDocument(d)
-                                   for d in input_documents.load()])  # type: ignore[union-attr]
-           # TODO: Bug 2879186
+                                   for d in input_documents.load()])
        elif isinstance(input_documents, DocumentChunksIterator):
            flattened_docs: List = []
            for chunked_doc in input_documents:
```
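
Two different error classes are retired here: `path_to_data=None` violated the parameter's `str` annotation (`[arg-type]`), so the call now passes `""`, and `input_documents.load()` needed narrowing because the parameter is a `Union[Iterator[Document], BaseLoader]`. A hedged sketch of that narrowing, with stand-in classes rather than the SDK's or LangChain's types:

```python
from typing import Iterator, List, Union


class Doc:
    """Stand-in for the SDK's Document type."""

    def __init__(self, text: str) -> None:
        self.text = text


class Loader:
    """Stand-in for langchain's BaseLoader."""

    def load(self) -> List[Doc]:
        return [Doc("example")]


def as_doc_iterator(docs: Union[Iterator[Doc], Loader]) -> Iterator[Doc]:
    if isinstance(docs, Loader):
        # In the narrowed branch mypy accepts .load() without any
        # `# type: ignore[union-attr]`; the commit gets the same effect
        # with `assert isinstance(...)` after its duck-typed module check.
        return iter(docs.load())
    return docs


print(next(as_doc_iterator(Loader())).text)  # prints "example"
```

The SDK keys off `__module__` to avoid importing the concrete class at the check site, which is why it needs the explicit assert: `hasattr` alone does not narrow the union for mypy.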

sdk/ai/azure-ai-generative/azure/ai/generative/index/_mlindex.py

Lines changed: 1 addition & 2 deletions
```diff
@@ -469,8 +469,7 @@ def override_connections(
             from azure.ai.resources._index._utils.connections import get_connection_by_id_v2
             index_connection = get_connection_by_id_v2(index_connection, credential=credential)
         self.index_config["connection"] = {"id": get_id_from_connection(index_connection)}
-        self.save(just_config=True)  # type: ignore[call-arg]
-        # TODO: Bug 2877747
+        self.save(self.base_uri, just_config=True)
         return self
 
     def set_embeddings_connection(
```
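
The `[call-arg]` ignore was hiding a genuinely wrong call: `save` requires an output path as its first positional argument, and the fix supplies `self.base_uri` rather than silencing mypy. A minimal sketch of this error class, with an illustrative signature rather than `MLIndex.save`'s actual one:

```python
def save(output_uri: str, just_config: bool = False) -> None:
    # Illustrative stand-in for the method whose call site was fixed.
    what = "config only" if just_config else "everything"
    print(f"Saving {what} to {output_uri}")


save("azureml://my-index", just_config=True)   # type-checks
# save(just_config=True)  # mypy: Missing positional argument "output_uri"  [call-arg]
```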

sdk/ai/azure-ai-generative/azure/ai/generative/index/_tasks/update_acs.py

Lines changed: 4 additions & 4 deletions
```diff
@@ -94,14 +94,14 @@ def create_search_index_sdk(acs_config: dict, credential, embeddings: Optional[E
         elif field_type == "metadata":
             fields.append(SimpleField(name=field_name, type=SearchFieldDataType.String))
         elif field_type == "embedding":
-            # TODO: Bug 2878424 to address type: ignore in this section
+            assert isinstance(embeddings, EmbeddingsContainer)
             if current_version >= pkg_version.parse("11.4.0b11"):
                 fields.append(
                     SearchField(
                         name=field_name,
                         type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                         searchable=True,
-                        vector_search_dimensions=embeddings.get_embedding_dimensions(),  # type: ignore[union-attr]
+                        vector_search_dimensions=embeddings.get_embedding_dimensions(),
                         vector_search_profile=f"{field_name}_config",
                     )
                 )
@@ -111,7 +111,7 @@
                         name=field_name,
                         type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                         searchable=True,
-                        vector_search_dimensions=embeddings.get_embedding_dimensions(),  # type: ignore[union-attr]
+                        vector_search_dimensions=embeddings.get_embedding_dimensions(),
                         vector_search_configuration=f"{field_name}_config",
                     )
                 )
@@ -369,7 +369,7 @@ def batched_docs_to_delete(embeddings_container) -> Iterator[List[Dict[str, str]
                 # was generated for this snapshot and needs to pushed to the index.
 
                 # TODO: Bug 2878426
-                if syncing_index and isinstance(emb_doc, ReferenceEmbeddedDocument) and not emb_doc.is_local:  # type: ignore[attr-defined]
+                if syncing_index and isinstance(emb_doc, ReferenceEmbeddedDocument) and not emb_doc.is_local:
                     skipped_prefix_documents += 1
                     num_source_docs += 1
                     if verbosity > 2:
```
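
The `[attr-defined]` ignore could be dropped because `is_local` is now declared in `ReferenceEmbeddedDocument.__init__` (see the `_embeddings` diff above); mypy only knows about attributes it can see assigned on the class. A standalone sketch with simplified stand-ins for the SDK's classes:

```python
class EmbeddedDocument:
    """Stand-in for the SDK's base document class."""

    def __init__(self, document_id: str) -> None:
        self.document_id = document_id


class ReferenceEmbeddedDocument(EmbeddedDocument):
    def __init__(self, document_id: str, is_local: bool = False) -> None:
        super().__init__(document_id)
        # Declared in __init__, so mypy knows the attribute exists and
        # `doc.is_local` no longer needs `# type: ignore[attr-defined]`.
        self.is_local = is_local


doc = ReferenceEmbeddedDocument("doc-1", is_local=True)
print(doc.is_local)  # prints True
```

The default of `False` also keeps every existing call site valid, which is why the constructor change needed no further edits.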
