Skip to content

Commit fd8cd1f

Browse files
authored
[ai] Fix typing errors to make ai SDKs mypy-compliant (#33638)
* Add first batch of mypy fixes * Turn on mypy gate * Add missing Optional import * Make __init__ return type None * Make Optional explicit * Make __init__ return type None * Update pyproject.toml * Make Optional explicit * Ignore incorrect mypy error - code is fine * Exclude _restclient from mypy * Fix type mismatch * Fix connection_subtypes errors * Fix tokens errors * Exclude vendor folder * Fix _index_data_source errors * Fix entities/data errors * Fix some _deployment_utils errors * Fix _ai_client_utils errors * Fix logging_handler errors * Fix _utils/logging errors * Fix ai_client errors * Fix _deployment operations errors * Exclude tests from mypy check * Fix _index._docstore errors * Fix _model_operations errors * Fix faiss errors * Fix _model_operations errors * Add type annotation * Fix _ai_resource_operations errors * Fix entities._mlindex errors * Fix _dockerfile_utils errors * Fix data operations errors * Fix _index._langchain._docstore errors * Fix entities.configs errors * Fix _index._mlindex errors * Fix some _index._embeddings.__init__ errors * Fix _index._utils.connections errors * fix operations._ingest_data_to_index error * Fix connection_operations errors * Update pyproject.toml files to not follow imports * Update pyproject.toml files * Fix some index._embeddings.__init__ errors * Resolve merge conflicts * Create work items and ignore errors * Update pyproject.toml to call out unused ignore * Fix deployment utils and deployment operations bugs * Revert "Resolve merge conflicts" This reverts commit 09bdc79. 
* Resolve merge conflict * Fix typing error in _documents/document.py * Add cspell.json files * Disabled following stubs * Disabled following stubs * Fix _index/_langchain/acs error * Fix data_index_func errors * Fix typing error in _utils/connections * Ignore missing stubs from imports error * Fix dsl/pipeline_decorator errors * Fix _langchain/acs errors * Fix _index/_embeddings/openai * Ignore import stub errors * Fix _index/models errors * Fix evaluate/_utils errors * Fix _model_tools/images * Fix _model_tools/images * Fix _index_data_source errors * Fix resources/_index/_embeddings/__init__ * Fix _mlindex_operations * Ignore _langchain/docstore error and assign bug * Fix errors * Fix dsl/_pipeline_decorator errors * Fix errors * Fix evaluate errors * Fix import errors * Fix _embeddings/openai errors * Exclude vendor code from mypy check * Fix _documents/chunking errors * Fix _builder/data_index_func errors * Fix _tasks/crack_and_chunk_and_embed_and_index error * Fix _tasks/crack_and_chunk_and_embed errors * Update excluded files/directories * Fix _documents/cracking errors * Fix index/_tasks/update_acs errors * Fix _documents/__init__ errors * Fix _documents/__init__ errors * Fix _tasks/update_acs errors * Fix index/_mlindex errors * Fix synthetic/qa errors * Fix index/_docstore errors * Fix faiss errors * Fix _langchain/acs errors * Fix index/_models and _conversation/conversation_request errors * Fix evaluate/_evaluation_result error * Fix _tasks/crack_and_chunk errors * Ignore _tasks/generate_qa error * Fix _tasks/update_pinecone errors * Fix index/_embeddings/__init__ errors * Fix _langchain/docstore errors * Fix type: ignore comment * Fix remaining errors and add TODO
1 parent 97f43d2 commit fd8cd1f

File tree

97 files changed

+810
-667
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+810
-667
lines changed

sdk/ai/azure-ai-generative/azure/ai/generative/_telemetry/logging_handler.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import logging
1010
import platform
1111
import traceback
12+
from typing import Union, Dict
1213
import os
1314

1415
from opencensus.ext.azure.log_exporter import AzureLogHandler
@@ -45,7 +46,7 @@ def __init__(self, name: str):
4546
self.package_logger: logging.Logger = logging.getLogger(GEN_AI_INTERNAL_LOGGER_NAMESPACE + name)
4647
self.package_logger.propagate = False
4748
self.module_logger = logging.getLogger(name)
48-
self.custom_dimensions = {}
49+
self.custom_dimensions: Dict[str, Union[str, Dict]] = {}
4950

5051
def update_info(self) -> None:
5152
self.package_logger.addHandler(get_appinsights_log_handler(USER_AGENT))

sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_evaluate.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import time
99
import logging
1010
from pathlib import Path
11-
from typing import Callable, Optional, Dict, List
11+
from typing import Callable, Optional, Dict, List, Mapping
1212

1313
import mlflow
1414
import pandas as pd
@@ -87,13 +87,13 @@ def _log_metrics(run_id, metrics):
8787
@monitor_with_activity(package_logger, "Evaluate", ActivityType.PUBLICAPI)
8888
def evaluate(
8989
*,
90-
evaluation_name: str = None,
90+
evaluation_name: Optional[str] = None,
9191
target: Optional[Callable] = None,
9292
data: Optional[str] = None,
93-
task_type: str = None,
93+
task_type: Optional[str] = None,
9494
metrics_list: Optional[List[str]] = None,
95-
model_config: Dict[str, str] = None,
96-
data_mapping: Dict[str, str] = None,
95+
model_config: Optional[Dict[str, str]] = None,
96+
data_mapping: Optional[Mapping] = None,
9797
output_path: Optional[str] = None,
9898
**kwargs
9999
):
@@ -114,7 +114,7 @@ def evaluate(
114114
:keyword model_config: GPT configuration details needed for AI-assisted metrics.
115115
:paramtype model_config: Dict[str, str]
116116
:keyword data_mapping: GPT configuration details needed for AI-assisted metrics.
117-
:paramtype data_mapping: Dict[str, str]
117+
:paramtype data_mapping: typing.Mapping
118118
:keyword output_path: The local folder path to save evaluation artifacts to if set
119119
:paramtype output_path: Optional[str]
120120
:keyword tracking_uri: Tracking uri to log evaluation results to AI Studio

sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_evaluation_result.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Dict
1+
from typing import Dict, Optional
22

33
from azure.ai.generative.evaluate._utils import _get_ai_studio_url
44

@@ -8,8 +8,8 @@ class EvaluationResult(object):
88
def __init__(self, metrics_summary: Dict[str, float], artifacts: Dict[str, str], **kwargs):
99
self._metrics_summary = metrics_summary
1010
self._artifacts = artifacts
11-
self._tracking_uri = kwargs.get("tracking_uri")
12-
self._evaluation_id = kwargs.get("evaluation_id")
11+
self._tracking_uri: Optional[str] = kwargs.get("tracking_uri")
12+
self._evaluation_id: str = kwargs.get("evaluation_id", "")
1313
if self._tracking_uri:
1414
self._studio_url = _get_ai_studio_url(self._tracking_uri, self._evaluation_id)
1515

@@ -23,13 +23,13 @@ def artifacts(self) -> Dict[str, str]:
2323

2424
@property
2525
def tracking_uri(self) -> str:
26-
return self._tracking_uri
26+
return self._tracking_uri # type: ignore[return-value]
2727

2828
@property
2929
def studio_url(self) -> str:
3030
return self._studio_url
3131

32-
def download_evaluation_artifacts(self, path: str) -> str:
32+
def download_evaluation_artifacts(self, path: str) -> None:
3333
from mlflow.artifacts import download_artifacts
3434
for artifact, artifact_uri in self.artifacts.items():
3535
download_artifacts(

sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_utils.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import re
88
import shutil
99
from pathlib import Path
10+
from typing import Optional
1011
import pandas as pd
1112

1213
_SUB_ID = "sub-id"
@@ -108,20 +109,20 @@ def _write_properties_to_run_history(properties: dict, logger) -> None:
108109
logger.error("Fail writing properties '%s' to run history: %s", properties, e)
109110

110111

111-
def _get_ai_studio_url(tracking_uri, evaluation_id):
112+
def _get_ai_studio_url(tracking_uri: str, evaluation_id: str):
112113
_PROJECT_INFO_REGEX = (
113114
r".*/subscriptions/(.+)/resourceGroups/(.+)"
114115
r"/providers/Microsoft.MachineLearningServices/workspaces/([^/]+)"
115116
)
116117

117118
pattern = re.compile(_PROJECT_INFO_REGEX)
118119

119-
mo = pattern.match(tracking_uri)
120+
mo: Optional[re.Match[str]] = pattern.match(tracking_uri)
120121

121122
ret = {}
122-
ret[_SUB_ID] = mo.group(1)
123-
ret[_RES_GRP] = mo.group(2)
124-
ret[_WS_NAME] = mo.group(3)
123+
ret[_SUB_ID] = mo.group(1) if mo else mo
124+
ret[_RES_GRP] = mo.group(2) if mo else mo
125+
ret[_WS_NAME] = mo.group(3) if mo else mo
125126

126127
studio_base_url = os.getenv("AI_STUDIO_BASE_URL", "https://ai.azure.com")
127128

sdk/ai/azure-ai-generative/azure/ai/generative/index/_build_mlindex.py

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
# ---------------------------------------------------------
44

55
from pathlib import Path
6-
from typing import Union
6+
from typing import Dict, Optional, Union
77

8-
import yaml
8+
import yaml # type: ignore[import]
99

1010
from azure.ai.resources.entities.mlindex import Index
1111
from azure.ai.resources.operations._index_data_source import ACSSource, LocalSource
@@ -15,24 +15,20 @@
1515

1616
def build_index(
1717
*,
18-
######## required args ##########
1918
output_index_name: str,
2019
vector_store: str,
21-
######## embedding model information ##########
22-
embeddings_model: str = None,
23-
aoai_connection_id: str = None,
24-
######## chunking information ##########
25-
data_source_url: str = None,
20+
index_input_config: Union[ACSSource, LocalSource],
21+
acs_config: ACSOutputConfig, # todo better name?
22+
embeddings_model: str,
23+
aoai_connection_id: Optional[str] = None,
24+
data_source_url: Optional[str] = None,
2625
chunk_size: int = 1024,
2726
chunk_overlap: int = 0,
2827
input_glob: str = "**/*",
29-
max_sample_files: int = None,
30-
chunk_prepend_summary: bool = None,
31-
document_path_replacement_regex: str = None,
32-
embeddings_cache_path: str = None,
33-
######## data source info ########
34-
index_input_config: Union[ACSSource, LocalSource] = None,
35-
acs_config: ACSOutputConfig = None, # todo better name?
28+
max_sample_files: Optional[int] = None,
29+
chunk_prepend_summary: Optional[bool] = None,
30+
document_path_replacement_regex: Optional[Dict[str, str]] = None,
31+
embeddings_cache_path: Optional[str] = None,
3632
) -> Index:
3733

3834
"""Generates embeddings locally and stores Index reference in memory
@@ -53,12 +49,13 @@ def build_index(
5349
if vector_store == "azure_cognitive_search" and isinstance(index_input_config, ACSSource):
5450
return _create_mlindex_from_existing_acs(
5551
output_index_name=output_index_name,
56-
embedding_model=embeddings_model,
52+
# TODO: Fix Bug 2818331
53+
embedding_model=embeddings_model, # type: ignore[no-redef,arg-type]
5754
aoai_connection=aoai_connection_id,
5855
acs_config=index_input_config,
5956
)
60-
embeddings_cache_path = Path(embeddings_cache_path) if embeddings_cache_path else Path.cwd()
61-
save_path = embeddings_cache_path/f"{output_index_name}-mlindex"
57+
embeddings_cache_path = str(Path(embeddings_cache_path) if embeddings_cache_path else Path.cwd())
58+
save_path = str(Path(embeddings_cache_path)/f"{output_index_name}-mlindex")
6259
splitter_args= {
6360
'chunk_size': chunk_size,
6461
'chunk_overlap': chunk_overlap,
@@ -101,6 +98,7 @@ def build_index(
10198
uri=embeddings_model,
10299
**connection_args,
103100
)
101+
104102
embeddings = embedder.embed(chunked_docs)
105103

106104
if vector_store.lower() == "faiss":
@@ -165,7 +163,7 @@ def build_index(
165163
def _create_mlindex_from_existing_acs(
166164
output_index_name: str,
167165
embedding_model: str,
168-
aoai_connection: str,
166+
aoai_connection: Optional[str],
169167
acs_config: ACSSource,
170168
) -> Index:
171169
try:
@@ -209,6 +207,7 @@ def _create_mlindex_from_existing_acs(
209207
if acs_config.acs_metadata_key:
210208
mlindex_config["index"]["field_mapping"]["metadata"] = acs_config.acs_metadata_key
211209

210+
model_connection_args: Dict[str, Optional[Union[str, Dict]]]
212211
if not aoai_connection:
213212
import openai
214213
model_connection_args = {
@@ -220,7 +219,7 @@ def _create_mlindex_from_existing_acs(
220219
"connection": {"id": aoai_connection}
221220
}
222221

223-
embedding = EmbeddingsContainer.from_uri(embedding_model, **model_connection_args)
222+
embedding = EmbeddingsContainer.from_uri(embedding_model, credential=None, **model_connection_args)
224223
mlindex_config["embeddings"] = embedding.get_metadata()
225224

226225
path = Path.cwd() / f"import-acs-{acs_config.acs_index_name}-mlindex"

sdk/ai/azure-ai-generative/azure/ai/generative/index/_dataindex/data_index/models.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# ---------------------------------------------------------
44
"""DataIndex embedding model helpers."""
55
import re
6+
from typing import Optional
67

78
OPEN_AI_PROTOCOL_TEMPLATE = "azure_open_ai://deployment/{}/model/{}"
89
OPEN_AI_PROTOCOL_REGEX_PATTERN = OPEN_AI_PROTOCOL_TEMPLATE.format(".*", ".*")
@@ -13,7 +14,7 @@
1314
HUGGINGFACE_PROTOCOL_REGEX_PATTERN = HUGGINGFACE_PROTOCOL_TEMPLATE.format(".*")
1415

1516

16-
def build_model_protocol(model: str = None):
17+
def build_model_protocol(model: Optional[str] = None):
1718
"""Build a model protocol from user input."""
1819
if not model or re.match(OPEN_AI_PROTOCOL_REGEX_PATTERN, model, re.IGNORECASE):
1920
return model

sdk/ai/azure-ai-generative/azure/ai/generative/index/_dataindex/dsl/_pipeline_decorator.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@
5757

5858
# Overload that returns a decorator when func is None
5959
@overload
60-
def pipeline(
60+
def pipeline( # type: ignore[misc]
61+
# TODO: Bug 2876412
6162
func: None = None,
6263
*,
6364
name: Optional[str] = None,
@@ -74,7 +75,7 @@ def pipeline(
7475
# Overload that returns a decorated function when func isn't None
7576
@overload
7677
def pipeline(
77-
func: Callable[P, T] = None,
78+
func: Optional[Callable[P, T]] = None,
7879
*,
7980
name: Optional[str] = None,
8081
version: Optional[str] = None,
@@ -137,7 +138,8 @@ def pipeline(
137138
get_component = kwargs.get("get_component", False)
138139

139140
def pipeline_decorator(func: Callable[P, T]) -> Callable[P, PipelineJob]:
140-
if not isinstance(func, Callable): # pylint: disable=isinstance-second-argument-not-valid-type
141+
# pylint: disable=isinstance-second-argument-not-valid-type
142+
if not isinstance(func, Callable): # type: ignore
141143
raise UserErrorException(f"Dsl pipeline decorator accept only function type, got {type(func)}.")
142144

143145
non_pipeline_inputs = kwargs.get("non_pipeline_inputs", []) or kwargs.get("non_pipeline_parameters", [])
@@ -246,9 +248,9 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> PipelineJob:
246248

247249
return built_pipeline
248250

249-
wrapper._is_dsl_func = True
250-
wrapper._job_settings = job_settings
251-
wrapper._pipeline_builder = pipeline_builder
251+
wrapper._is_dsl_func = True # type: ignore[attr-defined]
252+
wrapper._job_settings = job_settings # type: ignore[attr-defined]
253+
wrapper._pipeline_builder = pipeline_builder # type: ignore[attr-defined]
252254
return wrapper
253255

254256
# enable use decorator without "()" if all arguments are default values

sdk/ai/azure-ai-generative/azure/ai/generative/index/_dataindex/entities/_assets/_artifacts/data.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
from azure.ai.ml.entities._assets._artifacts.data import Data
66

77

8-
@classmethod
8+
@classmethod # type: ignore[misc]
9+
# TODO: Bug 2874139
910
def _resolve_cls_and_type(cls, data, params_override):
1011
from azure.ai.ml.entities._data_import.data_import import DataImport
1112
from azure.ai.generative.index._dataindex.entities.data_index import DataIndex

0 commit comments

Comments
 (0)