diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 1a8115ac..e1620627 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -17,7 +17,7 @@ jobs:
     - uses: actions/checkout@v3
     - uses: actions/setup-python@v4
       with:
-        python-version: "3.8"
+        python-version: "3.9"
     - run: pip install -r dev-requirements.txt
     - name: Run Pre-Commit
       run: pre-commit run --all-files
diff --git a/.github/workflows/run-samples.yml b/.github/workflows/run-samples.yml
new file mode 100644
index 00000000..a475a6c5
--- /dev/null
+++ b/.github/workflows/run-samples.yml
@@ -0,0 +1,86 @@
+name: Run Samples
+on:
+  # By design, the pull_request_target event runs against the version of the workflow in the target branch,
+  # so you have to merge changes to this workflow to observe their effects.
+  pull_request_target:
+    branches:
+      - main
+    paths:
+      - scenarios/**
+      - .infra/deployment/**/*.bicep
+jobs:
+  check-if-external:
+    runs-on: ubuntu-latest
+    outputs:
+      environment: ${{ steps.set-environment.outputs.result }}
+    steps:
+      - uses: actions/github-script@v7
+        id: set-environment
+        with:
+          script: |
+            const actionInitiator = context.payload.sender.login;
+            const org = "Azure-Samples";
+            let isPublicMember = true;
+
+            // Check if the initiator is a public member of the org
+            try {
+              await github.rest.orgs.checkPublicMembershipForUser({
+                org,
+                username: actionInitiator
+              });
+            } catch (error) {
+              if (error.status != 404) {
+                throw new Error("Unknown error", {cause: error});
+              }
+
+              console.debug([
+                `User is not a public member of the organization "${org}"`,
+                "",
+                `If you are a Microsoft employee, you can join the "${org}" org and set your org membership visibility to public: https://docs.github.com/en/account-and-profile/setting-up-and-managing-your-personal-account-on-github/managing-your-membership-in-organizations/publicizing-or-hiding-organization-membership#changing-the-visibility-of-your-organization-membership`
+              ].join("\n"));
+
+              isPublicMember = false;
+            }
+
+
+            const isPullRequestEvent = ["pull_request", "pull_request_target"].includes(context.eventName);
+
+            if (!(isPublicMember && isPullRequestEvent)) {
+              return "external-contribution";
+            }
+            return "";
+          result-encoding: string
+  run-samples:
+    permissions:
+      contents: 'read'
+      id-token: 'write'
+    needs: check-if-external
+    runs-on: ubuntu-latest
+    # Require manual approval if the initiator is not a public member of Azure-Samples
+    environment: ${{ needs.check-if-external.outputs.environment }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha || github.ref }}
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+      - name: Install dev dependencies
+        run: |
+          pip install -r dev-requirements.txt
+      - uses: azure/login@v2
+        with:
+          client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      - name: Deploy resources
+        run: |
+          principalId="$(az ad sp show --id ${{ secrets.AZURE_CLIENT_ID }} -o tsv --query id)"
+          az deployment sub create --location eastus \
+            --template-file .infra/deployment/main.bicep \
+            --parameters principalType=ServicePrincipal \
+            --parameters principalId="$principalId" \
+            -o json > deployment.json
+      - name: Run samples
+        run:
+          pytest --changed-samples-only-from ${{ github.base_ref }}
diff --git a/.gitignore b/.gitignore
index 7be844da..58d55b91 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,20 @@
+# Ignore output of infra deployment used for
testing +deployment.json + +.virtual_documents/ + +# Ignore output files generated by running samples +*.output.ipynb +*output.json +*output.jsonl +*outputs.json +*outputs.jsonl +*result.json +*result.jsonl +*results.json +*results.jsonl + ## Python Gitignore ## From: https://github.com/github/gitignore/blob/main/Python.gitignore diff --git a/.infra/deployment/main.bicep b/.infra/deployment/main.bicep new file mode 100644 index 00000000..91919eef --- /dev/null +++ b/.infra/deployment/main.bicep @@ -0,0 +1,110 @@ +targetScope = 'subscription' +param workspaceName string = 'azureai_samples_hub' +param projectName string = 'azureai_samples_proj' +param resourceGroupName string = 'rg-azureai-samples-validation-${utcNow('yyyyMM')}' +param location string = 'eastus2' +@description('The ID of the principal (user, service principal, etc...) to create role assignments for.') +param principalId string = '' + +@description('The Type of the principal (user, service principal, etc...) to create role assignments for.') +@allowed([ 'User', 'ServicePrincipal', '' ]) +param principalType string = '' + +var acsName = 'acs-samples-${uniqueString(rg.id, workspaceName, workspaceName)}' + +resource rg 'Microsoft.Resources/resourceGroups@2023-07-01' = { + #disable-next-line use-stable-resource-identifiers + name: resourceGroupName + location: location +} + +module acs 'modules/acs.bicep' = { name: 'acs', params: { name: acsName, location: location }, scope: rg } + +module workspace_hub 'modules/workspace_hub.bicep' = { + name: 'workspace-hub' + params: { + location: location + name: workspaceName + searchName: acs.outputs.name + + } + scope: rg +} + +module project 'modules/ai_project.bicep' = { + name: 'project' + params: { + location: location + workspaceHubID: workspace_hub.outputs.id + name: projectName + } + scope: rg +} + +var deployments = { + gpt4: { + name: 'gpt-4' + properties: { + model: { + format: 'OpenAI' + name: 'gpt-4' + version: '1106-Preview' + } + } + } + + text_embedding_ada_002: { + name: 'text-embedding-ada-002' + properties: { + model: { + format: 'OpenAI' + name: 'text-embedding-ada-002' + version: '2' + } + raiPolicyName: 'Microsoft.Default' + versionUpgradeOption: 'OnceNewDefaultVersionAvailable' + type: 'Azure.OpenAI' + sku: { + name: 'Standard' + capacity: 1 + } + } + } +} + +var roleDefinitionIds = [ + 'a001fd3d-188f-4b5d-821b-7da978bf7442' // Cognitive Services OpenAI Contributor + 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' // Storage Blob Data Contributor +] + +module role_assignments 'modules/role_assignment.bicep' = [for rd in roleDefinitionIds: if (!empty(principalId)) { + name: 'role_assignment-${rd}' + params: { + principalId: principalId + principalType: principalType + roleDefinitionId: rd + } + scope: rg +}] + +@batchSize(1) +module project_deployments 'modules/ai_project_deployment.bicep' = [for deployment in items(deployments): { + name: 'project_deployment-${deployment.value.name}' + params: { + name: deployment.value.name + properties: deployment.value.properties + ai_services_name: workspace_hub.outputs.ai_services_name + } + scope: rg +}] + +@description('The ID of the subscription deployed to.') +output subscription_id string = subscription().subscriptionId +@description('The name of the resource group deployed to.') +output resource_group_name string = rg.name +@description('The name of the Azure AI Project.') +output project_name string = project.outputs.name +output project_location string = project.outputs.location +output azure_openai_endpoint string = 
workspace_hub.outputs.azure_openai_endpoint +output azure_openai_gpt4_api_version string = '2024-08-01-preview' +output azure_openai_gpt4_deployment_name string = deployments.gpt4.name \ No newline at end of file diff --git a/.infra/deployment/modules/acs.bicep b/.infra/deployment/modules/acs.bicep new file mode 100644 index 00000000..4fd7a6ce --- /dev/null +++ b/.infra/deployment/modules/acs.bicep @@ -0,0 +1,58 @@ +@description('Service name must only contain lowercase letters, digits or dashes, cannot use dash as the first two or last one characters, cannot contain consecutive dashes, and is limited between 2 and 60 characters in length.') +@minLength(2) +@maxLength(60) +param name string + +@allowed([ + 'free' + 'basic' + 'standard' + 'standard2' + 'standard3' + 'storage_optimized_l1' + 'storage_optimized_l2' +]) +@description('The pricing tier of the search service you want to create (for example, basic or standard).') +param sku string = 'standard' + +@description('Replicas distribute search workloads across the service. You need at least two replicas to support high availability of query workloads (not applicable to the free tier).') +@minValue(1) +@maxValue(12) +param replicaCount int = 1 + +@description('Partitions allow for scaling of document count as well as faster indexing by sharding your index over multiple search units.') +@allowed([ + 1 + 2 + 3 + 4 + 6 + 12 +]) +param partitionCount int = 1 + +@description('Applicable only for SKUs set to standard3. You can set this property to enable a single, high density partition that allows up to 1000 indexes, which is much higher than the maximum indexes allowed for any other SKU.') +@allowed([ + 'default' + 'highDensity' +]) +param hostingMode string = 'default' + +@description('Location for all resources.') +param location string = resourceGroup().location + +resource search 'Microsoft.Search/searchServices@2020-08-01' = { + name: name + location: location + sku: { + name: sku + } + properties: { + replicaCount: replicaCount + partitionCount: partitionCount + hostingMode: hostingMode + } +} + +output name string = search.name +output id string = search.id \ No newline at end of file diff --git a/.infra/deployment/modules/ai_project.bicep b/.infra/deployment/modules/ai_project.bicep new file mode 100644 index 00000000..a51c636e --- /dev/null +++ b/.infra/deployment/modules/ai_project.bicep @@ -0,0 +1,22 @@ +param name string +param nameFriendly string = name +param workspaceHubID string +param location string = resourceGroup().location + +resource project 'Microsoft.MachineLearningServices/workspaces@2023-10-01' = { + name: name + #disable-next-line BCP187 + kind: 'Project' + location: location + identity: { + type: 'SystemAssigned' + } + properties: { + friendlyName: nameFriendly + #disable-next-line BCP037 + hubResourceId: workspaceHubID + } +} + +output name string = project.name +output location string = project.location \ No newline at end of file diff --git a/.infra/deployment/modules/ai_project_deployment.bicep b/.infra/deployment/modules/ai_project_deployment.bicep new file mode 100644 index 00000000..c95ec590 --- /dev/null +++ b/.infra/deployment/modules/ai_project_deployment.bicep @@ -0,0 +1,32 @@ +param name string +param properties object +param ai_services_name string + +var defaults = { + raiPolicyName: 'Microsoft.Default' + versionUpgradeOption: 'OnceNewDefaultVersionAvailable' + type: 'Azure.OpenAI' + sku: { + name: 'Standard' + capacity: 20 + } +} + +var properties_with_defaults = union(defaults, properties) + 
+#disable-next-line BCP081
+resource aiResource 'Microsoft.CognitiveServices/accounts@2024-06-01-preview' existing = {
+  name: ai_services_name
+
+}
+
+#disable-next-line BCP081
+resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2024-06-01-preview' = {
+  parent: aiResource
+  name: name
+  sku: properties_with_defaults.sku
+  properties: {
+    model: properties_with_defaults.model
+    versionUpgradeOption: properties_with_defaults.versionUpgradeOption
+  }
+}
\ No newline at end of file
diff --git a/.infra/deployment/modules/container_registry.bicep b/.infra/deployment/modules/container_registry.bicep
new file mode 100644
index 00000000..f33a0b5d
--- /dev/null
+++ b/.infra/deployment/modules/container_registry.bicep
@@ -0,0 +1,15 @@
+param name string
+param location string = resourceGroup().location
+
+resource containerRegistry 'Microsoft.ContainerRegistry/registries@2022-02-01-preview' = {
+  name: name
+  location: location
+  sku: {
+    name: 'Standard'
+  }
+  properties: {
+    adminUserEnabled: false
+  }
+}
+
+output id string = containerRegistry.id
\ No newline at end of file
diff --git a/.infra/deployment/modules/keyvault.bicep b/.infra/deployment/modules/keyvault.bicep
new file mode 100644
index 00000000..a493bc2a
--- /dev/null
+++ b/.infra/deployment/modules/keyvault.bicep
@@ -0,0 +1,19 @@
+param name string
+param location string = resourceGroup().location
+param tenantId string = tenant().tenantId
+
+resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' = {
+  name: name
+  location: location
+  properties: {
+    tenantId: tenantId
+    sku: {
+      name: 'standard'
+      family: 'A'
+    }
+    accessPolicies: []
+    enableSoftDelete: true
+  }
+}
+
+output id string = keyVault.id
\ No newline at end of file
diff --git a/.infra/deployment/modules/role_assignment.bicep b/.infra/deployment/modules/role_assignment.bicep
new file mode 100644
index 00000000..cd966f5a
--- /dev/null
+++ b/.infra/deployment/modules/role_assignment.bicep
@@ -0,0 +1,17 @@
+param principalId string
+param principalType string
+param roleDefinitionId string
+
+resource roleDefinition 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = {
+  scope: resourceGroup()
+  name: roleDefinitionId
+}
+
+resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(resourceGroup().id, principalId, roleDefinition.id)
+  properties: {
+    roleDefinitionId: roleDefinition.id
+    principalId: principalId
+    principalType: principalType
+  }
+}
\ No newline at end of file
diff --git a/.infra/deployment/modules/storageAccount.bicep b/.infra/deployment/modules/storageAccount.bicep
new file mode 100644
index 00000000..fceeda1b
--- /dev/null
+++ b/.infra/deployment/modules/storageAccount.bicep
@@ -0,0 +1,28 @@
+param name string
+param location string = resourceGroup().location
+
+resource storageAccount 'Microsoft.Storage/storageAccounts@2022-05-01' = {
+  name: name
+  location: location
+  sku: {
+    name: 'Standard_LRS'
+  }
+  kind: 'StorageV2'
+  properties: {
+    encryption: {
+      services: {
+        blob: {
+          enabled: true
+        }
+        file: {
+          enabled: true
+        }
+      }
+      keySource: 'Microsoft.Storage'
+    }
+    supportsHttpsTrafficOnly: true
+    minimumTlsVersion: 'TLS1_2'
+  }
+}
+
+output id string = storageAccount.id
\ No newline at end of file
diff --git a/.infra/deployment/modules/workspace_hub.bicep b/.infra/deployment/modules/workspace_hub.bicep
new file mode 100644
index 00000000..a06aa442
--- /dev/null
+++ b/.infra/deployment/modules/workspace_hub.bicep
@@ -0,0 +1,167 @@
+@description('Specifies the name of the deployment.')
+param name string + +@description('Specifies the friendly name of the deployment.') +param nameFriendly string = name + +@description('Specifies whether the workspace can be accessed by public networks or not.') +param publicNetworkAccess string = 'Enabled' + +@description('Specifies the managedVnet IsolationMode') +@allowed([ + 'Disabled' + 'AllowOnlyApprovedOutbound' + 'AllowInternetOutbound' +]) +param isolationMode string = 'Disabled' + +@description('AI services name') +param aiServicesName string = 'samples-ai-${uniqueString(subscription().id, resourceGroup().name, name)}' + +@description('Determines whether or not a new container registry should be provisioned.') +@allowed([ + 'new' + 'existing' + 'none' +]) +param containerRegistryOption string = 'new' +param containerRegistryId string = 'null' +param defaultProjectResourceGroupId string = resourceGroup().id + +@description('Specifies the location of the Azure Machine Learning workspace and dependent resources.') +param location string = resourceGroup().location + +@description('Indicates whether or not the resourceId is OpenAI or AIServices.') +@allowed([ + 'OpenAI' + 'AIServices' +]) +param endpointKind string = 'AIServices' + +@description('The name of the search service.') +param searchName string = '' + +var uniqueSuffix = uniqueString(resourceGroup().id, name, nameFriendly) + +module storageAccount 'storageAccount.bicep' = { name: 'storageAccount', params: { name: 'st${uniqueSuffix}', location: location } } +module keyVault 'keyvault.bicep' = { name: 'keyvault', params: { name: 'kv-${uniqueSuffix}', location: location } } +module containerRegistry 'container_registry.bicep' = if (containerRegistryOption == 'new') { + name: 'containerRegistry', params: { name: 'cr${uniqueSuffix}', location: location } +} + +@description('Either the user supplied ID, a new created one, or null') +var actualContainerRegistryId = (containerRegistryOption == 'new') ? containerRegistry.outputs.id : (containerRegistryOption == 'existing') ? 
containerRegistryId : null + +resource workspace 'Microsoft.MachineLearningServices/workspaces@2023-02-01-preview' = { + name: name + location: location + kind: 'Hub' + identity: { + type: 'SystemAssigned' + } + properties: { + friendlyName: nameFriendly + storageAccount: storageAccount.outputs.id + keyVault: keyVault.outputs.id + containerRegistry: actualContainerRegistryId + publicNetworkAccess: publicNetworkAccess + #disable-next-line BCP037 + managedNetwork: { + isolationMode: isolationMode + } + #disable-next-line BCP037 + workspaceHubConfig: { + defaultWorkspaceResourceGroup: defaultProjectResourceGroupId + } + } + dependsOn: [ aiServices ] +} + +resource aiServices 'Microsoft.CognitiveServices/accounts@2021-10-01' = { + name: aiServicesName + location: location + sku: { + name: 'S0' + } + kind: endpointKind + properties: { + publicNetworkAccess: 'Enabled' + customSubDomainName: toLower(aiServicesName) + apiProperties: {} + } +} + +#disable-next-line BCP081 +resource aiServicesConnection 'Microsoft.MachineLearningServices/workspaces/connections@2024-07-01-preview' = { + parent: workspace + name: aiServicesName + properties: { + authType: 'ApiKey' + category: 'AIServices' + target: 'https://${aiServicesName}.cognitiveservices.azure.com/' + useWorkspaceManagedIdentity: true + isSharedToAll: true + sharedUserList: [] + peRequirement: 'NotRequired' + peStatus: 'NotApplicable' + credentials: { + key: aiServices.listKeys().key1 + } + metadata: { + ApiType: 'Azure' + ResourceId: aiServices.id + } + } +} + +#disable-next-line BCP081 +resource aoaiConnection 'Microsoft.MachineLearningServices/workspaces/connections@2024-07-01-preview' = { + parent: workspace + name: '${aiServicesName}_aoai' + properties: { + authType: 'ApiKey' + category: 'AzureOpenAI' + target: 'https://${aiServicesName}.openai.azure.com/' + useWorkspaceManagedIdentity: true + isSharedToAll: true + sharedUserList: [] + peRequirement: 'NotRequired' + peStatus: 'NotApplicable' + credentials: { + key: aiServices.listKeys().key1 + } + metadata: { + ApiType: 'Azure' + ResourceId: aiServices.id + } + } +} + +resource workspaceName_Azure_Cognitive_Search 'Microsoft.MachineLearningServices/workspaces/connections@2023-10-01' = if (!empty(searchName)) { + parent: workspace + name: 'AzureAISearch' + properties: { + #disable-next-line BCP036 + authType: 'ApiKey' + category: 'CognitiveSearch' + credentials: { + key: listAdminKeys( + resourceId(subscription().subscriptionId, resourceGroup().name, 'Microsoft.Search/searchServices', searchName), + '2020-08-01' + ).primaryKey + } + metadata: { ApiType: 'Azure' } + target: 'https://${searchName}.search.windows.net' + value: '{"authType":"ApiKey","category":"AzureOpenAI","target":"https://${searchName}.search.windows.net"}' + } +} + +output id string = workspace.id +@description('The name of the workspace connection to the Search Service.') +output acs_connection_name string = (searchName != '') ? 
workspaceName_Azure_Cognitive_Search.name : ''
+@description('The name of the AI Services resource.')
+output ai_services_name string = aiServices.name
+@description('The Azure OpenAI endpoint.')
+output azure_openai_endpoint string = aoaiConnection.properties.target
+@description('The name of the Azure OpenAI connection.')
+output azure_openai_connection_name string = aoaiConnection.name
\ No newline at end of file
diff --git a/.infra/pytest_plugins/changed_samples/README.md b/.infra/pytest_plugins/changed_samples/README.md
new file mode 100644
index 00000000..952b2b26
--- /dev/null
+++ b/.infra/pytest_plugins/changed_samples/README.md
@@ -0,0 +1,3 @@
+# pytest-changed-samples
+
+
diff --git a/.infra/pytest_plugins/changed_samples/pyproject.toml b/.infra/pytest_plugins/changed_samples/pyproject.toml
new file mode 100644
index 00000000..eb7e6291
--- /dev/null
+++ b/.infra/pytest_plugins/changed_samples/pyproject.toml
@@ -0,0 +1,42 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pytest-changed-samples"
+version = "0.1.0"
+authors = []
+readme = "README.md"
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Framework :: Pytest",
+    "Intended Audience :: Developers",
+    "Topic :: Software Development :: Testing",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+    "Operating System :: OS Independent",
+    "License :: OSI Approved :: MIT License",
+]
+description = "A Pytest plugin to add the option to only run changed samples"
+requires-python = ">=3.8"
+dependencies = ["gitpython~=3.0", "pytest>=7.0.0"]
+
+
+[project.entry-points.pytest11]
+changed_samples = "pytest_changed_samples.plugin"
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.black]
+line-length = 120
+
+[tool.ruff]
+line-length = 120
diff --git a/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/__init__.py b/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/git_utils.py b/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/git_utils.py
new file mode 100644
index 00000000..a7e151a3
--- /dev/null
+++ b/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/git_utils.py
@@ -0,0 +1,47 @@
+from pathlib import Path
+from typing import Iterable, Optional
+
+from git import Repo
+
+
+def get_diff_paths(a: str, b: Optional[str]) -> Iterable[Path]:
+    """Get a list of paths that have changed between two git refs
+
+    :param str a: The base ref to diff against
+    :param Optional[str] b: The ending ref to diff against. If "None",
+        will diff against the working tree
+    :returns: The list of paths
+    :rtype: Iterable[Path]
+    """
+    repo = Repo(search_parent_directories=True)
+    repo_path = Path(repo.working_dir).resolve()
+
+    # Diffs that are either in the working tree or staged in the index
+    changed_files = repo.commit(a).diff(b)
+
+    for c in changed_files:
+        for p in {c.a_path, c.b_path}:
+            if p is None:
+                continue
+
+            yield Path(repo_path, p).resolve()
+
+
+def get_all_modified_paths() -> Iterable[Path]:
+    """Get paths to all non-committed changes tracked by git
+
+    This list includes files in the working tree and staged in the index
+
+    :returns: List of changed paths
+    :rtype: Iterable[Path]
+    """
+    return get_diff_paths("HEAD", None)
+
+
+def get_branch_diff_paths(ref: str = "main") -> Iterable[Path]:
+    """Get a list of all paths changed between HEAD and the given ref (the main branch by default)
+
+    :returns: List of changed paths
+    :rtype: Iterable[Path]
+    """
+    return get_diff_paths("HEAD", ref)
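A minimal usage sketch of these git helpers (assuming the plugin package is installed from `.infra/pytest_plugins/changed_samples`, as dev-requirements.txt does; the paths printed depend on the repository's state):

```python
from pytest_changed_samples.git_utils import get_all_modified_paths, get_branch_diff_paths

# Absolute paths changed between HEAD and the working tree (uncommitted edits).
for path in get_all_modified_paths():
    print(path)

# Absolute paths changed between HEAD and a PR's target branch.
for path in get_branch_diff_paths("main"):
    print(path)
```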
diff --git a/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/plugin.py b/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/plugin.py
new file mode 100644
index 00000000..3071c771
--- /dev/null
+++ b/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/plugin.py
@@ -0,0 +1,100 @@
+from pathlib import Path
+from typing import Callable, Iterable, Optional
+
+import pytest
+
+from .git_utils import get_all_modified_paths, get_branch_diff_paths
+from .trie import Trie
+
+DIFF_PATH_TRIE_KEY = pytest.StashKey[Trie]()
+"""A Stash key to a Trie that stores paths to files present in a diff"""
+
+WORKING_TREE_CHANGES_OPTION = "--changed-samples-only"
+PR_CHANGES_OPTION = "--changed-samples-only-from"
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    parser.addoption(
+        WORKING_TREE_CHANGES_OPTION,
+        action="store_true",
+        help=(
+            "Only collect tests for samples that have changed relative to the last commit (HEAD)."
+            + " A sample has 'changed' if any file in its parent directory has been modified."
+        ),
+    )
+
+    parser.addoption(
+        PR_CHANGES_OPTION,
+        action="store",
+        help=(
+            "Only collect tests for samples that have changed relative to the specified git ref."
+            + " A sample has 'changed' if any file in its parent directory has been modified."
+        ),
+    )
+
+
+def pytest_configure(config: pytest.Config) -> None:
+    # Validate that mutually exclusive options haven't been provided
+    mutually_exclusive_options = (WORKING_TREE_CHANGES_OPTION, PR_CHANGES_OPTION)
+    if sum(bool(config.getoption(opt_var(o))) for o in mutually_exclusive_options) > 1:
+        raise pytest.UsageError(f"{' and '.join(mutually_exclusive_options)} are mutually exclusive")
+
+
+@pytest.hookimpl(hookwrapper=True)
+def pytest_collection(session: pytest.Session) -> None:
+    config = session.config
+    diff_path_trie = Trie()
+
+    for p in get_diff_paths_function(config)():
+        diff_path_trie.insert(p.parts)
+
+    config.stash[DIFF_PATH_TRIE_KEY] = diff_path_trie
+
+    yield
+
+    del config.stash[DIFF_PATH_TRIE_KEY]
+
+
+def pytest_ignore_collect(collection_path: Path, config: pytest.Config) -> Optional[bool]:
+    if DIFF_PATH_TRIE_KEY not in config.stash:
+        # Occurs when calling `pytest --fixtures`
+        return None
+
+    diff_path_trie = config.stash[DIFF_PATH_TRIE_KEY]
+
+    # NOOP if diff is empty
+    if len(diff_path_trie) == 0:
+        return None
+
+    ignore_dir = collection_path if collection_path.is_dir() else collection_path.parent
+
+    # Either definitely ignore this path, or defer decision to other plugins
+    return (not diff_path_trie.is_prefix(ignore_dir.resolve().parts)) or None
+
+
+def get_diff_paths_function(config: pytest.Config) -> Callable[[], Iterable[Path]]:
+    """Get the function that returns paths present in a diff specified by cmdline arguments
+
+    :param pytest.Config config: The pytest config
+    :returns: A function that returns one of:
+        * Paths to files that have changed between HEAD and the working tree
+        * Paths to files that have changed between HEAD and the specified ref
+        * No paths
+    :rtype: Callable[[],Iterable[Path]]
+    """
+    if config.getoption(opt_var(WORKING_TREE_CHANGES_OPTION)):
+        return get_all_modified_paths
+
+    ref = config.getoption(opt_var(PR_CHANGES_OPTION))
+    if ref:
+        # Bind the ref supplied on the command line; returning the bare function
+        # would silently fall back to its default of "main".
+        return lambda: get_branch_diff_paths(ref)
+
+    return lambda: ()
+
+
+def opt_var(s: str) -> str:
+    """Return the name of the variable associated with a given commandline option
+
+    :param str s: A string in the form of a commandline option (e.g. `--hello-world`)
+    :returns: The variable associated with the commandline option (e.g. `hello_world`)
+    :rtype: str
+    """
+    return s.lstrip("-").replace("-", "_")
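A quick sketch of how the two flags select a diff source (the invocations in the comments are illustrative, not taken from this PR):

```python
from pytest_changed_samples.plugin import opt_var

# pytest                                    -> empty diff, the hooks are a no-op
# pytest --changed-samples-only             -> diff HEAD against the working tree
# pytest --changed-samples-only-from main   -> diff HEAD against the given ref
#
# opt_var converts a flag into the attribute name config.getoption expects.
assert opt_var("--changed-samples-only-from") == "changed_samples_only_from"
```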
diff --git a/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/trie.py b/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/trie.py
new file mode 100644
index 00000000..e615ecf7
--- /dev/null
+++ b/.infra/pytest_plugins/changed_samples/src/pytest_changed_samples/trie.py
@@ -0,0 +1,55 @@
+from dataclasses import dataclass, field
+from typing import Dict, Generic, Iterable, TypeVar
+
+from typing_extensions import Self
+
+T = TypeVar("T")
+T2 = TypeVar("T2")
+
+
+class Trie(Generic[T]):
+    """A trie that stores the parts (i.e. `Path.parts`) of absolute paths."""
+
+    @dataclass
+    class Node(Generic[T2]):
+        """A trie node."""
+
+        is_end: bool = False
+        children: Dict[T2, Self] = field(default_factory=dict)
+
+    def __init__(self) -> None:
+        self.root: Trie.Node[T] = Trie.Node()
+        self.len = 0
+
+    def __len__(self) -> int:
+        return self.len
+
+    def insert(self, elems: Iterable[T]) -> None:
+        """Insert a path into the trie.
+
+        :param Iterable[T] elems: The elements of the path to insert (e.g. `Path.parts`)
+        """
+        curr = self.root
+
+        for elem in elems:
+            curr = curr.children.setdefault(elem, Trie.Node())
+
+        if not curr.is_end:
+            curr.is_end = True
+            self.len += 1
+
+    def is_prefix(self, elems: Iterable[T]) -> bool:
+        """Check whether the given elements are a prefix of anything inserted into the trie"""
+
+        curr = self.root
+
+        for part in elems:
+            if part not in curr.children:
+                return False
+
+            curr = curr.children[part]
+
+        return True
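A short sketch of how the trie drives the collection filter in plugin.py (the paths are hypothetical and POSIX-style):

```python
from pathlib import Path

from pytest_changed_samples.trie import Trie

trie = Trie()
# Record one changed sample file, keyed by its path parts.
trie.insert(Path("/repo/scenarios/evaluate/demo/sample.ipynb").parts)

# Directories on the way to a change are prefixes, so they stay collected...
assert trie.is_prefix(Path("/repo/scenarios/evaluate/demo").parts)
# ...while untouched directories are not, so pytest_ignore_collect skips them.
assert not trie.is_prefix(Path("/repo/scenarios/other").parts)
```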
diff --git a/scenarios/README-template.md b/.infra/templates/README-template.md
similarity index 100%
rename from scenarios/README-template.md
rename to .infra/templates/README-template.md
diff --git a/scenarios/template.ipynb b/.infra/templates/template.ipynb
similarity index 100%
rename from scenarios/template.ipynb
rename to .infra/templates/template.ipynb
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5579dff6..d893294f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,11 +10,12 @@ repos:
       - id: nb-clean
         name: nb-clean
         description: "Clean Jupyter notebooks of outputs, metadata, and empty cells, with Git integration"
-        entry: tox -qqq run -e nb-clean -- clean
+        entry: python
         require_serial: true
-        language: python
+        language: system
        types_or: [jupyter]
         minimum_pre_commit_version: 2.9.2
+        args: ["-m", "tox", "-qqq", "run", "-e", "nb-clean", "--", "clean", "--preserve-cell-metadata", "tags", "--"]
      - id: ruff
        name: ruff
        description: "Run 'ruff' for extremely fast Python linting"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7089d786..fbb29b74 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -151,6 +151,6 @@ Edit the product, description, and languages as needed.
 
 The Code Samples browser content is updated twice a week, so it may take a few days for your changes to be reflected.
 
-[readme template]: ./scenarios/README-template.md
-[jupyter notebook template]: ./scenarios/template.ipynb
+[readme template]: ./.infra/templates/README-template.md
+[jupyter notebook template]: ./.infra/templates/template.ipynb
 
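The root conftest.py added below reads the deployment output produced by the Run Samples workflow; for orientation, a sketch of the (assumed, abbreviated) shape it consumes:

```python
import json
from pathlib import Path

# deployment.json is the output of `az deployment sub create -o json`; the
# output names mirror the outputs declared in .infra/deployment/main.bicep.
deployment = json.loads(Path("deployment.json").read_text())
outputs = deployment["properties"]["outputs"]  # e.g. {"project_name": {"value": "...", "type": "String"}}
flattened = {name: output["value"] for name, output in outputs.items()}
```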
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 00000000..7bfe31dc
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,84 @@
+import json
+import types
+from pathlib import Path
+from typing import Dict
+
+import pytest
+
+REPO_ROOT = Path(__file__).parent.resolve()
+
+
+@pytest.fixture()
+def notebook_path(
+    # Create and activate a new venv for each test that requests `notebook_path`
+    venv: types.SimpleNamespace,  # noqa: ARG001
+    notebook_path: Path,
+) -> Path:
+    """Activates a virtual environment for tests that request notebook_path (Jupyter Notebook tests)."""
+    return notebook_path
+
+
+@pytest.fixture()
+def deployment_outputs() -> Dict[str, str]:
+    """The outputs of the deployment used to set up resources for testing samples.
+
+    Depends on the existence of a `deployment.json` file in the root of the repository,
+    which is the output of running `az deployment sub create -o json`
+    """
+    deployment_file_path = REPO_ROOT / "deployment.json"
+
+    try:
+        with deployment_file_path.open() as f:
+            deployment = json.load(f)
+    except (FileNotFoundError, json.JSONDecodeError) as e:
+        raise AssertionError("Please use azure-cli to perform a deployment and save the result to deployment.json") from e
+
+    properties = deployment.get("properties")
+
+    if properties is None or "outputs" not in properties:
+        raise AssertionError("Key 'properties.outputs' not present in deployment json")
+
+    outputs = properties.get("outputs")
+
+    return {output_name: output["value"] for output_name, output in outputs.items()}
+
+
+@pytest.fixture()
+def azure_ai_project(deployment_outputs: Dict[str, str]) -> Dict[str, str]:
+    """Azure AI project dictionary."""
+    return {
+        "subscription_id": deployment_outputs["subscription_id"],
+        "resource_group_name": deployment_outputs["resource_group_name"],
+        "project_name": deployment_outputs["project_name"],
+    }
+
+
+@pytest.fixture()
+def azure_ai_project_connection_string(deployment_outputs: Dict[str, str]) -> str:
+    """The connection string for the Azure AI project"""
+    return ";".join(
+        [
+            f"{deployment_outputs['project_location']}.api.azureml.ms",
+            deployment_outputs["subscription_id"],
+            deployment_outputs["resource_group_name"],
+            deployment_outputs["project_name"],
+        ]
+    )
+
+
+@pytest.fixture()
+def azure_openai_endpoint(deployment_outputs: Dict[str, str]) -> str:
+    """The Azure OpenAI endpoint for the Azure AI project."""
+    return deployment_outputs["azure_openai_endpoint"]
+
+
+@pytest.fixture()
+def azure_openai_gpt4_deployment(deployment_outputs: Dict[str, str]) -> str:
+    """The deployment name of the gpt-4 deployment."""
+    return deployment_outputs["azure_openai_gpt4_deployment_name"]
+
+
+@pytest.fixture()
+def azure_openai_gpt4_api_version(deployment_outputs: Dict[str, str]) -> str:
+    """The API version of the gpt-4 deployment."""
+    return deployment_outputs["azure_openai_gpt4_api_version"]
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 9f47522b..db7587d7 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,2 +1,7 @@
 tox ~= 4.0
-pre-commit
+pre-commit ~= 3.0
+python-dotenv ~= 1.0
+pytest ~= 8.0
+pytest-iovis[papermill] == 0.1.0
+ipykernel ~= 6.0
+.infra/pytest_plugins/changed_samples
diff --git a/not_conftest.py b/not_conftest.py
new file mode 100644
index 00000000..234bb4cc
--- /dev/null
+++ b/not_conftest.py
@@ -0,0 +1,7 @@
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _skipAll() -> None:
+    """Skips all pytest tests in this directory."""
+    pytest.skip(reason="Excluded from testing.")
diff --git a/pyproject.toml b/pyproject.toml
index 5e8ba793..4d5cc49e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,9 @@ extend-select = [
     "ANN",
     "RUF",
 ]
-
+ignore = [
+    "ANN101", # missing-type-self: This can be inferred and will be deprecated by ruff
+]
 
 [tool.ruff.lint.extend-per-file-ignores]
 "*.ipynb" = [
@@ -26,3 +28,8 @@ extend-select = [
     "E402", # module-import-not-at-top-of-file: It's relatively common to have to import "just in time"
     "E501", # line-too-long: Let black handle this
 ]
+
+[tool.pytest.ini_options]
+console_output_style = "count"
+addopts = "--strict-markers"
+testpaths = ["scenarios"]
diff --git a/scenarios/Assistants/api-in-a-box/failed_banks/conftest.py b/scenarios/Assistants/api-in-a-box/failed_banks/conftest.py
new file mode
100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/Assistants/api-in-a-box/failed_banks/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/Assistants/api-in-a-box/math_tutor/conftest.py b/scenarios/Assistants/api-in-a-box/math_tutor/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/Assistants/api-in-a-box/math_tutor/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/Assistants/api-in-a-box/personal_finance/conftest.py b/scenarios/Assistants/api-in-a-box/personal_finance/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/Assistants/api-in-a-box/personal_finance/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/Assistants/api-in-a-box/wind_farm/conftest.py b/scenarios/Assistants/api-in-a-box/wind_farm/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/Assistants/api-in-a-box/wind_farm/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/Assistants/function_calling/conftest.py b/scenarios/Assistants/function_calling/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/Assistants/function_calling/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/Assistants/multi-agent/conftest.py b/scenarios/Assistants/multi-agent/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/Assistants/multi-agent/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/GPT-4V/basic/conftest.py b/scenarios/GPT-4V/basic/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/GPT-4V/basic/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/GPT-4V/enhancement_OCR/conftest.py b/scenarios/GPT-4V/enhancement_OCR/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/GPT-4V/enhancement_OCR/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/GPT-4V/enhancement_grounding/conftest.py b/scenarios/GPT-4V/enhancement_grounding/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/GPT-4V/enhancement_grounding/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> 
None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/GPT-4V/face/conftest.py b/scenarios/GPT-4V/face/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/GPT-4V/face/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/GPT-4V/multiple_images/conftest.py b/scenarios/GPT-4V/multiple_images/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/GPT-4V/multiple_images/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/GPT-4V/rag/conftest.py b/scenarios/GPT-4V/rag/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/GPT-4V/rag/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/GPT-4V/video/conftest.py b/scenarios/GPT-4V/video/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/GPT-4V/video/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/GPT-4V/video_by_manual_sampling/conftest.py b/scenarios/GPT-4V/video_by_manual_sampling/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/GPT-4V/video_by_manual_sampling/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/GPT-4V/video_chunk/conftest.py b/scenarios/GPT-4V/video_chunk/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/GPT-4V/video_chunk/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/GPT-4V/video_chunk_by_manual_sampling/conftest.py b/scenarios/GPT-4V/video_chunk_by_manual_sampling/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/GPT-4V/video_chunk_by_manual_sampling/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/evaluate/Image/image-evaluation.ipynb b/scenarios/evaluate/Image/image-evaluation.ipynb index 941ca762..e55fb9fc 100644 --- a/scenarios/evaluate/Image/image-evaluation.ipynb +++ b/scenarios/evaluate/Image/image-evaluation.ipynb @@ -51,47 +51,21 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "parameters" + ] + }, "outputs": [], "source": [ "from pprint import pprint\n", - "from azure.ai.evaluation import ViolenceEvaluator, SexualEvaluator, SelfHarmEvaluator, HateUnfairnessEvaluator" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Project settings\n", 
- "\n", - "Make sure you fill these values accurately" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"AZURE_SUBSCRIPTION_ID\"] = \"\"\n", - "os.environ[\"AZURE_RESOURCE_GROUP\"] = \"\"\n", - "os.environ[\"AZURE_PROJECT_NAME\"] = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "from azure.ai.evaluation import ViolenceEvaluator, SexualEvaluator, SelfHarmEvaluator, HateUnfairnessEvaluator\n", "from azure.identity import DefaultAzureCredential\n", "\n", - "project_scope = {\n", - " \"subscription_id\": os.environ.get(\"AZURE_SUBSCRIPTION_ID\"),\n", - " \"resource_group_name\": os.environ.get(\"AZURE_RESOURCE_GROUP\"),\n", - " \"project_name\": os.environ.get(\"AZURE_PROJECT_NAME\"),\n", + "azure_ai_project = {\n", + " \"subscription_id\": \"\",\n", + " \"resource_group_name\": \"\",\n", + " \"project_name\": \"\",\n", "}" ] }, @@ -108,10 +82,10 @@ "metadata": {}, "outputs": [], "source": [ - "violence_evaluator = ViolenceEvaluator(credential=DefaultAzureCredential(), azure_ai_project=project_scope)\n", - "sexual_evaluator = SexualEvaluator(credential=DefaultAzureCredential(), azure_ai_project=project_scope)\n", - "self_harm_evaluator = SelfHarmEvaluator(credential=DefaultAzureCredential(), azure_ai_project=project_scope)\n", - "hate_unfair_evaluator = HateUnfairnessEvaluator(credential=DefaultAzureCredential(), azure_ai_project=project_scope)\n", + "violence_evaluator = ViolenceEvaluator(credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project)\n", + "sexual_evaluator = SexualEvaluator(credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project)\n", + "self_harm_evaluator = SelfHarmEvaluator(credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project)\n", + "hate_unfair_evaluator = HateUnfairnessEvaluator(credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project)\n", "\n", "conversation = {\n", " \"messages\": [\n", @@ -168,7 +142,7 @@ "from pprint import pprint\n", "from azure.ai.evaluation import ContentSafetyEvaluator\n", "\n", - "evaluator = ContentSafetyEvaluator(credential=DefaultAzureCredential(), azure_ai_project=project_scope)\n", + "evaluator = ContentSafetyEvaluator(credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project)\n", "result = evaluator(conversation=conversation)\n", "pprint(result)" ] @@ -189,7 +163,7 @@ "from pprint import pprint\n", "from azure.ai.evaluation import ProtectedMaterialEvaluator\n", "\n", - "evaluator = ProtectedMaterialEvaluator(credential=DefaultAzureCredential(), azure_ai_project=project_scope)\n", + "evaluator = ProtectedMaterialEvaluator(credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project)\n", "result = evaluator(conversation=conversation)\n", "pprint(result)" ] @@ -214,11 +188,11 @@ "\n", "from azure.ai.evaluation import evaluate\n", "\n", - "content_safety_eval = ContentSafetyEvaluator(azure_ai_project=project_scope, credential=DefaultAzureCredential())\n", + "content_safety_eval = ContentSafetyEvaluator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())\n", "\n", "result = evaluate(\n", " data=file_path,\n", - " azure_ai_project=project_scope,\n", + " azure_ai_project=azure_ai_project,\n", " evaluators={\"content_safety\": content_safety_eval},\n", ")\n", "pprint(result)" @@ -243,7 +217,7 @@ "\n", "base64_image = \"\"\n", "\n", - "with Path.open(\"Image1.jpg\", \"rb\") as 
image_file:\n",
+    "with Path(\"image1.jpg\").open(\"rb\") as image_file:\n",
     "    base64_image = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
     "\n",
     "conversation = {\n",
@@ -262,7 +236,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".env",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
diff --git a/scenarios/evaluate/Simulators/Simulate_Adversarial_Data/Simulate_Adversarial.ipynb b/scenarios/evaluate/Simulators/Simulate_Adversarial_Data/Simulate_Adversarial.ipynb
index 86627fca..e1becffb 100644
--- a/scenarios/evaluate/Simulators/Simulate_Adversarial_Data/Simulate_Adversarial.ipynb
+++ b/scenarios/evaluate/Simulators/Simulate_Adversarial_Data/Simulate_Adversarial.ipynb
@@ -2,7 +2,9 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "source": [
     "# Adversarial Simulator for an online endpoint\n",
     "\n",
@@ -74,27 +76,36 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "tags": [
+    "parameters"
+   ]
+  },
   "outputs": [],
   "source": [
+   "# Use the following code to set the variables with your values.\n",
    "azure_ai_project = {\n",
    "    \"subscription_id\": \"\",\n",
    "    \"resource_group_name\": \"\",\n",
    "    \"project_name\": \"\",\n",
-   "}"
+   "}\n",
+   "\n",
+   "azure_openai_api_version = \"\"\n",
+   "azure_openai_deployment = \"\"\n",
+   "azure_openai_endpoint = \"\""
   ]
  },
 {
  "cell_type": "code",
  "execution_count": null,
- "metadata": {},
+ "metadata": {
+  "tags": []
+ },
  "outputs": [],
  "source": [
-  "# Use the following code to set the environment variables if not already set. If set, you can skip this step.\n",
   "\n",
-  "os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"\"\n",
-  "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = \"\"\n",
-  "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"\""
+  "os.environ[\"AZURE_OPENAI_API_VERSION\"] = azure_openai_api_version\n",
+  "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = azure_openai_deployment\n",
+  "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = azure_openai_endpoint"
  ]
 },
 {
@@ -143,10 +154,12 @@
 {
  "cell_type": "code",
  "execution_count": null,
- "metadata": {},
+ "metadata": {
+  "tags": []
+ },
  "outputs": [],
  "source": [
-  "simulator = AdversarialSimulator(azure_ai_project=azure_ai_project)"
+  "simulator = AdversarialSimulator(credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project)"
  ]
 },
 {
@@ -194,7 +207,9 @@
 {
  "cell_type": "code",
  "execution_count": null,
- "metadata": {},
+ "metadata": {
+  "tags": []
+ },
  "outputs": [],
  "source": [
   "outputs = await simulator(\n",
@@ -212,17 +227,19 @@
 {
  "cell_type": "code",
  "execution_count": null,
- "metadata": {},
+ "metadata": {
+  "tags": []
+ },
  "outputs": [],
  "source": [
-  "with Path.open(\"outputs.jsonl\", \"w\") as f:\n",
+  "with Path(\"outputs.jsonl\").open(\"w\") as f:\n",
   "    f.write(outputs.to_eval_qr_json_lines())"
  ]
 }
 ],
 "metadata": {
  "kernelspec": {
-  "display_name": "venv-azureai-samples",
+  "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
@@ -239,5 +256,5 @@
  }
 },
 "nbformat": 4,
-"nbformat_minor": 2
+"nbformat_minor": 4
}
diff --git a/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Azure_Search_Index/Simulate_From_Azure_Search_Index.ipynb b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Azure_Search_Index/Simulate_From_Azure_Search_Index.ipynb
index 99ebf5e4..de69fb2e 100644
---
a/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Azure_Search_Index/Simulate_From_Azure_Search_Index.ipynb +++ b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Azure_Search_Index/Simulate_From_Azure_Search_Index.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "# Simulate Queries and Responses from Azure Search Index" ] @@ -121,7 +123,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import os\n", @@ -133,7 +137,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# project details\n", @@ -283,7 +289,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "### Generate Query Responses from index\n", "In this example we use the `description` field from the `realestate-us-sample-index` search index as raw text to generate Query Response pairs. For any index you use to generate Query Responses, you must identify from which field from `result` in the code below you would like to generate.\n", @@ -458,7 +466,7 @@ "toc_visible": true }, "kernelspec": { - "display_name": "venv3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Azure_Search_Index/conftest.py b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Azure_Search_Index/conftest.py new file mode 100644 index 00000000..5ce92d60 --- /dev/null +++ b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Azure_Search_Index/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing. (Needs to initialize a search index.)") diff --git a/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter/Simulate_From_Conversation_Starter.ipynb b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter/Simulate_From_Conversation_Starter.ipynb index 8447b44d..82e94768 100644 --- a/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter/Simulate_From_Conversation_Starter.ipynb +++ b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter/Simulate_From_Conversation_Starter.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "# Simulate conversations from conversation starter" ] @@ -78,28 +80,38 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "parameters" + ] + }, "outputs": [], "source": [ - "import json\n", - "import os\n", + "# Use the following code to set the variables with your values\n", "\n", - "# Use the following code to set the environment variables if not already set. 
If set, you can skip this step.\n", - "\n", - "os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"\"\n", - "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = \"\"\n", - "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"\"" + "azure_openai_api_version = \"\"\n", + "azure_openai_deployment = \"\"\n", + "azure_openai_endpoint = \"\"" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ + "import os\n", + "\n", + "os.environ[\"AZURE_OPENAI_API_VERSION\"] = azure_openai_api_version\n", + "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = azure_openai_deployment\n", + "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = azure_openai_endpoint\n", + "\n", + "\n", "model_config = {\n", - " \"azure_endpoint\": os.environ.get(\"AZURE_OPENAI_ENDPOINT\"),\n", - " \"azure_deployment\": os.environ.get(\"AZURE_OPENAI_DEPLOYMENT\"),\n", + " \"azure_endpoint\": azure_openai_endpoint,\n", + " \"azure_deployment\": azure_openai_deployment,\n", "}" ] }, @@ -131,7 +143,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from typing import List, Dict, Any, Optional\n", @@ -143,11 +157,11 @@ " # logic to call your application\n", " # use a try except block to catch any errors\n", " token_provider = get_bearer_token_provider(DefaultAzureCredential(), \"https://cognitiveservices.azure.com/.default\")\n", - " deployment = os.environ.get(\"AZURE_DEPLOYMENT\")\n", - " endpoint = os.environ.get(\"AZURE_ENDPOINT\")\n", + " deployment = os.environ.get(\"AZURE_OPENAI_DEPLOYMENT\")\n", + " endpoint = os.environ.get(\"AZURE_OPENAI_ENDPOINT\")\n", " client = AzureOpenAI(\n", " azure_endpoint=endpoint,\n", - " api_version=os.environ.get(\"AZURE_API_VERSION\"),\n", + " api_version=os.environ.get(\"AZURE_OPENAI_API_VERSION\"),\n", " azure_ad_token_provider=token_provider,\n", " )\n", " completion = client.chat.completions.create(\n", @@ -238,7 +252,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "outputs = await simulator(\n", @@ -258,10 +274,13 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from pathlib import Path\n", + "import json\n", "\n", "output_file = Path(\"output.json\")\n", "with output_file.open(\"a\") as f:\n", @@ -271,14 +290,22 @@ ], "metadata": { "kernelspec": { - "display_name": "venv-azureai-samples", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter/conftest.py b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git 
a/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Input_Text/.gitignore b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Input_Text/.gitignore
new file mode 100644
index 00000000..c25746b5
--- /dev/null
+++ b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Input_Text/.gitignore
@@ -0,0 +1 @@
+eval_input_data.jsonl
diff --git a/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Input_Text/Simulate_From_Input_Text.ipynb b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Input_Text/Simulate_From_Input_Text.ipynb
index 87d7d2b2..59f5d342 100644
--- a/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Input_Text/Simulate_From_Input_Text.ipynb
+++ b/scenarios/evaluate/Simulators/Simulate_Context-Relevant_Data/Simulate_From_Input_Text/Simulate_From_Input_Text.ipynb
@@ -2,7 +2,9 @@
 "cells": [
  {
   "cell_type": "markdown",
-  "metadata": {},
+  "metadata": {
+   "tags": []
+  },
   "source": [
    "# Simulate Queries and Responses from input text"
   ]
@@ -82,43 +84,59 @@
  },
  {
   "cell_type": "markdown",
-  "metadata": {},
+  "metadata": {
+   "tags": []
+  },
   "source": [
    "### Parameters\n",
    "\n",
-   "Lets initialize some variables. We need a way to connect to a LLM to use the notebook. This sample suggests a way to use `gpt-4o-mini` deployment in your Azure AI project. Replace the `azure_endpoint` with a link to your endpoint. If your applications calls `AzureOpenAI`'s chat completion endpoint, you will need to replace the values in `<>` with your `AzureOpenAI` deployment details. \n",
+   "Let's initialize some variables. We need a way to connect to an LLM to use the notebook. This sample suggests a way to use a `gpt-4o-mini` deployment in your Azure AI project. Replace `azure_openai_endpoint` with a link to your endpoint. If your application calls `AzureOpenAI`'s chat completion endpoint, you will need to replace the values in `<>` with your `AzureOpenAI` deployment details. 
\n", "\n" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "parameters" + ] + }, "outputs": [], "source": [ - "import os\n", + "# project details\n", + "azure_openai_api_version = \"\"\n", + "azure_openai_endpoint = \"\"\n", + "azure_openai_deployment = \"gpt-4o-mini\" # replace with your deployment name, if different\n", "\n", - "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"\"\n", - "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = \"\"\n", - "os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"\"" + "# Optionally set the azure_ai_project to upload the evaluation results to Azure AI Studio.\n", + "azure_ai_project = {\n", + " \"subscription_id\": \"\",\n", + " \"resource_group\": \"\",\n", + " \"workspace_name\": \"\",\n", + "}" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "# project details\n", - "azure_endpoint = \"https://.openai.azure.com\"\n", - "azure_deployment = \"gpt-4o-mini\" # replace with your deployment name, if different\n", + "import os\n", "\n", - "should_cleanup: bool = False" + "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = azure_openai_endpoint\n", + "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = azure_openai_deployment\n", + "os.environ[\"AZURE_OPENAI_API_VERSION\"] = azure_openai_api_version" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "### Connect to your project\n", "\n", @@ -128,15 +146,18 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import json\n", "from pathlib import Path\n", "\n", "model_config = {\n", - " \"azure_endpoint\": azure_endpoint,\n", - " \"azure_deployment\": azure_deployment,\n", + " \"azure_endpoint\": azure_openai_endpoint,\n", + " \"azure_deployment\": azure_openai_deployment,\n", + " \"api_version\": azure_openai_api_version,\n", "}\n", "\n", "# JSON mode supported model preferred to avoid errors ex. 
gpt-4o-mini, gpt-4o, gpt-4 (1106)" @@ -409,7 +430,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "### Run evaluation\n", "`QAEvaluator` is a composite evaluator which runs GroundednessEvaluator, RelevanceEvaluator, CoherenceEvaluator, FluencyEvaluator, SimilarityEvaluator, F1ScoreEvaluator\n", @@ -420,20 +443,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "azure_ai_project = {\n", - " \"subscription_id\": \"\",\n", - " \"resource_group\": \"\",\n", - " \"workspace_name\": \"\",\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from azure.ai.evaluation import evaluate, QAEvaluator\n", diff --git a/scenarios/evaluate/Simulators/Simulate_Evaluate_ContentSafety/.gitignore b/scenarios/evaluate/Simulators/Simulate_Evaluate_ContentSafety/.gitignore new file mode 100644 index 00000000..f3a9eaee --- /dev/null +++ b/scenarios/evaluate/Simulators/Simulate_Evaluate_ContentSafety/.gitignore @@ -0,0 +1 @@ +adv_convo_eval.jsonl diff --git a/scenarios/evaluate/Simulators/Simulate_Evaluate_ContentSafety/Simulate_Evaluate_ContentSafety.ipynb b/scenarios/evaluate/Simulators/Simulate_Evaluate_ContentSafety/Simulate_Evaluate_ContentSafety.ipynb index 00c3bafb..9348ad02 100644 --- a/scenarios/evaluate/Simulators/Simulate_Evaluate_ContentSafety/Simulate_Evaluate_ContentSafety.ipynb +++ b/scenarios/evaluate/Simulators/Simulate_Evaluate_ContentSafety/Simulate_Evaluate_ContentSafety.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "# Simulating and Evaluating Multiturn Conversations for Content Harms\n", "\n", @@ -30,57 +32,61 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ - "Set the following environment variables for use in this notebook:" + "### Configuration\n", + "The following simulator and evaluators require an Azure AI Studio project configuration and an Azure credential. \n", + "Your project configuration is used to log your evaluation results to your project after the evaluation run finishes.\n", + "\n", + "For the full list of supported regions, see [our documentation](https://learn.microsoft.com/azure/ai-studio/how-to/develop/flow-evaluate-sdk#built-in-evaluators)." ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "tags": [] + }, "source": [ - "import os\n", - "\n", - "os.environ[\"AZURE_DEPLOYMENT_NAME\"] = \"\"\n", - "os.environ[\"AZURE_API_VERSION\"] = \"\"\n", - "os.environ[\"AZURE_SUBSCRIPTION_ID\"] = \"\"\n", - "os.environ[\"AZURE_RESOURCE_GROUP\"] = \"\"\n", - "os.environ[\"AZURE_PROJECT_NAME\"] = \"\"" + "Set the following variables for use in this notebook:" ] }, { - "cell_type": "markdown", - "metadata": {}, + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], "source": [ - "### Configuration\n", - "The following simulator and evaluators require an Azure AI Studio project configuration and an Azure credential to use. 
\n", - "Your project configuration will be what is used to log your evaluation results in your project after the evaluation run is finished.\n", + "azure_ai_project = {\n", + " \"subscription_id\": \"\",\n", + " \"resource_group\": \"\",\n", + " \"workspace_name\": \"\",\n", + "}\n", "\n", - "For full region supportability, see [our documentation](https://learn.microsoft.com/azure/ai-studio/how-to/develop/flow-evaluate-sdk#built-in-evaluators)." + "\n", + "azure_openai_endpoint = \"\"\n", + "azure_openai_deployment = \"\"\n", + "azure_openai_api_version = \"\"" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", - "from azure.ai.evaluation import evaluate\n", - "from azure.ai.evaluation import ContentSafetyEvaluator\n", - "from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario\n", - "from openai import AzureOpenAI\n", - "\n", - "\n", - "azure_ai_project = {\n", - " \"subscription_id\": os.environ.get(\"AZURE_SUBSCRIPTION_ID\"),\n", - " \"resource_group_name\": os.environ.get(\"AZURE_RESOURCE_GROUP\"),\n", - " \"project_name\": os.environ.get(\"AZURE_PROJECT_NAME\"),\n", - "}\n", + "import os\n", "\n", - "credential = DefaultAzureCredential()" + "os.environ[\"AZURE_DEPLOYMENT_NAME\"] = azure_openai_deployment\n", + "os.environ[\"AZURE_API_VERSION\"] = azure_openai_api_version\n", + "os.environ[\"AZURE_ENDPOINT\"] = azure_openai_endpoint" ] }, { @@ -97,11 +103,21 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from typing import List, Dict, Optional\n", "\n", + "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", + "from azure.ai.evaluation import evaluate\n", + "from azure.ai.evaluation import ContentSafetyEvaluator\n", + "from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario\n", + "from openai import AzureOpenAI\n", + "\n", + "credential = DefaultAzureCredential()\n", + "\n", "\n", "async def content_safety_callback(\n", " messages: List[Dict], stream: bool = False, session_state: Optional[str] = None, context: Optional[Dict] = None\n", @@ -199,7 +215,7 @@ "import json\n", "from pathlib import Path\n", "\n", - "with Path.open(\"adv_convo_eval.jsonl\", \"w\") as f:\n", + "with Path(\"adv_convo_eval.jsonl\").open(\"w\") as f:\n", " for output in content_safety_outputs:\n", " f.write(json.dumps({\"conversation\": output}))\n", " f.write(\"\\n\")" @@ -234,7 +250,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/scenarios/evaluate/Simulators/Simulate_Evaluate_Groundedness/Simulate_Evaluate_Groundedness.ipynb b/scenarios/evaluate/Simulators/Simulate_Evaluate_Groundedness/Simulate_Evaluate_Groundedness.ipynb index 7cdb54af..ccb85314 100644 --- a/scenarios/evaluate/Simulators/Simulate_Evaluate_Groundedness/Simulate_Evaluate_Groundedness.ipynb +++ b/scenarios/evaluate/Simulators/Simulate_Evaluate_Groundedness/Simulate_Evaluate_Groundedness.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "# Evaluating Model Groundedness with Azure AI Evaluation SDK\n", "\n", @@ -51,72 +53,73 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ - "### 
Parameters and imports" + "### Configuration\n", + "\n", + "The following simulator and evaluators require an Azure AI Studio project configuration and an Azure credential. \n", + "Your project configuration is used to log your evaluation results to your project after the evaluation run finishes.\n", + "\n", + "For the full list of supported regions, see [our documentation](https://learn.microsoft.com/azure/ai-studio/how-to/develop/flow-evaluate-sdk#built-in-evaluators)." ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ - "Here we define the data, `grounding.json` on which we will simulate query and response pairs to help us evaluate the groundedness of our model's responses. Based on the use case of your model, the data you use to evaluate groundedness might differ. " + "Set the following variables for use in this notebook:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "parameters" + ] + }, "outputs": [], "source": [ - "import os\n", - "from typing import Any, Dict, List, Optional\n", - "import json\n", - "from pathlib import Path\n", + "azure_ai_project = {\n", + " \"subscription_id\": \"\",\n", + " \"resource_group\": \"\",\n", + " \"workspace_name\": \"\",\n", + "}\n", "\n", - "from azure.ai.evaluation import evaluate\n", - "from azure.ai.evaluation import GroundednessEvaluator\n", - "from azure.ai.evaluation.simulator import Simulator\n", - "from openai import AzureOpenAI\n", - "import importlib.resources as pkg_resources\n", - "from azure.identity import DefaultAzureCredential, get_bearer_token_provider" + "azure_openai_endpoint = \"\"\n", + "azure_openai_deployment = \"\"\n", + "azure_openai_api_version = \"\"" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.environ[\"AZURE_SUBSCRIPTION_ID\"] = \"\"\n", - "os.environ[\"RESOURCE_GROUP\"] = \"\"\n", - "os.environ[\"PROJECT_NAME\"] = \"\"\n", - "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"\"\n", - "os.environ[\"AZURE_DEPLOYMENT_NAME\"] = \"\"\n", - "os.environ[\"AZURE_API_VERSION\"] = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "project_scope = {\n", - " \"subscription_id\": os.environ.get(\"AZURE_SUBSCRIPTION_ID\"),\n", - " \"resource_group_name\": os.environ.get(\"RESOURCE_GROUP\"),\n", - " \"project_name\": os.environ.get(\"PROJECT_NAME\"),\n", - "}\n", + "import os\n", "\n", "model_config = {\n", - " \"azure_endpoint\": os.environ.get(\"AZURE_OPENAI_ENDPOINT\"),\n", - " \"azure_deployment\": os.environ.get(\"AZURE_DEPLOYMENT_NAME\"),\n", - "}" + " \"azure_endpoint\": azure_openai_endpoint,\n", + " \"azure_deployment\": azure_openai_deployment,\n", + "}\n", + "\n", + "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = azure_openai_endpoint\n", + "os.environ[\"AZURE_DEPLOYMENT_NAME\"] = azure_openai_deployment\n", + "os.environ[\"AZURE_API_VERSION\"] = azure_openai_api_version" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Data\n", "Here we define the data, `grounding.json` on which we will simulate query and response pairs to help us evaluate the groundedness of our model's responses. Based on the use case of your model, the data you use to evaluate groundedness might differ. 
" @@ -128,11 +131,22 @@ "metadata": {}, "outputs": [], "source": [ + "from typing import Any, Dict, List, Optional\n", + "import json\n", + "from pathlib import Path\n", + "\n", + "from azure.ai.evaluation import evaluate\n", + "from azure.ai.evaluation import GroundednessEvaluator\n", + "from azure.ai.evaluation.simulator import Simulator\n", + "from openai import AzureOpenAI\n", + "import importlib.resources as pkg_resources\n", + "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", + "\n", "resource_name = \"grounding.json\"\n", "package = \"azure.ai.evaluation.simulator._data_sources\"\n", "conversation_turns = []\n", "\n", - "with pkg_resources.path(package, resource_name) as grounding_file, Path.open(grounding_file, \"r\") as file:\n", + "with pkg_resources.path(package, resource_name) as grounding_file, Path(grounding_file).open(\"r\") as file:\n", " data = json.load(file)\n", "\n", "for item in data:\n", @@ -236,7 +250,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "custom_simulator = Simulator(model_config=model_config)" @@ -270,7 +286,7 @@ "outputs": [], "source": [ "output_file = \"ground_sim_output.jsonl\"\n", - "with Path.open(output_file, \"w\") as file:\n", + "with Path(output_file).open(\"w\") as file:\n", " for output in outputs:\n", " file.write(output.to_eval_qr_json_lines())" ] @@ -288,7 +304,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "groundedness_evaluator = GroundednessEvaluator(model_config=model_config)\n", @@ -297,7 +315,7 @@ " evaluators={\n", " \"groundedness\": groundedness_evaluator,\n", " },\n", - " azure_ai_project=project_scope,\n", + " azure_ai_project=azure_ai_project,\n", ")\n", "print(eval_output)" ] @@ -305,7 +323,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Quality/AI_Judge_Evaluators_Quality.ipynb b/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Quality/AI_Judge_Evaluators_Quality.ipynb index 579f86fe..ad5d1d45 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Quality/AI_Judge_Evaluators_Quality.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Quality/AI_Judge_Evaluators_Quality.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "# Evaluate using AI as Judge Quality Evaluators with Azure AI Evaluation SDK\n", "\n", @@ -76,7 +78,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "\n", "Please provide Azure AI Project details so that traces and eval results are pushing in the project in Azure AI Studio." 
@@ -85,33 +89,46 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "parameters" + ] + }, "outputs": [], "source": [ "azure_ai_project = {\n", - " \"subscription_id\": \"\",\n", - " \"resource_group_name\": \"\",\n", - " \"project_name\": \"\",\n", - "}" + " \"subscription_id\": \"\",\n", + " \"resource_group_name\": \"\",\n", + " \"project_name\": \"\",\n", + "}\n", + "\n", + "azure_openai_api_version = \"\"\n", + "azure_openai_deployment = \"\"\n", + "azure_openai_endpoint = \"\"" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import os\n", "\n", "# Use the following code to set the environment variables if not already set. If set, you can skip this step. In addition, you should also set \"AZURE_OPENAI_ENDPOINT\" to the endpoint of your AzureOpenAI service.\n", "\n", - "os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"\"\n", - "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = \"\"" + "os.environ[\"AZURE_OPENAI_API_VERSION\"] = azure_openai_api_version\n", + "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = azure_openai_deployment\n", + "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = azure_openai_endpoint" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Data\n", "\n", @@ -139,7 +156,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import os\n", @@ -172,7 +191,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import pathlib\n", @@ -263,7 +284,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -280,5 +301,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Safety_Risks/AI_Judge_Evaluators_Safety_Risks.ipynb b/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Safety_Risks/AI_Judge_Evaluators_Safety_Risks.ipynb index ab037f19..cb327ce2 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Safety_Risks/AI_Judge_Evaluators_Safety_Risks.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Safety_Risks/AI_Judge_Evaluators_Safety_Risks.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "# Evaluate Risk and Safety - Protected Material and Indirect Attack Jailbreak\n", "\n", @@ -21,7 +23,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Install the packages\n", @@ -30,30 +34,39 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ - "Set the following environment variables for use in this notebook:" + "Set the following variables for use in this notebook:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "parameters" + ] + }, "outputs": [], "source": [ - "import os\n", + "azure_ai_project = {\n", + " \"subscription_id\": \"\",\n", + " \"resource_group_name\": \"\",\n", + " \"project_name\": \"\",\n", + "}\n", "\n", - "os.environ[\"AZURE_DEPLOYMENT_NAME\"] = \"\"\n", - "os.environ[\"AZURE_ENDPOINT\"] = \"\"\n", - "os.environ[\"AZURE_API_VERSION\"] = \"\"\n", - 
"os.environ[\"AZURE_SUBSCRIPTION_ID\"] = \"\"\n", - "os.environ[\"AZURE_RESOURCE_GROUP\"] = \"\"\n", - "os.environ[\"AZURE_PROJECT_NAME\"] = \"\"" + "azure_openai_deployment = \"\"\n", + "azure_openai_endpoint = \"\"\n", + "azure_openai_api_version = \"\"" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "### Configuration\n", "The following simulator and evaluators require an Azure AI Studio project configuration and an Azure credential to use. \n", @@ -65,22 +78,29 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ + "import os\n", + "\n", "from pprint import pprint\n", "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", "from azure.ai.evaluation import evaluate\n", "from azure.ai.evaluation import ProtectedMaterialEvaluator, IndirectAttackEvaluator\n", - "from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario, IndirectAttackSimulator\n", + "from azure.ai.evaluation.simulator import (\n", + " AdversarialSimulator,\n", + " AdversarialScenario,\n", + " AdversarialScenarioJailbreak,\n", + " IndirectAttackSimulator,\n", + ")\n", "from openai import AzureOpenAI\n", "\n", "\n", - "azure_ai_project = {\n", - " \"subscription_id\": os.environ.get(\"AZURE_SUBSCRIPTION_ID\"),\n", - " \"resource_group_name\": os.environ.get(\"AZURE_RESOURCE_GROUP\"),\n", - " \"project_name\": os.environ.get(\"AZURE_PROJECT_NAME\"),\n", - "}\n", + "os.environ[\"AZURE_DEPLOYMENT_NAME\"] = azure_openai_deployment\n", + "os.environ[\"AZURE_ENDPOINT\"] = azure_openai_endpoint\n", + "os.environ[\"AZURE_API_VERSION\"] = azure_openai_api_version\n", "\n", "credential = DefaultAzureCredential()" ] @@ -378,7 +398,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "indirect_attack_simulator = IndirectAttackSimulator(\n", @@ -387,7 +409,7 @@ "\n", "unfiltered_indirect_attack_outputs = await indirect_attack_simulator(\n", " target=xpia_callback,\n", - " scenario=AdversarialScenario.ADVERSARIAL_INDIRECT_JAILBREAK,\n", + " scenario=AdversarialScenarioJailbreak.ADVERSARIAL_INDIRECT_JAILBREAK,\n", " max_simulation_results=10,\n", " max_conversation_turns=3,\n", ")" @@ -429,7 +451,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from pathlib import Path\n", @@ -454,7 +478,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "indirect_attack_eval = IndirectAttackEvaluator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())\n", @@ -481,12 +507,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "filtered_indirect_attack_outputs = await indirect_attack_simulator(\n", " target=xpia_callback, # now with the Prompt Shield attached to our model deployment\n", - " scenario=AdversarialScenario.ADVERSARIAL_INDIRECT_JAILBREAK,\n", + " scenario=AdversarialScenarioJailbreak.ADVERSARIAL_INDIRECT_JAILBREAK,\n", " max_simulation_results=10,\n", " max_conversation_turns=3,\n", ")" @@ -495,7 +523,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "print(filtered_indirect_attack_outputs)\n", @@ -534,7 +564,7 @@ ], "metadata": { "kernelspec": { - 
"display_name": "venv-azureai-samples", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/Custom_Evaluators/Custom_Evaluators_Blocklisting/Custom_Evaluators_Blocklisting.ipynb b/scenarios/evaluate/Supported_Evaluation_Metrics/Custom_Evaluators/Custom_Evaluators_Blocklisting/Custom_Evaluators_Blocklisting.ipynb index 2964eb85..f20020bf 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/Custom_Evaluators/Custom_Evaluators_Blocklisting/Custom_Evaluators_Blocklisting.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/Custom_Evaluators/Custom_Evaluators_Blocklisting/Custom_Evaluators_Blocklisting.ipynb @@ -3,7 +3,9 @@ { "cell_type": "markdown", "id": "2e932e4c-5d55-461e-a313-3a087d8983b5", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "\n", "\n", @@ -71,7 +73,11 @@ "cell_type": "code", "execution_count": null, "id": "257fd898-7ef2-4d89-872e-da9e426aaf0b", - "metadata": {}, + "metadata": { + "tags": [ + "parameters" + ] + }, "outputs": [], "source": [ "import pandas as pd\n", @@ -80,7 +86,11 @@ "from pprint import pprint\n", "from azure.ai.evaluation import evaluate\n", "from openai import AzureOpenAI\n", - "from azure.identity import DefaultAzureCredential, get_bearer_token_provider" + "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", + "\n", + "azure_openai_api_version = \"\"\n", + "azure_openai_deployment = \"\"\n", + "azure_openai_endpoint = \"\"" ] }, { @@ -107,9 +117,9 @@ "source": [ "# Use the following code to set the environment variables if not already set. If set, you can skip this step.\n", "\n", - "os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"\"\n", - "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = \"\"\n", - "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"\"" + "os.environ[\"AZURE_OPENAI_API_VERSION\"] = azure_openai_api_version\n", + "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = azure_openai_deployment\n", + "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = azure_openai_endpoint" ] }, { @@ -243,7 +253,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/Custom_Evaluators/Custom_Evaluators_Blocklisting/blocklist.py b/scenarios/evaluate/Supported_Evaluation_Metrics/Custom_Evaluators/Custom_Evaluators_Blocklisting/blocklist.py index ee5e60fd..cd64af1f 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/Custom_Evaluators/Custom_Evaluators_Blocklisting/blocklist.py +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/Custom_Evaluators/Custom_Evaluators_Blocklisting/blocklist.py @@ -4,7 +4,7 @@ class BlocklistEvaluator: - def __init__(self, blocklist) -> None: # noqa: ANN101, ANN001 + def __init__(self, blocklist) -> None: # noqa: ANN001 self._blocklist = blocklist def __call__(self: "BlocklistEvaluator", *, response: str): # noqa: ANN204 diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/NLP_Evaluators/NLP_Evaluators.ipynb b/scenarios/evaluate/Supported_Evaluation_Metrics/NLP_Evaluators/NLP_Evaluators.ipynb index 902b6e73..287b24ed 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/NLP_Evaluators/NLP_Evaluators.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/NLP_Evaluators/NLP_Evaluators.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "# Evaluate with quantitative NLP 
evaluators\n", "\n", @@ -23,16 +25,37 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Install the packages\n", "%pip install azure-ai-evaluation" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "azure_ai_project = {\n", + " \"subscription_id\": \"\",\n", + " \"resource_group_name\": \"\",\n", + " \"project_name\": \"\",\n", + "}" + ] + }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## NLP Evaluators" ] @@ -201,11 +224,7 @@ " \"rouge\": rouge,\n", " },\n", " # Optionally provide your AI Studio project information to track your evaluation results in your Azure AI Studio project\n", - " azure_ai_project={\n", - " \"subscription_id\": \"\",\n", - " \"resource_group_name\": \"\",\n", - " \"project_name\": \"\",\n", - " },\n", + " azure_ai_project=azure_ai_project,\n", ")" ] }, @@ -230,7 +249,7 @@ ], "metadata": { "kernelspec": { - "display_name": "env", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -247,5 +266,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_App_Endpoint/Evaluate_App_Endpoint.ipynb b/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_App_Endpoint/Evaluate_App_Endpoint.ipynb index 494ab159..2530bb52 100644 --- a/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_App_Endpoint/Evaluate_App_Endpoint.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_App_Endpoint/Evaluate_App_Endpoint.ipynb @@ -59,7 +59,11 @@ "cell_type": "code", "execution_count": null, "id": "257fd898-7ef2-4d89-872e-da9e426aaf0b", - "metadata": {}, + "metadata": { + "tags": [ + "parameters" + ] + }, "outputs": [], "source": [ "import pandas as pd\n", @@ -67,7 +71,11 @@ "\n", "from pprint import pprint\n", "from azure.ai.evaluation import evaluate\n", - "from azure.ai.evaluation import RelevanceEvaluator" + "from azure.ai.evaluation import RelevanceEvaluator\n", + "\n", + "azure_openai_api_version = \"\"\n", + "azure_openai_deployment = \"\"\n", + "azure_openai_endpoint = \"\"" ] }, { @@ -89,14 +97,16 @@ "cell_type": "code", "execution_count": null, "id": "fbfc3a3b", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Use the following code to set the environment variables if not already set. 
If set, you can skip this step.\n", "\n", - "os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"\"\n", - "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = \"\"\n", - "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"\"" + "os.environ[\"AZURE_OPENAI_API_VERSION\"] = azure_openai_api_version\n", + "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = azure_openai_deployment\n", + "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = azure_openai_endpoint" ] }, { @@ -232,7 +242,7 @@ ], "metadata": { "kernelspec": { - "display_name": "venv-azureai-samples", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint/Evaluate_Base_Model_Endpoint.ipynb b/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint/Evaluate_Base_Model_Endpoint.ipynb index f60683da..cc58c79e 100644 --- a/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint/Evaluate_Base_Model_Endpoint.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint/Evaluate_Base_Model_Endpoint.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "# Evaluate Base Model Endpoints using Azure AI Evaluation APIs\n", "\n", @@ -63,7 +65,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Target Application\n", "\n", @@ -295,7 +299,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -312,5 +316,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint/conftest.py b/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_On_Cloud/Evaluate_On_Cloud.ipynb b/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_On_Cloud/Evaluate_On_Cloud.ipynb index 17df3fa3..d40272eb 100644 --- a/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_On_Cloud/Evaluate_On_Cloud.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_On_Cloud/Evaluate_On_Cloud.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "# Cloud evaluation: Evaluating AI app data remotely in the cloud \n", "\n", @@ -50,7 +52,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from azure.ai.projects import AIProjectClient\n", @@ -66,7 +70,35 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [] + }, + "source": [ + "### Configuration\n", + "\n", + "Set the following variables for use in this notebook:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "azure_ai_connection_string = \"\" # At the moment, it should be in the format \"<region>.api.azureml.ms;<subscription-id>;<resource-group>;<project-name>\" Ex: 
eastus2.api.azureml.ms;xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxxxxx;rg-sample;sample-project-eastus2\n", + "azure_openai_deployment = \"\" # Your AOAI deployment; you must use an AOAI GPT model\n", + "azure_openai_api_version = \"\"" ] }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, "source": [ "### Connect to your Azure Open AI deployment\n", "To evaluate your LLM-generated data remotely in the cloud, we must connect to your Azure Open AI deployment. This deployment must be a GPT model which supports `chat completion`, such as `gpt-4`. To see the proper value for `conn_str`, navigate to the connection string at the \"Project Overview\" page for your Azure AI project. " @@ -75,26 +107,30 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "project_client = AIProjectClient.from_connection_string(\n", " credential=DefaultAzureCredential(),\n", - " conn_str=\"\", # At the moment, it should be in the format \"<region>.api.azureml.ms;<subscription-id>;<resource-group>;<project-name>\" Ex: eastus2.api.azureml.ms;xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxxxxx;rg-sample;sample-project-eastus2\n", + " conn_str=azure_ai_connection_string,\n", ")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Connect to your AOAI resource, you must use an AOAI GPT model\n", - "deployment_name = \"gpt-4\"\n", - "api_version = \"2024-06-01\"\n", "default_connection = project_client.connections.get_default(connection_type=ConnectionType.AZURE_OPEN_AI)\n", - "model_config = default_connection.to_evaluator_model_config(deployment_name=deployment_name, api_version=api_version)" + "model_config = default_connection.to_evaluator_model_config(\n", + " deployment_name=azure_openai_deployment, api_version=azure_openai_api_version\n", + ")" ] }, { @@ -202,7 +238,7 @@ ], "metadata": { "kernelspec": { - "display_name": "azureai-samples313", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Online/conftest.py b/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Online/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/evaluate/Supported_Evaluation_Targets/Evaluate_Online/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/evaluate/conftest.py b/scenarios/evaluate/conftest.py new file mode 100644 index 00000000..60d1e644 --- /dev/null +++ b/scenarios/evaluate/conftest.py @@ -0,0 +1,20 @@ +from typing import Any, Dict + +import pytest + + +@pytest.fixture() +def papermill_parameters( + azure_ai_project: Dict[str, str], + azure_ai_project_connection_string: str, + azure_openai_gpt4_deployment: str, + azure_openai_endpoint: str, + azure_openai_gpt4_api_version: str, +) -> Dict[str, Any]: + return { + "azure_ai_connection_string": azure_ai_project_connection_string, + "azure_ai_project": azure_ai_project, + "azure_openai_endpoint": azure_openai_endpoint, + "azure_openai_deployment": azure_openai_gpt4_deployment, + "azure_openai_api_version": azure_openai_gpt4_api_version, + } diff --git a/scenarios/fine-tuning/function 
calling/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/langchain/conftest.py b/scenarios/langchain/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/langchain/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/llama-index/conftest.py b/scenarios/llama-index/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/llama-index/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/projects/basic/conftest.py b/scenarios/projects/basic/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/projects/basic/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/rag/custom-rag-app/conftest.py b/scenarios/rag/custom-rag-app/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/rag/custom-rag-app/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/scenarios/resource-creation/basic/conftest.py b/scenarios/resource-creation/basic/conftest.py new file mode 100644 index 00000000..234bb4cc --- /dev/null +++ b/scenarios/resource-creation/basic/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(autouse=True) +def _skipAll() -> None: + """Skips all pytest tests in this directory.""" + pytest.skip(reason="Excluded from testing.") diff --git a/tox.ini b/tox.ini index a6ec9eae..6ca96ad5 100644 --- a/tox.ini +++ b/tox.ini @@ -23,3 +23,8 @@ commands = nb-clean {posargs} deps = typos ~= 1.16.0 commands = typos {posargs} + +[testenv:pytest] +deps = + -r dev-requirements.txt +commands = pytest {posargs}
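
Note on how the pieces of this diff fit together: each sample notebook now carries exactly one cell tagged "parameters", and scenarios/evaluate/conftest.py exposes a papermill_parameters fixture whose keys match the variable names in those cells (azure_ai_project, azure_openai_endpoint, and so on). Papermill's convention is to inject the supplied parameters as a new cell immediately after the cell tagged "parameters", overriding the placeholder values. The test function below is a minimal sketch of how a harness could consume that fixture; it assumes a hypothetical notebook_path fixture and is not the repository's actual runner (the logic behind the `pytest --changed-samples-only-from` flag used in run-samples.yml is not part of this diff):

# Hypothetical sketch only -- not the repo's real test harness.
from pathlib import Path
from typing import Any, Dict

import papermill as pm


def test_notebook_executes(notebook_path: Path, papermill_parameters: Dict[str, Any]) -> None:
    # Papermill injects `parameters` right after the cell tagged "parameters",
    # which is why this diff adds "tags": ["parameters"] to one cell per notebook.
    pm.execute_notebook(
        input_path=str(notebook_path),
        # "*.output.ipynb" matches the new .gitignore entries, so the executed
        # copy of the notebook never gets committed.
        output_path=str(notebook_path.with_suffix(".output.ipynb")),
        parameters=papermill_parameters,
    )

Directories that cannot run unattended opt out via the autouse _skipAll fixture in their local conftest.py, which is why the same seven-line file is repeated across the scenarios that are excluded from testing.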