Skip to content

Commit 05c69ea

Browse files
authored
Merge pull request #24 from ks6088ts-labs/feature/issue-7_image-analysis
add Azure AI Vision solution
2 parents e7f81b1 + eb3d23e commit 05c69ea

File tree

12 files changed

+229
-6
lines changed

12 files changed

+229
-6
lines changed

azure_ai_vision.env.sample

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
AZURE_AI_VISION_API_KEY="<your-api-key>"
2+
AZURE_AI_VISION_ENDPOINT="<your-endpoint>"

backend/fastapi.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from fastapi import FastAPI
22
from fastapi.openapi.utils import get_openapi
33

4+
from backend.routers import azure_ai_vision as azure_ai_vision_router
45
from backend.routers import azure_openai as azure_openai_router
56
from backend.routers import azure_storage as azure_storage_router
67
from backend.routers import document_intelligence as document_intelligence_router
@@ -12,6 +13,7 @@
1213
app.include_router(azure_openai_router.router)
1314
app.include_router(document_intelligence_router.router)
1415
app.include_router(azure_storage_router.router)
16+
app.include_router(azure_ai_vision_router.router)
1517

1618

1719
def custom_openapi():
backend/internals/azure_ai_vision.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
from logging import getLogger
2+
3+
from azure.ai.vision.imageanalysis import ImageAnalysisClient
4+
from azure.ai.vision.imageanalysis.models import VisualFeatures
5+
from azure.core.credentials import AzureKeyCredential
6+
7+
from backend.settings.azure_ai_vision import Settings
8+
9+
logger = getLogger(__name__)
10+
11+
12+
class AzureAiVisionClient:
    """Client for the Azure AI Vision operations used by the backend.

    Image analysis goes through the Image Analysis SDK client, while image
    vectorization calls the REST endpoint directly with ``requests``.
    """

    def __init__(self, settings: Settings) -> None:
        """Keep the settings that provide the service endpoint and API key."""
        self.settings = settings

    def get_image_analysis_client(self) -> ImageAnalysisClient:
        """Build a new Image Analysis SDK client from the stored settings."""
        return ImageAnalysisClient(
            endpoint=self.settings.azure_ai_vision_endpoint,
            credential=AzureKeyCredential(self.settings.azure_ai_vision_api_key),
        )

    def analyze_image(
        self,
        image: bytes,
    ) -> dict:
        """Request the CAPTION and READ visual features for an image.

        Args:
            image: Raw bytes of the image to analyze.

        Returns:
            The analysis result converted to a dictionary with ``as_dict()``.
        """
        # A fresh client is created on every call; the ``with`` block closes
        # its underlying HTTP transport instead of leaving it open.
        with self.get_image_analysis_client() as image_analysis_client:
            result = image_analysis_client.analyze(
                image_data=image,
                visual_features=[
                    VisualFeatures.CAPTION,
                    VisualFeatures.READ,
                ],
            )
        logger.info("Analyzed image")
        return result.as_dict()

    def vectorize_image(
        self,
        image: bytes,
        timeout: float = 30.0,
    ) -> dict:
        """Post an image to the ``retrieval:vectorizeImage`` REST endpoint.

        Args:
            image: Raw bytes of the image, sent as ``application/octet-stream``.
            timeout: Seconds to wait for the service before giving up. Without
                a timeout ``requests`` would wait indefinitely.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.HTTPError: If the service answers with an error status.
            requests.Timeout: If the service does not answer within ``timeout``.
        """
        # FIXME: replace with Azure SDK when available
        from urllib.parse import urljoin

        import requests

        url = urljoin(
            self.settings.azure_ai_vision_endpoint,
            "/computervision/retrieval:vectorizeImage",
        )
        params = {
            "overload": "stream",
            "api-version": "2023-02-01-preview",
            "modelVersion": "latest",
        }
        headers = {
            "Content-Type": "application/octet-stream",
            "Ocp-Apim-Subscription-Key": self.settings.azure_ai_vision_api_key,
        }
        response = requests.post(
            url=url,
            params=params,
            headers=headers,
            data=image,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()

backend/routers/azure_ai_vision.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from logging import getLogger
2+
3+
from fastapi import APIRouter, UploadFile
4+
5+
from backend.internals import azure_ai_vision
6+
from backend.schemas import azure_ai_vision as azure_ai_vision_schemas
7+
from backend.settings.azure_ai_vision import Settings
8+
9+
logger = getLogger(__name__)
10+
client = azure_ai_vision.AzureAiVisionClient(
11+
settings=Settings(),
12+
)
13+
14+
router = APIRouter(
15+
prefix="/azure_ai_vision",
16+
tags=["azure_ai_vision"],
17+
responses={404: {"description": "Not found"}},
18+
)
19+
20+
21+
@router.post(
    "/image/analyze/",
    response_model=azure_ai_vision_schemas.ImageAnalysisResponse,
    status_code=200,
)
async def analyze_image(file: UploadFile):
    """Analyze an uploaded image and return the result wrapped in a response model.

    Any failure while reading the upload or calling the client is logged with
    its traceback and re-raised unchanged.
    """
    from fastapi.concurrency import run_in_threadpool

    try:
        content = await file.read()
        # The client call is synchronous network I/O; running it in the
        # threadpool keeps the event loop free for other requests.
        result = await run_in_threadpool(
            client.analyze_image,
            image=content,
        )
    except Exception as e:
        # logger.exception keeps the same message and adds the traceback.
        logger.exception(f"Failed to analyze image: {e}")
        raise
    return azure_ai_vision_schemas.ImageAnalysisResponse(
        result=result,
    )
38+
39+
40+
@router.post(
    "/image/vectorize/",
    response_model=azure_ai_vision_schemas.VectorizeImageResponse,
    status_code=200,
)
async def vectorize_image(file: UploadFile):
    """Vectorize an uploaded image and return the result wrapped in a response model.

    Any failure while reading the upload or calling the client is logged with
    its traceback and re-raised unchanged.
    """
    from fastapi.concurrency import run_in_threadpool

    try:
        content = await file.read()
        # The client call is a synchronous HTTP request; running it in the
        # threadpool keeps the event loop free for other requests.
        result = await run_in_threadpool(
            client.vectorize_image,
            image=content,
        )
    except Exception as e:
        # logger.exception keeps the same message and adds the traceback.
        logger.exception(f"Failed to vectorize image: {e}")
        raise
    return azure_ai_vision_schemas.VectorizeImageResponse(
        result=result,
    )

backend/schemas/azure_ai_vision.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from logging import getLogger
2+
3+
from pydantic import BaseModel
4+
5+
logger = getLogger(__name__)
6+
7+
8+
class ImageAnalysisResponse(BaseModel):
    """Response body carrying an image-analysis result."""

    # Analysis output as a plain dictionary.
    result: dict
10+
11+
12+
class VectorizeImageResponse(BaseModel):
    """Response body carrying an image-vectorization result."""

    # Vectorization output as a plain dictionary.
    result: dict
backend/settings/azure_ai_vision.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from pydantic_settings import BaseSettings, SettingsConfigDict
2+
3+
4+
class Settings(BaseSettings):
    """Azure AI Vision connection settings.

    Configured to read values from the ``azure_ai_vision.env`` file, decoded
    as UTF-8. The defaults below are placeholders, not working credentials.
    """

    model_config = SettingsConfigDict(
        env_file="azure_ai_vision.env",
        env_file_encoding="utf-8",
    )

    # Base URL of the Azure AI Vision resource.
    azure_ai_vision_endpoint: str = "https://<name>.cognitiveservices.azure.com/"
    # Subscription key used to authenticate against the resource.
    azure_ai_vision_api_key: str = "<api-key>"

docs/README.md

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@
2424

2525
## Azure
2626

27-
### Azure OpenAI Service
28-
29-
- [Azure-Samples/openai](https://github.com/Azure-Samples/openai)
30-
3127
### Azure Functions
3228

3329
- [Quickstart: Create a Python function in Azure from the command line](https://learn.microsoft.com/en-us/azure/azure-functions/create-first-function-cli-python?tabs=linux%2Cbash%2Cazure-cli%2Cbrowser)
3430
- [Using FastAPI Framework with Azure Functions](https://learn.microsoft.com/en-us/samples/azure-samples/fastapi-on-azure-functions/fastapi-on-azure-functions/)
3531

32+
### Azure OpenAI Service
33+
34+
- [Azure-Samples/openai](https://github.com/Azure-Samples/openai)
35+
3636
### Azure AI Speech Service
3737

3838
- [How to recognize speech > Recognize speech from a microphone](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/how-to-recognize-speech?pivots=programming-language-python#recognize-speech-from-a-microphone)
@@ -42,6 +42,13 @@
4242
- [How to recognize speech > Use continuous recognition](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/how-to-recognize-speech?pivots=programming-language-python#use-continuous-recognition)
4343
- [Task 02 - Perform speech requests with Streamlit (40 minutes)](https://microsoft.github.io/TechExcel-Implementing-automation-practices-using-Azure-OpenAI/docs/04_implement_audio_transcription/0402.html)
4444

45+
### Azure AI Vision
46+
47+
- [Quickstart: Image Analysis 4.0](https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40?tabs=visual-studio%2Cwindows&pivots=programming-language-python)
48+
- [Do image retrieval using multimodal embeddings (version 4.0)](https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/how-to/image-retrieval?tabs=python)
49+
- [Multimodal embeddings (version 4.0)](https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/concept-image-retrieval)
50+
- [Image Retrieval - Vectorize Image From Image Stream](https://learn.microsoft.com/en-us/rest/api/computervision/image-retrieval/vectorize-image-from-image-stream?view=rest-computervision-2023-02-01-preview&tabs=HTTP)
51+
4552
### Azure AI Document Intelligence
4653

4754
- [Azure AI Document Intelligence client library for Python](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/documentintelligence/azure-ai-documentintelligence/README.md)

frontend/entrypoint.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import logging
22

3-
from frontend.solutions import azure_storage, document_intelligence, sandbox, transcription
3+
from frontend.solutions import azure_ai_vision, azure_storage, document_intelligence, sandbox, transcription
44
from frontend.solutions.types import SolutionType
55

66
logger = logging.getLogger(__name__)
@@ -31,3 +31,8 @@ def start(
3131
backend_url=backend_url,
3232
log_level=log_level,
3333
)
34+
if solution_type == SolutionType.AZURE_AI_VISION:
35+
return azure_ai_vision.start(
36+
backend_url=backend_url,
37+
log_level=log_level,
38+
)
frontend/solutions/azure_ai_vision.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import asyncio
2+
import logging
3+
from io import BytesIO
4+
from urllib.parse import urljoin
5+
6+
import streamlit as st
7+
8+
from frontend.solutions.utilities import http_post_file
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
def start(
    backend_url: str,
    log_level: int,
):
    """Render the Azure AI Vision page.

    Shows a file uploader and an "Analyze" button. Once a file is uploaded it
    is displayed, and pressing the button posts its bytes to the backend
    analyze endpoint and writes the response to the page.

    Args:
        backend_url: Base URL of the backend API.
        log_level: Logging level applied to this module's logger.
    """
    logger.setLevel(log_level)
    logger.debug(f"set log level to {log_level}")

    st.header("Azure AI Vision")

    uploaded_file = st.file_uploader(label="Choose a file", key="file_uploader")
    analyze_clicked = st.button(label="Analyze", key="analyze_button")

    # Nothing to show or analyze until a file has been provided.
    if uploaded_file is None:
        st.warning("Please upload a file first")
        return

    st.image(uploaded_file, caption="Uploaded image")
    if not analyze_clicked:
        return

    with st.spinner("Analyzing..."):
        endpoint = urljoin(base=backend_url, url="/azure_ai_vision/image/analyze/")
        payload = BytesIO(uploaded_file.getvalue())
        response = asyncio.run(
            http_post_file(
                url=endpoint,
                data_bytes_io=payload,
            )
        )
        st.write(response)

frontend/solutions/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ class SolutionType(Enum):
66
TRANSCRIPTION = "TRANSCRIPTION"
77
DOCUMENT_INTELLIGENCE = "DOCUMENT_INTELLIGENCE"
88
AZURE_STORAGE = "AZURE_STORAGE"
9+
AZURE_AI_VISION = "AZURE_AI_VISION"

0 commit comments

Comments
 (0)