Skip to content

Commit 5e35bf2

Browse files
committed
update sections
1 parent 293611f commit 5e35bf2

File tree

1 file changed

+114
-112
lines changed

1 file changed

+114
-112
lines changed

articles/ai-services/content-understanding/tutorial/RAG-tutorial.md

Lines changed: 114 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,120 @@ Analyzers are reusable components in Content Understanding that streamline the d
6565

6666
The following code samples demonstrate how to create analyzers for each modality, specifying the structured data to be extracted, such as key fields, summaries, or classifications. These analyzers will serve as the foundation for extracting and enriching content in your RAG solution.
6767

68-
**Starting off with the schema details for each modality:**
68+
#### Load all environment variables and necessary libraries from Langchain
69+
70+
``` python
71+
72+
import os
73+
from dotenv import load_dotenv
74+
load_dotenv()
75+
76+
# Load and validate Azure AI Services configs
77+
AZURE_AI_SERVICE_ENDPOINT = os.getenv("AZURE_AI_SERVICE_ENDPOINT")
78+
AZURE_AI_SERVICE_API_VERSION = os.getenv("AZURE_AI_SERVICE_API_VERSION") or "2024-12-01-preview"
79+
AZURE_DOCUMENT_INTELLIGENCE_API_VERSION = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_API_VERSION") or "2024-11-30"
80+
81+
# Load and validate Azure OpenAI configs
82+
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
83+
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
84+
AZURE_OPENAI_CHAT_API_VERSION = os.getenv("AZURE_OPENAI_CHAT_API_VERSION") or "2024-08-01-preview"
85+
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
86+
AZURE_OPENAI_EMBEDDING_API_VERSION = os.getenv("AZURE_OPENAI_EMBEDDING_API_VERSION") or "2023-05-15"
87+
88+
# Load and validate Azure Search Services configs
89+
AZURE_SEARCH_ENDPOINT = os.getenv("AZURE_SEARCH_ENDPOINT")
90+
AZURE_SEARCH_INDEX_NAME = os.getenv("AZURE_SEARCH_INDEX_NAME") or "sample-doc-index"
91+
92+
# Import libraries from Langchain
93+
from langchain import hub
94+
from langchain_openai import AzureChatOpenAI
95+
from langchain_openai import AzureOpenAIEmbeddings
96+
from langchain.schema import StrOutputParser
97+
from langchain.schema.runnable import RunnablePassthrough
98+
from langchain.text_splitter import MarkdownHeaderTextSplitter
99+
from langchain.vectorstores.azuresearch import AzureSearch
100+
from langchain_core.prompts import ChatPromptTemplate
101+
from langchain.schema import Document
102+
import requests
103+
import json
104+
import sys
105+
import uuid
106+
from pathlib import Path
107+
from dotenv import find_dotenv, load_dotenv
108+
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
109+
110+
# Add the parent directory to the path to use shared modules
111+
parent_dir = Path(Path.cwd()).parent
112+
sys.path.append(str(parent_dir))
113+
114+
```
115+
---
116+
117+
#### Create analyzers
118+
119+
``` python
120+
from pathlib import Path
121+
from python.content_understanding_client import AzureContentUnderstandingClient
122+
credential = DefaultAzureCredential()
123+
token_provider = get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")
124+
125+
#set analyzer configs
126+
analyzer_configs = [
127+
{
128+
"id": "doc-analyzer" + str(uuid.uuid4()),
129+
"template_path": "../analyzer_templates/content_document.json",
130+
"location": Path("../data/sample_layout.pdf"),
131+
},
132+
{
133+
"id": "image-analyzer" + str(uuid.uuid4()),
134+
"template_path": "../analyzer_templates/image_chart_diagram_understanding.json",
135+
"location": Path("../data/sample_report.pdf"),
136+
},
137+
{
138+
"id": "audio-analyzer" + str(uuid.uuid4()),
139+
"template_path": "../analyzer_templates/call_recording_analytics.json",
140+
"location": Path("../data/callCenterRecording.mp3"),
141+
},
142+
{
143+
"id": "video-analyzer" + str(uuid.uuid4()),
144+
"template_path": "../analyzer_templates/video_content_understanding.json",
145+
"location": Path("../data/FlightSimulator.mp4"),
146+
},
147+
]
148+
149+
# Create Content Understanding client
150+
content_understanding_client = AzureContentUnderstandingClient(
151+
endpoint=AZURE_AI_SERVICE_ENDPOINT,
152+
api_version=AZURE_AI_SERVICE_API_VERSION,
153+
token_provider=token_provider,
154+
x_ms_useragent="azure-ai-content-understanding-python/content_extraction", # This header is used for sample usage telemetry, please comment out this line if you want to opt out.
155+
)
156+
157+
# Iterate through each config and create an analyzer
158+
for analyzer in analyzer_configs:
159+
analyzer_id = analyzer["id"]
160+
template_path = analyzer["template_path"]
161+
162+
try:
163+
164+
# Create the analyzer using the content understanding client
165+
response = content_understanding_client.begin_create_analyzer(
166+
analyzer_id=analyzer_id,
167+
analyzer_template_path=template_path
168+
)
169+
result = content_understanding_client.poll_result(response)
170+
print(f"Successfully created analyzer: {analyzer_id}")
171+
172+
except Exception as e:
173+
print(f"Failed to create analyzer: {analyzer_id}")
174+
print(f"Error: {e}")
175+
176+
```
177+
---
178+
179+
**Note:** Field extraction schemas are optional and not required for performing content extraction. To run content extraction and create analyzers without defining field schemas, provide only the analyzer ID and the file to be analyzed.
180+
181+
**Here is a sample schema definition:**
69182

70183
# [Document](#tab/document)
71184

@@ -198,117 +311,6 @@ To create a custom analyzer, you need to define a field schema that describes th
198311

199312
---
200313

201-
#### Load all environment variables and necessary libraries from Langchain
202-
203-
``` python
204-
205-
import os
206-
from dotenv import load_dotenv
207-
load_dotenv()
208-
209-
# Load and validate Azure AI Services configs
210-
AZURE_AI_SERVICE_ENDPOINT = os.getenv("AZURE_AI_SERVICE_ENDPOINT")
211-
AZURE_AI_SERVICE_API_VERSION = os.getenv("AZURE_AI_SERVICE_API_VERSION") or "2024-12-01-preview"
212-
AZURE_DOCUMENT_INTELLIGENCE_API_VERSION = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_API_VERSION") or "2024-11-30"
213-
214-
# Load and validate Azure OpenAI configs
215-
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
216-
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
217-
AZURE_OPENAI_CHAT_API_VERSION = os.getenv("AZURE_OPENAI_CHAT_API_VERSION") or "2024-08-01-preview"
218-
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
219-
AZURE_OPENAI_EMBEDDING_API_VERSION = os.getenv("AZURE_OPENAI_EMBEDDING_API_VERSION") or "2023-05-15"
220-
221-
# Load and validate Azure Search Services configs
222-
AZURE_SEARCH_ENDPOINT = os.getenv("AZURE_SEARCH_ENDPOINT")
223-
AZURE_SEARCH_INDEX_NAME = os.getenv("AZURE_SEARCH_INDEX_NAME") or "sample-doc-index"
224-
225-
# Import libraries from Langchain
226-
from langchain import hub
227-
from langchain_openai import AzureChatOpenAI
228-
from langchain_openai import AzureOpenAIEmbeddings
229-
from langchain.schema import StrOutputParser
230-
from langchain.schema.runnable import RunnablePassthrough
231-
from langchain.text_splitter import MarkdownHeaderTextSplitter
232-
from langchain.vectorstores.azuresearch import AzureSearch
233-
from langchain_core.prompts import ChatPromptTemplate
234-
from langchain.schema import Document
235-
import requests
236-
import json
237-
import sys
238-
import uuid
239-
from pathlib import Path
240-
from dotenv import find_dotenv, load_dotenv
241-
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
242-
243-
# Add the parent directory to the path to use shared modules
244-
parent_dir = Path(Path.cwd()).parent
245-
sys.path.append(str(parent_dir))
246-
247-
```
248-
---
249-
250-
#### Create analyzers
251-
252-
``` python
253-
from pathlib import Path
254-
from python.content_understanding_client import AzureContentUnderstandingClient
255-
credential = DefaultAzureCredential()
256-
token_provider = get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")
257-
258-
#set analyzer configs
259-
analyzer_configs = [
260-
{
261-
"id": "doc-analyzer" + str(uuid.uuid4()),
262-
"template_path": "../analyzer_templates/content_document.json",
263-
"location": Path("../data/sample_layout.pdf"),
264-
},
265-
{
266-
"id": "image-analyzer" + str(uuid.uuid4()),
267-
"template_path": "../analyzer_templates/image_chart_diagram_understanding.json",
268-
"location": Path("../data/sample_report.pdf"),
269-
},
270-
{
271-
"id": "audio-analyzer" + str(uuid.uuid4()),
272-
"template_path": "../analyzer_templates/call_recording_analytics.json",
273-
"location": Path("../data/callCenterRecording.mp3"),
274-
},
275-
{
276-
"id": "video-analyzer" + str(uuid.uuid4()),
277-
"template_path": "../analyzer_templates/video_content_understanding.json",
278-
"location": Path("../data/FlightSimulator.mp4"),
279-
},
280-
]
281-
282-
# Create Content Understanding client
283-
content_understanding_client = AzureContentUnderstandingClient(
284-
endpoint=AZURE_AI_SERVICE_ENDPOINT,
285-
api_version=AZURE_AI_SERVICE_API_VERSION,
286-
token_provider=token_provider,
287-
x_ms_useragent="azure-ai-content-understanding-python/content_extraction", # This header is used for sample usage telemetry, please comment out this line if you want to opt out.
288-
)
289-
290-
# Iterate through each config and create an analyzer
291-
for analyzer in analyzer_configs:
292-
analyzer_id = analyzer["id"]
293-
template_path = analyzer["template_path"]
294-
295-
try:
296-
297-
# Create the analyzer using the content understanding client
298-
response = content_understanding_client.begin_create_analyzer(
299-
analyzer_id=analyzer_id,
300-
analyzer_template_path=template_path
301-
)
302-
result = content_understanding_client.poll_result(response)
303-
print(f"Successfully created analyzer: {analyzer_id}")
304-
305-
except Exception as e:
306-
print(f"Failed to create analyzer: {analyzer_id}")
307-
print(f"Error: {e}")
308-
309-
```
310-
---
311-
312314
## Perform Content and Field Analysis
313315
**Content extraction** is the first step in the RAG implementation process. It transforms raw multimodal data—such as documents, images, audio, and video—into structured, searchable formats. This foundational step ensures that the content is organized and ready for indexing and retrieval. Content extraction provides the baseline for indexing and retrieval but may not fully address domain-specific needs or provide deeper contextual insights.
314316
Learn more about the content extraction capabilities for each modality in the Azure AI Content Understanding documentation.

0 commit comments

Comments
 (0)