Skip to content

Commit 3b1604e

Browse files
authored
Merge pull request #680 from microsoft/macae-BS-LegalContract
Macae legal contract use case
2 parents 60fff4a + 6074d9c commit 3b1604e

File tree

7 files changed

+374
-212
lines changed

7 files changed

+374
-212
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
{
2+
"id": "1",
3+
"team_id": "team-legal-1",
4+
"name": "Legal Contract Review Team",
5+
"status": "visible",
6+
"created": "",
7+
"created_by": "",
8+
"deployment_name": "gpt-4.1-mini",
9+
"description": "A multi-agent legal review team that summarizes NDAs, identifies risks, checks compliance, and recommends improvements using advanced legal reasoning and retrieval-augmented analysis.",
10+
"logo": "",
11+
"plan": "",
12+
"agents": [
13+
{
14+
"input_key": "",
15+
"type": "summary",
16+
"name": "SummaryAgent",
17+
"deployment_name": "gpt-4.1-mini",
18+
"icon": "",
19+
"system_message": "You are the Summary Agent for legal contract analysis. Your task is to produce a clear, accurate, and structured executive summary of NDA and legal agreement documents. You must deliver summaries organized into labeled sections including: Overview, Parties, Effective Date, Purpose, Definition of Confidential Information, Receiving Party Obligations, Term & Termination, Governing Law, Restrictions & Limitations, Miscellaneous Clauses, Notable or Unusual Terms, and Key Items for Risk & Compliance Agents. Highlight missing elements such as liability caps, dispute resolution mechanisms, data handling obligations, or ambiguous language. Maintain a precise, neutral legal tone. Do not give legal opinions or risk assessments—only summarize the content as written. Use retrieval results from the search index to ensure completeness and reference contextual definitions or standard clause expectations when needed.",
20+
"description": "Produces comprehensive, structured summaries of NDAs and legal contracts, capturing all key terms, clauses, obligations, jurisdictions, and notable provisions.",
21+
"use_rag": true,
22+
"use_mcp": false,
23+
"use_bing": false,
24+
"use_reasoning": false,
25+
"index_name": "legal-doc-index",
26+
"coding_tools": false
27+
},
28+
{
29+
"input_key": "",
30+
"type": "risk",
31+
"name": "RiskAgent",
32+
"deployment_name": "gpt-4.1-mini",
33+
"icon": "",
34+
"system_message": "You are the Risk Agent for NDA and legal contract analysis. Use the NDA Risk Assessment Reference document and retrieved context to identify High, Medium, and Low risk issues. Evaluate clauses for missing liability caps, ambiguous terms, overly broad confidentiality definitions, jurisdiction misalignment, missing termination rights, unclear data handling obligations, missing dispute resolution, and any incomplete or poorly scoped definitions. For every risk you identify, provide: (1) Risk Category (High/Medium/Low), (2) Clause or Section impacted, (3) Description of the issue, (4) Why it matters or what exposure it creates, and (5) Suggested edit or corrective language. Apply the risk scoring framework: High = escalate immediately; Medium = requires revision; Low = minor issue. Be precise, legally aligned, and practical. Reference retrieved examples or standards when appropriate. Your output must be structured and actionable.",
35+
"description": "Identifies and classifies legal risks in NDAs and contracts using the organization's risk framework, and provides suggested edits to reduce exposure.",
36+
"use_rag": true,
37+
"use_mcp": false,
38+
"use_bing": false,
39+
"use_reasoning": false,
40+
"index_name": "legal-doc-index",
41+
"coding_tools": false
42+
},
43+
{
44+
"input_key": "",
45+
"type": "compliance",
46+
"name": "ComplianceAgent",
47+
"deployment_name": "gpt-4.1-mini",
48+
"icon": "",
49+
"system_message": "You are the Compliance Agent responsible for validating NDAs and legal agreements against mandatory legal and policy requirements. Use the NDA Compliance Reference Document and retrieval results to evaluate whether the contract includes all required clauses: Confidentiality, Term & Termination, Governing Law aligned to approved jurisdictions, Non-Assignment, and Entire Agreement. Identify compliance gaps including ambiguous language, missing liability protections, improper jurisdiction, excessive term length, insufficient data protection obligations, missing dispute resolution mechanisms, or export control risks. For each issue provide: (1) Compliance Area (e.g., Term Length, Jurisdiction, Confidentiality), (2) Status (Pass/Fail), (3) Issue Description, (4) Whether it is Mandatory or Recommended, (5) Corrective Recommendation or Suggested Language. Deliver a final Compliance Status summary. Maintain a professional, objective, legally accurate tone.",
50+
"description": "Performs compliance validation of NDAs and contracts against legal policy requirements, identifies gaps, and provides corrective recommendations and compliance status.",
51+
"use_rag": true,
52+
"use_mcp": false,
53+
"use_bing": false,
54+
"use_reasoning": false,
55+
"index_name": "legal-doc-index",
56+
"coding_tools": false
57+
}
58+
],
59+
"protected": false,
60+
"starting_tasks": [
61+
{
62+
"id": "task-1",
63+
"name": "NDA Contract Review",
64+
"prompt": "Review Contoso's NDA. Provide a summary (parties, date, term, governing law), assess risks (High/Medium/Low with clause references), audit compliance against company policy, and suggest edits for any issues.",
65+
"created": "",
66+
"creator": "",
67+
"logo": ""
68+
}
69+
]
70+
}
16.5 KB
Binary file not shown.
16.9 KB
Binary file not shown.
16.7 KB
Binary file not shown.

infra/scripts/index_datasets.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,39 @@ def extract_pdf_text(pdf_bytes):
4848
return "PDF_ERROR: PyPDF2 library not available. Install with: pip install PyPDF2"
4949
except Exception as e:
5050
return f"PDF_ERROR: Error reading PDF content: {str(e)}"
51+
52+
53+
# DOCX text extraction function
54+
def extract_docx_text(docx_bytes):
    """Extract text content from DOCX bytes using python-docx.

    Non-empty paragraph text is collected first, followed by non-empty
    table cell text; the pieces are joined with newlines.

    Args:
        docx_bytes: Raw bytes of a .docx file.

    Returns:
        The extracted text. This function never raises; instead it returns
        a sentinel string:
          * "DOCX_NO_TEXT: ..." when the document has no readable text,
          * "DOCX_ERROR: ..." when python-docx is not installed or the
            document cannot be parsed.
    """
    try:
        # Imported lazily so a missing optional dependency is reported as a
        # sentinel string rather than breaking the whole script.
        from docx import Document
        import io

        doc = Document(io.BytesIO(docx_bytes))

        # Extract text from paragraphs
        text_content = [
            paragraph.text
            for paragraph in doc.paragraphs
            if paragraph.text.strip()
        ]

        # Extract text from tables
        for table in doc.tables:
            # python-docx reports a merged cell once for every grid position
            # it spans, which would otherwise duplicate its text. Track the
            # underlying cell element; keeping the element as the dict value
            # holds a reference so its id() stays stable while we iterate.
            seen_cells = {}
            for row in table.rows:
                for cell in row.cells:
                    # `_tc` is a python-docx internal; if it is unavailable,
                    # fall back to the plain (non de-duplicated) behaviour.
                    cell_element = getattr(cell, "_tc", None)
                    if cell_element is not None:
                        if id(cell_element) in seen_cells:
                            continue
                        seen_cells[id(cell_element)] = cell_element
                    if cell.text.strip():
                        text_content.append(cell.text)

        full_text = "\n".join(text_content).strip()
        return full_text if full_text else "DOCX_NO_TEXT: No readable text content found in DOCX."

    except ImportError:
        return "DOCX_ERROR: python-docx library not available. Install with: pip install python-docx"
    except Exception as e:
        # Best-effort extraction: report the failure as text, matching the
        # "<KIND>_ERROR: ..." sentinel convention used for PDF extraction.
        return f"DOCX_ERROR: Error reading DOCX content: {str(e)}"
5184

5285
if len(sys.argv) < 4:
5386
print("Usage: python index_datasets.py <storage_account_name> <blob_container_name> <ai_search_endpoint> [<ai_search_index_name>]")
@@ -96,6 +129,8 @@ def extract_pdf_text(pdf_bytes):
96129
title = blob.name.replace(".csv", "")
97130
title = title.replace(".json", "")
98131
title = title.replace(".pdf", "")
132+
title = title.replace(".docx", "")
133+
title = title.replace(".pptx", "")
99134
data = container_client.download_blob(blob.name).readall()
100135

101136
try:
@@ -104,6 +139,8 @@ def extract_pdf_text(pdf_bytes):
104139
# Check whether this is a PDF or DOCX file and extract its text accordingly
105140
if blob.name.lower().endswith('.pdf'):
106141
text = extract_pdf_text(data)
142+
elif blob.name.lower().endswith('.docx'):
143+
text = extract_docx_text(data)
107144
else:
108145
# Original processing for all other (non-PDF, non-DOCX) files
109146
text = data.decode('utf-8')

src/frontend/package-lock.json

Lines changed: 24 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)