Skip to content

Commit bec59be

Browse files
Apply acls to citations (#1160)
* working * object urls * fixes and feedback * fix typing errors * adding more tests * Update app/backend/app.py * Update app/backend/app.py Co-authored-by: Pamela Fox <[email protected]> * addressing feedback * more feedback --------- Co-authored-by: Matt Gotteiner <[email protected]> Co-authored-by: Pamela Fox <[email protected]>
1 parent ce5b9b0 commit bec59be

File tree

8 files changed

+252
-44
lines changed

8 files changed

+252
-44
lines changed

app/backend/app.py

Lines changed: 29 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import mimetypes
66
import os
77
from pathlib import Path
8-
from typing import AsyncGenerator, cast
8+
from typing import Any, AsyncGenerator, Dict, cast
99

1010
from azure.core.exceptions import ResourceNotFoundError
1111
from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
@@ -14,7 +14,7 @@
1414
from azure.search.documents.aio import SearchClient
1515
from azure.search.documents.indexes.aio import SearchIndexClient
1616
from azure.storage.blob.aio import BlobServiceClient
17-
from openai import APIError, AsyncAzureOpenAI, AsyncOpenAI
17+
from openai import AsyncAzureOpenAI, AsyncOpenAI
1818
from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor
1919
from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware
2020
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
@@ -36,24 +36,20 @@
3636
from approaches.chatreadretrievereadvision import ChatReadRetrieveReadVisionApproach
3737
from approaches.retrievethenread import RetrieveThenReadApproach
3838
from approaches.retrievethenreadvision import RetrieveThenReadVisionApproach
39+
from config import (
40+
CONFIG_ASK_APPROACH,
41+
CONFIG_ASK_VISION_APPROACH,
42+
CONFIG_AUTH_CLIENT,
43+
CONFIG_BLOB_CONTAINER_CLIENT,
44+
CONFIG_CHAT_APPROACH,
45+
CONFIG_CHAT_VISION_APPROACH,
46+
CONFIG_GPT4V_DEPLOYED,
47+
CONFIG_OPENAI_CLIENT,
48+
CONFIG_SEARCH_CLIENT,
49+
)
3950
from core.authentication import AuthenticationHelper
40-
41-
CONFIG_OPENAI_TOKEN = "openai_token"
42-
CONFIG_CREDENTIAL = "azure_credential"
43-
CONFIG_ASK_APPROACH = "ask_approach"
44-
CONFIG_ASK_VISION_APPROACH = "ask_vision_approach"
45-
CONFIG_CHAT_VISION_APPROACH = "chat_vision_approach"
46-
CONFIG_CHAT_APPROACH = "chat_approach"
47-
CONFIG_BLOB_CONTAINER_CLIENT = "blob_container_client"
48-
CONFIG_AUTH_CLIENT = "auth_client"
49-
CONFIG_GPT4V_DEPLOYED = "gpt4v_deployed"
50-
CONFIG_SEARCH_CLIENT = "search_client"
51-
CONFIG_OPENAI_CLIENT = "openai_client"
52-
ERROR_MESSAGE = """The app encountered an error processing your request.
53-
If you are an administrator of the app, view the full error in the logs. See aka.ms/appservice-logs for more information.
54-
Error type: {error_type}
55-
"""
56-
ERROR_MESSAGE_FILTER = """Your message contains content that was flagged by the OpenAI content filter."""
51+
from decorators import authenticated, authenticated_path
52+
from error import error_dict, error_response
5753

5854
bp = Blueprint("routes", __name__, static_folder="static")
5955
# Fix Windows registry issue with mimetypes
@@ -83,11 +79,16 @@ async def assets(path):
8379
return await send_from_directory(Path(__file__).resolve().parent / "static" / "assets", path)
8480

8581

86-
# Serve content files from blob storage from within the app to keep the example self-contained.
87-
# *** NOTE *** this assumes that the content files are public, or at least that all users of the app
88-
# can access all the files. This is also slow and memory hungry.
8982
@bp.route("/content/<path>")
83+
@authenticated_path
9084
async def content_file(path: str):
85+
"""
86+
Serve content files from blob storage from within the app to keep the example self-contained.
87+
*** NOTE *** if you are using app services authentication, this route will return unauthorized to all users that are not logged in
88+
if AZURE_ENFORCE_ACCESS_CONTROL is not set or false, logged in users can access all files regardless of access control
89+
if AZURE_ENFORCE_ACCESS_CONTROL is set to true, logged in users can only access files they have access to
90+
This is also slow and memory hungry.
91+
"""
9192
# Remove page number from path, filename-1.txt -> filename.txt
9293
if path.find("#page=") > 0:
9394
path_parts = path.rsplit("#page=", 1)
@@ -110,28 +111,15 @@ async def content_file(path: str):
110111
return await send_file(blob_file, mimetype=mime_type, as_attachment=False, attachment_filename=path)
111112

112113

113-
def error_dict(error: Exception) -> dict:
114-
if isinstance(error, APIError) and error.code == "content_filter":
115-
return {"error": ERROR_MESSAGE_FILTER}
116-
return {"error": ERROR_MESSAGE.format(error_type=type(error))}
117-
118-
119-
def error_response(error: Exception, route: str, status_code: int = 500):
120-
logging.exception("Exception in %s: %s", route, error)
121-
if isinstance(error, APIError) and error.code == "content_filter":
122-
status_code = 400
123-
return jsonify(error_dict(error)), status_code
124-
125-
126114
@bp.route("/ask", methods=["POST"])
127-
async def ask():
115+
@authenticated
116+
async def ask(auth_claims: Dict[str, Any]):
128117
if not request.is_json:
129118
return jsonify({"error": "request must be json"}), 415
130119
request_json = await request.get_json()
131120
context = request_json.get("context", {})
132-
auth_helper = current_app.config[CONFIG_AUTH_CLIENT]
121+
context["auth_claims"] = auth_claims
133122
try:
134-
context["auth_claims"] = await auth_helper.get_auth_claims_if_enabled(request.headers)
135123
use_gpt4v = context.get("overrides", {}).get("use_gpt4v", False)
136124
approach: Approach
137125
if use_gpt4v and CONFIG_ASK_VISION_APPROACH in current_app.config:
@@ -163,14 +151,14 @@ async def format_as_ndjson(r: AsyncGenerator[dict, None]) -> AsyncGenerator[str,
163151

164152

165153
@bp.route("/chat", methods=["POST"])
166-
async def chat():
154+
@authenticated
155+
async def chat(auth_claims: Dict[str, Any]):
167156
if not request.is_json:
168157
return jsonify({"error": "request must be json"}), 415
169158
request_json = await request.get_json()
170159
context = request_json.get("context", {})
171-
auth_helper = current_app.config[CONFIG_AUTH_CLIENT]
160+
context["auth_claims"] = auth_claims
172161
try:
173-
context["auth_claims"] = await auth_helper.get_auth_claims_if_enabled(request.headers)
174162
use_gpt4v = context.get("overrides", {}).get("use_gpt4v", False)
175163
approach: Approach
176164
if use_gpt4v and CONFIG_CHAT_VISION_APPROACH in current_app.config:

app/backend/config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
CONFIG_OPENAI_TOKEN = "openai_token"
2+
CONFIG_CREDENTIAL = "azure_credential"
3+
CONFIG_ASK_APPROACH = "ask_approach"
4+
CONFIG_ASK_VISION_APPROACH = "ask_vision_approach"
5+
CONFIG_CHAT_VISION_APPROACH = "chat_vision_approach"
6+
CONFIG_CHAT_APPROACH = "chat_approach"
7+
CONFIG_BLOB_CONTAINER_CLIENT = "blob_container_client"
8+
CONFIG_AUTH_CLIENT = "auth_client"
9+
CONFIG_GPT4V_DEPLOYED = "gpt4v_deployed"
10+
CONFIG_SEARCH_CLIENT = "search_client"
11+
CONFIG_OPENAI_CLIENT = "openai_client"

app/backend/core/authentication.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import Any, Optional
66

77
import aiohttp
8+
from azure.search.documents.aio import SearchClient
89
from azure.search.documents.indexes.models import SearchIndex
910
from msal import ConfidentialClientApplication
1011
from msal.token_cache import TokenCache
@@ -216,3 +217,23 @@ async def get_auth_claims_if_enabled(self, headers: dict) -> dict[str, Any]:
216217
if self.require_access_control:
217218
raise
218219
return {}
220+
221+
async def check_path_auth(self, path: str, auth_claims: dict[str, Any], search_client: SearchClient) -> bool:
222+
# Start with the standard security filter for all queries
223+
security_filter = self.build_security_filters(overrides={}, auth_claims=auth_claims)
224+
# If there was no security filter, then the path is allowed
225+
if not security_filter:
226+
return True
227+
228+
# Filter down to only chunks that are from the specific source file
229+
filter = f"{security_filter} and (sourcepage eq '{path}')"
230+
231+
# If the filter returns any results, the user is allowed to access the document
232+
# Otherwise, access is denied
233+
results = await search_client.search(search_text="*", top=1, filter=filter)
234+
allowed = False
235+
async for _ in results:
236+
allowed = True
237+
break
238+
239+
return allowed

app/backend/decorators.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import logging
2+
from functools import wraps
3+
from typing import Any, Callable, Dict
4+
5+
from quart import abort, current_app, request
6+
7+
from config import CONFIG_AUTH_CLIENT, CONFIG_SEARCH_CLIENT
8+
from core.authentication import AuthError
9+
from error import error_response
10+
11+
12+
def authenticated_path(route_fn: Callable[[str], Any]):
13+
"""
14+
Decorator for routes that request a specific file that might require access control enforcement
15+
"""
16+
17+
@wraps(route_fn)
18+
async def auth_handler(path=""):
19+
# If authentication is enabled, validate the user can access the file
20+
auth_helper = current_app.config[CONFIG_AUTH_CLIENT]
21+
search_client = current_app.config[CONFIG_SEARCH_CLIENT]
22+
authorized = False
23+
try:
24+
auth_claims = await auth_helper.get_auth_claims_if_enabled(request.headers)
25+
authorized = await auth_helper.check_path_auth(path, auth_claims, search_client)
26+
except AuthError:
27+
abort(403)
28+
except Exception as error:
29+
logging.exception("Problem checking path auth %s", error)
30+
return error_response(error, route="/content")
31+
32+
if not authorized:
33+
abort(403)
34+
35+
return await route_fn(path)
36+
37+
return auth_handler
38+
39+
40+
def authenticated(route_fn: Callable[[Dict[str, Any]], Any]):
41+
"""
42+
Decorator for routes that might require access control. Unpacks Authorization header information into an auth_claims dictionary
43+
"""
44+
45+
@wraps(route_fn)
46+
async def auth_handler():
47+
auth_helper = current_app.config[CONFIG_AUTH_CLIENT]
48+
try:
49+
auth_claims = await auth_helper.get_auth_claims_if_enabled(request.headers)
50+
except AuthError:
51+
abort(403)
52+
53+
return await route_fn(auth_claims)
54+
55+
return auth_handler

app/backend/error.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import logging
2+
3+
from openai import APIError
4+
from quart import jsonify
5+
6+
ERROR_MESSAGE = """The app encountered an error processing your request.
7+
If you are an administrator of the app, view the full error in the logs. See aka.ms/appservice-logs for more information.
8+
Error type: {error_type}
9+
"""
10+
ERROR_MESSAGE_FILTER = """Your message contains content that was flagged by the OpenAI content filter."""
11+
12+
13+
def error_dict(error: Exception) -> dict:
14+
if isinstance(error, APIError) and error.code == "content_filter":
15+
return {"error": ERROR_MESSAGE_FILTER}
16+
return {"error": ERROR_MESSAGE.format(error_type=type(error))}
17+
18+
19+
def error_response(error: Exception, route: str, status_code: int = 500):
20+
logging.exception("Exception in %s: %s", route, error)
21+
if isinstance(error, APIError) and error.code == "content_filter":
22+
status_code = 400
23+
return jsonify(error_dict(error)), status_code

app/frontend/src/api/api.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ const BACKEND_URI = "";
33
import { ChatAppResponse, ChatAppResponseOrError, ChatAppRequest, Config } from "./models";
44
import { useLogin, appServicesToken } from "../authConfig";
55

6-
function getHeaders(idToken: string | undefined): Record<string, string> {
6+
export function getHeaders(idToken: string | undefined): Record<string, string> {
77
var headers: Record<string, string> = {
88
"Content-Type": "application/json"
99
};

app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ import { SupportingContent } from "../SupportingContent";
77
import { ChatAppResponse } from "../../api";
88
import { AnalysisPanelTabs } from "./AnalysisPanelTabs";
99
import { ThoughtProcess } from "./ThoughtProcess";
10+
import { useMsal } from "@azure/msal-react";
11+
import { getHeaders } from "../../api";
12+
import { useLogin, getToken } from "../../authConfig";
13+
import { useState, useEffect } from "react";
1014

1115
interface Props {
1216
className: string;
@@ -23,6 +27,25 @@ export const AnalysisPanel = ({ answer, activeTab, activeCitation, citationHeigh
2327
const isDisabledThoughtProcessTab: boolean = !answer.choices[0].context.thoughts;
2428
const isDisabledSupportingContentTab: boolean = !answer.choices[0].context.data_points;
2529
const isDisabledCitationTab: boolean = !activeCitation;
30+
const [citation, setCitation] = useState("");
31+
32+
const client = useLogin ? useMsal().instance : undefined;
33+
34+
const fetchCitation = async () => {
35+
const token = client ? await getToken(client) : undefined;
36+
if (activeCitation) {
37+
const response = await fetch(activeCitation, {
38+
method: "GET",
39+
headers: getHeaders(token)
40+
});
41+
const citationContent = await response.blob();
42+
const citationObjectUrl = URL.createObjectURL(citationContent);
43+
setCitation(citationObjectUrl);
44+
}
45+
};
46+
useEffect(() => {
47+
fetchCitation();
48+
}, []);
2649

2750
return (
2851
<Pivot
@@ -50,9 +73,9 @@ export const AnalysisPanel = ({ answer, activeTab, activeCitation, citationHeigh
5073
headerButtonProps={isDisabledCitationTab ? pivotItemDisabledStyle : undefined}
5174
>
5275
{activeCitation?.endsWith(".png") ? (
53-
<img src={activeCitation} className={styles.citationImg} />
76+
<img src={citation} className={styles.citationImg} />
5477
) : (
55-
<iframe title="Citation" src={activeCitation} width="100%" height={citationHeight} />
78+
<iframe title="Citation" src={citation} width="100%" height={citationHeight} />
5679
)}
5780
</PivotItem>
5881
</Pivot>

0 commit comments

Comments
 (0)