Skip to content

Commit f0b57c4

Browse files
fix: Added Method to download the blob file via API
1 parent 70a0801 commit f0b57c4

File tree

2 files changed

+117
-3
lines changed

2 files changed

+117
-3
lines changed

code/create_app.py

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,15 @@
1414
from openai.types.chat import ChatCompletionChunk
1515
from flask import Flask, Response, request, Request, jsonify
1616
from dotenv import load_dotenv
17-
from urllib.parse import quote
17+
from urllib.parse import quote, urlparse
1818
from backend.batch.utilities.helpers.env_helper import EnvHelper
1919
from backend.batch.utilities.helpers.azure_search_helper import AzureSearchHelper
2020
from backend.batch.utilities.helpers.orchestrator_helper import Orchestrator
2121
from backend.batch.utilities.helpers.config.config_helper import ConfigHelper
2222
from backend.batch.utilities.helpers.config.conversation_flow import ConversationFlow
2323
from backend.api.chat_history import bp_chat_history_response
2424
from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient
25+
from azure.core.exceptions import ClientAuthenticationError, ResourceNotFoundError, ServiceRequestError
2526
from backend.batch.utilities.helpers.azure_credential_utils import get_azure_credential
2627
from backend.batch.utilities.helpers.azure_blob_storage_client import (
2728
AzureBlobStorageClient,
@@ -419,6 +420,96 @@ def static_file(path):
419420
def health():
420421
return "OK"
421422

423+
@app.route("/api/files/<filename>", methods=["GET"])
def get_file(filename):
    """
    Download a file from the 'documents' container in Azure Blob Storage.

    The filename comes straight from the URL path, so it is treated as
    untrusted input and validated against a strict whitelist before it is
    passed to the storage client or echoed into a response header.

    Args:
        filename (str): Name of the blob to retrieve.

    Returns:
        Flask Response: The file content (200) with download and security
        headers, or a JSON error body with one of these status codes:
        400 (invalid filename), 401 (storage authentication failed),
        404 (file not found), 503 (storage service unreachable),
        500 (any other error).
    """
    logger.info("File download request for: %s", filename)

    try:
        if not filename:
            logger.warning("Empty filename provided")
            return jsonify({"error": "Filename is required"}), 400

        # Prevent path traversal attacks
        if '..' in filename or '/' in filename or '\\' in filename:
            logger.warning("Invalid filename with path traversal attempt: %s", filename)
            return jsonify({"error": "Invalid filename"}), 400

        if len(filename) > 255:
            logger.warning("Filename too long: %s", filename)
            return jsonify({"error": "Filename too long"}), 400

        # Only allow alphanumerics, dots, dashes, underscores and plain spaces.
        # fullmatch with a literal space (not \s, and not match(...$)) rejects
        # tabs, CR and LF anywhere in the name, including a trailing newline,
        # so control characters never reach the Content-Disposition header.
        if not re.fullmatch(r'[a-zA-Z0-9._\- ]+', filename):
            logger.warning("Filename contains invalid characters: %s", filename)
            return jsonify({"error": "Invalid filename characters"}), 400

        blob_client = AzureBlobStorageClient(container_name="documents")

        if not blob_client.file_exists(filename):
            logger.info("File not found: %s", filename)
            return jsonify({"error": "File not found"}), 404

        # NOTE(review): the whole blob is held in memory (len() is taken on it
        # below); consider streaming for large files.
        file_data = blob_client.download_file(filename)

        # Fall back to a generic binary type when the extension is unknown.
        content_type, _ = mimetypes.guess_type(filename)
        if not content_type:
            content_type = 'application/octet-stream'

        file_size = len(file_data)
        logger.info("File downloaded successfully: %s, size: %d bytes", filename, file_size)

        if file_size > 10 * 1024 * 1024:  # 10MB threshold
            logger.info("Large file detected: %s, size: %d bytes", filename, file_size)

        # NOTE(review): 'public' lets shared caches store the document; confirm
        # this is acceptable for the content kept in this container.
        return Response(
            file_data,
            status=200,
            mimetype=content_type,
            headers={
                'Content-Disposition': f'inline; filename="{filename}"',
                'Content-Length': str(file_size),
                'Cache-Control': 'public, max-age=3600',
                'X-Content-Type-Options': 'nosniff',
                'X-Frame-Options': 'DENY',
                'Content-Security-Policy': "default-src 'none'"
            }
        )

    except ClientAuthenticationError as e:
        logger.error("Authentication failed for file %s: %s", filename, str(e))
        return jsonify({"error": "Authentication failed"}), 401
    except ResourceNotFoundError:
        # The blob can disappear between the existence check and the download.
        logger.info("File not found: %s", filename)
        return jsonify({"error": "File not found"}), 404
    except ServiceRequestError as e:
        logger.error("Storage service error for file %s: %s", filename, str(e))
        return jsonify({"error": "Storage service unavailable"}), 503
    except Exception as e:
        error_message = str(e)
        logger.exception("Unexpected error downloading file %s: %s", filename, error_message)
        return jsonify({"error": "Internal server error"}), 500
512+
422513
def conversation_azure_byod():
423514
logger.info("Method conversation_azure_byod started")
424515
try:

code/frontend/src/components/CitationPanel/CitationPanel.tsx

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,23 @@ type CitationPanelProps = {
1111
setIsCitationPanelOpen: (flag: boolean) => void;
1212
};
1313

14+
/**
 * Point the first markdown link in a citation at the backend file endpoint.
 *
 * The link `[title](url)` is rewritten to `[title](/api/files/<filename>)`,
 * where `<filename>` is the last path segment of `url`; the query string and
 * host are dropped.
 *
 * Only the FIRST link is rewritten (the regex is intentionally not global),
 * presumably because the citation text starts with the source-document link
 * and later links belong to the document content — confirm against the
 * backend's citation format.
 *
 * The original link is kept unchanged when the URL cannot be parsed (e.g. a
 * relative URL) or when its path has no final segment (e.g. it ends in "/"),
 * since `/api/files/` without a filename is not a valid download route.
 */
function rewriteCitationUrl(markdownText: string) {
  return markdownText.replace(
    /\[([^\]]+)\]\(([^)]+)\)/,
    (match, title, url) => {
      try {
        // pathname stays percent-encoded, so the segment is safe to embed
        // directly in the rewritten link.
        const filename = new URL(url).pathname.split('/').pop();
        if (!filename) {
          return match; // no file segment to download
        }
        return `[${title}](/api/files/${filename})`;
      } catch {
        return match; // fallback if URL parsing fails
      }
    }
  );
}
30+
1431
export const CitationPanel: React.FC<CitationPanelProps> = (props) => {
1532
const { activeCitation, setIsCitationPanelOpen } = props;
1633
return (
@@ -46,11 +63,17 @@ export const CitationPanel: React.FC<CitationPanelProps> = (props) => {
4663
Please follow the link to review the original document.
4764
</div>
4865
<ReactMarkdown
49-
className={`${styles.citationPanelContent} ${styles.mobileCitationPanelContent}`}
50-
children={activeCitation[0]}
66+
className="citation-panel"
67+
children={rewriteCitationUrl(activeCitation[0])}
5168
remarkPlugins={[remarkGfm]}
5269
rehypePlugins={[rehypeRaw]}
70+
components={{
71+
a: ({ node, ...props }) => (
72+
<a {...props} target="_blank" rel="noopener noreferrer" />
73+
),
74+
}}
5375
/>
76+
5477
</Stack.Item>
5578
);
5679
};

0 commit comments

Comments
 (0)