diff --git a/End_to_end_Solutions/AOAISearchDemo/scripts/indexing/prepdocs.py b/End_to_end_Solutions/AOAISearchDemo/scripts/indexing/prepdocs.py
index 5c87789c..7993caf0 100644
--- a/End_to_end_Solutions/AOAISearchDemo/scripts/indexing/prepdocs.py
+++ b/End_to_end_Solutions/AOAISearchDemo/scripts/indexing/prepdocs.py
@@ -69,6 +69,14 @@
 parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
 args = parser.parse_args()
 
+# Define a safe root directory
+SAFE_ROOT = os.path.abspath(os.getcwd())
+
+# Normalize and validate the user-provided path (component-wise, so "<root>-other" is rejected)
+user_path = os.path.abspath(os.path.normpath(args.files))
+if os.path.commonpath([SAFE_ROOT, user_path]) != SAFE_ROOT:
+    raise ValueError("The provided path is not allowed.")
+
 # Use the current user identity to connect to Azure services unless a key is explicitly set for any of them
 azd_credential = AzureDeveloperCliCredential() if args.tenantid == None else AzureDeveloperCliCredential(
     tenant_id=args.tenantid)
@@ -360,7 +368,7 @@ def remove_from_index(filename):
         create_search_index()
 
     print(f"Processing files...")
-    for root, dirs, files in os.walk(args.files):
+    for root, dirs, files in os.walk(user_path):
         for file in files:
             filename = os.path.join(root, file)
             if args.verbose: print(f"Processing '{filename}'")
diff --git a/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/prepopulate.py b/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/prepopulate.py
index a2d95f7e..42757d04 100644
--- a/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/prepopulate.py
+++ b/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/prepopulate.py
@@ -9,9 +9,16 @@
 from data.managers.permissions.manager import PermissionsManager
 from typing import List, Set
 
+import os
+
+SAFE_ROOT = os.path.abspath("./entries")
+
 def read_yaml(file_path, verbose) -> dict:
     if verbose: print(f"Reading YAML file: {file_path}")
-    with open(file_path, "r") as f:
+    normalized_path = os.path.normpath(os.path.join(SAFE_ROOT, file_path))
+    if os.path.commonpath([SAFE_ROOT, normalized_path]) != SAFE_ROOT:
+        raise Exception(f"Invalid file path: {file_path}")
+    with open(normalized_path, "r") as f:
         try:
             return yaml.safe_load(f)
         except yaml.YAMLError as exc:
diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py
index 0b8c56f2..67d2f8c4 100644
--- a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py
+++ b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/__main__.py
@@ -176,12 +176,9 @@ def get_arguments():
     experiment_id = f"RAG-Bot-Eval_Dataset_eq_{aml_dataset}_Start_eq_{current_time}"
     parsed_config.experiment_id = experiment_id
 
-    base_path = os.path.join(os.path.dirname(__file__), "results")
-    save_path = os.path.normpath(os.path.join(base_path, experiment_id))
-    if not save_path.startswith(base_path):
-        raise Exception("Invalid experiment ID resulting in unsafe path.")
+    save_path = os.path.join(os.path.dirname(__file__), f"results/{experiment_id}")
     os.makedirs(save_path, exist_ok=True)
-    with open(os.path.join(save_path, "config.json"), "w") as f:
+    with open(f"{save_path}/config.json", "w") as f:
         json.dump(vars(parsed_config), f, indent=4)
 
     return parsed_config
@@ -203,10 +200,7 @@ def load_previous_run_config():
         default=None,
     )
     parsed_config = arg_parser.parse_args()
-    base_path = os.path.join(os.path.dirname(__file__), "results")
-    config_path = os.path.normpath(os.path.join(base_path, parsed_config.resume_run_id, "config.json"))
-    if not config_path.startswith(base_path):
-        raise Exception("Invalid resume run ID resulting in unsafe path.")
+    config_path = f"{os.path.dirname(__file__)}/results/{parsed_config.resume_run_id}/config.json"
     parsed_config = json.load(open(config_path, "r"))
     print(f"Resuming run with ID: {parsed_config['experiment_id']}")
 
@@ -719,14 +713,11 @@ def evaluate(config: argparse.Namespace):
 
     combined_results = {
         "config": config.__dict__,
-        "metrics": json.load(open(os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "run_metrics.json")))),
-        "answers": pd.read_csv(os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "run_details.csv"))).to_dict(),
+        "metrics": json.load(open(f"{current_dir}/results/{config.experiment_id}/run_metrics.json")),
+        "answers": pd.read_csv(f"{current_dir}/results/{config.experiment_id}/run_details.csv").to_dict(),
     }
-    metrics_path = os.path.normpath(os.path.join(current_dir, "results", config.experiment_id, "combined_results.json"))
-    if not metrics_path.startswith(os.path.join(current_dir, "results")):
-        raise Exception("Invalid experiment ID resulting in unsafe path.")
     json.dump(
-        combined_results, open(metrics_path, "w"), indent=4
+        combined_results, open(f"{current_dir}/results/{config.experiment_id}/combined_results.json", "w"), indent=4
     )
 
     return combined_results
diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/evaluation_params.py b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/evaluation_params.py
index f092edae..041d75ae 100644
--- a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/evaluation_params.py
+++ b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/evaluation_params.py
@@ -186,10 +186,13 @@ def get_arguments():
     experiment_id = f"RAG-Eval_Dataset_eq_{dataset_name}_Start_eq_{current_time}"
     parsed_config.experiment_id = experiment_id
 
-    save_path = os.path.join(os.path.dirname(__file__), f"results/{experiment_id}")
+    base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "results"))
+    save_path = os.path.abspath(os.path.join(base_path, experiment_id))
+    if os.path.commonpath([base_path, save_path]) != base_path:
+        raise Exception("Invalid experiment ID resulting in unsafe path")
     parsed_config.save_path = save_path
     os.makedirs(save_path, exist_ok=True)
-    with open(f"{save_path}/config.json", "w") as f:
+    with open(os.path.join(save_path, "config.json"), "w") as f:
         json.dump(vars(parsed_config), f, indent=4)
 
     return parsed_config
diff --git a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt
index b1be9e53..01fa7a6c 100644
--- a/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt
+++ b/Solution_Accelerators/Advanced_RAG/src/evals/rag_eval/requirements.txt
@@ -21,4 +21,4 @@ build==1.2.2.post1
 marshmallow==3.23.2
 azure-keyvault-secrets==4.7.0
 opencensus-ext-azure==1.1.9
-opencensus==0.11.2
\ No newline at end of file
+opencensus==0.11.2
diff --git a/Solution_Accelerators/Advanced_RAG/src/session_manager/app.py b/Solution_Accelerators/Advanced_RAG/src/session_manager/app.py
index e39fe1dc..7882f3c1 100644
--- a/Solution_Accelerators/Advanced_RAG/src/session_manager/app.py
+++ b/Solution_Accelerators/Advanced_RAG/src/session_manager/app.py
@@ -108,7 +108,11 @@ async def health_check(request: web.Request):
 @routes.get("/assets/{rest_of_path}")
 async def assets(request: web.Request):
     rest_of_path = request.match_info.get("rest_of_path", None)
-    return web.FileResponse(f"assets/{rest_of_path}")
+    base_path = "assets"
+    full_path = os.path.normpath(os.path.join(base_path, rest_of_path))
+    if not full_path.startswith(base_path + os.sep):
+        raise web.HTTPForbidden(reason="Invalid path")
+    return web.FileResponse(full_path)
 
 
 # Serve content files from blob storage from within the app to keep the example self-contained.
diff --git a/Solution_Accelerators/Retail/src/session_manager/app.py b/Solution_Accelerators/Retail/src/session_manager/app.py
index bec8a6a4..0abd3d0a 100644
--- a/Solution_Accelerators/Retail/src/session_manager/app.py
+++ b/Solution_Accelerators/Retail/src/session_manager/app.py
@@ -108,7 +108,11 @@ async def health_check(request: web.Request):
 @routes.get("/assets/{rest_of_path}")
 async def assets(request: web.Request):
     rest_of_path = request.match_info.get("rest_of_path", None)
-    return web.FileResponse(f"assets/{rest_of_path}")
+    base_path = "assets"
+    full_path = os.path.normpath(os.path.join(base_path, rest_of_path))
+    if not full_path.startswith(base_path + os.sep):
+        raise web.HTTPForbidden(reason="Invalid path")
+    return web.FileResponse(full_path)
 
 
 # Serve content files from blob storage from within the app to keep the example self-contained.