diff --git a/HISTORY.md b/HISTORY.md index 87cf291a..196ed26a 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,18 @@ # History +## v0.12.0 - 2025-11-20 + +### New Features + +* Rename create_sdv_synthesizer_variant to create_synthesizer_variant - Issue [#491](https://github.com/sdv-dev/SDGym/issues/491) by @R-Palazzo +* SDGym should be able to automatically discover SDV Enterprise synthesizers - Issue [#481](https://github.com/sdv-dev/SDGym/issues/481) by @R-Palazzo +* Incorporate the `get_available_datasets` functionality into the `DatasetExplorer` - Issue [#473](https://github.com/sdv-dev/SDGym/issues/473) by @fealho + +### Bugs Fixed + +* Update result aggregation logic in the ResultExplorer to match new naming schema - Issue [#494](https://github.com/sdv-dev/SDGym/issues/494) by @R-Palazzo +* When running a benchmark locally, the `additional_datasets_folder` path should be the root path - Issue [#484](https://github.com/sdv-dev/SDGym/issues/484) by @fealho + ## v0.11.1 - 2025-11-03 ### Bugs Fixed diff --git a/static_code_analysis.txt b/static_code_analysis.txt index b81cbb15..9c1d44fd 100644 --- a/static_code_analysis.txt +++ b/static_code_analysis.txt @@ -1,4 +1,4 @@ -Run started:2025-11-03 21:42:26.598320 +Run started:2025-11-20 21:17:17.099743 Test results: >> Issue: [B403:blacklist] Consider possible security implications associated with pickle module. @@ -15,105 +15,105 @@ Test results: Severity: Low Confidence: High CWE: CWE-703 (https://cwe.mitre.org/data/definitions/703.html) More Info: https://bandit.readthedocs.io/en/1.7.7/plugins/b101_assert_used.html - Location: ./sdgym/benchmark.py:327:8 -326 if isinstance(synthesizer, type): -327 assert issubclass(synthesizer, BaselineSynthesizer), ( -328 '`synthesizer` must be a synthesizer class' -329 ) -330 synthesizer = synthesizer() + Location: ./sdgym/benchmark.py:331:8 +330 if isinstance(synthesizer, type): +331 assert issubclass(synthesizer, BaselineSynthesizer), ( +332 '`synthesizer` must be a synthesizer class' +333 ) +334 synthesizer = synthesizer() -------------------------------------------------- >> Issue: [B101:assert_used] Use of assert detected. The enclosed code will be removed when compiling to optimised byte code. Severity: Low Confidence: High CWE: CWE-703 (https://cwe.mitre.org/data/definitions/703.html) More Info: https://bandit.readthedocs.io/en/1.7.7/plugins/b101_assert_used.html - Location: ./sdgym/benchmark.py:332:8 -331 else: -332 assert issubclass(type(synthesizer), BaselineSynthesizer), ( -333 '`synthesizer` must be an instance of a synthesizer class.' -334 ) -335 + Location: ./sdgym/benchmark.py:336:8 +335 else: +336 assert issubclass(type(synthesizer), BaselineSynthesizer), ( +337 '`synthesizer` must be an instance of a synthesizer class.' +338 ) +339 -------------------------------------------------- >> Issue: [B608:hardcoded_sql_expressions] Possible SQL injection vector through string-based query construction. Severity: Medium Confidence: Low CWE: CWE-89 (https://cwe.mitre.org/data/definitions/89.html) More Info: https://bandit.readthedocs.io/en/1.7.7/plugins/b608_hardcoded_sql_expressions.html - Location: ./sdgym/benchmark.py:940:27 -939 # User data script to install the library -940 user_data_script = f"""#!/bin/bash -941 sudo apt update -y -942 sudo apt install -y python3-pip python3-venv awscli -943 echo "======== Create Virtual Environment ============" -944 python3 -m venv ~/env -945 source ~/env/bin/activate -946 echo "======== Install Dependencies in venv ============" -947 pip install --upgrade pip -948 pip install sdgym[all] -949 pip install anyio -950 echo "======== Configure AWS CLI ============" -951 aws configure set aws_access_key_id {credentials.access_key} -952 aws configure set aws_secret_access_key {credentials.secret_key} -953 aws configure set region {session.region_name} -954 echo "======== Write Script ===========" -955 printf '%s\\n' "{escaped_script}" > ~/sdgym_script.py -956 echo "======== Run Script ===========" -957 python ~/sdgym_script.py -958 -959 echo "======== Complete ===========" -960 INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) -961 aws ec2 terminate-instances --instance-ids $INSTANCE_ID -962 """ -963 + Location: ./sdgym/benchmark.py:944:27 +943 # User data script to install the library +944 user_data_script = f"""#!/bin/bash +945 sudo apt update -y +946 sudo apt install -y python3-pip python3-venv awscli +947 echo "======== Create Virtual Environment ============" +948 python3 -m venv ~/env +949 source ~/env/bin/activate +950 echo "======== Install Dependencies in venv ============" +951 pip install --upgrade pip +952 pip install sdgym[all] +953 pip install anyio +954 echo "======== Configure AWS CLI ============" +955 aws configure set aws_access_key_id {credentials.access_key} +956 aws configure set aws_secret_access_key {credentials.secret_key} +957 aws configure set region {session.region_name} +958 echo "======== Write Script ===========" +959 printf '%s\\n' "{escaped_script}" > ~/sdgym_script.py +960 echo "======== Run Script ===========" +961 python ~/sdgym_script.py +962 +963 echo "======== Complete ===========" +964 INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) +965 aws ec2 terminate-instances --instance-ids $INSTANCE_ID +966 """ +967 -------------------------------------------------- >> Issue: [B608:hardcoded_sql_expressions] Possible SQL injection vector through string-based query construction. Severity: Medium Confidence: Low CWE: CWE-89 (https://cwe.mitre.org/data/definitions/89.html) More Info: https://bandit.readthedocs.io/en/1.7.7/plugins/b608_hardcoded_sql_expressions.html - Location: ./sdgym/benchmark.py:1406:31 -1405 def _get_user_data_script(access_key, secret_key, region_name, script_content): -1406 return textwrap.dedent(f"""\ -1407 #!/bin/bash -1408 set -e -1409 -1410 # Always terminate the instance when the script exits (success or failure) -1411 trap ' -1412 INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id); -1413 echo "======== Terminating EC2 instance: $INSTANCE_ID =========="; -1414 aws ec2 terminate-instances --instance-ids $INSTANCE_ID; -1415 ' EXIT -1416 -1417 exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 -1418 echo "======== Update and Install Dependencies ============" -1419 sudo apt update -y -1420 sudo apt install -y python3-pip python3-venv awscli -1421 echo "======== Configure AWS CLI ============" -1422 aws configure set aws_access_key_id '{access_key}' -1423 aws configure set aws_secret_access_key '{secret_key}' -1424 aws configure set default.region '{region_name}' -1425 -1426 echo "======== Create Virtual Environment ============" -1427 python3 -m venv ~/env -1428 source ~/env/bin/activate + Location: ./sdgym/benchmark.py:1410:31 +1409 def _get_user_data_script(access_key, secret_key, region_name, script_content): +1410 return textwrap.dedent(f"""\ +1411 #!/bin/bash +1412 set -e +1413 +1414 # Always terminate the instance when the script exits (success or failure) +1415 trap ' +1416 INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id); +1417 echo "======== Terminating EC2 instance: $INSTANCE_ID =========="; +1418 aws ec2 terminate-instances --instance-ids $INSTANCE_ID; +1419 ' EXIT +1420 +1421 exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 +1422 echo "======== Update and Install Dependencies ============" +1423 sudo apt update -y +1424 sudo apt install -y python3-pip python3-venv awscli +1425 echo "======== Configure AWS CLI ============" +1426 aws configure set aws_access_key_id '{access_key}' +1427 aws configure set aws_secret_access_key '{secret_key}' +1428 aws configure set default.region '{region_name}' 1429 -1430 echo "======== Install Dependencies in venv ============" -1431 pip install --upgrade pip -1432 pip install sdgym[all] -1433 pip install s3fs -1434 -1435 echo "======== Write Script ===========" -1436 cat << 'EOF' > ~/sdgym_script.py -1437 {script_content} -1438 EOF -1439 -1440 echo "======== Run Script ===========" -1441 python ~/sdgym_script.py -1442 echo "======== Complete ===========" -1443 INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) -1444 aws ec2 terminate-instances --instance-ids $INSTANCE_ID -1445 """).strip() -1446 +1430 echo "======== Create Virtual Environment ============" +1431 python3 -m venv ~/env +1432 source ~/env/bin/activate +1433 +1434 echo "======== Install Dependencies in venv ============" +1435 pip install --upgrade pip +1436 pip install sdgym[all] +1437 pip install s3fs +1438 +1439 echo "======== Write Script ===========" +1440 cat << 'EOF' > ~/sdgym_script.py +1441 {script_content} +1442 EOF +1443 +1444 echo "======== Run Script ===========" +1445 python ~/sdgym_script.py +1446 echo "======== Complete ===========" +1447 INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) +1448 aws ec2 terminate-instances --instance-ids $INSTANCE_ID +1449 """).strip() +1450 -------------------------------------------------- >> Issue: [B403:blacklist] Consider possible security implications associated with pickle module. @@ -130,20 +130,20 @@ Test results: Severity: Medium Confidence: High CWE: CWE-502 (https://cwe.mitre.org/data/definitions/502.html) More Info: https://bandit.readthedocs.io/en/1.7.7/blacklists/blacklist_calls.html#b301-pickle - Location: ./sdgym/result_explorer/result_handler.py:253:19 -252 with open(os.path.join(self.base_path, file_path), 'rb') as f: -253 return pickle.load(f) -254 + Location: ./sdgym/result_explorer/result_handler.py:262:19 +261 with open(os.path.join(self.base_path, file_path), 'rb') as f: +262 return pickle.load(f) +263 -------------------------------------------------- >> Issue: [B301:blacklist] Pickle and modules that wrap it can be unsafe when used to deserialize untrusted data, possible security issue. Severity: Medium Confidence: High CWE: CWE-502 (https://cwe.mitre.org/data/definitions/502.html) More Info: https://bandit.readthedocs.io/en/1.7.7/blacklists/blacklist_calls.html#b301-pickle - Location: ./sdgym/result_explorer/result_handler.py:364:15 -363 ) -364 return pickle.loads(response['Body'].read()) -365 + Location: ./sdgym/result_explorer/result_handler.py:373:15 +372 ) +373 return pickle.loads(response['Body'].read()) +374 -------------------------------------------------- >> Issue: [B403:blacklist] Consider possible security implications associated with pickle module. @@ -209,20 +209,20 @@ Test results: Severity: Low Confidence: High CWE: CWE-78 (https://cwe.mitre.org/data/definitions/78.html) More Info: https://bandit.readthedocs.io/en/1.7.7/plugins/b603_subprocess_without_shell_equals_true.html - Location: ./sdgym/utils.py:134:17 -133 command = ['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'] -134 output = subprocess.run(command, stdout=subprocess.PIPE) -135 return len(output.stdout.decode().split()) + Location: ./sdgym/utils.py:143:17 +142 command = ['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'] +143 output = subprocess.run(command, stdout=subprocess.PIPE) +144 return len(output.stdout.decode().split()) -------------------------------------------------- >> Issue: [B603:subprocess_without_shell_equals_true] subprocess call - check for execution of untrusted input. Severity: Low Confidence: High CWE: CWE-78 (https://cwe.mitre.org/data/definitions/78.html) More Info: https://bandit.readthedocs.io/en/1.7.7/plugins/b603_subprocess_without_shell_equals_true.html - Location: ./sdgym/utils.py:150:17 -149 command = ['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'] -150 output = subprocess.run(command, stdout=subprocess.PIPE) -151 loads = np.array(output.stdout.decode().split()).astype(float) + Location: ./sdgym/utils.py:159:17 +158 command = ['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'] +159 output = subprocess.run(command, stdout=subprocess.PIPE) +160 loads = np.array(output.stdout.decode().split()).astype(float) -------------------------------------------------- >> Issue: [B105:hardcoded_password_string] Possible hardcoded password: 'sdgym[' @@ -237,7 +237,7 @@ Test results: -------------------------------------------------- Code scanned: - Total lines of code: 4969 + Total lines of code: 4839 Total lines skipped (#nosec): 0 Total potential issues skipped due to specifically being disabled (e.g., #nosec BXXX): 0