github
diff --git a/‎python/ql/src/experimental/Security/CWE-409/DecompressionBombs.qhelp
Lines changed: 34 additions & 0 deletions b/‎python/ql/src/experimental/Security/CWE-409/DecompressionBombs.qhelp
Lines changed: 34 additions & 0 deletions
diff --git a/‎python/ql/src/experimental/Security/CWE-409/DecompressionBombs.ql
Lines changed: 21 additions & 0 deletions b/‎python/ql/src/experimental/Security/CWE-409/DecompressionBombs.ql
Lines changed: 21 additions & 0 deletions
diff --git a/‎python/ql/src/experimental/Security/CWE-409/example_bad.py
Lines changed: 5 additions & 0 deletions b/‎python/ql/src/experimental/Security/CWE-409/example_bad.py
Lines changed: 5 additions & 0 deletions
diff --git a/‎python/ql/src/experimental/Security/CWE-409/example_good.py
Lines changed: 34 additions & 0 deletions b/‎python/ql/src/experimental/Security/CWE-409/example_good.py
Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+<overview>
+<p>Extracting compressed files with any compression algorithm, such as gzip, can lead to denial-of-service attacks.</p>
+<p>Attackers can take a huge file that consists of repeated similar bytes and compress it into a very small file.</p>
+
+</overview>
+<recommendation>
+
+<p>When you decompress a user-provided compressed file, you must either be careful about the decompression ratio or read the file within a loop, byte by byte, so that you can manage the decompressed size in each cycle of the loop.</p>
+
+</recommendation>
+<example>
+<p>The Python ZipFile library is vulnerable by default:</p>
+<sample src="example_bad.py" />
+
+<p>By checking the decompressed size declared in the input zip file you can check the decompression ratio. However, attackers can forge this decompressed size header too,
+so you cannot rely on the file_size attribute of the ZipInfo class. It is recommended to use the "ZipFile.open" method so that you can manage the decompressed size yourself.</p>
+<p>Reading the decompressed file byte by byte and verifying the total current size in each loop cycle is recommended with any decompression library.</p>
+<sample src="example_good.py" />
+</example>
+<references>
+
+<li>
+<a href="https://nvd.nist.gov/vuln/detail/CVE-2023-22898">CVE-2023-22898</a>
+</li>
+<li>
+<a href="https://www.bamsoftware.com/hacks/zipbomb/">Excellent research on how to gain more impact from this kind of attack</a>
+</li>
+
+</references>
+</qhelp>
@@ -0,0 +1,21 @@
+/**
+ * @name Decompression Bomb
+ * @description Uncontrolled data that flows into decompression library APIs without checking the compression rate is dangerous
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 7.8
+ * @precision high
+ * @id py/decompression-bomb
+ * @tags security
+ *       experimental
+ *       external/cwe/cwe-409
+ */
+
+import python
+import experimental.semmle.python.security.DecompressionBomb
+import BombsFlow::PathGraph
+
+// Report every data-flow path that BombsFlow::flowPath finds between a source and a sink.
+// What counts as a source or a sink is defined in the imported DecompressionBomb library,
+// not in this file.
+from BombsFlow::PathNode source, BombsFlow::PathNode sink
+where BombsFlow::flowPath(source, sink)
+// The alert is placed on the sink node; `source` and `sink` are the path endpoints.
+// "$@" in the message is a placeholder filled from the (source.getNode(), text) pair that follows.
+// NOTE(review): the rendered message reads "... file extraction is depends on this user
+// controlled data." - consider rewording; confirm against expected test output first.
+select sink.getNode(), source, sink, "This uncontrolled file extraction is $@.", source.getNode(),
+  "depends on this user controlled data"
@@ -0,0 +1,5 @@
+import zipfile
+
+
+def Bad(zip_path):
+    zipfile.ZipFile(zip_path, "r").extractall()
@@ -0,0 +1,34 @@
+import zipfile
+
+
+def safeUnzip(zipFileName):
+    '''
+    safeUnzip reads each file inside the zipfile 1 MB by 1 MB
+    and during extraction or reading of these files it checks the total decompressed size
+    doesn't exceed the SIZE_THRESHOLD
+    '''
+    buffer_size = 1024 * 1024 * 1  # 1 MB
+    total_size = 0
+    SIZE_THRESHOLD = 1024 * 1024 * 10  # 10 MB
+    with zipfile.ZipFile(zipFileName) as myzip:
+        for fileinfo in myzip.infolist():
+            with myzip.open(fileinfo.filename, mode="r") as myfile:
+                content = b''
+                chunk = myfile.read(buffer_size)
+                total_size += buffer_size
+                if total_size > SIZE_THRESHOLD:
+                    print("Bomb detected")
+                    return False  # it isn't a successful extract or read
+                content += chunk
+                # reading next bytes of uncompressed data
+                while chunk:
+                    chunk = myfile.read(buffer_size)
+                    total_size += buffer_size
+                    if total_size > SIZE_THRESHOLD:
+                        print("Bomb detected")
+                        return False  # it isn't a successful extract or read
+                    content += chunk
+
+                # An example of extracting or reading each decompressed file here
+                print(bytes.decode(content, 'utf-8'))
+    return True  # it is a successful extract or read