codeqlhelper
diff --git a/‎ruby/ql/src/experimental/CWE-522-DecompressionBombs/BombsV1.ql
Lines changed: 479 additions & 0 deletions b/‎ruby/ql/src/experimental/CWE-522-DecompressionBombs/BombsV1.ql
Lines changed: 479 additions & 0 deletions
diff --git a/‎ruby/ql/src/experimental/CWE-522-DecompressionBombs/BombsV2.ql
Lines changed: 179 additions & 0 deletions b/‎ruby/ql/src/experimental/CWE-522-DecompressionBombs/BombsV2.ql
Lines changed: 179 additions & 0 deletions
diff --git a/‎ruby/ql/src/experimental/CWE-522-DecompressionBombs/DecompressionBombs.qhelp
Lines changed: 31 additions & 0 deletions b/‎ruby/ql/src/experimental/CWE-522-DecompressionBombs/DecompressionBombs.qhelp
Lines changed: 31 additions & 0 deletions
diff --git a/‎ruby/ql/src/experimental/CWE-522-DecompressionBombs/example_bad.rb
Lines changed: 5 additions & 0 deletions b/‎ruby/ql/src/experimental/CWE-522-DecompressionBombs/example_bad.rb
Lines changed: 5 additions & 0 deletions
diff --git a/‎ruby/ql/src/experimental/CWE-522-DecompressionBombs/example_good.rb
Lines changed: 11 additions & 0 deletions b/‎ruby/ql/src/experimental/CWE-522-DecompressionBombs/example_good.rb
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,179 @@
+/**
+ * @name User-controlled file decompression
+ * @description User-controlled data that flows into decompression library APIs without checking the compression rate is dangerous
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 7.8
+ * @precision high
+ * @id rb/user-controlled-file-decompression
+ * @tags security
+ *       experimental
+ *       external/cwe/cwe-409
+ */
+
+import codeql.ruby.AST
+import codeql.ruby.ApiGraphs
+import codeql.ruby.DataFlow
+import codeql.ruby.dataflow.RemoteFlowSources
+import codeql.ruby.TaintTracking
+import DataFlow::PathGraph
+
+module DecompressionBombs {
+  /**
+   * A data-flow node that this query treats as a sink for decompression-bomb
+   * taint tracking. Concrete sinks are contributed by subclasses.
+   */
+  abstract class DecompressionBombSink extends DataFlow::Node { }
+
+  module Zlib {
+    /**
+     * Gets the API node for the `Zlib::GzipReader` class.
+     */
+    private API::Node gzipReader() {
+      result = API::getTopLevelMember("Zlib").getMember("GzipReader")
+    }
+
+    /**
+     * Gets the API node for a call to one of
+     * `Zlib::GzipReader.open`, `Zlib::GzipReader.new` or `Zlib::GzipReader.zcat`.
+     */
+    private API::Node gzipReaderCall() {
+      result = gzipReader().getMethod(["open", "new", "zcat"])
+    }
+
+    /**
+     * The value returned by `Zlib::GzipReader.open`, `Zlib::GzipReader.new`
+     * or `Zlib::GzipReader.zcat`.
+     *
+     * The returned value itself is the sink; later uses of it are not followed.
+     */
+    class ZipSink extends DecompressionBombSink {
+      ZipSink() { this = gzipReaderCall().getReturn().asSource() }
+    }
+
+    /**
+     * Holds if `nodeFrom` is the first argument of one of the `Zlib::GzipReader`
+     * calls listed above and `nodeTo` is the value returned by that API call.
+     */
+    predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+      exists(API::Node call | call = gzipReaderCall() |
+        nodeTo = call.getReturn().asSource() and
+        nodeFrom = call.getParameter(0).asSink()
+      )
+    }
+  }
+
+  module ZipInputStream {
+    /**
+     * Gets the API node for the `Zip::InputStream` class.
+     *
+     * The rubyzip documentation warns:
+     * > Note that if you use the lower level Zip::InputStream interface, rubyzip does not check the entry sizes.
+     *
+     * Because of that warning there is no need to follow the stream after
+     * `open()` or `new()`; modelling those two calls is enough.
+     */
+    private API::Node inputStreamClass() {
+      result = API::getTopLevelMember("Zip").getMember("InputStream")
+    }
+
+    /**
+     * Gets the API node for a call to `Zip::InputStream.open` or `Zip::InputStream.new`.
+     */
+    private API::Node inputStreamCall() { result = inputStreamClass().getMethod(["open", "new"]) }
+
+    /**
+     * The value returned by `Zip::InputStream.open` or `Zip::InputStream.new`.
+     */
+    class ZipSink extends DecompressionBombSink {
+      ZipSink() { this = inputStreamCall().getReturn().asSource() }
+    }
+
+    /**
+     * Holds if `nodeFrom` is the first argument of `Zip::InputStream.open` or
+     * `Zip::InputStream.new` and `nodeTo` is the value returned by that API call.
+     */
+    predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+      exists(API::Node call | call = inputStreamCall() |
+        nodeTo = call.getReturn().asSource() and
+        nodeFrom = call.getParameter(0).asSink()
+      )
+    }
+  }
+
+  module ZipFile {
+    /**
+     * Gets the API node for the `Zip::File` class.
+     */
+    private API::Node zipFile() { result = API::getTopLevelMember("Zip").getMember("File") }
+
+    /**
+     * Gets `n` itself, or an API node reachable from `n` by repeatedly taking
+     * - the return value of any method call,
+     * - any parameter of a block passed to any method call,
+     * - the first argument of any method call, or
+     * - an element.
+     *
+     * NOTE: a parameterless variant rooted directly at `Zip::File` was tried first,
+     * but it could not be used together with `ZipSink` and `isAdditionalTaintStep`.
+     * There may be a solution that avoids the extra parameter.
+     */
+    API::Node rubyZipNode(API::Node n) {
+      result = n
+      or
+      exists(API::Node prev | prev = rubyZipNode(n) |
+        result = prev.getAnElement()
+        or
+        exists(API::Node call | call = prev.getMethod(_) |
+          result = call.getReturn()
+          or
+          result = call.getBlock().getParameter(_)
+          or
+          result = call.getParameter(0)
+        )
+      )
+    }
+
+    /**
+     * The value returned by a call to `extract` or `read` on any API node
+     * reachable (see `rubyZipNode`) from `Zip::File`.
+     *
+     * Sanitization: no node is a sink when some `size > x` comparison is
+     * reachable from `Zip::File`.
+     *
+     * NOTE(review): the `size >` check is not tied to the individual
+     * `extract`/`read` call; a single such comparison anywhere suppresses
+     * every sink of this class.
+     */
+    class ZipSink extends DecompressionBombSink {
+      ZipSink() {
+        exists(API::Node root | root = zipFile() |
+          not exists(
+            rubyZipNode(root).getMethod("size").getReturn().getMethod(">").getParameter(0)
+          ) and
+          this = rubyZipNode(root).getMethod(["extract", "read"]).getReturn().asSource()
+        )
+      }
+    }
+
+    /**
+     * Holds if `nodeFrom` is the first argument of `Zip::File.new` or
+     * `Zip::File.open` and `nodeTo` is the value returned by an `extract` or
+     * `read` call on any API node reachable from `Zip::File`.
+     */
+    predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+      exists(API::Node root | root = zipFile() |
+        nodeFrom = root.getMethod(["new", "open"]).getParameter(0).asSink() and
+        nodeTo = rubyZipNode(root).getMethod(["extract", "read"]).getReturn().asSource()
+      )
+    }
+  }
+}
+
+/**
+ * Taint-tracking configuration that finds remote, user-controlled data
+ * reaching a `DecompressionBombs::DecompressionBombSink`.
+ */
+class Bombs extends TaintTracking::Configuration {
+  Bombs() { this = "Decompression Bombs" }
+
+  /**
+   * Holds if `source` is a remote flow source.
+   *
+   * Only remote input counts as a source. Treating every
+   * `DataFlow::LocalSourceNode` as a source would make each sink its own
+   * source (sinks are built with `asSource()`), so every decompression call
+   * would be reported whether or not its input is user-controlled.
+   */
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+  /** Holds if `sink` is one of the decompression sinks modelled in `DecompressionBombs`. */
+  override predicate isSink(DataFlow::Node sink) {
+    sink instanceof DecompressionBombs::DecompressionBombSink
+  }
+
+  /**
+   * Holds if taint should additionally propagate from `nodeFrom` to `nodeTo`.
+   *
+   * Besides the library-specific steps from the `ZipFile`, `ZipInputStream`
+   * and `Zlib` modules, taint flows from the first argument to the result of
+   * `File.open`, of `StringIO.new`, and of any call to a method named `open`
+   * whose receiver renders as `self`.
+   */
+  override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+    DecompressionBombs::ZipFile::isAdditionalTaintStep(nodeFrom, nodeTo)
+    or
+    DecompressionBombs::ZipInputStream::isAdditionalTaintStep(nodeFrom, nodeTo)
+    or
+    DecompressionBombs::Zlib::isAdditionalTaintStep(nodeFrom, nodeTo)
+    or
+    // `File.open(tainted)` yields a tainted file object.
+    exists(API::Node n | n = API::root().getMember("File").getMethod("open") |
+      nodeFrom = n.getParameter(0).asSink() and
+      nodeTo = n.getReturn().asSource()
+    )
+    or
+    // `StringIO.new(tainted)` yields a tainted IO object.
+    exists(API::Node n | n = API::root().getMember("StringIO").getMethod("new") |
+      nodeFrom = n.getParameter(0).asSink() and
+      nodeTo = n.getReturn().asSource()
+    )
+    or
+    // A call `open(tainted)` on `self`.
+    // NOTE(review): presumably meant for implicit-receiver `open(...)` calls; the
+    // receiver is matched via its `toString()`, which is fragile - confirm.
+    exists(DataFlow::CallNode cn |
+      cn.getMethodName() = "open" and cn.getReceiver().toString() = "self"
+    |
+      nodeFrom = cn.getArgument(0) and
+      nodeTo = cn
+    )
+  }
+}
+
+// Report every flow path found by the `Bombs` configuration, anchored at the sink.
+from Bombs config, DataFlow::PathNode src, DataFlow::PathNode snk
+where config.hasFlowPath(src, snk)
+select snk.getNode(), src, snk, "This file extraction depends on a $@.", src.getNode(),
+  "potentially untrusted source"
@@ -0,0 +1,31 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+<overview>
+<p>Extracting compressed files with any compression algorithm, such as gzip, can lead to denial-of-service attacks.</p>
+<p>Attackers can take a huge file made up of repeated, similar bytes and compress it into a very small file.</p>
+
+</overview>
+<recommendation>
+
+<p>When decompressing a user-provided compressed file, you must either check the decompression ratio or read the file incrementally inside a loop, so that the decompressed size can be checked and limited on each iteration.</p>
+
+<p>See the <a href="https://github.com/rubyzip/rubyzip/#size-validation">size validation section of the official RubyZip documentation</a> for details.</p>
+</recommendation>
+<example>
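+<p>The following examples use RubyZip and follow its <a href="https://github.com/rubyzip/rubyzip/#reading-a-zip-file">official documentation</a>. The first example checks the size of each entry before extracting it; the second uses the lower-level <code>Zip::InputStream</code> interface, which does not check entry sizes.</p>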
+<sample src="example_good.rb" />
+<sample src="example_bad.rb" />
+</example>
+<references>
+
+<li>
+<a href="https://nvd.nist.gov/vuln/detail/CVE-2023-22898">CVE-2023-22898</a>
+</li>
+<li>
+<a href="https://www.bamsoftware.com/hacks/zipbomb/">Research on increasing the impact of this kind of attack (zip bombs)</a>
+</li>
+
+</references>
+</qhelp>
@@ -0,0 +1,5 @@
+# BAD: the archive is read through Zip::InputStream and this code performs no size check.
+# "Note that if you use the lower level Zip::InputStream interface, rubyzip does not check the entry sizes"
+zip_stream = Zip::InputStream.new(File.open('file.zip'))
+# Iterate until `get_next_entry` returns a falsy value.
+while entry = zip_stream.get_next_entry
+  # All required operations on `entry` go here.
+end
@@ -0,0 +1,11 @@
+# GOOD: the number of entries and each entry's `size` are checked before `extract` is called.
+MAX_FILE_SIZE = 10 * 1024**2 # 10MiB
+MAX_FILES = 100
+Zip::File.open('foo.zip') do |zip_file|
+  num_files = 0
+  zip_file.each do |entry|
+    # Count only entries for which `entry.file?` is true.
+    num_files += 1 if entry.file?
+    # Abort before extracting as soon as either limit is exceeded.
+    raise 'Too many extracted files' if num_files > MAX_FILES
+    raise 'File too large when extracted' if entry.size > MAX_FILE_SIZE
+    entry.extract
+  end
+end