[AUTO-CHERRYPICK] pytorch: patch for CVE-2024-27319, CVE-2021-22918 - branch 3.0-dev (#12073)

CBL-Mariner-Bot · arc9693 · web-flow · commit 2d22c9a3f109 · 2025-01-26T10:28:52.000-05:00
Co-authored-by: Archana Choudhary <36061892+arc9693@users.noreply.github.com>
diff --git a/SPECS/pytorch/CVE-2021-22918.patch b/SPECS/pytorch/CVE-2021-22918.patch
@@ -0,0 +1,218 @@
+From 86dbeb4bd665749d6234ae90d30923e210de21b9 Mon Sep 17 00:00:00 2001
+From: Ben Noordhuis <info@bnoordhuis.nl>
+Date: Fri, 21 May 2021 11:23:36 +0200
+Subject: [PATCH] idna: fix OOB read in punycode decoder
+
+libuv was vulnerable to out-of-bounds reads in the uv__idna_toascii()
+function which is used to convert strings to ASCII. This is called by
+the DNS resolution function and can lead to information disclosures or
+crashes.
+
+Reported by Eric Sesterhenn in collaboration with Cure53 and ExpressVPN.
+
+Reported-By: Eric Sesterhenn <eric.sesterhenn@x41-dsec.de>
+Fixes: https://github.com/libuv/libuv/issues/3147
+PR-URL: https://github.com/libuv/libuv-private/pull/1
+Refs: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22918
+Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
+Reviewed-By: Richard Lau <riclau@uk.ibm.com>
+---
+ src/idna.c       | 49 +++++++++++++++++++++++++++++++++++-------------
+ test/test-idna.c | 19 +++++++++++++++++++
+ test/test-list.h |  2 ++
+ 3 files changed, 57 insertions(+), 13 deletions(-)
+
+diff --git a/third_party/tensorpipe/third_party/libuv/src/idna.c b/third_party/tensorpipe/third_party/libuv/src/idna.c
+index 13ffac6be81..b44cb16a1ee 100644
+--- a/third_party/tensorpipe/third_party/libuv/src/idna.c
++++ b/third_party/tensorpipe/third_party/libuv/src/idna.c
+@@ -19,6 +19,7 @@
+ 
+ #include "uv.h"
+ #include "idna.h"
++#include <assert.h>
+ #include <string.h>
+ 
+ static unsigned uv__utf8_decode1_slow(const char** p,
+@@ -32,7 +33,7 @@ static unsigned uv__utf8_decode1_slow(const char** p,
+   if (a > 0xF7)
+     return -1;
+ 
+-  switch (*p - pe) {
++  switch (pe - *p) {
+   default:
+     if (a > 0xEF) {
+       min = 0x10000;
+@@ -62,6 +63,8 @@ static unsigned uv__utf8_decode1_slow(const char** p,
+       a = 0;
+       break;
+     }
++    /* Fall through. */
++  case 0:
+     return -1;  /* Invalid continuation byte. */
+   }
+ 
+@@ -88,6 +91,8 @@ static unsigned uv__utf8_decode1_slow(const char** p,
+ unsigned uv__utf8_decode1(const char** p, const char* pe) {
+   unsigned a;
+ 
++  assert(*p < pe);
++
+   a = (unsigned char) *(*p)++;
+ 
+   if (a < 128)
+@@ -96,9 +101,6 @@ unsigned uv__utf8_decode1(const char** p, const char* pe) {
+   return uv__utf8_decode1_slow(p, pe, a);
+ }
+ 
+-#define foreach_codepoint(c, p, pe) \
+-  for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;)
+-
+ static int uv__idna_toascii_label(const char* s, const char* se,
+                                   char** d, char* de) {
+   static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
+@@ -121,15 +123,22 @@ static int uv__idna_toascii_label(const char* s, const char* se,
+   ss = s;
+   todo = 0;
+ 
+-  foreach_codepoint(c, &s, se) {
++  /* Note: after this loop we've visited all UTF-8 characters and know
++   * they're legal so we no longer need to check for decode errors.
++   */
++  while (s < se) {
++    c = uv__utf8_decode1(&s, se);
++
++    if (c == -1u)
++      return UV_EINVAL;
++
+     if (c < 128)
+       h++;
+-    else if (c == (unsigned) -1)
+-      return UV_EINVAL;
+     else
+       todo++;
+   }
+ 
++  /* Only write "xn--" when there are non-ASCII characters. */
+   if (todo > 0) {
+     if (*d < de) *(*d)++ = 'x';
+     if (*d < de) *(*d)++ = 'n';
+@@ -137,9 +146,13 @@ static int uv__idna_toascii_label(const char* s, const char* se,
+     if (*d < de) *(*d)++ = '-';
+   }
+ 
++  /* Write ASCII characters. */
+   x = 0;
+   s = ss;
+-  foreach_codepoint(c, &s, se) {
++  while (s < se) {
++    c = uv__utf8_decode1(&s, se);
++    assert(c != -1u);
++
+     if (c > 127)
+       continue;
+ 
+@@ -166,10 +179,15 @@ static int uv__idna_toascii_label(const char* s, const char* se,
+   while (todo > 0) {
+     m = -1;
+     s = ss;
+-    foreach_codepoint(c, &s, se)
++
++    while (s < se) {
++      c = uv__utf8_decode1(&s, se);
++      assert(c != -1u);
++
+       if (c >= n)
+         if (c < m)
+           m = c;
++    }
+ 
+     x = m - n;
+     y = h + 1;
+@@ -181,7 +199,10 @@ static int uv__idna_toascii_label(const char* s, const char* se,
+     n = m;
+ 
+     s = ss;
+-    foreach_codepoint(c, &s, se) {
++    while (s < se) {
++      c = uv__utf8_decode1(&s, se);
++      assert(c != -1u);
++
+       if (c < n)
+         if (++delta == 0)
+           return UV_E2BIG;  /* Overflow. */
+@@ -245,8 +266,6 @@ static int uv__idna_toascii_label(const char* s, const char* se,
+   return 0;
+ }
+ 
+-#undef foreach_codepoint
+-
+ long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
+   const char* si;
+   const char* st;
+@@ -256,10 +275,14 @@ long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
+ 
+   ds = d;
+ 
+-  for (si = s; si < se; /* empty */) {
++  si = s;
++  while (si < se) {
+     st = si;
+     c = uv__utf8_decode1(&si, se);
+ 
++    if (c == -1u)
++      return UV_EINVAL;
++
+     if (c != '.')
+       if (c != 0x3002)  /* 。 */
+         if (c != 0xFF0E)  /* ． */
+diff --git a/third_party/tensorpipe/third_party/libuv/test/test-idna.c b/third_party/tensorpipe/third_party/libuv/test/test-idna.c
+index b76853cb996..f4fad9653df 100644
+--- a/third_party/tensorpipe/third_party/libuv/test/test-idna.c
++++ b/third_party/tensorpipe/third_party/libuv/test/test-idna.c
+@@ -96,6 +96,25 @@ TEST_IMPL(utf8_decode1) {
+   return 0;
+ }
+ 
++TEST_IMPL(utf8_decode1_overrun) {
++  const char* p;
++  char b[1];
++
++  /* Single byte. */
++  p = b;
++  b[0] = 0x7F;
++  ASSERT_EQ(0x7F, uv__utf8_decode1(&p, b + 1));
++  ASSERT_EQ(p, b + 1);
++
++  /* Multi-byte. */
++  p = b;
++  b[0] = 0xC0;
++  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + 1));
++  ASSERT_EQ(p, b + 1);
++
++  return 0;
++}
++
+ /* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */
+ #ifndef __MVS__
+ 
+diff --git a/third_party/tensorpipe/third_party/libuv/test/test-list.h b/third_party/tensorpipe/third_party/libuv/test/test-list.h
+index d7c7b086f03..74588407cfb 100644
+--- a/third_party/tensorpipe/third_party/libuv/test/test-list.h
++++ b/third_party/tensorpipe/third_party/libuv/test/test-list.h
+@@ -524,6 +524,7 @@ TEST_DECLARE  (fork_threadpool_queue_work_simple)
+ 
+ TEST_DECLARE  (idna_toascii)
+ TEST_DECLARE  (utf8_decode1)
++TEST_DECLARE  (utf8_decode1_overrun)
+ TEST_DECLARE  (uname)
+ 
+ TEST_DECLARE  (metrics_idle_time)
+@@ -1120,6 +1121,7 @@ TASK_LIST_START
+ #endif
+ 
+   TEST_ENTRY  (utf8_decode1)
++  TEST_ENTRY  (utf8_decode1_overrun)
+   TEST_ENTRY  (uname)
+ 
+ /* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */
diff --git a/SPECS/pytorch/CVE-2024-27319.patch b/SPECS/pytorch/CVE-2024-27319.patch
@@ -0,0 +1,50 @@
+From 08a399ba75a805b7813ab8936b91d0e274b08287 Mon Sep 17 00:00:00 2001
+From: liqun Fu <liqfu@microsoft.com>
+Date: Fri, 9 Feb 2024 14:45:49 -0800
+Subject: [PATCH] Fix Out of bounds read due to lack of string termination in
+ assert (#5918)
+
+Signed-off-by: liqunfu <liqun.fu@microsoft.com>
+Co-authored-by: G. Ramalingam <grama@microsoft.com>
+---
+ onnx/common/assertions.cc | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/third_party/onnx/onnx/common/assertions.cc b/third_party/onnx/onnx/common/assertions.cc
+index 29c07ba0d78..7675c453482 100644
+--- a/third_party/onnx/onnx/common/assertions.cc
++++ b/third_party/onnx/onnx/common/assertions.cc
+@@ -9,6 +9,7 @@
+ 
+ #include "onnx/common/assertions.h"
+ 
++#include <array>
+ #include <cstdarg>
+ #include <cstdio>
+ 
+@@ -17,16 +18,20 @@
+ namespace ONNX_NAMESPACE {
+ 
+ std::string barf(const char* fmt, ...) {
+-  char msg[2048];
++  constexpr size_t buffer_size = 2048;
++  std::array<char, buffer_size> msg{};
+   va_list args;
+ 
+   va_start(args, fmt);
+-  // Although vsnprintf might have vulnerability issue while using format string with overflowed length,
+-  // it should be safe here to use fixed length for buffer "msg". No further checking is needed.
+-  vsnprintf(msg, 2048, fmt, args);
++
++  // use fixed length for buffer "msg" to avoid buffer overflow
++  vsnprintf(static_cast<char*>(msg.data()), msg.size() - 1, fmt, args);
++
++  // ensure null-terminated string to avoid out of bounds read
++  msg.back() = '\0';
+   va_end(args);
+ 
+-  return std::string(msg);
++  return std::string(msg.data());
+ }
+ 
+ void throw_assert_error(std::string& msg) {
diff --git a/SPECS/pytorch/pytorch.spec b/SPECS/pytorch/pytorch.spec
@@ -2,7 +2,7 @@
 Summary:        Tensors and Dynamic neural networks in Python with strong GPU acceleration.
 Name:           pytorch
 Version:        2.2.2
-Release:        3%{?dist}
+Release:        4%{?dist}
 License:        BSD-3-Clause
 Vendor:         Microsoft Corporation
 Distribution:   Azure Linux
@@ -25,6 +25,8 @@ BuildRequires:  python3-six
 Patch1:         CVE-2024-27318.patch
 Patch2:         CVE-2022-1941.patch
 Patch3:         CVE-2024-5187.patch
+Patch4:         CVE-2024-27319.patch
+Patch5:         CVE-2021-22918.patch
 
 %description
 PyTorch is a Python package that provides two high-level features:
@@ -86,6 +88,9 @@ cp -arf docs %{buildroot}/%{_pkgdocdir}
 %{_docdir}/*
 
 %changelog
+* Mon Jan 20 2025 Archana Choudhary <archana1@microsoft.com> - 2.2.2-4
+- Add patches to address CVE-2024-27319 and CVE-2021-22918
+
 * Tue Nov 12 2024 Sean Dougherty <sdougherty@microsoft.com> - 2.2.2-3
 - Add patch to address CVE-2024-5187
 - Remove unnecessary double vendoring of the third_party directory. Doubling happens because the contents of the submodule tarball are pulled directly from the original source tarball and then re-uploaded as this "submodule tarball".