Skip to content

Commit 5190496

Browse files
committed
Revert "Fix CVE-2024-48063 pytorch (#10914)"
This reverts commit 7a97946.
1 parent 7a97946 commit 5190496

File tree

9 files changed

+574
-11
lines changed

9 files changed

+574
-11
lines changed

SPECS/pytorch/CVE-2024-27318.patch

Lines changed: 377 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,377 @@
1+
Modified patch to apply to vendored onnx sources
2+
Modified by: [email protected]
3+
4+
From 66b7fb630903fdcf3e83b6b6d56d82e904264a20 Mon Sep 17 00:00:00 2001
5+
From: liqun Fu <[email protected]>
6+
Date: Mon, 19 Feb 2024 11:12:40 -0800
7+
Subject: [PATCH] Fix path sanitization bypass leading to arbitrary read
8+
(#5917)
9+
10+
Signed-off-by: liqunfu <[email protected]>
11+
Signed-off-by: liqun Fu <[email protected]>
12+
Co-authored-by: Justin Chu <[email protected]>
13+
---
14+
third_party/onnx/onnx/checker.cc | 163 ++++++++++--------
15+
third_party/onnx/onnx/checker.h | 5 +-
16+
third_party/onnx/onnx/common/path.h | 17 +-
17+
third_party/onnx/onnx/cpp2py_export.cc | 2 +
18+
third_party/onnx/onnx/external_data_helper.py | 17 +-
19+
.../onnx/onnx/test/test_external_data.py | 47 +++++
20+
6 files changed, 160 insertions(+), 91 deletions(-)
21+
22+
diff --git a/third_party/onnx/onnx/checker.cc b/third_party/onnx/onnx/checker.cc
23+
index 0c81c87e..38a068dd 100644
24+
--- a/third_party/onnx/onnx/checker.cc
25+
+++ b/third_party/onnx/onnx/checker.cc
26+
@@ -4,7 +4,6 @@
27+
28+
#include "onnx/checker.h"
29+
#include "onnx/common/file_utils.h"
30+
-#include "onnx/common/path.h"
31+
#include "onnx/defs/schema.h"
32+
#include "onnx/defs/tensor_proto_util.h"
33+
#include "onnx/proto_utils.h"
34+
@@ -129,80 +128,7 @@ void check_tensor(const TensorProto& tensor, const CheckerContext& ctx) {
35+
for (const StringStringEntryProto& entry : tensor.external_data()) {
36+
if (entry.has_key() && entry.has_value() && entry.key() == "location") {
37+
has_location = true;
38+
-#ifdef _WIN32
39+
- auto file_path = std::filesystem::path(utf8str_to_wstring(entry.value()));
40+
- if (file_path.is_absolute()) {
41+
- fail_check(
42+
- "Location of external TensorProto ( tensor name: ",
43+
- tensor.name(),
44+
- ") should be a relative path, but it is an absolute path: ",
45+
- entry.value());
46+
- }
47+
- auto relative_path = file_path.lexically_normal().make_preferred().wstring();
48+
- // Check that normalized relative path contains ".." on Windows.
49+
- if (relative_path.find(L"..", 0) != std::string::npos) {
50+
- fail_check(
51+
- "Data of TensorProto ( tensor name: ",
52+
- tensor.name(),
53+
- ") should be file inside the ",
54+
- ctx.get_model_dir(),
55+
- ", but the '",
56+
- entry.value(),
57+
- "' points outside the directory");
58+
- }
59+
- std::wstring data_path = path_join(utf8str_to_wstring(ctx.get_model_dir()), relative_path);
60+
- struct _stat buff;
61+
- if (_wstat(data_path.c_str(), &buff) != 0) {
62+
- fail_check(
63+
- "Data of TensorProto ( tensor name: ",
64+
- tensor.name(),
65+
- ") should be stored in ",
66+
- entry.value(),
67+
- ", but it doesn't exist or is not accessible.");
68+
- }
69+
-#else // POSIX
70+
- if (entry.value().empty()) {
71+
- fail_check("Location of external TensorProto ( tensor name: ", tensor.name(), ") should not be empty.");
72+
- } else if (entry.value()[0] == '/') {
73+
- fail_check(
74+
- "Location of external TensorProto ( tensor name: ",
75+
- tensor.name(),
76+
- ") should be a relative path, but it is an absolute path: ",
77+
- entry.value());
78+
- }
79+
- std::string relative_path = clean_relative_path(entry.value());
80+
- // Check that normalized relative path contains ".." on POSIX
81+
- if (relative_path.find("..", 0) != std::string::npos) {
82+
- fail_check(
83+
- "Data of TensorProto ( tensor name: ",
84+
- tensor.name(),
85+
- ") should be file inside the ",
86+
- ctx.get_model_dir(),
87+
- ", but the '",
88+
- entry.value(),
89+
- "' points outside the directory");
90+
- }
91+
- std::string data_path = path_join(ctx.get_model_dir(), relative_path);
92+
- // use stat to check whether the file exists
93+
- struct stat buffer;
94+
- if (stat((data_path).c_str(), &buffer) != 0) {
95+
- fail_check(
96+
- "Data of TensorProto ( tensor name: ",
97+
- tensor.name(),
98+
- ") should be stored in ",
99+
- data_path,
100+
- ", but it doesn't exist or is not accessible.");
101+
- }
102+
- // Do not allow symlinks or directories.
103+
- if (!S_ISREG(buffer.st_mode)) {
104+
- fail_check(
105+
- "Data of TensorProto ( tensor name: ",
106+
- tensor.name(),
107+
- ") should be stored in ",
108+
- data_path,
109+
- ", but it is not regular file.");
110+
- }
111+
-#endif
112+
+ resolve_external_data_location(ctx.get_model_dir(), entry.value(), tensor.name());
113+
}
114+
}
115+
if (!has_location) {
116+
@@ -1028,6 +954,93 @@ void check_model(const ModelProto& model, bool full_check) {
117+
}
118+
}
119+
120+
+std::string resolve_external_data_location(
121+
+ const std::string& base_dir,
122+
+ const std::string& location,
123+
+ const std::string& tensor_name) {
124+
+#ifdef _WIN32
125+
+ auto file_path = std::filesystem::path(utf8str_to_wstring(location));
126+
+ if (file_path.is_absolute()) {
127+
+ fail_check(
128+
+ "Location of external TensorProto ( tensor name: ",
129+
+ tensor_name,
130+
+ ") should be a relative path, but it is an absolute path: ",
131+
+ location);
132+
+ }
133+
+ auto relative_path = file_path.lexically_normal().make_preferred().wstring();
134+
+ // Check that normalized relative path contains ".." on Windows.
135+
+ if (relative_path.find(L"..", 0) != std::string::npos) {
136+
+ fail_check(
137+
+ "Data of TensorProto ( tensor name: ",
138+
+ tensor_name,
139+
+ ") should be file inside the ",
140+
+ base_dir,
141+
+ ", but the '",
142+
+ location,
143+
+ "' points outside the directory");
144+
+ }
145+
+ std::wstring data_path = path_join(utf8str_to_wstring(base_dir), relative_path);
146+
+ struct _stat64 buff;
147+
+ if (data_path.empty() || (data_path[0] != '#' && _wstat64(data_path.c_str(), &buff) != 0)) {
148+
+ fail_check(
149+
+ "Data of TensorProto ( tensor name: ",
150+
+ tensor_name,
151+
+ ") should be stored in ",
152+
+ location,
153+
+ ", but it doesn't exist or is not accessible.");
154+
+ }
155+
+ return wstring_to_utf8str(data_path);
156+
+#else // POSIX
157+
+ if (location.empty()) {
158+
+ fail_check("Location of external TensorProto ( tensor name: ", tensor_name, ") should not be empty.");
159+
+ } else if (location[0] == '/') {
160+
+ fail_check(
161+
+ "Location of external TensorProto ( tensor name: ",
162+
+ tensor_name,
163+
+ ") should be a relative path, but it is an absolute path: ",
164+
+ location);
165+
+ }
166+
+ std::string relative_path = clean_relative_path(location);
167+
+ // Check that normalized relative path contains ".." on POSIX
168+
+ if (relative_path.find("..", 0) != std::string::npos) {
169+
+ fail_check(
170+
+ "Data of TensorProto ( tensor name: ",
171+
+ tensor_name,
172+
+ ") should be file inside the ",
173+
+ base_dir,
174+
+ ", but the '",
175+
+ location,
176+
+ "' points outside the directory");
177+
+ }
178+
+ std::string data_path = path_join(base_dir, relative_path);
179+
+ // use stat64 to check whether the file exists
180+
+#if defined(__APPLE__) || defined(__wasm__) || !defined(__GLIBC__)
181+
+ struct stat buffer; // APPLE, wasm and non-glic stdlibs do not have stat64
182+
+ if (data_path.empty() || (data_path[0] != '#' && stat((data_path).c_str(), &buffer) != 0)) {
183+
+#else
184+
+ struct stat64 buffer; // All POSIX under glibc except APPLE and wasm have stat64
185+
+ if (data_path.empty() || (data_path[0] != '#' && stat64((data_path).c_str(), &buffer) != 0)) {
186+
+#endif
187+
+ fail_check(
188+
+ "Data of TensorProto ( tensor name: ",
189+
+ tensor_name,
190+
+ ") should be stored in ",
191+
+ data_path,
192+
+ ", but it doesn't exist or is not accessible.");
193+
+ }
194+
+ // Do not allow symlinks or directories.
195+
+ if (data_path.empty() || (data_path[0] != '#' && !S_ISREG(buffer.st_mode))) {
196+
+ fail_check(
197+
+ "Data of TensorProto ( tensor name: ",
198+
+ tensor_name,
199+
+ ") should be stored in ",
200+
+ data_path,
201+
+ ", but it is not regular file.");
202+
+ }
203+
+ return data_path;
204+
+#endif
205+
+}
206+
+
207+
std::set<std::string> experimental_ops = {
208+
"ATen",
209+
"Affine",
210+
diff --git a/third_party/onnx/onnx/checker.h b/third_party/onnx/onnx/checker.h
211+
index 05eeaad0..50381aae 100644
212+
--- a/third_party/onnx/onnx/checker.h
213+
+++ b/third_party/onnx/onnx/checker.h
214+
@@ -148,7 +148,10 @@ void check_model_local_functions(
215+
216+
void check_model(const ModelProto& model, bool full_check = false);
217+
void check_model(const std::string& model_path, bool full_check = false);
218+
-
219+
+std::string resolve_external_data_location(
220+
+ const std::string& base_dir,
221+
+ const std::string& location,
222+
+ const std::string& tensor_name);
223+
bool check_is_experimental_op(const NodeProto& node);
224+
225+
} // namespace checker
226+
diff --git a/third_party/onnx/onnx/common/path.h b/third_party/onnx/onnx/common/path.h
227+
index f71b5b12..3d69e448 100644
228+
--- a/third_party/onnx/onnx/common/path.h
229+
+++ b/third_party/onnx/onnx/common/path.h
230+
@@ -30,12 +30,23 @@ inline std::wstring utf8str_to_wstring(const std::string& utf8str) {
231+
if (utf8str.size() > INT_MAX) {
232+
fail_check("utf8str_to_wstring: string is too long for converting to wstring.");
233+
}
234+
- int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), NULL, 0);
235+
+ int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), static_cast<int>(utf8str.size()), NULL, 0);
236+
std::wstring ws_str(size_required, 0);
237+
- MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), &ws_str[0], size_required);
238+
+ MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), static_cast<int>(utf8str.size()), &ws_str[0], size_required);
239+
return ws_str;
240+
}
241+
-
242+
+inline std::string wstring_to_utf8str(const std::wstring& ws_str) {
243+
+ if (ws_str.size() > INT_MAX) {
244+
+ fail_check("wstring_to_utf8str: string is too long for converting to UTF-8.");
245+
+ }
246+
+ int size_required =
247+
+ WideCharToMultiByte(CP_UTF8, 0, ws_str.c_str(), static_cast<int>(ws_str.size()), NULL, 0, NULL, NULL);
248+
+ std::string utf8str(size_required, 0);
249+
+ WideCharToMultiByte(
250+
+ CP_UTF8, 0, ws_str.c_str(), static_cast<int>(ws_str.size()), &utf8str[0], size_required, NULL, NULL);
251+
+ return utf8str;
252+
+}
253+
+
254+
#else
255+
std::string path_join(const std::string& origin, const std::string& append);
256+
// TODO: also use std::filesystem::path for clean_relative_path after ONNX has supported C++17 for POSIX
257+
diff --git a/third_party/onnx/onnx/cpp2py_export.cc b/third_party/onnx/onnx/cpp2py_export.cc
258+
index f6e1738e..02aabcab 100644
259+
--- a/third_party/onnx/onnx/cpp2py_export.cc
260+
+++ b/third_party/onnx/onnx/cpp2py_export.cc
261+
@@ -395,6 +395,8 @@ PYBIND11_MODULE(onnx_cpp2py_export, onnx_cpp2py_export) {
262+
"path"_a,
263+
"full_check"_a = false);
264+
265+
+ checker.def("_resolve_external_data_location", &checker::resolve_external_data_location);
266+
+
267+
// Submodule `version_converter`
268+
auto version_converter = onnx_cpp2py_export.def_submodule("version_converter");
269+
version_converter.doc() = "VersionConverter submodule";
270+
diff --git a/third_party/onnx/onnx/external_data_helper.py b/third_party/onnx/onnx/external_data_helper.py
271+
index 54c0f403..6763b11d 100644
272+
--- a/third_party/onnx/onnx/external_data_helper.py
273+
+++ b/third_party/onnx/onnx/external_data_helper.py
274+
@@ -5,7 +5,8 @@ import sys
275+
import uuid
276+
from itertools import chain
277+
from typing import Callable, Iterable, Optional
278+
-
279+
+
280+
+import onnx.onnx_cpp2py_export.checker as c_checker
281+
from .onnx_pb import AttributeProto, GraphProto, ModelProto, TensorProto
282+
283+
284+
@@ -37,9 +38,9 @@ def load_external_data_for_tensor(tensor: TensorProto, base_dir: str) -> None:
285+
base_dir: directory that contains the external data.
286+
"""
287+
info = ExternalDataInfo(tensor)
288+
- file_location = _sanitize_path(info.location)
289+
- external_data_file_path = os.path.join(base_dir, file_location)
290+
-
291+
+ external_data_file_path = c_checker._resolve_external_data_location( # type: ignore[attr-defined]
292+
+ base_dir, info.location, tensor.name
293+
+ )
294+
with open(external_data_file_path, "rb") as data_file:
295+
if info.offset:
296+
data_file.seek(info.offset)
297+
@@ -251,14 +252,6 @@ def _get_attribute_tensors(onnx_model_proto: ModelProto) -> Iterable[TensorProto
298+
yield from _get_attribute_tensors_from_graph(onnx_model_proto.graph)
299+
300+
301+
-def _sanitize_path(path: str) -> str:
302+
- """Remove path components which would allow traversing up a directory tree from a base path.
303+
-
304+
- Note: This method is currently very basic and should be expanded.
305+
- """
306+
- return path.lstrip("/.")
307+
-
308+
-
309+
def _is_valid_filename(filename: str) -> bool:
310+
"""Utility to check whether the provided filename is valid."""
311+
exp = re.compile('^[^<>:;,?"*|/]+$')
312+
diff --git a/third_party/onnx/onnx/test/test_external_data.py b/third_party/onnx/onnx/test/test_external_data.py
313+
index 5ba6b9cf..6e06312b 100644
314+
--- a/third_party/onnx/onnx/test/test_external_data.py
315+
+++ b/third_party/onnx/onnx/test/test_external_data.py
316+
@@ -1,4 +1,5 @@
317+
# SPDX-License-Identifier: Apache-2.0
318+
+import itertools
319+
import os
320+
import os.path as Path
321+
import shutil
322+
@@ -186,6 +187,52 @@ class TestLoadExternalDataSingleFile(TestLoadExternalDataBase):
323+
attribute_tensor = new_model.graph.node[0].attribute[0].t
324+
self.assertTrue(np.allclose(to_array(attribute_tensor), self.attribute_value))
325+
326+
+ @parameterized.parameterized.expand(itertools.product((True, False), (True, False)))
327+
+ def test_save_external_invalid_single_file_data_and_check(
328+
+ self, use_absolute_path: bool, use_model_path: bool
329+
+ ) -> None:
330+
+ model = onnx.load_model(self.model_filename, self.serialization_format)
331+
+
332+
+ model_dir = os.path.join(self.temp_dir, "save_copy")
333+
+ os.mkdir(model_dir)
334+
+
335+
+ traversal_external_data_dir = os.path.join(
336+
+ self.temp_dir, "invlid_external_data"
337+
+ )
338+
+ os.mkdir(traversal_external_data_dir)
339+
+
340+
+ if use_absolute_path:
341+
+ traversal_external_data_location = os.path.join(
342+
+ traversal_external_data_dir, "tensors.bin"
343+
+ )
344+
+ else:
345+
+ traversal_external_data_location = "../invlid_external_data/tensors.bin"
346+
+
347+
+ external_data_dir = os.path.join(self.temp_dir, "external_data")
348+
+ os.mkdir(external_data_dir)
349+
+ new_model_filepath = os.path.join(model_dir, "model.onnx")
350+
+
351+
+ def convert_model_to_external_data_no_check(model: ModelProto, location: str):
352+
+ for tensor in model.graph.initializer:
353+
+ if tensor.HasField("raw_data"):
354+
+ set_external_data(tensor, location)
355+
+
356+
+ convert_model_to_external_data_no_check(
357+
+ model,
358+
+ location=traversal_external_data_location,
359+
+ )
360+
+
361+
+ onnx.save_model(model, new_model_filepath, self.serialization_format)
362+
+ if use_model_path:
363+
+ with self.assertRaises(onnx.checker.ValidationError):
364+
+ _ = onnx.load_model(new_model_filepath, self.serialization_format)
365+
+ else:
366+
+ onnx_model = onnx.load_model(
367+
+ new_model_filepath, self.serialization_format, load_external_data=False
368+
+ )
369+
+ with self.assertRaises(onnx.checker.ValidationError):
370+
+ load_external_data_for_model(onnx_model, external_data_dir)
371+
+
372+
373+
class TestSaveAllTensorsAsExternalData(TestLoadExternalDataBase):
374+
def setUp(self) -> None:
375+
--
376+
2.25.1
377+

0 commit comments

Comments
 (0)