Skip to content

Commit 2e887ac

Browse files
fix: add path validation for remaining Python file operations (PT vulnerabilities)
- convert_legacy_llama.py: Add file existence check before opening
- inspect-org-model.py: Validate index file before opening
- gguf_new_metadata.py: Validate chat template config and file paths
- convert_image_encoder_to_gguf.py: Validate vocab.json and config.json paths
- glmedge-convert-image-encoder-to-gguf.py: Validate vocab.json and config.json paths
- minicpmv-convert-image-encoder-to-gguf.py: Validate vocab.json path

Addresses remaining Python path traversal vulnerabilities (CWE-23)

Co-Authored-By: Jake Cosme <[email protected]>
1 parent 69d8be7 commit 2e887ac

File tree

6 files changed

+36
-5
lines changed

6 files changed

+36
-5
lines changed

examples/convert_legacy_llama.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -679,6 +679,8 @@ def must_read(fp: IO[bytes], length: int) -> bytes:
679679

680680
@functools.lru_cache(maxsize=None)
681681
def lazy_load_file(path: Path) -> ModelPlus:
682+
if not os.path.isfile(path):
683+
raise ValueError(f"File does not exist: {path}")
682684
fp = open(path, 'rb')
683685
first8 = fp.read(8)
684686
fp.seek(0)

examples/model-conversion/scripts/utils/inspect-org-model.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@
2222
# Multi-file model
2323
print("Multi-file model detected")
2424

25+
if not os.path.isfile(index_path) or not index_path.endswith('.json'):
26+
print(f"Error: Invalid index file: {index_path}")
27+
exit(1)
2528
with open(index_path, 'r') as f:
2629
index_data = json.load(f)
2730

gguf-py/gguf/scripts/gguf_new_metadata.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -135,13 +135,19 @@ def main() -> None:
135135
new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, json.loads(args.chat_template) if args.chat_template.startswith('[') else args.chat_template)
136136

137137
if args.chat_template_config:
138+
if not os.path.isfile(args.chat_template_config) or not str(args.chat_template_config).endswith('.json'):
139+
logger.error(f"Invalid chat template config file: {args.chat_template_config}")
140+
sys.exit(1)
138141
with open(args.chat_template_config, 'r', encoding='utf-8') as fp:
139142
config = json.load(fp)
140143
template = config.get('chat_template')
141144
if template:
142145
new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)
143146

144147
if args.chat_template_file:
148+
if not os.path.isfile(args.chat_template_file):
149+
logger.error(f"Chat template file does not exist: {args.chat_template_file}")
150+
sys.exit(1)
145151
with open(args.chat_template_file, 'r', encoding='utf-8') as fp:
146152
template = fp.read()
147153
new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)

tools/mtmd/legacy-models/convert_image_encoder_to_gguf.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -137,11 +137,19 @@ def bytes_to_unicode():
137137
vocab = None
138138
tokens = None
139139
else:
140-
with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
140+
vocab_path = dir_model + "/vocab.json"
141+
if not os.path.isfile(vocab_path):
142+
print(f"Error: vocab.json not found at {vocab_path}")
143+
sys.exit(1)
144+
with open(vocab_path, "r", encoding="utf-8") as f:
141145
vocab = json.load(f)
142146
tokens = [key for key in vocab]
143147

144-
with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
148+
config_path = dir_model + "/config.json"
149+
if not os.path.isfile(config_path):
150+
print(f"Error: config.json not found at {config_path}")
151+
sys.exit(1)
152+
with open(config_path, "r", encoding="utf-8") as f:
145153
config = json.load(f)
146154
if args.clip_model_is_vision:
147155
v_hparams = config

tools/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -124,11 +124,19 @@ def bytes_to_unicode():
124124
vocab = None
125125
tokens = None
126126
else:
127-
with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
127+
vocab_path = dir_model + "/vocab.json"
128+
if not os.path.isfile(vocab_path):
129+
print(f"Error: vocab.json not found at {vocab_path}")
130+
sys.exit(1)
131+
with open(vocab_path, "r", encoding="utf-8") as f:
128132
vocab = json.load(f)
129133
tokens = [key for key in vocab]
130134

131-
with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
135+
config_path = dir_model + "/config.json"
136+
if not os.path.isfile(config_path):
137+
print(f"Error: config.json not found at {config_path}")
138+
sys.exit(1)
139+
with open(config_path, "r", encoding="utf-8") as f:
132140
config = json.load(f)
133141
if args.clip_model_is_vision:
134142
v_hparams = config

tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -542,7 +542,11 @@ def bytes_to_unicode():
542542
vocab = None
543543
tokens = None
544544
else:
545-
with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
545+
vocab_path = dir_model + "/vocab.json"
546+
if not os.path.isfile(vocab_path):
547+
print(f"Error: vocab.json not found at {vocab_path}")
548+
sys.exit(1)
549+
with open(vocab_path, "r", encoding="utf-8") as f:
546550
vocab = json.load(f)
547551
tokens = [key for key in vocab]
548552

0 commit comments

Comments
 (0)