|
| 1 | +import json |
| 2 | +import re |
| 3 | +import sys |
| 4 | +from pathlib import Path |
| 5 | +from subprocess import STDOUT, CalledProcessError |
| 6 | +from test.e2e.utils import RamalamaExecWorkspace |
| 7 | + |
| 8 | +import pytest |
| 9 | + |
| 10 | +GGUF_MODEL = "ollama://tinyllama" |
| 11 | +ST_MODEL = "https://huggingface.co/LiheYoung/depth-anything-small-hf/resolve/main/model.safetensors" |
| 12 | + |
| 13 | + |
@pytest.fixture(scope="module")
def shared_ctx():
    """Module-scoped workspace with both test models already pulled.

    Pulling is done once per module so every test in this file can inspect
    the GGUF and safetensors models without repeating the download.
    """
    with RamalamaExecWorkspace() as ctx:
        for model in (GGUF_MODEL, ST_MODEL):
            ctx.check_call(["ramalama", "-q", "pull", model])
        yield ctx
| 20 | + |
| 21 | + |
@pytest.mark.e2e
def test_inspect_non_existent_model(shared_ctx):
    """Inspecting a never-pulled model must fail with exit code 22 and a clear message."""
    missing = "non_existent_model_for_inspect"
    with pytest.raises(CalledProcessError) as exc_info:
        shared_ctx.check_output(["ramalama", "inspect", missing], stderr=STDOUT)
    err = exc_info.value
    assert err.returncode == 22
    expected_msg = f"Error: No ref file found for '{missing}'. Please pull model."
    assert expected_msg in err.output.decode("utf-8")
| 30 | + |
| 31 | + |
@pytest.mark.e2e
@pytest.mark.parametrize(
    "model_name, use_all_flag, expected_key, expected_value",
    [
        # GGUF inspect (no --all)
        pytest.param(GGUF_MODEL, False, ["Name"], "tinyllama", id="gguf_inspect_name"),
        pytest.param(GGUF_MODEL, False, ["Registry"], "ollama", id="gguf_inspect_registry"),
        pytest.param(GGUF_MODEL, False, ["Format"], "GGUF", id="gguf_inspect_format"),
        pytest.param(GGUF_MODEL, False, ["Version"], "3", id="gguf_inspect_version"),
        pytest.param(
            GGUF_MODEL, False, ["Endianness"], "0" if sys.byteorder == "little" else "1", id="gguf_inspect_endianness"
        ),
        pytest.param(GGUF_MODEL, False, ["Metadata"], "23", id="gguf_inspect_metadata_count"),
        pytest.param(GGUF_MODEL, False, ["Tensors"], "201", id="gguf_inspect_tensors_count"),
        pytest.param(
            GGUF_MODEL,
            False,
            ["Path"],
            # re.escape: Path() renders with the OS separator ("\" on Windows),
            # which would otherwise be interpreted as a regex escape. Single
            # quotes inside the f-string keep this valid on Python < 3.12,
            # where same-quote nesting (PEP 701) is a syntax error.
            rf".*{re.escape(str(Path('store', 'ollama', 'library', 'tinyllama')))}.*",
            id="gguf_inspect_path",
        ),
        # Safetensors inspect (no --all)
        pytest.param(ST_MODEL, False, ["Name"], "model.safetensors", id="safetensors_inspect_name"),
        pytest.param(ST_MODEL, False, ["Registry"], "https", id="safetensors_inspect_registry"),
        pytest.param(ST_MODEL, False, ["Metadata"], "288", id="safetensors_inspect_metadata_count"),
        # GGUF inspect --all
        pytest.param(GGUF_MODEL, True, ["Name"], "tinyllama", id="gguf_inspect_all_name"),
        pytest.param(GGUF_MODEL, True, ["Registry"], "ollama", id="gguf_inspect_all_registry"),
        pytest.param(GGUF_MODEL, True, ["Format"], "GGUF", id="gguf_inspect_all_format"),
        pytest.param(GGUF_MODEL, True, ["Version"], "3", id="gguf_inspect_all_version"),
        pytest.param(
            GGUF_MODEL,
            True,
            ["Endianness"],
            "0" if sys.byteorder == "little" else "1",
            id="gguf_inspect_all_endianness",
        ),
        pytest.param(
            GGUF_MODEL, True, ["Metadata", "data", "general.architecture"], "llama", id="gguf_inspect_all_meta_arch"
        ),
        # Safetensors inspect --all
        pytest.param(ST_MODEL, True, ["Name"], "model.safetensors", id="safetensors_inspect_all_name"),
        pytest.param(ST_MODEL, True, ["Registry"], "https", id="safetensors_inspect_all_registry"),
        pytest.param(
            ST_MODEL, True, ["Header", "__metadata__", "format"], "pt", id="safetensors_inspect_all_header_format"
        ),
    ],
)
def test_inspect_model_json_output(shared_ctx, model_name, use_all_flag, expected_key, expected_value):
    """Verify `ramalama inspect --json [--all]` emits the expected fields.

    `expected_key` is a path of nested JSON keys to walk; `expected_value`
    is a regex applied to the stringified value found at that path.
    """
    ctx = shared_ctx
    cmd = ["ramalama", "inspect", "--json"] + (["--all"] if use_all_flag else []) + [model_name]
    data = json.loads(ctx.check_output(cmd))

    # Descend through the nested key path to the value under test.
    value = data
    for k in expected_key:
        value = value[k]

    # NOTE(review): re.match anchors only at the start, so a pattern like "3"
    # also accepts "30". Kept as-is to preserve the existing match semantics.
    assert re.match(expected_value, str(value))
| 90 | + |
| 91 | + |
@pytest.mark.e2e
@pytest.mark.parametrize(
    "key, expected_value",
    [
        pytest.param("general.architecture", "llama", id="general.architecture"),
        pytest.param("general.file_type", "2", id="general.file_type"),
        pytest.param("general.name", "TinyLlama", id="general.name"),
        pytest.param("general.quantization_version", "2", id="general.quantization_version"),
        pytest.param("llama.attention.head_count", "32", id="llama.attention.head_count"),
        pytest.param("llama.attention.head_count_kv", "4", id="llama.attention.head_count_kv"),
        pytest.param(
            "llama.attention.layer_norm_rms_epsilon",
            "9.999999747378752e-06",
            id="llama.attention.layer_norm_rms_epsilon",
        ),
        pytest.param("llama.block_count", "22", id="llama.block_count"),
        pytest.param("llama.context_length", "2048", id="llama.context_length"),
        pytest.param("llama.embedding_length", "2048", id="llama.embedding_length"),
        pytest.param("llama.feed_forward_length", "5632", id="llama.feed_forward_length"),
        pytest.param("llama.rope.dimension_count", "64", id="llama.rope.dimension_count"),
        pytest.param("llama.rope.freq_base", "10000.0", id="llama.rope.freq_base"),
        pytest.param("tokenizer.ggml.bos_token_id", "1", id="tokenizer.ggml.bos_token_id"),
        pytest.param("tokenizer.ggml.eos_token_id", "2", id="tokenizer.ggml.eos_token_id"),
        pytest.param("tokenizer.ggml.model", "llama", id="tokenizer.ggml.model"),
        pytest.param("tokenizer.ggml.padding_token_id", "2", id="tokenizer.ggml.padding_token_id"),
        pytest.param("tokenizer.ggml.unknown_token_id", "0", id="tokenizer.ggml.unknown_token_id"),
    ],
)
def test_inspect_gguf_model_with_get(shared_ctx, key, expected_value):
    """`inspect --get KEY` prints the bare value; `--get all` lists every key/value pair."""
    single = shared_ctx.check_output(["ramalama", "inspect", "--get", key, GGUF_MODEL])
    assert single.strip() == expected_value

    everything = shared_ctx.check_output(["ramalama", "inspect", "--get", "all", GGUF_MODEL])
    assert f"{key}: {expected_value}" in everything
0 commit comments