|
27 | 27 | import urllib.parse as urlparse |
28 | 28 | from concurrent.futures import ThreadPoolExecutor |
29 | 29 | from datetime import datetime, timezone |
30 | | -from pathlib import Path |
31 | 30 |
|
32 | 31 | # constants |
33 | 32 | sampler_order_max = 7 |
@@ -665,6 +664,110 @@ def get_capabilities(): |
665 | 664 | has_tts = (ttsmodelpath!="") |
666 | 665 | return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts} |
667 | 666 |
|
def dump_gguf_metadata(file_path): #if you're gonna copy this into your own project at least credit concedo
    """Print the key/value metadata and the tensor table of a GGUF file.

    Only the 4-byte magic plus the next ``chunk_size`` (12 MB) bytes are read,
    and everything is parsed out of that in-memory buffer. All output goes to
    stdout via ``print``; the function always returns ``None``.

    Files smaller than ``chunk_size + 256`` bytes, files without the ``GGUF``
    magic and files with a version below 2 are rejected with a message.
    Any parsing problem (metadata running past the buffer, an out-of-range
    value type id, ...) is reported as ``Error Analyzing File: ...`` instead
    of being raised to the caller.

    Args:
        file_path: path of the GGUF file to inspect.
    """
    chunk_size = 1024*1024*12 # read first 12mb of file
    try:
        data = None
        fptr = 0 # read cursor into data (offset 0 is the first byte after the 4-byte magic)
        dt_table = ["u8","i8","u16","i16","u32","i32","f32","bool","str","arr","u64","i64","f64"] #13 types, else error
        tt_table = ["f32","f16","q4_0","q4_1","q4_2","q4_3","q5_0","q5_1","q8_0","q8_1","q2_k","q3_k","q4_k","q5_k","q6_k","q8_k","iq2_xxs","iq2_xs","iq3_xxs","iq1_s","iq4_nl","iq3_s","iq2_s","iq4_xs","i8","i16","i32","i64","f64","iq1_m","bf16","q4_0_4_4","q4_0_4_8","q4_0_8_8","tq1_0","tq2_0","iq4_nl_4_4","unknown","unknown","unknown","unknown","unknown"]
        # (struct format, byte width) for every fixed-size entry of dt_table.
        # All of them must be decodable: skipping one without advancing fptr
        # would desynchronise every value that follows it.
        scalar_formats = {
            "u8": ("<B", 1), "i8": ("<b", 1),
            "u16": ("<H", 2), "i16": ("<h", 2),
            "u32": ("<I", 4), "i32": ("<i", 4),
            "u64": ("<Q", 8), "i64": ("<q", 8),
            "f32": ("<f", 4), "f64": ("<d", 8),
            "bool": ("<B", 1), # kept as the raw 0/1 byte
        }

        def require(nbytes):
            # Fail with a readable message when the value would run past the buffer.
            if fptr + nbytes > len(data):
                raise ValueError(f"Unexpected end of data at offset {fptr} (only the first {chunk_size} bytes are read)")

        def read_data(datatype):
            """Decode one value of the named type at fptr and advance fptr past it."""
            nonlocal fptr
            scalar = scalar_formats.get(datatype)
            if scalar is not None:
                fmt, width = scalar
                require(width)
                val = struct.unpack_from(fmt, data, fptr)[0]
                fptr += width
                return val
            if datatype=="str":
                # u64 byte length followed by that many bytes
                str_len = read_data("u64")
                require(str_len)
                val_bytes = data[fptr:fptr + str_len]
                fptr += str_len
                # cut at the first NUL; undecodable bytes must not abort the dump
                return val_bytes.split(b'\0', 1)[0].decode('utf-8', errors='replace')
            if datatype=="arr":
                # u32 element type id, u64 element count, then the elements
                arr_type = read_data("u32")
                arr_elems = read_data("u64")
                if arr_type >= len(dt_table):
                    raise ValueError(f"Unknown array element type id: {arr_type}")
                elem_type = dt_table[arr_type]
                return [read_data(elem_type) for _ in range(arr_elems)]
            raise ValueError(f"Unknown Datatype: {datatype}")

        fsize = os.path.getsize(file_path)
        if fsize < (chunk_size + 256): #ignore files under file size limit
            print("This GGUF file is too small to analyze. Please ensure it is valid.")
            return
        with open(file_path, 'rb') as f:
            file_header = f.read(4)
            if file_header != b'GGUF': #file is not GGUF
                print(f"File does not seem to be a GGUF: {file_header}")
                return
            data = f.read(chunk_size)
        read_ver = read_data("u32")
        if read_ver < 2:
            print(f"This GGUF file is too old. Version detected: {read_ver}")
            return
        read_tensorcount = read_data("u64")
        read_kvcount = read_data("u64")
        print(f"*** GGUF FILE METADATA ***\nGGUF.version = {read_ver}\nGGUF.tensor_count = {read_tensorcount}\nGGUF.kv_count = {read_kvcount}")
        for _ in range(read_kvcount):
            curr_key = read_data("str")
            curr_datatype = read_data("u32")
            if curr_datatype >= len(dt_table):
                raise ValueError(f"Unknown value type id {curr_datatype} for key {curr_key}")
            dt_translated = dt_table[curr_datatype]
            curr_val = read_data(dt_translated)
            if dt_translated=="arr":
                # arrays can be huge (e.g. vocab), so only show the element count
                print(f"{dt_translated}: {curr_key} = [{len(curr_val)}]")
            elif dt_translated=="str":
                print(f"{dt_translated}: {curr_key} = {curr_val[:100]}")
            else:
                print(f"{dt_translated}: {curr_key} = {curr_val}")
        print("\n*** GGUF TENSOR INFO ***")
        for kn in range(read_tensorcount):
            tensor_name = read_data("str")
            dims = read_data("u32")
            dim_val_str = "[" + ", ".join(str(read_data("u64")) for _ in range(dims)) + "]"
            tensor_type = read_data("u32")
            read_data("u64") # tensor_offset not used
            # type ids beyond the table are shown as "unknown" instead of aborting
            tensor_type_str = tt_table[tensor_type] if tensor_type < len(tt_table) else "unknown"
            print(f"{kn:<3}: {tensor_type_str:<8} | {tensor_name:<30} | {dim_val_str}")
        print(f"Metadata and TensorInfo Bytes: {fptr}")
    except Exception as e:
        # best-effort diagnostic tool: report and carry on rather than crash the caller
        print(f"Error Analyzing File: {e}")
    return
| 770 | + |
668 | 771 | def read_gguf_metadata(file_path): |
669 | 772 | chunk_size = 8192 # read only first 8kb of file |
670 | 773 | try: |
@@ -4741,12 +4844,7 @@ def download_model_from_url(url,permitted_types=[".gguf",".safetensors"]): |
4741 | 4844 | def analyze_gguf_model(args,filename): |
4742 | 4845 | try: |
4743 | 4846 | stime = datetime.now() |
4744 | | - from gguf.scripts.gguf_dump import dump_metadata |
4745 | | - from gguf import GGUFReader |
4746 | | - reader = GGUFReader(filename, 'r') |
4747 | | - ns = argparse.Namespace() |
4748 | | - ns.no_tensors = False |
4749 | | - dump_metadata(reader, ns) |
| 4847 | + dump_gguf_metadata(filename) |
4750 | 4848 | atime = (datetime.now() - stime).total_seconds() |
4751 | 4849 | print(f"---\nAnalyzing completed in {atime:.2f}s.\n---",flush=True) |
4752 | 4850 | except Exception as e: |
@@ -4781,13 +4879,6 @@ def main(launch_args,start_server=True): |
4781 | 4879 |
|
4782 | 4880 | print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually |
4783 | 4881 | # print("Python version: " + sys.version) |
4784 | | - # connect path |
4785 | | - try: |
4786 | | - if (Path(__file__).parent / "gguf-py").exists(): |
4787 | | - ggufpy_path = str(Path(__file__).parent / "gguf-py") |
4788 | | - sys.path.append(ggufpy_path) |
4789 | | - except Exception as e: |
4790 | | - print(f"Cannot import gguf-py path: {e}") |
4791 | 4882 |
|
4792 | 4883 | #perform some basic cleanup of old temporary directories |
4793 | 4884 | try: |
|
0 commit comments