@@ -94,76 +94,83 @@ type modelfile struct {
9494 quantization string
9595}
9696
97- // File type patterns, ignore the case of the file extensions
97+ // File type patterns (config, model, code, skip) - the case of the file extensions is ignored
9898var (
99- // Config file patterns
10099 configFilePatterns = []string {
101- // Common config files
102- "*.json" ,
103- "*.jsonl" ,
104- "*.yaml" ,
105- "*.yml" ,
106- "*.toml" ,
107- "*.txt" ,
108- "*.config" ,
109- "*.modelcard" ,
110- "*.meta" ,
111- "*.ini" ,
112-
113- // Common doc files
114- "*.md" ,
115- "LICENSE*" ,
116- "README*" ,
117- "SETUP*" ,
118- "*requirements*" ,
119-
120- // Image file patterns
121- "*.jpg" ,
122- "*.jpeg" ,
123- "*.png" ,
124- "*.gif" ,
125- "*.bmp" ,
126- "*.tiff" ,
127- "*.ico" ,
128-
129- // Other files
130- "*tokenizer.model*" , // For mistral tokenizer.model.v3
131- "config.json.*" ,
132- }
133-
134- // Model file patterns
100+ // Configuration formats
101+ "*.json" , // JSON configuration files
102+ "*.jsonl" , // JSON Lines format
103+ "*.yaml" , // YAML configuration files
104+ "*.yml" , // YAML alternative extension
105+ "*.toml" , // TOML configuration files
106+ "*.ini" , // INI configuration files
107+ "*.config" , // Generic config files
108+ "*.txt" , // Text files
109+ "*.modelcard" , // Model card metadata
110+ "*.meta" , // Model metadata
111+
112+ // Documentation files
113+ "*.md" , // Markdown documentation
114+ "LICENSE*" , // License files
115+ "README*" , // Project documentation
116+ "SETUP*" , // Setup instructions
117+ "*requirements*" , // Dependency specifications
118+
119+ // Image assets
120+ "*.jpg" , // JPEG image format
121+ "*.jpeg" , // JPEG alternative extension
122+ "*.png" , // PNG image format
123+ "*.gif" , // GIF image format
124+ "*.bmp" , // Bitmap image format
125+ "*.tiff" , // TIFF image format
126+ "*.ico" , // Icon format
127+
128+ // Model-specific files
129+ "*tokenizer.model*" , // Tokenizer files (e.g., Mistral v3)
130+ "config.json.*" , // Model configuration variants
131+ }
132+
133+ // Model file patterns - supported model file extensions
135134 modelFilePatterns = []string {
136- "*.bin" ,
137- "*.safetensors" ,
138- "*.pt" ,
139- "*.pth" ,
140- "*.onnx" ,
141- "*.gguf" ,
142- "*.msgpack" ,
143- "*.tflite" , // tensorflow lite
144- "*.h5" , // keras
145- "*.hdf" , // keras
146- "*.hdf5" , // keras
147- "*.ot" , // openvino
148- "*.engine" , // tensorrt
149- "*.trt" , // tensorrt
150- }
151-
152- // Code file patterns
135+ // Hugging Face formats
136+ "*.safetensors" , // Safe and efficient tensor serialization format
137+
138+ // PyTorch formats
139+ "*.bin" , // General binary format
140+ "*.pt" , // PyTorch model
141+ "*.pth" , // PyTorch model (alternative extension)
142+
143+ // TensorFlow formats
144+ "*.tflite" , // TensorFlow Lite
145+ "*.h5" , // Keras HDF5 format
146+ "*.hdf" , // Hierarchical Data Format
147+ "*.hdf5" , // HDF5 (alternative extension)
148+
149+ // Other ML frameworks
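150+ "*.ot" , // NOTE(review): labelled OpenVINO, but .ot looks like the tch-rs/libtorch weight format (e.g. rust_model.ot) - confirm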
151+ "*.engine" , // TensorRT format
152+ "*.trt" , // TensorRT format (alternative extension)
153+ "*.onnx" , // Open Neural Network Exchange format
154+ "*.gguf" , // GGML Universal Format
155+ "*.msgpack" , // MessagePack serialization
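156+ "*.model" , // Generic model files from some NLP frameworks; NOTE(review): overlaps "*tokenizer.model*" in configFilePatterns - confirm which list wins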
157+ }
158+
159+ // Code file patterns - supported script and notebook files
153160 codeFilePatterns = []string {
154- "*.py" ,
155- "*.sh" ,
156- "*.ipynb" ,
161+ "*.py" , // Python source files
162+ "*.sh" , // Shell scripts
163+ "*.ipynb" , // Jupyter notebooks
157164 }
158165
159- // Skip files/ directories that match these patterns
166+ // Skip patterns - files and directories to ignore during processing
160167 skipPatterns = []string {
161- ".*" ,
162- "modelfile" ,
163- "__pycache__" ,
164- "*.pyc" ,
165- "*.pyo" ,
166- "*.pyd" ,
168+ ".*" , // Hidden files and directories
169+ "modelfile" , // Modelfile configuration
170+ "__pycache__" , // Python bytecode cache directory
171+ "*.pyc" , // Python compiled bytecode
172+ "*.pyo" , // Python optimized bytecode
173+ "*.pyd" , // Python dynamic modules
167174 }
168175)
169176
0 commit comments