# pyproject.toml — GPTQModel packaging configuration
[build-system]
# Exact pin for reproducible builds. The SPDX `license = "Apache-2.0"` string
# in [project] (PEP 639) requires a recent setuptools, which this pin satisfies.
requires = [
"setuptools==82.0.1",
]
build-backend = "setuptools.build_meta"
[project]
name = "GPTQModel"
# Both fields are resolved at build time rather than declared statically.
# `dependencies` is sourced from requirements.txt via [tool.setuptools.dynamic];
# the source of `version` is not declared in this file — presumably a setuptools
# attr/SCM mechanism, confirm against the build configuration.
dynamic = ["version", "dependencies"]
description = "Production ready LLM model compression/quantization toolkit with hw accelerated inference support for both cpu/gpu via HF, vLLM, and SGLang."
readme = "README.md"
requires-python = ">=3.10"
# PEP 639 SPDX license expression (string form, not a table).
license = "Apache-2.0"
authors = [
{ name = "ModelCloud", email = "qubitium@modelcloud.ai" },
]
keywords = ["gptq", "awq", "qqq", "autogptq", "autoawq", "eora", "gar", "quantization", "large-language-models", "transformers", "llm", "moe", "compression"]
# Trove classifiers; the Python version list mirrors requires-python = ">=3.10".
classifiers = [
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Programming Language :: C++",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"Intended Audience :: Information Technology",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis",
]
[project.urls]
Homepage = "https://github.com/ModelCloud/GPTQModel"

# Optional extras, installable as e.g. `pip install gptqmodel[vllm]`.
[project.optional-dependencies]
test = [
    "pytest>=8.3.5",
    "pytest-timeout>=2.3.1",
    "parameterized",
]
quality = [
    "ruff==0.13.0",
    # "isort==6.0.1",
]
vllm = [
    "vllm>=0.10.2",
    "flashinfer-python>=0.3.1",
]
sglang = [
    "sglang[srt]>=0.4.6",
    "flashinfer-python>=0.3.1",
]
bitblas = [
    "bitblas==0.1.0.post1",
]
bitsandbytes = [
    "bitsandbytes>=0.49.3",
]
hf = [
    "optimum>=1.21.2",
]
eval = [
    # NOTE(review): "Evalution" does not look like a real PyPI distribution
    # (possible typo of "Evaluation", or a placeholder) — confirm the intended
    # package name; `pip install gptqmodel[eval]` will fail to resolve as-is.
    "Evalution",
]
triton = [
    "triton>=3.4.0",
]
# NVIDIA runtime libraries with the -cu12 suffix (CUDA 12.x package line).
marlin-cuda12 = [
    "nvidia-cuda-runtime-cu12==12.9.79",
    "nvidia-cublas-cu12==12.9.1.4",
    "nvidia-cusparse-cu12==12.5.10.65",
    "nvidia-cusolver-cu12==11.7.5.82",
]
# Unsuffixed NVIDIA packages — newer toolkit line, counterpart of marlin-cuda12.
marlin-cuda = [
    "nvidia-cuda-runtime>=13.0.96",
    "nvidia-cublas>=13.1.0.3",
    "nvidia-cusparse>=12.6.3.3",
    "nvidia-cusolver>=12.0.4.66",
]
openai = [
    "uvicorn",
    "fastapi",
    "pydantic",
]
mlx = [
    "mlx_lm>=0.24.0",
]

# `dependencies` is declared dynamic in [project]; setuptools reads it from
# requirements.txt at build time. Moved below the project.* tables to follow
# the conventional pyproject ordering (build-system, project, project.*, tool.*).
[tool.setuptools.dynamic]
dependencies = { file = ["requirements.txt"] }

[tool.uv]
torch-backend = "auto"