# pyproject.toml — GPTQModel packaging configuration
[build-system]
# Exact pin for reproducible builds. The SPDX `license = "Apache-2.0"` string
# in [project] (PEP 639) requires a recent setuptools, which this pin satisfies.
requires = [
"setuptools==82.0.1",
]
build-backend = "setuptools.build_meta"
[project]
name = "GPTQModel"
# Both fields are resolved at build time rather than declared statically.
# `dependencies` is sourced from requirements.txt via [tool.setuptools.dynamic];
# the source of `version` is not declared in this file — presumably a setuptools
# attr/SCM mechanism, confirm against the build configuration.
dynamic = ["version", "dependencies"]
description = "Production ready LLM model compression/quantization toolkit with hw accelerated inference support for both cpu/gpu via HF, vLLM, and SGLang."
readme = "README.md"
requires-python = ">=3.10"
# PEP 639 SPDX license expression (string form, not a table).
license = "Apache-2.0"
authors = [
{ name = "ModelCloud", email = "qubitium@modelcloud.ai" },
]
keywords = ["gptq", "awq", "qqq", "autogptq", "autoawq", "eora", "gar", "quantization", "large-language-models", "transformers", "llm", "moe", "compression"]
# Trove classifiers; the Python version list mirrors requires-python = ">=3.10".
classifiers = [
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Programming Language :: C++",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"Intended Audience :: Information Technology",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis",
]
[project.urls]
Homepage = "https://github.com/ModelCloud/GPTQModel"

# Optional extras, installable as e.g. `pip install gptqmodel[vllm]`.
[project.optional-dependencies]
test = [
    "pytest>=8.3.5",
    "pytest-timeout>=2.3.1",
    "parameterized",
]
quality = [
    "ruff==0.13.0",
    # "isort==6.0.1",
]
vllm = [
    "vllm>=0.10.2",
    "flashinfer-python>=0.3.1",
]
sglang = [
    "sglang[srt]>=0.4.6",
    "flashinfer-python>=0.3.1",
]
bitblas = [
    "bitblas==0.1.0.post1",
]
bitsandbytes = [
    "bitsandbytes>=0.49.3",
]
hf = [
    "optimum>=1.21.2",
]
eval = [
    # NOTE(review): "Evalution" does not look like a real PyPI distribution
    # (possible typo of "Evaluation", or a placeholder) — confirm the intended
    # package name; `pip install gptqmodel[eval]` will fail to resolve as-is.
    "Evalution",
]
triton = [
    "triton>=3.4.0",
]
# NVIDIA runtime libraries with the -cu12 suffix (CUDA 12.x package line).
marlin-cuda12 = [
    "nvidia-cuda-runtime-cu12==12.9.79",
    "nvidia-cublas-cu12==12.9.1.4",
    "nvidia-cusparse-cu12==12.5.10.65",
    "nvidia-cusolver-cu12==11.7.5.82",
]
# Unsuffixed NVIDIA packages — newer toolkit line, counterpart of marlin-cuda12.
marlin-cuda = [
    "nvidia-cuda-runtime>=13.0.96",
    "nvidia-cublas>=13.1.0.3",
    "nvidia-cusparse>=12.6.3.3",
    "nvidia-cusolver>=12.0.4.66",
]
openai = [
    "uvicorn",
    "fastapi",
    "pydantic",
]
mlx = [
    "mlx_lm>=0.24.0",
]

# `dependencies` is declared dynamic in [project]; setuptools reads it from
# requirements.txt at build time. Moved below the project.* tables to follow
# the conventional pyproject ordering (build-system, project, project.*, tool.*).
[tool.setuptools.dynamic]
dependencies = { file = ["requirements.txt"] }

[tool.uv]
torch-backend = "auto"