Skip to content

Commit ff9f8f2

Browse files
authored
Merge branch 'quic:main' into qwen3_vl_moe
2 parents 1d23a92 + f64f703 commit ff9f8f2

File tree

30 files changed

+488
-218
lines changed

30 files changed

+488
-218
lines changed

QEfficient/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161

6262

6363
# Conditionally import QAIC-related modules if the SDK is installed
64-
__version__ = "0.0.1.dev0"
64+
__version__ = "1.22.0.dev0"
6565

6666

6767
def check_qaic_sdk():

QEfficient/diffusers/pipelines/configs/flux_config.json

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
{
22
"description": "Default configuration for Flux pipeline",
33

4-
"modules":
4+
"modules":
55
{
6-
"text_encoder":
6+
"text_encoder":
77
{
88
"specializations":{
99
"batch_size": 1,
1010
"seq_len": 77
1111
},
12-
"compilation":
12+
"compilation":
1313
{
1414
"onnx_path": null,
1515
"compile_dir": null,
@@ -21,18 +21,19 @@
2121
},
2222
"execute":
2323
{
24-
"device_ids": null
25-
}
24+
"device_ids": null,
25+
"qpc_path" : null
26+
}
2627

2728
},
28-
"text_encoder_2":
29+
"text_encoder_2":
2930
{
30-
"specializations":
31+
"specializations":
3132
{
3233
"batch_size": 1,
3334
"seq_len": 256
3435
},
35-
"compilation":
36+
"compilation":
3637
{
3738
"onnx_path": null,
3839
"compile_dir": null,
@@ -44,18 +45,19 @@
4445
},
4546
"execute":
4647
{
47-
"device_ids": null
48+
"device_ids": null,
49+
"qpc_path" : null
4850
}
4951
},
50-
"transformer":
52+
"transformer":
5153
{
52-
"specializations":
54+
"specializations":
5355
{
5456
"batch_size": 1,
5557
"seq_len": 256,
5658
"steps": 1
5759
},
58-
"compilation":
60+
"compilation":
5961
{
6062
"onnx_path": null,
6163
"compile_dir": null,
@@ -69,17 +71,18 @@
6971
},
7072
"execute":
7173
{
72-
"device_ids": null
74+
"device_ids": null,
75+
"qpc_path" : null
7376
}
7477
},
75-
"vae_decoder":
78+
"vae_decoder":
7679
{
77-
"specializations":
80+
"specializations":
7881
{
7982
"batch_size": 1,
8083
"channels": 16
8184
},
82-
"compilation":
85+
"compilation":
8386
{
8487
"onnx_path": null,
8588
"compile_dir": null,
@@ -92,7 +95,8 @@
9295
},
9396
"execute":
9497
{
95-
"device_ids": null
98+
"device_ids": null,
99+
"qpc_path" : null
96100
}
97101
}
98102
}

QEfficient/diffusers/pipelines/configs/wan_config.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,15 @@
3030
"mdts_mos": 1
3131
},
3232
"execute": {
33-
"device_ids": null
33+
"device_ids": null,
34+
"qpc_path" : null
3435
}
3536
},
3637
"vae_decoder":{
37-
"specializations": [
38-
{
38+
"specializations":{
3939
"batch_size": 1,
4040
"num_channels": 16
41-
}
42-
],
41+
},
4342
"compilation":
4443
{
4544
"onnx_path": null,
@@ -55,7 +54,8 @@
5554
},
5655
"execute":
5756
{
58-
"device_ids": null
57+
"device_ids": null,
58+
"qpc_path" : null
5959
}
6060
}
6161
}

QEfficient/diffusers/pipelines/flux/pipeline_flux.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
compile_modules_parallel,
3636
compile_modules_sequential,
3737
config_manager,
38-
set_module_device_ids,
38+
set_execute_params,
3939
)
4040
from QEfficient.generation.cloud_infer import QAICInferenceSession
4141
from QEfficient.utils.logging_utils import logger
@@ -237,7 +237,8 @@ def export(self, export_dir: Optional[str] = None, use_onnx_subfunctions: bool =
237237
if use_onnx_subfunctions and module_name in ONNX_SUBFUNCTION_MODULE:
238238
export_params["use_onnx_subfunctions"] = True
239239

240-
module_obj.export(**export_params)
240+
if module_obj.qpc_path is None:
241+
module_obj.export(**export_params)
241242

242243
@staticmethod
243244
def get_default_config_path() -> str:
@@ -248,7 +249,7 @@ def get_default_config_path() -> str:
248249
str: Absolute path to the flux_config.json file containing default pipeline
249250
configuration settings for compilation and device allocation.
250251
"""
251-
return "QEfficient/diffusers/pipelines/configs/flux_config.json"
252+
return os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs/flux_config.json")
252253

253254
def compile(
254255
self,
@@ -292,6 +293,12 @@ def compile(
292293
... width=512
293294
... )
294295
"""
296+
# Load compilation configuration
297+
config_manager(self, config_source=compile_config, use_onnx_subfunctions=use_onnx_subfunctions)
298+
299+
# Set device IDs and, if a precompiled qpc exists, its qpc path
300+
set_execute_params(self)
301+
295302
# Ensure all modules are exported to ONNX before compilation
296303
if any(
297304
path is None
@@ -304,9 +311,6 @@ def compile(
304311
):
305312
self.export(use_onnx_subfunctions=use_onnx_subfunctions)
306313

307-
# Load compilation configuration
308-
config_manager(self, config_source=compile_config, use_onnx_subfunctions=use_onnx_subfunctions)
309-
310314
# Calculate compressed latent dimension using utility function
311315
cl, latent_height, latent_width = calculate_compressed_latent_dimension(
312316
height, width, self.model.vae_scale_factor
@@ -640,9 +644,6 @@ def __call__(
640644
use_onnx_subfunctions=use_onnx_subfunctions,
641645
)
642646

643-
# Set device IDs for all modules based on configuration
644-
set_module_device_ids(self)
645-
646647
# Validate all inputs
647648
self.model.check_inputs(
648649
prompt,

QEfficient/diffusers/pipelines/pipeline_utils.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -115,16 +115,22 @@ def config_manager(cls, config_source: Optional[str] = None, use_onnx_subfunctio
115115
cls.custom_config["modules"][module_name]["compilation"]["use_onnx_subfunctions"] = use_onnx_subfunctions
116116

117117

118-
def set_module_device_ids(cls):
118+
def set_execute_params(cls):
119119
"""
120-
Set device IDs for each module based on the custom configuration.
120+
Set device IDs and qpc_paths for each module based on the custom configuration.
121121
122-
Iterates through all modules in the pipeline and assigns device IDs
123-
from the configuration file to each module's device_ids attribute.
122+
Iterates through all modules in the pipeline and assigns device IDs and qpc_paths
123+
from the configuration file to the corresponding attributes of each module.
124124
"""
125125
config_modules = cls.custom_config["modules"]
126126
for module_name, module_obj in cls.modules.items():
127127
module_obj.device_ids = config_modules[module_name]["execute"]["device_ids"]
128+
module_obj.qpc_path = config_modules[module_name]["execute"]["qpc_path"]
129+
if module_obj.qpc_path:
130+
if not os.path.exists(module_obj.qpc_path):
131+
raise FileNotFoundError(
132+
f"Given qpc path: {module_obj.qpc_path} does not exist. Please provide correct path or keep null"
133+
)
128134

129135

130136
def compile_modules_parallel(
@@ -158,8 +164,10 @@ def _prepare_and_compile(module_name: str, module_obj: Any) -> None:
158164
specializations = [specializations]
159165
else:
160166
specializations = [specializations]
161-
# Compile with prepared specializations
162-
module_obj.compile(specializations=specializations, **compile_kwargs)
167+
168+
if module_obj.qpc_path is None:
169+
# Compile with prepared specializations
170+
module_obj.compile(specializations=specializations, **compile_kwargs)
163171

164172
# Execute compilations in parallel
165173
with ThreadPoolExecutor(max_workers=len(modules)) as executor:
@@ -209,8 +217,10 @@ def compile_modules_sequential(
209217
specializations = [specializations]
210218
else:
211219
specializations = [specializations]
212-
# Compile with prepared specializations
213-
module_obj.compile(specializations=specializations, **compile_kwargs)
220+
221+
if module_obj.qpc_path is None:
222+
# Compile with prepared specializations
223+
module_obj.compile(specializations=specializations, **compile_kwargs)
214224

215225

216226
@dataclass(frozen=True)

QEfficient/diffusers/pipelines/wan/pipeline_wan.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
compile_modules_parallel,
3434
compile_modules_sequential,
3535
config_manager,
36-
set_module_device_ids,
36+
set_execute_params,
3737
)
3838
from QEfficient.generation.cloud_infer import QAICInferenceSession
3939
from QEfficient.utils import constants
@@ -243,7 +243,8 @@ def export(
243243
if use_onnx_subfunctions and module_name in ONNX_SUBFUNCTION_MODULE:
244244
export_params["use_onnx_subfunctions"] = True
245245

246-
module_obj.export(**export_params)
246+
if module_obj.qpc_path is None:
247+
module_obj.export(**export_params)
247248

248249
@staticmethod
249250
def get_default_config_path():
@@ -253,7 +254,7 @@ def get_default_config_path():
253254
Returns:
254255
str: Path to the default WAN configuration JSON file.
255256
"""
256-
return os.path.join(os.path.dirname(__file__), "wan_config.json")
257+
return os.path.join(os.path.dirname(os.path.dirname(__file__)), "configs/wan_config.json")
257258

258259
def compile(
259260
self,
@@ -303,6 +304,12 @@ def compile(
303304
... num_frames=81
304305
... )
305306
"""
307+
# Load compilation configuration
308+
config_manager(self, config_source=compile_config, use_onnx_subfunctions=use_onnx_subfunctions)
309+
310+
# Set device IDs and, if a precompiled qpc exists, its qpc path
311+
set_execute_params(self)
312+
306313
# Ensure all modules are exported to ONNX before compilation
307314
if any(
308315
path is None
@@ -313,9 +320,6 @@ def compile(
313320
):
314321
self.export(use_onnx_subfunctions=use_onnx_subfunctions)
315322

316-
# Load compilation configuration
317-
config_manager(self, config_source=compile_config, use_onnx_subfunctions=use_onnx_subfunctions)
318-
319323
# Configure pipeline dimensions and calculate compressed latent parameters
320324
cl, latent_height, latent_width, latent_frames = calculate_latent_dimensions_with_frames(
321325
height,
@@ -461,9 +465,6 @@ def __call__(
461465
num_frames=num_frames,
462466
)
463467

464-
# Set device IDs for all modules based on configuration
465-
set_module_device_ids(self)
466-
467468
# Step 1: Validate all inputs
468469
self.model.check_inputs(
469470
prompt,

0 commit comments

Comments
 (0)