Commit 614dd42

generate quantized lora
1 parent f7515a9 commit 614dd42

File tree: 7 files changed (+73, -46 lines)

.ci/scripts/test_llama_lora.sh

Lines changed: 3 additions & 2 deletions

@@ -107,8 +107,9 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
     model.dtype_override="fp32" \
     backend.xnnpack.enabled=true \
     backend.xnnpack.extended_ops=true \
-    export.output_name="${MODEL_SEPARATE}.pte" \
-    export.foundation_weights_file="${MODEL_SEPARATE}.ptd"
+    quantization.pt2e_quantize="xnnpack_dynamic" \
+    export.output_name="${MODEL}.pte" \
+    export.foundation_weights_file="${MODEL}.ptd"
 
 # Run llama runner.
 NOW=$(date +"%H:%M:%S")

backends/xnnpack/_passes/propagate_custom_meta_pass.py

Lines changed: 1 addition & 0 deletions

@@ -33,6 +33,7 @@ def call(self, graph_module: torch.fx.GraphModule):
                 continue
 
             if "custom" in parent_node.meta:
+                print(f"PROPAGATING CUSTOM META FROM {parent_node.name} TO {node.name}")
                 node.meta["custom"] = parent_node.meta["custom"]
 
         graph_module.recompile()
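For context, the pass touched above copies a node's "custom" metadata entry onto its users so that delegate tagging survives graph transformations. The snippet below is a minimal, self-contained sketch of that idea over a plain torch.fx graph; the helper name propagate_custom_meta and the tag value are illustrative, not the ExecuTorch pass itself.

import torch
import torch.fx


class TwoLinear(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Linear(4, 4)
        self.b = torch.nn.Linear(4, 4)

    def forward(self, x):
        return self.b(self.a(x))


def propagate_custom_meta(gm: torch.fx.GraphModule) -> None:
    # Copy meta["custom"] from each node's first input onto the node itself,
    # mirroring the assignment shown in the diff above.
    for node in gm.graph.nodes:
        if not node.all_input_nodes:
            continue
        parent_node = node.all_input_nodes[0]
        if "custom" in parent_node.meta:
            node.meta["custom"] = parent_node.meta["custom"]
    gm.recompile()


gm = torch.fx.symbolic_trace(TwoLinear())
# Tag the input placeholder; the pass then spreads the tag through the graph.
next(iter(gm.graph.nodes)).meta["custom"] = {"external_tag": "model"}
propagate_custom_meta(gm)
print([(n.name, n.meta.get("custom")) for n in gm.graph.nodes])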

backends/xnnpack/operators/node_visitor.py

Lines changed: 2 additions & 0 deletions

@@ -296,6 +296,7 @@ def get_quant_params(
                     offset=UINT64_MAX, size=num_bytes, named_key=scale_name
                 )
             )
+            print(f"NDM: adding scale tensor with key {scale_name}")
             self._named_data_store.add_named_data(
                 scale_name, bytes(scale_array), CONSTANT_TENSOR_ALIGNMENT
             )

@@ -630,6 +631,7 @@ def get_serialized_buffer_index(
         logging.info(
             f"Adding constant data with name {tensor.name}, key {named_key} and external_tag {external_tag} to named_data_store"
         )
+        print(f"NDM: Adding constant data with name {tensor.name}, key {named_key} and tag {external_tag}")
         self._named_data_store.add_named_data(
             named_key,
             bytes(array),
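Both hunks sit next to calls of the form self._named_data_store.add_named_data(key, data, alignment): each constant (quantization scales in the first hunk, serialized weight buffers in the second) is registered under a string key together with its raw bytes and a required alignment, so the delegate payload can reference the key while the bytes land in an external .ptd segment. Below is a rough sketch of that contract using a toy class, which is an assumption for illustration and not ExecuTorch's NamedDataStore API.

from dataclasses import dataclass
from typing import Dict


@dataclass
class NamedBlob:
    data: bytes
    alignment: int


class ToyNamedDataStore:
    def __init__(self) -> None:
        self._blobs: Dict[str, NamedBlob] = {}

    def add_named_data(self, key: str, data: bytes, alignment: int) -> None:
        # Registering the same key twice with different bytes would be a bug,
        # so fail loudly instead of silently overwriting.
        existing = self._blobs.get(key)
        if existing is not None and existing.data != data:
            raise ValueError(f"conflicting data for key {key!r}")
        self._blobs[key] = NamedBlob(data, alignment)


store = ToyNamedDataStore()
store.add_named_data("linear_weight_scales", b"\x00" * 16, 16)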

backends/xnnpack/test/passes/test_propagate_custom_meta_pass.py

Lines changed: 16 additions & 26 deletions

@@ -8,32 +8,30 @@
 
 from typing import Callable, Optional, Tuple, Union
 
-import executorch.backends.test.harness.stages as BaseStages
-
 import torch
 from executorch.backends.test.harness.stages import StageType
+from executorch.backends.xnnpack.test.tester import RunPasses, Tester
+from executorch.backends.xnnpack.test.tester import Quantize as XNNPackQuantize
+import executorch.backends.test.harness.stages as BaseStages
+from executorch.backends.xnnpack.test.tester.tester import (
+    ToEdgeTransformAndLower,
+)
 from executorch.backends.xnnpack.partition.config.xnnpack_config import (
     ConfigPrecisionType,
 )
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
     XnnpackFloatingPointPartitioner,
     XnnpackPartitioner,
 )
-
-from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
-    get_symmetric_quantization_config,
-)
-from executorch.backends.xnnpack.test.tester import (
-    Quantize as XNNPackQuantize,
-    RunPasses,
-    Tester,
-)
-from executorch.backends.xnnpack.test.tester.tester import ToEdgeTransformAndLower
 from executorch.exir import (
     EdgeCompileConfig,
     ExecutorchBackendConfig,
     to_edge_transform_and_lower,
 )
+
+from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
+    get_symmetric_quantization_config,
+)
 from executorch.exir.passes.external_constants_pass import (
     delegate_external_constants_pass_unlifted,
 )

@@ -56,7 +54,6 @@
     has_quantized_ops = False
     print("Missing quantized ops")
 
-
 class TestPropagateCustomMetaPass(unittest.TestCase):
     class ModuleLinear(torch.nn.Module):
         def __init__(

@@ -99,14 +96,12 @@ def run(
                 module=tagged_module,
                 gen_tag_fn=lambda x: "model",  # This is the filename the weights will be saved to. In this case, weights will be saved as "model.ptd"
             )
-            self.exported_program = export(
-                tagged_module, inputs, dynamic_shapes=self.dynamic_shapes, strict=True
-            )
+            self.exported_program = export(tagged_module, inputs, dynamic_shapes=self.dynamic_shapes, strict=True)
 
     def _test_linear(
         self,
         partitioner: XnnpackPartitioner,
-        quantization_stage: Union[BaseStages.Quantize, BaseStages.Quantize_],
+        quantization_stage: Union[BaseStages.Quantize, BaseStages.Quantize_]
     ):
         eager_model = self.ModuleLinear(
             in_size=1,

@@ -143,8 +138,8 @@ def test_quantize_(self):
             weight_granularity=PerGroup(32),
         )
         self._test_linear(
-            DynamicallyQuantizedPartitioner, BaseStages.Quantize_(config=linear_config)
-        )
+            DynamicallyQuantizedPartitioner,
+            BaseStages.Quantize_(config=linear_config))
 
     def test_pt2e_quantize(self):
         # Quantize with pt2e quantize.

@@ -157,10 +152,7 @@ def test_pt2e_quantize(self):
             get_symmetric_quantization_config(is_per_channel=True, is_dynamic=True),
         ]
         partitioners = []
-        for config_precision in [
-            ConfigPrecisionType.STATIC_QUANT,
-            ConfigPrecisionType.DYNAMIC_QUANT,
-        ]:
+        for config_precision in [ConfigPrecisionType.STATIC_QUANT, ConfigPrecisionType.DYNAMIC_QUANT]:
             for per_op_mode in [True, False]:
                 partitioners.append(
                     XnnpackPartitioner(

@@ -170,6 +162,4 @@ def test_pt2e_quantize(self):
                 )
         for quant_config in quant_configs:
             for partitioner in partitioners:
-                self._test_linear(
-                    partitioner, XNNPackQuantize(quantization_config=quant_config)
-                )
+                self._test_linear(partitioner, XNNPackQuantize(quantization_config=quant_config))
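The run() override above captures the test's setup: the unlifted module is tagged by delegate_external_constants_pass_unlifted with a gen_tag_fn that names the external weights file, and only then handed to torch.export. A hedged, self-contained version of that flow is sketched below; the toy module, the input shape, and the use of export_for_training to obtain the unlifted graph module are assumptions for illustration, not the test's exact code.

import torch
from torch.export import export, export_for_training

from executorch.exir.passes.external_constants_pass import (
    delegate_external_constants_pass_unlifted,
)


class ToyLinear(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(3, 3)

    def forward(self, x):
        return self.linear(x)


inputs = (torch.randn(1, 3),)
# Obtain an unlifted graph module to run the tagging pass on (assumed step).
tagged_module = export_for_training(ToyLinear(), inputs).module()

# gen_tag_fn returns the file name the tagged weights should be saved to;
# returning "model" means the weights land in "model.ptd", as the diff's comment notes.
delegate_external_constants_pass_unlifted(
    module=tagged_module,
    gen_tag_fn=lambda x: "model",
)
exported_program = export(tagged_module, inputs, strict=True)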

examples/models/llama/export_llama_lib.py

Lines changed: 25 additions & 9 deletions

@@ -854,6 +854,7 @@ def _to_edge_and_lower_llama_xnnpack(
     xnnpack_extended_ops: bool = False,
     generate_etrecord: bool = False,
     verbose: bool = False,
+    gen_tag_fn: Optional[Callable[[torch.fx.Node], Optional[str]]] = None,
 ) -> LLMEdgeManager: # noqa: C901
     partitioners = []
 

@@ -876,9 +877,22 @@
     if generate_etrecord:
         builder_exported.generate_etrecord = True
 
-    builder = builder_exported.pt2e_quantize(quantizers).to_edge_transform_and_lower(
+    builder = builder_exported.pt2e_quantize(quantizers)
+    from executorch.exir.passes.external_constants_pass import (
+        delegate_external_constants_pass_unlifted,
+    )
+    assert (
+        builder_exported.pre_autograd_graph_module is not None
+    ), "pre_autograd_graph_module shouldn't be None here"
+    delegate_external_constants_pass_unlifted(
+        module=builder_exported.pre_autograd_graph_module,
+        gen_tag_fn=gen_tag_fn,
+    )
+
+    builder = builder.to_edge_transform_and_lower(
         partitioners
     )
+
     if verbose:
         print_delegation_info(builder.edge_manager.exported_program().graph_module)
 

@@ -1088,6 +1102,7 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901
         llm_config.backend.xnnpack.enabled = True
 
     if llm_config.backend.xnnpack.enabled:
+        gen_tag_fn = None
         if llm_config.export.foundation_weights_file is not None:
             gen_tag_fn: Callable[[torch.fx.Node], Optional[str]] = lambda x: (
                 llm_config.export.foundation_weights_file

@@ -1096,17 +1111,17 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901
             )
 
             from executorch.exir.passes.external_constants_pass import (
-                delegate_external_constants_pass_unlifted,
+                # delegate_external_constants_pass_unlifted,
                 external_constants_pass,
             )
 
-            assert (
-                builder_exported.pre_autograd_graph_module is not None
-            ), "pre_autograd_graph_module shouldn't be None here"
-            delegate_external_constants_pass_unlifted(
-                module=builder_exported.pre_autograd_graph_module,
-                gen_tag_fn=gen_tag_fn,
-            )
+            # assert (
+            #     builder_exported.pre_autograd_graph_module is not None
+            # ), "pre_autograd_graph_module shouldn't be None here"
+            # delegate_external_constants_pass_unlifted(
+            #     module=builder_exported.pre_autograd_graph_module,
+            #     gen_tag_fn=gen_tag_fn,
+            # )
 
             # Also add a pass for 'to_executorch' to tag weights that aren't delegated.
             additional_passes.append(

@@ -1123,6 +1138,7 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901
             xnnpack_extended_ops=llm_config.backend.xnnpack.extended_ops,
             generate_etrecord=llm_config.debug.generate_etrecord,
             verbose=llm_config.debug.verbose,
+            gen_tag_fn=gen_tag_fn,
         )
     else:
         builder = _to_edge_and_lower_llama(
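The net effect of these hunks is that _export_llama still builds gen_tag_fn from llm_config.export.foundation_weights_file, but the unlifted tagging pass now runs inside _to_edge_and_lower_llama_xnnpack, after pt2e quantization, instead of before it. The sketch below illustrates the shape of such a gen_tag_fn; the diff truncates the lambda's condition, so the "lora" name check and the .ptd file name are assumptions about its intent (foundation weights get tagged with the external file, LoRA adapter weights stay untagged and thus inside the .pte), not the exact code.

from typing import Callable, Optional

import torch
import torch.fx


def make_gen_tag_fn(foundation_weights_file: str) -> Callable[[torch.fx.Node], Optional[str]]:
    def gen_tag_fn(node: torch.fx.Node) -> Optional[str]:
        # Assumed heuristic: weights whose names mention "lora" are adapter
        # weights and return None (no external tag); everything else goes to
        # the foundation weights file.
        return foundation_weights_file if "lora" not in node.name else None

    return gen_tag_fn


gen_tag_fn = make_gen_tag_fn("llama3_2.ptd")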

exir/emit/_emitter.py

Lines changed: 24 additions & 8 deletions

@@ -387,13 +387,15 @@ def _save_new_const_tensor(
 
         # Update buffer_idx to point to the end of the list where we are adding the new buffer.
         buffer = Buffer(storage=buffer_data)
-
         # Tensor is stored outside of the PTE file.
         if (
             spec.extra_tensor_info is not None
             and spec.extra_tensor_info.fully_qualified_name is not None
             and spec.extra_tensor_info.location == TensorDataLocation.EXTERNAL
         ):
+            print(f"EXTERNAL CONSTANT {spec.extra_tensor_info.fully_qualified_name}")
+            if spec.extra_tensor_info.fully_qualified_name == "_tensor_constant_2":
+                breakpoint()
             assert (
                 constant_tag is not None
             ), "Constant tag is not set for external tensor"

@@ -466,9 +468,15 @@ def _tensor_spec_to_evalue(
             and spec.extra_tensor_info.location == TensorDataLocation.EXTERNAL
         ):
             buffer_idx = self.program_state.external_constant_hash.get(hashed, -1)
+            if buffer_idx != -1:
+                # Save the constant tag for the external tensor
+                if constant_tag not in self.program_state.external_constant_map:
+                    self.program_state.external_constant_map[constant_tag] = {}
+                self.program_state.external_constant_map[constant_tag][
+                    spec.extra_tensor_info.fully_qualified_name  # pyre-ignore Undefined attribute [16]: `Optional` has no attribute `fully_qualified_name`.
+                ] = buffer_idx
         else:
             buffer_idx = self.program_state.cached_spec_hash_values.get(hashed, -1)
-
         # Haven't seen this constant before.
         if buffer_idx == -1:
             buffer_idx = self._save_new_const_tensor(

@@ -1645,18 +1653,23 @@ def _is_buffer(node: Node, graph_signature: ExportGraphSignature) -> bool:
         # suggest that the same abstract buffer is mutable in another entry point so we should
         # compel it to be considered mutable in all entry points at emission just as the user did with
         # memory planning.
-        is_mutable_buffer |= (
-            _is_buffer(self.node, self.exported_program.graph_signature)
-            and spec.mem_id is not None
-            and spec.mem_offset is not None
-        )
-
+        # is_mutable_buffer |= (
+        #     _is_buffer(self.node, self.exported_program.graph_signature)
+        #     and spec.mem_id is not None
+        #     and spec.mem_offset is not None
+        # )
+        # if fqn is not None:
+        #     print(f"Node {fqn} is mutable buffer: {is_mutable_buffer}, with cnstant_tag {constant_tag}")
+
 
         # If the placeholder has a constant_tag, it is external to the PTE file
         # and requires a fqn and location=TensorDataLocation.EXTERNAL
         if constant_tag is not None:
             assert (
                 fqn is not None
             ), "constant tagged tensors require a fully qualified name"
+
+            if fqn == "_tensor_constant_2":
+                breakpoint()
             if spec.extra_tensor_info is None:
                 spec.extra_tensor_info = ExtraTensorInfo(
                     fully_qualified_name=fqn, location=TensorDataLocation.EXTERNAL

@@ -1666,15 +1679,18 @@ def _is_buffer(node: Node, graph_signature: ExportGraphSignature) -> bool:
                 spec.extra_tensor_info.location = TensorDataLocation.EXTERNAL
 
         if is_mutable_buffer:
+            print("MUTABLE_BUFFE: ", fqn, spec.mem_id, spec.mem_offset)
             # Emit names if we are supposed to.
             if self.emitter_state.emit_mutable_buffer_names:
+                breakpoint()
                 if spec.extra_tensor_info is None:
                     spec.extra_tensor_info = ExtraTensorInfo(
                         fully_qualified_name=fqn,
                         location=TensorDataLocation.SEGMENT,
                     )
                 else:
                     spec.extra_tensor_info.fully_qualified_name = fqn
+                    spec.extra_tensor_info.location = TensorDataLocation.SEGMENT
             # if We aren't emitting the name then it needs to be memory planned.
             elif spec.mem_id is None or spec.mem_offset is None:
                 raise InternalError(
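Most of these hunks add debug output; the substantive change is in _tensor_spec_to_evalue: when an external constant's bytes are already cached (buffer_idx != -1), its fully qualified name is still recorded under its constant tag in external_constant_map, so every tagged tensor shows up in the external data index even when its bytes are deduplicated. Below is a self-contained sketch of that bookkeeping; the toy types and function names are assumptions for illustration, not the emitter's real API.

import hashlib
from typing import Dict, List


class ToyProgramState:
    def __init__(self) -> None:
        self.external_constant_buffers: List[bytes] = []
        self.external_constant_hash: Dict[str, int] = {}
        # constant_tag -> {fully qualified name -> buffer index}
        self.external_constant_map: Dict[str, Dict[str, int]] = {}


def add_external_constant(state: ToyProgramState, tag: str, fqn: str, data: bytes) -> int:
    hashed = hashlib.sha256(data).hexdigest()
    buffer_idx = state.external_constant_hash.get(hashed, -1)
    if buffer_idx == -1:
        # First time seeing these bytes: append a new buffer and cache its hash.
        state.external_constant_buffers.append(data)
        buffer_idx = len(state.external_constant_buffers) - 1
        state.external_constant_hash[hashed] = buffer_idx
    # Record the fqn -> buffer index mapping under the tag in both the new and
    # the already-cached case, which is what the added emitter branch ensures.
    state.external_constant_map.setdefault(tag, {})[fqn] = buffer_idx
    return buffer_idx


state = ToyProgramState()
w = b"\x01\x02\x03\x04"
add_external_constant(state, "model.ptd", "layers.0.weight", w)
add_external_constant(state, "model.ptd", "layers.1.weight", w)  # dedups to the same buffer index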

runtime/executor/method.cpp

Lines changed: 2 additions & 1 deletion

@@ -394,7 +394,8 @@ Error Method::parse_external_constants(const NamedDataMap* external_data_map) {
     ET_CHECK_OR_RETURN_ERROR(
         buffer.ok(),
         InvalidExternalData,
-        "Buffer retrieved from get_data is not valid");
+        "Buffer retrieved from get_data is not valid, error: %zu",
+        buffer.error());
     new (&external_constants_[n_external_constants_].buffer)
         FreeableBuffer(std::move(buffer.get()));
 
