
Commit f776965

Merge branch 'main' into add-split-copy

2 parents c24f3e8 + 03f6bcc

File tree

13 files changed: +212 −49 lines

CMakeLists.txt

Lines changed: 36 additions & 16 deletions
@@ -380,6 +380,9 @@ add_library(executorch_core ${_executorch_core__srcs})
 # Legacy name alias.
 add_library(executorch_no_prim_ops ALIAS executorch_core)
 
+# A list of all configured backends.
+set(_executorch_backends "")
+
 target_link_libraries(executorch_core PRIVATE program_schema)
 if(ANDROID)
   target_link_libraries(executorch_core PUBLIC log)
@@ -524,6 +527,7 @@ install(FILES tools/cmake/executorch-config.cmake
 
 if(EXECUTORCH_BUILD_ARM_BAREMETAL)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
+  list(APPEND _executorch_backends executorch_delegate_ethos_u)
 endif()
 
 if(EXECUTORCH_BUILD_CADENCE)
@@ -532,30 +536,37 @@ endif()
 
 if(EXECUTORCH_BUILD_NXP_NEUTRON)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/nxp)
+  list(APPEND _executorch_backends executorch_delegate_neutron)
 endif()
 
 if(EXECUTORCH_BUILD_COREML)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/coreml)
+  list(APPEND _executorch_backends coremldelegate)
 endif()
 
 if(EXECUTORCH_BUILD_MPS)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/mps)
+  list(APPEND _executorch_backends mpsdelegate)
 endif()
 
 if(EXECUTORCH_BUILD_NEURON)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/mediatek)
+  list(APPEND _executorch_backends neuron_backend)
 endif()
 
 if(EXECUTORCH_BUILD_OPENVINO)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/openvino)
+  list(APPEND _executorch_backends openvino_backend)
 endif()
 
 if(EXECUTORCH_BUILD_QNN)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/qualcomm)
+  list(APPEND _executorch_backends qnn_executorch_backend)
 endif()
 
 if(EXECUTORCH_BUILD_XNNPACK)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack)
+  list(APPEND _executorch_backends xnnpack_backend)
 endif()
 
 if(EXECUTORCH_BUILD_CORTEX_M)
@@ -757,10 +768,35 @@ if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
   executorch_target_link_options_shared_lib(quantized_ops_lib)
 endif()
 
+if(EXECUTORCH_BUILD_VULKAN)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/vulkan)
+  list(APPEND _executorch_backends vulkan_backend vulkan_schema)
+endif()
+
+if(EXECUTORCH_BUILD_VGF)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
+  list(APPEND _executorch_backends vgf_backend)
+endif()
+
+# Top-level interface targets.
+add_library(executorch_backends INTERFACE)
+add_library(executorch::backends ALIAS executorch_backends)
+
+# A target containing all configured backends.
+target_link_libraries(executorch_backends INTERFACE ${_executorch_backends})
+
+install(
+  TARGETS executorch_backends
+  INCLUDES
+  DESTINATION ${_common_include_directories}
+)
+
 if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   # Baseline libraries that executor_runner will link against.
   set(_executor_runner_libs executorch extension_evalue_util
       extension_runner_util gflags
+      executorch_backends
   )
 
   if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
@@ -780,18 +816,10 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
     list(APPEND _executor_runner_libs $<LINK_LIBRARY:WHOLE_ARCHIVE,custom_ops>)
   endif()
 
-  if(EXECUTORCH_BUILD_XNNPACK)
-    list(APPEND _executor_runner_libs xnnpack_backend)
-  endif()
-
   if(EXECUTORCH_ENABLE_EVENT_TRACER)
     list(APPEND _executor_runner_libs etdump flatccrt)
   endif()
 
-  if(EXECUTORCH_BUILD_COREML AND APPLE)
-    list(APPEND _executor_runner_libs coremldelegate)
-  endif()
-
   add_executable(executor_runner ${_executor_runner__srcs})
   if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
     target_link_options_gc_sections(executor_runner)
@@ -814,14 +842,6 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   endif()
 endif()
 
-if(EXECUTORCH_BUILD_VULKAN)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/vulkan)
-endif()
-if(EXECUTORCH_BUILD_VGF)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
-endif()
-
 if(EXECUTORCH_BUILD_ANDROID_JNI)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/android)
 endif()

backends/vulkan/CMakeLists.txt

Lines changed: 0 additions & 21 deletions
@@ -122,27 +122,6 @@ executorch_target_link_options_shared_lib(vulkan_backend)
 
 set_property(TARGET vulkan_backend PROPERTY CXX_STANDARD 17)
 
-# Executor Runner
-
-if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
-  set(VULKAN_RUNNER_SRCS ${_executor_runner__srcs})
-  list(TRANSFORM VULKAN_RUNNER_SRCS PREPEND "${EXECUTORCH_ROOT}/")
-
-  set(VGF_BACKEND )
-  if(EXECUTORCH_BUILD_VGF)
-    set(VGF_BACKEND vgf_backend)
-  endif()
-
-  add_executable(vulkan_executor_runner ${VULKAN_RUNNER_SRCS})
-  target_link_libraries(
-    vulkan_executor_runner ${_executor_runner_libs} vulkan_schema
-    vulkan_backend
-    ${VGF_BACKEND}
-  )
-
-  target_compile_options(vulkan_executor_runner PUBLIC ${VULKAN_CXX_FLAGS})
-endif()
-
 # Test targets
 
 install(

backends/xnnpack/operators/node_visitor.py

Lines changed: 4 additions & 3 deletions
@@ -622,9 +622,10 @@ def get_serialized_buffer_index(
         )
 
         external_tag = tensor.meta.get("delegate_constant_tag", None)
-        logging.info(
-            f"Adding constant data with name {tensor.name}, key {named_key} and external_tag {external_tag} to named_data_store"
-        )
+        if external_tag is not None:
+            logging.info(
+                f"Adding constant data with name {tensor.name}, key {named_key} and external_tag {external_tag} to named_data_store"
+            )
         self._named_data_store.add_named_data(
             named_key,
             bytes(array),

devtools/etrecord/tests/etrecord_test.py

Lines changed: 11 additions & 0 deletions
@@ -92,6 +92,17 @@ def check_graph_closeness(self, graph_a, graph_b):
             self.assertEqual(
                 node_a.meta.get("debug_handle"), node_b.meta.get("debug_handle")
             )
+            from_node_a = node_a.meta.get("from_node")
+            from_node_b = node_b.meta.get("from_node")
+
+            if from_node_a is None:
+                self.assertIsNone(from_node_b)
+            else:
+                self.assertIsNotNone(from_node_b)
+                for node_source_a, node_source_b in zip(from_node_a, from_node_b):
+                    self.assertEqual(
+                        node_source_a.to_dict(), node_source_b.to_dict()
+                    )
 
     def test_etrecord_generation(self):
         captured_output, edge_output, et_output = self.get_test_model()

examples/models/llama/export_llama_lib.py

Lines changed: 12 additions & 0 deletions
@@ -239,6 +239,18 @@ def build_args_parser() -> argparse.ArgumentParser:
         help="checkpoint directory. Use with a sharded checkpoint, not for the standard llama2 model. Note, checkpoint_dir takes precedence over checkpoint if both are set.",
     )
 
+    parser.add_argument(
+        "--adapter_checkpoint",
+        required=False,
+        help="Path to the adapter.pt file from torchtune. Used if the model has trained LoRA adapters. Must provide adapter_config.json",
+    )
+
+    parser.add_argument(
+        "--adapter_config",
+        required=False,
+        help="Path to the adapter_config.json file. Used if the model has trained LoRA adapters. Must provide adapter_checkpoint.",
+    )
+
    parser.add_argument(
        "--use_qnn_sha",
        action="store_true",

examples/models/llama/model.py

Lines changed: 22 additions & 0 deletions
@@ -46,6 +46,13 @@ def __init__(self, llm_config: Optional[LlmConfig] = None):
         checkpoint_dir = self.llm_config.base.checkpoint_dir
         params_path = self.llm_config.base.params
 
+        # Adapter checkpoint and config.
+        adapter_checkpoint_path = self.llm_config.base.adapter_checkpoint
+        adapter_config_path = self.llm_config.base.adapter_config
+        assert (adapter_checkpoint_path is None and adapter_config_path is None) or (
+            adapter_checkpoint_path is not None and adapter_config_path is not None
+        ), "Both adapter_checkpoint_path and adapter_config_path must be specified or neither must be specified."
+
         self.use_kv_cache = self.llm_config.model.use_kv_cache
         self.use_sdpa_with_kv_cache_op = self.llm_config.model.use_sdpa_with_kv_cache
         self.generate_full_logits = self.llm_config.debug.generate_full_logits
@@ -129,6 +136,20 @@ def __init__(self, llm_config: Optional[LlmConfig] = None):
         with open(params_path, "r") as f:
             params = json.loads(f.read())
 
+        # Get adapter checkpoint and config.
+        adapter_checkpoint = {}
+        adapter_config = {}
+        if adapter_checkpoint_path:
+            adapter_checkpoint = torch.load(
+                adapter_checkpoint_path, map_location=device, mmap=True
+            )
+            from torchtune.models import convert_weights
+
+            adapter_checkpoint = convert_weights.tune_to_meta(adapter_checkpoint)
+            with open(adapter_config_path, "r") as f:
+                adapter_config = json.loads(f.read())
+            checkpoint.update(adapter_checkpoint)
+
         output_prune_map = None
         if self.output_prune_map_path is not None:
             with open(self.output_prune_map_path, "r") as f:
@@ -153,6 +174,7 @@ def __init__(self, llm_config: Optional[LlmConfig] = None):
             output_prune_map=output_prune_map,
             enable_dynamic_shape=self.enable_dynamic_shape,
             **params,
+            **adapter_config,
         )
 
         if model_args.use_scaled_rope:
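
For orientation, a minimal sketch of the data flow, assuming an adapter_config.json whose keys match the ModelArgs LoRA fields shown in model_args.py below (hypothetical contents; real torchtune output may carry different or additional keys):

import json

# Hypothetical adapter_config.json contents; the keys mirror the LoRA
# fields on ModelArgs (r, lora_alpha, target_modules).
adapter_config = json.loads(
    '{"r": 8, "lora_alpha": 16, "target_modules": ["q_proj", "v_proj"]}'
)

# model.py splats this dict into the constructor alongside params:
#     ModelArgs(..., **params, **adapter_config)
# so each key must name a ModelArgs field; since ModelArgs is a dataclass,
# an unknown key would raise TypeError.
print(adapter_config["r"], adapter_config["lora_alpha"])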

examples/models/llama/model_args.py

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ class ModelArgs:
     lora_args: Optional[dict] = None
 
     # LoRA arguments to set up a LoRA inference model.
-    # These arguments come directly from a torchtune LoRA config.
+    # These arguments come directly from a torchtune adapter_config.json file.
     r: Optional[int] = None  # Rank.
     lora_alpha: Optional[int] = None  # Alpha.
     # Eg. q_proj, k_proj, v_proj, output_proj

examples/models/llama/runner/static_attention_io_manager.h

Lines changed: 38 additions & 0 deletions
@@ -602,6 +602,39 @@ class StaticAttentionIOManager {
     }
   }
 
+  /**
+   * Prefill helper. Run multiple inferences as needed depending on the length
+   * of the prompt and method's input length. Returns the position in the
+   * output that corresponds to the end of the prompt during the last
+   * inference.
+   */
+  template <typename TokenT>
+  size_t prefill(
+      executorch::runtime::Span<TokenT> tokens,
+      executorch::runtime::Span<TokenT> input_buffer,
+      executorch::runtime::Method& method) {
+    size_t input_len = input_buffer.size();
+    get_mask(input_buffer.size()).set_causal_mask();
+
+    size_t batch_len = 0;
+    for (size_t i = 0; i < tokens.size(); i += input_len) {
+      batch_len = std::min(input_len, tokens.size() - i);
+      std::copy(&tokens[i], &tokens[i + batch_len], input_buffer.begin());
+      prepare(method);
+      ET_CHECK(method.execute() == executorch::runtime::Error::Ok);
+      update(
+          method,
+          config_.k_cache_output_indices,
+          config_.v_cache_output_indices,
+          batch_len);
+    }
+    return batch_len - 1;
+  }
+
+  /**
+   * Decode helper. The `sample` argument is called after each inference and
+   * should retrieve the logits from the `method` argument's output and return
+   * the sampled token.
+   */
   template <typename TokenT>
   std::vector<TokenT> decode(
       TokenT prev_tok,
@@ -632,6 +665,11 @@ class StaticAttentionIOManager {
     return generated_tokens;
   }
 
+  /**
+   * Lookahead decode helper. The `sample` argument is called after each
+   * inference and should retrieve the logits from the `method` argument's
+   * output and return the sampled token for all output positions.
+   */
   template <typename TokenT>
   std::vector<TokenT> lookahead_decode(
       TokenT prev_tok,

exir/serde/serialize.py

Lines changed: 31 additions & 0 deletions
@@ -41,6 +41,7 @@
 )
 from torch._export.verifier import load_verifier
 from torch.fx.experimental import symbolic_shapes
+from torch.fx.traceback import NodeSource
 
 log: logging.Logger = logging.getLogger(__name__)
 
@@ -141,8 +142,24 @@ def serialize_metadata(self, node: torch.fx.Node) -> Dict[str, str]:
             debug_handle = node.meta["debug_handle"]
             meta["debug_handle"] = str(debug_handle)
 
+        if "from_node" in node.meta:
+            from_node = node.meta["from_node"]
+            # Serialize from_node as JSON since it's a complex nested structure
+            meta["from_node"] = json.dumps(self._make_from_node_json_acceptable(from_node))
+
         return meta
 
+    def _make_from_node_json_acceptable(self, from_node: Optional[List[NodeSource]]):
+        """
+        Serialize from_node metadata from a list of NodeSource objects to a list of dictionaries.
+        """
+        if from_node is None:
+            return None
+
+        json_acceptable_from_node = [node_source.to_dict() for node_source in from_node if isinstance(node_source, NodeSource)]
+
+        return json_acceptable_from_node
+
     def serialize_alloc_inputs(
         self, inputs  # pyre-ignore
     ) -> List[schema.NamedArgument]:
@@ -473,8 +490,22 @@ def deserialize_metadata(self, metadata: Dict[str, str]) -> Dict[str, Any]:
         if debug_handle := metadata.get("debug_handle"):
             res["debug_handle"] = int(debug_handle)
 
+        if from_node_str := metadata.get("from_node"):
+            res["from_node"] = self._deserialize_from_node(json.loads(from_node_str))
+
         return res
 
+    def _deserialize_from_node(self, from_node_data: Optional[List[Dict[str, Any]]]) -> Optional[List[NodeSource]]:
+        """
+        Recursively deserialize from_node metadata from JSON data.
+        """
+        if from_node_data is None:
+            return None
+
+        assert isinstance(from_node_data, list)
+
+        return [NodeSource._from_dict(fn_dict) for fn_dict in from_node_data]
+
     # pyre-ignore
     def deserialize_alloc_inputs(self, serialized_inputs: List[schema.NamedArgument]):
         def deserialize_alloc_spec(serialized_alloc_spec: str) -> memory.AllocSpec:
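
A minimal standalone sketch of the round trip these helpers implement; `to_dict` and `NodeSource._from_dict` are the same NodeSource methods the diff itself relies on:

import json
from torch.fx.traceback import NodeSource

def roundtrip_from_node(from_node: list) -> list:
    # Serialize: NodeSource -> dict -> JSON string, as serialize_metadata stores it.
    payload = json.dumps([ns.to_dict() for ns in from_node])
    # Deserialize: JSON string -> dict -> NodeSource, as deserialize_metadata restores it.
    return [NodeSource._from_dict(d) for d in json.loads(payload)]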
