Skip to content

Commit ddbc698

Browse files
authored
Merge branch 'main' into wasm-bindings-js
2 parents f806259 + ce9da63 commit ddbc698

File tree

14 files changed

+225
-52
lines changed

14 files changed

+225
-52
lines changed

CMakeLists.txt

Lines changed: 47 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -380,6 +380,12 @@ add_library(executorch_core ${_executorch_core__srcs})
380380
# Legacy name alias.
381381
add_library(executorch_no_prim_ops ALIAS executorch_core)
382382

383+
# A list of all configured backends.
384+
set(_executorch_backends "")
385+
386+
# A list of all configured extensions.
387+
set(_executorch_extensions "")
388+
383389
target_link_libraries(executorch_core PRIVATE program_schema)
384390
if(ANDROID)
385391
target_link_libraries(executorch_core PUBLIC log)
@@ -524,6 +530,7 @@ install(FILES tools/cmake/executorch-config.cmake
524530

525531
if(EXECUTORCH_BUILD_ARM_BAREMETAL)
526532
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
533+
list(APPEND _executorch_backends executorch_delegate_ethos_u)
527534
endif()
528535

529536
if(EXECUTORCH_BUILD_CADENCE)
@@ -532,30 +539,37 @@ endif()
532539

533540
if(EXECUTORCH_BUILD_NXP_NEUTRON)
534541
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/nxp)
542+
list(APPEND _executorch_backends executorch_delegate_neutron)
535543
endif()
536544

537545
if(EXECUTORCH_BUILD_COREML)
538546
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/coreml)
547+
list(APPEND _executorch_backends coremldelegate)
539548
endif()
540549

541550
if(EXECUTORCH_BUILD_MPS)
542551
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/mps)
552+
list(APPEND _executorch_backends mpsdelegate)
543553
endif()
544554

545555
if(EXECUTORCH_BUILD_NEURON)
546556
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/mediatek)
557+
list(APPEND _executorch_backends neuron_backend)
547558
endif()
548559

549560
if(EXECUTORCH_BUILD_OPENVINO)
550561
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/openvino)
562+
list(APPEND _executorch_backends openvino_backend)
551563
endif()
552564

553565
if(EXECUTORCH_BUILD_QNN)
554566
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/qualcomm)
567+
list(APPEND _executorch_backends qnn_executorch_backend)
555568
endif()
556569

557570
if(EXECUTORCH_BUILD_XNNPACK)
558571
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack)
572+
list(APPEND _executorch_backends xnnpack_backend)
559573
endif()
560574

561575
if(EXECUTORCH_BUILD_CORTEX_M)
@@ -568,6 +582,7 @@ endif()
568582

569583
if(EXECUTORCH_BUILD_EXTENSION_APPLE)
570584
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/apple)
585+
list(APPEND _executorch_extensions apple_extension)
571586
endif()
572587

573588
if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
@@ -578,6 +593,7 @@ if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
578593
FILES_MATCHING
579594
PATTERN "*.h"
580595
)
596+
list(APPEND _executorch_extensions extension_data_loader)
581597
endif()
582598

583599
if(EXECUTORCH_BUILD_EXTENSION_EVALUE_UTIL)
@@ -592,6 +608,7 @@ endif()
592608

593609
if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
594610
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/flat_tensor)
611+
list(APPEND _executorch_extensions extension_flat_tensor)
595612
endif()
596613

597614
if(EXECUTORCH_BUILD_EXTENSION_MODULE)
@@ -602,6 +619,7 @@ if(EXECUTORCH_BUILD_EXTENSION_MODULE)
602619
FILES_MATCHING
603620
PATTERN "*.h"
604621
)
622+
list(APPEND _executorch_extensions extension_module_static)
605623
endif()
606624

607625
if(EXECUTORCH_BUILD_EXTENSION_LLM)
@@ -621,14 +639,17 @@ if(EXECUTORCH_BUILD_EXTENSION_LLM)
621639
${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG}
622640
)
623641
endif()
642+
list(APPEND _executorch_extensions tokenizers)
624643
endif()
625644

626645
if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
627646
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/runner)
647+
list(APPEND _executorch_extensions extension_llm_runner)
628648
endif()
629649

630650
if(EXECUTORCH_BUILD_EXTENSION_LLM_APPLE)
631651
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/apple)
652+
list(APPEND _executorch_extensions extension_llm_apple)
632653
endif()
633654

634655
if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
@@ -639,10 +660,12 @@ if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
639660
FILES_MATCHING
640661
PATTERN "*.h"
641662
)
663+
list(APPEND _executorch_extensions extension_runner_util)
642664
endif()
643665

644666
if(EXECUTORCH_BUILD_EXTENSION_TENSOR)
645667
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/tensor)
668+
list(APPEND _executorch_extensions extension_tensor)
646669
endif()
647670

648671
if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
@@ -749,6 +772,7 @@ endif()
749772

750773
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
751774
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/training)
775+
list(APPEND _executorch_extensions extension_training)
752776
endif()
753777

754778
if(EXECUTORCH_BUILD_KERNELS_LLM)
@@ -761,10 +785,32 @@ if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
761785
executorch_target_link_options_shared_lib(quantized_ops_lib)
762786
endif()
763787

788+
# Backends that are configured after the kernel libraries. Each one records the
# targets it contributes in _executorch_backends so that they are picked up by
# the executorch_backends interface target defined below.
if(EXECUTORCH_BUILD_VULKAN)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/vulkan)
  list(APPEND _executorch_backends vulkan_backend vulkan_schema)
endif()

if(EXECUTORCH_BUILD_VGF)
  # backends/arm is already added earlier in this file when
  # EXECUTORCH_BUILD_ARM_BAREMETAL is set. CMake refuses to add the same
  # source/binary directory twice, so only add it here when that has not
  # happened yet.
  # NOTE(review): assumes backends/arm defines vgf_backend whenever
  # EXECUTORCH_BUILD_VGF is set, regardless of which option triggered the
  # add_subdirectory() call -- confirm against backends/arm/CMakeLists.txt.
  if(NOT EXECUTORCH_BUILD_ARM_BAREMETAL)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
  endif()
  list(APPEND _executorch_backends vgf_backend)
endif()

# Top-level interface targets.

# A target containing all configured backends. Linking against it (or its
# namespaced alias executorch::backends) pulls in every target that was
# appended to _executorch_backends above. The list is expanded unquoted on
# purpose so that each entry becomes a separate link item.
add_library(executorch_backends INTERFACE)
add_library(executorch::backends ALIAS executorch_backends)
target_link_libraries(executorch_backends INTERFACE ${_executorch_backends})

# A target containing all configured extensions. Mirrors executorch_backends
# for the targets collected in _executorch_extensions.
add_library(executorch_extensions INTERFACE)
add_library(executorch::extensions ALIAS executorch_extensions)
target_link_libraries(executorch_extensions INTERFACE ${_executorch_extensions})
809+
764810
if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
765811
# Baseline libraries that executor_runner will link against.
766812
set(_executor_runner_libs executorch extension_evalue_util
767-
extension_runner_util gflags
813+
extension_runner_util gflags executorch_backends
768814
)
769815

770816
if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
@@ -784,18 +830,10 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
784830
list(APPEND _executor_runner_libs $<LINK_LIBRARY:WHOLE_ARCHIVE,custom_ops>)
785831
endif()
786832

787-
if(EXECUTORCH_BUILD_XNNPACK)
788-
list(APPEND _executor_runner_libs xnnpack_backend)
789-
endif()
790-
791833
if(EXECUTORCH_ENABLE_EVENT_TRACER)
792834
list(APPEND _executor_runner_libs etdump flatccrt)
793835
endif()
794836

795-
if(EXECUTORCH_BUILD_COREML AND APPLE)
796-
list(APPEND _executor_runner_libs coremldelegate)
797-
endif()
798-
799837
add_executable(executor_runner ${_executor_runner__srcs})
800838
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
801839
target_link_options_gc_sections(executor_runner)
@@ -818,14 +856,6 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
818856
endif()
819857
endif()
820858

821-
if(EXECUTORCH_BUILD_VULKAN)
822-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/vulkan)
823-
endif()
824-
if(EXECUTORCH_BUILD_VGF)
825-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
826-
endif()
827-
828-
829859
if(EXECUTORCH_BUILD_ANDROID_JNI)
830860
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/android)
831861
endif()

backends/vulkan/CMakeLists.txt

Lines changed: 0 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -122,27 +122,6 @@ executorch_target_link_options_shared_lib(vulkan_backend)
122122

123123
set_property(TARGET vulkan_backend PROPERTY CXX_STANDARD 17)
124124

125-
# Executor Runner
126-
127-
if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
128-
set(VULKAN_RUNNER_SRCS ${_executor_runner__srcs})
129-
list(TRANSFORM VULKAN_RUNNER_SRCS PREPEND "${EXECUTORCH_ROOT}/")
130-
131-
set(VGF_BACKEND )
132-
if(EXECUTORCH_BUILD_VGF)
133-
set(VGF_BACKEND vgf_backend)
134-
endif()
135-
136-
add_executable(vulkan_executor_runner ${VULKAN_RUNNER_SRCS})
137-
target_link_libraries(
138-
vulkan_executor_runner ${_executor_runner_libs} vulkan_schema
139-
vulkan_backend
140-
${VGF_BACKEND}
141-
)
142-
143-
target_compile_options(vulkan_executor_runner PUBLIC ${VULKAN_CXX_FLAGS})
144-
endif()
145-
146125
# Test targets
147126

148127
install(

backends/xnnpack/operators/node_visitor.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -622,9 +622,10 @@ def get_serialized_buffer_index(
622622
)
623623

624624
external_tag = tensor.meta.get("delegate_constant_tag", None)
625-
logging.info(
626-
f"Adding constant data with name {tensor.name}, key {named_key} and external_tag {external_tag} to named_data_store"
627-
)
625+
if external_tag is not None:
626+
logging.info(
627+
f"Adding constant data with name {tensor.name}, key {named_key} and external_tag {external_tag} to named_data_store"
628+
)
628629
self._named_data_store.add_named_data(
629630
named_key,
630631
bytes(array),

devtools/etrecord/tests/etrecord_test.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,17 @@ def check_graph_closeness(self, graph_a, graph_b):
9292
self.assertEqual(
9393
node_a.meta.get("debug_handle"), node_b.meta.get("debug_handle")
9494
)
95+
from_node_a = node_a.meta.get("from_node")
96+
from_node_b = node_b.meta.get("from_node")
97+
98+
if from_node_a is None:
99+
self.assertIsNone(from_node_b)
100+
else:
101+
self.assertIsNotNone(from_node_b)
102+
for node_source_a, node_source_b in zip(from_node_a, from_node_b):
103+
self.assertEqual(
104+
node_source_a.to_dict(), node_source_b.to_dict()
105+
)
95106

96107
def test_etrecord_generation(self):
97108
captured_output, edge_output, et_output = self.get_test_model()

examples/models/llama/export_llama_lib.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -239,6 +239,18 @@ def build_args_parser() -> argparse.ArgumentParser:
239239
help="checkpoint directory. Use with a sharded checkpoint, not for the standard llama2 model. Note, checkpoint_dir takes precedence over checkpoint if both are set.",
240240
)
241241

242+
parser.add_argument(
243+
"--adapter_checkpoint",
244+
required=False,
245+
help="Path to the adapter.pt file from torchtune. Used if the model has trained LoRA adapters. Must provide adapter_config.json",
246+
)
247+
248+
parser.add_argument(
249+
"--adapter_config",
250+
required=False,
251+
help="Path to the adapter_config.json file. Used if the model has trained LoRA adapters. Must provide adapter_checkpoint.",
252+
)
253+
242254
parser.add_argument(
243255
"--use_qnn_sha",
244256
action="store_true",

examples/models/llama/model.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,13 @@ def __init__(self, llm_config: Optional[LlmConfig] = None):
4646
checkpoint_dir = self.llm_config.base.checkpoint_dir
4747
params_path = self.llm_config.base.params
4848

49+
# Adapter checkpoint and config.
50+
adapter_checkpoint_path = self.llm_config.base.adapter_checkpoint
51+
adapter_config_path = self.llm_config.base.adapter_config
52+
assert (adapter_checkpoint_path is None and adapter_config_path is None) or (
53+
adapter_checkpoint_path is not None and adapter_config_path is not None
54+
), "Both adapter_checkpoint_path and adapter_config_path must be specified or neither must be specified."
55+
4956
self.use_kv_cache = self.llm_config.model.use_kv_cache
5057
self.use_sdpa_with_kv_cache_op = self.llm_config.model.use_sdpa_with_kv_cache
5158
self.generate_full_logits = self.llm_config.debug.generate_full_logits
@@ -129,6 +136,20 @@ def __init__(self, llm_config: Optional[LlmConfig] = None):
129136
with open(params_path, "r") as f:
130137
params = json.loads(f.read())
131138

139+
# Get adapter checkpoint and config.
140+
adapter_checkpoint = {}
141+
adapter_config = {}
142+
if adapter_checkpoint_path:
143+
adapter_checkpoint = torch.load(
144+
adapter_checkpoint_path, map_location=device, mmap=True
145+
)
146+
from torchtune.models import convert_weights
147+
148+
adapter_checkpoint = convert_weights.tune_to_meta(adapter_checkpoint)
149+
with open(adapter_config_path, "r") as f:
150+
adapter_config = json.loads(f.read())
151+
checkpoint.update(adapter_checkpoint)
152+
132153
output_prune_map = None
133154
if self.output_prune_map_path is not None:
134155
with open(self.output_prune_map_path, "r") as f:
@@ -153,6 +174,7 @@ def __init__(self, llm_config: Optional[LlmConfig] = None):
153174
output_prune_map=output_prune_map,
154175
enable_dynamic_shape=self.enable_dynamic_shape,
155176
**params,
177+
**adapter_config,
156178
)
157179

158180
if model_args.use_scaled_rope:

examples/models/llama/model_args.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ class ModelArgs:
5959
lora_args: Optional[dict] = None
6060

6161
# LoRA arguments to set up a LoRA inference model.
62-
# These arguments come directly from a torchtune LoRA config.
62+
# These arguments come directly from a torchtune adapter_config.json file.
6363
r: Optional[int] = None # Rank.
6464
lora_alpha: Optional[int] = None # Alpha.
6565
# Eg. q_proj, k_proj, v_proj, output_proj

examples/models/llama/runner/static_attention_io_manager.h

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -602,6 +602,39 @@ class StaticAttentionIOManager {
602602
}
603603
}
604604

605+
/**
606+
* Prefill helper. Run multiple inferences as needed depending on the length
607+
* of the prompt and method's input length. Returns the position in the output
608+
* that corresponds to the end of the prompt during the last inference.
609+
*/
610+
template <typename TokenT>
611+
size_t prefill(
612+
executorch::runtime::Span<TokenT> tokens,
613+
executorch::runtime::Span<TokenT> input_buffer,
614+
executorch::runtime::Method& method) {
615+
size_t input_len = input_buffer.size();
616+
get_mask(input_buffer.size()).set_causal_mask();
617+
618+
size_t batch_len = 0;
619+
for (size_t i = 0; i < tokens.size(); i += input_len) {
620+
batch_len = std::min(input_len, tokens.size() - i);
621+
std::copy(&tokens[i], &tokens[i + batch_len], input_buffer.begin());
622+
prepare(method);
623+
ET_CHECK(method.execute() == executorch::runtime::Error::Ok);
624+
update(
625+
method,
626+
config_.k_cache_output_indices,
627+
config_.v_cache_output_indices,
628+
batch_len);
629+
}
630+
return batch_len - 1;
631+
}
632+
633+
/**
634+
* Decode helper. The `sample` argument is called after each inference and
635+
* should retrieve the logits from the `method` argument's output and return
636+
* the sampled token.
637+
*/
605638
template <typename TokenT>
606639
std::vector<TokenT> decode(
607640
TokenT prev_tok,
@@ -632,6 +665,11 @@ class StaticAttentionIOManager {
632665
return generated_tokens;
633666
}
634667

668+
/**
669+
* Lookahead decode helper. The `sample` argument is called after each
670+
* inference and should retrieve the logits from the `method` argument's
671+
* output and return the sampled token for all output positions.
672+
*/
635673
template <typename TokenT>
636674
std::vector<TokenT> lookahead_decode(
637675
TokenT prev_tok,

examples/xnnpack/aot_compiler.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,14 +87,14 @@
8787

8888
model = model.eval()
8989
# pre-autograd export. eventually this will become torch.export
90-
ep = torch.export.export_for_training(model, example_inputs, strict=True)
90+
ep = torch.export.export_for_training(model, example_inputs, strict=False)
9191
model = ep.module()
9292

9393
if args.quantize:
9494
logging.info("Quantizing Model...")
9595
# TODO(T165162973): This pass shall eventually be folded into quantizer
9696
model = quantize(model, example_inputs, quant_type)
97-
ep = torch.export.export_for_training(model, example_inputs, strict=True)
97+
ep = torch.export.export_for_training(model, example_inputs, strict=False)
9898

9999
edge = to_edge_transform_and_lower(
100100
ep,

0 commit comments

Comments
 (0)