Skip to content
2 changes: 1 addition & 1 deletion backends/xnnpack/test/models/llama2_et_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import torch

from executorch.backends.xnnpack.test.tester import Tester
from executorch.examples.models.llama.model import Llama2Model
from executorch.examples.models.llama import Llama2Model


class TestLlama2ETExample(unittest.TestCase):
Expand Down
19 changes: 6 additions & 13 deletions examples/models/llama/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ runtime.python_library(
name = "llama_transformer",
srcs = [
"llama_transformer.py",
"rope.py",
"attention.py",
"model_args.py",
"norm.py",
],
_is_external_target = True,
base_module = "executorch.examples.models.llama",
Expand All @@ -26,23 +22,21 @@ runtime.python_library(
],
deps = [
"//caffe2:torch",
"//executorch/extension/llm/modeling/text_decoder:text_decoder_attention",
"//executorch/extension/llm/modeling/text_decoder:text_decoder_model_args",
"//executorch/extension/llm/modeling/text_decoder:text_decoder_norm",
"//executorch/extension/llm/modeling/text_decoder:text_decoder_rope",
],
)

runtime.python_library(
name = "static_attention",
srcs = [
"static_attention.py",
],
_is_external_target = True,
base_module = "executorch.examples.models.llama",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
":llama_transformer",
"//caffe2:torch",
"//executorch/extension/llm/modeling/text_decoder:text_decoder_static_attention",
],
)

Expand All @@ -51,7 +45,6 @@ runtime.python_library(
srcs = [
"__init__.py",
"fairseq2.py",
"model.py",
],
_is_external_target = True,
base_module = "executorch.examples.models.llama",
Expand All @@ -69,6 +62,7 @@ runtime.python_library(
"//executorch/examples/models/llama:llama_transformer",
"//executorch/extension/llm/export/config:llm_config",
"//executorch/examples/models:checkpoint",
"//executorch/extension/llm/modeling/text_decoder:text_decoder_model",
],
)

Expand Down Expand Up @@ -135,7 +129,6 @@ runtime.python_library(
srcs = [
"export_llama.py",
"export_llama_lib.py",
"model.py",
],
_is_external_target = True,
base_module = "executorch.examples.models.llama",
Expand Down
11 changes: 9 additions & 2 deletions examples/models/llama/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,15 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .model import Llama2Model
from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel


class Llama2Model(DecoderModel):
    """Llama2 model: a thin subclass of the generic DecoderModel.

    No attributes or methods are overridden here; everything is inherited
    from DecoderModel.
    """


__all__ = [
Llama2Model,
"Llama2Model",
]
4 changes: 2 additions & 2 deletions examples/models/phi_4_mini/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from executorch.examples.models.llama.model import Llama2Model
from executorch.examples.models.phi_4_mini.convert_weights import convert_weights
from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel


class Phi4MiniModel(Llama2Model):
class Phi4MiniModel(DecoderModel):
def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down
4 changes: 2 additions & 2 deletions examples/models/qwen2_5/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from executorch.examples.models.llama.model import Llama2Model
from executorch.examples.models.qwen2_5.convert_weights import convert_weights
from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel


class Qwen2_5Model(Llama2Model):
class Qwen2_5Model(DecoderModel):
def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down
4 changes: 2 additions & 2 deletions examples/models/qwen3/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from executorch.examples.models.llama.model import Llama2Model
from executorch.examples.models.qwen3.convert_weights import convert_weights
from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel


class Qwen3Model(Llama2Model):
class Qwen3Model(DecoderModel):
def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down
4 changes: 2 additions & 2 deletions examples/models/smollm2/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from executorch.examples.models.llama.model import Llama2Model
from executorch.examples.models.smollm2.convert_weights import convert_weights
from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel


class SmolLM2Model(Llama2Model):
class SmolLM2Model(DecoderModel):
def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down
16 changes: 16 additions & 0 deletions extension/llm/modeling/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Buck targets for LLM modeling

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

oncall("executorch")

# Aggregate target for the LLM modeling package. It lists no srcs of its own
# and only forwards to the text_decoder ":text_decoder_all" target, so
# depending on ":modeling" pulls in every text-decoder library.
runtime.python_library(
name = "modeling",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
"//executorch/extension/llm/modeling/text_decoder:text_decoder_all",
],
)
26 changes: 26 additions & 0 deletions extension/llm/modeling/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Modeling modules for LLM text generation
from executorch.extension.llm.modeling.text_decoder import (
Attention,
ATTENTION_REGISTRY,
AttentionMHA,
DecoderModel,
ForwardOptions,
ModelArgs,
register_attention,
RMSNorm,
Rope,
StaticAttention,
)

# Public API of this package: exactly the ten names imported from
# executorch.extension.llm.modeling.text_decoder above, re-exported unchanged.
__all__ = [
"Attention",
"ATTENTION_REGISTRY",
"AttentionMHA",
"DecoderModel",
"ForwardOptions",
"ModelArgs",
"register_attention",
"RMSNorm",
"Rope",
"StaticAttention",
]
129 changes: 129 additions & 0 deletions extension/llm/modeling/text_decoder/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

oncall("executorch")

# Library for model_args.py, importable under
# executorch.extension.llm.modeling.text_decoder. It declares no deps.
runtime.python_library(
name = "text_decoder_model_args",
srcs = [
"model_args.py",
],
_is_external_target = True,
base_module = "executorch.extension.llm.modeling.text_decoder",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
)

# Library for norm.py, importable under
# executorch.extension.llm.modeling.text_decoder. Its only dep is torch.
runtime.python_library(
name = "text_decoder_norm",
srcs = [
"norm.py",
],
_is_external_target = True,
base_module = "executorch.extension.llm.modeling.text_decoder",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
"//caffe2:torch",
],
)

# Library for rope.py, importable under
# executorch.extension.llm.modeling.text_decoder. Depends on the model-args
# library in this package and on torch.
runtime.python_library(
name = "text_decoder_rope",
srcs = [
"rope.py",
],
_is_external_target = True,
base_module = "executorch.extension.llm.modeling.text_decoder",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
":text_decoder_model_args",
"//caffe2:torch",
],
)

# Forwarding target for the attention implementation.
#
# attention/attention.py lives in the `attention/` subdirectory, which has its
# own TARGETS file and is therefore a separate Buck package. A target in this
# package must not list that file in `srcs` (package-boundary violation, and
# the module would be packaged twice). The target name is kept so existing
# dependents continue to resolve; the source is provided by the owning target.
runtime.python_library(
    name = "text_decoder_attention",
    _is_external_target = True,
    visibility = [
        "//executorch/...",
        "@EXECUTORCH_CLIENTS",
    ],
    deps = [
        # Owns attention.py and carries its model_args / norm / rope / torch
        # deps transitively.
        "//executorch/extension/llm/modeling/text_decoder/attention:attention",
    ],
)

# Forwarding target for the static-attention implementation.
#
# attention/static_attention.py belongs to the `attention/` Buck package (that
# directory has its own TARGETS file), so it must not appear in `srcs` here:
# doing so crosses a package boundary and packages the module twice. The
# target name is kept so existing dependents continue to resolve.
runtime.python_library(
    name = "text_decoder_static_attention",
    _is_external_target = True,
    visibility = [
        "//executorch/...",
        "@EXECUTORCH_CLIENTS",
    ],
    deps = [
        # Owns static_attention.py and carries its attention / model_args /
        # rope / torch deps transitively.
        "//executorch/extension/llm/modeling/text_decoder/attention:static_attention",
    ],
)

# Library for decoder_model.py, importable under
# executorch.extension.llm.modeling.text_decoder.
#
# NOTE(review): three of the deps below point into //executorch/examples/...
# (checkpoint, model_base, llama:llama_transformer), so this extension/ target
# depends on examples/ code. Confirm that dependency direction is intended.
runtime.python_library(
name = "text_decoder_model",
srcs = [
"decoder_model.py",
],
_is_external_target = True,
base_module = "executorch.extension.llm.modeling.text_decoder",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
":text_decoder_model_args",
":text_decoder_rope",
"//caffe2:torch",
"//executorch/examples/models:checkpoint",
"//executorch/examples/models:model_base",
"//executorch/examples/models/llama:llama_transformer",
"//executorch/extension/llm/export/config:llm_config",
"fbsource//third-party/pypi/torchao:torchao",
],
)

# Aggregate target with no srcs of its own: it depends on every other
# library declared in this file (attention, model, model_args, norm, rope,
# static_attention) so callers can pull them all in with one dep.
runtime.python_library(
name = "text_decoder_all",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
":text_decoder_attention",
":text_decoder_model",
":text_decoder_model_args",
":text_decoder_norm",
":text_decoder_rope",
":text_decoder_static_attention",
],
)
26 changes: 26 additions & 0 deletions extension/llm/modeling/text_decoder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Text decoder models
from executorch.extension.llm.modeling.text_decoder.attention import (
Attention,
ATTENTION_REGISTRY,
AttentionMHA,
ForwardOptions,
register_attention,
StaticAttention,
)
from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel
from executorch.extension.llm.modeling.text_decoder.model_args import ModelArgs
from executorch.extension.llm.modeling.text_decoder.norm import RMSNorm
from executorch.extension.llm.modeling.text_decoder.rope import Rope

# Public API of the text_decoder package: exactly the names imported above
# from the attention, decoder_model, model_args, norm and rope modules.
__all__ = [
"Attention",
"ATTENTION_REGISTRY",
"AttentionMHA",
"DecoderModel",
"ForwardOptions",
"ModelArgs",
"register_attention",
"RMSNorm",
"Rope",
"StaticAttention",
]
43 changes: 43 additions & 0 deletions extension/llm/modeling/text_decoder/attention/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Buck targets for attention modules

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

oncall("executorch")

# Library for attention.py, importable under
# executorch.extension.llm.modeling.text_decoder.attention. Depends on the
# parent package's model_args, norm and rope libraries and on torch.
runtime.python_library(
name = "attention",
srcs = [
"attention.py",
],
_is_external_target = True,
base_module = "executorch.extension.llm.modeling.text_decoder.attention",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
"//executorch/extension/llm/modeling/text_decoder:text_decoder_model_args",
"//executorch/extension/llm/modeling/text_decoder:text_decoder_norm",
"//executorch/extension/llm/modeling/text_decoder:text_decoder_rope",
"//caffe2:torch",
],
)

# Library for static_attention.py, importable under
# executorch.extension.llm.modeling.text_decoder.attention. Depends on the
# local ":attention" library, the parent package's model_args and rope
# libraries, and torch.
runtime.python_library(
name = "static_attention",
srcs = [
"static_attention.py",
],
_is_external_target = True,
base_module = "executorch.extension.llm.modeling.text_decoder.attention",
visibility = [
"//executorch/...",
"@EXECUTORCH_CLIENTS",
],
deps = [
":attention",
"//executorch/extension/llm/modeling/text_decoder:text_decoder_model_args",
"//executorch/extension/llm/modeling/text_decoder:text_decoder_rope",
"//caffe2:torch",
],
)
Loading
Loading