Draft
Commits (64)
ede1463
qwen 2.5 vl code skeleton
albert-inflection May 19, 2025
3269545
model builder progress
albert-inflection May 19, 2025
6b013ec
more model building progress
albert-inflection May 20, 2025
5cb7421
airplane update
albert-inflection May 22, 2025
6d09f1f
WIP transform + rope
Jun 10, 2025
1c5dd67
image transform progress
Jun 10, 2025
8992e50
image transform progress
albert-inflection Jun 10, 2025
74614b2
Qwen2_5_VLImageTransform complete
lawrence-inflection Jun 11, 2025
59fe9cd
remove context.md from tracking
lawrence-inflection Jun 12, 2025
f9cdb83
Qwen2_5_VLTransform implemented
lawrence-inflection Jun 12, 2025
3032d75
module progress
Jun 12, 2025
c634a4b
batch size in ViT forward
Jun 13, 2025
423a268
rehaul modules, start from near HF
Jun 18, 2025
d3d4bd2
Rope + Window attn attempt 1
Jun 20, 2025
ad39ebb
_positional_embeddings.py implementation
lawrencefeng17 Jun 20, 2025
0193832
progress on _component_builders.py for decoder
lawrencefeng17 Jun 20, 2025
caa77ff
upstream cleanup
Jun 21, 2025
f1a235e
more cleanup
Jun 21, 2025
a2eacc9
merge temp branch onto albert/qwen2.5-vl
lawrencefeng17 Jun 23, 2025
16902fa
refactored Qwen25VLRotaryPositionalEmbeddings; passed test cases
lawrencefeng17 Jun 23, 2025
d4fb9c2
refactored Qwen25VLRotaryPositionalEmbeddings; added summary context.md
lawrencefeng17 Jun 23, 2025
f2c3a0e
feat: Qwen25VLEarlyFusionModel wrapper class
lawrencefeng17 Jun 23, 2025
896b070
rebase
Jul 3, 2025
3db79f9
clean up mlps
Jun 23, 2025
7024fdc
clean up encoder builder
Jun 23, 2025
20728a0
fix: removed raise condition; decoder bias fix
lawrencefeng17 Jun 24, 2025
bb3b4a6
checkpointing + edits
Jun 24, 2025
045f71b
init
Jun 24, 2025
b959286
convert weights final
Jun 24, 2025
7bf0a09
model builder slight fix
Jun 24, 2025
06ce596
fixes: minor changes, early end-to-end testing
lawrencefeng17 Jun 25, 2025
e8ab57c
fix: completely rewrote mrope
lawrencefeng17 Jun 26, 2025
4e44c1f
fix: minor fixes to mrope
lawrencefeng17 Jun 26, 2025
00e79f8
transform edits
Jun 26, 2025
257cbcf
feat: mrope cache implemented for decoder (#2)
lawrence-inflection Jun 26, 2025
801efb4
encoder forward pass edits
Jun 26, 2025
3df44cf
bug fixes, training works now
albert-inflection Jun 27, 2025
cc52ebb
tested and fixed _transform
lawrence-inflection Jun 27, 2025
5ab217b
weight saving fix + import
albert-inflection Jun 30, 2025
4928249
Lawrence/qwen2.5 vl/encoder tests
lawrence-inflection Jul 2, 2025
47a9e19
feat: added other qwen variants in model builders
lawrencefeng17 Jul 2, 2025
a8b00df
custom collation + init edits
albert-inflection Jul 2, 2025
e63202a
fix: removed default args to transform
lawrencefeng17 Jul 2, 2025
50314d3
nits
albert-inflection Jul 2, 2025
f6e75d3
7B config
albert-inflection Jul 2, 2025
b2b74bc
config nit
albert-inflection Jul 2, 2025
767b025
added test cases in torchtune style
lawrencefeng17 Jul 3, 2025
e03eb9c
cleanup
albert-inflection Jul 2, 2025
a82e72c
rm uv.lock
albert-inflection Jul 2, 2025
47c60c5
trainable params
albert-inflection Jul 2, 2025
df68e52
updated model builders
albert-inflection Jul 2, 2025
e98578c
rename rope
albert-inflection Jul 3, 2025
346987b
cleanup
lawrencefeng17 Jul 3, 2025
9438ca8
fix
lawrencefeng17 Jul 3, 2025
23e0640
cleanup:
lawrencefeng17 Jul 3, 2025
1ff7ffa
3B recipe and model builder edit
albert-inflection Jul 3, 2025
e7c8b85
32B config and modelbuilder changes'
albert-inflection Jul 3, 2025
d5ff0e9
72B config
albert-inflection Jul 3, 2025
43f1cbe
nit diffs
Jul 3, 2025
c09279c
fix padding token
Jul 3, 2025
d782bff
recipe reg
albert-inflection Jul 7, 2025
5cac20b
fixed linter errors
lawrencefeng17 Jul 7, 2025
49698b2
linter fixes
lawrencefeng17 Jul 7, 2025
ee8ad1c
fixes to pass linter and all unit tests
lawrencefeng17 Jul 8, 2025
6 changes: 3 additions & 3 deletions torchtune/models/qwen2_5/_model_builders.py
@@ -372,9 +372,9 @@ def qwen2_5_tokenizer(
        Qwen2_5Tokenizer: Instantiation of the Qwen2.5 tokenizer
    """
    special_tokens = (
-        parse_hf_tokenizer_json(special_tokens_path)
-        if special_tokens_path is not None
-        else None
+        QWEN2_5_SPECIAL_TOKENS
+        if special_tokens_path is None
+        else parse_hf_tokenizer_json(special_tokens_path)
    )

    if prompt_template is not None:
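The hunk above flips the fallback: when no special_tokens_path is supplied, the tokenizer now defaults to the packaged QWEN2_5_SPECIAL_TOKENS mapping instead of None. Below is a minimal sketch of the resulting selection logic; the constant's values are illustrative, parse_hf_tokenizer_json is shown only as a placeholder, and resolve_special_tokens is a hypothetical name used for illustration (the real definitions live in torchtune's qwen2_5 module).

from typing import Optional

# Illustrative stand-ins; the real constant and parser come from torchtune.
QWEN2_5_SPECIAL_TOKENS = {"<|im_start|>": 151644, "<|im_end|>": 151645}

def parse_hf_tokenizer_json(path: str) -> dict:
    # Placeholder for torchtune's parser of special tokens from an HF tokenizer.json.
    raise NotImplementedError

def resolve_special_tokens(special_tokens_path: Optional[str]) -> dict:
    # New behavior from the diff: default to the packaged Qwen2.5 special
    # tokens rather than None when no path is given.
    return (
        QWEN2_5_SPECIAL_TOKENS
        if special_tokens_path is None
        else parse_hf_tokenizer_json(special_tokens_path)
    )

assert resolve_special_tokens(None) == QWEN2_5_SPECIAL_TOKENS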
39 changes: 36 additions & 3 deletions torchtune/modules/attention.py
@@ -4,6 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

+import inspect
import logging
from typing import Optional

@@ -15,6 +16,36 @@
logger = logging.getLogger(__name__)


+def _call_pos_embedding_safely(
+    pos_embedding: nn.Module,
+    x: torch.Tensor,
+    input_pos: Optional[torch.Tensor] = None,
+    window_index: Optional[torch.Tensor] = None,
+) -> torch.Tensor:
+    """
+    Call positional embedding with only the parameters it accepts.
+
+    Args:
+        pos_embedding (nn.Module): The positional embedding module
+        x (torch.Tensor): Input tensor
+        input_pos (Optional[torch.Tensor]): Optional input position tensor
+        window_index (Optional[torch.Tensor]): Optional window index tensor
+
+    Returns:
+        Output tensor from positional embedding
+    """
+    sig = inspect.signature(pos_embedding.forward)
+    kwargs = {}
+
+    # Only add parameters that the method accepts
+    if "input_pos" in sig.parameters:
+        kwargs["input_pos"] = input_pos
+    if "window_index" in sig.parameters:
+        kwargs["window_index"] = window_index
+
+    return pos_embedding(x, **kwargs)
+
+
Comment on lines +19 to +48

Current workaround for passing window_index into the positional embedding module.
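A minimal, self-contained sketch of that workaround follows. PlainRope and WindowedRope are illustrative toy modules, not torchtune classes, and their identity forward passes stand in for the real rotations: the helper inspects the module's forward signature and passes only the keyword arguments it declares, so a standard RoPE never sees window_index while a Qwen2.5-VL-style windowed rope receives it. This is also what lets the q and k call sites below stay identical for both kinds of positional embeddings.

import inspect
from typing import Optional

import torch
from torch import nn

class PlainRope(nn.Module):
    # Toy stand-in for a standard RoPE that only accepts input_pos.
    def forward(self, x: torch.Tensor, input_pos: Optional[torch.Tensor] = None) -> torch.Tensor:
        return x  # identity placeholder for the real rotation

class WindowedRope(nn.Module):
    # Toy stand-in for a Qwen2.5-VL-style rope that also consumes window_index.
    def forward(
        self,
        x: torch.Tensor,
        input_pos: Optional[torch.Tensor] = None,
        window_index: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        return x  # identity placeholder

def call_pos_embedding_safely(pos_embedding, x, input_pos=None, window_index=None):
    # Mirrors the helper added above: forward only the kwargs the module accepts.
    sig = inspect.signature(pos_embedding.forward)
    kwargs = {}
    if "input_pos" in sig.parameters:
        kwargs["input_pos"] = input_pos
    if "window_index" in sig.parameters:
        kwargs["window_index"] = window_index
    return pos_embedding(x, **kwargs)

x = torch.randn(2, 16, 8, 64)  # [batch, seq, num_heads, head_dim]
pos = torch.arange(16)
win = torch.randperm(16)
# PlainRope is called without window_index; WindowedRope receives both kwargs.
call_pos_embedding_safely(PlainRope(), x, input_pos=pos, window_index=win)
call_pos_embedding_safely(WindowedRope(), x, input_pos=pos, window_index=win)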

class MultiHeadAttention(nn.Module):
"""Multi-headed attention layer with support for grouped query
attention (GQA) introduced in https://arxiv.org/abs/2305.13245v1.
@@ -242,7 +273,9 @@ def forward(

        # Apply positional embeddings
        if self.pos_embeddings is not None:
-            q = self.pos_embeddings(q, input_pos=input_pos, window_index=window_index)
+            q = _call_pos_embedding_safely(
+                self.pos_embeddings, q, input_pos, window_index
+            )

        # [b, n_h, s_x, h_d]
        q = q.transpose(1, 2)
@@ -270,8 +303,8 @@ def forward(
            k = k.view(b, s_y, -1, self.head_dim)
            v = v.view(b, s_y, -1, self.head_dim)
            if self.pos_embeddings is not None:
-                k = self.pos_embeddings(
-                    k, input_pos=input_pos, window_index=window_index
+                k = _call_pos_embedding_safely(
+                    self.pos_embeddings, k, input_pos, window_index
                )

        # k,v shape: [b, n_kv, s_y, h_d]