Skip to content

Commit 0c410ae

Browse files
committed
Corrected files
1 parent a528b75 commit 0c410ae

File tree

8 files changed

+757
-614
lines changed

8 files changed

+757
-614
lines changed

src/transformers/models/aimv2/__init__.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
1+
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -16,10 +16,7 @@
1616

1717
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available
1818

19-
_import_structure = {
20-
"configuration_aimv2": ["AIMV2_PRETRAINED_CONFIG_ARCHIVE_MAP", "Aimv2Config"],
21-
"modeling_aimv2": ["Aimv2ForImageClassification", "Aimv2Model", "Aimv2PreTrainedModel"],
22-
}
19+
_import_structure = {"configuration_aimv2": ["AIMv2Config"]}
2320

2421
try:
2522
if not is_torch_available():
@@ -28,20 +25,25 @@
2825
pass
2926
else:
3027
_import_structure["modeling_aimv2"] = [
31-
"Aimv2ForImageClassification",
32-
"Aimv2Model",
33-
"Aimv2PreTrainedModel",
28+
"AIMv2ForImageClassification",
29+
"AIMv2Model",
30+
"AIMv2PreTrainedModel",
3431
]
3532

3633
if TYPE_CHECKING:
37-
from .configuration_aimv2 import AIMV2_PRETRAINED_CONFIG_ARCHIVE_MAP, Aimv2Config
34+
from .configuration_aimv2 import AIMv2Config
35+
3836
try:
3937
if not is_torch_available():
4038
raise OptionalDependencyNotAvailable()
4139
except OptionalDependencyNotAvailable:
4240
pass
4341
else:
44-
from .modeling_aimv2 import Aimv2ForImageClassification, Aimv2Model, Aimv2PreTrainedModel
42+
from .modeling_aimv2 import (
43+
AIMv2ForImageClassification,
44+
AIMv2Model,
45+
AIMv2PreTrainedModel,
46+
)
4547

4648
else:
4749
import sys

src/transformers/models/aimv2/configuration_aimv2.py

Lines changed: 79 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# coding=utf-8
2-
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2+
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
1616

1717
from collections import OrderedDict
1818
from typing import Mapping
19+
from typing import Dict, List, Optional, Set, Tuple, Union, Callable, Any
1920
import functools
2021
import torch.nn as nn
2122

@@ -29,9 +30,9 @@
2930
logger = logging.get_logger(__name__)
3031

3132

32-
class Aimv2Config(PretrainedConfig):
33+
class AIMv2Config(PretrainedConfig):
3334
r"""
34-
This is the configuration class to store the configuration of a [`Aimv2Model`]. It is used to instantiate a AIM-v2
35+
This is the configuration class to store the configuration of a [`AIMv2Model`]. It is used to instantiate an AIM-v2
3536
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
3637
defaults will yield a similar configuration to that of the AIM-v2 [apple/aimv2-large-patch14-224](...)
3738
architecture.
@@ -65,76 +66,120 @@ class Aimv2Config(PretrainedConfig):
6566
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
6667
layer_norm_eps (`float`, *optional*, defaults to 1e-5):
6768
The epsilon used by the layer normalization layers.
69+
qkv_bias (`bool`, *optional*, defaults to `False`):
70+
Whether or not to use bias in query, key, value.
71+
use_bias (`bool`, *optional*, defaults to `False`):
72+
Whether or not to use bias in all linear layers.
6873
use_cls_token (`bool`, *optional*, defaults to `False`):
6974
Whether or not to use a classification token.
70-
use_mask_token (`bool`, *optional*, defaults to `False`):
71-
Whether or not to use a mask token.
72-
use_pos_embed (`str`, *optional*, defaults to `"absolute"`):
75+
pos_embed_type (`str`, *optional*, defaults to `"absolute"`):
7376
Positional embedding type. Choose from 'absolute', 'sincos', or 'none'.
77+
use_rms_norm (`bool`, *optional*, defaults to `False`):
78+
Whether or not to use RMS norm.
79+
post_trunk_norm (`bool`, *optional*, defaults to `True`):
80+
Whether or not to use norm layer after the transformer blocks (layers).
81+
probe_layers (`int` or `Tuple[int, ...]`, *optional*, defaults to 6):
82+
The layer ids to use for selecting features.
83+
reduce (`bool`, *optional*, defaults to `False`):
84+
Whether or not to reduce features using mean.
85+
ffn_target_type (`str`, *optional*, defaults to `"swiglu"`):
86+
Type of feedforward network (FFN) to use.
87+
is_causal (`bool`, *optional*, defaults to `False`):
88+
Whether or not to use causal attention.
7489
norm_layer (`Callable[[int], torch.nn.Module]`, *optional*, defaults to `torch.nn.RMSNorm`):
7590
Normalization layer to use.
76-
Example:
91+
num_queries (`int`, *optional*, defaults to 1):
92+
Number of query tokens for attention pooling.
93+
use_batch_norm (`bool`, *optional*, defaults to `True`):
94+
Whether to use batch normalization in attention pooling.
95+
proj_bias (`bool`, *optional*, defaults to `False`):
96+
Whether to use bias in the projection layer of the attention pooling.
97+
average_pool (`bool`, *optional*, defaults to `True`):
98+
Whether to use average pooling in the attention pooling.
99+
num_labels (`int`, *optional*, defaults to 1000):
100+
The number of labels for classification tasks.
101+
**kwargs:
102+
Remaining keyword arguments are passed to the superclass.
103+
104+
Example:
77105
78106
```python
79-
>>> from aim.v2.configuration_aimv2 import Aimv2Config
80-
>>> from aim.v2.modeling_aimv2 import Aimv2Model
107+
>>> from aim.v2.configuration_aimv2 import AIMv2Config
81108
82109
>>> # Initializing an aimv2-large-patch14-224 style configuration
83-
>>> configuration = Aimv2Config()
84-
85-
>>> # Initializing a model (with random weights) from the aimv2-large-patch14-224 style configuration
86-
>>> model = Aimv2Model(configuration)
110+
>>> configuration = AIMv2Config()
87111
88112
>>> # Accessing the model configuration
89-
>>> configuration = model.config
113+
>>> print(configuration)
90114
```
91115
"""
116+
92117
model_type = "aimv2"
93118

94119
def __init__(
95120
self,
96-
image_size: int = 224,
97-
patch_size: int = 14,
121+
image_size: Union[int, Tuple[int, int]] = 224,
122+
patch_size: Union[int, Tuple[int, int]] = 14,
98123
num_channels: int = 3,
99124
hidden_size: int = 1024,
100125
num_hidden_layers: int = 24,
101126
num_attention_heads: int = 16,
102-
intermediate_size: int = 4096,
103-
hidden_act: str = "gelu",
104-
hidden_dropout_prob: float = 0.0,
105-
attention_probs_dropout_prob: float = 0.0,
127+
#mlp_ratio: float = 4.0,
128+
hidden_act: Union[str, Callable] = "gelu",
129+
hidden_dropout_prob: float = 0.1,
130+
attention_probs_dropout_prob: float = 0.1,
106131
initializer_range: float = 0.02,
132+
intermediate_size=2816,
107133
layer_norm_eps: float = 1e-5,
108-
use_cls_token: bool = False,
109-
use_mask_token: bool = False,
110-
use_pos_embed: str = "absolute",
111134
qkv_bias: bool = False,
112-
norm_layer=nn.LayerNorm,
135+
use_bias: bool = False,
136+
use_cls_token: bool = False,
137+
pos_embed_type: str = "absolute",
138+
#use_rms_norm: bool = False,
139+
post_trunk_norm: bool = True,
140+
probe_layers: Union[int, Tuple[int, ...]] = 6,
141+
reduce: bool = False,
142+
ffn_target_type: str = "swiglu",
143+
is_causal: bool = False,
144+
norm_layer: Optional[Callable[[int], nn.Module]] = nn.RMSNorm,
145+
num_queries: int = 1,
146+
use_batch_norm: bool = True,
147+
proj_bias: bool = False,
148+
average_pool: bool = True,
149+
num_labels: int = 1000,
113150
**kwargs,
114151
):
115152
super().__init__(**kwargs)
153+
116154
self.image_size = image_size
117155
self.patch_size = patch_size
118156
self.num_channels = num_channels
119157
self.hidden_size = hidden_size
120158
self.num_hidden_layers = num_hidden_layers
121159
self.num_attention_heads = num_attention_heads
122-
self.intermediate_size = intermediate_size
160+
#self.mlp_ratio = mlp_ratio
123161
self.hidden_act = hidden_act
124162
self.hidden_dropout_prob = hidden_dropout_prob
125163
self.attention_probs_dropout_prob = attention_probs_dropout_prob
126164
self.initializer_range = initializer_range
165+
self.intermediate_size=intermediate_size
127166
self.layer_norm_eps = layer_norm_eps
128-
self.use_cls_token = use_cls_token
129-
self.use_mask_token = use_mask_token
130-
self.use_pos_embed = use_pos_embed # we will use "sincos" or "absolute"
131167
self.qkv_bias = qkv_bias
132-
# If norm_layer is provided, use it, otherwise, default to nn.LayerNorm with the specified eps
133-
self.norm_layer = (
134-
norm_layer
135-
if norm_layer is not None
136-
else functools.partial(nn.LayerNorm, eps=layer_norm_eps)
137-
)
168+
self.use_bias = use_bias
169+
self.use_cls_token = use_cls_token
170+
self.pos_embed_type = pos_embed_type
171+
#self.use_rms_norm = use_rms_norm
172+
self.post_trunk_norm = post_trunk_norm
173+
self.probe_layers = probe_layers
174+
self.reduce = reduce
175+
self.ffn_target_type = ffn_target_type
176+
self.is_causal = is_causal
177+
self.norm_layer = norm_layer
178+
self.num_queries = num_queries
179+
self.use_batch_norm = use_batch_norm
180+
self.proj_bias = proj_bias
181+
self.average_pool = average_pool
182+
self.num_labels = num_labels
138183

139184

140185
class AIMv2OnnxConfig(OnnxConfig):

0 commit comments

Comments
 (0)