File tree: 5 files changed, +38 -14 lines changed
examples/models/llama3_2_vision Expand file tree Collapse file tree 5 files changed +38
-14
lines changed Original file line number Diff line number Diff line change 7272        conda activate "${CONDA_ENV}" 
7373
7474        MODEL_NAME=${{ matrix.model }} 
75+         # Install requirements for llama vision 
76+         if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" ]]; then 
77+           bash examples/models/llama3_2_vision/install_requirements.sh 
78+         fi 
7579        BUILD_TOOL=${{ matrix.build-tool }} 
7680        BACKEND=${{ matrix.backend }} 
7781        DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }} 
Original file line number Diff line number Diff line change 5858        bash .ci/scripts/setup-conda.sh 
5959        # Setup MacOS dependencies as there is no Docker support on MacOS atm 
6060        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" 
61-         # Build and test xecutorch 
61+         # Install requirements for llama vision 
62+         if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" ]]; then 
63+           ${CONDA_RUN} bash examples/models/llama3_2_vision/install_requirements.sh 
64+         fi 
65+         # Build and test executorch 
6266        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" 
6367
6468test-custom-ops-macos :
Original file line number Diff line number Diff line change 1+ # Copyright (c) Meta Platforms, Inc. and affiliates. 
2+ # All rights reserved. 
3+ # 
4+ # This source code is licensed under the BSD-style license found in the 
5+ # LICENSE file in the root directory of this source tree. 
6+ 
7+ from  .vision_encoder  import  FlamingoVisionEncoderModel , VisionEncoderConfig 
8+ 
9+ __all__  =  [
10+     "FlamingoVisionEncoderModel" ,
11+     "VisionEncoderConfig" ,
12+ ]
Original file line number Diff line number Diff line change 1616)
1717from  torchtune .models .llama3_2_vision ._component_builders  import  llama3_2_vision_encoder 
1818
19- max_seq_len  =  8192 
20- in_channels  =  3 
21- tile_size  =  560 
22- max_num_tiles  =  4 
23- # how many tokens per image generated by the vision encoder 
24- tokens_per_image  =  6404 
25- # how many images to cache in the kv cache in cross attention 
26- kv_cache_image_num  =  1 
27- # maximum number of tokens generated by encoder and thus stored in the kv cache in cross attention 
28- encoder_max_seq_len  =  tokens_per_image  *  kv_cache_image_num 
29- 
3019
3120@dataclass  
3221class  VisionEncoderConfig :
@@ -42,11 +31,26 @@ class VisionEncoderConfig:
4231    in_channels : int  =  3 
4332
4433
34+ # Reduced config (6 CLIP layers) for CI testing purposes 
35+ demo_config : VisionEncoderConfig  =  VisionEncoderConfig (
36+     patch_size = 14 ,
37+     num_heads = 8 ,
38+     clip_embed_dim = 768 ,
39+     clip_num_layers = 6 ,
40+     clip_hidden_states = [1 , 3 , 5 ],
41+     decoder_embed_dim = 1024 ,
42+     num_layers_projection = 4 ,
43+     tile_size = 224 ,
44+     max_num_tiles = 4 ,
45+     in_channels = 3 ,
46+ )
47+ 
48+ 
4549class  FlamingoVisionEncoderModel (EagerModelBase ):
4650    def  __init__ (self , config : Optional [VisionEncoderConfig ] =  None ):
4751        super ().__init__ ()
4852        if  config  is  None :
49-             config  =  VisionEncoderConfig () 
53+             config  =  demo_config 
5054        self .config  =  config 
5155        self .model  =  llama3_2_vision_encoder (
5256            patch_size = config .patch_size ,
Original file line number Diff line number Diff line change @@ -17,7 +17,7 @@ addopts =
1717    #  examples
1818    examples/models/llama/tests
1919    examples/models/llama3_2_vision/preprocess
20-     #   examples/models/llama3_2_vision/vision_encoder/test TODO: enable this 
20+     examples/models/llama3_2_vision/vision_encoder/test
2121    #  examples/models/llava/test TODO: enable this
2222    #  exir
2323    exir/_serialize/test
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments