diff --git a/clipeval/eval_all.py b/clipeval/eval_all.py
index ef8de5c..0407b28 100644
--- a/clipeval/eval_all.py
+++ b/clipeval/eval_all.py
@@ -16,6 +16,12 @@
     ("slip", "clipeval.slip.eval_slip"),
     ("xm3600", "clipeval.xm3600.eval_xm3600"),
     ("cvqa", "clipeval.cvqa.eval_cvqa"),
+    ("zero_shot_classification_dollar_street", "clipeval.zero_shot_classification.eval_dollar_street"),
+    ("zero_shot_classification_GeoDE", "clipeval.zero_shot_classification.eval_GeoDE"),
+    ("zero_shot_classification_GLDv2", "clipeval.zero_shot_classification.eval_GLDv2"),
+    ("few_shot_geo_localization_dollar_street", "clipeval.few_shot_geo_localization.eval_dollar_street"),
+    ("few_shot_geo_localization_GeoDE", "clipeval.few_shot_geo_localization.eval_GeoDE"),
+    ("few_shot_geo_localization_xm3600", "clipeval.few_shot_geo_localization.eval_xm3600"),
 ]
diff --git a/clipeval/few_shot_geo_localization/eval_GeoDE.py b/clipeval/few_shot_geo_localization/eval_GeoDE.py
new file mode 100644
index 0000000..462721c
--- /dev/null
+++ b/clipeval/few_shot_geo_localization/eval_GeoDE.py
@@ -0,0 +1,105 @@
+import torch
+import json
+from PIL import Image
+from tqdm import tqdm
+import pandas as pd
+import numpy as np
+
+import sys
+if "external/big_vision" not in sys.path:
+    sys.path.append("external/big_vision")
+# or directly copy the functions from https://github.com/google-research/big_vision/blob/main/big_vision/evaluators/fewshot_lsr.py
+
+from big_vision.evaluators.fewshot_lsr import _precompute_cache, _eig_fewshot_acc_fn
+
+data_dir = 'data/geode/'
+GROUP_KEY = 'ip_country'
+
+# Evaluation Function
+def evaluate(model, preprocess_val):
+    geo_df = pd.read_csv(data_dir + 'index.csv')
+    geo_df = geo_df.sample(frac=1).reset_index(drop=True)  # shuffle
+    train_df = geo_df.iloc[:20000]
+    test_df = geo_df.iloc[20000:]
+    print("done loading data", len(geo_df), len(train_df), len(test_df))
+
+    batch_size = 16
+    device = torch.cuda.current_device()
+
+    ## train classification probes
+    classification_probes = []
+    country_ids_list = []  # one list per n_shot; in theory they are identical, but some GeoDE countries are rare enough to be missing at low shot counts
+    for n_shot in [5, 10, 25]:
+        train_sampled = train_df.groupby(GROUP_KEY, group_keys=False).apply(lambda x: x.sample(n=min(len(x), n_shot), random_state=42))
+        country_ids = sorted(list(set(train_sampled[GROUP_KEY])))
+
+        df = train_sampled
+        with torch.no_grad():
+            all_features = []
+            all_labels = []
+            for start in tqdm(range(0, len(df), batch_size)):
+                end = min(start + batch_size, len(df))
+                batch_imgs = []
+                for i in range(start, end):
+                    data = df.iloc[i]
+                    try:
+                        batch_imgs.append(Image.open(data_dir + 'images/' + data['file_path']).convert("RGB"))
+                        all_labels.append(country_ids.index(data[GROUP_KEY]))
+                    except Exception:
+                        print(f"missing image {data['file_path']}")
+
+                images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
+                image_embs = model.encode_image(images)
+                image_embs /= image_embs.norm(dim=-1, keepdim=True)
+
+                all_features.append(image_embs)
+
+            all_features = torch.cat(all_features, dim=0)
+            print(all_features.shape)
+
+        classification_probes.append(_precompute_cache(all_features.cpu().numpy(), all_labels, len(set(all_labels))))
+        country_ids_list.append(country_ids)
+
+    ## start eval
+    n = 0
+    correct = [0] * len(classification_probes)
+
+    with torch.no_grad():
+        for local_start in tqdm(range(0, len(test_df), batch_size)):
+            local_end = min(local_start + batch_size, len(test_df))
+            batch_imgs = []
+            country_labels = []
+
+            for i in range(local_start, local_end):
+                data = test_df.iloc[i]
+                try:
+                    batch_imgs.append(Image.open(data_dir + 'images/' + data['file_path']).convert("RGB"))
+                    country_labels.append(data[GROUP_KEY])
+                except Exception:
+                    print(f"missing image {data['file_path']}")
+
+            images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
+            image_features = model.encode_image(images)
+            image_features /= image_features.norm(dim=-1, keepdim=True)
+
+            for ind, cache in enumerate(classification_probes):
+                labels = [country_ids_list[ind].index(c) if c in country_ids_list[ind] else -1 for c in country_labels]
+                if labels.count(-1) > 0:
+                    print(f"WARNING: {labels.count(-1)} of {len(labels)} samples have a country that is not in the training set.")
+                correct[ind] += _eig_fewshot_acc_fn(cache, image_features.cpu().numpy(), labels, 2.0 ** 10).item()
+
+            n += len(labels)
+
+    print(f"few_shot [5, 10, 25] geo-localization on GeoDE, {correct}, {n}, {np.array(correct)/n}")
+    return correct, n
+
+def parse_results(results, result_json):
+    with open(result_json) as f:
+        result = json.load(f)
+    print("few-shot geo-localization GeoDE:", result['acc'])
+    results['few_shot_geo_loc_GeoDE'] = result['acc']
+
+def main(model, preprocess_val, tokenizer, result_json):
+    correct, n = evaluate(model, preprocess_val)
+    with open(result_json, "w") as f:
+        json.dump({"correct": correct, "total": n, "acc": (np.array(correct)/n).tolist()}, f)
diff --git a/clipeval/few_shot_geo_localization/eval_dollar_street.py b/clipeval/few_shot_geo_localization/eval_dollar_street.py
new file mode 100644
index 0000000..e5f39c7
--- /dev/null
+++ b/clipeval/few_shot_geo_localization/eval_dollar_street.py
@@ -0,0 +1,96 @@
+import torch
+import json
+from PIL import Image
+from tqdm import tqdm
+import pandas as pd
+import numpy as np
+
+import sys
+if "external/big_vision" not in sys.path:
+    sys.path.append("external/big_vision")
+# or directly copy the functions from https://github.com/google-research/big_vision/blob/main/big_vision/evaluators/fewshot_lsr.py
+
+from big_vision.evaluators.fewshot_lsr import _precompute_cache, _eig_fewshot_acc_fn
+
+
+data_dir = 'data/DollarStreet/dataset_dollarstreet/'
+
+# Evaluation Function
+def evaluate(model, preprocess_val):
+    train_df = pd.read_csv(data_dir + 'images_v2_imagenet_train.csv')
+    test_df = pd.read_csv(data_dir + 'images_v2_imagenet_test.csv')
+    print("done loading data", len(train_df), len(test_df))
+
+    batch_size = 16
+    device = torch.cuda.current_device()
+
+    ## train classification probes
+    classification_probes = []
+    country_ids_list = []  # one list per n_shot; they should be identical, but keep one per probe just in case
+    for n_shot in [5, 10, 25]:
+        train_sampled = train_df.groupby('country.id', group_keys=False).apply(lambda x: x.sample(n=min(len(x), n_shot), random_state=42))
+        country_ids = sorted(list(set(train_sampled['country.id'])))
+
+        df = train_sampled
+        with torch.no_grad():
+            all_features = []
+            all_labels = []
+            for start in tqdm(range(0, len(df), batch_size)):
+                end = min(start + batch_size, len(df))
+                batch_imgs = []
+                for i in range(start, end):
+                    data = df.iloc[i]
+                    batch_imgs.append(Image.open(data_dir + data['imageRelPath']).convert("RGB"))
+                    all_labels.append(country_ids.index(data['country.id']))
+
+
+                images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
+                image_embs = model.encode_image(images)
+                image_embs /= image_embs.norm(dim=-1, keepdim=True)
+
+                all_features.append(image_embs)
+
+            all_features = torch.cat(all_features, dim=0)
+            print(all_features.shape)
+
+        classification_probes.append(_precompute_cache(all_features.cpu().numpy(), all_labels, len(set(all_labels))))
+        country_ids_list.append(country_ids)
+
+    ## start eval
+    n = 0
+    correct = [0] * len(classification_probes)
+
+    with torch.no_grad():
+        for local_start in tqdm(range(0, len(test_df), batch_size)):
+            local_end = min(local_start + batch_size, len(test_df))
+            batch_imgs = []
+            country_labels = []
+
+            for i in range(local_start, local_end):
+                data = test_df.iloc[i]
+                batch_imgs.append(Image.open(data_dir + data['imageRelPath']).convert("RGB"))
+                country_labels.append(data['country.id'])
+
+            images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
+            image_features = model.encode_image(images)
+            image_features /= image_features.norm(dim=-1, keepdim=True)
+
+            for ind, cache in enumerate(classification_probes):
+                labels = [country_ids_list[ind].index(c) for c in country_labels]
+                correct[ind] += _eig_fewshot_acc_fn(cache, image_features.cpu().numpy(), labels, 2.0 ** 10).item()
+
+            n += len(labels)
+
+    print(f"few_shot [5, 10, 25] geo-localization on DollarStreet, {correct}, {n}, {np.array(correct)/n}")
+    return correct, n
+
+def parse_results(results, result_json):
+    with open(result_json) as f:
+        result = json.load(f)
+    print("few-shot geo-localization dollar street:", result['acc'])
+    results['few_shot_geo_loc_dollar_street'] = result['acc']
+
+def main(model, preprocess_val, tokenizer, result_json):
+    correct, n = evaluate(model, preprocess_val)
+    with open(result_json, "w") as f:
+        json.dump({"correct": correct, "total": n, "acc": (np.array(correct)/n).tolist()}, f)
diff --git a/clipeval/few_shot_geo_localization/eval_xm3600.py b/clipeval/few_shot_geo_localization/eval_xm3600.py
new file mode 100644
index 0000000..5ea0dfa
--- /dev/null
+++ b/clipeval/few_shot_geo_localization/eval_xm3600.py
@@ -0,0 +1,107 @@
+import torch
+import json
+from PIL import Image
+from tqdm import tqdm
+import pandas as pd
+import numpy as np
+
+import sys
+if "external/big_vision" not in sys.path:
+    sys.path.append("external/big_vision")
+# or directly copy the functions from https://github.com/google-research/big_vision/blob/main/big_vision/evaluators/fewshot_lsr.py
+
+from big_vision.evaluators.fewshot_lsr import _precompute_cache, _eig_fewshot_acc_fn
+
+data_dir = 'data/XM3600/'
+GROUP_KEY = 'image/locale'
+
+# Evaluation Function
+def evaluate(model, preprocess_val):
+    with open(data_dir + 'captions.jsonl', 'r') as f:
+        data = [{k: v for k, v in json.loads(line).items() if k in ['image/key', 'image/locale']} for line in f]
+    df = pd.DataFrame(data)
+    df = df.sample(frac=1).reset_index(drop=True)  # shuffle
+    train_df = df.iloc[:1800]
+    test_df = df.iloc[1800:]
+    print("done loading data", len(df), len(train_df), len(test_df))
+
+    batch_size = 16
+    device = torch.cuda.current_device()
+
+    ## train classification probes
+    classification_probes = []
+    country_ids_list = []  # one list per n_shot; they should be identical, but keep one per probe just in case
+    for n_shot in [5, 10, 25]:
+        train_sampled = train_df.groupby(GROUP_KEY, group_keys=False).apply(lambda x: x.sample(n=min(len(x), n_shot), random_state=42))
+        country_ids = sorted(list(set(train_sampled[GROUP_KEY])))
+
+        df = train_sampled
+        with torch.no_grad():
+            all_features = []
+            all_labels = []
+            for start in tqdm(range(0, len(df), batch_size)):
+                end = min(start + batch_size, len(df))
+                batch_imgs = []
+                for i in range(start, end):
+                    data = df.iloc[i]
+                    try:
+                        batch_imgs.append(Image.open(data_dir + f"images/{data['image/key']}.jpg").convert("RGB"))
f"images/{data['image/key']}.jpg").convert("RGB")) + all_labels.append(country_ids.index(data[GROUP_KEY])) + except: + print(f"missing image {data['image/key']}") + + images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs]) + image_embs = model.encode_image(images) + image_embs /= image_embs.norm(dim=-1, keepdim=True) + + all_features.append(image_embs) + + all_features = torch.cat(all_features, dim=0) + print(all_features.shape) + + classification_probes.append(_precompute_cache(all_features.cpu().numpy(), all_labels, len(set(all_labels)))) + country_ids_list.append(country_ids) + + ## start eval + n = 0 + correct = [0] * len(classification_probes) + + with torch.no_grad(): + for local_start in tqdm(range(0, len(test_df), batch_size)): + local_end = min(local_start + batch_size, len(test_df)) + batch_imgs = [] + country_labels = [] + + for i in range(local_start, local_end): + data = test_df.iloc[i] + try: + batch_imgs.append(Image.open(data_dir + f"images/{data['image/key']}.jpg").convert("RGB")) + country_labels.append(data[GROUP_KEY]) + except: + print(f"missing image {data['image/key']}") + + images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs]) + image_features = model.encode_image(images) + image_features /= image_features.norm(dim=-1, keepdim=True) + + for ind, cache in enumerate(classification_probes): + labels = [country_ids_list[ind].index(c) if c in country_ids_list[ind] else -1 for c in country_labels] + if labels.count(-1) > 0: + print(f"WARNING: there are {labels.count(-1)} out of {len(labels)} samples country are not in the training set.") + correct[ind] += _eig_fewshot_acc_fn(cache, image_features.cpu().numpy(), labels, 2.0 ** 10).item() + + n += len(labels) + + print(f"few_shot [5, 10, 25] geo-localization on XM3600, {correct}, {n}, {np.array(correct)/n}") + return correct, n + +def parse_results(results, result_json): + with open(result_json) as f: + result = json.load(f) + print("few-shot geo-localization XM3600:", result['acc']) + results['few_shot_geo_loc_xm3600'] = result['acc'] + +def main(model, preprocess_val, tokenizer, result_json): + correct, n = evaluate(model, preprocess_val) + with open(result_json, "w") as f: + json.dump({"correct": correct, "total": n, "acc": (np.array(correct)/n).tolist()}, f) diff --git a/clipeval/zero_shot_classification/eval_GLDv2.py b/clipeval/zero_shot_classification/eval_GLDv2.py new file mode 100644 index 0000000..a0de9ce --- /dev/null +++ b/clipeval/zero_shot_classification/eval_GLDv2.py @@ -0,0 +1,96 @@ +import torch +import json +from PIL import Image +from tqdm import tqdm +from collections import Counter +import pandas as pd + +import sys +if "external/open_clip" not in sys.path: + sys.path.append("external/open_clip") +from src.open_clip.open_clip_train.zero_shot import accuracy # https://github.com/mlfoundations/open_clip/blob/main/src/open_clip_train/zero_shot.py + +data_dir = 'data/GLDv2/' + +# return top landmark or all retrieved landmarks +def get_landmark(row, image_to_land_id_map, land_id_to_name): + tmp = [image_to_land_id_map[id] for id in row['images'].split()] + counter = Counter(tmp) + landmark_id, count = counter.most_common(1)[0] + return land_id_to_name[landmark_id], [land_id_to_name[x] for x in set(tmp)] # most_voting landmark, all landmarks + +def encode_texts(model, tokenizer, texts, device): + texts = tokenizer(texts).to(device) + text_embs = model.encode_text(texts) + text_embs /= text_embs.norm(dim=-1, keepdim=True) + return text_embs + +# Evaluation Function 
+def evaluate(model, preprocess_val, tokenizer):
+    # prepare the GLDv2 data
+    df = pd.read_csv(data_dir + 'retrieval_solution_v2.1.csv')
+    df = df[df['Usage'].isin(['Private', 'Public'])]
+    ids = set(df['id'])
+    print("test images count: ", len(ids))
+    image_ids = set([id for x in df['images'] for id in x.split()])
+    print("retrieved images count: ", len(image_ids))
+
+    image_to_landmark_df = pd.read_csv(data_dir + 'index_image_to_landmark.csv')
+    image_to_landmark_df = image_to_landmark_df[image_to_landmark_df['id'].isin(image_ids)]
+    image_to_land_id_map = {}
+    for _, row in image_to_landmark_df.iterrows():
+        image_to_land_id_map[row['id']] = row['landmark_id']
+
+    landmark_to_category_df = pd.read_csv(data_dir + 'index_label_to_category.csv')
+    landmark_to_category_df = landmark_to_category_df[landmark_to_category_df['landmark_id'].isin(set(image_to_landmark_df['landmark_id']))]
+    land_id_to_name = {}
+    for _, row in landmark_to_category_df.iterrows():
+        category = row['category']
+        name = category[category.rfind(':') + 1:].replace("_", " ").rstrip('"')
+        land_id_to_name[row['landmark_id']] = name
+
+    landmarks = list(land_id_to_name.values())
+    print("number of landmarks: ", len(landmarks))
+
+    batch_size = 16
+    device = torch.cuda.current_device()
+
+    top1 = 0
+    n = 0
+
+    text_features = encode_texts(model, tokenizer, landmarks, device)
+
+    with torch.no_grad():
+        for local_start in tqdm(range(0, len(df), batch_size)):
+            local_end = min(local_start + batch_size, len(df))
+            batch_imgs = []
+            labels = []
+
+            for i in range(local_start, local_end):
+                data = df.iloc[i]
+                batch_imgs.append(Image.open(data_dir + 'test/' + data['id'] + '.jpg').convert("RGB"))
+                landmark_name, retrieved_landmarks = get_landmark(data, image_to_land_id_map, land_id_to_name)
+                labels.append(landmarks.index(landmark_name))
+
+            labels = torch.tensor(labels).to(device)
+            images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
+            image_features = model.encode_image(images)
+            image_features /= image_features.norm(dim=-1, keepdim=True)
+
+            probs = image_features @ text_features.T
+            top1 += accuracy(probs, labels)[0]
+            n += images.size(0)
+
+    print(f"results {top1}, {n}, {top1/n}")
+    return top1, n
+
+def parse_results(results, result_json):
+    with open(result_json) as f:
+        result = json.load(f)
+    print("zero-shot classification GLDv2:", result['acc'])
+    results['zero_shot_classification_GLDv2'] = result['acc']
+
+def main(model, preprocess_val, tokenizer, result_json):
+    top1, n = evaluate(model, preprocess_val, tokenizer)
+    with open(result_json, "w") as f:
+        json.dump({"top1": top1, "total": n, "acc": top1/n}, f)
diff --git a/clipeval/zero_shot_classification/eval_GeoDE.py b/clipeval/zero_shot_classification/eval_GeoDE.py
new file mode 100644
index 0000000..be244d7
--- /dev/null
+++ b/clipeval/zero_shot_classification/eval_GeoDE.py
@@ -0,0 +1,72 @@
+import torch
+import json
+from PIL import Image
+from tqdm import tqdm
+import pandas as pd
+
+import sys
+if "external/open_clip" not in sys.path:
+    sys.path.append("external/open_clip")
+from src.open_clip.zero_shot_classifier import build_zero_shot_classifier  # https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/zero_shot_classifier.py
+from src.open_clip.open_clip_train.zero_shot import accuracy  # https://github.com/mlfoundations/open_clip/blob/main/src/open_clip_train/zero_shot.py
+from src.open_clip.zero_shot_metadata import OPENAI_IMAGENET_TEMPLATES  # https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/zero_shot_metadata.py
+
+
+data_dir = 'data/GeoDE/geode/'
+# Evaluation Function
+def evaluate(model, preprocess_val, tokenizer):
+    df = pd.read_csv(data_dir + 'index.csv')
+    classnames = df['object'].unique().tolist()
+    print("done loading data", len(df))
+
+    batch_size = 16
+    device = torch.cuda.current_device()
+
+    top1 = 0
+    n = 0
+
+    classifier = build_zero_shot_classifier(
+        model,
+        tokenizer=tokenizer,
+        classnames=classnames,
+        templates=OPENAI_IMAGENET_TEMPLATES,
+        num_classes_per_batch=10,
+        device=device,
+        use_tqdm=True,
+    )
+
+    with torch.no_grad():
+        for local_start in tqdm(range(0, len(df), batch_size)):
+            local_end = min(local_start + batch_size, len(df))
+            batch_imgs = []
+            labels = []
+
+            for i in range(local_start, local_end):
+                data = df.iloc[i]
+                try:
+                    batch_imgs.append(Image.open(data_dir + 'images/' + data['file_path']).convert("RGB"))
+                    labels.append(classnames.index(data['object']))
+                except Exception:
+                    print(f"missing image {data['file_path']}")
+
+            labels = torch.tensor(labels).to(device)
+            images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
+            image_features = model.encode_image(images)
+            image_features /= image_features.norm(dim=-1, keepdim=True)
+            logits = 100. * image_features @ classifier
+            top1 += accuracy(logits, labels)[0]
+            n += images.size(0)
+
+    print(f"results {top1}, {n}, {top1/n}")
+    return top1, n
+
+def parse_results(results, result_json):
+    with open(result_json) as f:
+        result = json.load(f)
+    print("zero-shot classification GeoDE:", result['acc'])
+    results['zero_shot_classification_GeoDE'] = result['acc']
+
+def main(model, preprocess_val, tokenizer, result_json):
+    top1, n = evaluate(model, preprocess_val, tokenizer)
+    with open(result_json, "w") as f:
+        json.dump({"top1": top1, "total": n, "acc": top1/n}, f)
diff --git a/clipeval/zero_shot_classification/eval_dollar_street.py b/clipeval/zero_shot_classification/eval_dollar_street.py
new file mode 100644
index 0000000..97a48bc
--- /dev/null
+++ b/clipeval/zero_shot_classification/eval_dollar_street.py
@@ -0,0 +1,85 @@
+import ast
+import torch
+import json
+from PIL import Image
+from tqdm import tqdm
+import pandas as pd
+
+import sys
+if "external/open_clip" not in sys.path:
+    sys.path.append("external/open_clip")
+from src.open_clip.zero_shot_classifier import build_zero_shot_classifier  # https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/zero_shot_classifier.py
+from src.open_clip.zero_shot_metadata import OPENAI_IMAGENET_TEMPLATES, IMAGENET_CLASSNAMES  # https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/zero_shot_metadata.py
+
+
+data_dir = 'data/DollarStreet/dataset_dollarstreet/'
+
+def match_any_accuracy(output, target, topk=(1,)):
+    pred = output.topk(max(topk), 1, True, True)[1]  # [B, k]
+    pred_exp = pred.unsqueeze(2)  # [B, k, 1]
+    target_exp = target.unsqueeze(1)  # [B, 1, N]
+    # Compare: broadcasted over [B, k, N]
+    correct = pred_exp.eq(target_exp).any(dim=2).t()  # [k, B], True if the prediction at that rank matches any target label
+    return [float(correct[:k].any(dim=0).float().sum().item()) for k in topk]  # count samples with at least one match in the top-k
+
+# Evaluation Function
+def evaluate(model, preprocess_val, tokenizer):
+    ds_train_df = pd.read_csv(data_dir + 'images_v2_imagenet_train.csv')
+    ds_test_df = pd.read_csv(data_dir + 'images_v2_imagenet_test.csv')
+    df = pd.concat([ds_train_df, ds_test_df])
+    print("done loading data", len(df))
+
+    batch_size = 16
+    device = torch.cuda.current_device()
+
+    top1 = 0
+    top5 = 0
+    n = 0
+
+    classifier = build_zero_shot_classifier(
+        model,
+        tokenizer=tokenizer,
+        classnames=IMAGENET_CLASSNAMES,
+        templates=OPENAI_IMAGENET_TEMPLATES,
+        num_classes_per_batch=10,
+        device=device,
+        use_tqdm=True,
+    )
+
+    with torch.no_grad():
+        for local_start in tqdm(range(0, len(df), batch_size)):
+            local_end = min(local_start + batch_size, len(df))
+            batch_imgs = []
+            labels = []
+
+            for i in range(local_start, local_end):
+                data = df.iloc[i]
+                batch_imgs.append(Image.open(data_dir + data['imageRelPath']).convert("RGB"))
+                labels.append(ast.literal_eval(data['imagenet_sysnet_id']))
+
+            max_len = max(len(x) for x in labels)
+            padded_labels = [x + [-1] * (max_len - len(x)) for x in labels]
+
+            labels = torch.tensor(padded_labels).to(device)
+            images = torch.stack([preprocess_val(img).to(device) for img in batch_imgs])
+            image_features = model.encode_image(images)
+            image_features /= image_features.norm(dim=-1, keepdim=True)
+            logits = 100. * image_features @ classifier
+            tmp1, tmp5 = match_any_accuracy(logits, labels, (1, 5))
+            top1 += tmp1
+            top5 += tmp5
+            n += images.size(0)
+
+    print(f"results {top1}, {top5}, {n}, {top1/n}, {top5/n}")
+    return top1, top5, n
+
+def parse_results(results, result_json):
+    with open(result_json) as f:
+        result = json.load(f)
+    print("zero-shot classification dollar street:", result['acc'])
+    results['zero_shot_classification_dollar_street'] = result['acc']
+
+def main(model, preprocess_val, tokenizer, result_json):
+    top1, top5, n = evaluate(model, preprocess_val, tokenizer)
+    with open(result_json, "w") as f:
+        json.dump({"top1": top1, "top5": top5, "total": n, "acc": top1/n}, f)