Skip to content

Commit 14b6d8a

Browse files
committed
Updated model name.
1 parent c5173be commit 14b6d8a

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

embedding_sandbox.ipynb

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
"metadata": {},
2525
"outputs": [],
2626
"source": [
27-
"OUTPUT_FOLDER = \"/mnt/colab_public/datasets/joao/mteb_results/\"\n",
27+
"OUTPUT_FOLDER = \"./mteb_results/\"\n",
2828
"DEVICE = \"cuda:0\"\n",
2929
"BATCH_SIZE=32\n",
3030
"MAX_INPUT_LEN = 10000\n",
@@ -57,7 +57,7 @@
5757
" super().__init__()\n",
5858
"\n",
5959
" self.model_name = model_name\n",
60-
" self.tokenizer = prepare_tokenizer(model_name)\n",
60+
" self.tokenizer = prepare_tokenizer(model_name, use_auth_token=True)\n",
6161
" self.encoder = AutoModel.from_pretrained(model_name, use_auth_token=True).to(DEVICE).eval()\n",
6262
" self.device = device\n",
6363
" self.max_input_len = max_input_len\n",
@@ -88,10 +88,10 @@
8888
"\n",
8989
" return [emb.squeeze().numpy() for emb in input_sentences_embedding]\n",
9090
"\n",
91-
"class BigCodeEncoder(BaseEncoder):\n",
91+
"class StarEncoder(BaseEncoder):\n",
9292
"\n",
9393
" def __init__(self, device, max_input_len, maximum_token_len):\n",
94-
" super().__init__(device, max_input_len, maximum_token_len, model_name = \"bigcode/bigcode-encoder\")\n",
94+
" super().__init__(device, max_input_len, maximum_token_len, model_name = \"bigcode/starencoder\")\n",
9595
" \n",
9696
" def forward(self, input_sentences):\n",
9797
"\n",
@@ -152,7 +152,7 @@
152152
],
153153
"source": [
154154
"codebert = CodeBERT(DEVICE, MAX_INPUT_LEN, MAX_TOKEN_LEN)\n",
155-
"bigcode_model = BigCodeEncoder(DEVICE, MAX_INPUT_LEN, MAX_TOKEN_LEN)"
155+
"starencoder = StarEncoder(DEVICE, MAX_INPUT_LEN, MAX_TOKEN_LEN)"
156156
]
157157
},
158158
{
@@ -167,7 +167,7 @@
167167
"]\n",
168168
"\n",
169169
"codebert_embeddings = codebert.encode(input_sentences)\n",
170-
"bigcode_model_embeddings = bigcode_model.encode(input_sentences)\n"
170+
"starencoder_embeddings = starencoder.encode(input_sentences)\n"
171171
]
172172
},
173173
{
@@ -202,13 +202,13 @@
202202
}
203203
],
204204
"source": [
205-
"results_bigcode_encoder = evaluation.run(\n",
206-
" bigcode_model, \n",
207-
" output_folder=os.path.join(OUTPUT_FOLDER, \"bigcode_encoder\"), \n",
205+
"results_starencoder = evaluation.run(\n",
206+
" starencoder, \n",
207+
" output_folder=os.path.join(OUTPUT_FOLDER, \"starencoder\"), \n",
208208
" batch_size=BATCH_SIZE, \n",
209209
" overwrite_results=True,)\n",
210210
"\n",
211-
"results_bigcode_encoder"
211+
"results_starencoder"
212212
]
213213
},
214214
{

0 commit comments

Comments
 (0)