Skip to content

Commit a1e9221

Browse files
authored
Adding an option to write vocab_size/vector_size as the first line of the output.
This header line is needed for use with some libraries, such as gensim.
1 parent c0d838f commit a1e9221

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

src/glove.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ typedef struct cooccur_rec {
4141
real val;
4242
} CREC;
4343

44+
int write_header=0; // 0 = no (default), nonzero = yes (set via -write-header): save_params writes "vocab_size vector_size" as the first output line, before the per-word rows, for use with some libraries, such as gensim.
4445
int verbose = 2; // 0, 1, or 2
4546
int use_unk_vec = 1; // 0 or 1
4647
int num_threads = 8; // pthreads
@@ -220,6 +221,7 @@ int save_params(int nb_iter) {
220221
fid = fopen(vocab_file, "r");
221222
sprintf(format,"%%%ds",MAX_STRING_LENGTH);
222223
if (fid == NULL) {fprintf(stderr, "Unable to open file %s.\n",vocab_file); return 1;}
224+
if (write_header) fprintf(fout, "%ld %d\n", vocab_size, vector_size);
223225
for (a = 0; a < vocab_size; a++) {
224226
if (fscanf(fid,format,word) == 0) return 1;
225227
// input vocab cannot contain special <unk> keyword
@@ -369,6 +371,8 @@ int main(int argc, char **argv) {
369371
printf("Usage options:\n");
370372
printf("\t-verbose <int>\n");
371373
printf("\t\tSet verbosity: 0, 1, or 2 (default)\n");
374+
printf("\t-write-header <int>\n");
375+
printf("\t\tIf 1, write vocab_size/vector_size as first line. Do nothing if 0 (default).\n");
372376
printf("\t-vector-size <int>\n");
373377
printf("\t\tDimension of word vector representations (excluding bias term); default 50\n");
374378
printf("\t-threads <int>\n");
@@ -404,6 +408,7 @@ int main(int argc, char **argv) {
404408
printf("./glove -input-file cooccurrence.shuf.bin -vocab-file vocab.txt -save-file vectors -gradsq-file gradsq -verbose 2 -vector-size 100 -threads 16 -alpha 0.75 -x-max 100.0 -eta 0.05 -binary 2 -model 2\n\n");
405409
result = 0;
406410
} else {
411+
if ((i = find_arg((char *)"-write-header", argc, argv)) > 0) write_header = atoi(argv[i + 1]);
407412
if ((i = find_arg((char *)"-verbose", argc, argv)) > 0) verbose = atoi(argv[i + 1]);
408413
if ((i = find_arg((char *)"-vector-size", argc, argv)) > 0) vector_size = atoi(argv[i + 1]);
409414
if ((i = find_arg((char *)"-iter", argc, argv)) > 0) num_iter = atoi(argv[i + 1]);

0 commit comments

Comments
 (0)