1
- """Usage: bioclip predict [options] [IMAGE_FILE...]
2
-
3
- Use BioCLIP to generate predictions for an IMAGE_FILE.
4
-
5
- Arguments:
6
- IMAGE_FILE input image file
7
-
8
- Options:
9
- -h --help
10
- --format=FORMAT format of the output (table or csv) [default: csv]
11
- --rank=RANK rank of the classification (kingdom, phylum, class, order, family, genus, species) [default: species]
12
- --k=K number of top predictions to show [default: 5]
13
- --cls=CLS comma separated list of classes to predict, when specified the --rank and --k arguments are ignored [default: all]
14
- --device=DEVICE device to use for prediction (cpu or cuda or mps) [default: cpu]
15
- --output=OUTFILE print output to file OUTFILE [default: stdout]
16
-
17
- """
18
- from docopt import docopt
19
1
from bioclip import TreeOfLifeClassifier , Rank , CustomLabelsClassifier
20
2
import json
21
3
import sys
22
4
import prettytable as pt
23
- import csv
24
5
import pandas as pd
6
+ import argparse
25
7
26
8
27
9
def write_results (data , format , output ):
@@ -46,33 +28,89 @@ def write_results_to_file(df, format, outfile):
46
28
else :
47
29
raise ValueError (f"Invalid format: { format } " )
48
30
49
-
50
- def main ():
51
- # execute only if run as the entry point into the program
52
- x = docopt (__doc__ ) # parse arguments based on docstring above
53
- format = x ['--format' ]
54
- output = x ['--output' ]
55
- image_file = x ['IMAGE_FILE' ]
56
- device = 'cpu'
57
- if x ['--device' ]:
58
- device = x ['--device' ]
59
- cls = x ['--cls' ]
60
- if not format in ['table' , 'csv' ]:
61
- raise ValueError (f"Invalid format: { format } " )
62
- rank = Rank [x ['--rank' ].upper ()]
63
- if cls == 'all' :
64
- classifier = TreeOfLifeClassifier (device = device )
31
def predict(image_file: list[str], format: str, output: str,
            cls_str: str, device: str, rank: Rank, k: int):
    """Classify every image and pass the combined predictions to ``write_results``.

    When ``cls_str`` is truthy, a ``CustomLabelsClassifier`` is used and each
    image is scored against the labels obtained by splitting ``cls_str`` on
    commas; ``rank`` and ``k`` are not used in that mode. Otherwise a
    ``TreeOfLifeClassifier`` is used and ``rank`` and ``k`` are forwarded to it.

    Args:
        image_file: Paths of the images to classify.
        format: Output format, forwarded unchanged to ``write_results``.
        output: Output destination, forwarded unchanged to ``write_results``.
        cls_str: Comma-separated custom labels, or a falsy value to select
            the tree-of-life classifier.
        device: Device name handed to the classifier constructor.
        rank: Rank forwarded to ``TreeOfLifeClassifier.predict``.
        k: Number of predictions forwarded to ``TreeOfLifeClassifier.predict``.
    """
    if cls_str:
        classifier = CustomLabelsClassifier(device=device)

        def classify(path):
            # Split on every call so each prediction gets its own label list.
            return classifier.predict(image_path=path, cls_ary=cls_str.split(','))
    else:
        classifier = TreeOfLifeClassifier(device=device)

        def classify(path):
            return classifier.predict(image_path=path, rank=rank, k=k)

    predictions = []
    for path in image_file:
        predictions.extend(classify(path))
    write_results(predictions, format, output)
75
45
76
46
47
def embed(image_file: list[str], output: str, device: str):
    """Compute image features for each file and emit them as indented JSON.

    The JSON object has two keys: ``"model"`` (the classifier's ``model_str``)
    and ``"embeddings"`` (a mapping from each image path to the first element
    of ``classifier.get_image_features(path)``, converted with ``tolist()``).

    Args:
        image_file: Paths of the images to embed.
        output: ``'stdout'`` to print the JSON, otherwise a file path that is
            opened for writing.
        device: Device name handed to the ``TreeOfLifeClassifier`` constructor.
    """
    classifier = TreeOfLifeClassifier(device=device)
    model_name = classifier.model_str
    embeddings = {
        path: classifier.get_image_features(path)[0].tolist()
        for path in image_file
    }
    payload = {"model": model_name, "embeddings": embeddings}

    if output != 'stdout':
        with open(output, 'w') as outfile:
            json.dump(payload, outfile, indent=4)
        return
    print(json.dumps(payload, indent=4))
62
+
63
+
64
def create_parser():
    """Build the ``bioclip`` argument parser with its ``predict`` and ``embed`` commands.

    The chosen sub-command name is stored in the ``command`` attribute of the
    parsed namespace. ``--rank`` and ``--k`` deliberately have no argparse
    default (they parse to ``None`` when omitted) so that callers can tell
    "not given" apart from an explicit value.

    Returns:
        argparse.ArgumentParser: the configured top-level parser.
    """
    parser = argparse.ArgumentParser(prog='bioclip', description='BioCLIP command line interface')
    subparsers = parser.add_subparsers(title='commands', dest='command')

    # Predict command
    predict_parser = subparsers.add_parser('predict', help='Use BioCLIP to generate predictions for image files.')
    predict_parser.add_argument('image_file', nargs='+', help='input image file(s)')
    predict_parser.add_argument('--format', choices=['table', 'csv'], default='csv',
                                help='format of the output, default: csv')
    predict_parser.add_argument('--output', default='stdout', help='print output to file, default: stdout')
    predict_parser.add_argument('--rank', choices=['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'],
                                help='rank of the classification, default: species (when --cls is not specified)')
    predict_parser.add_argument('--k', type=int, help='number of top predictions to show, default: 5')
    predict_parser.add_argument('--cls', help='comma separated list of classes to predict, when specified the --rank and --k arguments are not allowed')
    predict_parser.add_argument('--device', help='device to use (cpu or cuda or mps), default: cpu', default='cpu')

    # Embed command
    embed_parser = subparsers.add_parser('embed', help='Use BioCLIP to generate embeddings for image files.')
    embed_parser.add_argument('image_file', nargs='+', help='input image file(s)')
    embed_parser.add_argument('--output', default='stdout', help='print output to file, default: stdout')
    embed_parser.add_argument('--device', help='device to use (cpu or cuda or mps), default: cpu', default='cpu')

    return parser
86
+
87
+
88
def parse_args(input_args=None):
    """Parse command line arguments and normalise the ``predict`` options.

    For the ``predict`` command:

    * if ``--cls`` is given (custom class list mode), ``--rank`` and ``--k``
      must not be supplied;
    * otherwise (tree of life mode), ``rank`` defaults to ``'species'`` and is
      converted to the matching ``Rank`` member, and ``k`` defaults to 5.

    Args:
        input_args: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.

    Returns:
        The parsed ``argparse.Namespace``.

    Raises:
        ValueError: if ``--cls`` is combined with ``--rank`` or ``--k``.
    """
    args = create_parser().parse_args(input_args)
    if args.command != 'predict':
        return args

    if args.cls:
        # custom class list mode
        # Compare with None rather than relying on truthiness: an explicit
        # ``--k 0`` is still a supplied option and must be rejected too.
        if args.rank is not None or args.k is not None:
            raise ValueError("Cannot use --cls with --rank or --k")
        return args

    # tree of life class list mode
    rank_name = 'species' if args.rank is None else args.rank
    args.rank = Rank[rank_name.upper()]
    # Only fill in the default when --k was omitted; an explicit value
    # (including 0) is passed through unchanged instead of being replaced.
    if args.k is None:
        args.k = 5
    return args
103
+
104
+
105
def main():
    """Entry point: parse the command line and run the selected command.

    Raises:
        ValueError: if the parsed command is neither ``'embed'`` nor
            ``'predict'``.
    """
    args = parse_args()
    command = args.command

    if command == 'embed':
        embed(args.image_file, args.output, args.device)
        return
    if command == 'predict':
        predict(args.image_file, args.format, args.output, args.cls,
                args.device, args.rank, args.k)
        return
    raise ValueError("Invalid command")
113
+
114
+
77
115
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
0 commit comments