99import org .monarchinitiative .phenopacket2prompt .model .PhenopacketDisease ;
1010import org .monarchinitiative .phenopacket2prompt .model .PpktIndividual ;
1111import org .monarchinitiative .phenopacket2prompt .output .CorrectResult ;
12+ import org .monarchinitiative .phenopacket2prompt .output .PpktCopy ;
1213import org .monarchinitiative .phenopacket2prompt .output .PromptGenerator ;
1314import org .slf4j .Logger ;
1415import org .slf4j .LoggerFactory ;
2324import java .util .ArrayList ;
2425import java .util .List ;
2526import java .util .Map ;
27+ import java .util .Set ;
2628import java .util .concurrent .Callable ;
2729
2830@ CommandLine .Command (name = "batch" , aliases = {"B" },
@@ -40,9 +42,16 @@ public class GbtTranslateBatchCommand implements Callable<Integer> {
4042 description = "path to translations file" )
4143 private String translationsPath = "data/hp-international.obo" ;
4244
45+ @ CommandLine .Option (names = {"-o" , "--outdir" },
46+ description = "path to outdir" )
47+ private String outdirname = "prompts" ;
48+
4349 @ CommandLine .Option (names = {"-d" , "--dir" }, description = "Path to directory with JSON phenopacket files" , required = true )
4450 private String ppktDir ;
4551
// Code of the language whose prompts are currently being written; stays null until resetOutput is first called.
52+ private String currentLanguageCode = null ;
// Running counter for the current language: incremented by outputPrompt, set back to 0 by resetOutput.
53+ private int currentCount ;
54+
4655 @ Override
4756 public Integer call () throws Exception {
4857 File hpJsonFile = new File (hpoJsonPath );
@@ -57,19 +66,51 @@ public Integer call() throws Exception {
5766 return 1 ;
5867 }
5968 HpInternationalOboParser oboParser = new HpInternationalOboParser (translationsFile );
69+
6070 Map <String , HpInternational > internationalMap = oboParser .getLanguageToInternationalMap ();
6171 LOGGER .info ("Got {} translations" , internationalMap .size ());
6272 List <File > ppktFiles = getAllPhenopacketJsonFiles ();
63- createDir ("prompts" );
73+ createDir (outdirname );
6474 List <CorrectResult > correctResultList = outputPromptsEnglish (ppktFiles , hpo );
6575 // output all non-English languages here
66- PromptGenerator spanish = PromptGenerator .spanish (hpo , internationalMap .get ("es" ));
76+
77+ // SPANISH
78+ PromptGenerator spanish = PromptGenerator .spanish (internationalMap .get ("es" ));
79+ resetOutput ("es" );
6780 outputPromptsInternational (ppktFiles , hpo , "es" , spanish );
81+
82+ resetOutput ("nl" );
83+ PromptGenerator dutch = PromptGenerator .dutch (internationalMap .get ("nl" ));
84+ outputPromptsInternational (ppktFiles , hpo , "nl" , dutch );
85+ // GERMAN
86+ resetOutput ("de" );
87+ PromptGenerator german = PromptGenerator .german (internationalMap .get ("de" ));
88+ outputPromptsInternational (ppktFiles , hpo , "de" , german );
89+
90+ // ITALIAN
91+ resetOutput ("it" );
92+ PromptGenerator italian = PromptGenerator .italian (internationalMap .get ("it" ));
93+ outputPromptsInternational (ppktFiles , hpo , "it" , italian );
94+ resetOutput ("finished" );
95+ // output original phenopackets
96+ PpktCopy pcopy = new PpktCopy (new File (outdirname ));
97+ for (var file : ppktFiles ) {
98+ pcopy .copyFile (file );
99+ }
100+
68101 // output file with correct diagnosis list
69102 outputCorrectResults (correctResultList );
70103 return 0 ;
71104 }
72105
106+ private void resetOutput (String es ) {
107+ if (currentLanguageCode != null ) {
108+ System .out .printf ("Finished writing %d phenopackets in %s\n " , currentCount , currentLanguageCode );
109+ }
110+ currentLanguageCode = es ;
111+ currentCount = 0 ;
112+ }
113+
73114 private void outputCorrectResults (List <CorrectResult > correctResultList ) {
74115 File outfile = new File ("prompts" + File .separator + "correct_results.tsv" );
75116 try (BufferedWriter bw = new BufferedWriter (new FileWriter (outfile ))) {
@@ -79,12 +120,12 @@ private void outputCorrectResults(List<CorrectResult> correctResultList) {
79120 } catch (IOException e ) {
80121 e .printStackTrace ();
81122 }
82- System .out .printf ("[INFO] Output a total of %d prompts in en and es .\n " , correctResultList .size ());
123+ System .out .printf ("[INFO] Output a total of %d prompts in en, es, nl, de, and it .\n " , correctResultList .size ());
83124 }
84125
85126
86- private String getFileName (String phenopacketID ) {
87- return phenopacketID .replaceAll ("[^\\ w]" , phenopacketID ). replaceAll ( "/" , "_" ) + "-prompt.txt" ;
127+ private String getFileName (String phenopacketID , String languageCode ) {
128+ return phenopacketID .replaceAll ("[^\\ w]" ,"_" ) + "_" + languageCode + "-prompt.txt" ;
88129 }
89130
90131
@@ -94,21 +135,28 @@ private void outputPromptsInternational(List<File> ppktFiles, Ontology hpo, Stri
94135 createDir (dirpath );
95136 List <String > diagnosisList = new ArrayList <>();
96137 for (var f : ppktFiles ) {
97- PpktIndividual individual = new PpktIndividual (f );
138+ PpktIndividual individual = PpktIndividual . fromFile (f );
98139 List <PhenopacketDisease > diseaseList = individual .getDiseases ();
99140 if (diseaseList .size () != 1 ) {
100- System . err . println ( String .format ("[ERROR] Got %d diseases for %s.\n " , diseaseList .size (), individual .getPhenopacketId () ));
101- continue ;
141+ String errmsg = String .format ("[ERROR] Got %d diseases for %s.\n " , diseaseList .size (), individual .getPhenopacketId ());
142+ throw new PhenolRuntimeException ( errmsg ) ;
102143 }
103144 PhenopacketDisease pdisease = diseaseList .get (0 );
104- String promptFileName = getFileName ( individual .getPhenopacketId ());
145+ String promptFileName = getFileName ( individual .getPhenopacketId (), languageCode );
105146 String diagnosisLine = String .format ("%s\t %s\t %s\t %s" , pdisease .getDiseaseId (), pdisease .getLabel (), promptFileName , f .getAbsolutePath ());
106147 try {
107148 diagnosisList .add (diagnosisLine );
108149 String prompt = generator .createPrompt (individual );
109150 outputPrompt (prompt , promptFileName , dirpath );
110151 } catch (Exception e ) {
111- e .printStackTrace ();
152+ System .err .printf ("[ERROR] Could not process %s: %s\n " , promptFileName , e .getMessage ());
153+ //e.printStackTrace();
154+ }
155+ }
156+ Set <String > missing = generator .getMissingTranslations ();
157+ if (! missing .isEmpty ()) {
158+ for (var m : missing ) {
159+ System .out .printf ("[%s] Missing: %s\n " , languageCode , m );
112160 }
113161 }
114162 }
@@ -117,17 +165,17 @@ private void outputPromptsInternational(List<File> ppktFiles, Ontology hpo, Stri
117165 private List <CorrectResult > outputPromptsEnglish (List <File > ppktFiles , Ontology hpo ) {
118166 createDir ("prompts/en" );
119167 List <CorrectResult > correctResultList = new ArrayList <>();
120- PromptGenerator generator = PromptGenerator .english (hpo );
168+ PromptGenerator generator = PromptGenerator .english ();
121169
122170 for (var f : ppktFiles ) {
123- PpktIndividual individual = new PpktIndividual (f );
171+ PpktIndividual individual = PpktIndividual . fromFile (f );
124172 List <PhenopacketDisease > diseaseList = individual .getDiseases ();
125173 if (diseaseList .size () != 1 ) {
126- System .err .println ( String . format ( "[ERROR] Got %d diseases for %s.\n " , diseaseList .size (), individual .getPhenopacketId () ));
174+ System .err .printf ( "[ERROR] Got %d diseases for %s.\n " , diseaseList .size (), individual .getPhenopacketId ());
127175 continue ;
128176 }
129177 PhenopacketDisease pdisease = diseaseList .get (0 );
130- String promptFileName = getFileName ( individual .getPhenopacketId ());
178+ String promptFileName = getFileName ( individual .getPhenopacketId (), "en" );
131179 String diagnosisLine = String .format ("%s\t %s\t %s\t %s" , pdisease .getDiseaseId (), pdisease .getLabel (), promptFileName , f .getAbsolutePath ());
132180 try {
133181 String prompt = generator .createPrompt (individual );
@@ -150,7 +198,8 @@ private void outputPrompt(String prompt, String promptFileName, String dir) {
150198 } catch (IOException e ) {
151199 e .printStackTrace ();
152200 }
153- System .out .print ("." );
201+ System .out .printf ("%s %d.\r " , currentLanguageCode , currentCount );
202+ currentCount ++;
154203 }
155204
156205
@@ -177,6 +226,9 @@ private List<File> getAllPhenopacketJsonFiles() {
177226 for (File item : items ) {
178227 if (item .isDirectory ())
179228 ppktDirectories .add (ppktDir +item .getName ());
229+ else if (item .isFile () && item .getName ().endsWith (".json" )) {
230+ ppktFiles .add (item );
231+ }
180232 }
181233 for (var f : ppktDirectories ) {
182234 File subdir = new File (f );
0 commit comments