@@ -166,6 +166,8 @@ extern "C" int load_parser(void* vzps, const char *sFeaturePath) {
166166 return 0 ;
167167}
168168
169+
170+
169171// The function to load the dependency parser model
170172extern " C" int load_depparser (void * vzps, const char *sFeaturePath ) {
171173
@@ -282,19 +284,61 @@ extern "C" char* parse_sentence(void* vzps, const char *input_sentence, bool tok
282284 zps->output_buffer = new char [1 ];
283285 strcpy (zps->output_buffer , " " );
284286 } else {
285- // initialize the variable that will hold the tagged sentence
287+ // initialize the variables that will hold the tagged and parsed sentences
286288 CTwoStringVector tagged_sent[1 ];
287289 english::CCFGTree parsed_sent[1 ];
288290
289- // get the tagger that was stored earlier
291+ // get the tagger and parser that were stored earlier
290292 CTagger *tagger = zps->tagger ;
291293 CConParser *conparser = zps->conparser ;
292294
293- // tag the sentence
295+ // tag and parse the sentence
294296 tagger->tag (tokenized_sent, tagged_sent);
295297 conparser->parse (*tagged_sent, parsed_sent);
296298
297- // now put the tagged_sent into a string stream
299+ // now put the parsed sentence into a string stream
300+ std::string parse = parsed_sent->str_unbinarized ();
301+ int parselen = parse.length ();
302+ zps->output_buffer = new char [parselen + 1 ];
303+ strcpy (zps->output_buffer , parse.c_str ());
304+ }
305+
306+ return zps->output_buffer ;
307+ }
308+
309+ extern " C" char * parse_tagged_sentence (void * vzps, const char *input_tagged_sentence, const char seperator=' /' )
310+ {
311+
312+ zparSession_t* zps = static_cast <zparSession_t *>(vzps);
313+
314+ // create a temporary string stream from the input char *
315+ CSentenceReader input_reader (std::string (input_tagged_sentence), false );
316+
317+ // read the tagged sentence into a CTwoStringVector
318+ CTwoStringVector tagged_sent[1 ];
319+ input_reader.readTaggedSentence (tagged_sent, false , seperator);
320+
321+ if (zps->output_buffer != NULL ) {
322+ delete zps->output_buffer ;
323+ zps->output_buffer = NULL ;
324+ }
325+
326+ if (tagged_sent->size () >= MAX_SENTENCE_SIZE){
327+ // The ZPar code asserts that length < MAX_SENTENCE_SIZE...
328+ std::cerr << " Sentence too long. Returning empty string. Sentence: " << input_tagged_sentence << std::endl;
329+ zps->output_buffer = new char [1 ];
330+ strcpy (zps->output_buffer , " " );
331+ } else {
332+ // initialize the variable that will hold the parsed sentence
333+ english::CCFGTree parsed_sent[1 ];
334+
335+ // get the parser that was stored earlier
336+ CConParser *conparser = zps->conparser ;
337+
338+ // parse the tagged sentence
339+ conparser->parse (*tagged_sent, parsed_sent);
340+
341+ // now put the parsed sentence into a string stream
298342 std::string parse = parsed_sent->str_unbinarized ();
299343 int parselen = parse.length ();
300344 zps->output_buffer = new char [parselen + 1 ];
@@ -321,10 +365,6 @@ extern "C" char* dep_parse_sentence(void* vzps, const char *input_sentence, bool
321365 input_reader.readSegmentedSentence (tokenized_sent);
322366 }
323367
324- // initialize the variable that will hold the tagged sentence
325- CTwoStringVector tagged_sent[1 ];
326- CDependencyParse parsed_sent[1 ];
327-
328368 if (zps->output_buffer != NULL ) {
329369 delete zps->output_buffer ;
330370 zps->output_buffer = NULL ;
@@ -336,11 +376,16 @@ extern "C" char* dep_parse_sentence(void* vzps, const char *input_sentence, bool
336376 zps->output_buffer = new char [1 ];
337377 strcpy (zps->output_buffer , " " );
338378 } else {
339- // get the tagger that was stored earlier
379+
380+ // initialize the variable that will hold the tagged and parsed sentences
381+ CTwoStringVector tagged_sent[1 ];
382+ CDependencyParse parsed_sent[1 ];
383+
384+ // get the tagger and parser that were stored earlier
340385 CTagger *tagger = zps->tagger ;
341386 CDepParser *depparser = zps->depparser ;
342387
343- // tag the sentence
388+ // tag and parse the sentence
344389 tagger->tag (tokenized_sent, tagged_sent);
345390 depparser->parse (*tagged_sent, parsed_sent);
346391
@@ -354,6 +399,49 @@ extern "C" char* dep_parse_sentence(void* vzps, const char *input_sentence, bool
354399 return zps->output_buffer ;
355400}
356401
402+ // Function to dependency parse a sentence
403+ extern " C" char * dep_parse_tagged_sentence (void * vzps, const char *input_tagged_sentence, const char seperator=' /' )
404+ {
405+ zparSession_t* zps = static_cast <zparSession_t *>(vzps);
406+
407+ // create a temporary string stream from the input char *
408+ CSentenceReader input_reader (std::string (input_tagged_sentence), false );
409+
410+ // read the tagged sentence into a CTwoStringVector
411+ CTwoStringVector tagged_sent[1 ];
412+ input_reader.readTaggedSentence (tagged_sent, false , seperator);
413+
414+ if (zps->output_buffer != NULL ) {
415+ delete zps->output_buffer ;
416+ zps->output_buffer = NULL ;
417+ }
418+
419+ if (tagged_sent->size () >= MAX_SENTENCE_SIZE){
420+ // The ZPar code asserts that length < MAX_SENTENCE_SIZE...
421+ std::cerr << " Sentence too long. Returning empty string. Sentence: " << input_tagged_sentence << std::endl;
422+ zps->output_buffer = new char [1 ];
423+ strcpy (zps->output_buffer , " " );
424+ } else {
425+
426+ // initialize the variable that will hold the parsed sentence
427+ CDependencyParse parsed_sent[1 ];
428+
429+ // get the parser that was stored earlier
430+ CDepParser *depparser = zps->depparser ;
431+
432+ // parse the sentence
433+ depparser->parse (*tagged_sent, parsed_sent);
434+
435+ // now output the formatted dependency tree
436+ std::string deptree = format_dependency_tree (parsed_sent);
437+ int deptreelen = deptree.length ();
438+ zps->output_buffer = new char [deptreelen + 1 ];
439+ strcpy (zps->output_buffer , deptree.c_str ());
440+ }
441+
442+ return zps->output_buffer ;
443+ }
444+
357445// Function to tag all sentences in the given input file
358446// and write tagged sentences to the given output file
359447extern " C" void tag_file (void * vzps, const char *sInputFile , const char *sOutputFile , bool tokenize)
@@ -366,17 +454,17 @@ extern "C" void tag_file(void* vzps, const char *sInputFile, const char *sOutput
366454 // initialize the input reader
367455 CSentenceReader input_reader (sInputFile );
368456
369- // open the output file
370- FILE *outfp = NULL ;
371- outfp = fopen (sOutputFile , " w" );
372-
373457 // initialize the temporary sentence variables
374458 CStringVector tokenized_sent[1 ];
375459 CTwoStringVector tagged_sent[1 ];
376460
377461 // get the tagger and the parser that were stored earlier
378462 CTagger *tagger = zps->tagger ;
379463
464+ // initialize the output file writer
465+ std::string outputFileName = std::string (sOutputFile );
466+ CSentenceWriter output_writer (outputFileName);
467+
380468 // read in and tokenize the given input file if asked
381469 bool readSomething;
382470 if (tokenize) {
@@ -396,8 +484,7 @@ extern "C" void tag_file(void* vzps, const char *sInputFile, const char *sOutput
396484 tagger->tag (tokenized_sent, tagged_sent);
397485
398486 // write the formatted sentence to the output file
399- std::string tagvec = format_tagged_vector (tagged_sent);
400- fprintf (outfp, " %s\n " , tagvec.c_str ());
487+ output_writer.writeSentence (tagged_sent, ' /' , true );
401488
402489 if (tokenize) {
403490 readSomething = input_reader.readSegmentedSentenceAndTokenize (tokenized_sent);
@@ -409,7 +496,6 @@ extern "C" void tag_file(void* vzps, const char *sInputFile, const char *sOutput
409496
410497 // close the output file
411498 std::cerr << " Wrote output to " << sOutputFile << std::endl;
412- fclose (outfp);
413499}
414500
415501// Function to constituency parse all sentences in the given input file
@@ -477,6 +563,51 @@ extern "C" void parse_file(void* vzps, const char *sInputFile, const char *sOutp
477563 fclose (outfp);
478564}
479565
566+ extern " C" void parse_tagged_file (void * vzps, const char *sInputFile , const char *sOutputFile , const char seperator=' /' )
567+ {
568+
569+ zparSession_t* zps = static_cast <zparSession_t *>(vzps);
570+
571+ std::cerr << " Processing file " << sInputFile << std::endl;
572+
573+ // initialize the input reader
574+ CSentenceReader input_reader (sInputFile );
575+
576+ // open the output file
577+ FILE *outfp = NULL ;
578+ outfp = fopen (sOutputFile , " w" );
579+
580+ // initialize the temporary sentence variables
581+ CTwoStringVector tagged_sent[1 ];
582+ english::CCFGTree parsed_sent[1 ];
583+
584+ // get the parser that was stored earlier
585+ CConParser *conparser = zps->conparser ;
586+
587+ // read in and tokenize the given input file if asked
588+ bool readSomething;
589+ readSomething = input_reader.readTaggedSentence (tagged_sent, false , seperator);
590+
591+ while ( readSomething )
592+ {
593+ std::string parse = " " ;
594+ if (tagged_sent->size () < MAX_SENTENCE_SIZE){
595+ conparser->parse (*tagged_sent, parsed_sent);
596+ parse = parsed_sent->str_unbinarized ();
597+ } else {
598+ std::cerr << " Sentence too long. Writing empty string. Sentence: " << tagged_sent << std::endl;
599+ }
600+
601+ fprintf (outfp, " %s\n " , parse.c_str ());
602+
603+ readSomething = input_reader.readTaggedSentence (tagged_sent, false , seperator);
604+ }
605+
606+ // close the output file
607+ std::cerr << " Wrote output to " << sOutputFile << std::endl;
608+ fclose (outfp);
609+ }
610+
480611// Function to dependency parse all sentences in the given input file
481612// and write parsed sentences to the given output file
482613extern " C" void dep_parse_file (void * vzps, const char *sInputFile , const char *sOutputFile , bool tokenize)
@@ -542,6 +673,51 @@ extern "C" void dep_parse_file(void* vzps, const char *sInputFile, const char *s
542673 fclose (outfp);
543674}
544675
676+ extern " C" void dep_parse_tagged_file (void * vzps, const char *sInputFile , const char *sOutputFile , const char seperator=' /' )
677+ {
678+
679+ zparSession_t* zps = static_cast <zparSession_t *>(vzps);
680+
681+ std::cerr << " Processing file " << sInputFile << std::endl;
682+
683+ // initialize the input reader
684+ CSentenceReader input_reader (sInputFile );
685+
686+ // open the output file
687+ FILE *outfp = NULL ;
688+ outfp = fopen (sOutputFile , " w" );
689+
690+ // initialize the temporary sentence variables
691+ CTwoStringVector tagged_sent[1 ];
692+ CDependencyParse parsed_sent[1 ];
693+
694+ // get the parser that was stored earlier
695+ CDepParser *depparser = zps->depparser ;
696+
697+ // read in and tokenize the given input file if asked
698+ bool readSomething;
699+ readSomething = input_reader.readTaggedSentence (tagged_sent, false , seperator);
700+
701+ while ( readSomething )
702+ {
703+ std::string deptree = " " ;
704+ if (tagged_sent->size () < MAX_SENTENCE_SIZE){
705+ depparser->parse (*tagged_sent, parsed_sent);
706+ deptree = format_dependency_tree (parsed_sent);
707+ } else {
708+ std::cerr << " Sentence too long. Writing empty string. Sentence: " << tagged_sent << std::endl;
709+ }
710+
711+ fprintf (outfp, " %s\n " , deptree.c_str ());
712+
713+ readSomething = input_reader.readTaggedSentence (tagged_sent, false , seperator);
714+ }
715+
716+ // close the output file
717+ std::cerr << " Wrote output to " << sOutputFile << std::endl;
718+ fclose (outfp);
719+ }
720+
545721// Function to unload all the models
546722extern " C" void unload_models (void * vzps)
547723{
@@ -554,12 +730,17 @@ extern "C" void unload_models(void* vzps)
554730 zps = NULL ;
555731}
556732
557- // // A main function for testing
733+ // A main function for testing
558734// extern "C" int main(int argc, char *argv[])
559735// {
560736// void* vzps = initialize();
561737// load_tagger(vzps, "/Users/nmadnani/work/NLPTools/zpar/english-models");
562- // std::cout << std::string(tag_sentence(vzps, "I said I am going to the market.", false));
738+ // load_parser(vzps, "/Users/nmadnani/work/NLPTools/zpar/english-models");
739+ // load_depparser(vzps, "/Users/nmadnani/work/NLPTools/zpar/english-models");
740+ // parse_tagged_file(vzps, "/Users/nmadnani/work/python-zpar/examples/test_tagged.txt", "/Users/nmadnani/work/python-zpar/examples/test_tagged.parse");
741+ // dep_parse_tagged_file(vzps, "/Users/nmadnani/work/python-zpar/examples/test_tagged.txt", "/Users/nmadnani/work/python-zpar/examples/test_tagged.dep");
742+ // std::cout << std::string(parse_tagged_sentence(vzps, "I/PRP am/VBP going/VBG to/TO the/DT market/NN ./.")) << std::endl;
743+ // std::cout << std::string(dep_parse_tagged_sentence(vzps, "I/PRP am/VBP going/VBG to/TO the/DT market/NN ./.")) << std::endl;
563744// unload_models(vzps);
564745// return 0;
565746// }
0 commit comments