@@ -611,6 +611,13 @@ namespace dd
611611 write_images_to_hdf5 (img_lists.at (1 ), test_dbfullname, test_list,
612612 alphabet, max_ocr_length, false );
613613 }
614+ if (img_lists.size () > 2 )
615+ {
616+ _logger->error (
617+ " multiple test sets not supported by caffe backend yet" );
618+ throw InputConnectorBadParamException (
619+ " multiple test sets not supported by caffe backend yet" );
620+ }
614621
615622 // save the alphabet as corresp file
616623 std::ofstream correspf (_model_repo + " /" + _correspname, std::ios::binary);
@@ -1341,8 +1348,9 @@ namespace dd
13411348 }
13421349
13431350 /* - DDCCsv -*/
1344- int DDCCsv::read_file (const std::string &fname)
1351+ int DDCCsv::read_file (const std::string &fname, int test_id )
13451352 {
1353+ (void )test_id;
13461354 if (_cifc)
13471355 {
13481356 _cifc->read_csv (fname);
@@ -1483,7 +1491,8 @@ namespace dd
14831491 {
14841492 if (!_db)
14851493 {
1486- CSVInputFileConn::add_test_csvline (id, vals);
1494+ // MULTIPLE TEST SETS : we consider here only 1 test set
1495+ CSVInputFileConn::add_test_csvline (0 , id, vals);
14871496 return ;
14881497 }
14891498
@@ -1535,8 +1544,9 @@ namespace dd
15351544 if (!fileops::file_exists (_csv_fname))
15361545 throw InputConnectorBadParamException (" training CSV file " + _csv_fname
15371546 + " does not exist" );
1547+ // MULTIPLE TEST SETS : we consider here only 1 test set
15381548 if (_uris.size () > 1 )
1539- _csv_test_fname = _uris.at (1 );
1549+ _csv_test_fnames. push_back ( _uris.at (1 ) );
15401550 /* if (ad_input.has("label"))
15411551 _label = ad_input.get("label").get<std::string>();
15421552 else if (_train && _label.empty()) throw
@@ -1644,13 +1654,27 @@ namespace dd
16441654 // XXX: remove in-memory data, which pre-processing is useless and
16451655 // should be avoided
16461656 destroy_txt_entries (_txt);
1647- destroy_txt_entries (_test_txt);
1657+ // MULTIPLE TEST SETS : we consider here only 1 test set
1658+ // destroy_txt_entries(_test_txt);
1659+ for (auto tt : _tests_txt)
1660+ destroy_txt_entries (tt);
1661+ _tests_txt.clear ();
16481662
16491663 return 0 ;
16501664 }
16511665
16521666 _db_batchsize = _txt.size ();
1653- _db_testbatchsize = _test_txt.size ();
1667+ // MULTIPLE TEST SETS : we consider here only 1 test set
1668+
1669+ if (_tests_txt.size () > 1 )
1670+ {
1671+ _logger->error (
1672+ " multiple test sets not supported by caffe backend yet" );
1673+ throw InputConnectorBadParamException (
1674+ " multiple test sets not supported by caffe backend yet" );
1675+ }
1676+
1677+ _db_testbatchsize = _tests_txt[0 ].size ();
16541678
16551679 _logger->info (" db_batchsize={} / db_testbatchsize={}" , _db_batchsize,
16561680 _db_testbatchsize);
@@ -1661,13 +1685,19 @@ namespace dd
16611685 else
16621686 write_sparse_txt_to_db (dbfullname, _txt);
16631687 destroy_txt_entries (_txt);
1664- if (!_test_txt.empty ())
1688+ // MULTIPLE TEST SETS : we consider here only 1 test set
1689+ if (!_tests_txt.empty () && !_tests_txt[0 ].empty ())
16651690 {
16661691 if (!_sparse)
1667- write_txt_to_db (testdbfullname, _test_txt);
1692+ // MULTIPLE TEST SETS : we consider here only 1 test set
1693+ write_txt_to_db (testdbfullname, _tests_txt[0 ]);
16681694 else
1669- write_sparse_txt_to_db (testdbfullname, _test_txt);
1670- destroy_txt_entries (_test_txt);
1695+ // MULTIPLE TEST SETS : we consider here only 1 test set
1696+ write_sparse_txt_to_db (testdbfullname, _tests_txt[0 ]);
1697+ // MULTIPLE TEST SETS : we consider here only 1 test set
1698+ for (auto tt : _tests_txt)
1699+ destroy_txt_entries (tt);
1700+ _tests_txt.clear ();
16711701 }
16721702
16731703 return 0 ;
@@ -1678,11 +1708,17 @@ namespace dd
16781708 if (_cifc)
16791709 {
16801710 _cifc->_columns .clear ();
1681- std::string test_file = _cifc->_csv_test_fname ;
1682- _cifc->_csv_test_fname = " " ;
1711+ // MULTIPLE TEST SETS : we consider here only 1 test set
1712+ // std::string test_file = _cifc->_csv_test_fname;
1713+ std::vector<std::string> test_files = _cifc->_csv_test_fnames ;
1714+ // MULTIPLE TEST SETS : we consider here only 1 test set
1715+ // _cifc->_csv_test_fname = "";
1716+ _cifc->_csv_test_fnames .clear ();
16831717 _cifc->read_csv (fname);
16841718 _cifc->push_csv_to_csvts (is_test_data);
1685- _cifc->_csv_test_fname = test_file;
1719+ // MULTIPLE TEST SETS : we consider here only 1 test set
1720+ // _cifc->_csv_test_fname = test_file;
1721+ _cifc->_csv_test_fnames = test_files;
16861722 _cifc->_ids .push_back (fname);
16871723 return 0 ;
16881724 }
@@ -1728,8 +1764,10 @@ namespace dd
17281764
17291765 // - read all test files
17301766 std::unordered_set<std::string> testfiles;
1731- if (!_cifc->_csv_test_fname .empty ())
1732- fileops::list_directory (_cifc->_csv_test_fname , true , false , true ,
1767+ // MULTIPLE TEST SETS : we consider here only 1 test set
1768+ if (!_cifc->_csv_test_fnames .empty ())
1769+ // MULTIPLE TEST SETS : we consider here only 1 test set
1770+ fileops::list_directory (_cifc->_csv_test_fnames [0 ], true , false , true ,
17331771 testfiles);
17341772
17351773 std::unordered_set<std::string> allfiles = trainfiles;
@@ -1809,15 +1847,19 @@ namespace dd
18091847 if (_datadim != -1 )
18101848 return ;
18111849 if (is_test_data)
1812- _datadim = _csvtsdata_test[0 ][0 ]._v .size () + 1 ;
1850+ // MULTIPLE TEST SETS : we consider here only 1 test set
1851+ _datadim = _csvtsdata_tests[0 ][0 ][0 ]._v .size () + 1 ;
18131852 else
18141853 _datadim = _csvtsdata[0 ][0 ]._v .size () + 1 ;
18151854 }
18161855
// Caffe-side override of push_csv_to_csvts: delegates to
// CSVTSInputFileConn::push_csv_to_csvts, then calls set_datadim() and
// dv_to_db() with the same is_test_data flag.
// The removed (-) line forwarded the boolean unchanged; the added (+) lines
// forward an integer instead: 0 when is_test_data is true, -1 otherwise.
// NOTE(review): 0 is presumably the index of the single supported test set
// and -1 the marker for training data -- confirm against
// CSVTSInputFileConn::push_csv_to_csvts.
18171856 void CSVTSCaffeInputFileConn::push_csv_to_csvts (bool is_test_data)
18181857 {
1819-
1820- CSVTSInputFileConn::push_csv_to_csvts (is_test_data);
1858+ // MULTIPLE TEST SETS : we consider here only one test set
// Test data is always sent with the fixed value 0.
1859+ if (is_test_data)
1860+ CSVTSInputFileConn::push_csv_to_csvts (0 );
1861+ else
// Non-test data is sent with -1.
1862+ CSVTSInputFileConn::push_csv_to_csvts (-1 );
// Both follow-up calls run after the delegation, in this order.
18211863 set_datadim (is_test_data);
18221864 dv_to_db (is_test_data);
18231865 }
@@ -1875,14 +1917,26 @@ namespace dd
18751917 _db = true ;
18761918 return ; // done
18771919 }
1878- _csvtsdata_test = std::move (_csvtsdata);
1920+ // MULTIPLE TEST SETS : we consider here only 1 test set
1921+ // _csvtsdata_test = std::move(_csvtsdata);
1922+ _csvtsdata_tests.push_back (std::move (_csvtsdata));
18791923 }
18801924 else
18811925 _csvtsdata.clear ();
1926+
1927+ if (_csvtsdata_tests.size () > 1 )
1928+ {
1929+ _logger->error (
1930+ " multiple test sets not supported by caffe backend yet" );
1931+ throw InputConnectorBadParamException (
1932+ " multiple test sets not supported by caffe backend yet" );
1933+ }
1934+
18821935 csvts_to_dv (true , true , true , false , _continuation);
1883- _csvtsdata_test.clear ();
1936+ // MULTIPLE TEST SETS : we consider here only 1 test set
1937+ _csvtsdata_tests[0 ].clear ();
18841938 }
1885- _csvtsdata_test .clear ();
1939+ _csvtsdata_tests .clear ();
18861940 }
18871941
18881942 void CSVTSCaffeInputFileConn::reset_dv_test ()
@@ -2075,8 +2129,10 @@ namespace dd
20752129 if (!fileops::dir_exists (_csv_fname))
20762130 throw InputConnectorBadParamException (" training CSV_TS dir " + _csv_fname
20772131 + " does not exist" );
2132+ // MULTIPLE TEST SETS : we consider here only 1 test set
20782133 if (_uris.size () > 1 )
2079- _csv_test_fname = _uris.at (1 );
2134+ // _csv_test_fname = _uris.at(1);
2135+ _csv_test_fnames.push_back (_uris.at (1 ));
20802136 DDCCsvTS ddccsvts;
20812137 ddccsvts._cifc = this ;
20822138 ddccsvts._adconf = ad_input;
@@ -2102,7 +2158,8 @@ namespace dd
21022158 {
21032159 dv = &_dv_test;
21042160 index = &_dv_test_index;
2105- data = &this ->_csvtsdata_test ;
2161+ // MULTIPLE TEST SETS : we consider here only 1 test set
2162+ data = &this ->_csvtsdata_tests [0 ];
21062163 }
21072164 else
21082165 {
0 commit comments