@@ -382,30 +382,40 @@ void TRestDataSet::GenerateDataSet() {
382382 ROOT::DisableImplicitMT ();
383383
384384 RESTInfo << " Initializing dataset" << RESTendl;
385- fDataSet = ROOT::RDataFrame (" AnalysisTree" , fFileSelection );
385+ fDataFrame = ROOT::RDataFrame (" AnalysisTree" , fFileSelection );
386386
387387 RESTInfo << " Making cuts" << RESTendl;
388- fDataSet = MakeCut (fCut );
388+ fDataFrame = MakeCut (fCut );
389389
390390 // Adding new user columns added to the dataset
391391 for (const auto & [cName, cExpression] : fColumnNameExpressions ) {
392392 RESTInfo << " Adding column to dataset: " << cName << RESTendl;
393393 finalList.emplace_back (cName);
394- fDataSet = DefineColumn (cName, cExpression);
394+ fDataFrame = DefineColumn (cName, cExpression);
395395 }
396396
397+ RegenerateTree (finalList);
398+
399+ RESTInfo << " - Dataset generated!" << RESTendl;
400+ }
401+
402+ // /////////////////////////////////////////////
403+ // / \brief It regenerates the tree so that it is an exact copy of the present DataFrame
404+ // /
405+ void TRestDataSet::RegenerateTree (std::vector<std::string> finalList) {
397406 RESTInfo << " Generating snapshot." << RESTendl;
398407 std::string user = getenv (" USER" );
399408 std::string fOutName = " /tmp/rest_output_" + user + " .root" ;
400- fDataSet .Snapshot (" AnalysisTree" , fOutName , finalList);
409+ if (!finalList.empty ())
410+ fDataFrame .Snapshot (" AnalysisTree" , fOutName , finalList);
411+ else
412+ fDataFrame .Snapshot (" AnalysisTree" , fOutName );
401413
402414 RESTInfo << " Re-importing analysis tree." << RESTendl;
403- fDataSet = ROOT::RDataFrame (" AnalysisTree" , fOutName );
415+ fDataFrame = ROOT::RDataFrame (" AnalysisTree" , fOutName );
404416
405417 TFile* f = TFile::Open (fOutName .c_str ());
406418 fTree = (TChain*)f->Get (" AnalysisTree" );
407-
408- RESTInfo << " - Dataset generated!" << RESTendl;
409419}
410420
411421// /////////////////////////////////////////////
@@ -517,14 +527,32 @@ std::vector<std::string> TRestDataSet::FileSelection() {
517527 return fFileSelection ;
518528}
519529
530+ // /////////////////////////////////////////////
531+ // / \brief This method returns a RDataFrame node with the number of
532+ // / samples inside the dataset by selecting a range. It will not
533+ // / modify internally the dataset. See ApplyRange to modify internally
534+ // / the dataset.
535+ // /
536+ ROOT::RDF::RNode TRestDataSet::Range (size_t from, size_t to) { return fDataFrame .Range (from, to); }
537+
538+ // /////////////////////////////////////////////
539+ // / \brief This method reduces the number of samples inside the
540+ // / dataset by selecting a range.
541+ // /
542+ ROOT::RDF::RNode TRestDataSet::ApplyRange (size_t from, size_t to) {
543+ fDataFrame = fDataFrame .Range (from, to);
544+ RegenerateTree ();
545+ return fDataFrame ;
546+ }
547+
520548// /////////////////////////////////////////////
521549// / \brief This function applies a TRestCut to the dataframe
522550// / and returns a dataframe with the applied cuts. Note that
523551// / the cuts are not applied directly to the dataframe on
524- // / TRestDataSet, to do so you should do fDataSet = MakeCut(fCut);
552+ // / TRestDataSet, to do so you should do fDataFrame = MakeCut(fCut);
525553// /
526554ROOT::RDF::RNode TRestDataSet::MakeCut (const TRestCut* cut) {
527- auto df = fDataSet ;
555+ auto df = fDataFrame ;
528556
529557 if (cut == nullptr ) return df;
530558
@@ -561,6 +589,20 @@ ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) {
561589 return df;
562590}
563591
592+ // /////////////////////////////////////////////
593+ // / \brief It returns the number of entries found inside fDataFrame
594+ // / and prints out a warning if the number of entries inside the
595+ // / tree is not the same.
596+ // /
597+ size_t TRestDataSet::GetEntries () {
598+ auto nEntries = fDataFrame .Count ();
599+ if (*nEntries == (long long unsigned int )GetTree ()->GetEntries ()) return *nEntries;
600+ RESTWarning << " TRestDataSet::GetEntries. Number of tree entries is not the same as RDataFrame entries."
601+ << RESTendl;
602+ RESTWarning << " Returning RDataFrame entries" << RESTendl;
603+ return *nEntries;
604+ }
605+
564606// /////////////////////////////////////////////
565607// / \brief This function will add a new column to the RDataFrame using
566608// / the same scheme as the usual RDF::Define method, but it will on top of
@@ -574,7 +616,7 @@ ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) {
574616// / \endcode
575617// /
576618ROOT::RDF::RNode TRestDataSet::DefineColumn (const std::string& columnName, const std::string& formula) {
577- auto df = fDataSet ;
619+ auto df = fDataFrame ;
578620
579621 std::string evalFormula = formula;
580622 for (auto const & [name, properties] : fQuantity )
@@ -819,7 +861,7 @@ void TRestDataSet::InitFromConfigFile() {
819861void TRestDataSet::Export (const std::string& filename, std::vector<std::string> excludeColumns) {
820862 RESTInfo << " Exporting dataset" << RESTendl;
821863
822- std::vector<std::string> columns = fDataSet .GetColumnNames ();
864+ std::vector<std::string> columns = fDataFrame .GetColumnNames ();
823865 if (!excludeColumns.empty ()) {
824866 columns.erase (std::remove_if (columns.begin (), columns.end (),
825867 [&excludeColumns](std::string elem) {
@@ -831,10 +873,10 @@ void TRestDataSet::Export(const std::string& filename, std::vector<std::string>
831873 RESTInfo << " Re-Generating snapshot." << RESTendl;
832874 std::string user = getenv (" USER" );
833875 std::string fOutName = " /tmp/rest_output_" + user + " .root" ;
834- fDataSet .Snapshot (" AnalysisTree" , fOutName , columns);
876+ fDataFrame .Snapshot (" AnalysisTree" , fOutName , columns);
835877
836878 RESTInfo << " Re-importing analysis tree." << RESTendl;
837- fDataSet = ROOT::RDataFrame (" AnalysisTree" , fOutName );
879+ fDataFrame = ROOT::RDataFrame (" AnalysisTree" , fOutName );
838880
839881 TFile* f = TFile::Open (fOutName .c_str ());
840882 fTree = (TChain*)f->Get (" AnalysisTree" );
@@ -846,7 +888,7 @@ void TRestDataSet::Export(const std::string& filename, std::vector<std::string>
846888 RESTInfo << " Re-Generating snapshot." << RESTendl;
847889 std::string user = getenv (" USER" );
848890 std::string fOutName = " /tmp/rest_output_" + user + " .root" ;
849- fDataSet .Snapshot (" AnalysisTree" , fOutName );
891+ fDataFrame .Snapshot (" AnalysisTree" , fOutName );
850892
851893 TFile* f = TFile::Open (fOutName .c_str ());
852894 fTree = (TChain*)f->Get (" AnalysisTree" );
@@ -910,7 +952,7 @@ void TRestDataSet::Export(const std::string& filename, std::vector<std::string>
910952 fprintf (f, " ###\n " );
911953 fprintf (f, " ### Data starts here\n " );
912954
913- auto obsNames = fDataSet .GetColumnNames ();
955+ auto obsNames = fDataFrame .GetColumnNames ();
914956 std::string obsListStr = " " ;
915957 for (const auto & l : obsNames) {
916958 if (!obsListStr.empty ()) obsListStr += " :" ;
@@ -938,7 +980,7 @@ void TRestDataSet::Export(const std::string& filename, std::vector<std::string>
938980
939981 return ;
940982 } else if (TRestTools::GetFileNameExtension (filename) == " root" ) {
941- fDataSet .Snapshot (" AnalysisTree" , filename);
983+ fDataFrame .Snapshot (" AnalysisTree" , filename);
942984
943985 TFile* f = TFile::Open (filename.c_str (), " UPDATE" );
944986 std::string name = this ->GetName ();
@@ -1038,7 +1080,7 @@ void TRestDataSet::Import(const std::string& fileName) {
10381080 else
10391081 ROOT::DisableImplicitMT ();
10401082
1041- fDataSet = ROOT::RDataFrame (" AnalysisTree" , fileName);
1083+ fDataFrame = ROOT::RDataFrame (" AnalysisTree" , fileName);
10421084
10431085 fTree = (TChain*)file->Get (" AnalysisTree" );
10441086}
@@ -1104,7 +1146,7 @@ void TRestDataSet::Import(std::vector<std::string> fileNames) {
11041146 }
11051147
11061148 RESTInfo << " Opening list of files. First file: " << fileNames[0 ] << RESTendl;
1107- fDataSet = ROOT::RDataFrame (" AnalysisTree" , fileNames);
1149+ fDataFrame = ROOT::RDataFrame (" AnalysisTree" , fileNames);
11081150
11091151 if (fTree != nullptr ) {
11101152 delete fTree ;
0 commit comments