diff --git a/.gitignore b/.gitignore index 297ac82..b12a606 100644 --- a/.gitignore +++ b/.gitignore @@ -3,10 +3,11 @@ ramexample.root *.log *.perf *.bam +*.sam +*.png *.bam.bai *.root *.root.idx -*.png .ipynb_checkpoints/ tmp/ *.so diff --git a/CMakeLists.txt b/CMakeLists.txt index dace8c1..1b318e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,12 +29,14 @@ set(CMAKE_INSTALL_INCLUDEDIR include) inc/ramcore/SamParser.h inc/ramcore/SamToTTree.h inc/ramcore/SamToNTuple.h + inc/ramcore/RAMNTupleView.h SOURCES src/ttree/RAMRecord.cxx src/rntuple/RAMNTupleRecord.cxx src/ramcore/SamParser.cxx src/ramcore/SamToTTree.cxx src/ramcore/SamToNTuple.cxx + src/ramcore/RAMNTupleView.cxx LINKDEF inc/ttree/LinkDef.h DEPENDENCIES @@ -67,3 +69,4 @@ if(RAMTOOLS_BUILD_TESTS) enable_testing() add_subdirectory(test) endif() + diff --git a/README.md b/README.md index 0540ea5..db61737 100644 --- a/README.md +++ b/README.md @@ -23,4 +23,3 @@ ROOT scripts to convert a SAM file to a RAM (ROOT Alignment/Map) file and to wor root [0] .x ramview.C("ramexample.root","chr1:10150-10300") root [1] .q ``` - diff --git a/inc/ramcore/RAMNTupleView.h b/inc/ramcore/RAMNTupleView.h new file mode 100644 index 0000000..81b2ea6 --- /dev/null +++ b/inc/ramcore/RAMNTupleView.h @@ -0,0 +1,7 @@ +#ifndef RAMCORE_RAMNTUPLEVIEW_H +#define RAMCORE_RAMNTUPLEVIEW_H + +void ramntupleview(const char *file, const char *query = "", bool cache = true, bool perfstats = false, + const char *perfstatsfilename = "perf.root"); + +#endif // RAMCORE_RAMNTUPLEVIEW_H diff --git a/src/ramcore/RAMNTupleView.cxx b/src/ramcore/RAMNTupleView.cxx new file mode 100644 index 0000000..604cf08 --- /dev/null +++ b/src/ramcore/RAMNTupleView.cxx @@ -0,0 +1,87 @@ +#include "ramcore/RAMNTupleView.h" +#include +#include +#include +#include +#include +#include +#include +#include "rntuple/RAMNTupleRecord.h" + +void ramntupleview(const char *file, const char *query, bool cache, bool perfstats, const char *perfstatsfilename) +{ + TStopwatch stopwatch; + stopwatch.Start(); + + auto reader = RAMNTupleRecord::OpenRAMFile(file); + if (!reader) { + printf("ramntupleview: failed to open file %s\n", file); + return; + } + + std::string region = query; + int chrDelimiterPos = region.find(":"); + if (chrDelimiterPos == std::string::npos) { + std::cerr << "Invalid region format. Use rname:start-end\n"; + return; + } + TString rname = region.substr(0, chrDelimiterPos); + int rangeDelimiterPos = region.find("-", chrDelimiterPos); + if (rangeDelimiterPos == std::string::npos) { + std::cerr << "Invalid region format. Use rname:start-end\n"; + return; + } + Int_t range_start = std::stoi(region.substr(chrDelimiterPos + 1, rangeDelimiterPos - chrDelimiterPos - 1)); + Int_t range_end = std::stoi(region.substr(rangeDelimiterPos + 1)); + + auto refid = RAMNTupleRecord::GetRnameRefs()->GetRefId(rname.Data()); + auto index = RAMNTupleRecord::GetIndex(); + + auto recordView = reader->GetView("record"); + + if (!index || index->Size() == 0) { + int count = 0; + for (auto i : reader->GetEntryRange()) { + const auto &rec = recordView(i); + if (rec.refid == refid && rec.pos >= range_start - 1 && rec.pos <= range_end - 1) { + count++; + } + } + printf("Found %d records in region %s\n", count, query); + } else { + auto start_entry = index->GetRow(refid, range_start); + auto end_entry = index->GetRow(refid, range_end); + if (start_entry < 0) + start_entry = 0; + if (end_entry < 0) + end_entry = reader->GetNEntries(); + + printf("ramntupleview: %s:%d (%lld) - %d (%lld)\n", rname.Data(), range_start, start_entry, range_end, end_entry); + + for (; start_entry < end_entry; start_entry++) { + const auto &rec = recordView(start_entry); + int seqlen = rec.GetSEQLEN(); + if (rec.pos + seqlen > range_start - 1) { + break; + } + } + + Long64_t j; + int count = 0; + for (j = start_entry; j < end_entry; j++) { + count++; + } + + while (j < reader->GetNEntries()) { + const auto &rec = recordView(j); + if (rec.pos >= range_end) + break; + count++; + j++; + } + + printf("Found %d records in region %s\n", count, query); + } + + stopwatch.Print(); +} diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index a297a98..b67568f 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -14,7 +14,15 @@ ROOT_EXECUTABLE(samtoramntuple ROOT::ROOTNTuple ) -install(TARGETS samtoram samtoramntuple +ROOT_EXECUTABLE(ramntupleview + ramntupleview.cxx + LIBRARIES + ramcore + ROOT::Core + ROOT::ROOTNTuple +) + +install(TARGETS samtoram samtoramntuple ramntupleview RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) @@ -25,7 +33,6 @@ set(ROOT_MACROS ramrandom.cxx ramview.cxx ramview_no_index.cxx - ramntupleview.cxx parsetreestats.cxx ) diff --git a/tools/ramntupleview.cxx b/tools/ramntupleview.cxx index 019ee64..55014c7 100644 --- a/tools/ramntupleview.cxx +++ b/tools/ramntupleview.cxx @@ -1,90 +1,17 @@ +#include "ramcore/RAMNTupleView.h" #include -#include -#include -#include -#include -#include -#include -#include "rntuple/RAMNTupleRecord.h" -void ramntupleview(const char *file, const char *query, bool cache = true, - bool perfstats = false, const char *perfstatsfilename = "perf.root") +int main(int argc, char *argv[]) { - TStopwatch stopwatch; - stopwatch.Start(); + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " [rname:start-end]\n"; + std::cerr << "Example: " << argv[0] << " output.root chr1:1000-2000\n"; + return 1; + } - auto reader = RAMNTupleRecord::OpenRAMFile(file); - if (!reader) { - printf("ramntupleview: failed to open file %s\n", file); - return; - } + const char *file = argv[1]; + const char *region = (argc > 2) ? argv[2] : ""; - std::string region = query; - int chrDelimiterPos = region.find(":"); - if (chrDelimiterPos == std::string::npos) { - std::cerr << "Invalid region format. Use rname:start-end\n"; - return; - } - TString rname = region.substr(0, chrDelimiterPos); - int rangeDelimiterPos = region.find("-", chrDelimiterPos); - if (rangeDelimiterPos == std::string::npos) { - std::cerr << "Invalid region format. Use rname:start-end\n"; - return; - } - Int_t range_start = std::stoi(region.substr(chrDelimiterPos + 1, rangeDelimiterPos - chrDelimiterPos - 1)); - Int_t range_end = std::stoi(region.substr(rangeDelimiterPos + 1)); - - auto refid = RAMNTupleRecord::GetRnameRefs()->GetRefId(rname.Data()); - auto index = RAMNTupleRecord::GetIndex(); - - auto recordView = reader->GetView("record"); - - if (!index || index->Size() == 0) { - - int count = 0; - for (auto i : reader->GetEntryRange()) { - const auto& rec = recordView(i); - if (rec.refid == refid && rec.pos >= range_start - 1 && rec.pos <= range_end - 1) { - count++; - } - } - printf("Found %d records in region %s\n", count, query); - } else { - - auto start_entry = index->GetRow(refid, range_start); - auto end_entry = index->GetRow(refid, range_end); - if (start_entry < 0) start_entry = 0; - if (end_entry < 0) end_entry = reader->GetNEntries(); - - printf("ramntupleview: %s:%d (%lld) - %d (%lld)\n", - rname.Data(), range_start, start_entry, range_end, end_entry); - - for (; start_entry < end_entry; start_entry++) { - const auto& rec = recordView(start_entry); - - int seqlen = rec.GetSEQLEN(); - if (rec.pos + seqlen > range_start - 1) { - break; - } - } - - Long64_t j; - int count = 0; - for (j = start_entry; j < end_entry; j++) { - count++; - } - - while (j < reader->GetNEntries()) { - const auto& rec = recordView(j); - if (rec.pos >= range_end) - break; - count++; - j++; - } - - printf("Found %d records in region %s\n", count, query); - } - - stopwatch.Print(); + ramntupleview(file, region); + return 0; } -