Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ ramexample.root
*.log
*.perf
*.bam
*.sam
*.png
*.bam.bai
*.root
*.root.idx
*.png
.ipynb_checkpoints/
tmp/
*.so
Expand Down
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@ set(CMAKE_INSTALL_INCLUDEDIR include)
inc/ramcore/SamParser.h
inc/ramcore/SamToTTree.h
inc/ramcore/SamToNTuple.h
inc/ramcore/RAMNTupleView.h
SOURCES
src/ttree/RAMRecord.cxx
src/rntuple/RAMNTupleRecord.cxx
src/ramcore/SamParser.cxx
src/ramcore/SamToTTree.cxx
src/ramcore/SamToNTuple.cxx
src/ramcore/RAMNTupleView.cxx
LINKDEF
inc/ttree/LinkDef.h
DEPENDENCIES
Expand Down Expand Up @@ -67,3 +69,4 @@ if(RAMTOOLS_BUILD_TESTS)
enable_testing()
add_subdirectory(test)
endif()

1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,3 @@ ROOT scripts to convert a SAM file to a RAM (ROOT Alignment/Map) file and to wor
root [0] .x ramview.C("ramexample.root","chr1:10150-10300")
root [1] .q
```

7 changes: 7 additions & 0 deletions inc/ramcore/RAMNTupleView.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#ifndef RAMCORE_RAMNTUPLEVIEW_H
#define RAMCORE_RAMNTUPLEVIEW_H

void ramntupleview(const char *file, const char *query = "", bool cache = true, bool perfstats = false,
const char *perfstatsfilename = "perf.root");

#endif // RAMCORE_RAMNTUPLEVIEW_H
87 changes: 87 additions & 0 deletions src/ramcore/RAMNTupleView.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include "ramcore/RAMNTupleView.h"
#include <iostream>
#include <cstring>
#include <ROOT/RNTuple.hxx>
#include <ROOT/RNTupleReader.hxx>
#include <ROOT/RNTupleView.hxx>
#include <TStopwatch.h>
#include <TString.h>
#include "rntuple/RAMNTupleRecord.h"

void ramntupleview(const char *file, const char *query, bool cache, bool perfstats, const char *perfstatsfilename)
{
TStopwatch stopwatch;
stopwatch.Start();

auto reader = RAMNTupleRecord::OpenRAMFile(file);
if (!reader) {
printf("ramntupleview: failed to open file %s\n", file);
return;
}

std::string region = query;
int chrDelimiterPos = region.find(":");
if (chrDelimiterPos == std::string::npos) {
std::cerr << "Invalid region format. Use rname:start-end\n";
return;
}
TString rname = region.substr(0, chrDelimiterPos);
int rangeDelimiterPos = region.find("-", chrDelimiterPos);
if (rangeDelimiterPos == std::string::npos) {
std::cerr << "Invalid region format. Use rname:start-end\n";
return;
}
Int_t range_start = std::stoi(region.substr(chrDelimiterPos + 1, rangeDelimiterPos - chrDelimiterPos - 1));
Int_t range_end = std::stoi(region.substr(rangeDelimiterPos + 1));

auto refid = RAMNTupleRecord::GetRnameRefs()->GetRefId(rname.Data());
auto index = RAMNTupleRecord::GetIndex();

auto recordView = reader->GetView<RAMNTupleRecord>("record");

if (!index || index->Size() == 0) {
int count = 0;
for (auto i : reader->GetEntryRange()) {
const auto &rec = recordView(i);
if (rec.refid == refid && rec.pos >= range_start - 1 && rec.pos <= range_end - 1) {
count++;
}
}
printf("Found %d records in region %s\n", count, query);
} else {
auto start_entry = index->GetRow(refid, range_start);
auto end_entry = index->GetRow(refid, range_end);
if (start_entry < 0)
start_entry = 0;
if (end_entry < 0)
end_entry = reader->GetNEntries();

printf("ramntupleview: %s:%d (%lld) - %d (%lld)\n", rname.Data(), range_start, start_entry, range_end, end_entry);

for (; start_entry < end_entry; start_entry++) {
const auto &rec = recordView(start_entry);
int seqlen = rec.GetSEQLEN();
if (rec.pos + seqlen > range_start - 1) {
break;
}
}

Long64_t j;
int count = 0;
for (j = start_entry; j < end_entry; j++) {
count++;
}

while (j < reader->GetNEntries()) {
const auto &rec = recordView(j);
if (rec.pos >= range_end)
break;
count++;
j++;
}

printf("Found %d records in region %s\n", count, query);
}

stopwatch.Print();
}
11 changes: 9 additions & 2 deletions tools/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,15 @@ ROOT_EXECUTABLE(samtoramntuple
ROOT::ROOTNTuple
)

install(TARGETS samtoram samtoramntuple
ROOT_EXECUTABLE(ramntupleview
ramntupleview.cxx
LIBRARIES
ramcore
ROOT::Core
ROOT::ROOTNTuple
)

install(TARGETS samtoram samtoramntuple ramntupleview
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)

Expand All @@ -25,7 +33,6 @@ set(ROOT_MACROS
ramrandom.cxx
ramview.cxx
ramview_no_index.cxx
ramntupleview.cxx
parsetreestats.cxx
)

Expand Down
95 changes: 11 additions & 84 deletions tools/ramntupleview.cxx
Original file line number Diff line number Diff line change
@@ -1,90 +1,17 @@
#include "ramcore/RAMNTupleView.h"
#include <iostream>
#include <cstring>
#include <ROOT/RNTuple.hxx>
#include <ROOT/RNTupleReader.hxx>
#include <ROOT/RNTupleView.hxx>
#include <TStopwatch.h>
#include <TString.h>
#include "rntuple/RAMNTupleRecord.h"

void ramntupleview(const char *file, const char *query, bool cache = true,
bool perfstats = false, const char *perfstatsfilename = "perf.root")
int main(int argc, char *argv[])
{
TStopwatch stopwatch;
stopwatch.Start();
if (argc < 2) {
std::cerr << "Usage: " << argv[0] << " <file.root> [rname:start-end]\n";
std::cerr << "Example: " << argv[0] << " output.root chr1:1000-2000\n";
return 1;
}

auto reader = RAMNTupleRecord::OpenRAMFile(file);
if (!reader) {
printf("ramntupleview: failed to open file %s\n", file);
return;
}
const char *file = argv[1];
const char *region = (argc > 2) ? argv[2] : "";

std::string region = query;
int chrDelimiterPos = region.find(":");
if (chrDelimiterPos == std::string::npos) {
std::cerr << "Invalid region format. Use rname:start-end\n";
return;
}
TString rname = region.substr(0, chrDelimiterPos);
int rangeDelimiterPos = region.find("-", chrDelimiterPos);
if (rangeDelimiterPos == std::string::npos) {
std::cerr << "Invalid region format. Use rname:start-end\n";
return;
}
Int_t range_start = std::stoi(region.substr(chrDelimiterPos + 1, rangeDelimiterPos - chrDelimiterPos - 1));
Int_t range_end = std::stoi(region.substr(rangeDelimiterPos + 1));

auto refid = RAMNTupleRecord::GetRnameRefs()->GetRefId(rname.Data());
auto index = RAMNTupleRecord::GetIndex();

auto recordView = reader->GetView<RAMNTupleRecord>("record");

if (!index || index->Size() == 0) {

int count = 0;
for (auto i : reader->GetEntryRange()) {
const auto& rec = recordView(i);
if (rec.refid == refid && rec.pos >= range_start - 1 && rec.pos <= range_end - 1) {
count++;
}
}
printf("Found %d records in region %s\n", count, query);
} else {

auto start_entry = index->GetRow(refid, range_start);
auto end_entry = index->GetRow(refid, range_end);
if (start_entry < 0) start_entry = 0;
if (end_entry < 0) end_entry = reader->GetNEntries();

printf("ramntupleview: %s:%d (%lld) - %d (%lld)\n",
rname.Data(), range_start, start_entry, range_end, end_entry);

for (; start_entry < end_entry; start_entry++) {
const auto& rec = recordView(start_entry);

int seqlen = rec.GetSEQLEN();
if (rec.pos + seqlen > range_start - 1) {
break;
}
}

Long64_t j;
int count = 0;
for (j = start_entry; j < end_entry; j++) {
count++;
}

while (j < reader->GetNEntries()) {
const auto& rec = recordView(j);
if (rec.pos >= range_end)
break;
count++;
j++;
}

printf("Found %d records in region %s\n", count, query);
}

stopwatch.Print();
ramntupleview(file, region);
return 0;
}