Merge pull request #275 from sy-c/master

sy-c · web-flow · commit 51dd32127ee9 · 2024-03-18T17:08:17.000+01:00
v2.23.0
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -343,7 +343,7 @@ endif()
 ###################################################
 
 # list of executables build (to be completed depending on dependencies found)
-set(executables o2-readout-exe o2-readout-receiver o2-readout-test-fmq-tx o2-readout-test-fmq-rx o2-readout-test-fmq-perf-tx o2-readout-test-fmq-perf-rx o2-readout-test-memorybanks o2-readout-rawreader o2-readout-test-lib-monitoring)
+set(executables o2-readout-exe o2-readout-receiver o2-readout-test-fmq-tx o2-readout-test-fmq-rx o2-readout-test-fmq-perf-tx o2-readout-test-fmq-perf-rx o2-readout-test-memorybanks o2-readout-rawreader o2-readout-rawmerger o2-readout-test-lib-monitoring)
 
 # o2-readout-exe : main executable
 add_executable(
@@ -411,6 +411,13 @@ add_executable(
 	$<TARGET_OBJECTS:objReadoutUtils>
 )
 
+# a RAW data file merger
+add_executable(
+        o2-readout-rawmerger
+        ${SOURCE_DIR}/exeRawMerger.cxx
+	$<TARGET_OBJECTS:objReadoutUtils>
+)
+
 # a minimal test program to readout a ROC device
 if (ReadoutCard_FOUND)
 add_executable(
diff --git a/doc/README.md b/doc/README.md
@@ -15,6 +15,7 @@ The following executables, presented with the _nicknames_ used below, are part o
   - [_o2-readout-rawreader_](#rawreader) or _RawReader_: a tool to check validity and inspect content of raw data files recorded by _Readout_.
   - [_o2-readout-receiver_](#receiver) or _Receiver_ : a process to receive data from _Readout_ by FMQ, e.g. for local communication tests when STFB is not available.
   - [_o2-readout-status_] or _ReadoutStatus_: an interactive interface to displaying runtime statistics connected by _Monitor_.
+  - [_o2-readout-rawmerger_](#rawmerger) or _RawMerger_: a tool to concatenate multiple raw data files into a single file, e.g. for replay by _Readout_.
 
 There are also some readout internal test components, not used in normal runtime conditions, for development and debugging purpose (_o2-readout-test-*_)
 The source code repository is [https://github.com/AliceO2Group/Readout].
@@ -243,7 +244,19 @@ Example launch command:
 o2-readout-rawreader /tmp/data.raw dumpRDH=1 dumpData=-1 | less
 ```
    
+## RawMerger
+
+This is a console utility to merge multiple data files into a single one.
    
+Example launch command:
+
+```
+o2-readout-rawmerger outputFile=/local/replay/2024-02-07-LHC23zzk_544490_50khz_TPC.raw /local/replay/2024-02-07-LHC23zzk_544490_50khz/TPC*.raw
+```
+
+To create an output file with data grouped by timeframes, it assumes that all input files have the same trigger orbit sequence.
+
+
 ## EventDump
 
 This is an interactive program to check/display content of online data taken with Readout. It needs a special consumer defined in Readout configuration, to publish data pages over ZeroMQ:
diff --git a/doc/releaseNotes.md b/doc/releaseNotes.md
@@ -612,3 +612,6 @@ This file describes the main feature changes for each readout.exe released versi
 
 ## v2.22.0 - 30/01/2024
 - consumer-FairMQChannel: fix for DD TF ordering. In case of start/stop/start with parameter threads>1, there were some "non-continuous ordering" warnings due to a missing counter reset.
+
+## v2.23.0 - 18/03/2024
+- Added o2-readout-rawmerger: a utility to concatenate multiple raw files into a single one, e.g. to replay full detector data from a single FLP.
diff --git a/src/ReadoutEquipmentPlayer.cxx b/src/ReadoutEquipmentPlayer.cxx
@@ -282,7 +282,7 @@ DataBlockContainerReference ReadoutEquipmentPlayer::getNextBlock()
 
             // fill page metadata
             if (pageOffset == 0) {
-              // printf("link %d TF %d\n", (int)currentPacketHeader.linkId,(int)currentPacketHeader.timeframeId);
+              // printf("eq %d link %d TF %d\n", (int)currentPacketHeader.equipmentId, (int)currentPacketHeader.linkId,(int)currentPacketHeader.timeframeId);
               b->header.linkId = currentPacketHeader.linkId;
               b->header.equipmentId = currentPacketHeader.equipmentId;
               b->header.timeframeId = currentPacketHeader.timeframeId;
@@ -292,7 +292,7 @@ DataBlockContainerReference ReadoutEquipmentPlayer::getNextBlock()
             bool changePage = 0;
             if (pageOffset != 0) {
               if ((currentPacketHeader.linkId != lastPacketHeader.linkId) || (currentPacketHeader.equipmentId != lastPacketHeader.equipmentId) || (currentPacketHeader.timeframeId != lastPacketHeader.timeframeId)) {
-                // printf("%d : %d -> %d : %d\n",currentPacketHeader.linkId,currentPacketHeader.timeframeId,lastPacketHeader.linkId,lastPacketHeader.timeframeId);
+                // printf("new page : %d.%d : %d -> %d.%d : %d\n",currentPacketHeader.equipmentId, currentPacketHeader.linkId,(int)currentPacketHeader.timeframeId,lastPacketHeader.equipmentId,lastPacketHeader.linkId,(int)lastPacketHeader.timeframeId);
                 changePage = 1;
               }
             }
diff --git a/src/ReadoutVersion.h b/src/ReadoutVersion.h
@@ -9,5 +9,5 @@
 // granted to it by virtue of its status as an Intergovernmental Organization
 // or submit itself to any jurisdiction.
 
-#define READOUT_VERSION "2.22.0"
+#define READOUT_VERSION "2.23.0"
 
diff --git a/src/exeRawMerger.cxx b/src/exeRawMerger.cxx
@@ -0,0 +1,292 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+
+
+// This utility does timeframe building on a set of RAW data files and outputs a single merged RAW data file.
+// Input files must have a certain level of synchronization: same timeframes in all files, in the same order.
+
+
+#include <lz4.h>
+#include <stdio.h>
+#include <string>
+#include <inttypes.h>
+
+#include "DataBlock.h"
+#include "DataBlockContainer.h"
+#include "DataSet.h"
+#include "RdhUtils.h"
+#include "CounterStats.h"
+
+#include <filesystem>
+#include <string.h>
+
+// Per-input-file state used by main() while merging: stdio stream, read buffer and progress counters.
+struct RawFileDescriptor {
+  std::string path; // path to file
+  FILE *fp = NULL; // stdio stream of the input file (as returned by fopen)
+  void *buffer = NULL; // memory buffer to read file chunk by chunk
+  uint64_t bufferUsed; // number of bytes of buffer in use (filled with data from file); 0 forces a new read
+  uint64_t bufferProcessed; // number of bytes in buffer already processed
+  long fileSize; // size of file on disk (bytes)
+  long fileOffset; // current location in file: position just after the last chunk read (bytes)
+  uint64_t currentTimeframe; // id of the timeframe currently being copied to the output
+  uint64_t nextTimeframe; // id of the next timeframe detected in this file, not started yet
+  long bytesOut; // number of bytes from this file written to the output
+  bool done; // flag set when file read completed and buffer fully processed
+};
+
+// Merge the RAW data files listed on the command line into a single output file, one timeframe at a time.
+// Arguments of the form key=value are options (outputFile, bufferSize); any other argument is an input file path.
+// Returns 0 on success, -1 on invalid option, I/O failure or RDH error.
+int main(int argc, const char* argv[])
+{
+  std::vector<std::string> filenames;
+  std::vector<RawFileDescriptor> fds;
+  bool isError = 0;
+
+  std::string outputFile = "/tmp/out.raw"; // path to output merged file
+  long bufferSize = 1000000; // chunk size for reading in memory
+  bool fileReadVerbose = true; // flag to print more info (chunk size, etc) when reading file
+  uint64_t TFperiod = 32; // period of a timeframe (divider applied to the HB orbit to get the timeframe id)
+  long totalSize = 0; // input files total size (bytes)
+
+  // parse arguments (key=value pairs) and build list of input files
+  for (int i = 1; i < argc; i++) {
+    // check if argument is an option of the form key=value
+    const char* option = argv[i];
+    std::string key(option);
+    size_t separatorPosition = key.find('=');
+    if (separatorPosition != std::string::npos) {
+      key.resize(separatorPosition);
+      std::string value = &(option[separatorPosition + 1]);
+      if (key == "outputFile") {
+        outputFile = value;
+      } else if (key == "bufferSize") {
+        bufferSize = std::atol(value.c_str()); // atol: bufferSize is a long
+        if (bufferSize <= 0) {
+          printf("invalid bufferSize %s\n", value.c_str());
+          isError = 1;
+        }
+      } else {
+        printf("unknown option %s\n", key.c_str());
+        isError = 1;
+      }
+      continue;
+    }
+    filenames.push_back(option);
+  }
+
+  // check success
+  if (isError) {
+    printf("Aborting\n");
+    return -1;
+  }
+
+  // summary of options
+  printf("Using options:\n\t outputFile = %s\n\t bufferSize = %lu\n", outputFile.c_str(), (long unsigned)bufferSize);
+
+  // open files and init
+  // not done in arg parsing loop, so that we now have all options set
+  for (const auto &fn : filenames) {
+    FILE *fp = fopen(fn.c_str(), "rb");
+    if (fp == NULL) {
+      printf("Can't open %s\n", fn.c_str());
+      isError = 1;
+      continue;
+    }
+    printf("%s\n", fn.c_str());
+    // get file size (error_code overload, so that a failure is reported instead of throwing)
+    std::error_code ec;
+    long fileSize = std::filesystem::file_size(fn, ec);
+    if (ec) {
+      printf("Can't get size of %s: %s\n", fn.c_str(), ec.message().c_str());
+      fclose(fp);
+      isError = 1;
+      continue;
+    }
+    if (fileReadVerbose) {
+      printf("File size: %ld bytes\n", fileSize);
+    }
+    totalSize += fileSize;
+
+    void *buffer = malloc(bufferSize);
+    if (buffer == NULL) {
+      printf("Failed to allocate buffer\n");
+      fclose(fp);
+      isError = 1;
+      break;
+    }
+    fds.push_back({.path = fn, .fp = fp, .buffer = buffer, .bufferUsed = 0, .bufferProcessed = 0, .fileSize = fileSize, .fileOffset = 0, .currentTimeframe = 0, .nextTimeframe = 0, .bytesOut = 0, .done = 0});
+  }
+
+  // check success
+  if (isError) {
+    printf("Aborting\n");
+    return -1;
+  }
+
+  // open output file (binary mode: the content is raw data)
+  printf("Opening %s for output\n", outputFile.c_str());
+  FILE *fdout = fopen(outputFile.c_str(), "wb");
+  if (fdout == NULL) {
+    printf("Can't open %s for writing\n", outputFile.c_str());
+    return -1;
+  }
+  printf("Expected output size: %ld\n", totalSize);
+
+  for (;;) {
+    unsigned int nCompleted = 0;
+    printf("\n\n\n*** LOOP\n");
+
+    // are all files at the same TF now ?
+    bool sameTimeframeId = true;
+    uint64_t nextTF = 0;
+    for (auto &fd : fds) {
+      if (fd.fileOffset >= fd.fileSize) continue;
+      if (nextTF == 0) nextTF = fd.nextTimeframe;
+      if (fd.nextTimeframe != nextTF) {
+        sameTimeframeId = 0;
+        printf("TF %" PRIu64 " != %" PRIu64 " @ file %s\n", fd.nextTimeframe, nextTF, fd.path.c_str());
+        break;
+      }
+    }
+
+    for (auto &fd : fds) {
+      printf("\nFile %s\n", fd.path.c_str());
+      bool skip = 0;
+      for (; !fd.done;) {
+        if ((fd.bufferUsed == 0) || (fd.bufferUsed == fd.bufferProcessed)) {
+          // read new chunk
+          long dataSize = fd.fileSize - fd.fileOffset;
+          if (dataSize > bufferSize) dataSize = bufferSize;
+          if (dataSize <= 0) break; // nothing left to read: end of file
+          if (fread(fd.buffer, dataSize, 1, fd.fp) != 1) { // end of file is handled above: this is a real I/O error
+            printf("Failed to read %ld bytes @ %ld from %s\n", dataSize, fd.fileOffset, fd.path.c_str());
+            return -1;
+          }
+          printf("Got block %ld bytes @ %ld (total: %ld /%ld)\n", dataSize, fd.fileOffset, fd.fileOffset + dataSize, fd.fileSize);
+          fd.bufferUsed = dataSize;
+          fd.bufferProcessed = 0;
+          fd.fileOffset += dataSize;
+        } else {
+          printf("Continuing with buffer @ %ld (%" PRIu64 " /%" PRIu64 ") \n", fd.fileOffset, fd.bufferProcessed, fd.bufferUsed);
+        }
+
+        uint64_t bufferProcessedInIteration = 0;
+        // process current chunk until next timeframe
+        while (fd.bufferProcessed < fd.bufferUsed) {
+          // check we are not at page boundary
+          if (fd.bufferProcessed + sizeof(o2::Header::RAWDataHeader) <= fd.bufferUsed) {
+            RdhHandle h(((uint8_t*)fd.buffer) + fd.bufferProcessed);
+            std::string err;
+            if (h.validateRdh(err)) {
+              printf("RDH error @ %" PRIu64 ": %s\n", fd.bufferProcessed, err.c_str());
+              return -1;
+            }
+            long nBytes = h.getOffsetNextPacket();
+            if (nBytes <= 0) { // such an offset would never advance in the buffer
+              printf("RDH error @ %" PRIu64 ": invalid offset to next packet\n", fd.bufferProcessed);
+              return -1;
+            }
+            if (fd.bufferProcessed + nBytes <= fd.bufferUsed) {
+              uint64_t TFid = 1 + h.getHbOrbit() / TFperiod;
+              if (TFid != fd.currentTimeframe) {
+                if (TFid != fd.nextTimeframe) {
+                  printf("Next TF detected %" PRIu64 " @ %" PRIu64 "\n", TFid, fd.bufferProcessed);
+                  if (sameTimeframeId) {
+                    // wait that all files are at the same TF before checking next
+                    fd.nextTimeframe = TFid;
+                  }
+                  skip = 1;
+                  break;
+                } else {
+                  if (!sameTimeframeId) {
+                    skip = 1;
+                    break;
+                  }
+                  fd.currentTimeframe = TFid; // we can start with this one
+                  printf("Starting new TF %" PRIu64 " @ %" PRIu64 "\n", fd.currentTimeframe, fd.bufferProcessed);
+                  skip = 0;
+                }
+              }
+              //    h.dumpRdh(fd.fileOffset + fd.bufferProcessed - fd.bufferUsed, 1);
+              bufferProcessedInIteration += nBytes;
+              fd.bufferProcessed += nBytes;
+              continue;
+            }
+          }
+
+          if (!skip) {
+            // rewind a bit: the packet at bufferProcessed is not complete in this chunk
+            long delta = fd.bufferUsed - fd.bufferProcessed;
+            if (fd.bufferProcessed == 0) { // no progress in this chunk: re-reading it would loop forever
+              printf("Incomplete packet @ %ld in %s (truncated file or bufferSize too small)\n", fd.fileOffset - delta, fd.path.c_str());
+              return -1;
+            }
+            if (delta) {
+              fd.fileOffset -= delta;
+              printf("%" PRIu64 " / %" PRIu64 " : %ld -> new position %ld\n", fd.bufferProcessed, fd.bufferUsed, delta, fd.fileOffset);
+              if (fseek(fd.fp, fd.fileOffset, SEEK_SET)) {
+                printf("Failed to seek in file\n");
+                return -1;
+              }
+            }
+            fd.bufferUsed = 0; // force a new read from file, starting at the incomplete packet
+          }
+        }
+
+        // write validated data
+        if (bufferProcessedInIteration) {
+          if (fwrite(&((char *)fd.buffer)[fd.bufferProcessed - bufferProcessedInIteration], bufferProcessedInIteration, 1, fdout) != 1) {
+            printf("Failed to write %" PRIu64 " bytes\n", bufferProcessedInIteration);
+            printf("%s\n", strerror(errno));
+            return -1;
+          }
+          printf("Wrote %" PRIu64 " bytes\n", bufferProcessedInIteration);
+          fd.bytesOut += bufferProcessedInIteration;
+        }
+        if (skip) {
+          printf("skipping until next loop \n");
+          break;
+        }
+      }
+      if ((fd.fileOffset >= fd.fileSize) && (fd.bufferUsed == fd.bufferProcessed)) {
+        fd.done = 1;
+        printf("File read completed %" PRIu64 " %" PRIu64 "\n", fd.bufferUsed, fd.bufferProcessed);
+        nCompleted++;
+      }
+    }
+
+    printf("*** %u / %zu completed\n", nCompleted, fds.size());
+    if (nCompleted == fds.size()) {
+      // all files read
+      break;
+    }
+  }
+
+  if (fclose(fdout)) { // buffered data is flushed on close: a failure means the output is incomplete
+    printf("Failed to close %s: %s\n", outputFile.c_str(), strerror(errno));
+    return -1;
+  }
+
+  long totalBytesOut = 0;
+  for (auto &fd : fds) {
+    printf("\nFile %s: %ld / %ld\n", fd.path.c_str(), fd.bytesOut, fd.fileSize);
+    totalBytesOut += fd.bytesOut;
+    fclose(fd.fp); // release per-file resources
+    free(fd.buffer);
+  }
+  if (totalBytesOut != totalSize) {
+    printf("Warning: output size mismatch input %ld != %ld\n", totalBytesOut, totalSize);
+  }
+  return 0;
+}