Skip to content

Commit 5ea7b6b

Browse files
authored
Merge pull request hpcc-systems#19607 from ghalliday/eventlogging
HPCC-33609 Create a POC for an Event Recording framework Reviewed-By: Tim Klemm <tim.klemm@lexisnexisrisk.com> Reviewed-by: Jake Smith <jake.smith@lexisnexisrisk.com> Merged-by: Gavin Halliday <ghalliday@hpccsystems.com>
2 parents 6110993 + 2932e26 commit 5ea7b6b

File tree

10 files changed

+1071
-8
lines changed

10 files changed

+1071
-8
lines changed

roxie/ccd/ccdmain.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,13 @@ int CCD_API roxie_main(int argc, const char *argv[], const char * defaultYaml)
11621162
else
11631163
multicastTTL = ttlTmp;
11641164

1165+
if (topology->getPropBool("@recordAllEvents", false))
1166+
{
1167+
const char * recordEventOptions = topology->queryProp("@recordEventOptions");
1168+
const char * optRecordEventFilename = topology->queryProp("@recordEventFilename");
1169+
startRoxieEventRecording(recordEventOptions, optRecordEventFilename);
1170+
}
1171+
11651172
workunitGraphCacheEnabled = topology->getPropBool("expert/@workunitGraphCacheEnabled", workunitGraphCacheEnabled);
11661173

11671174
indexReadChunkSize = topology->getPropInt("@indexReadChunkSize", 60000);
@@ -1766,6 +1773,7 @@ int CCD_API roxie_main(int argc, const char *argv[], const char * defaultYaml)
17661773
#ifndef _CONTAINERIZED
17671774
stopPerformanceMonitor();
17681775
#endif
1776+
stopRoxieEventRecording();
17691777
cleanupPlugins();
17701778
unloadHpccProtocolPlugin();
17711779
closeMulticastSockets();

roxie/ccd/ccdstate.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "jhash.hpp"
2323
#include "jsort.hpp"
2424
#include "jregexp.hpp"
25+
#include "jevent.hpp"
2526

2627
#include "udptopo.hpp"
2728
#include "ccd.hpp"
@@ -241,6 +242,45 @@ void stopDelayedReleaser()
241242
}
242243

243244

245+
//-------------------------------------------------------------------------
246+
247+
void startRoxieEventRecording(const char * options, const char * filename)
248+
{
249+
if (isEmptyString(options))
250+
options = "threadid";
251+
252+
StringBuffer outputFilename;
253+
const char * path = filename;
254+
if (!isAbsolutePath(filename))
255+
{
256+
getTempFilePath(outputFilename, "eventrecorder", nullptr);
257+
outputFilename.append(PATHSEPCHAR);
258+
if (!isEmptyString(filename))
259+
{
260+
outputFilename.append(filename);
261+
}
262+
else
263+
{
264+
//MORE: Revisit this at a later date
265+
unsigned seq = (unsigned)(get_cycles_now() % 100000);
266+
outputFilename.append("roxieevents.").append((unsigned)GetCurrentProcessId()).append(".").append(seq).append(".evt");
267+
}
268+
269+
path = outputFilename.str();
270+
//MORE: The caller will need to know the full pathname
271+
}
272+
273+
recursiveCreateDirectoryForFile(path);
274+
queryRecorder().startRecording(options, path);
275+
276+
//MORE: Add all the meta information for the files - either now or in stopRoxieEventRecording()
277+
}
278+
279+
void stopRoxieEventRecording()
280+
{
281+
queryRecorder().stopRecording();
282+
}
283+
244284
//-------------------------------------------------------------------------
245285

246286
class CSimpleSuperFileArray : implements ISimpleSuperFileEnquiry, public CInterface

roxie/ccd/ccdstate.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ extern const char *queryNodeIndexName(const IPropertyTree &graphNode, ThorActivi
171171
extern void createDelayedReleaser();
172172
extern void stopDelayedReleaser();
173173

174-
extern void createDelayedReleaser();
175-
extern void stopDelayedReleaser();
174+
extern void startRoxieEventRecording(const char * optionsText, const char * filename);
175+
extern void stopRoxieEventRecording();
176176

177177
#endif

roxie/roxiemem/roxiemem.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7054,7 +7054,7 @@ class SimpleRowBuffer : implements IBufferedRowCallback
70547054
};
70557055

70567056
// A row buffer which does not allocate memory for the row array from roxiemem
7057-
class TestingRowBuffer : implements IBufferedRowCallback
7057+
class TestingRowBuffer final : implements IBufferedRowCallback
70587058
{
70597059
public:
70607060
TestingRowBuffer(unsigned _cost, unsigned _id) : cost(_cost), id(_id)
@@ -7136,7 +7136,7 @@ class CallbackBlockAllocator : implements IBufferedRowCallback
71367136

71377137

71387138
//Free the block as soon as requested
7139-
class SimpleCallbackBlockAllocator : public CallbackBlockAllocator
7139+
class SimpleCallbackBlockAllocator final : public CallbackBlockAllocator
71407140
{
71417141
public:
71427142
SimpleCallbackBlockAllocator(IRowManager * _rowManager, memsize_t _size, unsigned _cost, unsigned _id)
@@ -7154,7 +7154,7 @@ class SimpleCallbackBlockAllocator : public CallbackBlockAllocator
71547154
};
71557155

71567156
//Allocate another row before disposing of the first
7157-
class NastyCallbackBlockAllocator : public CallbackBlockAllocator
7157+
class NastyCallbackBlockAllocator final : public CallbackBlockAllocator
71587158
{
71597159
public:
71607160
NastyCallbackBlockAllocator(IRowManager * _rowManager, unsigned _size, unsigned _cost, unsigned _id)

system/jhtree/jhtree.cpp

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
#include "rtldynfield.hpp"
6565
#include "eclhelper_base.hpp"
6666
#include "jmetrics.hpp"
67+
#include "jevent.hpp"
6768

6869
constexpr __uint64 defaultFetchThresholdNs = 20000; // Assume anything < 20us comes from the page cache, everything above probably went to disk
6970

@@ -671,23 +672,51 @@ class DelayedCacheEntryReleaser : public IRemovedMappingCallback
671672
public:
672673
~DelayedCacheEntryReleaser()
673674
{
675+
if (unlikely(recordingEvents()))
676+
{
677+
for (unsigned i1 = 0; i1 < numFixed; i1++)
678+
noteEviction(fixedFileId[i1], fixedPending[i1]);
679+
680+
ForEachItemIn(i2, pending)
681+
noteEviction(pendingFileIds.item(i2), &pending.item(i2));
682+
}
683+
674684
for (unsigned i=0; i < numFixed; i++)
675685
fixedPending[i]->Release();
676686
}
677687
virtual void noteRemoval(void * _mapping) override
678688
{
679689
CNodeMapping *mapping = reinterpret_cast<CNodeMapping *>(_mapping);
680-
//Save the node onto a list, so it will be released when this object is released.
690+
unsigned keyId = mapping->queryFindValue().keyId;
691+
// Save the node onto a list, so it will be released when this object is released.
681692
CJHTreeNode * node = const_cast<CJHTreeNode *>(mapping->query().getNode());
693+
// The key id needs to be stored separately because the node does not currently contain it.
682694
if (numFixed < maxFixed)
683-
fixedPending[numFixed++] = node;
695+
{
696+
fixedPending[numFixed] = node;
697+
fixedFileId[numFixed] = keyId;
698+
numFixed++;
699+
}
684700
else
701+
{
685702
pending.append(*node);
703+
pendingFileIds.append(keyId);
704+
}
686705
}
706+
707+
protected:
708+
void noteEviction(unsigned keyId, CJHTreeNode * node)
709+
{
710+
offset_t pos = node->getFpos();
711+
queryRecorder().recordIndexEviction(keyId, pos, node->getNodeType(), node->getMemSize());
712+
}
713+
687714
protected:
688-
CIArray pending;
715+
CIArrayOf<CJHTreeNode> pending;
716+
UnsignedArray pendingFileIds;
689717
//Use a fixed array for a small number of allocations to avoid a heap allocation inside the critsec
690718
CJHTreeNode * fixedPending[maxFixed]; // deliberately uninitialized
719+
unsigned fixedFileId[maxFixed]; // deliberately uninitialized
691720
unsigned numFixed = 0;
692721

693722
};
@@ -2969,6 +2998,8 @@ const CJHTreeNode *CNodeCache::getCachedNode(const INodeLoader *keyIndex, unsign
29692998
block.leave();
29702999

29713000
//Update ctx stats outside of the critical section.
3001+
if (unlikely(recordingEvents()))
3002+
queryRecorder().recordIndexLookup(iD, pos, type, true);
29723003
if (ctx) ctx->noteStatistic(hitStatId[cacheType], 1);
29733004
return fastPathMatch;
29743005
}
@@ -2986,6 +3017,9 @@ const CJHTreeNode *CNodeCache::getCachedNode(const INodeLoader *keyIndex, unsign
29863017
ownedCacheEntry.setown(cacheEntry);
29873018
}
29883019

3020+
if (unlikely(recordingEvents()))
3021+
queryRecorder().recordIndexLookup(iD, pos, type, false);
3022+
29893023
//If an exception is thrown before the node is cleanly loaded we need to remove the partially constructed
29903024
//node from the cache otherwise it may never get loaded, and can prevent items being removed from the cache
29913025
//note: noteStatistic() can throw an exception if a worker has been aborted...
@@ -3041,6 +3075,10 @@ const CJHTreeNode *CNodeCache::getCachedNode(const INodeLoader *keyIndex, unsign
30413075
}
30423076
cycle_t endLoadCycles = get_cycles_now();
30433077
cycle_t actualLoadCycles = endLoadCycles - startLoadCycles;
3078+
3079+
if (unlikely(recordingEvents()))
3080+
queryRecorder().recordIndexLoad(iD, pos, type, ownedCacheEntry->queryNode()->getMemSize(), cycle_to_nanosec(actualLoadCycles), cycle_to_nanosec(fetchCycles));
3081+
30443082
if (actualLoadCycles > traceNodeLoadThreshold)
30453083
{
30463084
if ((endLoadCycles - lastLoadReportCycles) >= traceNodeLoadFrequency)

system/jlib/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ set ( SRCS
6363
jcrc.cpp
6464
jdebug.cpp
6565
jencrypt.cpp
66+
jevent.cpp
6667
jexcept.cpp
6768
jfile.cpp
6869
jflz.cpp
@@ -127,6 +128,7 @@ set ( INCLUDES
127128
jelogtype.hpp
128129
jencrypt.hpp
129130
jerror.hpp
131+
jevent.hpp
130132
jexcept.hpp
131133
jfcmp.hpp
132134
jfile.hpp

0 commit comments

Comments
 (0)