Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
280 changes: 280 additions & 0 deletions packages/daemon/__tests__/actors/MonitoringActor.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
/**
* Copyright (c) Hathor Labs and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

import MonitoringActor from '../../src/actors/MonitoringActor';
import logger from '../../src/logger';
import { EventTypes } from '../../src/types/event';
import getConfig from '../../src/config';
import { addAlert } from '@wallet-service/common';

// Run the whole file on Jest's fake timers so tests can advance time explicitly
// with jest.advanceTimersByTime().
jest.useFakeTimers();
// Spy on the global timer functions so tests can assert how many times the
// actor schedules or clears an interval / timeout.
jest.spyOn(global, 'setInterval');
jest.spyOn(global, 'clearInterval');
jest.spyOn(global, 'setTimeout');
jest.spyOn(global, 'clearTimeout');

// Replace only `addAlert` from '@wallet-service/common' with a mock that
// resolves to undefined; every other export keeps its real implementation.
jest.mock('@wallet-service/common', () => ({
  ...jest.requireActual('@wallet-service/common'),
  addAlert: jest.fn().mockResolvedValue(undefined),
}));

// Handle on the mocked addAlert, cast so tests can inspect its recorded calls.
const mockAddAlert = addAlert as jest.Mock;

/*
 * Unit tests for MonitoringActor. They cover three areas: idle detection,
 * stuck-processing detection and reconnection-storm detection, plus the
 * handling of unexpected event types. Time is driven with fake timers and
 * process.exit is stubbed, so no test waits in real time or ends the process.
 */
describe('MonitoringActor', () => {
  // Callback handed to the actor as its first argument; several tests assert it is never invoked.
  let mockCallback: jest.Mock;
  // Stand-in for the actor's `receive` argument; it records the handler it is given.
  let mockReceive: jest.Mock;
  // The handler captured by mockReceive; tests call it to deliver events.
  let receiveCallback: (event: any) => void;
  let config: ReturnType<typeof getConfig>;
  let processExitSpy: jest.SpyInstance;

  // Delivers a MONITORING_EVENT carrying the given monitoring sub-type to the
  // handler captured by mockReceive.
  const sendEvent = (monitoringEventType: string) => {
    receiveCallback({
      type: EventTypes.MONITORING_EVENT,
      event: { type: monitoringEventType },
    });
  };

  beforeEach(() => {
    // Drop call history and pending fake timers left over from the previous test.
    jest.clearAllMocks();
    jest.clearAllTimers();
    // Stub process.exit so a triggered exit is recorded instead of executed.
    processExitSpy = jest.spyOn(process, 'exit').mockImplementation(() => undefined as never);
    config = getConfig();
    config['IDLE_EVENT_TIMEOUT_MS'] = 5 * 60 * 1000; // 5 min
    config['STUCK_PROCESSING_TIMEOUT_MS'] = 5 * 60 * 1000; // 5 min
    config['RECONNECTION_STORM_THRESHOLD'] = 3; // low threshold for tests
    config['RECONNECTION_STORM_WINDOW_MS'] = 5 * 60 * 1000; // 5 min

    mockCallback = jest.fn();
    // Store whatever handler is passed in so sendEvent() can invoke it later.
    mockReceive = jest.fn().mockImplementation((cb: any) => {
      receiveCallback = cb;
    });
  });

  afterAll(() => {
    jest.clearAllMocks();
    // Switch back to real timers once the suite is done.
    jest.useRealTimers();
  });

  // ── Idle detection ───────────────────────────────────────────────────────────

  it('should not start the idle timer on initialization', () => {
    MonitoringActor(mockCallback, mockReceive, config);
    expect(setInterval).not.toHaveBeenCalled();
  });

  it('should start the idle timer when receiving a CONNECTED event', () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('CONNECTED');
    expect(setInterval).toHaveBeenCalledTimes(1);
  });

  it('should stop the idle timer when receiving a DISCONNECTED event', () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('CONNECTED');
    sendEvent('DISCONNECTED');
    expect(clearInterval).toHaveBeenCalledTimes(1);
  });

  it('should stop the idle timer when the actor is stopped', () => {
    // MonitoringActor returns a function; calling it stops the actor.
    const stopActor = MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('CONNECTED');
    stopActor();
    expect(clearInterval).toHaveBeenCalledTimes(1);
  });

  it('should fire an idle alert and exit after IDLE_EVENT_TIMEOUT_MS with no events', async () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('CONNECTED');

    jest.advanceTimersByTime(config['IDLE_EVENT_TIMEOUT_MS'] + 1);
    await Promise.resolve();
    await Promise.resolve(); // flush the .finally() microtask

    expect(mockAddAlert).toHaveBeenCalledTimes(1);
    // The first argument passed to addAlert is the alert title.
    expect(mockAddAlert.mock.calls[0][0]).toBe('Daemon Idle — No Events Received');
    expect(processExitSpy).toHaveBeenCalledWith(1);
  });

  it('should NOT fire an idle alert when events keep arriving', async () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('CONNECTED');

    // Stay below the threshold each time
    jest.advanceTimersByTime(config['IDLE_EVENT_TIMEOUT_MS'] - 1000);
    sendEvent('EVENT_RECEIVED');
    jest.advanceTimersByTime(config['IDLE_EVENT_TIMEOUT_MS'] - 1000);

    await Promise.resolve();
    expect(mockAddAlert).not.toHaveBeenCalled();
  });

  it('should fire only one idle alert and exit once per idle period', async () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('CONNECTED');

    // Advance three full timeout periods without delivering any event.
    jest.advanceTimersByTime(config['IDLE_EVENT_TIMEOUT_MS'] * 3);
    await Promise.resolve();
    await Promise.resolve();

    expect(mockAddAlert).toHaveBeenCalledTimes(1);
    expect(processExitSpy).toHaveBeenCalledTimes(1);
    expect(processExitSpy).toHaveBeenCalledWith(1);
  });

  it('should reset the idle alert flag when an event is received, allowing a second exit', async () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('CONNECTED');

    // Trigger first alert + exit
    jest.advanceTimersByTime(config['IDLE_EVENT_TIMEOUT_MS'] + 1);
    await Promise.resolve();
    await Promise.resolve();
    expect(mockAddAlert).toHaveBeenCalledTimes(1);
    expect(processExitSpy).toHaveBeenCalledTimes(1);

    // Receive an event — resets idleAlertFired and lastEventReceivedAt
    sendEvent('EVENT_RECEIVED');

    // Advance far enough for the interval to fire when idleMs >= threshold again.
    // The interval fires at 2T, 3T, … from start. After EVENT_RECEIVED at ~T,
    // the next fire where idleMs >= T is at 3T (fire at 2T gives idleMs = T-1).
    jest.advanceTimersByTime(2 * config['IDLE_EVENT_TIMEOUT_MS']);
    await Promise.resolve();
    await Promise.resolve();

    expect(mockAddAlert).toHaveBeenCalledTimes(2);
    expect(processExitSpy).toHaveBeenCalledTimes(2);
  });

  it('should restart the idle timer when CONNECTED is sent while already running', () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('CONNECTED');
    sendEvent('CONNECTED'); // second connect clears old and starts new
    expect(clearInterval).toHaveBeenCalledTimes(1);
    expect(setInterval).toHaveBeenCalledTimes(2);
  });

  // ── Stuck-processing detection ───────────────────────────────────────────────

  it('should start a stuck timer on PROCESSING_STARTED', () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('PROCESSING_STARTED');
    expect(setTimeout).toHaveBeenCalledTimes(1);
  });

  it('should cancel the stuck timer on PROCESSING_COMPLETED', () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('PROCESSING_STARTED');
    sendEvent('PROCESSING_COMPLETED');
    expect(clearTimeout).toHaveBeenCalledTimes(1);
  });

  // Only the alert title is asserted below; the severity named in the test
  // title is not checked by this test.
  it('should fire a MAJOR alert when stuck', async () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('PROCESSING_STARTED');

    jest.advanceTimersByTime(config['STUCK_PROCESSING_TIMEOUT_MS'] + 1);
    await Promise.resolve();
    await Promise.resolve();

    expect(mockAddAlert).toHaveBeenCalledTimes(1);
    expect(mockAddAlert.mock.calls[0][0]).toBe('Daemon Stuck In Processing State');
    // The callback given to the actor must not be invoked when the stuck alert fires.
    expect(mockCallback).not.toHaveBeenCalled();
  });

  it('should NOT fire the stuck alert when PROCESSING_COMPLETED arrives in time', async () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('PROCESSING_STARTED');

    jest.advanceTimersByTime(config['STUCK_PROCESSING_TIMEOUT_MS'] - 1000);
    sendEvent('PROCESSING_COMPLETED');

    jest.advanceTimersByTime(2000); // advance past original timeout
    await Promise.resolve();

    expect(mockAddAlert).not.toHaveBeenCalled();
    expect(mockCallback).not.toHaveBeenCalled();
  });

  it('should reset the stuck timer on consecutive PROCESSING_STARTED events', () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('PROCESSING_STARTED');
    sendEvent('PROCESSING_STARTED'); // second one clears the first
    expect(clearTimeout).toHaveBeenCalledTimes(1);
    expect(setTimeout).toHaveBeenCalledTimes(2);
  });

  it('should stop the stuck timer when the actor is stopped', () => {
    const stopActor = MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('PROCESSING_STARTED');
    stopActor();
    expect(clearTimeout).toHaveBeenCalledTimes(1);
  });

  it('should also clear the stuck timer on DISCONNECTED', () => {
    MonitoringActor(mockCallback, mockReceive, config);
    sendEvent('CONNECTED');
    sendEvent('PROCESSING_STARTED');
    sendEvent('DISCONNECTED');
    // clearTimeout for stuck timer + clearInterval for idle timer
    expect(clearTimeout).toHaveBeenCalledTimes(1);
    expect(clearInterval).toHaveBeenCalledTimes(1);
  });

  // ── Reconnection storm detection ─────────────────────────────────────────────

  it('should fire a reconnection storm alert when the threshold is reached', async () => {
    MonitoringActor(mockCallback, mockReceive, config);

    sendEvent('RECONNECTING');
    sendEvent('RECONNECTING');
    sendEvent('RECONNECTING'); // threshold is 3 in test config

    await Promise.resolve();
    expect(mockAddAlert).toHaveBeenCalledTimes(1);
    expect(mockAddAlert.mock.calls[0][0]).toBe('Daemon Reconnection Storm');
  });

  it('should NOT fire a reconnection storm alert below the threshold', async () => {
    MonitoringActor(mockCallback, mockReceive, config);

    sendEvent('RECONNECTING');
    sendEvent('RECONNECTING');

    await Promise.resolve();
    expect(mockAddAlert).not.toHaveBeenCalled();
  });

  it('should evict old reconnections outside the storm window', async () => {
    MonitoringActor(mockCallback, mockReceive, config);

    sendEvent('RECONNECTING');
    sendEvent('RECONNECTING');

    // Move the fake clock past the storm window before the next reconnection.
    jest.advanceTimersByTime(config['RECONNECTION_STORM_WINDOW_MS'] + 1000);

    // Only 1 new reconnection — below threshold after eviction
    sendEvent('RECONNECTING');

    await Promise.resolve();
    expect(mockAddAlert).not.toHaveBeenCalled();
  });

  // ── Misc ─────────────────────────────────────────────────────────────────────

  it('should ignore events of other types and log a warning', () => {
    const warnSpy = jest.spyOn(logger, 'warn');
    MonitoringActor(mockCallback, mockReceive, config);

    // Bypass sendEvent() so the outer event type is not MONITORING_EVENT.
    receiveCallback({ type: 'SOME_OTHER_EVENT', event: { type: 'WHATEVER' } });

    expect(warnSpy).toHaveBeenCalledWith(
      '[monitoring] Unexpected event type received by MonitoringActor',
    );
    expect(setInterval).not.toHaveBeenCalled();
  });
});
4 changes: 4 additions & 0 deletions packages/daemon/__tests__/integration/balances.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ getConfig.mockReturnValue({
DB_PASS,
DB_PORT,
ACK_TIMEOUT_MS: 20000,
IDLE_EVENT_TIMEOUT_MS: 5 * 60 * 1000,
STUCK_PROCESSING_TIMEOUT_MS: 5 * 60 * 1000,
RECONNECTION_STORM_THRESHOLD: 10,
RECONNECTION_STORM_WINDOW_MS: 5 * 60 * 1000,
});

let mysql: Connection;
Expand Down
4 changes: 4 additions & 0 deletions packages/daemon/__tests__/integration/token_creation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ getConfig.mockReturnValue({
DB_PASS,
DB_PORT,
ACK_TIMEOUT_MS: 20000,
IDLE_EVENT_TIMEOUT_MS: 5 * 60 * 1000,
STUCK_PROCESSING_TIMEOUT_MS: 5 * 60 * 1000,
RECONNECTION_STORM_THRESHOLD: 10,
RECONNECTION_STORM_WINDOW_MS: 5 * 60 * 1000,
});

let mysql: Connection;
Expand Down
16 changes: 16 additions & 0 deletions packages/daemon/jestIntegrationSetup.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
* Copyright (c) Hathor Labs and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

/**
 * Setup file for the integration test environment.
 *
 * Swaps `addAlert` from '@wallet-service/common' for a mock that resolves to
 * `undefined`, leaving every other export as the real implementation, so that
 * MonitoringActor does not attempt real SQS/SNS connections in environments
 * where AWS credentials / region are not configured.
 */
jest.mock('@wallet-service/common', () => {
  const actualCommon = jest.requireActual('@wallet-service/common');

  return {
    ...actualCommon,
    addAlert: jest.fn().mockResolvedValue(undefined),
  };
});
1 change: 1 addition & 0 deletions packages/daemon/jest_integration.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const mainTestMatch = process.env.SPECIFIC_INTEGRATION_TEST_FILE
module.exports = {
roots: ["<rootDir>/__tests__"],
setupFiles: ['./jestSetup.ts'],
setupFilesAfterEnv: ['./jestIntegrationSetup.ts'],
transform: {
"^.+\\.ts$": ["ts-jest", {
tsconfig: "./tsconfig.json",
Expand Down
63 changes: 63 additions & 0 deletions packages/daemon/src/actions/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,66 @@ export const stopHealthcheckPing = sendTo(
* Logs the event as an error log
*/
export const logEventError = (_context: Context, event: Event) => {
  // Serialize the event through bigIntUtils.JSONBigInt, then log it at error level.
  const serializedEvent = bigIntUtils.JSONBigInt.stringify(event);
  return logger.error(serializedEvent);
};

/*
 * Returns the monitoring actor ref stored in the given context.
 * Throws an Error if the context has no monitoring actor.
 */
export const getMonitoringRefFromContext = (context: Context) => {
if (!context.monitoring) {
throw new Error('No monitoring actor in context');
}
Comment on lines +205 to +207
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (blocking): Throwing in getMonitoringRefFromContext can crash the machine before monitoring is spawned

Do we know the effects of the raised exception on the machine?

Also, is it possible to do something like "no monitoring -> start monitor -> resume machine"?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I refactored it to use `choose` and fail silently instead of throwing.

This is more defensive than anything, as there is no case where monitoring will be null here.

I don't think we should add (at least in this PR) a more elaborate mechanism of pausing the machine and resuming the monitor if it's null.


return context.monitoring;
};

/*
 * Sends a CONNECTED monitoring event to the monitoring actor resolved through
 * getMonitoringRefFromContext, signalling that the WebSocket became connected.
 */
export const sendMonitoringConnected = sendTo(getMonitoringRefFromContext, {
  type: EventTypes.MONITORING_EVENT,
  event: { type: 'CONNECTED' },
});

/*
 * Sends a DISCONNECTED monitoring event to the monitoring actor resolved through
 * getMonitoringRefFromContext, signalling that the WebSocket disconnected.
 */
export const sendMonitoringDisconnected = sendTo(getMonitoringRefFromContext, {
  type: EventTypes.MONITORING_EVENT,
  event: { type: 'DISCONNECTED' },
});

/*
 * Sends an EVENT_RECEIVED monitoring event to the monitoring actor resolved
 * through getMonitoringRefFromContext, signalling that a fullnode event was
 * received (resets the idle timer).
 */
export const sendMonitoringEventReceived = sendTo(getMonitoringRefFromContext, {
  type: EventTypes.MONITORING_EVENT,
  event: { type: 'EVENT_RECEIVED' },
});

/*
 * Sends a RECONNECTING monitoring event to the monitoring actor resolved through
 * getMonitoringRefFromContext, signalling that the machine is entering the
 * RECONNECTING state.
 */
export const sendMonitoringReconnecting = sendTo(getMonitoringRefFromContext, {
  type: EventTypes.MONITORING_EVENT,
  event: { type: 'RECONNECTING' },
});

/*
 * Notifies the monitoring actor that a processing state was entered.
 * The actor starts a stuck-detection timer; if PROCESSING_COMPLETED doesn't
 * arrive within STUCK_PROCESSING_TIMEOUT_MS it raises the
 * 'Daemon Stuck In Processing State' alert.
 *
 * NOTE(review): this comment used to say the alert is CRITICAL and that
 * MONITORING_STUCK_PROCESSING is sent back to the machine. The MonitoringActor
 * unit tests describe the alert as MAJOR and assert that the machine callback
 * is not invoked when it fires — confirm against the actor implementation.
 */
export const sendMonitoringProcessingStarted = sendTo(getMonitoringRefFromContext, {
  type: EventTypes.MONITORING_EVENT,
  event: { type: 'PROCESSING_STARTED' },
});

/*
 * Sends a PROCESSING_COMPLETED monitoring event to the monitoring actor resolved
 * through getMonitoringRefFromContext, signalling that a processing state was
 * exited normally so the stuck-detection timer is cancelled.
 */
export const sendMonitoringProcessingCompleted = sendTo(getMonitoringRefFromContext, {
  type: EventTypes.MONITORING_EVENT,
  event: { type: 'PROCESSING_COMPLETED' },
});
Loading