
Commit 2ca1978

Merge pull request #481 from OpenBeta/error-handling
Increased memory and error logging for prod crashes
2 parents c112f1e + 06ef883 commit 2ca1978

File tree

7 files changed: +425 -28 lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -13,4 +13,4 @@ COPY . *.env ./
 RUN yarn install --no-progress && \
   yarn build-release
 
-CMD node --experimental-json-modules build/main.js
+CMD node --max-old-space-size=1024 --max-semi-space-size=128 --optimize-for-size --gc-interval=100 --expose-gc --experimental-json-modules build/main.js
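
The new flags cap V8's old-generation heap at 1 GB and bias garbage collection toward lower memory use; --expose-gc additionally makes a manual global.gc() available. One way to confirm the limit actually applies inside the container is to log V8's heap statistics at startup. A minimal sketch (the logging location and message are illustrative, not part of this commit):

import v8 from 'v8'

// heap_size_limit reflects --max-old-space-size plus some V8 overhead,
// so with the CMD above it should report roughly 1 GB.
const heapLimitMb = Math.round(v8.getHeapStatistics().heap_size_limit / 1024 / 1024)
console.log(`V8 heap limit: ${heapLimitMb}MB`)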

src/db/index.ts

Lines changed: 10 additions & 1 deletion

@@ -49,10 +49,19 @@ export const connectDB = async (onConnected: () => any = defaultFn): Promise<void
   mongoose.connection.on(
     'error', (e) => {
       logger.error('MongoDB connection error', e)
-      process.exit(1)
+      // Don't exit immediately, let the app try to reconnect
+      // process.exit(1)
     }
   )
 
+  mongoose.connection.on('disconnected', () => {
+    logger.warn('MongoDB disconnected. Attempting to reconnect...')
+  })
+
+  mongoose.connection.on('reconnected', () => {
+    logger.info('MongoDB reconnected successfully')
+  })
+
   await mongoose.connect(
     `${scheme}://${user}:${pass}@${server}/${dbName}?authSource=${authDb}&tls=${tlsFlag}&replicaSet=${rsName}`,
     { autoIndex: true }
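
With process.exit(1) commented out, recovery depends on the MongoDB driver's built-in reconnection; the new event handlers only observe it. If the reconnection behavior ever needed explicit tuning, the connect call could pass driver options such as the following. This is a sketch only, with option values that are illustrative assumptions rather than part of this commit:

// Illustrative values, not from this commit: serverSelectionTimeoutMS bounds how
// long each operation waits for a usable server before erroring, and
// heartbeatFrequencyMS controls how often the driver checks server health
// (and therefore how quickly it notices a recovered replica set).
await mongoose.connect(uri, {
  autoIndex: true,
  serverSelectionTimeoutMS: 30000,
  heartbeatFrequencyMS: 10000
})
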
Lines changed: 60 additions & 22 deletions

@@ -1,4 +1,5 @@
 import axios from 'axios'
+import { CircuitBreaker, retryWithBackoff } from '../../../../utils/CircuitBreaker'
 
 const SIRV_CONFIG = {
   clientId: process.env.SIRV_CLIENT_ID_RO ?? null,

@@ -9,9 +10,27 @@ const client = axios.create({
   baseURL: 'https://api.sirv.com/v2',
   headers: {
     'content-type': 'application/json'
-  }
+  },
+  timeout: 30000 // 30 second timeout
 })
 
+// Add axios interceptors for better error handling
+client.interceptors.response.use(
+  response => response,
+  async error => {
+    console.error('Sirv API error:', {
+      status: error.response?.status,
+      statusText: error.response?.statusText,
+      data: error.response?.data,
+      config: {
+        method: error.config?.method,
+        url: error.config?.url
+      }
+    })
+    return await Promise.reject(error)
+  }
+)
+
 const headers = {
   'content-type': 'application/json'
 }

@@ -21,23 +40,33 @@ interface TokenParamsType {
   clientSecret: string | null
 }
 
+// Circuit breaker for Sirv API calls
+const sirvCircuitBreaker = new CircuitBreaker({
+  failureThreshold: 3,
+  resetTimeout: 60000, // 1 minute
+  monitoringPeriod: 10000 // 10 seconds
+})
+
 const getToken = async (): Promise<string | null> => {
   const params: TokenParamsType = {
     clientId: SIRV_CONFIG.clientId,
     clientSecret: SIRV_CONFIG.clientSecret
   }
 
   try {
-    const res = await client.post(
-      '/token',
-      params)
+    const res = await sirvCircuitBreaker.execute(async () => {
+      return await retryWithBackoff(async () => {
+        return await client.post('/token', params)
+      }, 3, 1000, 5000)
+    })
 
     if (res.status === 200) {
       return res.data.token
     }
   } catch (e) {
-    console.error(e)
-    process.exit(1)
+    console.error('Failed to get Sirv token after retries:', e)
+    // Don't exit process - let the app continue without Sirv functionality
+    return null
   }
   return null
 }

@@ -57,22 +86,31 @@ interface FileMetadaata {
  * @returns
  */
 export const getFileInfo = async (filename: string): Promise<FileMetadaata> => {
-  const res = await client.get(
-    '/files/stat?filename=' + encodeURIComponent(filename),
-    {
-      headers: {
-        ...headers,
-        Authorization: `bearer ${token}`
-      }
-    }
-  )
-
-  if (res.status === 200) {
-    const { ctime, mtime } = res.data
-    return ({
-      btime: new Date(ctime),
-      mtime: new Date(mtime)
-    })
-  }
-  throw new Error('Sirv API.getFileInfo() error' + res.statusText)
+  try {
+    const res = await sirvCircuitBreaker.execute(async () => {
+      return await retryWithBackoff(async () => {
+        return await client.get(
+          '/files/stat?filename=' + encodeURIComponent(filename),
+          {
+            headers: {
+              ...headers,
+              Authorization: `bearer ${token}`
+            }
+          }
+        )
+      }, 3, 1000, 5000)
+    })
+
+    if (res.status === 200) {
+      const { ctime, mtime } = res.data
+      return ({
+        btime: new Date(ctime),
+        mtime: new Date(mtime)
+      })
+    }
+    throw new Error('Sirv API.getFileInfo() error: ' + String(res.statusText))
+  } catch (e) {
+    console.error('Failed to get file info after retries:', e)
+    throw e
+  }
 }
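
Both call sites now layer the same two guards, breaker outside and backoff retries inside, so each execute() records one success or failure toward the threshold no matter how many inner retries ran. The pattern could be factored into a small helper; the name guardedRequest below is hypothetical, not something this commit adds:

import { AxiosResponse } from 'axios'

// Hypothetical helper reusing sirvCircuitBreaker and retryWithBackoff from above:
// breaker outside, up to 3 retries with 1s initial / 5s max backoff inside.
const guardedRequest = async <T>(req: () => Promise<AxiosResponse<T>>): Promise<AxiosResponse<T>> =>
  await sirvCircuitBreaker.execute(async () =>
    await retryWithBackoff(req, 3, 1000, 5000)
  )

// e.g. const res = await guardedRequest(async () => await client.post('/token', params))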

src/main.ts

Lines changed: 54 additions & 2 deletions

@@ -1,5 +1,57 @@
 import { connectDB, defaultPostConnect } from './db/index.js'
 import { createServer } from './server.js'
+import { errorMonitor, setupGlobalErrorHandlers } from './utils/ErrorMonitor.js'
 
-await connectDB(defaultPostConnect)
-await createServer()
+// Setup enhanced error monitoring
+setupGlobalErrorHandlers()
+
+// Enhanced error handling with graceful shutdown
+let isShuttingDown = false
+
+process.on('uncaughtException', (error) => {
+  console.error('Uncaught Exception:', error)
+  errorMonitor.logError(error, 'UNCAUGHT_EXCEPTION')
+
+  if (!isShuttingDown) {
+    isShuttingDown = true
+    // Give some time for cleanup before exiting
+    setTimeout(() => {
+      console.log('Final error stats:', errorMonitor.getStats())
+      process.exit(1)
+    }, 5000)
+  }
+})
+
+process.on('unhandledRejection', (reason, promise) => {
+  console.error('Unhandled Rejection at:', promise, 'reason:', reason)
+  const error = reason instanceof Error ? reason : new Error(String(reason))
+  errorMonitor.logError(error, 'UNHANDLED_REJECTION', { promise })
+
+  // Don't exit immediately on unhandled rejections in production
+  // Log the error and continue running
+  if (process.env.NODE_ENV !== 'production') {
+    if (!isShuttingDown) {
+      isShuttingDown = true
+      setTimeout(() => process.exit(1), 5000)
+    }
+  }
+})
+
+process.on('SIGTERM', () => {
+  console.log('SIGTERM received, shutting down gracefully')
+  process.exit(0)
+})
+
+process.on('SIGINT', () => {
+  console.log('SIGINT received, shutting down gracefully')
+  process.exit(0)
+})
+
+try {
+  await connectDB(defaultPostConnect)
+  await createServer()
+  console.log('🚀 Server started successfully')
+} catch (error) {
+  console.error('Failed to start server:', error)
+  process.exit(1)
+}
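
src/utils/ErrorMonitor.ts is one of the seven changed files, but its diff is not shown on this page. From the calls above, logError(error, type, context?) and getStats(), a compatible shape might look like the following sketch. This is an inference from usage, not the committed file:

// Assumed shape only; inferred from how main.ts uses the module.
class ErrorMonitor {
  private readonly counts = new Map<string, number>()

  logError (error: Error, type: string, context?: Record<string, unknown>): void {
    this.counts.set(type, (this.counts.get(type) ?? 0) + 1)
    console.error(`[${type}]`, error.message, context ?? {})
  }

  getStats (): Record<string, number> {
    return Object.fromEntries(this.counts)
  }
}

export const errorMonitor = new ErrorMonitor()

export function setupGlobalErrorHandlers (): void {
  // Placeholder: the real implementation is not visible in this diff.
}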

src/server.ts

Lines changed: 17 additions & 2 deletions

@@ -49,16 +49,31 @@ export async function createServer (): Promise<{ app: express.Application, server
     schema,
     plugins: [ApolloServerPluginDrainHttpServer({ httpServer })],
     cache: new InMemoryLRUCache({
-      max: 100
+      max: 50,
+      maxSize: 1024 * 1024 * 10
     })
   })
   // server must be started before applying middleware
   await server.start()
 
   const context = process.env.LOCAL_DEV_BYPASS_AUTH === 'true' ? localDevBypassAuthContext : createContext
 
+  app.get('/health', (req, res) => {
+    const memUsage = process.memoryUsage()
+    res.json({
+      status: 'ok',
+      timestamp: new Date().toISOString(),
+      memory: {
+        rss: `${Math.round(memUsage.rss / 1024 / 1024)}MB`,
+        heapTotal: `${Math.round(memUsage.heapTotal / 1024 / 1024)}MB`,
+        heapUsed: `${Math.round(memUsage.heapUsed / 1024 / 1024)}MB`,
+        external: `${Math.round(memUsage.external / 1024 / 1024)}MB`
+      }
+    })
+  })
+
   app.use('/',
-    bodyParser.json({ limit: '10mb' }),
+    bodyParser.json({ limit: '5mb' }),
     cors<cors.CorsRequest>(),
     express.json(),
     expressMiddleware(server, {
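
The new /health endpoint surfaces the same memory numbers the Dockerfile flags constrain, which makes it easy to watch heap growth in production. Assuming the server listens on port 4000 (the listen call is not part of this diff), a probe could look like:

// Port 4000 is an assumption; substitute whatever the server actually binds.
const res = await fetch('http://localhost:4000/health')
const body = await res.json()
console.log(body.status, body.memory.heapUsed) // e.g. 'ok' '73MB'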

src/utils/CircuitBreaker.ts

Lines changed: 120 additions & 0 deletions

@@ -0,0 +1,120 @@

/**
 * Circuit breaker pattern implementation for handling network failures
 */

export enum CircuitState {
  CLOSED = 'CLOSED',
  OPEN = 'OPEN',
  HALF_OPEN = 'HALF_OPEN'
}

export interface CircuitBreakerOptions {
  failureThreshold: number
  resetTimeout: number
  monitoringPeriod: number
}

export class CircuitBreaker {
  private state: CircuitState = CircuitState.CLOSED
  private failureCount: number = 0
  private lastFailureTime?: number
  private successCount: number = 0

  constructor (
    private readonly options: CircuitBreakerOptions = {
      failureThreshold: 5,
      resetTimeout: 60000, // 1 minute
      monitoringPeriod: 10000 // 10 seconds
    }
  ) {}

  async execute<T>(operation: () => Promise<T>): Promise<T> {
    if (this.state === CircuitState.OPEN) {
      if (this.shouldAttemptReset()) {
        this.state = CircuitState.HALF_OPEN
      } else {
        throw new Error('Circuit breaker is OPEN - operation not allowed')
      }
    }

    try {
      const result = await operation()
      this.onSuccess()
      return result
    } catch (error) {
      this.onFailure()
      throw error
    }
  }

  private onSuccess (): void {
    this.failureCount = 0
    if (this.state === CircuitState.HALF_OPEN) {
      this.state = CircuitState.CLOSED
    }
    this.successCount++
  }

  private onFailure (): void {
    this.failureCount++
    this.lastFailureTime = Date.now()

    if (this.failureCount >= this.options.failureThreshold) {
      this.state = CircuitState.OPEN
    }
  }

  private shouldAttemptReset (): boolean {
    return (
      this.lastFailureTime != null &&
      Date.now() - this.lastFailureTime >= this.options.resetTimeout
    )
  }

  getState (): CircuitState {
    return this.state
  }

  getStats (): { state: CircuitState, failureCount: number, successCount: number, lastFailureTime?: number } {
    return {
      state: this.state,
      failureCount: this.failureCount,
      successCount: this.successCount,
      lastFailureTime: this.lastFailureTime
    }
  }
}

/**
 * Retry with exponential backoff
 */
export async function retryWithBackoff<T> (
  operation: () => Promise<T>,
  maxRetries: number = 3,
  initialDelay: number = 1000,
  maxDelay: number = 10000
): Promise<T> {
  let lastError: Error | undefined

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await operation()
    } catch (error) {
      lastError = error as Error

      if (attempt === maxRetries) {
        break
      }

      const delay = Math.min(
        initialDelay * Math.pow(2, attempt - 1),
        maxDelay
      )

      console.warn(`Operation failed (attempt ${attempt}/${maxRetries}), retrying in ${delay}ms:`, error)
      await new Promise(resolve => setTimeout(resolve, delay))
    }
  }

  throw lastError ?? new Error('Operation failed after all retry attempts')
}
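
A small usage sketch combining the two exports, mirroring how the Sirv client composes them. One detail worth noting: monitoringPeriod is accepted in the options but never read by the implementation as committed.

import { CircuitBreaker, retryWithBackoff } from './CircuitBreaker'

const breaker = new CircuitBreaker({
  failureThreshold: 3,
  resetTimeout: 60000,
  monitoringPeriod: 10000 // accepted but unused by the current implementation
})

// Each execute() counts once toward the failure threshold, even if the
// inner retryWithBackoff made several attempts before giving up.
const fetchGuarded = async (url: string): Promise<string> =>
  await breaker.execute(async () =>
    await retryWithBackoff(async () => {
      const res = await fetch(url) // placeholder URL supplied by the caller
      if (!res.ok) throw new Error(`HTTP ${res.status}`)
      return await res.text()
    }, 3, 1000, 5000)
  )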
