Skip to content

Commit 7c3ba07

Browse files
l2ysho and claude authored
refactor: resolve last direct @apify/log calls
- Remove direct @apify/log imports from utils, memory-storage, http-client, and stagehand-crawler
- Use callback options for system-info logging in cpu-info and memory-info
- Use serviceLocator.getLogger() in context_pipeline and stagehand-crawler
- Wire crawler logger through BasicCrawler into HttpClient and RobotsTxtFile
- Drop @apify/log dependency from all four package.json files

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0b6a159 commit 7c3ba07

File tree

44 files changed

+396 -284 lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+396 -284 lines changed

packages/basic-crawler/package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
"access": "public"
4040
},
4141
"dependencies": {
42-
"@apify/log": "^2.5.18",
4342
"@apify/timeout": "^0.3.2",
4443
"@apify/utilities": "^2.15.5",
4544
"@crawlee/core": "4.0.0",

packages/basic-crawler/src/internals/basic-crawler.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,7 @@ export class BasicCrawler<
815815
this.requestQueue = requestQueue;
816816
}
817817

818-
this.httpClient = httpClient ?? new GotScrapingHttpClient();
818+
this.httpClient = httpClient ?? new GotScrapingHttpClient({ logger: this.log });
819819
this.proxyConfiguration = proxyConfiguration;
820820
this.statusMessageLoggingInterval = statusMessageLoggingInterval;
821821
this.statusMessageCallback = statusMessageCallback as StatusMessageCallback;
@@ -1711,7 +1711,7 @@ export class BasicCrawler<
17111711
return cachedRobotsTxtFile;
17121712
}
17131713

1714-
const robotsTxtFile = await RobotsTxtFile.find(url);
1714+
const robotsTxtFile = await RobotsTxtFile.find(url, { logger: this.log });
17151715
this.robotsTxtFileCache.add(origin, robotsTxtFile);
17161716

17171717
return robotsTxtFile;

packages/browser-pool/package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
"copy": "tsx ../../scripts/copy.ts"
3131
},
3232
"dependencies": {
33-
"@apify/log": "^2.5.18",
3433
"@apify/timeout": "^0.3.2",
3534
"@crawlee/core": "4.0.0",
3635
"@crawlee/types": "4.0.0",

packages/core/src/autoscaling/snapshotter.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,10 @@ export class Snapshotter {
177177
this.maxMemoryBytes = memoryMbytes * 1024 * 1024;
178178
} else {
179179
const containerized = serviceLocator.getConfiguration().get('containerized', await isContainerized());
180-
const memInfo = await getMemoryInfo(containerized);
180+
const memInfo = await getMemoryInfo({
181+
containerized,
182+
logger: serviceLocator.getLogger(),
183+
});
181184
const totalBytes = memInfo.totalBytes;
182185

183186
this.maxMemoryBytes = Math.ceil(

packages/core/src/crawlers/context_pipeline.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
import type { Awaitable } from '@crawlee/types';
22

3-
import log from '@apify/log';
4-
53
import {
64
ContextPipelineCleanupError,
75
ContextPipelineInitializationError,
86
ContextPipelineInterruptedError,
97
RequestHandlerError,
108
SessionError,
119
} from '../errors.js';
10+
import { serviceLocator } from '../service_locator.js';
1211

1312
/**
1413
* Represents a middleware step in the context pipeline.
@@ -175,7 +174,9 @@ class ContextPipelineImpl<TContextBase, TCrawlingContext extends TContextBase> e
175174
);
176175
}
177176
} catch (error: any) {
178-
log.debug(`Context pipeline failed to define property ${key.toString()}:`, error);
177+
serviceLocator
178+
.getLogger()
179+
.debug(`Context pipeline failed to define property ${key.toString()}:`, error);
179180
}
180181
}
181182

packages/core/src/events/local_event_manager.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,10 @@ export class LocalEventManager extends EventManager {
9393

9494
private async createCpuInfo(options: { maxUsedCpuRatio: number }) {
9595
const { getCurrentCpuTicksV2 } = await import('@crawlee/utils');
96-
const usedCpuRatio = await getCurrentCpuTicksV2(await this.isContainerizedWrapper());
96+
const usedCpuRatio = await getCurrentCpuTicksV2({
97+
containerized: await this.isContainerizedWrapper(),
98+
logger: serviceLocator.getLogger(),
99+
});
97100
return {
98101
cpuCurrentUsage: usedCpuRatio * 100,
99102
isCpuOverloaded: usedCpuRatio > options.maxUsedCpuRatio,
@@ -103,7 +106,10 @@ export class LocalEventManager extends EventManager {
103106
private async createMemoryInfo() {
104107
try {
105108
const { getMemoryInfo } = await import('@crawlee/utils');
106-
const memInfo = await getMemoryInfo(await this.isContainerizedWrapper());
109+
const memInfo = await getMemoryInfo({
110+
containerized: await this.isContainerizedWrapper(),
111+
logger: serviceLocator.getLogger(),
112+
});
107113
return {
108114
memCurrentBytes: memInfo.mainProcessBytes + memInfo.childProcessesBytes,
109115
};

packages/core/src/log.ts

Lines changed: 3 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,9 @@
1+
import type { CrawleeLogger, CrawleeLoggerOptions } from '@crawlee/types';
2+
13
import type { LoggerOptions } from '@apify/log';
24
import log, { Log, Logger, LoggerJson, LoggerText, LogLevel } from '@apify/log';
35

4-
/**
5-
* Configuration options for Crawlee logger implementations.
6-
*/
7-
export interface CrawleeLoggerOptions {
8-
/** Prefix to be prepended to each logged line. */
9-
prefix?: string | null;
10-
}
11-
12-
/**
13-
* Interface for Crawlee logger implementations.
14-
* This allows users to inject custom loggers (e.g., Winston, Pino) while maintaining
15-
* compatibility with the default `@apify/log` implementation.
16-
*/
17-
export interface CrawleeLogger {
18-
/**
19-
* Returns the logger configuration.
20-
*/
21-
getOptions(): CrawleeLoggerOptions;
22-
23-
/**
24-
* Configures logger options.
25-
*/
26-
setOptions(options: Partial<CrawleeLoggerOptions>): void;
27-
28-
/**
29-
* Creates a new instance of logger that inherits settings from a parent logger.
30-
*/
31-
child(options: Partial<CrawleeLoggerOptions>): CrawleeLogger;
32-
33-
/**
34-
* Logs an `ERROR` message.
35-
*/
36-
error(message: string, data?: Record<string, unknown>): void;
37-
38-
/**
39-
* Logs an `ERROR` level message with a nicely formatted exception.
40-
*/
41-
exception(exception: Error, message: string, data?: Record<string, unknown>): void;
42-
43-
/**
44-
* Logs a `SOFT_FAIL` level message.
45-
*/
46-
softFail(message: string, data?: Record<string, unknown>): void;
47-
48-
/**
49-
* Logs a `WARNING` level message.
50-
*/
51-
warning(message: string, data?: Record<string, unknown>): void;
52-
53-
/**
54-
* Logs a `WARNING` level message only once.
55-
*/
56-
warningOnce(message: string): void;
57-
58-
/**
59-
* Logs an `INFO` message.
60-
*/
61-
info(message: string, data?: Record<string, unknown>): void;
62-
63-
/**
64-
* Logs a `DEBUG` message.
65-
*/
66-
debug(message: string, data?: Record<string, unknown>): void;
67-
68-
/**
69-
* Logs a `PERF` level message for performance tracking.
70-
*/
71-
perf(message: string, data?: Record<string, unknown>): void;
72-
73-
/**
74-
* Logs given message only once as WARNING for deprecated features.
75-
*/
76-
deprecated(message: string): void;
77-
78-
/**
79-
* Logs a message at the given level. Useful when the log level is determined dynamically.
80-
*/
81-
logWithLevel(level: number, message: string, data?: Record<string, unknown>): void;
82-
}
6+
export type { CrawleeLogger, CrawleeLoggerOptions };
837

848
/**
859
* Abstract base class for custom Crawlee logger implementations.

packages/core/src/service_locator.ts

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,6 @@ interface ServiceLocatorInterface {
127127
* // Crawler has its own isolated ServiceLocator instance
128128
* ```
129129
*/
130-
// Used as fallback in ServiceLocator methods that need to log before a logger is explicitly set,
131-
// without implicitly locking the logger slot (which getLogger() would do).
132-
const fallbackLog = new ApifyLogAdapter(log);
133-
134130
export class ServiceLocator implements ServiceLocatorInterface {
135131
private configuration?: Configuration;
136132
private eventManager?: EventManager;
@@ -165,9 +161,7 @@ export class ServiceLocator implements ServiceLocatorInterface {
165161

166162
getConfiguration(): Configuration {
167163
if (!this.configuration) {
168-
(this.logger ?? fallbackLog).debug(
169-
'No configuration set, implicitly creating and using default Configuration.',
170-
);
164+
this.getLogger().debug('No configuration set, implicitly creating and using default Configuration.');
171165
this.configuration = new Configuration();
172166
}
173167
return this.configuration;
@@ -189,11 +183,9 @@ export class ServiceLocator implements ServiceLocatorInterface {
189183

190184
getEventManager(): EventManager {
191185
if (!this.eventManager) {
192-
(this.logger ?? fallbackLog).debug(
193-
'No event manager set, implicitly creating and using default LocalEventManager.',
194-
);
186+
this.getLogger().debug('No event manager set, implicitly creating and using default LocalEventManager.');
195187
if (!this.configuration) {
196-
(this.logger ?? fallbackLog).warning(
188+
this.getLogger().warning(
197189
'Implicit creation of event manager will implicitly set configuration as side effect. ' +
198190
'It is advised to explicitly first set the configuration instead.',
199191
);
@@ -219,18 +211,17 @@ export class ServiceLocator implements ServiceLocatorInterface {
219211

220212
getStorageClient(): StorageClient {
221213
if (!this.storageClient) {
222-
(this.logger ?? fallbackLog).debug(
223-
'No storage client set, implicitly creating and using default MemoryStorage.',
224-
);
214+
this.getLogger().debug('No storage client set, implicitly creating and using default MemoryStorage.');
225215
if (!this.configuration) {
226-
(this.logger ?? fallbackLog).warning(
216+
this.getLogger().warning(
227217
'Implicit creation of storage client will implicitly set configuration as side effect. ' +
228218
'It is advised to explicitly first set the configuration instead.',
229219
);
230220
}
231221
const config = this.getConfiguration();
232222
this.storageClient = new MemoryStorage({
233223
persistStorage: config.get('persistStorage'),
224+
logger: this.getLogger().child({ prefix: 'MemoryStorage' }),
234225
});
235226
}
236227
return this.storageClient;

packages/core/src/storages/sitemap_request_list.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,9 @@ export class SitemapRequestList implements IRequestList {
423423
persistStateKey: options.persistStateKey ?? STATE_PERSISTENCE_KEY,
424424
});
425425
await requestList.restoreState();
426-
void requestList.load({ parseSitemapOptions: { ...options.parseSitemapOptions, httpClient } });
426+
void requestList.load({
427+
parseSitemapOptions: { logger: serviceLocator.getLogger(), ...options.parseSitemapOptions, httpClient },
428+
});
427429

428430
if (requestList.persistenceOptions.enable) {
429431
requestList.events.on(EventType.PERSIST_STATE, requestList.persistState);

packages/core/test/core/service_locator.test.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ function makeMockLogger(overrides: Partial<CrawleeLogger> = {}): CrawleeLogger {
2323
debug: () => {},
2424
perf: () => {},
2525
deprecated: () => {},
26-
internal: () => {},
26+
logWithLevel: () => {},
2727
...overrides,
2828
};
2929
return logger;
@@ -205,6 +205,17 @@ describe('ServiceLocator', () => {
205205
}).toThrow(/Logger is already in use/);
206206
});
207207

208+
test('setting logger after getStorageClient throws ServiceConflictError (logger already locked)', () => {
209+
// getStorageClient() implicitly calls getLogger(), locking the logger
210+
serviceLocator.getStorageClient();
211+
212+
const customLogger = makeMockLogger();
213+
214+
expect(() => {
215+
serviceLocator.setLogger(customLogger);
216+
}).toThrow(ServiceConflictError);
217+
});
218+
208219
test('reset clears the logger', () => {
209220
const customLogger = makeMockLogger();
210221
serviceLocator.setLogger(customLogger);

0 commit comments

Comments
 (0)