diff --git a/website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx b/website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx
new file mode 100644
index 000000000000..3eea37178ab0
--- /dev/null
+++ b/website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx
@@ -0,0 +1,60 @@
+---
+id: custom-logger
+title: Using a custom logger
+description: Replace Crawlee's default logger with Winston, Pino, or any logging library
+---
+
+import CodeBlock from '@theme/CodeBlock';
+
+import ImplementationSource from '!!raw-loader!./implementation.ts';
+import UsageSource from '!!raw-loader!./usage.ts';
+
+By default, Crawlee uses `@apify/log` for all internal logging. Starting with v4, you can
+replace this with any logger that suits your infrastructure — Winston, Pino, Bunyan, a
+custom JSON sink, or whatever you already use in production.
+
+This is useful when you want centralized structured logs, need to ship logs to an external
+service (e.g. Datadog, ELK, CloudWatch), or want to enforce a consistent log format across
+your entire application.
+
+## The `CrawleeLogger` interface
+
+Crawlee expects a logger that satisfies the `CrawleeLogger` interface. The easiest way to
+build one is to extend the `BaseCrawleeLogger` abstract class and implement two methods:
+
+- **`logWithLevel(level, message, data?)`** — the core dispatch method called for every log entry.
+- **`createChild(options)`** — returns a new logger instance scoped to a prefix (e.g. the crawler name).
+
+All other methods (`info`, `debug`, `warning`, `error`, `warningOnce`, etc.) are provided
+for free by `BaseCrawleeLogger`.
+
+## Example: Winston adapter
+
+The following adapter wraps a standard Winston logger:
+
+<CodeBlock language="ts">{ImplementationSource}</CodeBlock>
+
+### Wiring the adapter into a crawler
+
+Register your adapter with `serviceLocator.setLogger()` **before** creating any crawlers.
+This sets it as the global Crawlee logger:
+
+<CodeBlock language="ts">{UsageSource}</CodeBlock>
+
+The `log` object available inside `requestHandler` is a child logger scoped to the
+crawler, so prefix-tagged entries like `[CheerioCrawler] Processing ...` appear in your
+Winston output automatically.
+
+## Using a different logging library
+
+The same pattern works for any library. Create a class that extends `BaseCrawleeLogger`,
+map the numeric `level` argument to your library's level constants inside `logWithLevel()`,
+and delegate child-logger creation in `createChild()`. You only need those two methods —
+everything else is handled for you.
+
+## Controlling log level
+
+Level filtering is handled entirely by your custom logger implementation. Configure the
+desired verbosity directly on your underlying library (e.g. set `level: 'debug'` on your
+Winston logger) — Crawlee passes every log call through to your `logWithLevel()` method
+without any filtering of its own.
diff --git a/website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts b/website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts
new file mode 100644
index 000000000000..696e7a0f1eb6
--- /dev/null
+++ b/website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts
@@ -0,0 +1,56 @@
+import winston from 'winston';
+import { BaseCrawleeLogger } from 'crawlee';
+import type { CrawleeLogger, CrawleeLoggerOptions } from 'crawlee';
+
+// Map Crawlee numeric log levels to Winston level strings
+// LogLevel values: ERROR=1, SOFT_FAIL=2, WARNING=3, INFO=4, PERF=5, DEBUG=6
+const CRAWLEE_LEVEL_TO_WINSTON: Record<number, string> = {
+    1: 'error', // ERROR
+    2: 'warn', // SOFT_FAIL
+    3: 'warn', // WARNING
+    4: 'info', // INFO
+    5: 'debug', // PERF
+    6: 'debug', // DEBUG
+};
+
+/**
+ * Adapter that bridges Crawlee's CrawleeLogger interface to a Winston logger.
+ * Extend BaseCrawleeLogger and implement only `logWithLevel()` and `createChild()`.
+ */
+export class WinstonAdapter extends BaseCrawleeLogger {
+    constructor(
+        private readonly logger: winston.Logger,
+        options?: Partial<CrawleeLoggerOptions>,
+    ) {
+        super(options);
+    }
+
+    logWithLevel(level: number, message: string, data?: Record<string, unknown>): void {
+        const winstonLevel = CRAWLEE_LEVEL_TO_WINSTON[level] ?? 'info';
+        const prefix = this.getOptions().prefix;
+        this.logger.log(winstonLevel, message, { ...data, prefix });
+    }
+
+    protected createChild(options: Partial<CrawleeLoggerOptions>): CrawleeLogger {
+        return new WinstonAdapter(
+            this.logger.child({ prefix: options.prefix }),
+            { ...this.getOptions(), ...options },
+        );
+    }
+}
+
+/**
+ * Create a pre-configured Winston logger instance with colorized console output.
+ */
+export const winstonLogger = winston.createLogger({
+    level: 'debug',
+    format: winston.format.combine(
+        winston.format.colorize(),
+        winston.format.timestamp(),
+        winston.format.printf(({ level, message, timestamp, prefix }) => {
+            const tag = prefix ? `[${prefix}] ` : '';
+            return `${timestamp} ${level}: ${tag}${message}`;
+        }),
+    ),
+    transports: [new winston.transports.Console()],
+});
diff --git a/website/versioned_docs/version-4.0/guides/custom-logger/usage.ts b/website/versioned_docs/version-4.0/guides/custom-logger/usage.ts
new file mode 100644
index 000000000000..d3eaa432567c
--- /dev/null
+++ b/website/versioned_docs/version-4.0/guides/custom-logger/usage.ts
@@ -0,0 +1,23 @@
+import { CheerioCrawler, serviceLocator } from 'crawlee';
+import { WinstonAdapter, winstonLogger } from './implementation';
+
+// Register the Winston adapter as Crawlee's global logger
+// This must be done before creating any crawlers
+serviceLocator.setLogger(new WinstonAdapter(winstonLogger));
+
+const crawler = new CheerioCrawler(
+    {
+        async requestHandler({ request, $, log }) {
+            // `log` here is the per-crawler scoped CrawleeLogger instance
+            // backed by your Winston adapter.
+            log.info(`Processing ${request.url}`);
+
+            const title = $('title').text();
+            log.debug('Page title extracted', { title });
+
+            console.log(`Title: ${title}`);
+        },
+    },
+);
+
+await crawler.run(['https://crawlee.dev']);
diff --git a/website/versioned_sidebars/version-4.0-sidebars.json b/website/versioned_sidebars/version-4.0-sidebars.json
index f7bd1d6eda0b..c8482bc3ea04 100644
--- a/website/versioned_sidebars/version-4.0-sidebars.json
+++ b/website/versioned_sidebars/version-4.0-sidebars.json
@@ -35,6 +35,7 @@
             "items": [
                 "guides/request-storage",
                 "guides/result-storage",
+                "guides/http-clients",
                 "guides/configuration",
                 "guides/cheerio-crawler-guide",
                 "guides/javascript-rendering",
@@ -43,12 +44,15 @@
                 "guides/scaling-crawlers",
                 "guides/avoid-blocking",
                 "guides/jsdom-crawler-guide",
+                "guides/impit-http-client/impit-http-client",
                 "guides/got-scraping",
                 "guides/typescript-project",
                 "guides/docker-images",
+                "guides/stagehand-crawler-guide",
                 "guides/running-in-web-server/running-in-web-server",
                 "guides/parallel-scraping/parallel-scraping-guide",
-                "guides/custom-http-client/custom-http-client"
+                "guides/custom-http-client/custom-http-client",
+                "guides/custom-logger/custom-logger"
             ]
         },
         {