From c1a3055dc4426f597ffa4482bcdcdaf9a1df653d Mon Sep 17 00:00:00 2001 From: Pangerkumzuk Longkumer <73515951+pangerlkr@users.noreply.github.com> Date: Mon, 9 Mar 2026 11:12:45 +0530 Subject: [PATCH 1/8] docs: add Winston adapter implementation for custom logger guide --- .../guides/custom-logger/implementation.ts | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts diff --git a/website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts b/website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts new file mode 100644 index 000000000000..2ab04a735122 --- /dev/null +++ b/website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts @@ -0,0 +1,55 @@ +import winston from 'winston'; +import { BaseCrawleeLogger } from 'crawlee'; +import type { CrawleeLogger, CrawleeLoggerOptions } from 'crawlee'; + +// Map Crawlee numeric log levels to Winston level strings +const CRAWLEE_LEVEL_TO_WINSTON: Record<number, string> = { + 0: 'error', // ERROR + 1: 'warn', // WARNING + 2: 'info', // INFO (SOFT_FAIL) + 3: 'info', // INFO + 4: 'debug', // PERF + 5: 'debug', // DEBUG +}; + +/** + * Adapter that bridges Crawlee's CrawleeLogger interface to a Winston logger. + * Extend BaseCrawleeLogger and implement only `log()` and `_createChild()`. + */ +export class WinstonAdapter extends BaseCrawleeLogger { + constructor( + private readonly logger: winston.Logger, + options?: Partial<CrawleeLoggerOptions>, + ) { + super(options); + } + + protected log(level: number, message: string, data?: Record<string, unknown>): void { + const winstonLevel = CRAWLEE_LEVEL_TO_WINSTON[level] ?? 
'info'; + const prefix = this.getOptions().prefix; + this.logger.log(winstonLevel, message, { ...data, prefix }); + } + + protected _createChild(options: Partial<CrawleeLoggerOptions>): CrawleeLogger { + return new WinstonAdapter( + this.logger.child({ prefix: options.prefix }), + { ...this.getOptions(), ...options }, + ); + } +} + +/** + * Create a pre-configured Winston logger instance with colorized console output. + */ +export const winstonLogger = winston.createLogger({ + level: 'debug', + format: winston.format.combine( + winston.format.colorize(), + winston.format.timestamp(), + winston.format.printf(({ level, message, timestamp, prefix }) => { + const tag = prefix ? `[${prefix}] ` : ''; + return `${timestamp} ${level}: ${tag}${message}`; + }), + ), + transports: [new winston.transports.Console()], +}); From 5ebf19c54ad5f83d15f3e034ffd53fc17a5f2513 Mon Sep 17 00:00:00 2001 From: Pangerkumzuk Longkumer <73515951+pangerlkr@users.noreply.github.com> Date: Mon, 9 Mar 2026 11:13:50 +0530 Subject: [PATCH 2/8] docs: add usage example for custom logger guide --- .../version-4.0/guides/custom-logger/usage.ts | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 website/versioned_docs/version-4.0/guides/custom-logger/usage.ts diff --git a/website/versioned_docs/version-4.0/guides/custom-logger/usage.ts b/website/versioned_docs/version-4.0/guides/custom-logger/usage.ts new file mode 100644 index 000000000000..57196af7d9f6 --- /dev/null +++ b/website/versioned_docs/version-4.0/guides/custom-logger/usage.ts @@ -0,0 +1,25 @@ +import { CheerioCrawler, Configuration } from 'crawlee'; +import { WinstonAdapter, winstonLogger } from './implementation'; + +// Wrap your Winston logger in the adapter and pass it to Configuration +const config = new Configuration({ + loggerProvider: new WinstonAdapter(winstonLogger), +}); + +const crawler = new CheerioCrawler( + { + async requestHandler({ request, $, log }) { + // `log` here is the per-crawler scoped CrawleeLogger instance + // 
backed by your Winston adapter. + log.info(`Processing ${request.url}`); + + const title = $('title').text(); + log.debug('Page title extracted', { title }); + + console.log(`Title: ${title}`); + }, + }, + config, +); + +await crawler.run(['https://crawlee.dev']); From 69e3096a239e5f7cfead4046a2654ae1d3460b38 Mon Sep 17 00:00:00 2001 From: Pangerkumzuk Longkumer <73515951+pangerlkr@users.noreply.github.com> Date: Mon, 9 Mar 2026 11:14:55 +0530 Subject: [PATCH 3/8] docs: add custom logger guide (closes #3460) --- .../guides/custom-logger/custom-logger.mdx | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx diff --git a/website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx b/website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx new file mode 100644 index 000000000000..272b12b789f7 --- /dev/null +++ b/website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx @@ -0,0 +1,60 @@ +--- +id: custom-logger +title: Using a custom logger +description: Replace Crawlee's default logger with Winston, Pino, or any logging library +--- + +import CodeBlock from '@theme/CodeBlock'; + +import ImplementationSource from '!!raw-loader!./implementation.ts'; +import UsageSource from '!!raw-loader!./usage.ts'; + +By default, Crawlee uses `@apify/log` for all internal logging. Starting with v4, you can +replace this with any logger that suits your infrastructure — Winston, Pino, Bunyan, a +custom JSON sink, or whatever you already use in production. + +This is useful when you want centralized structured logs, ship logs to an external service +(e.g. Datadog, ELK, CloudWatch), or enforce a consistent log format across your entire +application. + +## The `CrawleeLogger` interface + +Crawlee expects a logger that satisfies the `CrawleeLogger` interface. 
The easiest way to +build one is to extend the `BaseCrawleeLogger` abstract class and implement two methods: + +- **`log(level, message, data?)`** — the core dispatch method called for every log entry. +- **`_createChild(options)`** — returns a new logger instance scoped to a prefix (e.g. the crawler name). + +All other methods (`info`, `debug`, `warning`, `error`, `warningOnce`, etc.) are provided +for free by `BaseCrawleeLogger`. + +## Example: Winston adapter + +The following adapter wraps a standard Winston logger: + +<CodeBlock className="language-ts">{ImplementationSource}</CodeBlock> + +### Wiring the adapter into a crawler + +Pass a new instance of your adapter to `Configuration` via the `loggerProvider` option, +then hand that `Configuration` object to your crawler: + +<CodeBlock className="language-ts">{UsageSource}</CodeBlock> + +The `log` object available inside `requestHandler` is a child logger scoped to the +crawler, so prefix-tagged entries like `[CheerioCrawler] Processing ...` appear in your +Winston output automatically. + +## Using a different logging library + +The same pattern works for any library. Create a class that extends `BaseCrawleeLogger`, +map the numeric `level` argument (0 = error, 1 = warning, 3 = info, 5 = debug) to your +library's level constants inside `log()`, and delegate child-logger creation in +`_createChild()`. You only need those two methods — everything else is handled for you. + +## Controlling log level + +When using the default `@apify/log`, the `logLevel` option in `Configuration` (or the +`CRAWLEE_LOG_LEVEL` environment variable) controls verbosity. When you supply your own +logger via `loggerProvider`, Crawlee delegates level filtering entirely to your logger, +so configure it there. 
From 51172dd34c9f737fb2b96feecbcfb46511af5b63 Mon Sep 17 00:00:00 2001 From: Pangerkumzuk Longkumer <73515951+pangerlkr@users.noreply.github.com> Date: Mon, 9 Mar 2026 11:16:52 +0530 Subject: [PATCH 4/8] docs: add custom-logger to version-4.0 sidebar --- .../version-4.0-sidebars.json | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 website/versioned_sidebars/version-4.0-sidebars.json diff --git a/website/versioned_sidebars/version-4.0-sidebars.json b/website/versioned_sidebars/version-4.0-sidebars.json new file mode 100644 index 000000000000..0be3a42a7fc7 --- /dev/null +++ b/website/versioned_sidebars/version-4.0-sidebars.json @@ -0,0 +1,56 @@ +{ + "docs": [ + "quick-start/quick-start", + { + "type": "category", + "label": "Introduction", + "collapsed": false, + "link": { + "type": "doc", + "id": "introduction/introduction" + }, + "items": [ + "introduction/setting-up", + "introduction/first-crawler", + "introduction/adding-urls", + "introduction/real-world-project", + "introduction/crawling", + "introduction/scraping", + "introduction/saving-data", + "introduction/refactoring", + "introduction/deployment" + ] + }, + { + "type": "category", + "label": "Guides", + "link": { + "type": "generated-index", + "title": "Guides", + "slug": "/guides", + "keywords": [ + "guides" + ] + }, + "items": [ + "guides/request-storage", + "guides/result-storage", + "guides/configuration", + "guides/cheerio-crawler-guide", + "guides/javascript-rendering", + "guides/proxy-management", + "guides/session-management", + "guides/scaling-crawlers", + "guides/avoid-blocking", + "guides/jsdom-crawler-guide", + "guides/got-scraping", + "guides/typescript-project", + "guides/docker-images", + "guides/running-in-web-server/running-in-web-server", + "guides/parallel-scraping/parallel-scraping-guide", + "guides/custom-http-client/custom-http-client", + "guides/custom-logger/custom-logger" + ] + } + ] +} From f4d91646255b0b4f5e5438307d4603fd3eff77a7 Mon Sep 17 00:00:00 
2001 From: Pangerkumzuk Longkumer <73515951+pangerlkr@users.noreply.github.com> Date: Tue, 10 Mar 2026 21:02:51 +0530 Subject: [PATCH 5/8] fix: restore accidentally deleted sidebar sections, keep custom-logger entry --- .../version-4.0-sidebars.json | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/website/versioned_sidebars/version-4.0-sidebars.json b/website/versioned_sidebars/version-4.0-sidebars.json index 0be3a42a7fc7..c8482bc3ea04 100644 --- a/website/versioned_sidebars/version-4.0-sidebars.json +++ b/website/versioned_sidebars/version-4.0-sidebars.json @@ -35,6 +35,7 @@ "items": [ "guides/request-storage", "guides/result-storage", + "guides/http-clients", "guides/configuration", "guides/cheerio-crawler-guide", "guides/javascript-rendering", @@ -43,14 +44,104 @@ "guides/scaling-crawlers", "guides/avoid-blocking", "guides/jsdom-crawler-guide", + "guides/impit-http-client/impit-http-client", "guides/got-scraping", "guides/typescript-project", "guides/docker-images", + "guides/stagehand-crawler-guide", "guides/running-in-web-server/running-in-web-server", "guides/parallel-scraping/parallel-scraping-guide", "guides/custom-http-client/custom-http-client", "guides/custom-logger/custom-logger" ] + }, + { + "type": "category", + "label": "Deployment", + "link": { + "type": "generated-index", + "title": "Deployment guides", + "description": "Here you can find guides on how to deploy your crawlers to various cloud providers.", + "slug": "/deployment" + }, + "items": [ + { + "type": "doc", + "id": "deployment/apify-platform", + "label": "Deploy on Apify" + }, + { + "type": "category", + "label": "Deploy on AWS", + "items": [ + "deployment/aws-cheerio", + "deployment/aws-browsers" + ] + }, + { + "type": "category", + "label": "Deploy to Google Cloud", + "items": [ + "deployment/gcp-cheerio", + "deployment/gcp-browsers" + ] + } + ] + }, + { + "type": "category", + "label": "Examples", + "link": { + "type": "generated-index", + "title": "Examples", + 
"slug": "/examples", + "keywords": [ + "examples" + ] + }, + "items": [ + { + "type": "autogenerated", + "dirName": "examples" + } + ] + }, + { + "type": "category", + "label": "Experiments", + "link": { + "type": "generated-index", + "title": "Experiments", + "slug": "/experiments", + "keywords": [ + "experiments", + "experimental-features" + ] + }, + "items": [ + { + "type": "autogenerated", + "dirName": "experiments" + } + ] + }, + { + "type": "category", + "label": "Upgrading", + "link": { + "type": "generated-index", + "title": "Upgrading", + "slug": "/upgrading", + "keywords": [ + "upgrading" + ] + }, + "items": [ + { + "type": "autogenerated", + "dirName": "upgrading" + } + ] } ] } From 7b6239a36de708255222ca5c20f9aafdbb59243e Mon Sep 17 00:00:00 2001 From: Pangerkumzuk Longkumer <73515951+pangerlkr@users.noreply.github.com> Date: Fri, 13 Mar 2026 19:31:09 +0530 Subject: [PATCH 6/8] fix: use correct BaseCrawleeLogger method names and LogLevel values --- .../guides/custom-logger/implementation.ts | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts b/website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts index 2ab04a735122..696e7a0f1eb6 100644 --- a/website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts +++ b/website/versioned_docs/version-4.0/guides/custom-logger/implementation.ts @@ -3,18 +3,19 @@ import { BaseCrawleeLogger } from 'crawlee'; import type { CrawleeLogger, CrawleeLoggerOptions } from 'crawlee'; // Map Crawlee numeric log levels to Winston level strings +// LogLevel values: ERROR=1, SOFT_FAIL=2, WARNING=3, INFO=4, PERF=5, DEBUG=6 const CRAWLEE_LEVEL_TO_WINSTON: Record<number, string> = { - 0: 'error', // ERROR - 1: 'warn', // WARNING - 2: 'info', // INFO (SOFT_FAIL) - 3: 'info', // INFO - 4: 'debug', // PERF - 5: 'debug', // DEBUG + 1: 'error', // ERROR + 2: 'warn', // SOFT_FAIL + 3: 'warn', // WARNING + 4: 
'info', 
// INFO + 5: 'debug', // PERF + 6: 'debug', // DEBUG }; /** * Adapter that bridges Crawlee's CrawleeLogger interface to a Winston logger. - * Extend BaseCrawleeLogger and implement only `log()` and `_createChild()`. + * Extend BaseCrawleeLogger and implement only `logWithLevel()` and `createChild()`. */ export class WinstonAdapter extends BaseCrawleeLogger { constructor( @@ -24,13 +25,13 @@ export class WinstonAdapter extends BaseCrawleeLogger { super(options); } - protected log(level: number, message: string, data?: Record<string, unknown>): void { + logWithLevel(level: number, message: string, data?: Record<string, unknown>): void { const winstonLevel = CRAWLEE_LEVEL_TO_WINSTON[level] ?? 'info'; const prefix = this.getOptions().prefix; this.logger.log(winstonLevel, message, { ...data, prefix }); } - protected _createChild(options: Partial<CrawleeLoggerOptions>): CrawleeLogger { + protected createChild(options: Partial<CrawleeLoggerOptions>): CrawleeLogger { return new WinstonAdapter( this.logger.child({ prefix: options.prefix }), { ...this.getOptions(), ...options }, From 3d163e99b9c5673ebf438b202e0e090d95cedc6d Mon Sep 17 00:00:00 2001 From: Pangerkumzuk Longkumer <73515951+pangerlkr@users.noreply.github.com> Date: Fri, 13 Mar 2026 19:31:57 +0530 Subject: [PATCH 7/8] fix: replace Configuration loggerProvider with serviceLocator.setLogger() Updated logger configuration to use serviceLocator for global logger registration. 
--- .../version-4.0/guides/custom-logger/usage.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/website/versioned_docs/version-4.0/guides/custom-logger/usage.ts b/website/versioned_docs/version-4.0/guides/custom-logger/usage.ts index 57196af7d9f6..d3eaa432567c 100644 --- a/website/versioned_docs/version-4.0/guides/custom-logger/usage.ts +++ b/website/versioned_docs/version-4.0/guides/custom-logger/usage.ts @@ -1,10 +1,9 @@ -import { CheerioCrawler, Configuration } from 'crawlee'; +import { CheerioCrawler, serviceLocator } from 'crawlee'; import { WinstonAdapter, winstonLogger } from './implementation'; -// Wrap your Winston logger in the adapter and pass it to Configuration -const config = new Configuration({ - loggerProvider: new WinstonAdapter(winstonLogger), -}); +// Register the Winston adapter as Crawlee's global logger +// This must be done before creating any crawlers +serviceLocator.setLogger(new WinstonAdapter(winstonLogger)); const crawler = new CheerioCrawler( { @@ -19,7 +18,6 @@ const crawler = new CheerioCrawler( console.log(`Title: ${title}`); }, }, - config, ); await crawler.run(['https://crawlee.dev']); From 885ab37577b6e2eab3ef2b73be4c14b74ab949bf Mon Sep 17 00:00:00 2001 From: Pangerkumzuk Longkumer <73515951+pangerlkr@users.noreply.github.com> Date: Fri, 13 Mar 2026 19:33:00 +0530 Subject: [PATCH 8/8] fix: update docs to use correct method names and serviceLocator.setLogger() API --- .../guides/custom-logger/custom-logger.mdx | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx b/website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx index 272b12b789f7..3eea37178ab0 100644 --- a/website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx +++ b/website/versioned_docs/version-4.0/guides/custom-logger/custom-logger.mdx @@ -22,8 +22,8 @@ application. 
Crawlee expects a logger that satisfies the `CrawleeLogger` interface. The easiest way to build one is to extend the `BaseCrawleeLogger` abstract class and implement two methods: -- **`log(level, message, data?)`** — the core dispatch method called for every log entry. -- **`_createChild(options)`** — returns a new logger instance scoped to a prefix (e.g. the crawler name). +- **`logWithLevel(level, message, data?)`** — the core dispatch method called for every log entry. +- **`createChild(options)`** — returns a new logger instance scoped to a prefix (e.g. the crawler name). All other methods (`info`, `debug`, `warning`, `error`, `warningOnce`, etc.) are provided for free by `BaseCrawleeLogger`. @@ -36,8 +36,8 @@ The following adapter wraps a standard Winston logger: ### Wiring the adapter into a crawler -Pass a new instance of your adapter to `Configuration` via the `loggerProvider` option, -then hand that `Configuration` object to your crawler: +Register your adapter with `serviceLocator.setLogger()` **before** creating any crawlers. +This sets it as the global Crawlee logger: <CodeBlock className="language-ts">{UsageSource}</CodeBlock> @@ -48,13 +48,13 @@ Winston output automatically. ## Using a different logging library The same pattern works for any library. Create a class that extends `BaseCrawleeLogger`, -map the numeric `level` argument (0 = error, 1 = warning, 3 = info, 5 = debug) to your -library's level constants inside `log()`, and delegate child-logger creation in -`_createChild()`. You only need those two methods — everything else is handled for you. +map the numeric `level` argument to your library's level constants inside `logWithLevel()`, +and delegate child-logger creation in `createChild()`. You only need those two methods — +everything else is handled for you. ## Controlling log level -When using the default `@apify/log`, the `logLevel` option in `Configuration` (or the -`CRAWLEE_LOG_LEVEL` environment variable) controls verbosity. 
When you supply your own -logger via `loggerProvider`, Crawlee delegates level filtering entirely to your logger, -so configure it there. +Level filtering is handled entirely by your custom logger implementation. Configure the +desired verbosity directly on your underlying library (e.g. set `level: 'debug'` on your +Winston logger) — Crawlee passes every log call through to your `logWithLevel()` method +without any filtering of its own.