Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 1 addition & 123 deletions packages/core/src/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,15 @@ import { EventEmitter } from 'node:events';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';

import type { MemoryStorageOptions } from '@crawlee/memory-storage';
import { MemoryStorage } from '@crawlee/memory-storage';
import type { Dictionary, StorageClient } from '@crawlee/types';
import type { Dictionary } from '@crawlee/types';
import { pathExistsSync } from 'fs-extra/esm';

import log, { LogLevel } from '@apify/log';

import { type EventManager } from './events/event_manager.js';
import { LocalEventManager } from './events/local_event_manager.js';
import type { StorageManager } from './storages/storage_manager.js';
import { type Constructor, entries } from './typedefs.js';

export interface ConfigurationOptions {
/**
* Defines storage client to be used.
* @default {@apilink MemoryStorage}
*/
storageClient?: StorageClient;

/**
* Defines the Event Manager to be used.
* @default {@apilink EventManager}
*/
eventManager?: EventManager;

/**
* Could be used to adjust the storage client behavior
* e.g. {@apilink MemoryStorageOptions} could be used to adjust the {@apilink MemoryStorage} behavior.
*/
storageClientOptions?: Dictionary;

/**
* Default dataset id.
*
Expand Down Expand Up @@ -289,19 +267,7 @@ export class Configuration {
persistStorage: true,
};

/**
* Provides access to the current-instance-scoped Configuration without passing it around in parameters.
* @internal
*/
static storage = new AsyncLocalStorage<Configuration>();

protected options!: Map<keyof ConfigurationOptions, ConfigurationOptions[keyof ConfigurationOptions]>;
protected services = new Map<string, unknown>();

/** @internal */
static globalConfig?: Configuration;

public readonly storageManagers = new Map<Constructor, StorageManager>();

/**
* Creates new `Configuration` instance with provided options. Env vars will have precedence over those.
Expand Down Expand Up @@ -391,72 +357,6 @@ export class Configuration {
this.getGlobalConfig().set(key, value);
}

/**
* Returns cached instance of {@apilink StorageClient} using options as defined in the environment variables or in
* this {@apilink Configuration} instance. Only first call of this method will create the client, following calls will
* return the same client instance.
*
* Caching works based on the `storageClientOptions`, so calling this method with different options will return
* multiple instances, one for each variant of the options.
* @internal
*/
getStorageClient(): StorageClient {
if (this.options.has('storageClient')) {
return this.options.get('storageClient') as StorageClient;
}

const options = this.options.get('storageClientOptions') as Dictionary;
return this.createMemoryStorage(options);
}

getEventManager(): EventManager {
if (this.options.has('eventManager')) {
return this.options.get('eventManager') as EventManager;
}

if (this.services.has('eventManager')) {
return this.services.get('eventManager') as EventManager;
}

const eventManager = new LocalEventManager(this);
this.services.set('eventManager', eventManager);

return eventManager;
}

/**
* Creates an instance of MemoryStorage using options as defined in the environment variables or in this `Configuration` instance.
* @internal
*/
createMemoryStorage(options: MemoryStorageOptions = {}): MemoryStorage {
const cacheKey = `MemoryStorage-${JSON.stringify(options)}`;

if (this.services.has(cacheKey)) {
return this.services.get(cacheKey) as MemoryStorage;
}

const storage = new MemoryStorage({
persistStorage: this.get('persistStorage'),
// Override persistStorage if user provides it via storageClientOptions
...options,
});
this.services.set(cacheKey, storage);

return storage;
}

useStorageClient(client: StorageClient): void {
this.options.set('storageClient', client);
}

static useStorageClient(client: StorageClient): void {
this.getGlobalConfig().useStorageClient(client);
}

useEventManager(events: EventManager): void {
this.options.set('eventManager', events);
}

/**
* Returns the global configuration instance. It will respect the environment variables.
*/
Expand All @@ -469,28 +369,6 @@ export class Configuration {
return Configuration.globalConfig;
}

/**
* Gets default {@apilink StorageClient} instance.
*/
static getStorageClient(): StorageClient {
return this.getGlobalConfig().getStorageClient();
}

/**
* Gets default {@apilink EventManager} instance.
*/
static getEventManager(): EventManager {
return this.getGlobalConfig().getEventManager();
}

/**
* Resets global configuration instance. The default instance holds configuration based on env vars,
* if we want to change them, we need to first reset the global state. Used mainly for testing purposes.
*/
static resetGlobalState(): void {
delete this.globalConfig;
}

protected buildOptions(options: ConfigurationOptions) {
// try to load configuration from crawlee.json as the baseline
const path = join(process.cwd(), 'crawlee.json');
Expand Down
Loading