Skip to content

Commit e2c6784

Browse files
authored
fix: KVS.getPublicUrl() reads the public URL directly from storage client (#3286)
Removes the incorrect `KVS.getPublicUrl()` implementation from `@crawlee/core` and proxies the call to the storage client. Closes #3272 Closes #3076
1 parent 0b0a23e commit e2c6784

File tree

6 files changed

+38
-6
lines changed

6 files changed

+38
-6
lines changed

docs/upgrading/upgrading_v4.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,6 @@ The `CrawlingContext.response` property is now of type [`Response`](https://deve
107107

108108
The crawling context in the `FileDownload` crawler no longer includes the `body` and `stream` properties. These can be accessed directly via the `response` property instead, e.g. `context.response.bytes()` or `context.response.body`.
109109

110+
## `KeyValueStore.getPublicUrl` is now async
111+
112+
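The `KeyValueStore.getPublicUrl` method is now asynchronous and reads the public URL directly from the storage client. It returns `Promise<string | undefined>`, so callers must `await` the result and handle `undefined`, which is returned when the record does not exist or is not stored as a file.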

packages/core/src/crawlers/error_snapshotter.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,9 @@ export class ErrorSnapshotter {
8888

8989
return {
9090
screenshotFileName,
91-
screenshotFileUrl: screenshotFileName && keyValueStore.getPublicUrl(screenshotFileName),
91+
screenshotFileUrl: screenshotFileName && (await keyValueStore.getPublicUrl(screenshotFileName)),
9292
htmlFileName,
93-
htmlFileUrl: htmlFileName && keyValueStore.getPublicUrl(htmlFileName),
93+
htmlFileUrl: htmlFileName && (await keyValueStore.getPublicUrl(htmlFileName)),
9494
};
9595
} catch {
9696
return {};

packages/core/src/storages/key_value_store.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -483,10 +483,13 @@ export class KeyValueStore {
483483

484484
/**
485485
* Returns a file URL for the given key.
486+
*
487+
* If the record does not exist or has no associated file path (i.e., it is not stored as a file), returns `undefined`.
488+
*
489+
* @param key The key of the record to generate the public URL for.
486490
*/
487-
getPublicUrl(key: string): string {
488-
const name = this.name ?? this.config.get('defaultKeyValueStoreId');
489-
return `file://${process.cwd()}/storage/key_value_stores/${name}/${key}`;
491+
async getPublicUrl(key: string): Promise<string | undefined> {
492+
return this.client.getRecordPublicUrl(key);
490493
}
491494

492495
/**

packages/memory-storage/src/fs/key-value-store/fs.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@ export class KeyValueFileSystemEntry implements StorageImplementation<InternalKe
3434
file = await readFile(this.filePath);
3535
} catch {
3636
try {
37+
const noExtFilePath = resolve(this.storeDirectory, this.rawRecord.key);
3738
// Try without extension
38-
file = await readFile(resolve(this.storeDirectory, this.rawRecord.key));
39+
file = await readFile(noExtFilePath);
3940
memoryStorageLog.warning(
4041
[
4142
`Key-value entry "${this.rawRecord.key}" for store ${basename(
@@ -45,6 +46,7 @@ export class KeyValueFileSystemEntry implements StorageImplementation<InternalKe
4546
].join('\n'),
4647
);
4748
file = file.toString('utf-8');
49+
this.filePath = noExtFilePath;
4850
} catch {
4951
// This is impossible to happen, but just in case
5052
throw new Error(`Could not find file at ${this.filePath}`);
@@ -56,6 +58,7 @@ export class KeyValueFileSystemEntry implements StorageImplementation<InternalKe
5658
return {
5759
...this.rawRecord,
5860
value: file,
61+
filePath: this.filePath,
5962
};
6063
}
6164

packages/memory-storage/src/resource-clients/key-value-store.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ export interface InternalKeyRecord {
3232
value: Buffer | string;
3333
contentType?: string;
3434
extension: string;
35+
filePath?: string;
3536
}
3637

3738
export class KeyValueStoreClient extends BaseClient {
@@ -184,6 +185,27 @@ export class KeyValueStoreClient extends BaseClient {
184185
};
185186
}
186187

188+
/**
189+
* Looks up the public location of a specific record in the key-value store; the value returned is the storage entry's `filePath` as-is, without adding a `file://` scheme.
190+
*
191+
* Returns `undefined` if the record does not exist or has no associated file path (i.e., it is not stored as a file).
192+
* @param key The key of the record to generate the public URL for.
193+
*/
194+
async getRecordPublicUrl(key: string): Promise<string | undefined> {
195+
s.string().parse(key);
196+
197+
// Check by id
198+
const existingStoreById = await findOrCacheKeyValueStoreByPossibleId(this.client, this.name ?? this.id);
199+
200+
if (!existingStoreById) {
201+
this.throwOnNonExisting(StorageTypes.KeyValueStore);
202+
}
203+
204+
const storageEntry = await existingStoreById.keyValueEntries.get(key)?.get();
205+
206+
return storageEntry?.filePath;
207+
}
208+
187209
/**
188210
* Tests whether a record with the given key exists in the key-value store without retrieving its value.
189211
*

packages/types/src/storages.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ export interface KeyValueStoreClient {
168168
delete(): Promise<void>;
169169
listKeys(options?: KeyValueStoreClientListOptions): Promise<KeyValueStoreClientListData>;
170170
recordExists(key: string): Promise<boolean>;
171+
getRecordPublicUrl(key: string): Promise<string | undefined>;
171172
getRecord(key: string, options?: KeyValueStoreClientGetRecordOptions): Promise<KeyValueStoreRecord | undefined>;
172173
setRecord(record: KeyValueStoreRecord, options?: KeyValueStoreRecordOptions): Promise<void>;
173174
deleteRecord(key: string): Promise<void>;

0 commit comments

Comments
 (0)