Skip to content

Commit ba403a8

Browse files
authored
DOG-6369 : Add Dataset support and upload queue size (#487)
* Add Dataset support and upload queue size
1 parent 981425d commit ba403a8

File tree

3 files changed

+143
-22
lines changed

3 files changed

+143
-22
lines changed

cognite/extractorutils/unstable/configuration/models.py

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
OAuthClientCertificate,
2525
OAuthClientCredentials,
2626
)
27-
from cognite.client.data_classes import Asset
27+
from cognite.client.data_classes import Asset, DataSet
2828
from cognite.extractorutils.configtools._util import _load_certificate_data
2929
from cognite.extractorutils.exceptions import InvalidConfigError
3030
from cognite.extractorutils.metrics import AbstractMetricsPusher, CognitePusher, PrometheusPusher
@@ -246,6 +246,27 @@ class IntegrationConfig(ConfigModel):
246246
external_id: str
247247

248248

249+
class EitherIdConfig(ConfigModel):
250+
"""
251+
Configuration parameter representing an ID in CDF, which can either be an external or internal ID.
252+
253+
An EitherId can only hold one ID type, not both.
254+
"""
255+
256+
id: int | None = None
257+
external_id: str | None = None
258+
259+
@property
260+
def either_id(self) -> EitherId:
261+
"""
262+
Returns an EitherId object based on the current configuration.
263+
264+
Raises:
265+
TypeError: If both id and external_id are None, or if both are set.
266+
"""
267+
return EitherId(id=self.id, external_id=self.external_id)
268+
269+
249270
class ConnectionConfig(ConfigModel):
250271
"""
251272
Configuration for connecting to a Cognite Data Fusion project.
@@ -444,27 +465,6 @@ class LogConsoleHandlerConfig(ConfigModel):
444465
LogHandlerConfig = Annotated[LogFileHandlerConfig | LogConsoleHandlerConfig, Field(discriminator="type")]
445466

446467

447-
class EitherIdConfig(ConfigModel):
448-
"""
449-
Configuration parameter representing an ID in CDF, which can either be an external or internal ID.
450-
451-
An EitherId can only hold one ID type, not both.
452-
"""
453-
454-
id: int | None = None
455-
external_id: str | None = None
456-
457-
@property
458-
def either_id(self) -> EitherId:
459-
"""
460-
Returns an EitherId object based on the current configuration.
461-
462-
Raises:
463-
TypeError: If both id and external_id are None, or if both are set.
464-
"""
465-
return EitherId(id=self.id, external_id=self.external_id)
466-
467-
468468
class _PushGatewayConfig(ConfigModel):
469469
"""
470470
Configuration for pushing metrics to a Prometheus Push Gateway.
@@ -869,6 +869,30 @@ class ExtractorConfig(ConfigModel):
869869
metrics: MetricsConfig | None = None
870870
log_handlers: list[LogHandlerConfig] = Field(default_factory=_log_handler_default)
871871
retry_startup: bool = True
872+
upload_queue_size: int = 50_000
873+
data_set: EitherIdConfig | None = None
874+
data_set_external_id: str | None = None
875+
876+
def get_data_set(self, cdf_client: CogniteClient) -> DataSet | None:
877+
"""
878+
Retrieves the DataSet object based on the configuration.
879+
880+
Args:
881+
cdf_client: An instance of CogniteClient to use for retrieving the DataSet.
882+
883+
Returns:
884+
DataSet object if data_set, data_set_id, or data_set_external_id is provided; otherwise None.
885+
"""
886+
if self.data_set_external_id:
887+
return cdf_client.data_sets.retrieve(external_id=self.data_set_external_id)
888+
889+
if not self.data_set:
890+
return None
891+
892+
return cdf_client.data_sets.retrieve(
893+
id=self.data_set.either_id.internal_id,
894+
external_id=self.data_set.either_id.external_id,
895+
)
872896

873897

874898
ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)

tests/test_unstable/test_base.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from cognite.extractorutils.unstable.configuration.loaders import ConfigFormat, load_io
1616
from cognite.extractorutils.unstable.configuration.models import (
1717
ConnectionConfig,
18+
ExtractorConfig,
1819
LocalStateStoreConfig,
1920
LogConsoleHandlerConfig,
2021
LogFileHandlerConfig,
@@ -344,3 +345,16 @@ def test_pushgatewayconfig_none_credentials_from_yaml() -> None:
344345
assert pusher.password is None
345346
assert pusher.url == "http://localhost:9091"
346347
assert pusher.job_name == "test-job"
348+
349+
350+
def test_extractor_config_upload_queue_size_with_yaml() -> None:
351+
"""Test upload_queue_size parsing from YAML configuration."""
352+
config_yaml = """
353+
upload-queue-size: 200000
354+
retry-startup: false
355+
"""
356+
stream = StringIO(config_yaml)
357+
config = load_io(stream, ConfigFormat.YAML, ExtractorConfig)
358+
359+
assert config.upload_queue_size == 200_000
360+
assert config.retry_startup is False

tests/test_unstable/test_configuration.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
import os
22
from io import StringIO
3+
from unittest.mock import Mock
34

45
import pytest
56
from pydantic import Field
67

78
from cognite.client.credentials import OAuthClientCredentials
9+
from cognite.client.data_classes import DataSet
810
from cognite.extractorutils.exceptions import InvalidConfigError
911
from cognite.extractorutils.unstable.configuration.loaders import ConfigFormat, load_io
1012
from cognite.extractorutils.unstable.configuration.models import (
1113
ConfigModel,
1214
ConnectionConfig,
15+
EitherIdConfig,
16+
ExtractorConfig,
1317
FileSizeConfig,
1418
LogLevel,
1519
TimeIntervalConfig,
@@ -303,3 +307,82 @@ def test_setting_log_level_from_any_case() -> None:
303307

304308
with pytest.raises(ValueError):
305309
LogLevel("not-a-log-level")
310+
311+
312+
@pytest.mark.parametrize(
313+
"data_set_external_id,data_set_config,expected_call,expected_result_attrs,should_return_none",
314+
[
315+
# Test with data_set_external_id provided
316+
(
317+
"test-dataset",
318+
None,
319+
{"external_id": "test-dataset"},
320+
{"external_id": "test-dataset", "name": "Test Dataset"},
321+
False,
322+
),
323+
# Test with data_set config using internal ID
324+
(
325+
None,
326+
EitherIdConfig(id=12345),
327+
{"id": 12345, "external_id": None},
328+
{"id": 12345, "name": "Test Dataset"},
329+
False,
330+
),
331+
# Test with data_set config using external ID
332+
(
333+
None,
334+
EitherIdConfig(external_id="config-dataset"),
335+
{"id": None, "external_id": "config-dataset"},
336+
{"external_id": "config-dataset", "name": "Config Dataset"},
337+
False,
338+
),
339+
# Test that data_set_external_id takes priority over data_set
340+
(
341+
"priority-dataset",
342+
EitherIdConfig(external_id="should-be-ignored"),
343+
{"external_id": "priority-dataset"},
344+
{"external_id": "priority-dataset", "name": "Priority Dataset"},
345+
False,
346+
),
347+
# Test with neither data_set_external_id nor data_set provided
348+
(
349+
None,
350+
None,
351+
{},
352+
{},
353+
True,
354+
),
355+
],
356+
)
357+
def test_get_data_set_various_configurations(
358+
data_set_external_id: str | None,
359+
data_set_config: EitherIdConfig | None,
360+
expected_call: dict | None,
361+
expected_result_attrs: dict | None,
362+
should_return_none: bool,
363+
) -> None:
364+
"""Test get_data_set method with various configuration scenarios."""
365+
extractor_config = ExtractorConfig(
366+
retry_startup=False,
367+
data_set_external_id=data_set_external_id,
368+
data_set=data_set_config,
369+
)
370+
371+
# Create a mock client instead of using a real one
372+
mock_client = Mock()
373+
374+
if not should_return_none:
375+
mock_dataset = DataSet(**expected_result_attrs)
376+
mock_client.data_sets.retrieve.return_value = mock_dataset
377+
378+
result = extractor_config.get_data_set(mock_client)
379+
380+
if should_return_none:
381+
assert result is None
382+
mock_client.data_sets.retrieve.assert_not_called()
383+
else:
384+
assert result is not None
385+
for attr, value in expected_result_attrs.items():
386+
if attr != "name":
387+
assert getattr(result, attr) == value
388+
mock_client.data_sets.retrieve.assert_called_once_with(**expected_call)

0 commit comments

Comments
 (0)