Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 158 additions & 0 deletions cognite/extractorutils/unstable/configuration/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,164 @@ def _log_handler_default() -> list[LogHandlerConfig]:
return [LogConsoleHandlerConfig(type="console", level=LogLevel.INFO)]


class FileSizeConfig:
    """
    Configuration parameter for setting a file size.

    A size is written either as a plain number of bytes (``"100"`` or ``100``) or as a number followed by a
    case-insensitive unit: ``KB``/``MB``/``GB``/``TB`` (powers of 1000) or ``KiB``/``MiB``/``GiB``/``TiB``
    (powers of 1024). Fractional values such as ``"2.5TB"`` are accepted; the resulting byte count is truncated
    to a whole number of bytes.
    """

    def __init__(self, expression: str | int) -> None:
        """
        Parse ``expression`` and store both the byte count and the expression it came from.

        Args:
            expression: File size expression, e.g. ``"25MB"``, ``"0.5MiB"`` or ``1000``.

        Raises:
            InvalidConfigError: If the expression is not a valid file size.
        """
        self._bytes, self._expression = FileSizeConfig._parse_expression(expression)

    @classmethod
    def __get_pydantic_core_schema__(cls, source_type: Any, handler: GetCoreSchemaHandler) -> CoreSchema:  # noqa: ANN401
        """
        Pydantic hook to define how this class should be serialized/deserialized.

        This allows the class to be used as a field in Pydantic models. The raw value is first validated as a
        string or an integer and then passed to the constructor of this class.
        """
        return core_schema.no_info_after_validator_function(cls, handler(str | int))

    def __eq__(self, other: object) -> bool:
        """
        Two FileSizeConfig objects are equal if they have the same number of bytes.
        """
        if not isinstance(other, FileSizeConfig):
            return NotImplemented
        return self._bytes == other._bytes

    def __hash__(self) -> int:
        """
        Hash function for FileSizeConfig based on the number of bytes.
        """
        return hash(self._bytes)

    @classmethod
    def _parse_expression(cls, expression: str | int) -> tuple[int, str]:
        """
        Convert a file size expression into a number of bytes.

        Args:
            expression: Plain number of bytes, or a number followed by one of the supported units.

        Returns:
            A tuple of the size in bytes and the expression as a string (whitespace and casing untouched).

        Raises:
            InvalidConfigError: If the expression is malformed, not finite, or uses an unknown unit.
        """
        sizes = {
            "kb": 1000,
            "mb": 1_000_000,
            "gb": 1_000_000_000,
            "tb": 1_000_000_000_000,
            "kib": 1024,
            "mib": 1_048_576,
            "gib": 1_073_741_824,
            "tib": 1_099_511_627_776,
        }

        # The pydantic schema validates the input as ``str | int``, so a bare number in a config file can
        # arrive here as an int, which has no string methods.
        expression = str(expression)
        expression_normalized = expression.strip().lower()

        # Unit-less input: let float() accept every plain numeric form, including e.g. "1e3".
        try:
            num_value = float(expression_normalized)
        except ValueError:
            pass
        else:
            try:
                return int(num_value), expression
            except (OverflowError, ValueError) as e:
                # int() raises OverflowError for infinities and ValueError for NaN.
                raise InvalidConfigError(
                    f"Invalid file size format: '{expression}'. Must be a finite number."
                ) from e

        match = re.match(r"^([0-9]*\.?[0-9]+)\s*([a-zA-Z]*)$", expression_normalized)
        if not match:
            raise InvalidConfigError(f"Invalid file size format: '{expression}'. Must start with a number.")

        num_str, unit_str = match.groups()
        if unit_str and unit_str not in sizes:
            raise InvalidConfigError(f"Invalid unit for file size: '{unit_str}'. Valid units: {list(sizes.keys())}")
        multiplier = sizes[unit_str] if unit_str else 1

        # Scale using integer arithmetic on the decimal digits. Going through float can land just below the
        # exact product (1.001 * 1000 == 1000.9999999999999), and truncating that would drop a byte.
        whole, _, fraction = num_str.partition(".")
        try:
            digits = int(whole + fraction)
        except ValueError as e:
            # int() can refuse extremely long digit strings.
            raise InvalidConfigError(f"Invalid numeric value in file size: '{num_str}'") from e

        return digits * multiplier // 10 ** len(fraction), expression

    @property
    def bytes(self) -> int:
        """
        File size in bytes.
        """
        return self._bytes

    @property
    def kilobytes(self) -> float:
        """
        File size in kilobytes (1000 bytes).
        """
        return self._bytes / 1000

    @property
    def megabytes(self) -> float:
        """
        File size in megabytes (1000 kilobytes).
        """
        return self._bytes / 1_000_000

    @property
    def gigabytes(self) -> float:
        """
        File size in gigabytes (1000 megabytes).
        """
        return self._bytes / 1_000_000_000

    @property
    def terabytes(self) -> float:
        """
        File size in terabytes (1000 gigabytes).
        """
        return self._bytes / 1_000_000_000_000

    @property
    def kibibytes(self) -> float:
        """
        File size in kibibytes (1024 bytes).
        """
        return self._bytes / 1024

    @property
    def mebibytes(self) -> float:
        """
        File size in mebibytes (1024 kibibytes).
        """
        return self._bytes / 1_048_576

    @property
    def gibibytes(self) -> float:
        """
        File size in gibibytes (1024 mebibytes).
        """
        return self._bytes / 1_073_741_824

    @property
    def tebibytes(self) -> float:
        """
        File size in tebibytes (1024 gibibytes).
        """
        return self._bytes / 1_099_511_627_776

    def __int__(self) -> int:
        """
        Returns the file size as bytes.
        """
        return int(self._bytes)

    def __float__(self) -> float:
        """
        Returns the file size as bytes.
        """
        return float(self._bytes)

    def __str__(self) -> str:
        """
        Returns the expression this file size was created from.
        """
        return self._expression

    def __repr__(self) -> str:
        """
        Returns the expression this file size was created from.
        """
        return self._expression


class RawDestinationConfig(ConfigModel):
"""
Configuration parameters for using Raw.
Expand Down
79 changes: 79 additions & 0 deletions tests/test_unstable/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@
from io import StringIO

import pytest
from pydantic import Field

from cognite.client.credentials import OAuthClientCredentials
from cognite.extractorutils.exceptions import InvalidConfigError
from cognite.extractorutils.unstable.configuration.loaders import ConfigFormat, load_io
from cognite.extractorutils.unstable.configuration.models import (
ConfigModel,
ConnectionConfig,
FileSizeConfig,
LogLevel,
TimeIntervalConfig,
_ClientCredentialsConfig,
Expand Down Expand Up @@ -215,6 +219,81 @@ def test_from_env() -> None:
assert len(client.assets.list(limit=1)) == 1


# Minimal config model for exercising FileSizeConfig fields; each field has a default that applies when it is omitted.
class CustomFileConfig(ConfigModel):
    # Defaults to the expression "1MB".
    file_size: FileSizeConfig = Field(default_factory=lambda: FileSizeConfig("1MB"))
    # Defaults to the expression "10MiB".
    file_max_size: FileSizeConfig = Field(default_factory=lambda: FileSizeConfig("10MiB"))


def test_parse_file_size() -> None:
    """
    File sizes with decimal and binary units are parsed when a config is loaded from YAML.
    """
    yaml_text = """
file_size: 25MB
file_max_size: 10MiB
"""
    loaded = load_io(StringIO(yaml_text), ConfigFormat.YAML, CustomFileConfig)

    # Decimal unit: compared by value, by byte count, and by the retained expression.
    assert loaded.file_size == FileSizeConfig("25MB")
    assert loaded.file_size.bytes == 25_000_000
    assert loaded.file_size._expression == "25MB"

    # Binary unit.
    assert loaded.file_max_size == FileSizeConfig("10MiB")
    assert loaded.file_max_size.bytes == 10_485_760


def test_file_size_config_default_values() -> None:
    """
    A config created without arguments uses the defaults declared on the model.
    """
    defaults = CustomFileConfig()

    assert defaults.file_size == FileSizeConfig("1MB")
    assert defaults.file_max_size == FileSizeConfig("10MiB")

    assert defaults.file_size.bytes == 1_000_000
    assert defaults.file_max_size.bytes == 10_485_760


def test_file_size_config_partial_fields() -> None:
    """
    A field missing from the YAML document keeps its default while the given one is parsed.
    """
    yaml_text = """
file_size: 5MB
"""
    loaded = load_io(StringIO(yaml_text), ConfigFormat.YAML, CustomFileConfig)

    assert loaded.file_size == FileSizeConfig("5MB")
    assert loaded.file_max_size == FileSizeConfig("10MiB")


def test_file_size_config_equality() -> None:
    """
    Equality is decided by the byte count, not by how the size was written.
    """
    two_thousand_mb = FileSizeConfig("2000MB")
    two_gb = FileSizeConfig("2GB")
    one_gb = FileSizeConfig("1GB")

    assert two_thousand_mb.bytes == 2_000_000_000
    assert two_gb.bytes == 2_000_000_000
    assert one_gb.bytes == 1_000_000_000

    assert two_thousand_mb == two_gb
    assert one_gb != two_thousand_mb


@pytest.mark.parametrize(
    "expression",
    [
        "12.3kbkb",
        "10XY",
        "abcMB",
        "5.5.5GB",
        "MB",
        "",
        " ",
        "10 M B",
        "10MB extra",
        "tenMB",
    ],
)
def test_file_size_config_invalid(expression: str) -> None:
    """
    Malformed file size expressions are rejected with InvalidConfigError.
    """
    with pytest.raises(InvalidConfigError):
        FileSizeConfig(expression)


@pytest.mark.parametrize(
    "expression, value",
    [
        ("10MB", 10_000_000),
        ("1GB", 1_000_000_000),
        ("512KiB", 524_288),
        ("2.5TB", 2_500_000_000_000),
        ("100", 100),
        ("0.5MiB", 524_288),
        ("1.2GB", 1_200_000_000),
    ],
)
def test_file_size_config_valid(expression: str, value: int) -> None:
    """
    Valid expressions keep their original text and resolve to the expected byte count.
    """
    parsed = FileSizeConfig(expression)

    assert parsed._expression == expression
    assert parsed.bytes == value


def test_setting_log_level_from_any_case() -> None:
log_level = LogLevel("DEBUG")
assert log_level == LogLevel.DEBUG
Expand Down
Loading