Skip to content

Commit 065a74f

Browse files
authored
Name validation only two periods (#393)
* add validation for more than two periods in file name
* make ._period_strings public
* pre-commit ran?
* check for three periods in tests
1 parent d968aeb commit 065a74f

File tree

4 files changed

+12
-9
lines changed

4 files changed

+12
-9
lines changed

src/dapla_metadata/datasets/dapla_dataset_path_info.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def __init__(self, dataset_path: ReadablePathLike) -> None:
340340
self.dataset_string = str(dataset_path)
341341
self.dataset_path = UPath(self.dataset_string)
342342
self.dataset_name_sections = self.dataset_path.stem.split("_")
343-
self._period_strings = self._extract_period_strings(self.dataset_name_sections)
343+
self.period_strings = self._extract_period_strings(self.dataset_name_sections)
344344

345345
# Since UPath has a trailing slash after the bucket name, we remove it so that we are able to find the bucket name in the path parts later
346346
self.dataset_path_parts = [p.strip("/") for p in self.dataset_path.parent.parts]
@@ -446,7 +446,7 @@ def _extract_period_string_from_index(self, index: int) -> str | None:
446446
The extracted period string if it exists, otherwise None.
447447
"""
448448
try:
449-
return self._period_strings[index]
449+
return self.period_strings[index]
450450
except IndexError:
451451
return None
452452

@@ -574,7 +574,7 @@ def contains_data_from(self) -> datetime.date | None:
574574
"""
575575
period_string = self._extract_period_string_from_index(0)
576576
if not period_string or (
577-
len(self._period_strings) > 1 and period_string > self._period_strings[1]
577+
len(self.period_strings) > 1 and period_string > self.period_strings[1]
578578
):
579579
return None
580580
date_format = categorize_period_string(period_string)

src/dapla_metadata/standards/name_validator.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,19 @@
88

99
from dapla_metadata.datasets.dapla_dataset_path_info import DaplaDatasetPathInfo
1010
from dapla_metadata.datasets.dataset_parser import SUPPORTED_DATASET_FILE_SUFFIXES
11-
from dapla_metadata.standards.utils.constants import DESCRIPTION_OTHER_THAN_DASHES
1211
from dapla_metadata.standards.utils.constants import FILE_DOES_NOT_EXIST
1312
from dapla_metadata.standards.utils.constants import FILE_IGNORED
1413
from dapla_metadata.standards.utils.constants import IGNORED_FOLDERS
1514
from dapla_metadata.standards.utils.constants import INVALID_SYMBOLS
15+
from dapla_metadata.standards.utils.constants import MAX_TWO_PERIODS
1616
from dapla_metadata.standards.utils.constants import MISSING_DATA_STATE
1717
from dapla_metadata.standards.utils.constants import MISSING_DATASET_SHORT_NAME
1818
from dapla_metadata.standards.utils.constants import MISSING_PERIOD
1919
from dapla_metadata.standards.utils.constants import MISSING_SHORT_NAME
2020
from dapla_metadata.standards.utils.constants import NAME_STANDARD_SUCCESS
2121
from dapla_metadata.standards.utils.constants import NAME_STANDARD_VIOLATION
2222
from dapla_metadata.standards.utils.constants import PATH_IGNORED
23+
from dapla_metadata.standards.utils.constants import SHORT_NAME_OTHER_THAN_DASHES
2324
from dapla_metadata.standards.utils.constants import SSB_NAMING_STANDARD_REPORT
2425
from dapla_metadata.standards.utils.constants import SSB_NAMING_STANDARD_REPORT_FILES
2526
from dapla_metadata.standards.utils.constants import (
@@ -194,9 +195,10 @@ def _check_violations(
194195
MISSING_SHORT_NAME: path_info.statistic_short_name,
195196
MISSING_DATA_STATE: path_info.dataset_state,
196197
MISSING_PERIOD: path_info.contains_data_from,
198+
MAX_TWO_PERIODS: len(path_info.period_strings) <= 2,
197199
MISSING_DATASET_SHORT_NAME: path_info.dataset_short_name,
198200
INVALID_SYMBOLS: not _has_invalid_symbols(file),
199-
DESCRIPTION_OTHER_THAN_DASHES: not _short_name_has_illegal_chars(
201+
SHORT_NAME_OTHER_THAN_DASHES: not _short_name_has_illegal_chars(
200202
path_info.dataset_short_name
201203
),
202204
}

src/dapla_metadata/standards/utils/constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
NAME_STANDARD_SUCCESS = "Filene dine er i samsvar med SSB-navnestandarden"
88

99
NAME_STANDARD_VIOLATION = "Det er oppdaget brudd på SSB-navnestandard:"
10-
10+
MAX_TWO_PERIODS = "Det er mer enn to perioder i filnavnet, navnestandarden åpner kun for to: https://manual.dapla.ssb.no/statistikkere/navnestandard.html#tbl-filename"
1111
MISSING_BUCKET_NAME = "Filnavn mangler bøttenavn ref: https://manual.dapla.ssb.no/statistikkere/navnestandard.html#obligatoriske-mapper"
1212
MISSING_VERSION = "Filnavn mangler versjon, hvis ikke filen er nyeste versjon kan dette være brudd på navnestandarden ref: https://manual.dapla.ssb.no/statistikkere/navnestandard.html#versjonering-av-datasett"
1313
MISSING_PERIOD = "Filnavn mangler gyldighetsperiode ref: https://manual.dapla.ssb.no/statistikkere/navnestandard.html#filnavn"
@@ -16,7 +16,7 @@
1616
MISSING_DATASET_SHORT_NAME = "Filnavn mangler datasett kortnavn ref: https://manual.dapla.ssb.no/statistikkere/navnestandard.html#filnavn"
1717

1818
INVALID_SYMBOLS = "Filnavn inneholder ulovlige tegn ref: https://manual.dapla.ssb.no/statistikkere/navnestandard.html#filnavn"
19-
DESCRIPTION_OTHER_THAN_DASHES = "Datasett kortnavn inneholder ulovlige tegn ref: https://manual.dapla.ssb.no/statistikkere/navnestandard.html#filnavn"
19+
SHORT_NAME_OTHER_THAN_DASHES = "Datasett kortnavn inneholder ulovlige tegn ref: https://manual.dapla.ssb.no/statistikkere/navnestandard.html#filnavn"
2020

2121
PATH_IGNORED = "Ignorert, mappen er ikke underlagt krav til navnestandard."
2222
FILE_IGNORED = f"Ignorert, kun datasett med {', '.join(SUPPORTED_DATASET_FILE_SUFFIXES.keys())} filendelser valideres foreløpig."

tests/standards/test_check_naming_standard.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ async def test_generate_naming_standard_report(tmp_path):
351351
"buckets/ssb-dapla-example-data-produkt-prod/klargjorte_data/_p2021-12-31_p2021-12-31_v1.parquet",
352352
"buckets/ssb-dapla-example-data-produkt-prod/ledstill/klargjorte_data/park_p2021-12-31_p2021-12-31_v1.parquet",
353353
"buckets/ssb-dapla-example-data-produkt-prod/ledstill/park_wrongunderscore_v1.parquet",
354+
"buckets/ssb-dapla-example-data-produkt-prod/ledstill/klargjorte_data/three-periods_p2020-12-31_p2021-12-31_p2022-12-31_v1.parquet",
354355
]
355356
for file_path in file_paths:
356357
full_path = tmp_path / file_path
@@ -363,8 +364,8 @@ async def test_generate_naming_standard_report(tmp_path):
363364

364365
if isinstance(results, list):
365366
report = generate_validation_report(validation_results=results)
366-
assert report.num_failures == 4
367-
assert report.num_files_validated == 6
367+
assert report.num_failures == 5
368+
assert report.num_files_validated == 7
368369
assert report.num_success == 2
369370

370371

0 commit comments

Comments
 (0)