@cli_list.command(
    help="List geodata and extract metadata into an Excel (.xlsx) spreadsheet file."
)
def inventory(
    input_folder: Annotated[
        Optional[Path],
        typer.Option(
            envvar="DICOGIS_START_FOLDER",
            resolve_path=True,
            readable=True,
            file_okay=False,
            dir_okay=True,
        ),
    ],
    formats: Annotated[
        str,
        typer.Option(envvar="DICOGIS_FORMATS_LIST"),
    ] = "shp,geotiff,geojson,kml",
):
    """Announce the inventory that is about to run.

    For now the command only echoes the folder and formats it received and
    logs the application directory returned by Typer; the formats are not
    validated yet (see the TODO at the end of the body).

    Args:
        input_folder: folder to start from. Declared as a readable directory
            whose path is resolved; can also be supplied through the
            DICOGIS_START_FOLDER environment variable.
        formats: comma-separated formats codes, also readable from the
            DICOGIS_FORMATS_LIST environment variable. Defaults to
            "shp,geotiff,geojson,kml".
    """
    summary = f"Analysing geodata stored in {input_folder}. Targetted formats: {formats}"
    typer.echo(summary)

    logger.warning(f"DicoGIS folder: {typer.get_app_dir(APP_NAME)}")

    # TODO: check if specified formats are supported
class _CaseInsensitiveStrEnum(str, Enum):
    """String enum whose lookup by value ignores letter case.

    Members keep their declared value; only the ``Enum(value)`` lookup is
    relaxed, so that ``"geojson"`` resolves to the member declared as
    ``"GeoJSON"``.
    """

    @classmethod
    def _missing_(cls, value):
        """Retry a failed lookup by value, comparing case-insensitively.

        Args:
            value: the value that matched no member exactly.

        Returns:
            The matching member, or None so that Enum raises its usual ValueError.
        """
        if isinstance(value, str):
            folded = value.casefold()
            for member in cls:
                if member.value.casefold() == folded:
                    return member
        return None


class FormatsVector(_CaseInsensitiveStrEnum):
    """Supported vector formats.

    Lookup by value is case-insensitive: the CLI passes lower-case codes
    ("geojson", "kml") while some members are declared in mixed or upper case.
    """

    esri_shapefile = "shp"
    geojson = "GeoJSON"
    gml = "GML"
    kml = "KML"
    mapinfo_tab = "tab"


class FormatsRaster(_CaseInsensitiveStrEnum):
    """Supported raster formats. Lookup by value is case-insensitive."""

    geotiff = "geotiff"
    jpeg = "jpeg"


# every supported format member, vectors first then rasters
SUPPORTED_FORMATS = [*FormatsVector, *FormatsRaster]
# Embedded formats definitions, indexed by format code.
# Extensions (main one and dependencies) are written with their leading dot.
FORMATS_MATRIX = {
    # Esri SDE
    "arcsde": FormatMatcher(
        data_structure="both",
        name="ESRI SDE Geodatabase",
        alternative_names=["arcsde", "geodatabase_sde", "sde", "sde30"],
        storage_kind="sgbd",
        extension=".sde",
        dependencies_required=[],
        dependencies_optional=[],
    ),
    # Esri FileGeoDatabase
    "filegdb": FormatMatcher(
        data_structure="both",
        name="ESRI File Geodatabase",
        alternative_names=["esri_filegdb"],
        storage_kind="directory",
        extension=".gdb",
        dependencies_required=[],
        dependencies_optional=[],
    ),
    # GeoTIFF
    "geotiff": FormatMatcher(
        data_structure="raster",
        name="geotiff",
        alternative_names=["geotiff", "tiff"],
        storage_kind="files",
        extension=".tif",
        # leading dots added so these entries compare like every other
        # extension of the matrix (".dbf", ".shx", ".aux"...)
        # NOTE(review): confirm that .tab/.tfw are really mandatory companions
        dependencies_required=[".tab", ".tfw"],
        dependencies_optional=[".aux", ".aux.xml", ".lgo", ".txt", ".wld"],
    ),
    # Esri Shapefiles
    "shp": FormatMatcher(
        data_structure="vector",
        name="ESRI Shapefile",
        alternative_names=["esri_shp", "esri_shape", "shapefile", "shapefiles"],
        storage_kind="files",
        extension=".shp",
        dependencies_required=[".dbf", ".shx"],
        dependencies_optional=[
            ".atx",
            ".cpg",
            ".fbn",
            ".fbx",
            ".ixs",
            ".mxs",
            ".prj",
            ".sbn",
            ".sbx",
            ".shp.xml",
        ],
    ),
    # PostGIS
    "postgis": FormatMatcher(
        data_structure="both",
        name="PostGIS",
        alternative_names=["postgis", "pgis", "postgresql_postgis"],
        storage_kind="sgbd",
        extension=".pgconf",
        dependencies_required=[],
        dependencies_optional=[],
    ),
}
class FormatYamlReader:
    """Read a YAML file specifying a format configuration.

    The YAML stream must hold exactly one document: a mapping with a ``model``
    key (itself a mapping whose ``type`` is ``format``) and a ``format`` key.

    :param in_yaml: path to the yaml file to read, or a binary buffer opened on it.

    :raises TypeError: if ``in_yaml`` is neither a path (str, Path) nor a binary buffer
    """

    # NOTE(review): both attributes are bound to the ``dict`` type itself until
    # ``__init__`` replaces them with the parsed mappings.
    model = dict
    format_definition = dict

    def __init__(self, in_yaml: Union[str, Path, BufferedIOBase]):
        """Check the input, then load the model and the format definition."""
        # NOTE(review): full_load_all accepts more than plain data types. If
        # definitions files can come from untrusted sources, consider switching
        # the two calls below to yaml.safe_load_all.
        if isinstance(in_yaml, (str, Path)):
            # check and get YAML path
            self.input_yaml = self.check_yaml_file(in_yaml)
            # the generator is consumed by check_yaml_structure while the file is open
            with self.input_yaml.open(mode="r") as bytes_data:
                self.model, self.format_definition = self.check_yaml_structure(
                    yaml.full_load_all(bytes_data)
                )
        elif isinstance(in_yaml, BufferedIOBase):
            self.input_yaml = self.check_yaml_buffer(in_yaml)
            self.model, self.format_definition = self.check_yaml_structure(
                yaml.full_load_all(self.input_yaml)
            )
        else:
            raise TypeError(
                "in_yaml must be a str, a Path or a binary buffer, "
                f"not {type(in_yaml).__name__}"
            )

    # CHECKS
    def check_yaml_file(self, yaml_path: Union[str, Path]) -> Path:
        """Perform some checks on passed yaml file and load it as Path object.

        :param yaml_path: path to the yaml file to check

        :raises TypeError: if the path cannot be converted into a Path
        :raises FileNotFoundError: if the file doesn't exist
        :raises OSError: if the path is not a file or is not readable
        :raises yaml.YAMLError: if the content is not valid (safe) YAML

        :returns: sanitized yaml path
        :rtype: Path
        """
        # if path as string load it in Path object
        if isinstance(yaml_path, str):
            try:
                yaml_path = Path(yaml_path)
            except Exception as exc:
                raise TypeError(f"Converting yaml path failed: {exc}") from exc

        # check if file exists. FileNotFoundError is an OSError, like the
        # errors raised by the next checks.
        if not yaml_path.exists():
            raise FileNotFoundError(
                f"YAML file to check doesn't exist: {yaml_path.resolve()}"
            )

        # check if it's a file
        if not yaml_path.is_file():
            raise OSError(f"YAML file is not a file: {yaml_path.resolve()}")

        # check if file is readable
        if not access(yaml_path, R_OK):
            raise OSError(f"yaml file isn't readable: {yaml_path}")

        # check integrity: safe_load_all returns a lazy generator, so it has
        # to be iterated for the documents to be actually parsed
        with yaml_path.open(mode="r") as in_yaml_file:
            try:
                for _ in yaml.safe_load_all(in_yaml_file):
                    pass
            except yaml.YAMLError:
                logger.error(msg=f"YAML file is invalid: {yaml_path.resolve()}")
                raise
            except Exception as exc:
                logger.error(msg=f"Structure of YAML file is incorrect: {exc}")
                raise

        # return sanitized path
        return yaml_path

    def check_yaml_buffer(self, yaml_buffer: BufferedIOBase) -> BufferedIOBase:
        """Perform some checks on passed yaml buffer.

        The buffer is parsed once to validate it, then rewound to the position
        it had on entry. A buffer which is not seekable is returned unchecked,
        because validating it would consume the data.

        :param yaml_buffer: bytes reader of the yaml file to check

        :raises yaml.YAMLError: if the content is not valid (safe) YAML

        :returns: checked bytes object
        :rtype: BufferedIOBase
        """
        if not yaml_buffer.seekable():
            logger.debug(f"YAML buffer is not seekable, check skipped: {yaml_buffer}")
            return yaml_buffer

        start_position = yaml_buffer.tell()
        # check integrity (the lazy generator must be iterated to parse anything)
        try:
            for _ in yaml.safe_load_all(yaml_buffer):
                pass
        except yaml.YAMLError as exc:
            logger.error(f"Invalid YAML {yaml_buffer}. Trace: {exc}")
            raise
        finally:
            # leave the buffer where it was so it can be loaded afterwards
            yaml_buffer.seek(start_position)

        # return checked buffer
        return yaml_buffer

    def check_yaml_structure(self, in_yaml_data: Generator) -> tuple:
        """Look into the YAML structure and check everything is OK.

        Exactly one document is expected. It must be a mapping with a ``model``
        key, whose value is a mapping with ``type`` set to ``format``, and a
        ``format`` key.

        :param Generator in_yaml_data: documents loaded from the YAML stream,
            typically the generator returned by ``yaml.full_load_all``

        :raises ValueError: if the documents don't match the expected structure

        :return: tuple of 2 items: the ``model`` mapping and the ``format`` value
        :rtype: tuple

        :example:

        .. code-block:: python

            my_yaml = Path("format_sample.yml")
            with my_yaml.open(mode="r") as op:
                yaml_data = yaml.full_load_all(op)
                model, format_definition = reader.check_yaml_structure(yaml_data)
        """
        # convert generator into list to parse content
        li_docs = list(in_yaml_data)

        # check root structure. Explicit errors instead of asserts, which are
        # stripped when Python runs with -O.
        if len(li_docs) != 1:
            raise ValueError(
                f"Exactly one YAML document is expected, {len(li_docs)} found."
            )
        docs = li_docs[0]
        if not isinstance(docs, dict):
            raise ValueError(
                f"YAML document must be a mapping, not {type(docs).__name__}."
            )

        # check dict of dicts
        model = docs.get("model")
        if not isinstance(model, dict) or model.get("type") != "format":
            raise ValueError("YAML 'model' must be a mapping with type: format.")
        if "format" not in docs:
            raise ValueError("YAML document has no 'format' key.")

        return model, docs.get("format")

    # PROPERTIES
    @property
    def as_format_matcher(self) -> "FormatMatcher":
        """Load the format definition into a FormatMatcher.

        Keys of the ``format`` mapping are passed as keyword arguments.

        :return: format object
        :rtype: FormatMatcher
        """
        return FormatMatcher(**self.format_definition)
class GeofilesExplorer:
    """Look for geographic datasets under a folder, filtered by formats.

    :param Path start_folder: parent folder where to start looking
    :param tuple formats: selected formats to look for, as format codes or \
    alternative names. Others formats will be ignored.
    :param Path definitions_folder: folder containing formats definitions overriding \
    embedded matrix

    :raises OSError: if ``start_folder`` is not an existing directory
    """

    # Class-level defaults. ``__init__`` binds a fresh ``files_by_formats`` dict
    # on every instance, so results are not shared between explorers.
    files_by_formats = {}
    # NOTE(review): in this changeset FORMATS_MATRIX is defined in
    # dicogis.listing.formats_matrix, not in dicogis.listing.models: the
    # module-level import has to point there.
    formats_definitions = FORMATS_MATRIX

    # log message used by ``seek`` for each handled storage kind
    _SEEK_MESSAGES = {
        "files": "Looking for {} files in {}...",
        "directory": "Looking for {} directories in {}...",
        "sgbd": "Looking for {} files (SGBD configuration) in {}...",
    }

    def __init__(
        self,
        start_folder: Path,
        formats: tuple,
        definitions_folder: Path = None,
    ):
        """Check the folders, then resolve requested formats against the matrix."""
        # check parent folder
        start_folder = Path(start_folder)
        if not start_folder.is_dir():
            raise OSError(
                f"Directory {start_folder} doesn't exist or it's not reachable "
                f"for the user: {self._current_user()}"
            )
        self.start_folder = start_folder

        # results of seek(), owned by this instance
        self.files_by_formats = {}

        # check definitions folder
        self.definitions_folder = definitions_folder
        if definitions_folder is not None:
            definitions_folder = Path(definitions_folder)
            if definitions_folder.is_dir():
                self.definitions_folder = definitions_folder
                self.formats_definitions = self.load_overriding_definitions(
                    FORMATS_MATRIX, self.definitions_folder
                )
            else:
                logger.error(
                    IOError(
                        f"Folder of formats definitions {definitions_folder} "
                        f"doesn't exist or it's not reachable for the user: "
                        f"{self._current_user()}"
                    )
                )
        else:
            logger.debug(
                "No folder with custom formats definitions, so use only embedded formats."
            )

        # check formats
        self.formats = []  # list of formats objects
        formats_reversed_matrix = self.reverse_matrix()
        for frmt in formats:
            if frmt in self.formats_definitions:
                # match based on format code
                self.formats.append(self.formats_definitions.get(frmt))
                logger.info(f"Format '{frmt}' found in formats codes matrix.")
            elif frmt in formats_reversed_matrix:
                # match based on one of the alternative names
                self.formats.append(
                    self.formats_definitions.get(formats_reversed_matrix.get(frmt))
                )
                logger.info(
                    f"Format '{frmt}' found in formats alternative names matrix (reversed)."
                )
            else:
                logger.warning(
                    "Format '{}' is not an accepted value. It'll be ignored. "
                    "Must be one of: {}".format(
                        frmt, " | ".join(self.formats_definitions)
                    )
                )

        # list of formats names. Just a shortcut for information to display (log, console...)
        self.formats_codes = sorted(i.name for i in self.formats)
        # formats objects may not carry a FME short name: keep only those which do
        self.formats_fme_short_names = sorted(
            short_name
            for short_name in (
                getattr(i, "fme_short_name", None) for i in self.formats
            )
            if short_name is not None
        )

    @staticmethod
    def _current_user() -> str:
        """Return the login name, or 'unknown' when the system can't tell it."""
        try:
            return getlogin()
        except OSError:
            # getlogin() fails when the process has no controlling terminal
            return "unknown"

    def _format_code(self, frmt) -> str:
        """Return the code identifying a format object.

        Uses the ``isogeo_code`` attribute when the object has one, else the
        key under which the object is stored in the formats matrix, else its name.
        """
        code = getattr(frmt, "isogeo_code", None)
        if code:
            return code
        for matrix_code, candidate in self.formats_definitions.items():
            if candidate is frmt:
                return matrix_code
        return frmt.name

    @classmethod
    def load_overriding_definitions(
        cls, in_formats_matrix: dict, definitions_folder: Path
    ) -> dict:
        """Browse the specified folder to find formats definitions (YAML files) and \
        override the passed matrix.

        The passed matrix is updated in place and returned. Files which can't
        be loaded as a format definition are logged and ignored.

        :param dict in_formats_matrix: formats matrix to override
        :param Path definitions_folder: Path to the folder containing the format \
        definitions (YAML files).

        :returns: the formats matrix, with found definitions added or replaced
        :rtype: dict
        """
        # check folder path
        if isinstance(definitions_folder, str):
            definitions_folder = Path(definitions_folder)

        if not definitions_folder.is_dir():
            logger.error(IOError("Path is not a folder."))
            return in_formats_matrix

        # check if definitions file exist
        li_yaml_files = sorted(definitions_folder.glob("*.y*ml"))
        if not li_yaml_files:
            logger.debug(f"No YAML file found into folder: {definitions_folder}")
            return in_formats_matrix

        # parse them
        for yaml_file in li_yaml_files:
            # load format definition. Best effort: a broken file must not
            # prevent the others from being loaded.
            try:
                format_definition = FormatYamlReader(yaml_file).as_format_matcher
            except Exception as exc:
                logger.warning(
                    "Format definition is incorrect and will be ignored: {}".format(
                        yaml_file
                    )
                )
                logger.debug(f"Reason: {exc}")
                continue

            logger.info(
                "Overriding format definition found: {}".format(format_definition.name)
            )

            # replace or add it
            # NOTE(review): when the definition carries no 'isogeo_code', the
            # YAML file name (without extension) is used as format code -
            # confirm this is the expected convention.
            format_code = getattr(format_definition, "isogeo_code", None) or yaml_file.stem
            in_formats_matrix[format_code] = format_definition

        return in_formats_matrix

    @classmethod
    def reverse_matrix(cls) -> dict:
        """Parse formats matrix and return a new dictionary with alternative names as keys.

        :returns: dictionary of format alternative names, with format codes as values
        :rtype: dict
        """
        # out dictionary
        dict_reversed_matrix = {}
        # parse matrix
        for frmt_code, frmt in cls.formats_definitions.items():
            for alt_name in frmt.alternative_names or ():
                dict_reversed_matrix[alt_name] = frmt_code

        return dict_reversed_matrix

    def seek(self) -> dict:
        """Parse file system from the start_folder, applying the pattern of each format.

        Formats whose storage kind is not 'files', 'directory' or 'sgbd' are skipped.

        :returns: dictionary of paths to datasets, indexed by format code
        :rtype: dict
        """
        for frmt in self.formats:
            message = self._SEEK_MESSAGES.get(frmt.storage_kind)
            if message is None:
                continue
            logger.info(message.format(frmt.extension, self.start_folder))

            # listing paths. No trailing separator in the pattern: since
            # Python 3.11 it restricts matches to directories, which would
            # exclude the SGBD configuration files.
            candidates = self.start_folder.glob(f"**/*{frmt.extension}")
            if frmt.storage_kind == "directory":
                li_frmt_paths = sorted(p for p in candidates if p.is_dir())
            else:
                li_frmt_paths = sorted(candidates)

            # storing paths
            if li_frmt_paths:
                self.files_by_formats[self._format_code(frmt)] = li_frmt_paths
                logger.info(
                    "Found {} elements matching format {}".format(
                        len(li_frmt_paths), frmt.name
                    )
                )

        return self.files_by_formats
geoexplorer.files_by_formats.get("filegdb"): + # print(p.is_dir()) diff --git a/dicogis/listing/models.py b/dicogis/listing/models.py new file mode 100644 index 00000000..899cdbc3 --- /dev/null +++ b/dicogis/listing/models.py @@ -0,0 +1,50 @@ +#! python3 # noqa: E265 + +"""Models defining a dataset or database formats.""" + +# ############################################################################# +# ########## Libraries ############# +# ################################## + +from dataclasses import dataclass + +# ############################################################################## +# ########## Classes ############### +# ################################## + + +@dataclass +class DatabaseConfig: + """Model for configuration settings of a database stored into a SGBD.""" + + name: str + host: str + port: int + username: str + password: str + schemas: list[str] + esri_sde: bool = False + + +@dataclass +class FormatMatcher: + """Model for a dataset format. + + :param str name: name of the format. Example: 'ESRI File Geodatabase' + :param list alternative_names: potential alternative names. Example: \ + ['esri_filegdb', 'filegdb'] + :param str storage_kind: type of storage: directory, database, files + :param str extension: extension for files and directorie. Example : '.gdb' + :param str dependencies_required: list of extensions of potential required \ + dependencies. Example: ['.dbf', '.shx']. + :param str dependencies_optional: list of extensions of potential optional \ + dependencies. Example: ['.prj','.sbn', '.sbx']. 
+ """ + + alternative_names: list + data_structure: str + dependencies_optional: list + dependencies_required: list + extension: str + name: str + storage_kind: str diff --git a/requirements/base.txt b/requirements/base.txt index 64b70d81..8a92cffa 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -6,4 +6,5 @@ geoserver-restconfig>=2.0.4,<2.0.10 lxml>=4.9,<4.10 numpy>=1.22,<1.27 openpyxl>=3.0,<3.2 +typer[all]>=0.9,<1 xmltodict>=0.12,<1 diff --git a/setup.py b/setup.py index 805d8907..2aa6e465 100644 --- a/setup.py +++ b/setup.py @@ -90,9 +90,12 @@ def load_requirements(requirements_files: Path | list[Path]) -> list: install_requires=load_requirements(HERE / "requirements/base.txt"), # run entry_points={ + "console_scripts": [ + f"{__about__.__package_name__}-cli = dicogis.cli.main:cli_dicogis" + ], "gui_scripts": [ "dicogis = dicogis.DicoGIS:__main__", - ] + ], }, # metadata keywords=__about__.__keywords__,