Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ dependencies = [
"openpyxl>=3.1.4",
"scikit-learn>=1.5.2",
"rapidfuzz>=3.10.1",
"Levenshtein>=0.26.1",
]

[project.urls]
Expand Down
7 changes: 7 additions & 0 deletions src/osc_transformer_presteps/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from osc_transformer_presteps.run_local_extraction import app as extraction
from osc_transformer_presteps.run_local_relevance_curation import app as curation
from osc_transformer_presteps.run_local_kpi_curation import kpi_curator_app
from osc_transformer_presteps.run_local_folder_structure_maker import folderizer_app

# Define command structure with typer module.
# This is the root Typer application; the sub-applications imported above are
# attached to it through app.add_typer(...) calls.
app = typer.Typer(no_args_is_help=True)
Expand All @@ -33,6 +34,12 @@
help="If you want to run local creation of dataset for kpi-detection task, then this is the subcommand to use.",
)

# Register the folder-structure sub-application under its own subcommand name.
# The help text describes folder creation (it previously repeated the
# kpi-detection dataset wording of another subcommand).
app.add_typer(
    folderizer_app,
    name="make-folder-structure",
    help="If you want to create the local folder structure needed for running the OSC pipelines, then this is the subcommand to use.",
)


def run():
"""Provide main entry point for the OSC Transformer CLI application.
Expand Down
1 change: 1 addition & 0 deletions src/osc_transformer_presteps/folderizer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Module responsible for creating folder structure for the project."""
49 changes: 49 additions & 0 deletions src/osc_transformer_presteps/folderizer/folderizer_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Folderizer-MAIN file."""

import os


def create_osc_folder_structure(base_path: str) -> None:
    """Create a predefined folder structure inside an 'OSC' directory at the given base path.

    Missing intermediate directories (including ``base_path`` itself) are
    created as needed. Directories that already exist are accepted as-is, so
    calling the function repeatedly on the same base path is harmless.

    Args:
        base_path (str): The base directory where the 'OSC' folder will be created.

    Raises:
        OSError: If a directory cannot be created, e.g. because a path
            component already exists as a regular file.

    The function creates the following subdirectories inside 'OSC':
        - inputs/pdfs_training
        - inputs/pdfs_inference
        - inputs/kpi_mapping
        - inputs/annotation_files
        - logs
        - outputs/jsons_training
        - outputs/jsons_inference
        - outputs/curated_data_rel
        - outputs/curated_data_kpi
        - outputs/inference_rel
        - outputs/inference_kpi
        - models/relevance
        - models/kpi_detection

    """
    osc_path = os.path.join(base_path, "OSC")

    # Keep this list and the docstring above in sync when adding folders.
    folders = (
        "inputs/pdfs_training",
        "inputs/pdfs_inference",
        "inputs/kpi_mapping",
        "inputs/annotation_files",
        "logs",
        "outputs/jsons_training",
        "outputs/jsons_inference",
        "outputs/curated_data_rel",
        "outputs/curated_data_kpi",
        "outputs/inference_rel",
        "outputs/inference_kpi",
        "models/relevance",
        "models/kpi_detection",
    )

    for folder in folders:
        # exist_ok=True lets the call succeed when the directory is already there.
        os.makedirs(os.path.join(osc_path, folder), exist_ok=True)

    print(f"Folder structure created successfully under: {osc_path}")


if __name__ == "__main__":
    # Only build the tree when this file is executed directly. An unguarded
    # call here would run on every import of the module (the CLI imports it)
    # and create directories as a side effect.
    import sys

    # Use the first command-line argument as base path, falling back to the
    # current working directory instead of a machine-specific absolute path.
    create_osc_folder_structure(sys.argv[1] if len(sys.argv) > 1 else os.getcwd())
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ def run_kpi_curator(

# Format current date for file naming
da = date.today().strftime("%d-%m-%Y")
train_df.rename(
columns={"paragraph": "context", "answer": "annotation_answer"},
inplace=True,
)
val_df.rename(
columns={"paragraph": "context", "answer": "annotation_answer"},
inplace=True,
)

# Save DataFrames to Excel files
train_output_path = Path(output_folder) / f"train_kpi_data_{da}.xlsx"
Expand Down
34 changes: 34 additions & 0 deletions src/osc_transformer_presteps/run_local_folder_structure_maker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""File to locally run make-folder-structure module."""

import typer
from osc_transformer_presteps.folderizer.folderizer_main import (
create_osc_folder_structure,
)

# Typer sub-application that groups the folder-structure commands defined in
# this module; the callback and command below are registered on it.
folderizer_app = typer.Typer(no_args_is_help=True)


@folderizer_app.callback(invoke_without_command=True)
def folderizer(ctx: typer.Context):
    """Commands for Folderizer.

    Available commands:
    - run-folder-structure-maker
    """
    # NOTE: the docstring above is user-facing. It is echoed verbatim when no
    # subcommand is supplied, so it must list the command name that is
    # actually registered on folderizer_app.
    if ctx.invoked_subcommand is None:
        typer.echo(folderizer.__doc__)
        raise typer.Exit()


@folderizer_app.command("run-folder-structure-maker")
def run_folderizer(
    base_path: str = typer.Argument(
        ..., help="Base Path where the folder structure will be made"
    ),
):
    """Create a folder structure needed for running OSC pipelines."""
    # The docstring is kept to a single line on purpose: Typer shows it as the
    # command's help text.
    try:
        # Delegate the actual directory creation to the folderizer module.
        create_osc_folder_structure(base_path=base_path)
    except Exception as e:
        # Report the failure on stderr and exit with a non-zero status so that
        # shells and calling scripts can detect that nothing usable was created.
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(code=1) from e
Loading