Skip to content

Commit 1a15c92

Browse files
authored
Merge pull request #662 from HaoZeke/python-script-structure
ENH: Add in new snippet for script structure
2 parents ff6de63 + 597a3ee commit 1a15c92

File tree

2 files changed

+144
-0
lines changed

2 files changed

+144
-0
lines changed

python-script-structure/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# How Can You Structure Your Python Script?
2+
3+
This folder provides the code examples for the Real Python tutorial [How Can You Structure Your Python Script?](https://realpython.com/python-script-structure/).
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#!/usr/bin/env python3
2+
3+
# /// script
4+
# requires-python = ">=3.11"
5+
# dependencies = [
6+
# "click==8.1.8",
7+
# "pandas==2.2.3",
8+
# "rich==14.0.0",
9+
# "ucimlrepo==0.0.7",
10+
# ]
11+
# ///
12+
13+
import logging
14+
import sys
15+
from dataclasses import dataclass, field
16+
from enum import IntEnum, StrEnum, auto
17+
from pprint import pformat
18+
19+
import click
20+
import pandas as pd
21+
from rich.console import Console, Text
22+
from rich.logging import RichHandler
23+
from rich.table import Table
24+
from ucimlrepo import fetch_ucirepo
25+
26+
# Configure the root logger once at import time: records at INFO and above
# are formatted as "<LEVEL> - <message>" and emitted through rich's
# RichHandler, which is created with rich tracebacks enabled.
logging.basicConfig(
    format="%(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[RichHandler(rich_tracebacks=True)],
)
31+
32+
33+
class UCIDataset(IntEnum):
    """Numeric dataset identifiers passed to ``fetch_ucirepo(id=...)``."""

    # Identifier used by this script when requesting the Iris dataset.
    IRIS = 53
35+
36+
37+
class IrisVariable(StrEnum):
38+
PETAL_LENGTH = "petal length"
39+
PETAL_WIDTH = "petal width"
40+
SEPAL_WIDTH = "sepal width"
41+
SEPAL_LENGTH = "sepal length"
42+
43+
44+
class Operation(StrEnum):
45+
SUMMARY = auto()
46+
METADATA = auto()
47+
48+
49+
@dataclass
class DescriptiveStatistics:
    """Mean, median and their difference for a single pandas Series.

    Only ``data`` is accepted by the constructor; the remaining fields are
    computed from it in ``__post_init__``.
    """

    data: pd.Series
    # Derived fields: excluded from __init__ and filled in by __post_init__.
    mean: float = field(init=False)
    median: float = field(init=False)
    mm_diff: float = field(init=False)  # mean minus median

    def __post_init__(self):
        """Validate ``data`` and populate the derived statistics.

        Raises:
            TypeError: If ``data`` is not a ``pandas.Series``.
        """
        series = self.data
        if not isinstance(series, pd.Series):
            raise TypeError(
                f"data must be a pandas Series, not {type(series)}"
            )
        average, midpoint = series.mean(), series.median()
        self.mean = average
        self.median = midpoint
        self.mm_diff = average - midpoint

    def __str__(self):
        """Return the ``pprint.pformat`` rendering of this instance."""
        return pformat(self)
67+
68+
69+
@click.command()
@click.option(
    "--operation",
    default=Operation.SUMMARY,
    type=click.Choice(Operation),
    help="Operation to perform: variable summary or dataset metadata",
)
@click.option(
    "--variable",
    type=click.Choice(IrisVariable),
    help="Variable to summarize.",
    required=False,
)
def main(operation, variable):
    """Fetch the Iris dataset from UCI."""
    # NOTE: click uses the docstring above as the command's --help text, so
    # it is kept short and the details live in these comments.
    #
    # operation: an Operation member or its string value. SUMMARY logs
    #     statistics for one variable (or lists all variables when no
    #     variable is given); METADATA logs the dataset metadata.
    # variable: optional IrisVariable member or its string value; only
    #     consulted for the SUMMARY operation.
    #
    # Normalize before dispatching: the branches below compare by identity,
    # so a plain string such as "summary" would otherwise match nothing and
    # the command would silently do no work. An unknown value raises
    # ValueError here, before any network access happens.
    operation = Operation(operation)

    iris = fetch_iris()
    if operation is Operation.SUMMARY:
        if variable:
            # Convert once and reuse for both the log line and the lookup.
            selected = IrisVariable(variable)
            table = generate_table(iris, selected)
            logging.info(format_rich_for_log(table))
            logging.info(f"{selected} summary:")
            logging.info(
                DescriptiveStatistics(iris.data.features[selected.value])
            )
        else:
            logging.info("All variables:")
            logging.info(pformat(iris.variables))
    elif operation is Operation.METADATA:
        logging.info("Metadata summary:")
        logging.info(pformat(iris.metadata))
101+
102+
103+
def fetch_iris():
    """Return the Iris dataset from the UCI ML Repository.

    Logs progress at INFO level. If the fetch raises, or the returned object
    has no ``"data"`` key, the failure is logged at CRITICAL level and the
    process exits with status 1.

    Returns:
        The object returned by ``fetch_ucirepo`` for the Iris dataset id.

    Raises:
        SystemExit: With code 1 when the dataset cannot be fetched or does
            not have the expected structure.
    """
    logging.info("Fetching Iris dataset...")
    try:
        iris_data = fetch_ucirepo(id=UCIDataset.IRIS.value)
        # Validate with an explicit raise rather than ``assert``: assert
        # statements are removed when Python runs with -O, which would
        # silently disable this check.
        if "data" not in iris_data.keys():
            raise ValueError("Object does not have expected structure")
    except Exception as e:
        # Top-level boundary for this script: report any failure and stop.
        logging.critical(f"Failed to correctly fetch Iris dataset: {e}")
        sys.exit(1)
    else:
        logging.info("Iris dataset fetched successfully")
        return iris_data
117+
118+
119+
def generate_table(dataset, variable):
    """Build a rich ``Table`` of descriptive statistics for one variable.

    Args:
        dataset: Object exposing ``data.features``, indexable by column
            name (presumably a pandas DataFrame; confirm against the
            object returned by the fetch helper).
        variable: An ``IrisVariable`` member or its string value.

    Returns:
        A ``Table`` with "Metric" and "Value" columns and one row each for
        the mean, the median and their difference, formatted to two
        decimal places.
    """
    column_name = IrisVariable(variable).value
    summary = DescriptiveStatistics(dataset.data.features[column_name])

    result = Table(title=f"{column_name} summary")
    result.add_column("Metric", style="cyan", justify="right")
    result.add_column("Value", style="magenta")

    rows = (
        ("Mean", summary.mean),
        ("Median", summary.median),
        ("Mean-Median Diff", summary.mm_diff),
    )
    for label, value in rows:
        result.add_row(label, f"{value:.2f}")
    return result
130+
131+
132+
def format_rich_for_log(renderable, width=100):
    """Render a rich object off-screen and return it for logging.

    Args:
        renderable: Any object that ``Console.print`` accepts.
        width: Width, in characters, of the console used for rendering.

    Returns:
        The captured console output converted with ``Text.from_ansi``
        (a rich ``Text`` object, not a plain ``str``).
    """
    capture_console = Console(width=width)
    # Print inside a capture context so the output is collected instead of
    # being written out by the console.
    with capture_console.capture() as captured:
        capture_console.print(renderable)
    rendered = captured.get()
    return Text.from_ansi(rendered)
138+
139+
140+
# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)