Skip to content

Commit 0b39a95

Browse files
authored
Merge pull request #186 from realpython/jima/csv-interview
Skeleton files for csv-interview article
2 parents 4251c6a + 4cbf1b6 commit 0b39a95

20 files changed

+645
-0
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Python Interview Problems – Parsing CSV Files
2+
3+
Corresponding code for ["Python Interview Problems – Parsing CSV Files"](https://realpython.com/python-interview-problems-parsing-csv-python-files/).
4+
5+
The `skeleton_code` directory contains pytest fixtures and module files to get you set up to run pytest. There are no tests in the files, which pytest tells you when you run it:
6+
7+
```console
8+
$ pytest test_weather_v1.py
9+
======================================= test session starts ========================================
10+
platform linux -- Python 3.7.1, pytest-6.2.1, py-1.10.0, pluggy-0.13.1
11+
rootdir: /home/jima/coding/materials_realpy/python-interview-problems-parsing-csv/skeleton_code
12+
collected 0 items
13+
14+
====================================== no tests ran in 0.00s =======================================
15+
```
16+
17+
The `full_code` directory contains the source files we used to generate the examples in the article.
18+
19+
Good luck!
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#!/usr/bin/env python3
2+
""" Reusable CSV parser for both football and weather data. """
3+
import csv
4+
5+
6+
def get_next_result(csv_file, func):
    """Yield ``func(row)`` for every data row of a CSV source.

    Args:
        csv_file: An open text file, or any other iterable of CSV lines,
            whose first line holds the column headers.
        func: Callable applied to each row. It receives the row as a
            mapping from column name to the raw string value.

    Yields:
        The value returned by ``func`` for each row, in input order.
    """
    for stats in csv.DictReader(csv_file):
        yield func(stats)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/usr/bin/env python3
2+
""" Find Minimum Goal Differential
3+
Write a program that takes a filename on the command line and processes the
4+
CSV contents. The contents will be a CSV file with end-of-season football
5+
standings for the English Premier League.
6+
Determine which team had the smallest goal differential that season.
7+
The first line of the CSV file will be column headers:
8+
9+
Team,Games,Wins,Losses,Draws,Goals For,Goals Against
10+
11+
Write unit tests with Pytest to test your program.
12+
"""
13+
import csv_reader
14+
15+
16+
def get_name_and_diff(team_stats):
    """Return a team's name and its absolute goal difference.

    Args:
        team_stats: Mapping with the keys ``"Team"``, ``"Goals For"`` and
            ``"Goals Against"``; the two goal values must be accepted by
            ``int()``.

    Returns:
        A ``(team_name, difference)`` tuple where ``difference`` is
        ``abs(goals_for - goals_against)``.
    """
    diff = int(team_stats["Goals For"]) - int(team_stats["Goals Against"])
    return team_stats["Team"], abs(diff)
19+
20+
21+
def get_min_score_difference(filename):
    """Find the team with the smallest absolute goal difference.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The ``(team_name, difference)`` tuple with the smallest difference.
        On a tie, ``min`` keeps the first such row.

    Raises:
        ValueError: If the file contains no data rows, because ``min`` is
            then called on an empty sequence.
    """
    # newline="" lets the csv module do its own line-ending handling.
    with open(filename, "r", newline="") as csv_data:
        return min(
            csv_reader.get_next_result(csv_data, get_name_and_diff),
            key=lambda item: item[1],
        )
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import pandas as pd
2+
3+
4+
def read_data(csv_file):
    """Load the standings CSV into a DataFrame with a goal-difference column.

    Args:
        csv_file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        A DataFrame in which ``Team``, ``Goals For`` and ``Goals Against``
        are renamed to ``team_name``, ``goals`` and ``goals_allowed``, and a
        new ``goal_difference`` column holds ``|goals - goals_allowed|``.
        All other columns are kept unchanged.
    """
    return (
        pd.read_csv(csv_file)
        .rename(
            columns={
                "Team": "team_name",
                "Goals For": "goals",
                "Goals Against": "goals_allowed",
            }
        )
        .assign(
            goal_difference=lambda df: (df.goals - df.goals_allowed).abs()
        )
    )
16+
17+
18+
def get_min_difference(parsed_data):
    """Return the smallest value in the ``goal_difference`` column.

    Args:
        parsed_data: DataFrame that has a ``goal_difference`` column.
    """
    return parsed_data.goal_difference.min()
20+
21+
22+
def get_team(parsed_data, min_score_difference):
    """Return the name of the first team with the given goal difference.

    Args:
        parsed_data: DataFrame with ``team_name`` and ``goal_difference``
            columns.
        min_score_difference: The goal-difference value to look up.

    Returns:
        The ``team_name`` of the first row, in frame order, whose
        ``goal_difference`` equals ``min_score_difference``.

    Raises:
        KeyError: If no row matches, because label ``0`` does not exist in
            the empty result.
    """
    # Compare with a boolean mask instead of formatting the value into a
    # query string, so the lookup does not depend on how the value prints.
    matches = parsed_data.loc[
        parsed_data.goal_difference == min_score_difference
    ]
    # Renumber from 0 so that label 0 is always the first matching row.
    return matches.reset_index(drop=True).loc[0, "team_name"]
28+
29+
30+
def get_min_score_difference(csv_file):
    """Return ``(team_name, difference)`` for the smallest goal difference.

    Args:
        csv_file: Path or file-like object handed to ``read_data``.

    Returns:
        A tuple of the team name found by ``get_team`` and the minimum
        goal difference found by ``get_min_difference``.
    """
    df = read_data(csv_file)
    min_diff = get_min_difference(df)
    team = get_team(df, min_diff)
    return team, min_diff
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/usr/bin/env python3
2+
""" Find Minimum Goal Differential
3+
Write a program that takes a filename on the command line and processes the
4+
CSV contents. The contents will be a CSV file with end-of-season football
5+
standings for the English Premier League.
6+
Determine which team had the smallest goal differential that season.
7+
The first line of the CSV file will be column headers, with each subsequent
8+
line showing the data for one team:
9+
10+
Team,Games,Wins,Losses,Draws,Goals For,Goals Against
11+
Arsenal,38,26,9,3,79,36
12+
13+
The columns labeled "Goals For" and "Goals Against" contain the total number of
14+
goals scored for and against each team in that season (so Arsenal scored 79
15+
goals against opponents and had 36 goals scored against them).
16+
17+
Write a program to read the file, then print the name of the team with the
18+
smallest difference in "for" and "against" goals. Create unit tests with
19+
Pytest to test your program.
20+
"""
21+
import csv
22+
23+
24+
def parse_next_line(csv_file):
    """Yield each data row of a CSV source as a mapping.

    Args:
        csv_file: An open text file, or any other iterable of CSV lines,
            whose first line holds the column headers.

    Yields:
        One mapping per data row, keyed by column header, with the raw
        string values from the file.
    """
    yield from csv.DictReader(csv_file)
27+
28+
29+
def get_name_and_diff(team_stats):
    """Return a team's name and its absolute goal difference.

    Args:
        team_stats: Mapping with the keys ``"Team"``, ``"Goals For"`` and
            ``"Goals Against"``; the two goal values must be accepted by
            ``int()``.

    Returns:
        A ``(team_name, difference)`` tuple where ``difference`` is
        ``abs(goals_for - goals_against)``.
    """
    goals_for = int(team_stats["Goals For"])
    goals_against = int(team_stats["Goals Against"])
    return team_stats["Team"], abs(goals_for - goals_against)
32+
33+
34+
def get_min_score_difference(filename):
    """Find the team with the smallest absolute goal difference.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(team_name, difference)`` tuple for the row with the smallest
        difference; on a tie the earliest row wins. If the file has no data
        rows, the initial placeholders ``(None, 10000)`` are returned.
    """
    # Placeholders, returned unchanged only when the file has no data rows.
    min_diff = 10000
    min_team = None
    # newline="" lets the csv module do its own line-ending handling.
    with open(filename, "r", newline="") as csv_file:
        for line in parse_next_line(csv_file):
            team, diff = get_name_and_diff(line)
            # Always accept the first row so that the placeholder value can
            # never beat real data, however large the differences are.
            if min_team is None or diff < min_diff:
                min_diff = diff
                min_team = team
    return min_team, min_diff
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env python3
2+
""" Find Minimum Goal Differential
3+
Write a program that takes a filename on the command line and processes the
4+
CSV contents. The contents will be a CSV file with end-of-season football
5+
standings for the English Premier League.
6+
Determine which team had the smallest goal differential that season.
7+
The first line of the CSV file will be column headers:
8+
9+
Team,Games,Wins,Losses,Draws,Goals For,Goals Against
10+
11+
Write unit tests with Pytest to test your program.
12+
"""
13+
import csv
14+
15+
16+
def get_next_name_and_diff(csv_file):
    """Yield ``(team_name, goal_difference)`` for each row of a CSV source.

    Args:
        csv_file: An open text file, or any other iterable of CSV lines,
            whose header row includes ``Team``, ``Goals For`` and
            ``Goals Against``.

    Yields:
        A tuple of the team name and ``abs(goals_for - goals_against)``.
    """
    for team_stats in csv.DictReader(csv_file):
        diff = int(team_stats["Goals For"]) - int(team_stats["Goals Against"])
        yield team_stats["Team"], abs(diff)
20+
21+
22+
def get_min_score_difference(filename):
    """Find the team with the smallest absolute goal difference.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The ``(team_name, difference)`` tuple with the smallest difference.
        On a tie, ``min`` keeps the first such row.

    Raises:
        ValueError: If the file contains no data rows, because ``min`` is
            then called on an empty sequence.
    """
    # newline="" lets the csv module do its own line-ending handling.
    with open(filename, "r", newline="") as csv_data:
        return min(get_next_name_and_diff(csv_data), key=lambda item: item[1])
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/usr/bin/env python3
2+
""" Find Minimum Goal Differential
3+
Write a program that takes a filename on the command line and processes the
4+
CSV contents. The contents will be a CSV file with end-of-season football
5+
standings for the English Premier League.
6+
Determine which team had the smallest goal differential that season.
7+
The first line of the CSV file will be column headers:
8+
9+
Team,Games,Wins,Losses,Draws,Goals For,Goals Against
10+
11+
Write unit tests with Pytest to test your program.
12+
"""
13+
import csv
14+
15+
16+
def get_name_and_diff(team_stats):
    """Return a team's name and its absolute goal difference.

    Args:
        team_stats: Mapping with the keys ``"Team"``, ``"Goals For"`` and
            ``"Goals Against"``; the two goal values must be accepted by
            ``int()``.

    Returns:
        A ``(team_name, difference)`` tuple where ``difference`` is
        ``abs(goals_for - goals_against)``.
    """
    diff = int(team_stats["Goals For"]) - int(team_stats["Goals Against"])
    return team_stats["Team"], abs(diff)
19+
20+
21+
def get_next_name_and_diff(csv_file):
    """Yield ``get_name_and_diff(row)`` for each data row of a CSV source.

    Args:
        csv_file: An open text file, or any other iterable of CSV lines,
            whose first line holds the column headers.

    Yields:
        The ``(team_name, difference)`` tuple computed for each row.
    """
    for team_stats in csv.DictReader(csv_file):
        yield get_name_and_diff(team_stats)
24+
25+
26+
def get_min_score_difference(filename):
    """Find the team with the smallest absolute goal difference.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The ``(team_name, difference)`` tuple with the smallest difference.
        On a tie, ``min`` keeps the first such row.

    Raises:
        ValueError: If the file contains no data rows, because ``min`` is
            then called on an empty sequence.
    """
    # newline="" lets the csv module do its own line-ending handling.
    with open(filename, "r", newline="") as csv_data:
        return min(get_next_name_and_diff(csv_data), key=lambda item: item[1])
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/usr/bin/env python3
2+
""" Find Minimum Goal Differential
3+
Write a program that takes a filename on the command line and processes the
4+
CSV contents. The contents will be a CSV file with end-of-season football
5+
standings for the English Premier League.
6+
Determine which team had the smallest goal differential that season.
7+
The first line of the CSV file will be column headers:
8+
9+
Team,Games,Wins,Losses,Draws,Goals For,Goals Against
10+
11+
Write unit tests with Pytest to test your program.
12+
"""
13+
import csv
14+
15+
16+
def get_name_and_diff(team_stats):
    """Return a team's name and its absolute goal difference.

    Args:
        team_stats: Mapping with the keys ``"Team"``, ``"Goals For"`` and
            ``"Goals Against"``; the two goal values must be accepted by
            ``int()``.

    Returns:
        A ``(team_name, difference)`` tuple where ``difference`` is
        ``abs(goals_for - goals_against)``.
    """
    diff = int(team_stats["Goals For"]) - int(team_stats["Goals Against"])
    return team_stats["Team"], abs(diff)
19+
20+
21+
def get_next_name_and_diff(csv_file, func):
    """Yield ``func(row)`` for every data row of a CSV source.

    Args:
        csv_file: An open text file, or any other iterable of CSV lines,
            whose first line holds the column headers.
        func: Callable applied to each row. It receives the row as a
            mapping from column name to the raw string value.

    Yields:
        The value returned by ``func`` for each row, in input order.
    """
    for team_stats in csv.DictReader(csv_file):
        yield func(team_stats)
24+
25+
26+
def get_min_score_difference(filename):
    """Find the team with the smallest absolute goal difference.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The ``(team_name, difference)`` tuple with the smallest difference.
        On a tie, ``min`` keeps the first such row.

    Raises:
        ValueError: If the file contains no data rows, because ``min`` is
            then called on an empty sequence.
    """
    # newline="" lets the csv module do its own line-ending handling.
    with open(filename, "r", newline="") as csv_data:
        return min(
            get_next_name_and_diff(csv_data, get_name_and_diff),
            key=lambda item: item[1],
        )
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import pytest
2+
import football_pandas as fb
3+
4+
5+
@pytest.fixture
def mock_csv_file(tmp_path):
    """Write a two-team standings CSV to a temp dir and return its path."""
    mock_csv_data = [
        "Team,Games,Wins,Losses,Draws,Goals For,Goals Against",
        "Liverpool FC, 38, 32, 3, 3, 85, 33",
        "Norwich City FC, 38, 5, 27, 6, 26, 75",
    ]
    datafile = tmp_path / "football.csv"
    datafile.write_text("\n".join(mock_csv_data))
    # Returned as a plain string path rather than a Path object.
    return str(datafile)
15+
16+
17+
def test_read_data(mock_csv_file):
    """read_data() returns one row per team and the expected column count."""
    df = fb.read_data(mock_csv_file)
    rows, cols = df.shape
    assert rows == 2
    # The dataframe df has all seven of the cols in the original dataset plus
    # the goal_difference col added in read_data().
    assert cols == 8
24+
25+
26+
def test_score_difference(mock_csv_file):
    """read_data() fills team_name and goal_difference for each row."""
    df = fb.read_data(mock_csv_file)
    assert df.team_name[0] == "Liverpool FC"
    assert df.goal_difference[0] == 52
    assert df.team_name[1] == "Norwich City FC"
    assert df.goal_difference[1] == 49
32+
33+
34+
def test_get_min_diff(mock_csv_file):
    """get_min_difference() returns the smallest goal difference."""
    df = fb.read_data(mock_csv_file)
    diff = fb.get_min_difference(df)
    assert diff == 49
38+
39+
40+
def test_get_team_name(mock_csv_file):
    """get_team() maps a goal difference back to the matching team name."""
    df = fb.read_data(mock_csv_file)
    assert fb.get_team(df, 49) == "Norwich City FC"
    assert fb.get_team(df, 52) == "Liverpool FC"
44+
45+
46+
def test_get_min_score(mock_csv_file):
    """get_min_score_difference() returns the (team, difference) pair."""
    assert fb.get_min_score_difference(mock_csv_file) == (
        "Norwich City FC",
        49,
    )
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env python3
2+
""" Pytest functions for CSV Football problem """
3+
import pytest
4+
import football_v1 as fb
5+
6+
7+
@pytest.fixture
def mock_csv_data():
    """Return a header line plus two team rows as a list of CSV lines."""
    return [
        "Team,Games,Wins,Losses,Draws,Goals For,Goals Against",
        "Liverpool FC, 38, 32, 3, 3, 85, 33",
        "Norwich City FC, 38, 5, 27, 6, 26, 75",
    ]
14+
15+
16+
@pytest.fixture
def mock_csv_file(tmp_path, mock_csv_data):
    """Write the mock CSV lines to a temp file and return its path."""
    datafile = tmp_path / "football.csv"
    datafile.write_text("\n".join(mock_csv_data))
    # Returned as a plain string path rather than a Path object.
    return str(datafile)
21+
22+
23+
def test_get_min_score(mock_csv_file):
    """get_min_score_difference() returns the (team, difference) pair."""
    assert fb.get_min_score_difference(mock_csv_file) == (
        "Norwich City FC",
        49,
    )
28+
29+
30+
def test_parse_next_line(mock_csv_data):
    """parse_next_line() yields one seven-field row per data line."""
    # The list of lines is passed directly; no file on disk is involved.
    all_lines = list(fb.parse_next_line(mock_csv_data))
    assert len(all_lines) == 2
    for line in all_lines:
        assert len(line) == 7
35+
36+
37+
def test_get_score_difference(mock_csv_data):
    """get_name_and_diff() computes name and difference for each row."""
    reader = fb.parse_next_line(mock_csv_data)
    assert fb.get_name_and_diff(next(reader)) == ("Liverpool FC", 52)
    assert fb.get_name_and_diff(next(reader)) == ("Norwich City FC", 49)

0 commit comments

Comments
 (0)