Skip to content

Commit ca12e47

Browse files
committed
update: pre-finalize 1141 tasks
1 parent 1181e36 commit ca12e47

File tree

427 files changed

+24206
-25960
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

427 files changed

+24206
-25960
lines changed

data/clean/f_671_xiaoheng.py

Lines changed: 0 additions & 94 deletions
This file was deleted.

data/clean/f_674_xiaoheng.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
import itertools
33

4-
def f_674(dimension):
4+
def f_674(dimension, seed=42):
55
"""
66
Create a 2D numeric array (matrix) of a given dimension with random integers between 1 and 100,
77
and a flat list of all elements in the matrix.
@@ -28,15 +28,15 @@ def f_674(dimension):
2828
>>> print(flat_list)
2929
[52, 93, 15, 72, 61, 21, 83, 87, 75]
3030
"""
31+
np.random.seed(seed) # Ensure reproducible results
32+
3133
if dimension <= 0:
3234
raise ValueError("The dimension must be a positive integer")
3335

34-
np.random.seed(42) # Ensure reproducible results
3536
matrix = np.random.randint(1, 101, size=(dimension, dimension))
3637
flat_list = matrix.flatten().tolist()
3738

3839
combinations = list(itertools.combinations(flat_list, 2))
39-
print("Combinations of pairs of elements:", combinations)
4040

4141
return matrix, flat_list
4242

@@ -54,8 +54,8 @@ def test_positive_dimension(self):
5454
matrix, flat_list = f_674(dimension)
5555
self.assertEqual(matrix.shape, (dimension, dimension))
5656
self.assertEqual(len(flat_list), dimension ** 2)
57-
self.assertTrue(all(1 <= x <= 100 for x in flat_list))
58-
57+
self.assertEqual(flat_list , [52, 93, 15, 72, 61, 21, 83, 87, 75])
58+
5959
def test_dimension_one(self):
6060
"""
6161
Test Case 2: Test with the smallest positive dimension
@@ -66,19 +66,19 @@ def test_dimension_one(self):
6666
matrix, flat_list = f_674(dimension)
6767
self.assertEqual(matrix.shape, (dimension, dimension))
6868
self.assertEqual(len(flat_list), dimension ** 2)
69-
self.assertTrue(all(1 <= x <= 100 for x in flat_list))
69+
self.assertEqual(flat_list , [52])
7070

7171
def test_large_dimension(self):
7272
"""
7373
Test Case 3: Test with a large dimension
74-
Input: 100 (a large positive integer)
75-
Expected Output: A 100x100 matrix and a flat list of 10000 elements, with all elements between 1 and 100.
74+
Input: 10 (a large positive integer)
75+
Expected Output: A 10x10 matrix and a flat list of 100 elements, with all elements between 1 and 100.
7676
"""
77-
dimension = 100
78-
matrix, flat_list = f_674(dimension)
77+
dimension = 10
78+
matrix, flat_list = f_674(dimension, 1)
7979
self.assertEqual(matrix.shape, (dimension, dimension))
8080
self.assertEqual(len(flat_list), dimension ** 2)
81-
self.assertTrue(all(1 <= x <= 100 for x in flat_list))
81+
self.assertEqual(flat_list[:10] , [38, 13, 73, 10, 76, 6, 80, 65, 17, 2])
8282

8383
def test_zero_dimension(self):
8484
"""

data/processed/1000_wo_doc.py

Lines changed: 95 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,106 +1,115 @@
11
import urllib.request
22
import os
3-
import hashlib
4-
import tarfile
3+
import csv
4+
import collections
55

6-
# Constants
7-
TARGET_TAR_FILE = "downloaded_files.tar.gz"
8-
EXPECTED_MD5_CHECKSUM = "d41d8cd98f00b204e9800998ecf8427e"
96

10-
11-
def task_func(url):
7+
def task_func(url, column_name, csv_file_path):
128
"""
13-
Downloads a tar.gz file from a specified URL, then validates its MD5 checksum against a predefined expected value.
14-
If the checksum matches, it extracts the contents of the tar.gz file. Otherwise, it deletes the downloaded file.
9+
Download a CSV file from a given URL, save it to a specified path, and count
10+
the occurrences of each value in a particular column. The function handles various
11+
scenarios including missing columns and file download errors.
1512
1613
Parameters:
17-
url (str): The URL from which to download the tar.gz file.
14+
url (str): The URL of the CSV file to be downloaded. Must be a valid and accessible URL.
15+
column_name (str): The name of the column in the CSV file whose values are to be counted.
16+
The function will raise a ValueError if this column is not found.
17+
csv_file_path (str): The file path where the downloaded CSV file will be saved.
18+
If a file already exists at this path, it will be overwritten.
1819
1920
Returns:
20-
bool: Returns True if the file is successfully downloaded, its MD5 checksum matches the expected value, and
21-
it is extracted. Returns False if the checksum does not match the expected value or if the download fails.
21+
dict: A dictionary mapping the values from the specified column to their
22+
corresponding occurrence counts.
23+
24+
Raises:
25+
ValueError: If the specified column_name does not exist in the CSV file, the function
26+
will delete the downloaded file and raise a ValueError with a message
27+
stating "The provided column_name '{column_name}' does not exist in the CSV file."
2228
2329
Requirements:
24-
- urllib.request
25-
- hashlib
26-
- tarfile
30+
- urllib
2731
- os
32+
- csv
33+
- collections
2834
2935
Example:
30-
>>> task_func('http://example.com/files.tar.gz')
31-
True
36+
>>> task_func('http://example.com/data.csv', 'category', 'downloaded_data.csv')
37+
{'cat1': 5, 'cat2': 3, 'cat3': 8}
38+
# This is a hypothetical output; the actual output will depend on the CSV data.
39+
40+
Notes:
41+
- The downloaded CSV file is deleted after its contents have been processed.
42+
- The function only counts values in the specified column and ignores other data.
3243
"""
33-
try:
34-
urllib.request.urlretrieve(url, TARGET_TAR_FILE)
35-
except Exception as e:
36-
print(e)
37-
return False
38-
md5_hash = hashlib.md5()
39-
with open(TARGET_TAR_FILE, "rb") as f:
40-
for byte_block in iter(lambda: f.read(4096), b""):
41-
md5_hash.update(byte_block)
42-
if md5_hash.hexdigest() != EXPECTED_MD5_CHECKSUM:
43-
os.remove(TARGET_TAR_FILE)
44-
return False
45-
with tarfile.open(TARGET_TAR_FILE, "r:gz") as tar_ref:
46-
tar_ref.extractall()
47-
os.remove(TARGET_TAR_FILE)
48-
return True
44+
urllib.request.urlretrieve(url, csv_file_path)
45+
with open(csv_file_path, "r", encoding="utf-8") as f:
46+
reader = csv.DictReader(f)
47+
if column_name not in reader.fieldnames:
48+
os.remove(csv_file_path)
49+
raise ValueError(
50+
f"The provided column_name '{column_name}' does not exist in the CSV file."
51+
)
52+
values = [row[column_name] for row in reader]
53+
os.remove(csv_file_path)
54+
return collections.Counter(values)
4955

5056
import unittest
51-
from unittest.mock import patch
52-
import urllib.request
53-
import hashlib
57+
from unittest.mock import patch, mock_open
5458
import os
55-
# Constants from the task_func function
56-
TARGET_TAR_FILE = "downloaded_files.tar.gz"
57-
EXPECTED_MD5_CHECKSUM = "d41d8cd98f00b204e9800998ecf8427e"
5859
class TestCases(unittest.TestCase):
5960
"""Test cases for the task_func function."""
60-
def setUp(self):
61-
self.valid_url = "http://example.com/valid.tar.gz"
62-
self.invalid_checksum_url = "http://example.com/invalid_checksum.tar.gz"
63-
# Create a minimal tar.gz file to simulate download
64-
with open("test_file.txt", "w") as f:
65-
f.write("test data")
66-
with tarfile.open(TARGET_TAR_FILE, "w:gz") as tar:
67-
tar.add("test_file.txt")
68-
def test_valid_file(self):
69-
"""Test that a valid file is downloaded, its checksum is validated, and it is extracted."""
70-
with patch("urllib.request.urlretrieve"), patch("hashlib.md5") as mock_md5:
71-
mock_md5.return_value.hexdigest.return_value = EXPECTED_MD5_CHECKSUM
72-
result = task_func(self.valid_url)
73-
self.assertTrue(result)
74-
self.assertFalse(os.path.exists(TARGET_TAR_FILE))
75-
def test_invalid_checksum_valid_format(self):
76-
"""Test that a file with an invalid checksum is not extracted."""
77-
with patch("urllib.request.urlretrieve"), patch("hashlib.md5") as mock_md5:
78-
mock_md5.return_value.hexdigest.return_value = "invalidchecksum"
79-
result = task_func(self.invalid_checksum_url)
80-
self.assertFalse(result)
81-
self.assertFalse(os.path.exists(TARGET_TAR_FILE))
82-
def test_download_failure(self):
83-
"""Test that a file that fails to download is not extracted."""
84-
with patch(
85-
"urllib.request.urlretrieve", side_effect=Exception("Download failed")
86-
):
87-
result = task_func(self.valid_url)
88-
self.assertFalse(result)
89-
def test_file_removal_after_failure(self):
90-
"""Test that a file that fails to download is removed."""
91-
with patch("urllib.request.urlretrieve"), patch("hashlib.md5") as mock_md5:
92-
mock_md5.return_value.hexdigest.return_value = "invalidchecksum"
93-
task_func(self.invalid_checksum_url)
94-
self.assertFalse(os.path.exists(TARGET_TAR_FILE))
95-
def test_extraction_success(self):
96-
"""Test that a file is extracted if its checksum is valid."""
97-
with patch("urllib.request.urlretrieve"), patch("hashlib.md5") as mock_md5:
98-
mock_md5.return_value.hexdigest.return_value = EXPECTED_MD5_CHECKSUM
99-
result = task_func(self.valid_url)
100-
self.assertTrue(result)
101-
def tearDown(self):
102-
# Clean up any created files
103-
if os.path.exists(TARGET_TAR_FILE):
104-
os.remove(TARGET_TAR_FILE)
105-
if os.path.exists("test_file.txt"):
106-
os.remove("test_file.txt")
61+
@patch("os.remove")
62+
@patch("urllib.request.urlretrieve")
63+
@patch(
64+
"builtins.open",
65+
new_callable=mock_open,
66+
read_data="category,other\n" + "cat1,x\n" * 2 + "cat2,y\n" * 2 + "cat3,z\n",
67+
)
68+
def test_count_categories_data1(self, mock_file, mock_urlretrieve, mock_remove):
69+
"""Test that the function counts the occurrences of each category in the CSV file."""
70+
result = task_func("mock_url", "category", "/mock/path/data1.csv")
71+
self.assertEqual(result, {"cat1": 2, "cat2": 2, "cat3": 1})
72+
@patch("os.remove")
73+
@patch("urllib.request.urlretrieve")
74+
@patch(
75+
"builtins.open",
76+
new_callable=mock_open,
77+
read_data="name,other\n" + "Alice,x\n" * 2 + "Bob,y\n" + "Charlie,z\n",
78+
)
79+
def test_count_names_data2(self, mock_file, mock_urlretrieve, mock_remove):
80+
"""Test that the function counts the occurrences of each name in the CSV file."""
81+
result = task_func("mock_url", "name", "/mock/path/data2.csv")
82+
self.assertEqual(result, {"Alice": 2, "Bob": 1, "Charlie": 1})
83+
@patch("os.remove")
84+
@patch("urllib.request.urlretrieve")
85+
@patch(
86+
"builtins.open",
87+
new_callable=mock_open,
88+
read_data="category,other\n" + "cat1,x\n" * 2 + "cat2,y\n" + "cat3,z\n" * 2,
89+
)
90+
def test_count_categories_data3(self, mock_file, mock_urlretrieve, mock_remove):
91+
"""Test that the function counts the occurrences of each category in the CSV file."""
92+
result = task_func("mock_url", "category", "/mock/path/data3.csv")
93+
self.assertEqual(result, {"cat1": 2, "cat2": 1, "cat3": 2})
94+
@patch("os.remove")
95+
@patch("urllib.request.urlretrieve")
96+
@patch(
97+
"builtins.open",
98+
new_callable=mock_open,
99+
read_data="name,other\n" + "Alice,x\n" * 3 + "Bob,y\n" + "Charlie,z\n",
100+
)
101+
def test_count_names_data3(self, mock_file, mock_urlretrieve, mock_remove):
102+
"""Test that the function counts the occurrences of each name in the CSV file."""
103+
result = task_func("mock_url", "name", "/mock/path/data3.csv")
104+
self.assertEqual(result, {"Alice": 3, "Bob": 1, "Charlie": 1})
105+
@patch("os.remove")
106+
@patch("urllib.request.urlretrieve")
107+
@patch(
108+
"builtins.open",
109+
new_callable=mock_open,
110+
read_data="name,other\n" + "Alice,x\n" * 3 + "Bob,y\n" + "Charlie,z\n",
111+
)
112+
def test_non_existent_column(self, mock_file, mock_urlretrieve, mock_remove):
113+
"""Test that the function raises an exception when the specified column does not exist."""
114+
with self.assertRaises(ValueError):
115+
task_func("mock_url", "non_existent_column", "/mock/path/data3.csv")

0 commit comments

Comments
 (0)