Skip to content

Commit 066b0ce

Browse files
authored
Merge branch 'main' into bigcode_james
2 parents a37ad8c + 29c877a commit 066b0ce

File tree

114 files changed

+8300
-1915
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

114 files changed

+8300
-1915
lines changed

data/clean/f_933_zhihan.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import numpy as np
2+
from collections import Counter
3+
4+
5+
def f_933(list_of_tuples):
    """
    Computes the sum of numeric values and counts the occurrences of categories in a list of tuples.

    Each tuple in the input list contains a numeric value and a category. This function calculates
    the sum of all the numeric values and also counts how many times each category appears in the list.

    Parameters:
    - list_of_tuples (list of tuple): A list where each tuple contains a numeric value and a category.
      Any iterable of such tuples is accepted; it is traversed exactly once.

    Returns:
    - tuple: A 2-element tuple where the first element is the sum of the numeric values, and the
             second element is a dictionary with categories as keys and their counts as values.
             For empty input the sum is the integer 0.

    Requirements:
    - numpy
    - collections.Counter

    Example:
    >>> list_of_tuples = [(5, 'Fruits'), (9, 'Vegetables'), (-1, 'Dairy'), (-2, 'Bakery'), (4, 'Meat')]
    >>> sum_of_values, category_counts = f_933(list_of_tuples)
    >>> print(sum_of_values)
    15
    >>> print(category_counts)
    {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1, 'Bakery': 1, 'Meat': 1}
    """
    numeric_values = []
    category_counts = Counter()

    # Single pass so that one-shot iterators (generators, iter(...)) are
    # handled correctly instead of being exhausted before the category count.
    for pair in list_of_tuples:
        numeric_values.append(pair[0])
        category_counts[pair[1]] += 1

    # np.sum([]) yields the float 0.0; return an integer zero for empty input
    # so the "no values" case does not silently change the numeric type.
    total_sum = np.sum(numeric_values) if numeric_values else 0

    return total_sum, dict(category_counts)
39+
40+
41+
import unittest
42+
43+
44+
def run_tests():
    """Run every test in ``TestCases`` with a text runner.

    Returns:
        unittest.TestResult: The outcome of the run, so callers can inspect
        ``wasSuccessful()`` if they wish. Callers that ignore the return
        value are unaffected.
    """
    suite = unittest.TestSuite()
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # the loader method below is the supported equivalent.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    return runner.run(suite)
49+
50+
51+
class TestCases(unittest.TestCase):
    """Checks f_933's numeric total and per-category counts on small inputs."""

    def test_case_1(self):
        # Five entries, each with its own category
        records = [(5, 'Fruits'), (9, 'Vegetables'), (-1, 'Dairy'), (-2, 'Bakery'), (4, 'Meat')]
        total, counts = f_933(records)
        self.assertEqual(total, 15)
        self.assertEqual(counts, {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1, 'Bakery': 1, 'Meat': 1})

    def test_case_2(self):
        # Every entry shares one category
        records = [(5, 'Fruits'), (9, 'Fruits'), (-1, 'Fruits'), (-2, 'Fruits')]
        total, counts = f_933(records)
        self.assertEqual(total, 11)
        self.assertEqual(counts, {'Fruits': 4})

    def test_case_3(self):
        # Only negative numbers
        records = [(-5, 'Fruits'), (-9, 'Vegetables'), (-1, 'Dairy')]
        total, counts = f_933(records)
        self.assertEqual(total, -15)
        self.assertEqual(counts, {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1})

    def test_case_4(self):
        # No entries at all
        total, counts = f_933([])
        self.assertEqual(total, 0)
        self.assertEqual(counts, {})

    def test_case_5(self):
        # Positive and negative numbers under a single category
        records = [(5, 'Fruits'), (-5, 'Fruits'), (3, 'Fruits')]
        total, counts = f_933(records)
        self.assertEqual(total, 3)
        self.assertEqual(counts, {'Fruits': 3})

    def test_empty_list(self):
        """Test with an empty list."""
        outcome = f_933([])
        self.assertEqual(outcome, (0, {}))

    def test_all_negative_values(self):
        """Test with all negative numeric values."""
        outcome = f_933([(-5, 'Fruits'), (-2, 'Vegetables')])
        self.assertEqual(outcome, (-7, {'Fruits': 1, 'Vegetables': 1}))

    def test_duplicate_categories(self):
        """Test with duplicate categories."""
        outcome = f_933([(1, 'Fruits'), (2, 'Fruits'), (3, 'Vegetables')])
        self.assertEqual(outcome, (6, {'Fruits': 2, 'Vegetables': 1}))

    def test_single_tuple_in_list(self):
        """Test with a single tuple in the list."""
        outcome = f_933([(10, 'Meat')])
        self.assertEqual(outcome, (10, {'Meat': 1}))

    def test_float_numeric_values(self):
        """Test with non-integer numeric values (floats)."""
        outcome = f_933([(1.5, 'Fruits'), (2.5, 'Vegetables')])
        self.assertEqual(outcome, (4.0, {'Fruits': 1, 'Vegetables': 1}))
110+
111+
112+
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    run_tests()

data/clean/f_934_zhihan.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
import ast
2+
import json
3+
from collections import Counter
4+
5+
6+
def f_934(file_pointer):
    """
    Reads from a given file pointer to a JSON file, evaluates strings that represent dictionaries to actual dictionaries,
    and counts the frequency of each key across all dictionary entries in the JSON data.

    Parameters:
    file_pointer (file object): An open file object pointing to the JSON file containing the data. This file should
                                already be opened in the correct mode (e.g., 'r' for reading).

    Returns:
    collections.Counter: A Counter object representing the frequency of each key found in the dictionaries.
                         Only top-level keys of each dictionary are counted.

    Raises:
    json.JSONDecodeError: If the file content is not valid JSON.
    SyntaxError: If a string entry cannot even be parsed as a Python expression
                 (only ValueError from ast.literal_eval is treated as "skip this entry").

    Requirements:
    - ast
    - json
    - collections.Counter

    Note:
    This function assumes the input JSON data is a list of dictionaries or strings that can be evaluated as dictionaries.
    Entries that are neither dictionaries nor strings evaluating to dictionaries are ignored.

    Example:
    >>> with open("data.json", "r") as file:
    ...     key_frequency = f_934(file)
    >>> print(key_frequency)
    Counter({'name': 5, 'age': 5, 'city': 3})
    """
    data = json.load(file_pointer)
    key_frequency_counter = Counter()

    for item in data:
        if isinstance(item, str):
            try:
                # literal_eval only accepts Python literals, so arbitrary
                # code embedded in the JSON strings is never executed.
                item = ast.literal_eval(item)
            except ValueError:
                # Parsable but not a literal (e.g. a bare name): skip it.
                continue

        if isinstance(item, dict):
            key_frequency_counter.update(item.keys())

    return key_frequency_counter
48+
49+
50+
import unittest
51+
from io import BytesIO
52+
from collections import Counter
53+
import json
54+
55+
56+
def run_tests():
    """Run every test in ``TestCases`` with a text runner.

    Returns:
        unittest.TestResult: The outcome of the run, so callers can inspect
        ``wasSuccessful()`` if they wish. Callers that ignore the return
        value are unaffected.
    """
    suite = unittest.TestSuite()
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # the loader method below is the supported equivalent.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    return runner.run(suite)
61+
62+
63+
class TestCases(unittest.TestCase):
    """Exercises f_934 against in-memory byte streams standing in for JSON files."""

    @staticmethod
    def _json_stream(payload):
        # Serialize to JSON and expose the UTF-8 bytes through a file-like object.
        return BytesIO(json.dumps(payload).encode('utf-8'))

    def test_with_dicts(self):
        # JSON array whose elements are plain objects
        stream = self._json_stream([{"name": "John", "age": 30}, {"name": "Jane", "age": 25}, {"name": "Jake"}])

        # Each key should be tallied once per object it appears in
        wanted = Counter({'name': 3, 'age': 2})
        self.assertEqual(f_934(stream), wanted)

    def test_with_string_repr_dicts(self):
        # JSON array whose elements are strings that spell out dictionaries
        stream = self._json_stream(['{"city": "New York"}', '{"city": "Los Angeles", "temp": 75}'])

        wanted = Counter({'city': 2, 'temp': 1})
        self.assertEqual(f_934(stream), wanted)

    def test_with_invalid_json(self):
        # Content that is not JSON at all
        stream = BytesIO(b'invalid json')

        # The decode error raised by json.load is expected to reach the caller
        with self.assertRaises(json.JSONDecodeError):
            f_934(stream)

    def test_empty_json(self):
        # An empty JSON array
        stream = self._json_stream([])

        wanted = Counter()
        self.assertEqual(f_934(stream), wanted)

    def test_mixed_valid_invalid_dicts(self):
        # Dictionary strings mixed with a string that is not a dictionary
        stream = self._json_stream(['{"name": "John"}', 'Invalid', '{"age": 30}'])

        wanted = Counter({'name': 1, 'age': 1})
        self.assertEqual(f_934(stream), wanted)

    def test_nested_dicts(self):
        # Nested objects: only the outermost keys are expected in the tally
        stream = self._json_stream([{"person": {"name": "John", "age": 30}}, {"person": {"city": "New York"}}])

        wanted = Counter({'person': 2})
        self.assertEqual(f_934(stream), wanted)

    def test_with_actual_json_objects_instead_of_strings(self):
        # Real JSON objects rather than their string representations
        stream = self._json_stream([{"key1": "value1"}, {"key2": "value2", "key3": "value3"}])

        wanted = Counter({'key1': 1, 'key2': 1, 'key3': 1})
        self.assertEqual(f_934(stream), wanted)

    def test_invalid_json_structure(self):
        # Top-level JSON value is an object, not an array
        stream = self._json_stream({"not": "a list"})

        # The current implementation surfaces this as a SyntaxError
        with self.assertRaises(SyntaxError):
            f_934(stream)
138+
139+
140+
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    run_tests()

data/clean/f_935_zhihan.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import ast
2+
import os
3+
import glob
4+
5+
# Constants
6+
DIRECTORY = 'data'
7+
8+
def f_935(directory):
    """
    Convert all Unicode string representations of dictionaries in all text files
    in the specified directory to Python dictionaries.

    Each non-blank line of every ``*.txt`` file directly inside the directory is
    evaluated as a Python literal. Files are processed in sorted path order so the
    result order is deterministic.

    Parameters:
    directory (str): The path to the directory containing the text files.

    Returns:
    list: A list of dictionaries extracted from the text files.

    Requirements:
    - ast
    - os
    - glob

    Example:
    >>> f_935("sample_directory/")
    [{'key1': 'value1'}, {'key2': 'value2'}]

    Note:
    Ensure that the text files in the directory contain valid Unicode string representations of dictionaries.
    Files are read as UTF-8. Blank lines are ignored.

    Raises:
    - The function would raise a ValueError if there are text file(s) that have invalid dictionary representation
    """
    pattern = os.path.join(directory, '*.txt')

    results = []
    # glob gives no ordering guarantee; sort for reproducible output.
    for file_path in sorted(glob.glob(pattern)):
        with open(file_path, 'r', encoding='utf-8') as f:
            for line_number, line in enumerate(f, start=1):
                text = line.strip()
                if not text:
                    # A blank line carries no dictionary; literal_eval('')
                    # would otherwise fail with SyntaxError.
                    continue
                try:
                    results.append(ast.literal_eval(text))
                except SyntaxError as err:
                    # literal_eval reports unparsable text as SyntaxError;
                    # surface it as the documented ValueError instead.
                    raise ValueError(
                        "{}:{}: invalid dictionary representation".format(file_path, line_number)
                    ) from err

    return results
44+
45+
import unittest
46+
import os
47+
import ast
48+
import shutil
49+
50+
51+
def run_tests():
    """Run every test in ``TestCases`` with a text runner.

    Returns:
        unittest.TestResult: The outcome of the run, so callers can inspect
        ``wasSuccessful()`` if they wish. Callers that ignore the return
        value are unaffected.
    """
    suite = unittest.TestSuite()
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # the loader method below is the supported equivalent.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    return runner.run(suite)
56+
57+
class TestCases(unittest.TestCase):
    """Filesystem-backed tests for f_935.

    Each test gets a fresh ``testdir_f_935`` tree (created relative to the
    current working directory) that is removed again in ``tearDown``.
    """

    @staticmethod
    def _populate(directory, files):
        # Create the directory and write each {file name: content} entry into
        # it. The context manager guarantees the handle is closed even if a
        # write fails.
        os.makedirs(directory, exist_ok=True)
        for name, content in files.items():
            with open(os.path.join(directory, name), "w") as handle:
                handle.write(content)

    def setUp(self):
        self.test_dir = 'testdir_f_935'
        os.makedirs(self.test_dir, exist_ok=True)

        # Five files, one valid dictionary each.
        self.sample_directory = 'testdir_f_935/sample_directory'
        self._populate(self.sample_directory, {
            "1.txt": "{'key1': 'value1'}",
            "2.txt": "{'key2': 'value2', 'key3': 'value3'}",
            "3.txt": "{'key4': 'value4'}",
            "4.txt": "{'key5': 'value5', 'key6': 'value6', 'key7': 'value7'}",
            "5.txt": "{'key8': 'value8'}",
        })

        # No files at all.
        self.empty_directory = "testdir_f_935/empty_directory"
        self._populate(self.empty_directory, {})

        # One file holding two dictionaries on separate lines.
        self.multi_line_directory = "testdir_f_935/multi_line_directory"
        self._populate(self.multi_line_directory, {
            "1.txt": "{'key1': 'value1'}\n{'key2': 'value2'}",
        })

        # A mix of one invalid and one valid file, matching test_case_4's
        # description ("some of which are invalid").
        self.mixed_directory = "testdir_f_935/mixed_directory"
        self._populate(self.mixed_directory, {
            "1.txt": "invalid",
            "2.txt": "{'key1': 'value1'}",
        })

        # Only an invalid file, matching test_case_3's description.
        self.invalid_directory = "testdir_f_935/invalid_directory"
        self._populate(self.invalid_directory, {
            "1.txt": "invalid",
        })

    def tearDown(self):
        # Clean up the test directory
        shutil.rmtree(self.test_dir)

    def test_case_1(self):
        # Test with the sample directory
        result = f_935(self.sample_directory)
        expected_result = [
            {'key1': 'value1'},
            {'key2': 'value2', 'key3': 'value3'},
            {'key4': 'value4'},
            {'key5': 'value5', 'key6': 'value6', 'key7': 'value7'},
            {'key8': 'value8'}
        ]
        # Order-insensitive comparison that also rejects unexpected extras.
        self.assertCountEqual(result, expected_result)

    def test_case_2(self):
        # Test with an empty directory
        result = f_935(self.empty_directory)
        self.assertEqual(result, [])

    def test_case_3(self):
        # Test with a directory containing a text file without valid dictionary representation
        with self.assertRaises(ValueError):
            f_935(self.invalid_directory)

    def test_case_4(self):
        # Test with a directory containing multiple text files, some of which are invalid
        with self.assertRaises(ValueError):
            f_935(self.mixed_directory)

    def test_case_5(self):
        # Test with a directory containing a text file with multiple valid dictionary representations
        result = f_935(self.multi_line_directory)
        expected_result = [
            {'key1': 'value1'},
            {'key2': 'value2'}
        ]
        self.assertEqual(result, expected_result)
140+
141+
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    run_tests()

0 commit comments

Comments
 (0)