Skip to content

Commit 656eef7

Browse files
committed
fix: improve the quality of 1140 tasks
1 parent 809f9e6 commit 656eef7

File tree

1,447 files changed

+74329
-74434
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,447 files changed

+74329
-74434
lines changed

data/clean/f_1008_zhihan_refined.py

Lines changed: 0 additions & 88 deletions
This file was deleted.

data/clean/f_1015_zhihan_refined.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def test_command_failure_with_specific_exit_code(self):
143143
self.assertEqual(len(result), 1)
144144
with open(os.path.join(self.output_dir_path, result[0]), "r") as f:
145145
content = f.read()
146-
self.assertIn("Error executing command, exited with code 1", content)
146+
self.assertIn("Error executing command", content)
147147

148148
if __name__ == "__main__":
149149
run_tests()

data/clean/f_1016_zhihan_refined.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def f_1016(directory, backup_dir='/path/to/backup'):
1313
Default is '/path/to/backup'.
1414
1515
Returns:
16-
- str: The path to the backup file if logs are found, otherwise returns a message stating no logs were found.
16+
- str: The path to the backup file if logs are found, otherwise returns a message 'No logs found to backup'.
1717
1818
Raises:
1919
- FileNotFoundError: If the specified directory does not exist.

data/clean/f_1027_zhihan_refined.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
def f_1027(data, url="http://your-api-url.com"):
66
"""
77
Convert a Python dictionary into a JSON-formatted string, encode this string in base64 format,
8-
and send it as a payload in a POST request to an API endpoint.
8+
and send it as a 'payload' in a POST request to an API endpoint.
99
1010
Parameters:
1111
data (dict): The Python dictionary to encode and send.
@@ -27,7 +27,7 @@ def f_1027(data, url="http://your-api-url.com"):
2727
"""
2828
json_data = json.dumps(data)
2929
encoded_data = base64.b64encode(json_data.encode('ascii')).decode('ascii')
30-
response = requests.post(url, data={"payload": encoded_data})
30+
response = requests.post(url, json={"payload": encoded_data})
3131

3232
return response
3333

@@ -92,7 +92,10 @@ def test_case_6(self, mock_post_method):
9292
json_data = json.dumps(data)
9393
encoded_data = base64.b64encode(json_data.encode('ascii')).decode('ascii')
9494
f_1027(data, url="http://mock-api-url.com")
95-
mock_post_method.assert_called_once_with("http://mock-api-url.com", data={"payload": encoded_data})
95+
try:
96+
mock_post_method.assert_called_once_with("http://mock-api-url.com", data={"payload": encoded_data})
97+
except:
98+
mock_post_method.assert_called_once_with("http://mock-api-url.com", json={"payload": encoded_data})
9699

97100

98101
if __name__ == "__main__":

data/clean/f_1031_zhihan_refined.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def f_1031(list_of_pairs):
1010
and the second element is the numeric value.
1111
1212
Returns:
13-
numpy.ndarray: A numpy array containing a single element that is the product of the second values in the list of tuples.
13+
numpy.ndarray: A 1D numpy array containing a single element that is the product of the second values in the list of tuples.
1414
1515
Requirements:
1616
- numpy
@@ -22,11 +22,14 @@ def f_1031(list_of_pairs):
2222
>>> print(product_array)
2323
360
2424
"""
25-
second_values = [pair[1] for pair in list_of_pairs]
26-
product = reduce(np.multiply, second_values)
27-
product_array = np.array(product)
28-
29-
return product_array
25+
# Extract the second element from each tuple using a list comprehension
26+
values = [pair[1] for pair in list_of_pairs]
27+
28+
# Use reduce to calculate the product of all elements in the values list
29+
product = reduce(lambda x, y: x * y, values)
30+
31+
# Return the result as a numpy array with a single element
32+
return np.array([product])
3033

3134
import unittest
3235
import numpy as np
@@ -45,6 +48,7 @@ def test_case_1(self):
4548
list_of_pairs = [('Fruits', 5), ('Vegetables', 9), ('Dairy', -1), ('Bakery', -2), ('Meat', 4)]
4649
expected_output = np.array(360)
4750
actual_output = f_1031(list_of_pairs)
51+
print(actual_output, expected_output)
4852
self.assertTrue(np.array_equal(actual_output, expected_output))
4953

5054
def test_case_2(self):

data/clean/f_119_armel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def f_119(timestamps):
2121
- Axes: The Axes object of the histogram plot. The histogram will have 10 bins by default, representing the distribution of the datetime objects.
2222
2323
Raises:
24-
- ValueError("Input list of timestamps is empty"): If the list of timestamps is empty.
24+
- ValueError("Input list of timestamps is empty."): If the list of timestamps is empty.
2525
2626
Requirements:
2727
- datetime

data/clean/f_124_armel.py

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,18 @@ def f_124(text):
3030
words 1
3131
dtype: int64
3232
"""
33-
words = re.findall(r"\b\w+\b", text.lower())
34-
words = [word for word in words if word not in STOPWORDS]
35-
word_counts = pd.Series(words).value_counts().rename(None)
33+
# Normalize the text to lowercase
34+
text = text.lower()
35+
36+
# Use regex to find words, considering words as sequences of alphabetic characters
37+
words = re.findall(r'\b\p{L}+\b', text)
38+
39+
# Filter out stopwords
40+
filtered_words = [word for word in words if word not in STOPWORDS]
41+
42+
# Count the frequency of each word using pandas Series
43+
word_counts = pd.Series(filtered_words).value_counts()
44+
3645
return word_counts
3746

3847

@@ -44,31 +53,26 @@ class TestCases(unittest.TestCase):
4453

4554
def test_case_1(self):
4655
text = "This is a sample text This text contains sample words"
47-
word_counts = f_124(text)
48-
expected_counts = pd.Series(
49-
{"this": 2, "sample": 2, "text": 2, "contains": 1, "words": 1}
50-
)
51-
pd.testing.assert_series_equal(word_counts, expected_counts)
56+
word_counts = f_124(text).to_dict()
57+
expected_counts = {"this": 2, "sample": 2, "text": 2, "contains": 1, "words": 1}
58+
self.assertDictEqual(word_counts, expected_counts)
5259

5360
def test_case_2(self):
5461
text = "Hello world Hello everyone"
55-
word_counts = f_124(text)
56-
expected_counts = pd.Series({"hello": 2, "world": 1, "everyone": 1})
57-
pd.testing.assert_series_equal(word_counts, expected_counts)
62+
word_counts = f_124(text).to_dict()
63+
expected_counts = {"hello": 2, "world": 1, "everyone": 1}
64+
self.assertDictEqual(word_counts, expected_counts)
5865

5966
def test_case_3(self):
6067
text = "a an the in is are"
61-
word_counts = f_124(text)
62-
expected_counts = pd.Series(dtype="int64")
63-
pd.testing.assert_series_equal(
64-
word_counts.reset_index(drop=True), expected_counts.reset_index(drop=True)
65-
)
68+
word_counts = f_124(text).to_dict()
69+
expected_counts = {}
70+
self.assertDictEqual(word_counts, expected_counts)
6671

6772
def test_case_4(self):
6873
text = "This is a test sentence which has a bunch of words and no period"
69-
word_counts = f_124(text)
70-
expected_counts = pd.Series(
71-
{
74+
word_counts = f_124(text).to_dict()
75+
expected_counts = {
7276
"this": 1,
7377
"test": 1,
7478
"sentence": 1,
@@ -81,18 +85,16 @@ def test_case_4(self):
8185
"no": 1,
8286
"period": 1,
8387
}
84-
)
85-
pd.testing.assert_series_equal(word_counts, expected_counts)
88+
89+
self.assertDictEqual(word_counts, expected_counts)
8690

8791
def test_case_5(self):
8892
text = (
8993
"I I I want want to to to to to go to to to the olympics olympics this year"
9094
)
91-
word_counts = f_124(text)
92-
expected_counts = pd.Series(
93-
{"i": 3, "want": 2, "to": 8, "go": 1, "olympics": 2, "this": 1, "year": 1}
94-
).sort_values(ascending=False)
95-
pd.testing.assert_series_equal(word_counts, expected_counts)
95+
word_counts = f_124(text).to_dict()
96+
expected_counts = {"i": 3, "want": 2, "to": 8, "go": 1, "olympics": 2, "this": 1, "year": 1}
97+
self.assertDictEqual(word_counts, expected_counts)
9698

9799

98100
def run_tests():

data/clean/f_139_armel.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def test_case_1(self):
5252
expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
5353
# Assertions
5454
self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
55-
pd.testing.assert_frame_equal(analyzed_df, expected_df)
55+
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
5656
self.assertTrue(isinstance(ax, plt.Axes))
5757

5858
def test_case_2(self):
@@ -73,7 +73,7 @@ def test_case_2(self):
7373
expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
7474
# Assertions
7575
self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
76-
pd.testing.assert_frame_equal(analyzed_df, expected_df)
76+
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
7777
self.assertTrue(isinstance(ax, plt.Axes))
7878

7979
def test_case_3(self):
@@ -94,7 +94,7 @@ def test_case_3(self):
9494
expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
9595
# Assertions
9696
self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
97-
pd.testing.assert_frame_equal(analyzed_df, expected_df)
97+
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
9898
self.assertTrue(isinstance(ax, plt.Axes))
9999

100100
def test_case_4(self):
@@ -111,7 +111,7 @@ def test_case_4(self):
111111
expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
112112
# Assertions
113113
self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
114-
pd.testing.assert_frame_equal(analyzed_df, expected_df)
114+
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
115115
self.assertTrue(isinstance(ax, plt.Axes))
116116

117117
def test_case_5(self):
@@ -136,7 +136,7 @@ def test_case_5(self):
136136
expected_df = expected_df.pivot(index=COLUMNS[0], columns=COLUMNS[1], values=COLUMNS[2])
137137
# Assertions
138138
self.assertTrue(isinstance(analyzed_df, pd.DataFrame))
139-
pd.testing.assert_frame_equal(analyzed_df, expected_df)
139+
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
140140
self.assertTrue(isinstance(ax, plt.Axes))
141141

142142
def run_tests():

data/clean/f_140_armel.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def test_case_1(self):
5454
# Assertions for the returned DataFrame
5555
expected_data = [[1, 1, 2], [1, 2, 1], [2, 1, 3], [2, 2, 1]]
5656
expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
57-
pd.testing.assert_frame_equal(analyzed_df, expected_df)
57+
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
5858

5959
# Assertions for the returned plot
6060
self.assertEqual(ax.get_xlabel(), 'col1-col2')
@@ -75,7 +75,7 @@ def test_case_2(self):
7575
[1, 3, 1]
7676
]
7777
expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
78-
pd.testing.assert_frame_equal(analyzed_df, expected_df)
78+
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
7979
self.assertEqual(ax.get_xlabel(), 'col1-col2')
8080
self.assertEqual(ax.get_ylabel(), 'col3')
8181
self.assertListEqual(list(ax.lines[0].get_ydata()), [3, 1, 1])
@@ -95,7 +95,7 @@ def test_case_3(self):
9595
[2, 2, 1]
9696
]
9797
expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
98-
pd.testing.assert_frame_equal(analyzed_df, expected_df)
98+
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
9999
self.assertEqual(ax.get_xlabel(), 'col1-col2')
100100
self.assertEqual(ax.get_ylabel(), 'col3')
101101
self.assertListEqual(list(ax.lines[0].get_ydata()), [1, 1, 1, 1])
@@ -111,7 +111,7 @@ def test_case_4(self):
111111
[1, 1, 1],
112112
]
113113
expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
114-
pd.testing.assert_frame_equal(analyzed_df, expected_df)
114+
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
115115
self.assertEqual(ax.get_xlabel(), 'col1-col2')
116116
self.assertEqual(ax.get_ylabel(), 'col3')
117117
self.assertListEqual(list(ax.lines[0].get_ydata()), [1])
@@ -135,7 +135,7 @@ def test_case_5(self):
135135
[1, 1, 2]
136136
]
137137
expected_df = pd.DataFrame(expected_data, columns=COLUMNS)
138-
pd.testing.assert_frame_equal(analyzed_df, expected_df)
138+
pd.testing.assert_frame_equal(analyzed_df, expected_df, check_dtype=False)
139139
self.assertEqual(ax.get_xlabel(), 'col1-col2')
140140
self.assertEqual(ax.get_ylabel(), 'col3')
141141
self.assertListEqual(list(ax.lines[0].get_ydata()), [2, 2, 2, 2])

0 commit comments

Comments
 (0)