@@ -34,15 +34,14 @@ def remove_unnecessary_blank_lines(source_code: str) -> str:
 
 
 def get_data_from_dir(
-    path: str = './data',
-    max_count_lines: Optional[int] = None
+    path: str = "./data", max_count_lines: Optional[int] = None
 ) -> pd.DataFrame:
     df = pd.DataFrame()
     for filename in os.listdir(path):
-        if not re.search(r'.csv$', filename):
+        if not re.search(r".csv$", filename):
             continue
 
-        tmp_df = pd.read_csv(os.path.join(path, filename), sep=';', index_col=0)
+        tmp_df = pd.read_csv(os.path.join(path, filename), sep=";", index_col=0)
         df = df.append(tmp_df, ignore_index=True)
 
     if max_count_lines:
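
For reference, the reformatted get_data_from_dir keeps its behaviour: it concatenates every CSV file in the directory (semicolon-separated, first column used as the index) and, judging by the parameter name, truncates the result when max_count_lines is set. A minimal usage sketch, assuming the functions in this file are importable as a module named utils (the module name is an assumption, not part of this diff):

    # Hypothetical usage; "utils" is an assumed module name.
    from utils import get_data_from_dir

    df = get_data_from_dir(path="./data", max_count_lines=1000)
    print(df.shape)
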
@@ -52,43 +51,43 @@ def get_data_from_dir(
 
 
 def save_works_from_repo_url(url: str, check_policy: bool = True) -> None:
-    current_repo_name = url.split('/')[-1]
-    env_config = Config(RepositoryEnv('../../.env'))
+    current_repo_name = url.split("/")[-1]
+    env_config = Config(RepositoryEnv("../../.env"))
     gh = GitHubParser(
-        file_extensions=(re.compile(r'.py$'),),
+        file_extensions=(re.compile(r".py$"),),
         check_all=check_policy,
-        access_token=env_config.get('ACCESS_TOKEN')
+        access_token=env_config.get("ACCESS_TOKEN"),
     )
     files = list(gh.get_files_generator_from_repo_url(url))
     files = [(remove_unnecessary_blank_lines(file.code), file.link) for file in files]
 
     df = pd.DataFrame(
         {
-            'content': [file_[0] for file_ in files[:-1]],
-            'link': [file_[1] for file_ in files[:-1]],
-            'extension': ['py'] * (len(files) - 1),
-            'repo_name': [current_repo_name] * (len(files) - 1),
-            'content_len': [len(file_[0]) for file_ in files[:-1]],
-            'content_len_without_blank': [
-                len(file_[0].replace(' ', '').replace('\n', '').replace('\t', ''))
+            "content": [file_[0] for file_ in files[:-1]],
+            "link": [file_[1] for file_ in files[:-1]],
+            "extension": ["py"] * (len(files) - 1),
+            "repo_name": [current_repo_name] * (len(files) - 1),
+            "content_len": [len(file_[0]) for file_ in files[:-1]],
+            "content_len_without_blank": [
+                len(file_[0].replace(" ", "").replace("\n", "").replace("\t", ""))
                 for file_ in files[:-1]
             ],
-            'count_lines_without_blank_lines': [
+            "count_lines_without_blank_lines": [
                 len(file_[0].splitlines()) for file_ in files[:-1]
-            ]
+            ],
         }
     )
-    df = df[df['count_lines_without_blank_lines'] > 5]
-    df.to_csv(os.path.join('./data/', current_repo_name + '.csv'), sep=';')
+    df = df[df["count_lines_without_blank_lines"] > 5]
+    df.to_csv(os.path.join("./data/", current_repo_name + ".csv"), sep=";")
 
 
 def get_time_to_meta(df: pd.DataFrame, iterations: int = 10) -> pd.DataFrame:
     count_lines = []
     to_meta_time = []
-    for (index, content) in df[
-        ['content', 'link', 'count_lines_without_blank_lines']
+    for index, content in df[
+        ["content", "link", "count_lines_without_blank_lines"]
     ].iterrows():
-        print(index, " " * 20, end='\r')
+        print(index, " " * 20, end="\r")
         for _ in range(iterations):
             tree = get_ast_from_content(content[0], content[1])
             if tree is None:
@@ -102,12 +101,7 @@ def get_time_to_meta(df: pd.DataFrame, iterations: int = 10) -> pd.DataFrame:
             except Exception:
                 break
 
-    output = pd.DataFrame(
-        {
-            'count_lines': count_lines,
-            'times': to_meta_time
-        }
-    )
+    output = pd.DataFrame({"count_lines": count_lines, "times": to_meta_time})
 
     return output
 
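
The collapsed constructor above preserves the two result columns. A minimal sketch of the expected output, assuming df was produced by get_data_from_dir (the column names are taken directly from this diff; absolute timings depend on the machine and corpus):

    # Hypothetical run of the timing helper.
    timings = get_time_to_meta(df, iterations=10)
    assert list(timings.columns) == ["count_lines", "times"]
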
@@ -118,7 +112,7 @@ def plot_and_save_result(
     ylabel: str,
     title: str,
     what: str,
-    trend: Literal['linear', 'n^2', 'n^3', 'n^4'] = 'linear'
+    trend: Literal["linear", "n^2", "n^3", "n^4"] = "linear",
 ) -> None:
     # Simple Moving average
     unique_count_lines = np.unique(df.count_lines)
@@ -135,75 +129,72 @@ def plot_and_save_result(
     plt.figure(figsize=(12, 12), dpi=80)
     # plt.plot(unique_count_lines, mean_times, label='Среднее')
 
-    if trend == 'linear':
+    if trend == "linear":
         z = np.polyfit(unique_count_lines, mean_times, 1)
         p = np.poly1d(z)
         plt.plot(
-            unique_count_lines, p(unique_count_lines), "r--", label='Линейный тренд.'
+            unique_count_lines, p(unique_count_lines), "r--", label="Линейный тренд."
         )
-    elif trend == 'n^2':
+    elif trend == "n^2":
         popt_cons, _ = curve_fit(
             square_func,
             unique_count_lines,
             mean_times,
-            bounds=([-np.inf, 0., 0.], [np.inf, 0.1 ** 100, 0.1 ** 100])
+            bounds=([-np.inf, 0.0, 0.0], [np.inf, 0.1 ** 100, 0.1 ** 100]),
         )
         p = np.poly1d(popt_cons)
         plt.plot(
             unique_count_lines,
             p(unique_count_lines),
-            "r--", label='Квадратичный тренд.'
+            "r--",
+            label="Квадратичный тренд.",
         )
-    elif trend == 'n^3':
+    elif trend == "n^3":
         popt_cons, _ = curve_fit(
             cube_func,
             unique_count_lines,
             mean_times,
-            bounds=([-np.inf, 0., 0., 0.], [np.inf, 0.1 ** 100, 0.1 ** 100, 0.1 ** 100])
+            bounds=(
+                [-np.inf, 0.0, 0.0, 0.0],
+                [np.inf, 0.1 ** 100, 0.1 ** 100, 0.1 ** 100],
+            ),
         )
         p = np.poly1d(popt_cons)
         plt.plot(
-            unique_count_lines,
-            p(unique_count_lines),
-            "r--",
-            label='Кубический тренд.'
+            unique_count_lines, p(unique_count_lines), "r--", label="Кубический тренд."
         )
-    elif trend == 'n^4':
+    elif trend == "n^4":
         popt_cons, _ = curve_fit(
             quart_func,
             unique_count_lines,
             mean_times,
             bounds=(
-                [-np.inf, 0., 0., 0., 0.],
-                [np.inf, 0.1 ** 100, 0.1 ** 100, 0.1 ** 100, 0.1 ** 100]
-            )
+                [-np.inf, 0.0, 0.0, 0.0, 0.0],
+                [np.inf, 0.1 ** 100, 0.1 ** 100, 0.1 ** 100, 0.1 ** 100],
+            ),
         )
         p = np.poly1d(popt_cons)
-        plt.plot(unique_count_lines, p(unique_count_lines), "r--", label='n^4.')
+        plt.plot(unique_count_lines, p(unique_count_lines), "r--", label="n^4.")
     else:
         raise Exception(f"Incorrect tred '{trend}'.")
 
     rolling = pd.DataFrame(
-        {
-            'unique_count_lines': unique_count_lines,
-            'mean_times': mean_times
-        }
+        {"unique_count_lines": unique_count_lines, "mean_times": mean_times}
     )
     num_window = 20
     plt.plot(
         rolling.unique_count_lines,
         rolling.mean_times.rolling(window=num_window).mean(),
-        label=f'Скользящее среднее по {num_window} ти замерам.'
+        label=f"Скользящее среднее по {num_window} ти замерам.",
     )
 
     plt.ylabel(ylabel, fontsize=15)
     plt.xlabel(xlabel, fontsize=15)
     plt.title(title, fontsize=17)
-    plt.legend(loc='upper left')
+    plt.legend(loc="upper left")
     plt.savefig(
-        './graphics/need_time_{}_{}.png'.format(
-            what,
-            datetime.now().strftime("%d%m%Y_%H%M%S")
+        "./graphics/need_time_{}_{}.png".format(
+            what, datetime.now().strftime("%d%m%Y_%H%M%S")
         )
     )
 
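
A sketch of how the reformatted plotting helper might be called. The leading parameters (the timing DataFrame and the x-axis label) sit above the hunks shown here and are assumed from the function body; the label strings below are placeholders:

    # Hypothetical call; saves ./graphics/need_time_<what>_<timestamp>.png.
    plot_and_save_result(
        timings,
        xlabel="Lines of code",
        ylabel="Time, s",
        title="Metadata extraction time",
        what="meta",
        trend="linear",
    )
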
@@ -212,7 +203,7 @@ def get_time_algorithms(
     df: pd.DataFrame,
     work,
     iterations: int = 5,
-    metric: Literal['fast', 'gst', 'structure'] = 'fast'
+    metric: Literal["fast", "gst", "structure"] = "fast",
 ) -> pd.DataFrame:
     count_lines = []
     times = []
@@ -221,11 +212,11 @@ def get_time_algorithms(
         raise Exception("Unexpected error when parsing first work.")
 
     features1 = get_features_from_ast(tree1, work.link)
-    for (index, content) in df[
-        ['content', 'link', 'count_lines_without_blank_lines']
+    for index, content in df[
+        ["content", "link", "count_lines_without_blank_lines"]
     ].iterrows():
         for _ in range(iterations):
-            print(index, " " * 20, end='\r')
+            print(index, " " * 20, end="\r")
             tree2 = get_ast_from_content(content[0], content[1])
             if tree2 is None:
                 continue
@@ -234,34 +225,29 @@ def get_time_algorithms(
             except Exception:
                 continue
 
-            if metric == 'fast':
+            if metric == "fast":
                 start = perf_counter()
                 value_jakkar_coef(features1.tokens, features2.tokens)
                 counter_metric(features1.operators, features2.operators)
                 counter_metric(features1.keywords, features2.keywords)
                 counter_metric(features1.literals, features2.literals)
                 end = perf_counter() - start
                 times.append(end)
-            elif metric == 'gst':
+            elif metric == "gst":
                 start = perf_counter()
                 gst(features1.tokens, features2.tokens, 6)
                 end = perf_counter() - start
                 times.append(end)
-            elif metric == 'structure':
+            elif metric == "structure":
                 start = perf_counter()
                 struct_compare(features1.structure, features2.structure)
                 end = perf_counter() - start
                 times.append(end)
             else:
-                raise Exception('Incorrect metric!')
+                raise Exception("Incorrect metric!")
 
             count_lines.append(content[2])
 
-    output = pd.DataFrame(
-        {
-            'count_lines': count_lines,
-            'times': times
-        }
-    )
+    output = pd.DataFrame({"count_lines": count_lines, "times": times})
 
     return output
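
Taken together, these helpers form a small benchmarking pipeline: load the corpus, time either the metadata extraction or one of the comparison metrics, then plot the result. A sketch of a driver under stated assumptions (df comes from get_data_from_dir, and work is a single file record exposing at least the .link attribute used above; no such driver is part of this commit):

    # Hypothetical driver; 'work' must be supplied by the caller.
    df = get_data_from_dir("./data")
    timings = get_time_algorithms(df, work, iterations=5, metric="fast")
    # timings has 'count_lines' and 'times' columns and can be passed to
    # plot_and_save_result as sketched earlier.
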