11import os
2- import json
32import requests
43import getpass
54import configargparse
6- from progress .bar import ChargingBar
7-
8-
9- class CustomProgress (ChargingBar ):
10- message = 'Downloading Solutions'
11- suffix = '%(percent)d%% [%(index)d/%(max)d]'
12-
13-
14- class Metadata :
15-
16- METADATA_FILE_NAME = 'metadata.json'
17-
18- def __init__ (self ):
19- self .metadata = {}
20- if (os .path .isfile (self .METADATA_FILE_NAME )):
21- self .metadata = json .load (open (self .METADATA_FILE_NAME ))
22-
23- def put (self , challenge_id , submission_id ):
24- self .metadata [str (challenge_id )] = str (submission_id )
25- json .dump (self .metadata , open (self .METADATA_FILE_NAME , 'w' ))
26-
27- def get (self , challenge_id ):
28- challenge_id_string = str (challenge_id )
29- if challenge_id_string not in self .metadata :
30- self .metadata [challenge_id_string ] = - 1
31- submission_id_string = self .metadata [challenge_id_string ]
32- return int (submission_id_string )
5+ from .progress_bar import CustomProgress
6+ from .metadata import Metadata
7+ from .constants import extensions
338
349
3510class Crawler :
3611 base_url = 'https://www.hackerrank.com/'
3712 login_url = base_url + 'auth/login'
3813 submissions_url = base_url + 'rest/contests/master/submissions/?offset={}&limit={}'
3914 challenge_url = base_url + 'rest/contests/master/challenges/{}/submissions/{}'
40- domain_url = base_url + 'domains/{}/{}'
15+ domain_url = base_url + 'domains/{}'
16+ subdomain_url = base_url + 'domains/{}/{}'
4117 problem_url = base_url + 'challenges/{}/problem'
4218
43- new_readme_text = '## [{}]({})\n \n |Problem Name|Problem Link|Language|Solution Link|\n ---|---|---|---\n '
44- readme_headers_len = len (new_readme_text .split ('\n ' )) - 1
45- problem_readme_text = '|{}|[Problem]({})|{}|[Solution](./{})|\n '
19+ subdomain_readme_text = '## [{}]({})\n \n |Problem Name|Problem Link|Language|Solution Link|\n ---|---|---|---\n '
20+ domain_readme_text = '## [{}]({})\n \n |Subdomain|Problem Name|Problem Link|Language|Solution Link|\n ---|---|---|---|---\n '
21+ root_readme_text = '## [Hackerrank]({})\n \n |Domain|Subdomain|Problem Name|Problem Link|Language|Solution Link|\n ---|---|---|---|---|---\n '
22+ readme_headers_len = len (subdomain_readme_text .split ('\n ' )) - 1
23+
24+ subdomain_readme_row = '|{}|[Problem]({})|{}|[Solution]({})|\n '
25+ domain_readme_row = '|{}|{}|[Problem]({})|{}|[Solution]({})|\n '
26+ root_readme_row = '|{}|{}|{}|[Problem]({})|{}|[Solution]({})|\n '
4627
4728 base_folder_name = 'Hackerrank'
4829
@@ -51,57 +32,7 @@ class Crawler:
5132 # prepend language in file extension e.g Hackerrank/Regex/Introduction/matching.python3.py
5233 prepend_language_in_extension = False
5334
54- # file extensions
55- file_extensions = {
56- 'ada' : 'ada' ,
57- 'bash' : 'sh' ,
58- 'c' : 'c' ,
59- 'clojure' : 'clj' ,
60- 'coffeescript' : 'coffee' ,
61- 'cpp' : 'cpp' ,
62- 'cpp14' : 'cpp' ,
63- 'csharp' : 'cs' ,
64- 'd' : 'd' ,
65- 'db2' : 'sql' ,
66- 'elixir' : 'ex' ,
67- 'erlang' : 'erl' ,
68- 'fortran' : 'for' ,
69- 'fsharp' : 'fs' ,
70- 'go' : 'go' ,
71- 'groovy' : 'groovy' ,
72- 'haskell' : 'hs' ,
73- 'java' : 'java' ,
74- 'java8' : 'java' ,
75- 'javascript' : 'js' ,
76- 'julia' : 'jl' ,
77- 'kotlin' : 'kt' ,
78- 'lolcode' : 'lol' ,
79- 'lua' : 'lua' ,
80- 'mysql' : 'sql' ,
81- 'objectivec' : 'm' ,
82- 'ocaml' : 'ml' ,
83- 'octave' : 'oct' ,
84- 'oracle' : 'sql' ,
85- 'pascal' : 'pas' ,
86- 'perl' : 'pl' ,
87- 'php' : 'php' ,
88- 'pypy' : 'py' ,
89- 'pypy3' : 'py' ,
90- 'python' : 'py' ,
91- 'python3' : 'py' ,
92- 'racket' : 'rkt' ,
93- 'r' : 'r' ,
94- 'ruby' : 'rb' ,
95- 'rust' : 'rs' ,
96- 'sbcl' : 'lisp' ,
97- 'scala' : 'scala' ,
98- 'swift' : 'swift' ,
99- 'smalltalk' : 'st' ,
100- 'tcl' : 'tcl' ,
101- 'tsql' : 'sql' ,
102- 'visualbasic' : 'vbs' ,
103- 'whitespace' : 'hs' ,
104- }
35+ file_extensions = extensions
10536
10637 def __init__ (self ):
10738 self .session = requests .Session ()
@@ -149,27 +80,74 @@ def store_submission(self, file_name, code):
14980 with open (file_name , 'w' ) as text_file :
15081 text_file .write (code )
15182
def update_readme(self, readme_file_path, readme_text):
    """
    Add one table row to an existing README while keeping the rows sorted.

    The first ``self.readme_headers_len`` lines of the file are treated as the
    table header and are written back untouched, in their original order.
    Every line after the header is treated as a row; the new row is added to
    them and the rows are rewritten in sorted order.

    If an identical row is already present the file is left unchanged, so
    crawling the same solution again does not produce duplicate entries.

    :param readme_file_path: path of the README to update; the file must already exist
    :param readme_text: complete row to insert, including its trailing newline
    """
    header_length = self.readme_headers_len
    with open(readme_file_path, 'r+') as text_file:
        lines = text_file.readlines()
        # Split before adding the new row so that a file shorter than the
        # header can never absorb the row into the header part.
        header, rows = lines[:header_length], lines[header_length:]
        if readme_text in rows:
            # Row already recorded; nothing to rewrite.
            return
        rows.append(readme_text)
        # The content only ever grows, so rewriting from the start fully
        # overwrites the previous content.
        text_file.seek(0)
        text_file.writelines(header + sorted(rows))
16091
161- def create_readme (self , track_name , track_url , file_name ):
162- if track_name is not None :
163- os .makedirs (os .path .dirname (file_name ), exist_ok = True )
164- text = self .new_readme_text .format (track_name , track_url )
165- with open (file_name , 'w' ) as text_file :
166- text_file .write (text )
167-
168- def get_file_path (self , folder_name , file_name_with_extension ):
169- return os .path .join (self .base_folder_name , folder_name , file_name_with_extension )
def write(self, file_name, text):
    """
    Write ``text`` to ``file_name``, replacing any existing content.

    Missing parent directories are created first.

    :param file_name: path of the file to write
    :param text: full content to store in the file
    """
    directory = os.path.dirname(file_name)
    # A bare file name has an empty directory part, and os.makedirs('')
    # raises FileNotFoundError, so only create directories when there are any.
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(file_name, 'w') as text_file:
        text_file.write(text)
96+
97+
def create_readmes(self, domain_name, subdomain_name, domain_url, subdomain_url,
                   subdomain_readme_path, domain_readme_path, root_readme_path):
    """
    Make sure the subdomain, domain and root readme files exist.

    Each readme that is missing is created through ``self.write`` with only its
    table header in it; a readme that already exists is left untouched.
    """
    # (readme path, header template, values for the template), handled from
    # the most specific readme to the most general one.
    pending = (
        (subdomain_readme_path, self.subdomain_readme_text, (subdomain_name, subdomain_url)),
        (domain_readme_path, self.domain_readme_text, (domain_name, domain_url)),
        (root_readme_path, self.root_readme_text, (self.base_url,)),
    )
    for readme_path, header_template, values in pending:
        if os.path.exists(readme_path):
            continue
        # Format only when the header is actually needed.
        self.write(readme_path, header_template.format(*values))
114+
115+
def update_readmes(self, domain_name, subdomain_name, domain_url, subdomain_url,
                   challenge_name, challenge_slug, language, file_name_with_extension):
    """
    Add a row for a newly stored solution to the subdomain, domain and root readme files.

    Missing readme files are created first via ``self.create_readmes``. Each
    readme then receives one row whose solution link is relative to the folder
    that readme lives in.

    When ``self.make_language_folder`` is set, the subdomain readme is kept in
    ``<subdomain>/<language>/`` and the links written to the domain and root
    readmes include that language folder as well.

    :param domain_name: folder name of the domain
    :param subdomain_name: folder name of the subdomain
    :param domain_url: link used in the header of a newly created domain readme
    :param subdomain_url: link used in the header of a newly created subdomain readme
    :param challenge_name: challenge title shown in the row
    :param challenge_slug: slug substituted into ``self.problem_url``
    :param language: language name shown in the row (and used as folder name
        when language folders are enabled)
    :param file_name_with_extension: file name of the stored solution
    """
    # Folders between the domain folder and the solution file.
    solution_folder_parts = [subdomain_name]
    if self.make_language_folder:
        solution_folder_parts.append(language)

    subdomain_readme_path = os.path.join(
        self.base_folder_name, domain_name, *(solution_folder_parts + ['README.md']))
    domain_readme_path = os.path.join(self.base_folder_name, domain_name, 'README.md')
    root_readme_path = os.path.join(self.base_folder_name, 'README.md')

    self.create_readmes(domain_name, subdomain_name, domain_url, subdomain_url,
                        subdomain_readme_path, domain_readme_path, root_readme_path)

    problem_url = self.problem_url.format(challenge_slug)

    # Links are written into markdown, so they always use '/' regardless of
    # the operating system's path separator.
    solution_folder_link = '/'.join(solution_folder_parts)
    file_path_relative_to_subdomain = './' + file_name_with_extension
    file_path_relative_to_domain = '{}/{}'.format(solution_folder_link, file_name_with_extension)
    file_path_relative_to_root = '{}/{}/{}'.format(
        domain_name, solution_folder_link, file_name_with_extension)

    subdomain_readme_text = self.subdomain_readme_row.format(
        challenge_name, problem_url, language, file_path_relative_to_subdomain)
    domain_readme_text = self.domain_readme_row.format(
        subdomain_name, challenge_name, problem_url, language, file_path_relative_to_domain)
    root_readme_text = self.root_readme_row.format(
        domain_name, subdomain_name, challenge_name, problem_url, language,
        file_path_relative_to_root)

    self.update_readme(subdomain_readme_path, subdomain_readme_text)
    self.update_readme(domain_readme_path, domain_readme_text)
    self.update_readme(root_readme_path, root_readme_text)
170150
171- def get_readme_path (self , folder_name ):
172- return os .path .join (self .base_folder_name , folder_name , 'README.md' )
173151
174152 def get_submissions (self , submissions ):
175153 headers = self .headers
@@ -195,47 +173,49 @@ def get_submissions(self, submissions):
195173 data = resp .json ()['model' ]
196174 code = data ['code' ]
197175 track = data ['track' ]
198-
199- folder_name = 'Others'
200- file_extension = '.' + language
176+
177+ # Default should be empty
178+ file_extension = ''
201179 file_name = challenge_slug
202- track_folder_name = 'Others'
203- track_url = ''
204180
205- if track :
206- track_folder_name = track ['name' ].strip ().replace (' ' , '' )
207- track_url = self .domain_url .format (track ['track_slug' ], track ['slug' ])
208- parent_folder_name = track ['track_name' ].strip ().replace (' ' , '' )
209- folder_name = os .path .join (parent_folder_name , track_folder_name )
181+ domain_name = 'Others'
182+ subdomain_name = 'Miscellaneous'
210183
211- if self .make_language_folder :
212- folder_name = os .path .join (folder_name , language )
184+ domain_slug = ''
185+ subdomain_slug = ''
186+
187+ if track :
188+ domain_name = track ['track_name' ].strip ().replace (' ' , '' )
189+ subdomain_name = track ['name' ].strip ().replace (' ' , '' )
190+ domain_slug = track ['track_slug' ]
191+ subdomain_slug = track ['slug' ]
192+
193+ domain_url = self .domain_url .format (domain_slug )
194+ subdomain_url = self .subdomain_url .format (domain_slug , subdomain_slug )
213195
214196 if language in self .file_extensions :
215- if not self .prepend_language_in_extension :
216- file_extension = ''
197+ if self .prepend_language_in_extension :
198+ file_extension + = '.{}' . format ( language )
217199 file_extension += '.{}' .format (self .file_extensions [language ])
218200
219201 if file_extension .endswith ('.java' ):
220202 file_name = challenge_name .replace (' ' ,'' )
221203
222- file_path = self .get_file_path (folder_name , file_name + file_extension )
204+ file_name_with_extension = file_name + file_extension
205+ file_path = os .path .join (self .base_folder_name , domain_name , subdomain_name , file_name_with_extension )
206+ if self .make_language_folder :
207+ file_path = os .path .join (self .base_folder_name , domain_name , subdomain_name , language , file_name_with_extension )
223208 self .store_submission (file_path , code )
224- readme_file_path = self .get_readme_path (folder_name )
225- if not os .path .exists (readme_file_path ):
226- self .create_readme (track_folder_name , track_url , readme_file_path )
227- problem_url = self .problem_url .format (challenge_slug )
228- readme_text = self .problem_readme_text .format (challenge_name , problem_url , language , file_name + file_extension )
229- self .update_readme (
230- readme_file_path ,
231- readme_text ,
232- )
209+
210+ self .update_readmes (domain_name , subdomain_name , domain_url , subdomain_url ,
211+ challenge_name , challenge_slug , language , file_name_with_extension )
212+
233213 progress .next ()
234214 progress .finish ()
235215 print ('All Solutions Crawled' )
236216
237- def main ():
238217
218+ def main ():
239219 crawler = Crawler ()
240220 crawler .parse_script ()
241221 if not crawler .authenticate ():
0 commit comments