Skip to content

Commit cc7ded3

Browse files
Merge pull request #41 from Nullifiers/feature/root-level-readme
Create README files at domain and root level also
2 parents 9df3a1e + 9e7e65c commit cc7ded3

File tree

9 files changed

+198
-125
lines changed

9 files changed

+198
-125
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ __pycache__
1616

1717
# Script results
1818
Hackerrank/
19-
metadata*
19+
metadata.json
2020

2121
# Config file
2222
*.yaml

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ install:
88

99
# command to run tests
1010
script:
11-
python -m unittest tests/*
11+
python -m unittest tests/*.py

hsc/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .crawler import Crawler

hsc/constants.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Maps a submission's language name to the file extension (without the
# leading dot) used when the solution is written to disk.
extensions = {
    'ada': 'ada',
    'bash': 'sh',
    'c': 'c',
    'clojure': 'clj',
    'coffeescript': 'coffee',
    'cpp': 'cpp',
    'cpp14': 'cpp',
    'csharp': 'cs',
    'd': 'd',
    'db2': 'sql',
    'elixir': 'ex',
    'erlang': 'erl',
    'fortran': 'for',
    'fsharp': 'fs',
    'go': 'go',
    'groovy': 'groovy',
    'haskell': 'hs',
    'java': 'java',
    'java8': 'java',
    'javascript': 'js',
    'julia': 'jl',
    'kotlin': 'kt',
    'lolcode': 'lol',
    'lua': 'lua',
    'mysql': 'sql',
    'objectivec': 'm',
    'ocaml': 'ml',
    'octave': 'oct',
    'oracle': 'sql',
    'pascal': 'pas',
    'perl': 'pl',
    'php': 'php',
    'pypy': 'py',
    'pypy3': 'py',
    'python': 'py',
    'python3': 'py',
    'racket': 'rkt',
    'r': 'r',
    'ruby': 'rb',
    'rust': 'rs',
    'sbcl': 'lisp',
    'scala': 'scala',
    'swift': 'swift',
    'smalltalk': 'st',
    'tcl': 'tcl',
    'tsql': 'sql',
    'visualbasic': 'vbs',
    # Whitespace sources conventionally use '.ws'; 'hs' is Haskell's extension.
    'whitespace': 'ws',
}

hsc/crawler.py

Lines changed: 101 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,29 @@
11
import os
2-
import json
32
import requests
43
import getpass
54
import configargparse
6-
from progress.bar import ChargingBar
7-
8-
9-
class CustomProgress(ChargingBar):
10-
message = 'Downloading Solutions'
11-
suffix = '%(percent)d%% [%(index)d/%(max)d]'
12-
13-
14-
class Metadata:
15-
16-
METADATA_FILE_NAME = 'metadata.json'
17-
18-
def __init__(self):
19-
self.metadata = {}
20-
if (os.path.isfile(self.METADATA_FILE_NAME)):
21-
self.metadata = json.load(open(self.METADATA_FILE_NAME))
22-
23-
def put(self, challenge_id, submission_id):
24-
self.metadata[str(challenge_id)] = str(submission_id)
25-
json.dump(self.metadata, open(self.METADATA_FILE_NAME, 'w'))
26-
27-
def get(self, challenge_id):
28-
challenge_id_string = str(challenge_id)
29-
if challenge_id_string not in self.metadata:
30-
self.metadata[challenge_id_string] = -1
31-
submission_id_string = self.metadata[challenge_id_string]
32-
return int(submission_id_string)
5+
from .progress_bar import CustomProgress
6+
from .metadata import Metadata
7+
from .constants import extensions
338

349

3510
class Crawler:
3611
base_url = 'https://www.hackerrank.com/'
3712
login_url = base_url + 'auth/login'
3813
submissions_url = base_url + 'rest/contests/master/submissions/?offset={}&limit={}'
3914
challenge_url = base_url + 'rest/contests/master/challenges/{}/submissions/{}'
40-
domain_url = base_url + 'domains/{}/{}'
15+
domain_url = base_url + 'domains/{}'
16+
subdomain_url = base_url + 'domains/{}/{}'
4117
problem_url = base_url + 'challenges/{}/problem'
4218

43-
new_readme_text = '## [{}]({})\n\n|Problem Name|Problem Link|Language|Solution Link|\n---|---|---|---\n'
44-
readme_headers_len = len(new_readme_text.split('\n')) - 1
45-
problem_readme_text = '|{}|[Problem]({})|{}|[Solution](./{})|\n'
19+
subdomain_readme_text = '## [{}]({})\n\n|Problem Name|Problem Link|Language|Solution Link|\n---|---|---|---\n'
20+
domain_readme_text = '## [{}]({})\n\n|Subdomain|Problem Name|Problem Link|Language|Solution Link|\n---|---|---|---|---\n'
21+
root_readme_text = '## [Hackerrank]({})\n\n|Domain|Subdomain|Problem Name|Problem Link|Language|Solution Link|\n---|---|---|---|---|---\n'
22+
readme_headers_len = len(subdomain_readme_text.split('\n')) - 1
23+
24+
subdomain_readme_row = '|{}|[Problem]({})|{}|[Solution]({})|\n'
25+
domain_readme_row = '|{}|{}|[Problem]({})|{}|[Solution]({})|\n'
26+
root_readme_row = '|{}|{}|{}|[Problem]({})|{}|[Solution]({})|\n'
4627

4728
base_folder_name = 'Hackerrank'
4829

@@ -51,57 +32,7 @@ class Crawler:
5132
# prepend language in file extension e.g Hackerrank/Regex/Introduction/matching.python3.py
5233
prepend_language_in_extension = False
5334

54-
# file extensions
55-
file_extensions = {
56-
'ada': 'ada',
57-
'bash': 'sh',
58-
'c': 'c',
59-
'clojure': 'clj',
60-
'coffeescript': 'coffee',
61-
'cpp': 'cpp',
62-
'cpp14': 'cpp',
63-
'csharp': 'cs',
64-
'd': 'd',
65-
'db2': 'sql',
66-
'elixir': 'ex',
67-
'erlang': 'erl',
68-
'fortran': 'for',
69-
'fsharp': 'fs',
70-
'go': 'go',
71-
'groovy': 'groovy',
72-
'haskell': 'hs',
73-
'java': 'java',
74-
'java8': 'java',
75-
'javascript': 'js',
76-
'julia': 'jl',
77-
'kotlin': 'kt',
78-
'lolcode': 'lol',
79-
'lua': 'lua',
80-
'mysql': 'sql',
81-
'objectivec': 'm',
82-
'ocaml': 'ml',
83-
'octave': 'oct',
84-
'oracle': 'sql',
85-
'pascal': 'pas',
86-
'perl': 'pl',
87-
'php': 'php',
88-
'pypy': 'py',
89-
'pypy3': 'py',
90-
'python': 'py',
91-
'python3': 'py',
92-
'racket': 'rkt',
93-
'r': 'r',
94-
'ruby': 'rb',
95-
'rust': 'rs',
96-
'sbcl': 'lisp',
97-
'scala': 'scala',
98-
'swift': 'swift',
99-
'smalltalk': 'st',
100-
'tcl': 'tcl',
101-
'tsql': 'sql',
102-
'visualbasic': 'vbs',
103-
'whitespace': 'hs',
104-
}
35+
file_extensions = extensions
10536

10637
def __init__(self):
10738
self.session = requests.Session()
@@ -149,27 +80,74 @@ def store_submission(self, file_name, code):
14980
with open(file_name, 'w') as text_file:
15081
text_file.write(code)
15182

152-
def update_readme(self, readme_file_path, problem_readme_text):
83+
def update_readme(self, readme_file_path, readme_text):
15384
header_length = self.readme_headers_len
15485
with open(readme_file_path, 'r+') as text_file:
15586
lines = text_file.readlines()
156-
lines.append(problem_readme_text)
87+
lines.append(readme_text)
15788
sortedlines = lines[:header_length] + sorted(lines[header_length:])
15889
text_file.seek(0)
15990
text_file.writelines(sortedlines)
16091

161-
def create_readme(self, track_name, track_url, file_name):
162-
if track_name is not None:
163-
os.makedirs(os.path.dirname(file_name), exist_ok=True)
164-
text = self.new_readme_text.format(track_name, track_url)
165-
with open(file_name, 'w') as text_file:
166-
text_file.write(text)
167-
168-
def get_file_path(self, folder_name, file_name_with_extension):
169-
return os.path.join(self.base_folder_name, folder_name, file_name_with_extension)
92+
def write(self, file_name, text):
    """Write ``text`` to ``file_name``, creating missing parent directories.

    Args:
        file_name: Path of the file to create or overwrite.
        text: Content written to the file.
    """
    parent_dir = os.path.dirname(file_name)
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create directories when there are some.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_name, 'w') as text_file:
        text_file.write(text)
96+
97+
98+
def create_readmes(self, domain_name, subdomain_name, domain_url, subdomain_url,
                   subdomain_readme_path, domain_readme_path, root_readme_path):
    """
    Make sure the subdomain, domain and root README files exist.

    Each README that is missing is created with only its header, rendered
    from the matching ``*_readme_text`` template. READMEs that already exist
    are left untouched.
    """
    headers = (
        (subdomain_readme_path, self.subdomain_readme_text, (subdomain_name, subdomain_url)),
        (domain_readme_path, self.domain_readme_text, (domain_name, domain_url)),
        (root_readme_path, self.root_readme_text, (self.base_url,)),
    )
    for readme_path, template, fields in headers:
        if os.path.exists(readme_path):
            continue
        self.write(readme_path, template.format(*fields))
114+
115+
116+
def update_readmes(self, domain_name, subdomain_name, domain_url, subdomain_url,
                   challenge_name, challenge_slug, language, file_name_with_extension):
    """
    Add a row for a newly stored solution to the subdomain, domain and root READMEs.

    README files that do not exist yet are created (with their headers)
    before the rows are appended.

    Args:
        domain_name: Folder name of the domain.
        subdomain_name: Folder name of the subdomain.
        domain_url: Link used in the header of a newly created domain README.
        subdomain_url: Link used in the header of a newly created subdomain README.
        challenge_name: Challenge name shown in the README rows.
        challenge_slug: Slug substituted into ``self.problem_url``.
        language: Language of the solution; also the name of the extra
            folder level when ``self.make_language_folder`` is set.
        file_name_with_extension: File name of the stored solution.
    """
    # Folder of the solution relative to the domain folder. With per-language
    # folders, the solution and the subdomain README both live one level
    # deeper, so links from the domain and root READMEs must include it.
    solution_dir_parts = [subdomain_name]
    if self.make_language_folder:
        solution_dir_parts.append(language)

    solution_dir = os.path.join(self.base_folder_name, domain_name, *solution_dir_parts)
    subdomain_readme_path = os.path.join(solution_dir, 'README.md')
    domain_readme_path = os.path.join(self.base_folder_name, domain_name, 'README.md')
    root_readme_path = os.path.join(self.base_folder_name, 'README.md')

    self.create_readmes(domain_name, subdomain_name, domain_url, subdomain_url,
                        subdomain_readme_path, domain_readme_path, root_readme_path)

    problem_url = self.problem_url.format(challenge_slug)

    # Markdown links always use '/', whatever the OS path separator is.
    file_path_relative_to_subdomain = './' + file_name_with_extension
    file_path_relative_to_domain = '/'.join(solution_dir_parts + [file_name_with_extension])
    file_path_relative_to_root = '/'.join([domain_name] + solution_dir_parts + [file_name_with_extension])

    subdomain_readme_text = self.subdomain_readme_row.format(
        challenge_name, problem_url, language, file_path_relative_to_subdomain)
    domain_readme_text = self.domain_readme_row.format(
        subdomain_name, challenge_name, problem_url, language, file_path_relative_to_domain)
    root_readme_text = self.root_readme_row.format(
        domain_name, subdomain_name, challenge_name, problem_url, language, file_path_relative_to_root)

    self.update_readme(subdomain_readme_path, subdomain_readme_text)
    self.update_readme(domain_readme_path, domain_readme_text)
    self.update_readme(root_readme_path, root_readme_text)
170150

171-
def get_readme_path(self, folder_name):
172-
return os.path.join(self.base_folder_name, folder_name, 'README.md')
173151

174152
def get_submissions(self, submissions):
175153
headers = self.headers
@@ -195,47 +173,49 @@ def get_submissions(self, submissions):
195173
data = resp.json()['model']
196174
code = data['code']
197175
track = data['track']
198-
199-
folder_name = 'Others'
200-
file_extension = '.' + language
176+
177+
# Default should be empty
178+
file_extension = ''
201179
file_name = challenge_slug
202-
track_folder_name = 'Others'
203-
track_url = ''
204180

205-
if track:
206-
track_folder_name = track['name'].strip().replace(' ', '')
207-
track_url = self.domain_url.format(track['track_slug'], track['slug'])
208-
parent_folder_name = track['track_name'].strip().replace(' ', '')
209-
folder_name = os.path.join(parent_folder_name, track_folder_name)
181+
domain_name = 'Others'
182+
subdomain_name = 'Miscellaneous'
210183

211-
if self.make_language_folder:
212-
folder_name = os.path.join(folder_name, language)
184+
domain_slug = ''
185+
subdomain_slug = ''
186+
187+
if track:
188+
domain_name = track['track_name'].strip().replace(' ', '')
189+
subdomain_name = track['name'].strip().replace(' ', '')
190+
domain_slug = track['track_slug']
191+
subdomain_slug = track['slug']
192+
193+
domain_url = self.domain_url.format(domain_slug)
194+
subdomain_url = self.subdomain_url.format(domain_slug, subdomain_slug)
213195

214196
if language in self.file_extensions:
215-
if not self.prepend_language_in_extension:
216-
file_extension = ''
197+
if self.prepend_language_in_extension:
198+
file_extension += '.{}'.format(language)
217199
file_extension += '.{}'.format(self.file_extensions[language])
218200

219201
if file_extension.endswith('.java'):
220202
file_name = challenge_name.replace(' ','')
221203

222-
file_path = self.get_file_path(folder_name, file_name + file_extension)
204+
file_name_with_extension = file_name + file_extension
205+
file_path = os.path.join(self.base_folder_name, domain_name, subdomain_name, file_name_with_extension)
206+
if self.make_language_folder:
207+
file_path = os.path.join(self.base_folder_name, domain_name, subdomain_name, language, file_name_with_extension)
223208
self.store_submission(file_path, code)
224-
readme_file_path = self.get_readme_path(folder_name)
225-
if not os.path.exists(readme_file_path):
226-
self.create_readme(track_folder_name, track_url, readme_file_path)
227-
problem_url = self.problem_url.format(challenge_slug)
228-
readme_text = self.problem_readme_text.format(challenge_name, problem_url, language, file_name + file_extension)
229-
self.update_readme(
230-
readme_file_path,
231-
readme_text,
232-
)
209+
210+
self.update_readmes(domain_name, subdomain_name, domain_url, subdomain_url,
211+
challenge_name, challenge_slug, language, file_name_with_extension)
212+
233213
progress.next()
234214
progress.finish()
235215
print('All Solutions Crawled')
236216

237-
def main():
238217

218+
def main():
239219
crawler = Crawler()
240220
crawler.parse_script()
241221
if not crawler.authenticate():

hsc/metadata.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import os
2+
import json
3+
4+
class Metadata:
5+
6+
METADATA_FILE_NAME = 'metadata.json'
7+
8+
def __init__(self):
9+
self.metadata = {}
10+
if os.path.isfile(self.METADATA_FILE_NAME):
11+
with open(self.METADATA_FILE_NAME) as fp:
12+
self.metadata = json.load(fp)
13+
14+
def put(self, challenge_id, submission_id):
15+
self.metadata[str(challenge_id)] = str(submission_id)
16+
with open(self.METADATA_FILE_NAME, 'w') as fp:
17+
json.dump(self.metadata, fp)
18+
19+
def get(self, challenge_id):
20+
challenge_id_string = str(challenge_id)
21+
if challenge_id_string not in self.metadata:
22+
self.metadata[challenge_id_string] = -1
23+
submission_id_string = self.metadata[challenge_id_string]
24+
return int(submission_id_string)

hsc/progress_bar.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from progress.bar import ChargingBar
2+
3+
# ChargingBar subclass that only overrides the bar's suffix template.
class CustomProgress(ChargingBar):
4+
# Suffix template: integer percentage followed by "[index/max]".
suffix = '%(percent)d%% [%(index)d/%(max)d]'

tests/test_crawler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import unittest
2-
from hsc import crawler
2+
from hsc import Crawler
33

44
class TestCrawler(unittest.TestCase):
55

66
def setUp(self):
7-
self.crawler_obj = crawler.Crawler()
7+
self.crawler_obj = Crawler()
88

99
def test_crawler_obj_is_not_none(self):
1010
self.assertIsNotNone(self.crawler_obj)

0 commit comments

Comments
 (0)