Skip to content

Commit 47b78b9

Browse files
authored
Merge pull request #1 from mcantu/multiple-topics
Multiple topics
2 parents 0e5e7ec + a01f331 commit 47b78b9

File tree

1 file changed

+53
-47
lines changed

1 file changed

+53
-47
lines changed

crawler.py

Lines changed: 53 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -24,56 +24,62 @@
2424

2525
# Set the topic
2626
topic = os.getenv("TOPIC")
27+
# If multiple topics, split topics by comma
28+
topics = [t.strip() for t in topic.split(',')]
2729
organization = os.getenv("ORGANIZATION")
2830

29-
# Get all repos from organization
30-
search_string = "org:{} topic:{}".format(organization, topic)
31-
all_repos = gh.search_repositories(search_string)
31+
# Create empty list for repos
3232
repo_list = []
33-
34-
for repo in all_repos:
35-
if repo is not None:
36-
print("{0}".format(repo.repository))
37-
full_repository = repo.repository.refresh()
38-
39-
innersource_repo = repo.as_dict()
40-
innersource_repo["_InnerSourceMetadata"] = {}
41-
42-
# fetch innersource.json
43-
try:
44-
content = repo.repository.file_contents("/innersource.json").content
45-
metadata = json.loads(b64decode(content))
46-
47-
innersource_repo["_InnerSourceMetadata"] = metadata
48-
except github3.exceptions.NotFoundError:
49-
# innersource.json not found in repository, but it's not required
50-
pass
51-
52-
# fetch repository participation
53-
participation = repo.repository.weekly_commit_count()
54-
innersource_repo["_InnerSourceMetadata"]["participation"] = participation[
55-
"all"
56-
]
57-
58-
# fetch contributing guidelines
59-
try:
60-
# if CONTRIBUTING.md exists in the repository, link to that instead of repo root
61-
content = repo.repository.file_contents("/CONTRIBUTING.md").content
62-
innersource_repo["_InnerSourceMetadata"][
63-
"guidelines"
64-
] = "CONTRIBUTING.md"
65-
except github3.exceptions.NotFoundError:
66-
# CONTRIBUTING.md not found in repository, but it's not required
67-
pass
68-
69-
# fetch repository topics
70-
topics = repo.repository.topics()
71-
innersource_repo["_InnerSourceMetadata"]["topics"] = topics.names
72-
73-
# calculate score
74-
innersource_repo["score"] = repo_activity.score.calculate(innersource_repo)
75-
76-
repo_list.append(innersource_repo)
33+
# Set for repos that have already been added to the list
34+
repo_set = set()
35+
36+
for topic in topics:
37+
search_string = "org:{} topic:{}".format(organization, topic)
38+
all_repos = gh.search_repositories(search_string)
39+
40+
for repo in all_repos:
41+
if repo is not None and repo.repository.full_name not in repo_set:
42+
repo_set.add(repo.repository.full_name)
43+
print("{0}".format(repo.repository))
44+
45+
innersource_repo = repo.as_dict()
46+
innersource_repo["_InnerSourceMetadata"] = {}
47+
48+
# fetch innersource.json
49+
try:
50+
content = repo.repository.file_contents("/innersource.json").content
51+
metadata = json.loads(b64decode(content))
52+
53+
innersource_repo["_InnerSourceMetadata"] = metadata
54+
except github3.exceptions.NotFoundError:
55+
# innersource.json not found in repository, but it's not required
56+
pass
57+
58+
# fetch repository participation
59+
participation = repo.repository.weekly_commit_count()
60+
innersource_repo["_InnerSourceMetadata"]["participation"] = participation[
61+
"all"
62+
]
63+
64+
# fetch contributing guidelines
65+
try:
66+
# if CONTRIBUTING.md exists in the repository, link to that instead of repo root
67+
content = repo.repository.file_contents("/CONTRIBUTING.md").content
68+
innersource_repo["_InnerSourceMetadata"][
69+
"guidelines"
70+
] = "CONTRIBUTING.md"
71+
except github3.exceptions.NotFoundError:
72+
# CONTRIBUTING.md not found in repository, but it's not required
73+
pass
74+
75+
# fetch repository topics
76+
repo_topics = repo.repository.topics()
77+
innersource_repo["_InnerSourceMetadata"]["topics"] = repo_topics.names
78+
79+
# calculate score
80+
innersource_repo["score"] = repo_activity.score.calculate(innersource_repo)
81+
82+
repo_list.append(innersource_repo)
7783

7884
# Write each repository to a repos.json file
7985
with open("repos.json", "w") as f:

0 commit comments

Comments
 (0)