Skip to content

Commit a01f331

Browse files
committed
Loop over topics and skip repos that were already added
1 parent 8674e4d commit a01f331

File tree

1 file changed

+51
-47
lines changed

1 file changed

+51
-47
lines changed

crawler.py

Lines changed: 51 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -28,54 +28,58 @@
2828
topics = [t.strip() for t in topic.split(',')]
2929
organization = os.getenv("ORGANIZATION")
3030

31-
# Get all repos from organization
32-
search_string = "org:{} topic:{}".format(organization, topic)
33-
all_repos = gh.search_repositories(search_string)
31+
# Create empty list for repos
3432
repo_list = []
35-
36-
for repo in all_repos:
37-
if repo is not None:
38-
print("{0}".format(repo.repository))
39-
full_repository = repo.repository.refresh()
40-
41-
innersource_repo = repo.as_dict()
42-
innersource_repo["_InnerSourceMetadata"] = {}
43-
44-
# fetch innersource.json
45-
try:
46-
content = repo.repository.file_contents("/innersource.json").content
47-
metadata = json.loads(b64decode(content))
48-
49-
innersource_repo["_InnerSourceMetadata"] = metadata
50-
except github3.exceptions.NotFoundError:
51-
# innersource.json not found in repository, but it's not required
52-
pass
53-
54-
# fetch repository participation
55-
participation = repo.repository.weekly_commit_count()
56-
innersource_repo["_InnerSourceMetadata"]["participation"] = participation[
57-
"all"
58-
]
59-
60-
# fetch contributing guidelines
61-
try:
62-
# if CONTRIBUTING.md exists in the repository, link to that instead of repo root
63-
content = repo.repository.file_contents("/CONTRIBUTING.md").content
64-
innersource_repo["_InnerSourceMetadata"][
65-
"guidelines"
66-
] = "CONTRIBUTING.md"
67-
except github3.exceptions.NotFoundError:
68-
# CONTRIBUTING.md not found in repository, but it's not required
69-
pass
70-
71-
# fetch repository topics
72-
topics = repo.repository.topics()
73-
innersource_repo["_InnerSourceMetadata"]["topics"] = topics.names
74-
75-
# calculate score
76-
innersource_repo["score"] = repo_activity.score.calculate(innersource_repo)
77-
78-
repo_list.append(innersource_repo)
33+
# Full names of repos already added to the list, so a repo matching several topics is only added once
34+
repo_set = set()
35+
36+
for topic in topics:
37+
search_string = "org:{} topic:{}".format(organization, topic)
38+
all_repos = gh.search_repositories(search_string)
39+
40+
for repo in all_repos:
41+
if repo is not None and repo.repository.full_name not in repo_set:
42+
repo_set.add(repo.repository.full_name)
43+
print("{0}".format(repo.repository))
44+
45+
innersource_repo = repo.as_dict()
46+
innersource_repo["_InnerSourceMetadata"] = {}
47+
48+
# fetch innersource.json
49+
try:
50+
content = repo.repository.file_contents("/innersource.json").content
51+
metadata = json.loads(b64decode(content))
52+
53+
innersource_repo["_InnerSourceMetadata"] = metadata
54+
except github3.exceptions.NotFoundError:
55+
# innersource.json not found in repository, but it's not required
56+
pass
57+
58+
# fetch repository participation
59+
participation = repo.repository.weekly_commit_count()
60+
innersource_repo["_InnerSourceMetadata"]["participation"] = participation[
61+
"all"
62+
]
63+
64+
# fetch contributing guidelines
65+
try:
66+
# if CONTRIBUTING.md exists in the repository, link to that instead of repo root
67+
content = repo.repository.file_contents("/CONTRIBUTING.md").content
68+
innersource_repo["_InnerSourceMetadata"][
69+
"guidelines"
70+
] = "CONTRIBUTING.md"
71+
except github3.exceptions.NotFoundError:
72+
# CONTRIBUTING.md not found in repository, but it's not required
73+
pass
74+
75+
# fetch repository topics
76+
repo_topics = repo.repository.topics()
77+
innersource_repo["_InnerSourceMetadata"]["topics"] = repo_topics.names
78+
79+
# calculate score
80+
innersource_repo["score"] = repo_activity.score.calculate(innersource_repo)
81+
82+
repo_list.append(innersource_repo)
7983

8084
# Write each repository to a repos.json file
8185
with open("repos.json", "w") as f:

0 commit comments

Comments
 (0)