Skip to content

Commit 08f5094

Browse files
authored
Merge pull request #42 from mcantu/main
Support multiple topics in repository search
2 parents 0e5e7ec + a9242f6 commit 08f5094

File tree

2 files changed

+61
-47
lines changed

2 files changed

+61
-47
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ jobs:
4040
env:
4141
GH_TOKEN: ${{ secrets.GH_TOKEN }}
4242
ORGANIZATION: ${{ secrets.ORGANIZATION }}
43+
# For multiple topics, separate them with commas, e.g.:
44+
# TOPIC: inner-source,actions,security,python
4345
TOPIC: inner-source
4446
```
4547

crawler.py

Lines changed: 59 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from dotenv import load_dotenv
1111

1212
if __name__ == "__main__":
13-
1413
# Load env variables from file
1514
dotenv_path = join(dirname(__file__), ".env")
1615
load_dotenv(dotenv_path)
@@ -24,56 +23,69 @@
2423

2524
# Set the topic
2625
topic = os.getenv("TOPIC")
26+
if not topic:
27+
raise ValueError("TOPIC environment variable not set")
28+
29+
# If multiple topics, split topics by comma
30+
topics = [t.strip() for t in topic.split(",")]
2731
organization = os.getenv("ORGANIZATION")
2832

29-
# Get all repos from organization
30-
search_string = "org:{} topic:{}".format(organization, topic)
31-
all_repos = gh.search_repositories(search_string)
33+
# Create empty list for repos
3234
repo_list = []
33-
34-
for repo in all_repos:
35-
if repo is not None:
36-
print("{0}".format(repo.repository))
37-
full_repository = repo.repository.refresh()
38-
39-
innersource_repo = repo.as_dict()
40-
innersource_repo["_InnerSourceMetadata"] = {}
41-
42-
# fetch innersource.json
43-
try:
44-
content = repo.repository.file_contents("/innersource.json").content
45-
metadata = json.loads(b64decode(content))
46-
47-
innersource_repo["_InnerSourceMetadata"] = metadata
48-
except github3.exceptions.NotFoundError:
49-
# innersource.json not found in repository, but it's not required
50-
pass
51-
52-
# fetch repository participation
53-
participation = repo.repository.weekly_commit_count()
54-
innersource_repo["_InnerSourceMetadata"]["participation"] = participation[
55-
"all"
56-
]
57-
58-
# fetch contributing guidelines
59-
try:
60-
# if CONTRIBUTING.md exists in the repository, link to that instead of repo root
61-
content = repo.repository.file_contents("/CONTRIBUTING.md").content
35+
# Track the full names of repos already added to the list, so a repo matching several topics is only processed once
36+
repo_set = set()
37+
38+
# Iterate over topics, search for matching repositories, and process unique ones
39+
for topic in topics:
40+
search_string = "org:{} topic:{}".format(organization, topic)
41+
all_repos = gh.search_repositories(search_string)
42+
43+
# For each repo in the search results, check if it's unique and add it to repo_set
44+
for repo in all_repos:
45+
if repo is not None and repo.repository.full_name not in repo_set:
46+
repo_set.add(repo.repository.full_name)
47+
print("{0}".format(repo.repository))
48+
49+
innersource_repo = repo.as_dict()
50+
innersource_repo["_InnerSourceMetadata"] = {}
51+
52+
# fetch innersource.json
53+
try:
54+
content = repo.repository.file_contents("/innersource.json").content
55+
metadata = json.loads(b64decode(content))
56+
57+
innersource_repo["_InnerSourceMetadata"] = metadata
58+
except github3.exceptions.NotFoundError:
59+
# innersource.json not found in repository, but it's not required
60+
pass
61+
62+
# fetch repository participation
63+
participation = repo.repository.weekly_commit_count()
6264
innersource_repo["_InnerSourceMetadata"][
63-
"guidelines"
64-
] = "CONTRIBUTING.md"
65-
except github3.exceptions.NotFoundError:
66-
# CONTRIBUTING.md not found in repository, but it's not required
67-
pass
68-
69-
# fetch repository topics
70-
topics = repo.repository.topics()
71-
innersource_repo["_InnerSourceMetadata"]["topics"] = topics.names
72-
73-
# calculate score
74-
innersource_repo["score"] = repo_activity.score.calculate(innersource_repo)
75-
76-
repo_list.append(innersource_repo)
65+
"participation"
66+
] = participation["all"]
67+
68+
# fetch contributing guidelines
69+
try:
70+
# if CONTRIBUTING.md exists in the repository, link to that instead of repo root
71+
content = repo.repository.file_contents("/CONTRIBUTING.md").content
72+
innersource_repo["_InnerSourceMetadata"][
73+
"guidelines"
74+
] = "CONTRIBUTING.md"
75+
except github3.exceptions.NotFoundError:
76+
# CONTRIBUTING.md not found in repository, but it's not required
77+
pass
78+
79+
# fetch repository topics
80+
repo_topics = repo.repository.topics()
81+
innersource_repo["_InnerSourceMetadata"]["topics"] = repo_topics.names
82+
83+
# calculate score
84+
innersource_repo["score"] = repo_activity.score.calculate(
85+
innersource_repo
86+
)
87+
88+
repo_list.append(innersource_repo)
7789

7890
# Write each repository to a repos.json file
7991
with open("repos.json", "w") as f:

0 commit comments

Comments
 (0)