Skip to content

Commit e449474

Browse files
authored
feat: add a cron tasks to update gitlab repo list (#1771)
* feat: add a cron tasks to update gitlab repo list Signed-off-by: frank-zsy <syzhao1988@126.com> * fix: fix some copilot review Signed-off-by: frank-zsy <syzhao1988@126.com> --------- Signed-off-by: frank-zsy <syzhao1988@126.com>
1 parent 07a1074 commit e449474

File tree

2 files changed

+175
-0
lines changed

2 files changed

+175
-0
lines changed

src/config.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ let config = {
3939
appPrivateKeyPath: '',
4040
appId: 0,
4141
},
42+
gitlab: {
43+
token: '',
44+
apiUrl: '',
45+
},
4246
google: {
4347
map: {
4448
key: '',
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
import { Task } from '..';
2+
import getConfig from '../../config';
3+
import { insertRecords, query } from '../../db/clickhouse';
4+
import { formatDate, getLogger } from '../../utils';
5+
import { get } from 'https';
6+
7+
/**
 * Cron task that incrementally copies basic GitLab project info into the
 * ClickHouse table `gitlab_repo_list`.
 *
 * On each run the task:
 *  1. creates the table if it does not exist yet;
 *  2. reads the greatest `last_activity_at` already stored and uses it as the
 *     starting cursor;
 *  3. pages through `GET {apiUrl}/projects` ordered by `last_activity_at`
 *     ascending, inserting every page and moving the cursor to the last
 *     project of the page;
 *  4. stops on the first empty page, on a cursor that does not move forward,
 *     or on the first error (which is logged, not rethrown).
 *
 * The task returns early with an error log when `config.gitlab.token` or
 * `config.gitlab.apiUrl` is empty.
 */
const task: Task = {
  cron: '0 10 * * *',
  singleInstance: true,
  callback: async () => {
    const logger = getLogger('UpdateGitlabRepoTask');
    const config = await getConfig();
    const gitlabToken = config.gitlab.token;
    const gitlabApiUrl = config.gitlab.apiUrl;

    // A falsy check already covers '', undefined and null.
    if (!gitlabToken || !gitlabApiUrl) {
      logger.error('GitLab token or API URL is not set');
      return;
    }

    const tableName = 'gitlab_repo_list';
    const pageSize = 100;

    /** Creates the target table when it is missing. */
    const createGitlabRepoListTable = async (): Promise<void> => {
      const createTableQuery = `
        CREATE TABLE IF NOT EXISTS ${tableName} (
          id UInt64,
          namespace_id UInt64,
          namespace_name String,
          name String,
          description String,
          default_branch String,
          archived UInt8,
          topics Array(String),
          tag_list Array(String),
          created_at DateTime,
          stars_count UInt32,
          forks_count UInt32,
          last_activity_at DateTime,
          updated_at DateTime,
          inserted_at UInt64
        )
        ENGINE = ReplacingMergeTree(inserted_at)
        ORDER BY id
        SETTINGS index_granularity = 8192
      `;
      await query(createTableQuery);
    };
    await createGitlabRepoListTable();

    /**
     * Maps one project returned by the API to a table row.
     * Missing text fields become '' and missing lists become [] so the row
     * always matches the non-nullable column types declared above.
     */
    const parseProject = (project: ProjectRaw): ProjectItem => {
      return {
        id: project.id,
        namespace_id: project.namespace.id,
        namespace_name: project.namespace.full_path,
        name: project.path_with_namespace,
        description: project.description ?? '',
        default_branch: project.default_branch ?? '',
        archived: project.archived ? 1 : 0,
        // NOTE(review): `tag_list` appears to be a deprecated alias of `topics`
        // in the GitLab API and may be absent on newer servers; confirm.
        topics: project.topics ?? [],
        tag_list: project.tag_list ?? [],
        created_at: formatDate(project.created_at),
        stars_count: project.stars_count,
        forks_count: project.forks_count,
        last_activity_at: formatDate(project.last_activity_at),
        updated_at: formatDate(project.updated_at),
        inserted_at: 0, // will be set when saving
      };
    };

    /**
     * Fetches one page of projects whose last activity is after the cursor.
     *
     * @param lastActivityAfter cursor sent as `last_activity_after`
     * @param limit page size sent as `per_page`
     * @returns the parsed JSON array of projects
     * @throws Error on a network failure, a non-2xx status, invalid JSON or a
     *         JSON body that is not an array
     */
    const getProjects = (lastActivityAfter: string, limit: number): Promise<ProjectRaw[]> => {
      return new Promise<ProjectRaw[]>((resolve, reject) => {
        const params = new URLSearchParams({
          last_activity_after: lastActivityAfter,
          per_page: limit.toString(),
          sort: 'asc',
          order_by: 'last_activity_at',
        });
        const url = new URL(`${gitlabApiUrl}/projects?${params.toString()}`);
        const options = {
          hostname: url.hostname,
          // Keep an explicit port from the configured URL; '' means "default".
          port: url.port || undefined,
          path: url.pathname + url.search,
          headers: {
            'Authorization': `Bearer ${gitlabToken}`,
            'User-Agent': 'opendigger-bot',
          },
        };
        get(options, (res) => {
          let data = '';
          // Decode as a stream so multi-byte characters split across chunks
          // are not corrupted by per-chunk string conversion.
          res.setEncoding('utf8');
          res.on('data', (chunk) => { data += chunk; });
          res.on('end', () => {
            const status = res.statusCode ?? 0;
            if (status < 200 || status >= 300) {
              reject(new Error(`GitLab API responded with status ${status}: ${data}`));
              return;
            }
            try {
              const parsed: unknown = JSON.parse(data);
              if (!Array.isArray(parsed)) {
                reject(new Error(`Unexpected projects response: ${data}`));
                return;
              }
              resolve(parsed as ProjectRaw[]);
            } catch (e) {
              logger.error(`Error parsing projects: ${data}`);
              reject(e);
            }
          });
        }).on('error', reject);
      });
    };

    /** Inserts the rows, stamping all of them with the same insertion time. */
    const saveProjects = async (projects: ProjectItem[]): Promise<void> => {
      if (projects.length === 0) {
        return;
      }
      const insertedAt = Date.now();
      await insertRecords(projects.map(project => ({
        ...project,
        inserted_at: insertedAt,
      })), tableName);
    };

    /**
     * Tells whether `next` is strictly later than `current`. Falls back to a
     * plain string comparison when either value cannot be parsed as a date.
     */
    const cursorAdvanced = (current: string, next: string): boolean => {
      const currentTime = Date.parse(current);
      const nextTime = Date.parse(next);
      if (Number.isNaN(currentTime) || Number.isNaN(nextTime)) {
        return current !== next;
      }
      return nextTime > currentTime;
    };

    const maxLastActivityAt = await query<any[]>(`SELECT MAX(last_activity_at) AS max_last_activity_at FROM ${tableName}`);
    // Fall back to the epoch when the query yields nothing usable, instead of
    // letting toISOString() throw on an invalid date.
    const storedMax = new Date(maxLastActivityAt?.[0]?.[0] ?? 0);
    let lastActivityAt = (Number.isNaN(storedMax.getTime()) ? new Date(0) : storedMax).toISOString();
    logger.info(`Max last activity at in database: ${lastActivityAt}`);

    let totalCount = 0;
    for (;;) {
      try {
        const projects = await getProjects(lastActivityAt, pageSize);
        if (projects.length === 0) {
          // Normal end of the sync: nothing newer than the cursor.
          break;
        }
        await saveProjects(projects.map(parseProject));
        totalCount += projects.length;
        const nextCursor = projects[projects.length - 1].last_activity_at;
        logger.info(`Saved ${projects.length} projects, starting from ${nextCursor}, total count: ${totalCount}`);
        if (!cursorAdvanced(lastActivityAt, nextCursor)) {
          // Without this guard the same page would be requested forever.
          logger.info(`Cursor did not advance past ${lastActivityAt}, stopping`);
          break;
        }
        lastActivityAt = nextCursor;
      } catch (error: unknown) {
        const detail = error instanceof Error ? `${error.message}\n${error.stack}` : String(error);
        logger.error(`Error getting projects starting from ${lastActivityAt}: ${detail}`);
        break;
      }
    }
    logger.info(`Task done, total count: ${totalCount}`);
  }
};
133+
134+
/**
 * Subset of a project object as returned by the GitLab projects API; only
 * the fields this task reads are declared.
 */
interface ProjectRaw {
  id: number;
  /** May be null; the task stores '' in that case. */
  description: string | null;
  path_with_namespace: string;
  created_at: string;
  /** May be null; the task stores '' in that case. */
  default_branch: string | null;
  tag_list: string[];
  topics: string[];
  archived: boolean;
  forks_count: number;
  stars_count: number;
  last_activity_at: string;
  updated_at: string;
  namespace: {
    id: number;
    full_path: string;
  };
}
152+
153+
/**
 * One row of the `gitlab_repo_list` table, as produced from a ProjectRaw.
 */
interface ProjectItem {
  // Identity
  id: number;
  name: string;
  namespace_id: number;
  namespace_name: string;

  // Descriptive fields ('' when the API value is missing)
  description: string;
  default_branch: string;
  topics: string[];
  tag_list: string[];

  // Flags and counters
  /** 1 when the project is archived, 0 otherwise. */
  archived: number;
  stars_count: number;
  forks_count: number;

  // Timestamps, already passed through formatDate
  created_at: string;
  last_activity_at: string;
  updated_at: string;

  /** Insertion time in epoch milliseconds; 0 until the row is saved. */
  inserted_at: number;
}
170+
171+
// CommonJS export of the task object.
// NOTE(review): presumably the cron task loader `require`s this module and
// expects the Task as the module value — confirm before switching to an ES export.
module.exports = task;

0 commit comments

Comments
 (0)