Skip to content

Commit a6d338a

Browse files
authored
Merge pull request #454 from boostcampwm-2024/fix/crawling-time
🐛 fix: 피드 크롤러 타임 딜레이 제거 및 피드 누락 버그 수정
2 parents 3328a23 + 2783110 commit a6d338a

File tree

9 files changed

+106
-44
lines changed

9 files changed

+106
-44
lines changed

feed-crawler/src/common/mysql-access.ts

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ export class MySQLConnection implements DatabaseConnection {
2323
});
2424
}
2525

26-
async executeQuery<T>(query: string, params: any[] = []) {
26+
async executeQuery<T>(
27+
query: string,
28+
params: any[] = [],
29+
): Promise<T[] | null> {
2730
let connection: PoolConnection;
2831
try {
2932
connection = await this.pool.getConnection();
@@ -33,7 +36,7 @@ export class MySQLConnection implements DatabaseConnection {
3336
logger.error(
3437
`${this.nameTag} 쿼리 ${query} 실행 중 오류 발생
3538
오류 메시지: ${error.message}
36-
스택 트레이스: ${error.stack}`
39+
스택 트레이스: ${error.stack}`,
3740
);
3841
} finally {
3942
if (connection) {
@@ -43,13 +46,42 @@ export class MySQLConnection implements DatabaseConnection {
4346
logger.error(
4447
`${this.nameTag} connection release 중 오류 발생
4548
오류 메시지: ${error.message}
46-
스택 트레이스: ${error.stack}`
49+
스택 트레이스: ${error.stack}`,
4750
);
4851
}
4952
}
5053
}
5154
}
5255

56+
async executeQueryStrict<T>(query: string, params: any[] = []): Promise<T[]> {
57+
let connection: PoolConnection;
58+
try {
59+
connection = await this.pool.getConnection();
60+
const [rows] = await connection.query(query, params);
61+
return rows as T[];
62+
} catch (error) {
63+
logger.error(
64+
`${this.nameTag} 쿼리 ${query} 실행 중 오류 발생
65+
오류 메시지: ${error.message}
66+
스택 트레이스: ${error.stack}`,
67+
);
68+
throw error;
69+
} finally {
70+
if (connection) {
71+
try {
72+
if (connection) connection.release();
73+
} catch (error) {
74+
logger.error(
75+
`${this.nameTag} connection release 중 오류 발생
76+
오류 메시지: ${error.message}
77+
스택 트레이스: ${error.stack}`,
78+
);
79+
throw error;
80+
}
81+
}
82+
}
83+
}
84+
5385
public async end() {
5486
await this.pool.end();
5587
}

feed-crawler/src/common/parser/base-feed-parser.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,14 @@ export abstract class BaseFeedParser {
2727
this.parserUtil = parserUtil;
2828
}
2929

30-
async parseFeed(rssObj: RssObj, xmlData: string): Promise<FeedDetail[]> {
30+
async parseFeed(
31+
rssObj: RssObj,
32+
xmlData: string,
33+
startTime: Date,
34+
): Promise<FeedDetail[]> {
3135
// 각 포맷(atom1.0, rss2.0 등...)
3236
const rawFeeds = this.extractRawFeeds(xmlData);
33-
const timeMatchedFeeds = this.filterByTime(rawFeeds);
37+
const timeMatchedFeeds = this.filterByTime(rawFeeds, startTime);
3438
const detailedFeeds = await this.convertToFeedDetails(
3539
rssObj,
3640
timeMatchedFeeds,
@@ -42,12 +46,12 @@ export abstract class BaseFeedParser {
4246
abstract canParse(xmlData: string): boolean;
4347
protected abstract extractRawFeeds(xmlData: string): RawFeed[];
4448

45-
private filterByTime(rawFeeds: RawFeed[]): RawFeed[] {
46-
const now = new Date().setSeconds(0, 0);
49+
private filterByTime(rawFeeds: RawFeed[], startTime: Date): RawFeed[] {
50+
const now = new Date(startTime).setSeconds(0, 0);
4751
return rawFeeds.filter((item) => {
4852
const pubDate = new Date(item.pubDate).setSeconds(0, 0);
4953
const timeDiff = (now - pubDate) / (ONE_MINUTE * TIME_INTERVAL);
50-
return timeDiff >= 0 && timeDiff < 1;
54+
return timeDiff >= 0 && timeDiff <= 1;
5155
});
5256
}
5357

feed-crawler/src/common/parser/feed-parser-manager.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ export class FeedParserManager {
1717
this.parsers = [rss20Parser, atom10Parser];
1818
}
1919

20-
async fetchAndParse(rssObj: RssObj): Promise<FeedDetail[]> {
20+
async fetchAndParse(rssObj: RssObj, startTime: Date): Promise<FeedDetail[]> {
2121
try {
2222
const response = await fetch(rssObj.rssUrl, {
2323
headers: {
@@ -36,9 +36,8 @@ export class FeedParserManager {
3636
if (!parser) {
3737
throw new Error(`지원하지 않는 피드 형식: ${rssObj.rssUrl} / `);
3838
}
39-
logger.info(`${rssObj.blogName}: ${parser.constructor.name} 사용`);
4039

41-
return await parser.parseFeed(rssObj, xmlData);
40+
return await parser.parseFeed(rssObj, xmlData, startTime);
4241
} catch (error) {
4342
logger.warn(`[${rssObj.rssUrl}] 피드 파싱 중 오류 발생: ${error}`);
4443
return [];

feed-crawler/src/feed-crawler.ts

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@ export class FeedCrawler {
1717
private readonly feedParserManager: FeedParserManager,
1818
) {}
1919

20-
async start() {
20+
async start(startTime: Date) {
2121
logger.info('==========작업 시작==========');
22-
const startTime = Date.now();
2322

2423
await this.feedRepository.deleteRecentFeed();
2524

@@ -29,7 +28,7 @@ export class FeedCrawler {
2928
return;
3029
}
3130

32-
const newFeedsByRss = await this.feedGroupByRss(rssObjects);
31+
const newFeedsByRss = await this.feedGroupByRss(rssObjects, startTime);
3332
const newFeeds = newFeedsByRss.flat();
3433

3534
if (!newFeeds.length) {
@@ -44,19 +43,22 @@ export class FeedCrawler {
4443
await this.feedRepository.setRecentFeedList(insertedData);
4544

4645
const endTime = Date.now();
47-
const executionTime = endTime - startTime;
46+
const executionTime = endTime - startTime.getTime();
4847

4948
logger.info(`실행 시간: ${executionTime / 1000}seconds`);
5049
logger.info('==========작업 완료==========');
5150
}
5251

53-
private feedGroupByRss(rssObjects: RssObj[]): Promise<FeedDetail[][]> {
52+
private feedGroupByRss(
53+
rssObjects: RssObj[],
54+
startTime: Date,
55+
): Promise<FeedDetail[][]> {
5456
return Promise.all(
5557
rssObjects.map(async (rssObj: RssObj) => {
5658
logger.info(
5759
`${rssObj.blogName}(${rssObj.rssUrl}) 에서 데이터 조회하는 중...`,
5860
);
59-
return await this.feedParserManager.fetchAndParse(rssObj);
61+
return await this.feedParserManager.fetchAndParse(rssObj, startTime);
6062
}),
6163
);
6264
}

feed-crawler/src/main.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ function registerSchedulers(
2828
dependencies: ReturnType<typeof initializeDependencies>,
2929
) {
3030
schedule.scheduleJob('FEED CRAWLING', '0,30 * * * *', async () => {
31-
logger.info(`Feed Crawling Start: ${new Date().toISOString()}`);
32-
dependencies.feedCrawler.start();
31+
const now = new Date();
32+
logger.info(`Feed Crawling Start: ${now.toISOString()}`);
33+
dependencies.feedCrawler.start(now);
3334
});
3435

3536
schedule.scheduleJob(

feed-crawler/src/repository/feed.repository.ts

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,34 +21,47 @@ export class FeedRepository {
2121
VALUES (?, ?, ?, ?, ?, ?)
2222
`;
2323

24-
const insertPromises = resultData.map(async (feed) => {
25-
return this.dbConnection.executeQuery(query, [
26-
feed.blogId,
27-
feed.pubDate,
28-
feed.title,
29-
feed.link,
30-
feed.imageUrl,
31-
feed.summary,
32-
]);
24+
const insertPromises = resultData.map(async (feed, index) => {
25+
try {
26+
const result = await this.dbConnection.executeQueryStrict(query, [
27+
feed.blogId,
28+
feed.pubDate,
29+
feed.title,
30+
feed.link,
31+
feed.imageUrl,
32+
feed.summary,
33+
]);
34+
return { result, index, success: true };
35+
} catch (error) {
36+
if (error.code === 'ER_DUP_ENTRY') {
37+
logger.info(`중복 피드 스킵: ${feed.title} (${feed.link})`);
38+
return { result: null, index, success: false, duplicate: true };
39+
}
40+
throw error;
41+
}
3342
});
3443

3544
const promiseResults = await Promise.all(insertPromises);
3645

3746
const insertedFeeds = promiseResults
38-
.map((feed: any, index) => {
39-
if (feed) {
40-
const insertId = feed.insertId;
41-
return {
42-
...resultData[index],
43-
id: insertId,
44-
};
45-
}
46-
})
47-
.filter((feed) => feed);
47+
.filter((result) => result.success)
48+
.map((result) => ({
49+
...resultData[result.index],
50+
id: result.result.insertId,
51+
}));
52+
53+
const duplicateCount = promiseResults.filter(
54+
(result) => result.duplicate,
55+
).length;
4856

4957
logger.info(
50-
`[MySQL] ${insertedFeeds.length}개의 피드 데이터가 성공적으로 데이터베이스에 삽입되었습니다.`,
58+
`[MySQL] ${
59+
insertedFeeds.length
60+
}개의 피드 데이터가 성공적으로 데이터베이스에 삽입되었습니다.${
61+
!!duplicateCount ? ' ' + duplicateCount + '개의 중복 피드 발생' : ''
62+
}`,
5163
);
64+
5265
return insertedFeeds;
5366
}
5467

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
export interface DatabaseConnection {
2-
executeQuery<T>(query: string, params: any[]): Promise<T[]>;
2+
executeQuery<T>(query: string, params: any[]): Promise<T[] | null>;
3+
executeQueryStrict<T>(query: string, params: any[]): Promise<T[]>;
34
end(): Promise<void>;
45
}

feed-crawler/test/e2e/feed-crawling.e2e-spec.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ describe('feed crawling e2e-test', () => {
2929
);
3030

3131
// when
32-
await feedCrawler.start();
32+
const startTime = new Date();
33+
await feedCrawler.start(startTime);
3334

3435
// then
3536
const feeds = await testContext.dbConnection.executeQuery(
@@ -66,7 +67,8 @@ describe('feed crawling e2e-test', () => {
6667
);
6768

6869
// when
69-
await feedCrawler.start();
70+
const startTime = new Date();
71+
await feedCrawler.start(startTime);
7072

7173
// then
7274
const feedsFromDB = await testContext.dbConnection.executeQuery(

feed-crawler/test/unit/parser.spec.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,11 @@ describe('Parser 모듈 테스트', () => {
129129
});
130130

131131
it('정상적인 feedDetail을 반환해야 한다.', async () => {
132-
const result = await feedParserManager.fetchAndParse(MOCK_RSS_OBJ);
132+
const startTime = new Date();
133+
const result = await feedParserManager.fetchAndParse(
134+
MOCK_RSS_OBJ,
135+
startTime,
136+
);
133137

134138
expect(result[0]).toMatchObject({
135139
blogId: MOCK_RSS_OBJ.id,
@@ -162,7 +166,11 @@ describe('Parser 모듈 테스트', () => {
162166
});
163167

164168
it('정상적인 feedDetail을 반환해야 한다.', async () => {
165-
const result = await feedParserManager.fetchAndParse(MOCK_RSS_OBJ);
169+
const startTime = new Date();
170+
const result = await feedParserManager.fetchAndParse(
171+
MOCK_RSS_OBJ,
172+
startTime,
173+
);
166174

167175
expect(result[0]).toMatchObject({
168176
blogId: MOCK_RSS_OBJ.id,

0 commit comments

Comments (0)