Skip to content

Commit 487bcb2

Browse files
authored
Merge pull request boostcampwm-2024#489 from boostcampwm-2024/test/feed-crawler-unit
✅ test: feed-crawler 유닛 테스트 작성
2 parents 32fe3fe + 2ec1813 commit 487bcb2

15 files changed

+1566
-118
lines changed

feed-crawler/package.json

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,9 @@
4040
"build": "tsc --project tsconfig.build.json",
4141
"start": "node dist/src/main.js",
4242
"start:dev": "ts-node src/main.ts",
43-
"test:unit": "cross-env NODE_ENV=test jest --config test/jest-unit.json",
44-
"test:unit:cov": "cross-env NODE_ENV=test jest --coverage --config test/jest-unit.json",
45-
"test:e2e": "cross-env NODE_ENV=test jest --config test/jest-e2e.json",
46-
"test:e2e:cov": "cross-env NODE_ENV=test jest --coverage --config test/jest-e2e.json"
43+
"test:unit": "cross-env NODE_ENV=test jest --config test/config/unit/jest/jest-unit.json",
44+
"test:unit:cov": "cross-env NODE_ENV=test jest --coverage --config test/config/unit/jest/jest-unit.json",
45+
"test:e2e": "cross-env NODE_ENV=test jest --config test/config/e2e/jest/jest-e2e.json",
46+
"test:e2e:cov": "cross-env NODE_ENV=test jest --coverage --config test/config/e2e/jest/jest-e2e.json"
4747
}
4848
}

feed-crawler/src/event_worker/workers/claude-event-worker.ts

Lines changed: 37 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -46,11 +46,7 @@ export class ClaudeEventWorker extends AbstractQueueWorker<FeedAIQueueItem> {
4646
const aiData = await this.requestAI(feed);
4747
await this.saveAIResult(aiData);
4848
} catch (error) {
49-
logger.error(
50-
`${this.nameTag} ${feed.id} 처리 중 에러 발생: ${error.message}`,
51-
error.stack,
52-
);
53-
await this.handleFailure(feed);
49+
await this.handleFailure(feed, error);
5450
}
5551
}
5652

@@ -105,18 +101,48 @@ export class ClaudeEventWorker extends AbstractQueueWorker<FeedAIQueueItem> {
105101
await this.feedRepository.updateSummary(feed.id, feed.summary);
106102
}
107103

108-
protected async handleFailure(feed: FeedAIQueueItem): Promise<void> {
109-
if (feed.deathCount < 3) {
104+
protected async handleFailure(
105+
feed: FeedAIQueueItem,
106+
error: Error,
107+
): Promise<void> {
108+
const shouldRetry = this.isRetryableError(error);
109+
110+
logger.error(
111+
`${this.nameTag} ${feed.id} 처리 실패:
112+
- 에러: ${error.name} - ${error.message}
113+
- 재시도 가능: ${shouldRetry}
114+
- 현재 deathCount: ${feed.deathCount}`,
115+
);
116+
117+
if (shouldRetry && feed.deathCount < 3) {
110118
feed.deathCount++;
111119
await this.redisConnection.rpush(redisConstant.FEED_AI_QUEUE, [
112120
JSON.stringify(feed),
113121
]);
114-
logger.warn(`${this.nameTag} ${feed.id} 재시도 (${feed.deathCount})`);
115-
} else {
116-
logger.error(
117-
`${this.nameTag} ${feed.id} 의 Death Count 3회 이상 발생. AI 요청 스킵`,
122+
logger.warn(
123+
`${this.nameTag} ${feed.id} 재시도 예약 (${feed.deathCount}/3)`,
118124
);
125+
} else {
126+
const reason = shouldRetry
127+
? `Death Count 3회 초과`
128+
: `재시도 불가능한 에러 (${error.name})`;
129+
logger.error(`${this.nameTag} ${feed.id} 영구 실패 - ${reason}`);
119130
await this.feedRepository.updateNullSummary(feed.id);
120131
}
121132
}
133+
134+
/**
 * Classifies a failure as retryable or permanent by substring-matching the
 * lower-cased error message.
 *
 * The non-retryable checks run first, so a message that contains both a
 * permanent marker and a transient marker is treated as permanent.
 *
 * @param error - The error handed over by `handleFailure`.
 * @returns `false` when the message contains 'invalid', '401', 'json' or
 *   'parse'; `true` in every other case.
 *
 * NOTE(review): `error.message.toLowerCase()` throws when `message` is not a
 * string (e.g. a non-Error value was thrown upstream) — confirm callers
 * always pass a real Error.
 */
private isRetryableError(error: Error): boolean {
135+
const message = error.message.toLowerCase();
136+
137+
// Cases that must not be retried (permanent errors)
138+
if (message.includes('invalid') || message.includes('401')) return false;
139+
if (message.includes('json') || message.includes('parse')) return false;
140+
141+
// Cases that should be retried (transient errors).
// NOTE(review): these two checks do not change the result, because the
// default below is also `true`; they only document intent.
142+
if (message.includes('rate limit') || message.includes('429')) return true;
143+
if (message.includes('timeout') || message.includes('503')) return true;
144+
145+
// Default: retry
146+
return true;
147+
}
122148
}

feed-crawler/src/event_worker/workers/full-feed-crawl-event-worker.ts

Lines changed: 33 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,6 @@ export class FullFeedCrawlEventWorker extends AbstractQueueWorker<FullFeedCrawlM
6262
`${this.nameTag} RSS ID ${rssId}에서 ${insertedFeeds.length}개의 피드를 처리했습니다.`,
6363
);
6464
} catch (error) {
65-
logger.error(
66-
`${this.nameTag} RSS ID ${rssId} 처리 중 오류 발생: ${error.message}`,
67-
);
6865
await this.handleFailure(crawlMessage, error);
6966
}
7067
}
@@ -73,18 +70,47 @@ export class FullFeedCrawlEventWorker extends AbstractQueueWorker<FullFeedCrawlM
7370
crawlMessage: FullFeedCrawlMessage,
7471
error: Error,
7572
): Promise<void> {
76-
if (crawlMessage.deathCount < 3) {
73+
const shouldRetry = this.isRetryableError(error);
74+
75+
logger.error(
76+
`${this.nameTag} RSS ID ${crawlMessage.rssId} 처리 실패:
77+
- 에러: ${error.name} - ${error.message}
78+
- 재시도 가능: ${shouldRetry}
79+
- 현재 deathCount: ${crawlMessage.deathCount}`,
80+
);
81+
82+
if (shouldRetry && crawlMessage.deathCount < 3) {
7783
crawlMessage.deathCount++;
7884
await this.redisConnection.rpush(redisConstant.FULL_FEED_CRAWL_QUEUE, [
7985
JSON.stringify(crawlMessage),
8086
]);
81-
logger.error(
82-
`${this.nameTag} ${crawlMessage.rssId} 의 Death Count 3회 이상 발생. 크롤링 스킵 처리`,
87+
logger.warn(
88+
`${this.nameTag} RSS ID ${crawlMessage.rssId} 재시도 예약 (${crawlMessage.deathCount}/3)`,
8389
);
8490
} else {
91+
const reason = shouldRetry
92+
? `Death Count 3회 초과`
93+
: `재시도 불가능한 에러 (${error.name})`;
8594
logger.error(
86-
`${this.nameTag} RSS ID ${crawlMessage.rssId} 전체 피드 크롤링 실패: ${error.message}`,
95+
`${this.nameTag} RSS ID ${crawlMessage.rssId} 영구 실패 - ${reason}`,
8796
);
8897
}
8998
}
99+
100+
/**
 * Classifies a crawl failure as retryable or permanent by substring-matching
 * the lower-cased error message.
 *
 * The non-retryable checks run first, so a message that contains both a
 * permanent marker and a transient marker is treated as permanent.
 *
 * @param error - The error handed over by `handleFailure`.
 * @returns `false` when the message contains 'invalid', '401', 'json',
 *   'parse' or '찾을 수 없습니다'; `true` in every other case.
 *
 * NOTE(review): `error.message.toLowerCase()` throws when `message` is not a
 * string (e.g. a non-Error value was thrown upstream) — confirm callers
 * always pass a real Error.
 */
private isRetryableError(error: Error): boolean {
101+
const message = error.message.toLowerCase();
102+
103+
// Cases that must not be retried (permanent errors)
104+
if (message.includes('invalid') || message.includes('401')) return false;
105+
if (message.includes('json') || message.includes('parse')) return false;
106+
if (message.includes('찾을 수 없습니다')) return false; // RSS not found
107+
108+
// Cases that should be retried (transient errors).
// NOTE(review): these three checks do not change the result, because the
// default below is also `true`; they only document intent.
109+
if (message.includes('rate limit') || message.includes('429')) return true;
110+
if (message.includes('timeout') || message.includes('503')) return true;
111+
if (message.includes('network') || message.includes('fetch')) return true;
112+
113+
// Default: retry
114+
return true;
115+
}
90116
}
Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,73 @@
1+
import { RssObj } from '../../../src/common/types';
2+
3+
/**
4+
* Fixture data for parser tests
5+
*/
6+
7+
// In tests the time window used for filtering is very broad, so the exact
// time does not matter.
// Note: despite its name, FIXED_DATE is `new Date()` evaluated when this
// module is loaded, so its value differs between test runs.
8+
export const FIXED_DATE = new Date();
9+
// UTC string form of FIXED_DATE, interpolated into the RSS <pubDate> elements.
const FIXED_DATE_UTC = FIXED_DATE.toUTCString();
10+
// ISO 8601 form of FIXED_DATE, interpolated into the Atom <published>/<updated> elements.
const FIXED_DATE_ISO = FIXED_DATE.toISOString();
11+
12+
// Sample RSS 2.0 document: one channel with two items. The first item carries
// a <content:encoded> CDATA body; the second has HTML entities (&middot;,
// &nbsp;) in its title and no <content:encoded>.
13+
export const RSS_20_SAMPLE = `<?xml version="1.0" encoding="UTF-8"?>
14+
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
15+
<channel>
16+
<title>테스트 블로그</title>
17+
<description>테스트용 블로그입니다</description>
18+
<link>https://rssfeed.com</link>
19+
<item>
20+
<title>첫 번째 글제목</title>
21+
<description>첫 번째 글 내용입니다.</description>
22+
<content:encoded><![CDATA[<p>첫 번째 글 내용입니다.</p>]]></content:encoded>
23+
<link>https://rssfeed.com/post1</link>
24+
<pubDate>${FIXED_DATE_UTC}</pubDate>
25+
</item>
26+
<item>
27+
<title>&middot; 특수문자 제목 &nbsp;</title>
28+
<description>두 번째 글 내용입니다.</description>
29+
<link>https://rssfeed.com/post2</link>
30+
<pubDate>${FIXED_DATE_UTC}</pubDate>
31+
</item>
32+
</channel>
33+
</rss>`;
34+
35+
// Sample Atom 1.0 document: one feed with two entries. The first entry has
// both <summary> and <content>; the second has HTML entities (&middot;,
// &nbsp;) in its title and only a <summary>.
36+
export const ATOM_10_SAMPLE = `<?xml version="1.0" encoding="UTF-8"?>
37+
<feed xmlns="http://www.w3.org/2005/Atom">
38+
<title>테스트 Atom 피드</title>
39+
<link href="https://atomfeed.com"/>
40+
<id>https://atomfeed.com</id>
41+
<updated>${FIXED_DATE_ISO}</updated>
42+
<entry>
43+
<title>Atom 첫 번째 글</title>
44+
<link rel="alternate" href="https://atomfeed.com/entry1"/>
45+
<id>https://atomfeed.com/entry1</id>
46+
<published>${FIXED_DATE_ISO}</published>
47+
<updated>${FIXED_DATE_ISO}</updated>
48+
<summary>Atom 첫 번째 글 요약</summary>
49+
<content>Atom 첫 번째 글 내용</content>
50+
</entry>
51+
<entry>
52+
<title>&middot; Atom 특수문자 제목 &nbsp;</title>
53+
<link rel="alternate" href="https://atomfeed.com/entry2"/>
54+
<id>https://atomfeed.com/entry2</id>
55+
<published>${FIXED_DATE_ISO}</published>
56+
<updated>${FIXED_DATE_ISO}</updated>
57+
<summary>Atom 두 번째 글 요약</summary>
58+
</entry>
59+
</feed>`;
60+
61+
// XML that is neither RSS nor Atom: the document itself is well-formed, but
// its root element is <invalid> rather than <rss> or <feed>.
62+
export const INVALID_XML = `<?xml version="1.0"?>
63+
<invalid>
64+
<data>이것은 RSS도 Atom도 아닙니다</data>
65+
</invalid>`;
66+
67+
// RssObj instance for tests (id 1, platform 'etc').
68+
export const MOCK_RSS_OBJ: RssObj = {
69+
id: 1,
70+
blogName: '테스트 블로그',
71+
blogPlatform: 'etc',
72+
rssUrl: 'https://denamu.dev/rss',
73+
};

feed-crawler/test/jest.setup.ts renamed to feed-crawler/test/config/e2e/env/jest.setup.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import { setupTestContainer } from './setup/testContext.setup';
1+
import { setupTestContainer } from '../../../setup/testContext.setup';
22

33
beforeEach(() => {
44
jest.restoreAllMocks();

feed-crawler/test/setup/jest.global-setup.ts renamed to feed-crawler/test/config/e2e/global/jest.global-setup.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import { MySqlContainer } from '@testcontainers/mysql';
2-
import { setupTestContainer } from './testContext.setup';
2+
import { setupTestContainer } from '../../../setup/testContext.setup';
33
const globalAny: any = global;
44

55
export default async function globalSetup() {
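feed-crawler/test/setup/jest.global-teardown.ts renamed to feed-crawler/test/config/e2e/global/jest.global-teardown.ts (file name inferred from the globalTeardown path change in jest-e2e.json; the page omitted this header)

File renamed without changes.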

feed-crawler/test/jest-e2e.json renamed to feed-crawler/test/config/e2e/jest/jest-e2e.json

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,11 @@
55
"^.+\\.(t|j)s$": "ts-jest"
66
},
77
"testEnvironment": "node",
8-
"rootDir": "..",
8+
"rootDir": "../../../..",
99
"coverageDirectory": "test/coverage",
10-
"setupFilesAfterEnv": ["./test/jest.setup.ts"],
11-
"globalSetup": "./test/setup/jest.global-setup.ts",
12-
"globalTeardown": "./test/setup/jest.global-teardown.ts",
10+
"setupFilesAfterEnv": ["./test/config/e2e/env/jest.setup.ts"],
11+
"globalSetup": "./test/config/e2e/global/jest.global-setup.ts",
12+
"globalTeardown": "./test/config/e2e/global/jest.global-teardown.ts",
1313
"testTimeout": 20000,
1414
"maxWorkers": 1
1515
}

feed-crawler/test/jest-unit.json renamed to feed-crawler/test/config/unit/jest/jest-unit.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
"^.+\\.(t|j)s$": "ts-jest"
66
},
77
"testEnvironment": "node",
8-
"rootDir": "..",
8+
"rootDir": "../../../..",
99
"coverageDirectory": "test/coverage",
1010
"testTimeout": 10000
1111
}

0 commit comments

Comments
 (0)