Skip to content

Commit f8ced99

Browse files
authored
[feat/OPS-341] 벨로그 크롤러 생성 (#90)
* feat/OPS-341 : Velog 크롤러 생성 * fix : createDate,modifyDate 자동 적용되도록 설정
1 parent 8a95b61 commit f8ced99

File tree

5 files changed

+101
-1
lines changed

5 files changed

+101
-1
lines changed

src/main/java/org/tuna/zoopzoop/backend/BackendApplication.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
import org.springframework.boot.SpringApplication;
44
import org.springframework.boot.autoconfigure.SpringBootApplication;
5+
import org.springframework.data.jpa.repository.config.EnableJpaAuditing;
56

67
@SpringBootApplication
8+
@EnableJpaAuditing
79
public class BackendApplication {
810
public static void main(String[] args) {
911
SpringApplication.run(BackendApplication.class, args);

src/main/java/org/tuna/zoopzoop/backend/domain/datasource/crawler/dto/SpecificSiteDto.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,14 @@ public record SpecificSiteDto(
99
String imageUrl, // 썸네일 이미지 url
1010
String source // 출처
1111
) {
12+
@Override
13+
public String toString() {
14+
return "SpecificSiteDto {\n" +
15+
" title='" + title + "',\n" +
16+
" dataCreatedDate=" + dataCreatedDate + ",\n" +
17+
" content='" + content + "',\n" +
18+
" imageUrl='" + imageUrl + "',\n" +
19+
" source='" + source + "'\n" +
20+
"}";
21+
}
1222
}

src/main/java/org/tuna/zoopzoop/backend/domain/datasource/crawler/service/SupportedDomain.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
public enum SupportedDomain {
44
NAVERNEWS("n.news.naver.com"),
5-
NAVERBLOG("blog.naver.com");
5+
NAVERBLOG("blog.naver.com"),
6+
VELOG("velog.io");
67

78
private final String domain;
89

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
package org.tuna.zoopzoop.backend.domain.datasource.crawler.service;
2+
3+
import org.jsoup.nodes.Document;
4+
import org.springframework.stereotype.Component;
5+
import org.tuna.zoopzoop.backend.domain.datasource.crawler.dto.CrawlerResult;
6+
import org.tuna.zoopzoop.backend.domain.datasource.crawler.dto.SpecificSiteDto;
7+
8+
import java.time.LocalDate;
9+
import java.time.format.DateTimeFormatter;
10+
11+
@Component
12+
public class VelogCrawler implements Crawler{
13+
private static final SupportedDomain DOMAIN = SupportedDomain.VELOG;
14+
private static final DateTimeFormatter VELOG_FORMATTER = DateTimeFormatter.ofPattern("yyyy년 M월 d일");
15+
16+
@Override
17+
public boolean supports(String domain) {
18+
return domain.contains(DOMAIN.getDomain());
19+
}
20+
21+
@Override
22+
public CrawlerResult<?> extract(Document doc) {
23+
// 제목
24+
String title = doc.selectFirst("meta[property=og:title]").attr("content");
25+
26+
// 작성 날짜
27+
String publishedAt = doc.selectFirst(
28+
"div.information > span:not([class])"
29+
).text();
30+
31+
LocalDate dataCreatedDate = transLocalDate(publishedAt);
32+
33+
// 내용(ai한테 줘야함)
34+
String content = doc.selectFirst("div.atom-one").text();
35+
36+
// 썸네일 이미지 url
37+
String imageUrl = doc.selectFirst("meta[property=og:image]").attr("content");
38+
39+
// 출처
40+
String source = doc.selectFirst("span.username > a").text();
41+
42+
return new CrawlerResult<>(
43+
CrawlerResult.CrawlerType.SPECIFIC,
44+
new SpecificSiteDto(title, dataCreatedDate, content, imageUrl, source)
45+
);
46+
}
47+
48+
@Override
49+
public LocalDate transLocalDate(String rawDate) {
50+
51+
if(rawDate.contains("일 전")){
52+
int daysAgo = Integer.parseInt(rawDate.split("일 전")[0].trim());
53+
return LocalDate.now().minusDays(daysAgo);
54+
}else if(rawDate.contains("방금 전")) {
55+
return LocalDate.now();
56+
}else if(rawDate.contains("시간 전")||rawDate.contains("분 전")){
57+
return LocalDate.now();
58+
}
59+
60+
return LocalDate.parse(rawDate, VELOG_FORMATTER);
61+
}
62+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package org.tuna.zoopzoop.backend.domain.datasource.crawler.service;
2+
3+
import org.jsoup.Jsoup;
4+
import org.jsoup.nodes.Document;
5+
import org.junit.jupiter.api.Test;
6+
import org.tuna.zoopzoop.backend.domain.datasource.crawler.dto.CrawlerResult;
7+
8+
import java.io.IOException;
9+
10+
import static org.assertj.core.api.Assertions.assertThat;
11+
import static org.junit.jupiter.api.Assertions.*;
12+
13+
class VelogCrawlerTest {
14+
15+
private final VelogCrawler velogCrawler = new VelogCrawler();
16+
17+
@Test
18+
void testExtract() throws IOException {
19+
Document doc = Jsoup.connect("https://velog.io/@hyeonnnnn/VampireSurvivorsClone-04.-PoolManager").get();
20+
CrawlerResult<?> result = velogCrawler.extract(doc);
21+
assertThat(result).isNotNull();
22+
23+
System.out.println(result);
24+
}
25+
}

0 commit comments

Comments
 (0)