11package org .tuna .zoopzoop .backend .domain .datasource .crawler .service ;
22
33import org .jsoup .nodes .Document ;
4+ import org .jsoup .nodes .Element ;
5+ import org .springframework .core .Ordered ;
6+ import org .springframework .core .annotation .Order ;
47import org .springframework .stereotype .Component ;
58import org .tuna .zoopzoop .backend .domain .datasource .crawler .dto .CrawlerResult ;
69import org .tuna .zoopzoop .backend .domain .datasource .crawler .dto .SpecificSiteDto ;
912import java .time .format .DateTimeFormatter ;
1013
1114@ Component
15+ @ Order (Ordered .HIGHEST_PRECEDENCE )
1216public class VelogCrawler implements Crawler {
1317 private static final SupportedDomain DOMAIN = SupportedDomain .VELOG ;
1418 private static final DateTimeFormatter VELOG_FORMATTER = DateTimeFormatter .ofPattern ("yyyy년 M월 d일" );
@@ -19,25 +23,28 @@ public boolean supports(String domain) {
1923 }
2024
2125 @ Override
22- public CrawlerResult <? > extract (Document doc ) {
26+ public CrawlerResult <SpecificSiteDto > extract (Document doc ) {
2327 // 제목
24- String title = doc .selectFirst ("meta[property=og:title]" ).attr ("content" );
28+ Element titleElement = doc .selectFirst ("meta[property=og:title]" );
29+ String title = titleElement != null ? titleElement .attr ("content" ) : "" ;
2530
2631 // 작성 날짜
27- String publishedAt = doc .selectFirst (
28- "div.information > span:not([class])"
29- ).text ();
32+ Element publishedAtElement = doc .selectFirst ("div.information > span:not([class])" );
33+ String publishedAt = publishedAtElement != null ? publishedAtElement .text () : "" ;
3034
31- LocalDate dataCreatedDate = transLocalDate (publishedAt );
35+ LocalDate dataCreatedDate = publishedAt . isBlank () ? DEFAULT_DATE : transLocalDate (publishedAt ) ;
3236
3337 // 내용(ai한테 줘야함)
34- String content = doc .selectFirst ("div.atom-one" ).text ();
38+ Element contentElement = doc .selectFirst ("div.atom-one" );
39+ String content = contentElement != null ? contentElement .text () : "" ;
3540
3641 // 썸네일 이미지 url
37- String imageUrl = doc .selectFirst ("meta[property=og:image]" ).attr ("content" );
42+ Element imageUrlElement = doc .selectFirst ("meta[property=og:image]" );
43+ String imageUrl = imageUrlElement != null ? imageUrlElement .attr ("content" ) : "" ;
3844
3945 // 출처
40- String source = doc .selectFirst ("span.username > a" ).text ();
46+ Element sourceElement = doc .selectFirst ("span.username > a" );
47+ String source = sourceElement != null ? sourceElement .text () : "" ;
4148
4249 return new CrawlerResult <>(
4350 CrawlerResult .CrawlerType .SPECIFIC ,
@@ -51,10 +58,10 @@ public LocalDate transLocalDate(String rawDate) {
5158 if (rawDate .contains ("일 전" )){
5259 int daysAgo = Integer .parseInt (rawDate .split ("일 전" )[0 ].trim ());
5360 return LocalDate .now ().minusDays (daysAgo );
54- }else if (rawDate .contains ("방금 전" )) {
55- return LocalDate .now ();
56- }else if (rawDate .contains ("시간 전" )||rawDate .contains ("분 전" )){
61+ }else if (rawDate .contains ("시간 전" )||rawDate .contains ("방금 전" )||rawDate .contains ("분 전" )){
5762 return LocalDate .now ();
63+ }else if (rawDate .contains ("어제" )){
64+ return LocalDate .now ().minusDays (1 );
5865 }
5966
6067 return LocalDate .parse (rawDate , VELOG_FORMATTER );
0 commit comments