1+ package mServer .crawler .sender .arte ;
2+
3+ import com .google .gson .JsonElement ;
4+ import de .mediathekview .mlib .Const ;
5+ import de .mediathekview .mlib .daten .DatenFilm ;
6+ import de .mediathekview .mlib .tool .Log ;
7+ import mServer .crawler .CrawlerTool ;
8+ import mServer .crawler .FilmeSuchen ;
9+ import mServer .crawler .sender .MediathekCrawler ;
10+ import mServer .crawler .sender .arte .json .ArteVideoInfoDto ;
11+ import mServer .crawler .sender .arte .tasks .ArteDtoVideo2FilmTask ;
12+ import mServer .crawler .sender .arte .tasks .ArteVideoInfoTask ;
13+ import mServer .crawler .sender .arte .tasks .ArteVideoLinkTask ;
14+ import mServer .crawler .sender .base .JsonUtils ;
15+ import mServer .crawler .sender .base .JsoupConnection ;
16+ import mServer .crawler .sender .base .TopicUrlDTO ;
17+ import org .apache .logging .log4j .LogManager ;
18+ import org .apache .logging .log4j .Logger ;
19+
20+ import java .io .IOException ;
21+ import java .util .Map ;
22+ import java .util .Optional ;
23+ import java .util .Set ;
24+ import java .util .concurrent .ConcurrentLinkedQueue ;
25+ import java .util .concurrent .RecursiveTask ;
26+
27+ public class ArteCrawler extends MediathekCrawler {
28+ private static final Logger LOG = LogManager .getLogger (ArteCrawler .class );
29+ private final JsoupConnection jsoupConnection ;
30+
31+ public ArteCrawler (FilmeSuchen ssearch , int startPrio ) {
32+ this (ssearch , startPrio , Const .ARTE_DE );
33+ }
34+
35+ protected ArteCrawler (FilmeSuchen ssearch , int startPrio , String sender ) {
36+ super (ssearch , sender ,/* threads */ 1 , /* urlWarten */ 200 , startPrio );
37+ this .jsoupConnection = new JsoupConnection (60 , 4 );
38+ }
39+
40+ protected ArteLanguage getLanguage () {
41+ return ArteLanguage .DE ;
42+ }
43+
44+ @ Override
45+ protected RecursiveTask <Set <DatenFilm >> createCrawlerTask () {
46+
47+ try {
48+ final ConcurrentLinkedQueue <TopicUrlDTO > videoUrls = new ConcurrentLinkedQueue <>();
49+ videoUrls .addAll (createVideosQueue (getLanguage ().toString ().toLowerCase ()));
50+
51+ final ArteVideoInfoTask aArteRestVideoInfoTask ;
52+ // DO NOT overload - maximumUrlsPerTask used to reduce threads to 4
53+ aArteRestVideoInfoTask = new ArteVideoInfoTask (this , videoUrls );
54+ final ConcurrentLinkedQueue <ArteVideoInfoDto > videos = new ConcurrentLinkedQueue <>();
55+ videos .addAll (aArteRestVideoInfoTask .fork ().join ());
56+ //
57+ Log .sysLog (getSendername () + " Anzahl video info: " + videos .size ());
58+ //
59+ final ConcurrentLinkedQueue <ArteVideoInfoDto > videosWithLink = new ConcurrentLinkedQueue <>();
60+ final ArteVideoLinkTask aArteRestVideosTask = new ArteVideoLinkTask (this , videos );
61+ videosWithLink .addAll (aArteRestVideosTask .fork ().join ());
62+ //
63+ Log .sysLog (getSendername () + " Anzahl video links: " + videosWithLink .size ());
64+ //
65+ return new ArteDtoVideo2FilmTask (this , new ConcurrentLinkedQueue <>(videosWithLink ), getSendername ());
66+
67+ } catch (final Exception ex ) {
68+ LOG .fatal ("Exception in {} crawler." , getSendername (), ex );
69+ }
70+ return null ;
71+ }
72+
73+ private ConcurrentLinkedQueue <TopicUrlDTO > createVideosQueue (String language ) {
74+ int maxPages = getMaxPagesForOverview (language );
75+ final ConcurrentLinkedQueue <TopicUrlDTO > root = new ConcurrentLinkedQueue <>();
76+ String rootUrl = String .format (ArteConstants .VIDEOS_URL , 1 , language );
77+ root .add (new TopicUrlDTO ("all videos1" , rootUrl ));
78+ if (maxPages >= 100 ) {
79+ String rootUrl2 = String .format (ArteConstants .VIDEOS_URL_ALT , 1 , language );
80+ root .add (new TopicUrlDTO ("all videos2" , rootUrl2 ));
81+ }
82+ return root ;
83+ }
84+
85+ private int getMaxPagesForOverview (String lang ) {
86+ final int maxAvailablePages = getNumberOfAvailablePages (lang );
87+ final int configuredMaxPages = getMaximumSubpages ();
88+ if (configuredMaxPages > maxAvailablePages ) {
89+ return Math .min (configuredMaxPages , maxAvailablePages / 2 );
90+ } else {
91+ return Math .min (configuredMaxPages , configuredMaxPages / 2 );
92+ }
93+ }
94+
95+ private int getNumberOfAvailablePages (String lang ) {
96+ final int naturalLimit = Math .min (100 , getMaximumSubpages ());
97+ try {
98+ String rootUrl = String .format (ArteConstants .VIDEOS_URL , 1 , lang );
99+ String [] path = {"meta" , "videos" , "pages" };
100+ final Map <String , String > headers = Map .of (
101+ "Accept" , "application/json" ,
102+ "Content-Type" , "application/json" ,
103+ "Authorization" , ArteConstants .API_TOKEN
104+ );
105+ JsonElement element = jsoupConnection .requestBodyAsJsonElement (rootUrl , headers );
106+ Optional <Integer > pages = JsonUtils .getElementValueAsInteger (element , path );
107+ if (pages .isPresent ()) {
108+ return pages .get ();
109+ }
110+ } catch (IOException e ) {
111+ LOG .error ("getMaxPagesForOverview" , e );
112+ }
113+ return naturalLimit ;
114+ }
115+
116+ private int getMaximumSubpages () {
117+ if (CrawlerTool .loadLongMax ()) {
118+ return 10 ;
119+ } else {
120+ return 1 ;
121+ }
122+ }
123+ }
124+
125+
0 commit comments