@@ -58,12 +58,11 @@ async function getWikipediaData(language, topic) {
5858 } ;
5959
6060 const wikipediaHTMLPromise = function ( ) {
61-
6261 const requestConfig = {
63- baseURL : "https://" + language + ".wikipedia.org/api/rest_v1 /" ,
64- url : "/page/mobile-sections/" + encodedTopic ,
62+ baseURL : "https://" + language + ".wikipedia.org/w/rest.php/v1/page /" ,
63+ url : encodedTopic + "/html" ,
6564 method : "get" ,
66- responseType : "json " ,
65+ responseType : "text " ,
6766 headers : {
6867 "Api-User-Agent" : process . env . WIKIDOCUMENTARIES_API_USER_AGENT
6968 } ,
@@ -72,65 +71,38 @@ async function getWikipediaData(language, topic) {
7271 else return axios . request ( requestConfig ) ;
7372 } ;
7473
75- const [ wikipediaSummaryResponse , wikipediaHTMLResponse ]
76- = await axios . all ( [ wikipediaSummaryPromise ( ) , wikipediaHTMLPromise ( ) ] ) ;
74+ const [ summaryRes , htmlRes ] = await Promise . allSettled ( [
75+ wikipediaSummaryPromise ( ) ,
76+ wikipediaHTMLPromise ( )
77+ ] ) ;
7778
78- if ( wikipediaHTMLResponse . data == undefined ) {
79- // No wikipedia article
80- excerptHTML = "" ;
81- remainingHTML = null ;
82- }
83- else {
84- var origHTML = wikipediaHTMLResponse . data . lead . sections [ 0 ] . text ;
85- var remainingHTML = null ;
86-
87- if ( wikipediaHTMLResponse . data . lead . disambiguation != undefined && wikipediaHTMLResponse . data . lead . disambiguation == true ) {
88- wikipediaHTMLResponse . data . remaining . sections . forEach ( section => {
89- origHTML += section . text ;
90- } ) ;
79+ const wikipediaSummaryResponse = summaryRes . status === "fulfilled" ? summaryRes . value : null ;
80+ const wikipediaHTMLResponse = htmlRes . status === "fulfilled" ? htmlRes . value : null ;
81+
82+ let excerptHTML = "" ;
83+ let remainingHTML = null ;
84+
85+ if ( wikipediaHTMLResponse && wikipediaHTMLResponse . data != null && typeof wikipediaHTMLResponse . data === 'string' ) {
86+ let rawHTML = wikipediaHTMLResponse . data ;
87+
88+ const bodyMatch = rawHTML . match ( / < b o d y [ ^ > ] * > ( [ \s \S ] * ?) < \/ b o d y > / i) ;
89+ if ( bodyMatch ) {
90+ rawHTML = bodyMatch [ 1 ] ;
9191 }
92- else {
93- var remainingOrigHTML = "" ;
94-
95- wikipediaHTMLResponse . data . remaining . sections . forEach ( section => {
96- if ( section . isReferenceSection == undefined ) {
97- var sectionHeaderStartTag = "" ;
98- var sectionHeaderEndTag = "" ;
99- switch ( section . toclevel ) {
100- case 1 :
101- sectionHeaderStartTag = "<h2 class='h2'>" ;
102- sectionHeaderEndTag = "</h2>" ;
103- break ;
104- case 2 :
105- sectionHeaderStartTag = "<h3 class='h3'>" ;
106- sectionHeaderEndTag = "</h3>" ;
107- break ;
108- case 3 :
109- sectionHeaderStartTag = "<h4 class='h4'>" ;
110- sectionHeaderEndTag = "</h4>" ;
111- break ;
112- case 4 :
113- sectionHeaderStartTag = "<h5 class='h5'>" ;
114- sectionHeaderEndTag = "</h5>" ;
115- break ;
116- }
117- remainingOrigHTML += sectionHeaderStartTag + section . line + sectionHeaderEndTag ;
118- remainingOrigHTML += section . text ;
119- }
120- } ) ;
121-
122- /* if (remainingOrigHTML.length > 3000) { */ // Small count of HTML should be with the leading section
92+
93+ const splitIndex = rawHTML . search ( / < h 2 [ \s > ] / i) ;
94+ const origHTML = splitIndex > - 1 ? rawHTML . substring ( 0 , splitIndex ) : rawHTML ;
95+
96+ if ( splitIndex > - 1 ) {
97+ const remainingOrigHTML = rawHTML . substring ( splitIndex ) ;
12398 remainingHTML = convertToWikidocumentariesHTML ( remainingOrigHTML , topic , language ) ;
124- /* }
125- else {
126- origHTML += remainingOrigHTML;
127- } */
12899 }
129- var excerptHTML = convertToWikidocumentariesHTML ( origHTML , topic , language ) ;
100+
101+ excerptHTML = convertToWikidocumentariesHTML ( origHTML , topic , language ) ;
130102 }
131103
132104 return {
133- wikipedia : wikipediaSummaryResponse . data ,
105+ wikipedia : wikipediaSummaryResponse ? wikipediaSummaryResponse . data : null ,
134106 excerptHTML,
135107 remainingHTML,
136108 } ;
@@ -172,25 +144,7 @@ const convertToWikidocumentariesHTML = function(origHTML, topic, language) {
172144 //$(this).replaceWith($(this).html());
173145 }
174146 } ) ;
175- /* $("table").each(function(index) {
176- $(this).remove();
177- });
178- $("figure").each(function(index) {
179- $(this).remove();
180- });
181- $("figure-inline").each(function(index) {
182- $(this).remove();
183- });
184- $("sup").each(function(index) {
185- $(this).remove();
186- });
187-
188- $("div").each(function(index) {
189- var div_class = $(this).attr('class');
190- if (div_class == undefined || div_class != 'noprint') {
191- $(this).remove();
192- }
193- }); */
147+
194148 $ ( "table" ) . each ( function ( index ) { //Remove English Wikipedia infobox
195149 var div_class = $ ( this ) . attr ( 'class' ) ;
196150 if ( div_class != undefined && div_class . indexOf ( 'infobox' ) != - 1 ) {
@@ -217,4 +171,4 @@ const convertToWikidocumentariesHTML = function(origHTML, topic, language) {
217171 } ) ;
218172
219173 return $ . html ( ) ;
220- }
174+ } ;
0 commit comments