@@ -11,18 +11,20 @@ export const getArticle = async (index: string, username: string) => {
1111
1212 // @ts -ignore
1313 res ?. items . forEach ( element => {
14- const thumbnail = extractFirstImageFromHTML ( element . content )
14+ const thumbnail = extractFirstImageFromHTML ( element . content || element . description )
1515 if ( thumbnail ) {
1616 element . thumbnail = thumbnail
1717 fixItem . push ( element )
1818 }
1919 } ) ;
2020
21- const { title, published : pubDate , link : url , thumbnail, content : description } = fixItem [
21+ const { title, published : pubDate , link : url , thumbnail, content : content , description : desc } = fixItem [
2222 // @ts -ignore
2323 index || 0
2424 ] ;
2525
26+ const description = content || desc ;
27+
2628
2729 const responseThumbnail = await axios ( thumbnail . src , { responseType : 'arraybuffer' } ) ;
2830 const base64Img = Buffer . from ( responseThumbnail . data , 'binary' ) . toString ( 'base64' ) ;
@@ -32,19 +34,27 @@ export const getArticle = async (index: string, username: string) => {
3234 const imgType = imgTypeArr [ imgTypeArr . length - 1 ] ;
3335
3436 const convertedThumbnail = `data:image/${ imgType } ;base64,${ base64Img } ` ;
37+
38+
39+ const cleanedDescription = stripHTML ( description ) ;
3540 return {
3641 title : title . length > 80 ? title . substring ( 0 , 80 ) + ' ...' : title ,
3742 thumbnail : convertedThumbnail ,
3843 url,
3944 date : moment ( pubDate ) . format ( 'DD MMM YYYY, HH:mm' ) ,
4045 description :
41- description
42- . replace ( / < h 3 > .* < \/ h 3 > | < f i g c a p t i o n > .* < \/ f i g c a p t i o n > | < [ ^ > ] * > / gm, '' )
46+ cleanedDescription
4347 . substring ( 0 , 60 ) + '...' ,
4448 } ;
4549} ;
4650
4751
/**
 * Reduces an HTML fragment to its plain text.
 *
 * The fragment is parsed with JSDOM and the text content of the resulting
 * document body is returned with leading and trailing whitespace removed.
 * An empty string is returned when the body yields no text content.
 */
function stripHTML(text: string) {
  const { body } = new JSDOM(text).window.document;
  return (body.textContent || '').trim();
}
4858
4959
5060// Define a type for the image data
@@ -58,15 +68,21 @@ function extractFirstImageFromHTML(html: string): ImageData | null {
5868 const dom = new JSDOM ( html ) ;
5969 const document = dom . window . document ;
6070
61- // Select the first figure that contains an image
62- const figure = document . querySelector ( 'figure img' ) ;
63- if ( figure ) {
64- const img = figure as HTMLImageElement ; // Ensure it's treated as an image element
65- // const figcaption = figure.parentElement ? figure.parentElement.querySelector('figcaption') : null;
66- return {
67- src : img . src ,
68- alt : img . alt || '' , // Use an empty string if alt is not present
69- } ;
71+ // Try different strategies to find the first image
72+ const imageSelectors = [
73+ 'figure img' , // Case 1: Image inside figure
74+ '.medium-feed-image img' , // Case 2: Medium feed specific
75+ 'img' // Case 3: Any image as fallback
76+ ] ;
77+
78+ for ( const selector of imageSelectors ) {
79+ const img = document . querySelector ( selector ) as HTMLImageElement ;
80+ if ( img ) {
81+ return {
82+ src : img . src ,
83+ alt : img . alt || '' , // Use empty string if alt is not present
84+ } ;
85+ }
7086 }
7187
7288 return null ; // Return null if no images are found
0 commit comments