1
1
// get link metadata
2
2
// TODO: add more services and use the metascraper to fill some metadata on the article
3
3
4
- // const metascraper = require('metascraper');
5
- // const got = require('got');
4
+ const metascraper = require ( 'metascraper' ) . load ( [
5
+ require ( 'metascraper-author' ) ( ) ,
6
+ require ( 'metascraper-date' ) ( ) ,
7
+ require ( 'metascraper-description' ) ( ) ,
8
+ require ( 'metascraper-image' ) ( ) ,
9
+ require ( 'metascraper-logo' ) ( ) ,
10
+ require ( 'metascraper-clearbit-logo' ) ( ) ,
11
+ require ( 'metascraper-logo-favicon' ) ( ) ,
12
+ require ( 'metascraper-publisher' ) ( ) ,
13
+ require ( 'metascraper-title' ) ( ) ,
14
+ require ( 'metascraper-url' ) ( ) ,
15
+ require ( 'metascraper-youtube' ) ( ) ,
16
+ ] ) ;
17
+ const got = require ( 'got' ) ;
18
+ const _ = require ( 'lodash' ) ;
19
+
20
/**
 * Download a page and run the configured metascraper rules over its HTML.
 *
 * Any rejection from `got` or `metascraper` propagates to the caller; nothing
 * is caught here.
 *
 * @param {string} targetUrl - Address of the page to scrape.
 * @param {Object} app - Application object; only `app.debug` is used, for logging.
 * @returns {Promise<Object>} The metadata object produced by metascraper.
 */
const getMetadata = async (targetUrl, app) => {
  // `url` comes from the got response, not from `targetUrl`, and is the value
  // passed on to metascraper and written to the log.
  const { body: html, url } = await got(targetUrl);
  app.debug(`getMetadata - getting metadata for ${url}`);

  const metadata = await metascraper({ html, url });
  app.debug(`getMetadata - got metadata for ${url}`);

  return metadata;
};
6
30
7
31
module . exports = function ( ) {
8
32
return function ( hook ) {
@@ -17,28 +41,55 @@ module.exports = function () {
17
41
return resolve ( hook ) ;
18
42
}
19
43
44
+ let promises = [ ] ;
45
+ let embedds = { } ;
46
+
20
47
try {
21
48
// find links
22
49
const youtubeRegex = new RegExp ( / (?: (?: h t t p s ? : ) ? \/ \/ ) ? (?: w w w \. ) ? y o u t u (?: b e \. c o m \/ (?: w a t c h \? (?: .* ?& (?: a m p ; ) ? ) * v = | v \/ | e m b e d \/ ) | \. b e \/ ) ( [ \w \- ] + ) (?: (?: & (?: a m p ; ) ? | \? ) [ \w \? = ] * ) * / , 'ig' ) ; // eslint-disable-line
23
50
const youtubeLinks = youtubeRegex . exec ( hook . data . content ) ;
24
51
25
- // html link
26
- // const htmlLink = new RegExp(/<a\s[^>]*href=\"([^\"]*)\"[^>]*>(.*)<\/a>/, 'ig'); // eslint-disable-line
52
+ // html links
53
+ const linkRegex = new RegExp ( / < a \s [ ^ > ] * h r e f = \" ( [ ^ \" ] * ) \" [ ^ > ] * > ( [ ^ < ] * ) < \/ a > / , 'ig' ) // eslint-disable-line
54
+ let match ;
55
+ while ( match = linkRegex . exec ( hook . data . content ) ) {
56
+ const url = match [ 1 ] ;
57
+ hook . app . debug ( url ) ;
27
58
28
- // here you could scrape the url for metadata
29
- // hook.app.debug('#6');
30
- // const { body: html, url } = await got(youtubeLinks[0]);
31
- // const metadata = await metascraper({html, url});
32
- // hook.app.debug('metadata');
33
- // hook.app.debug(metadata);
59
+ // skip if url already exists
60
+ if ( ! _ . isEmpty ( embedds [ url ] ) ) {
61
+ continue ;
62
+ }
63
+ // here you could scrape the url for metadata
64
+ // hook.app.debug(match);
65
+ promises . push ( new Promise ( async ( resolve ) => {
66
+ try {
67
+ const metadata = await getMetadata ( url , hook . app ) ;
68
+ embedds [ url ] = metadata ;
69
+ return resolve ( metadata ) ;
70
+ } catch ( err ) {
71
+ hook . app . error ( 'FAILED TO GRAB THE LINK' ) ;
72
+ return resolve ( ) ;
73
+ }
74
+ } ) ) ;
75
+ }
34
76
35
77
if ( youtubeLinks . length >= 2 ) {
36
78
hook . data . teaserImg = `https://img.youtube.com/vi/${ youtubeLinks [ 1 ] } /hqdefault.jpg` ;
37
79
hook . data . meta = Object . assign ( hook . data . meta || { } , { hasVideo : true } ) ;
38
80
}
39
81
} catch ( err ) { } // eslint-disable-line
40
82
41
- return resolve ( hook ) ;
83
+ return Promise . all ( promises )
84
+ . then ( ( ) => {
85
+ hook . app . debug ( 'embedds:' ) ;
86
+ hook . app . debug ( embedds ) ;
87
+
88
+ hook . data . meta . embedds = embedds ;
89
+
90
+ hook . app . debug ( 'FINISHED!' ) ;
91
+ resolve ( hook ) ;
92
+ } ) ;
42
93
} ) ;
43
94
} ;
44
95
} ;
0 commit comments