Skip to content

Commit 8a70ea2

Browse files
authored
Merge pull request microlinkhq#782 from microlinkhq/x
chore(x): get metadata from jsonld
2 parents 4920125 + 9d127e6 commit 8a70ea2

File tree

8 files changed

+14622
-63
lines changed

8 files changed

+14622
-63
lines changed

packages/metascraper-x/src/index.js

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
 
 const {
   getUrls,
+  $jsonld,
   author,
   image,
   memoizeOne,
@@ -15,7 +16,9 @@ const {
 const toAuthor = toRule(author)
 const toImage = toRule(image)
 const toTitle = toRule(title)
+
 const toDescription = toRule(description)
+
 const toUrl = toRule(url)
 
 const test = memoizeOne(url =>
@@ -42,7 +45,10 @@ module.exports = ({ resolveUrls = false, resolveUrl = url => url } = {}) => {
     ],
     description: [
       toDescription(async $ => {
-        let description = $('meta[property="og:description"]').attr('content')
+        let description =
+          $jsonld('mainEntity.description')($) ||
+          $('meta[property="og:description"]').attr('content')
+
         if (!resolveUrls) return description
         const urls = getUrls(description)
         const resolvedUrls = await Promise.all(urls.map(resolveUrl))
@@ -57,7 +63,10 @@ module.exports = ({ resolveUrls = false, resolveUrl = url => url } = {}) => {
     ],
     image: [
       toImage($ => {
-        let imageUrl = $('meta[property="og:image"]').attr('content')
+        let imageUrl =
+          $jsonld('mainEntity.image.contentUrl')($) ||
+          $('meta[property="og:image"]').attr('content')
+
         if (imageUrl?.endsWith('_200x200.jpg')) {
           imageUrl = imageUrl.replace('_200x200.jpg', '_400x400.jpg')
         }

packages/metascraper-x/test/fixtures/post-gif.html

Lines changed: 2712 additions & 2 deletions
Large diffs are not rendered by default.

packages/metascraper-x/test/fixtures/post-image.html

Lines changed: 2712 additions & 2 deletions
Large diffs are not rendered by default.

packages/metascraper-x/test/fixtures/post.html

Lines changed: 2712 additions & 2 deletions
Large diffs are not rendered by default.

packages/metascraper-x/test/fixtures/profile-video.html

Lines changed: 2726 additions & 8 deletions
Large diffs are not rendered by default.

packages/metascraper-x/test/fixtures/profile.html

Lines changed: 3718 additions & 16 deletions
Large diffs are not rendered by default.

packages/metascraper-x/test/snapshots/index.js.md

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@ Generated by [AVA](https://avajs.dev).
99
> Snapshot 1
1010
1111
{
12-
author: '#!/kiko/beats (Kikobeats)',
13-
date: null,
14-
description: 'engineering ▲ @vercel; founder of https://t.co/4PQvCsVNsA https://t.co/fpiHwbEPBv https://t.co/IG8Qq0IDKi https://t.co/gblDRx1P9D https://t.co/SmoZi3hAhb https://t.co/Y0Uk1XU3Eu https://t.co/PAq3eTEhmI',
15-
image: 'https://pbs.twimg.com/profile_images/1846292082501054464/oKUC44PF_400x400.jpg',
12+
author: 'Kiko Beats (Kikobeats) / X',
13+
date: '2010-01-02T11:14:31.000Z',
14+
description: 'Building web infra & open source Running https://t.co/4PQvCsVNsA · https://t.co/IG8Qq0IDKi Formerly @vercel ▲',
15+
image: 'https://pbs.twimg.com/profile_images/2000931726244106244/Ty7-KT2j_400x400.jpg',
1616
lang: 'en',
1717
publisher: 'X',
1818
title: '@kikobeats on X',
19-
url: 'https://x.com/Kikobeats',
19+
url: 'https://x.com/kikobeats',
2020
video: null,
2121
}
2222

@@ -25,14 +25,14 @@ Generated by [AVA](https://avajs.dev).
2525
> Snapshot 1
2626
2727
{
28-
author: '#!/kiko/beats (Kikobeats)',
29-
date: null,
30-
description: 'engineering ▲ @vercel; founder of microlink.io teslahunt.io unavatar.io keyv.js.org osom.js.org browserless.js.org metascraper.js.org',
31-
image: 'https://pbs.twimg.com/profile_images/1846292082501054464/oKUC44PF_400x400.jpg',
28+
author: 'Kiko Beats (Kikobeats) / X',
29+
date: '2010-01-02T11:14:31.000Z',
30+
description: 'Building web infra & open source Running microlink.io · unavatar.io Formerly @vercel ▲',
31+
image: 'https://pbs.twimg.com/profile_images/2000931726244106244/Ty7-KT2j_400x400.jpg',
3232
lang: 'en',
3333
publisher: 'X',
3434
title: '@Kikobeats on X',
35-
url: 'https://x.com/Kikobeats',
35+
url: 'https://x.com/kikobeats',
3636
video: null,
3737
}
3838

@@ -41,30 +41,30 @@ Generated by [AVA](https://avajs.dev).
4141
> Snapshot 1
4242
4343
{
44-
author: 'Javi López ⛩️ (javilop)',
45-
date: null,
46-
description: 'Comparto tutoriales, herramientas y noticias de IA. Fundador @Magnific_AI 🔥 Guía IAs: https://t.co/JApwm5Tmfo 🗞️ Newsletter: https://t.co/tMELO1P8Wk',
44+
author: 'Javi López ⛩️ (javilop) / X',
45+
date: '2008-08-29T18:52:07.000Z',
46+
description: 'Comparto tutoriales, herramientas y noticias de IA. Y también chorradas. Fundador @Magnific_AI (acq. por @freepik) 🗞️ Newsletter: https://t.co/tMELO1P8Wk',
4747
image: 'https://pbs.twimg.com/profile_images/1581679886267301888/BHGZpOc6_400x400.jpg',
4848
lang: 'en',
4949
publisher: 'X',
5050
title: '@javilop on X',
51-
url: 'https://twitter.com/javilop',
52-
video: null,
51+
url: 'https://x.com/javilop',
52+
video: 'blob:https://x.com/748f188e-eea8-4c74-87c9-00235f0067a8',
5353
}
5454

5555
## from a post
5656

5757
> Snapshot 1
5858
5959
{
60-
author: 'Donald J. Trump (realDonaldTrump)',
61-
date: null,
62-
description: '“Schiff blasted for not focusing on California homeless.” @foxandfriends His District is in terrible shape. He is a corrupt pol who only dreams of the Impeachment Hoax. In my opinion he is mentally deranged!',
63-
image: 'https://pbs.twimg.com/profile_images/874276197357596672/kUuht00m_400x400.jpg',
60+
author: 'Donald J. Trump',
61+
date: '2020-01-30T15:39:43.000Z',
62+
description: null,
63+
image: 'https://abs.twimg.com/rweb/ssr/default/v2/og/image.png',
6464
lang: 'en',
6565
publisher: 'X',
6666
title: '@realDonaldTrump on X',
67-
url: 'https://twitter.com/realDonaldTrump/status/1222907250383245320',
67+
url: 'https://x.com/realDonaldTrump/status/1222907250383245320',
6868
video: null,
6969
}
7070

@@ -73,29 +73,29 @@ Generated by [AVA](https://avajs.dev).
7373
> Snapshot 1
7474
7575
{
76-
author: '#!/kiko/beats (Kikobeats)',
77-
date: null,
78-
description: 'Experimenting with Clearbit API + Apple TV 3D Parallax https://t.co/Qsm163k4mJ',
79-
image: 'https://pbs.twimg.com/tweet_video_thumb/DDbh3WCXYAAZfz9.jpg:large',
76+
author: 'Kiko Beats',
77+
date: '2017-06-28T19:01:34.000Z',
78+
description: null,
79+
image: 'https://abs.twimg.com/rweb/ssr/default/v2/og/image.png',
8080
lang: 'en',
8181
publisher: 'X',
8282
title: '@Kikobeats on X',
83-
url: 'https://twitter.com/Kikobeats/status/880139124791029763',
84-
video: null,
83+
url: 'https://x.com/Kikobeats/status/880139124791029763',
84+
video: 'https://video.twimg.com/tweet_video/DDbh3WCXYAAZfz9.mp4',
8585
}
8686

8787
## from a post with an image
8888

8989
> Snapshot 1
9090
9191
{
92-
author: 'SmartUA (UaSmart)',
93-
date: null,
94-
description: 'Y terminamos el dia con Cultura de empresa con @patoroco, @flopezluis, Katia, Angélica en @codemotion_es #codemotion2017',
95-
image: 'https://pbs.twimg.com/media/DPadOKpXcAIL-NW.jpg:large',
92+
author: 'SmartUA',
93+
date: '2017-11-24T17:10:07.000Z',
94+
description: null,
95+
image: 'https://abs.twimg.com/rweb/ssr/default/v2/og/image.png',
9696
lang: 'en',
9797
publisher: 'X',
9898
title: '@UaSmart on X',
99-
url: 'https://twitter.com/UaSmart/status/934106870834454529',
99+
url: 'https://x.com/UaSmart/status/934106870834454529',
100100
video: null,
101101
}
-251 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)