Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ It's more than likely there is nothing wrong with the library:
- If the target website does not have OpenGraph tags, **the preview will most likely fail**. There are some fallbacks, but in general it will not work
- **You cannot preview (fetch) another web page from YOUR web page. This is an intentional browser security feature: the same-origin policy, which blocks cross-origin requests unless the target site allows them via CORS**

<h1>DO NOT FETCH CONTENT DIRECTLY FROM A USER'S DEVICE. ONLY RUN THIS ON YOUR SERVER AND SANDBOX IT IF YOU CAN</h1>
Browsers block this via CORS, but you might be clever like a fox and run this in React Native. This is a bad idea: you are exposing the device's user to potentially malicious links.

If you use this library and find it useful, please consider [sponsoring me](https://github.com/sponsors/ospfranco); open source takes a lot of time and effort.

# Link Preview
Expand Down
17 changes: 11 additions & 6 deletions __tests__/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ describe(`#getLinkPreview()`, () => {
expect(linkInfo.charset?.toLowerCase()).toEqual(`utf-8`);
});

it("should extract author from news article", async () => {
xit("should extract author from news article", async () => {
const linkInfo: any = await getLinkPreview(
`https://www.usatoday.com/story/special/contributor-content/2025/10/15/why-chaos-engineering-is-more-important-than-ever-in-the-ai-era/86712877007/`
);

expect(linkInfo.author).toEqual(`Matt Emma`);
})
});

it(`should extract link info from a URL with a newline`, async () => {
const linkInfo: any = await getLinkPreview(
Expand Down Expand Up @@ -251,17 +251,22 @@ describe(`#getLinkPreview()`, () => {
expect(response.mediaType).toEqual(`website`);
});

it("should handle override response body using onResponse option", async () => {
it("should handle override response body using onResponse option", async () => {
let firstParagraphText;

const res: any = await getLinkPreview(`https://www.example.com/`, {
const res: any = await getLinkPreview(`https://www.example.com/`, {
onResponse: (result, doc) => {
firstParagraphText = doc('p').first().text().split('\n').map(x=> x.trim()).join(' ');
firstParagraphText = doc("p")
.first()
.text()
.split("\n")
.map((x) => x.trim())
.join(" ");
result.siteName = `SiteName has been overridden`;
result.description = firstParagraphText;

return result;
}
},
});

expect(res.siteName).toEqual("SiteName has been overridden");
Expand Down
77 changes: 37 additions & 40 deletions index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import cheerio from "cheerio";
import urlObj from "url";
import { CONSTANTS } from "./constants";

interface ILinkPreviewResponse {
Expand All @@ -16,12 +15,12 @@ interface ILinkPreviewResponse {
}

interface IVideoType {
url: string | undefined,
secureUrl: string | null | undefined,
type: string | null | undefined,
width: string | undefined,
height: string | undefined,
};
url: string | undefined;
secureUrl: string | null | undefined;
type: string | null | undefined;
width: string | undefined;
height: string | undefined;
}

interface ILinkPreviewOptions {
headers?: Record<string, string>;
Expand All @@ -31,7 +30,11 @@ interface ILinkPreviewOptions {
followRedirects?: `follow` | `error` | `manual`;
resolveDNSHost?: (url: string) => Promise<string>;
handleRedirects?: (baseURL: string, forwardedURL: string) => boolean;
onResponse?: (response: ILinkPreviewResponse, doc: cheerio.Root, url?: URL) => ILinkPreviewResponse;
onResponse?: (
response: ILinkPreviewResponse,
doc: cheerio.Root,
url?: URL,
) => ILinkPreviewResponse;
}

interface IPreFetchedResource {
Expand Down Expand Up @@ -79,7 +82,7 @@ function getAuthor(doc: cheerio.Root) {
const author =
metaTagContent(doc, `author`, `name`) ||
metaTagContent(doc, `article:author`, `property`);
return author;
return author;
}

function getDescription(doc: cheerio.Root) {
Expand All @@ -105,7 +108,7 @@ function getMediaType(doc: cheerio.Root) {
function getImages(
doc: cheerio.Root,
rootUrl: string,
imagesPropertyType?: string
imagesPropertyType?: string,
) {
let images: string[] = [];
let nodes: cheerio.Cheerio | null;
Expand All @@ -122,7 +125,7 @@ function getImages(
if (node.type === `tag`) {
src = node.attribs.content;
if (src) {
src = urlObj.resolve(rootUrl, src);
src = new URL(src, rootUrl).href;
images.push(src);
}
}
Expand All @@ -132,7 +135,7 @@ function getImages(
if (images.length <= 0 && !imagesPropertyType) {
src = doc(`link[rel=image_src]`).attr(`href`);
if (src) {
src = urlObj.resolve(rootUrl, src);
src = new URL(src, rootUrl).href;
images = [src];
} else {
nodes = doc(`img`);
Expand All @@ -146,7 +149,7 @@ function getImages(
dic[src] = true;
// width = node.attribs.width;
// height = node.attribs.height;
images.push(urlObj.resolve(rootUrl, src));
images.push(new URL(src, rootUrl).href);
}
});
}
Expand Down Expand Up @@ -220,12 +223,12 @@ function getVideos(doc: cheerio.Root) {
}

// returns default favicon (//hostname/favicon.ico) for a url
function getDefaultFavicon(rootUrl: string) {
return urlObj.resolve(rootUrl, `/favicon.ico`);
function getDefaultFavicon(rootUrl: string): string {
return new URL(`/favicon.ico`, rootUrl).href;
}

// returns an array of URLs to favicon images
function getFavicons(doc: cheerio.Root, rootUrl: string) {
function getFavicons(doc: cheerio.Root, rootUrl: string): string[] {
const images = [];
let nodes: cheerio.Cheerio | never[] = [];
let src: string | undefined;
Expand All @@ -245,7 +248,7 @@ function getFavicons(doc: cheerio.Root, rootUrl: string) {
nodes.each((_: number, node: cheerio.Element) => {
if (node.type === `tag`) src = node.attribs.href;
if (src) {
src = urlObj.resolve(rootUrl, src);
src = new URL(src, rootUrl).href;
images.push(src);
}
});
Expand Down Expand Up @@ -300,7 +303,7 @@ function parseTextResponse(
body: string,
url: string,
options: ILinkPreviewOptions = {},
contentType?: string
contentType?: string,
): ILinkPreviewResponse {
const doc = cheerio.load(body);

Expand All @@ -318,35 +321,31 @@ function parseTextResponse(
};

if (options?.onResponse && typeof options.onResponse !== `function`) {
throw new Error(
`link-preview-js onResponse option must be a function`
);
throw new Error(`link-preview-js onResponse option must be a function`);
}

if (options?.onResponse) {
// send in a cloned response (to avoid mutation of original response reference)
const clonedResponse = structuredClone(response);
const urlObject = new URL(url)
// send in a cloned response (to avoid mutation of original response reference)
const clonedResponse = structuredClone(response);
const urlObject = new URL(url);
response = options.onResponse(clonedResponse, doc, urlObject);
}


return response;

}

function parseUnknownResponse(
body: string,
url: string,
options: ILinkPreviewOptions = {},
contentType?: string
contentType?: string,
) {
return parseTextResponse(body, url, options, contentType);
}

function parseResponse(
response: IPreFetchedResource,
options?: ILinkPreviewOptions
options?: ILinkPreviewOptions,
) {
try {
// console.log("[link-preview-js] response", response);
Expand Down Expand Up @@ -383,9 +382,8 @@ function parseResponse(
}

if (CONSTANTS.REGEX_CONTENT_TYPE_TEXT.test(contentType)) {
const htmlString = response.data;
return {
...parseTextResponse(htmlString, response.url, options, contentType),
...parseTextResponse(response.data, response.url, options, contentType),
charset,
};
}
Expand All @@ -407,7 +405,7 @@ function parseResponse(
throw new Error(
`link-preview-js could not fetch link information ${(
e as any
).toString()}`
).toString()}`,
);
}
}
Expand All @@ -421,7 +419,7 @@ function parseResponse(
*/
export async function getLinkPreview(
text: string,
options?: ILinkPreviewOptions
options?: ILinkPreviewOptions,
) {
if (!text || typeof text !== `string`) {
throw new Error(`link-preview-js did not receive a valid url or text`);
Expand All @@ -438,7 +436,7 @@ export async function getLinkPreview(

if (options?.followRedirects === `manual` && !options?.handleRedirects) {
throw new Error(
`link-preview-js followRedirects is set to manual, but no handleRedirects function was provided`
`link-preview-js followRedirects is set to manual, but no handleRedirects function was provided`,
);
}

Expand All @@ -462,9 +460,7 @@ export async function getLinkPreview(
? options.proxyUrl.concat(detectedUrl)
: detectedUrl;

// Seems like fetchOptions type definition is out of date
// https://github.com/node-fetch/node-fetch/issues/741
let response = await fetch(fetchUrl, fetchOptions as any).catch((e) => {
let response = await fetch(fetchUrl, fetchOptions).catch((e) => {
if (e.name === `AbortError`) {
throw new Error(`Request timeout`);
}
Expand All @@ -480,12 +476,14 @@ export async function getLinkPreview(
options?.handleRedirects
) {
const locationHeader = response.headers.get(`location`) || ``;
const isAbsoluteURI = locationHeader.startsWith('http://') || locationHeader.startsWith('https://');
const isAbsoluteURI =
locationHeader.startsWith("http://") ||
locationHeader.startsWith("https://");

// Resolve the URL, handling both absolute and relative URLs
const forwardedUrl = isAbsoluteURI
? locationHeader
: urlObj.resolve(fetchUrl, locationHeader);
: new URL(locationHeader, fetchUrl).href;

if (!options.handleRedirects(fetchUrl, forwardedUrl)) {
throw new Error(`link-preview-js could not handle redirect`);
Expand Down Expand Up @@ -527,7 +525,7 @@ export async function getLinkPreview(
*/
export async function getPreviewFromContent(
response: IPreFetchedResource,
options?: ILinkPreviewOptions
options?: ILinkPreviewOptions,
) {
if (!response || typeof response !== `object`) {
throw new Error(`link-preview-js did not receive a valid response object`);
Expand All @@ -539,4 +537,3 @@ export async function getPreviewFromContent(

return parseResponse(response, options);
}

2 changes: 2 additions & 0 deletions mise.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[tools]
node = "24"
3 changes: 1 addition & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@
"license": "MIT",
"repository": "https://github.com/ospfranco/link-preview-js",
"dependencies": {
"cheerio": "1.0.0-rc.11",
"url": "0.11.0"
"cheerio": "1.0.0-rc.11"
},
"files": [
"build"
Expand Down
18 changes: 0 additions & 18 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2124,16 +2124,6 @@ prompts@^2.0.1:
kleur "^3.0.3"
sisteransi "^1.0.5"

punycode@1.3.2:
version "1.3.2"
resolved "https://registry.npmjs.org/punycode/-/punycode-1.3.2.tgz"
integrity sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0=

querystring@0.2.0:
version "0.2.0"
resolved "https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz"
integrity sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA=

react-is@^18.0.0:
version "18.2.0"
resolved "https://registry.yarnpkg.com/react-is/-/react-is-18.2.0.tgz#199431eeaaa2e09f86427efbb4f1473edb47609b"
Expand Down Expand Up @@ -2416,14 +2406,6 @@ update-browserslist-db@^1.0.4:
escalade "^3.1.1"
picocolors "^1.0.0"

url@0.11.0:
version "0.11.0"
resolved "https://registry.npmjs.org/url/-/url-0.11.0.tgz"
integrity sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE=
dependencies:
punycode "1.3.2"
querystring "0.2.0"

v8-to-istanbul@^9.0.1:
version "9.0.1"
resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-9.0.1.tgz#b6f994b0b5d4ef255e17a0d17dc444a9f5132fa4"
Expand Down
Loading