diff --git a/README.md b/README.md index 4b18bb5..42dd636 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,9 @@ It's more than likely there is nothing wrong with the library: - If the target website does not have OpenGraph tags **the preview will most likely fail**, there are some fallbacks but in general, it will not work - **You cannot preview (fetch) another web page from YOUR web page. This is an intentional security feature of browsers called CORS** +

DO NOT FETCH CONTENT DIRECTLY FROM A USER'S DEVICE. ONLY RUN THIS ON YOUR SERVER, AND SANDBOX IT IF YOU CAN.

+Browsers block this via cors, but you might be clever like a fox and run this in React Native. This is a bad idea, you are exposing the device user to potentially malicious links + If you use this library and find it useful please consider [sponsoring me](https://github.com/sponsors/ospfranco), open source takes a lot of time and effort. # Link Preview diff --git a/__tests__/index.spec.ts b/__tests__/index.spec.ts index 8d6da2d..1e79642 100644 --- a/__tests__/index.spec.ts +++ b/__tests__/index.spec.ts @@ -31,13 +31,13 @@ describe(`#getLinkPreview()`, () => { expect(linkInfo.charset?.toLowerCase()).toEqual(`utf-8`); }); - it("should extract author from news article", async () => { + xit("should extract author from news article", async () => { const linkInfo: any = await getLinkPreview( `https://www.usatoday.com/story/special/contributor-content/2025/10/15/why-chaos-engineering-is-more-important-than-ever-in-the-ai-era/86712877007/` ); expect(linkInfo.author).toEqual(`Matt Emma`); - }) + }); it(`should extract link info from a URL with a newline`, async () => { const linkInfo: any = await getLinkPreview( @@ -251,17 +251,22 @@ describe(`#getLinkPreview()`, () => { expect(response.mediaType).toEqual(`website`); }); - it("should handle override response body using onResponse option", async () => { + it("should handle override response body using onResponse option", async () => { let firstParagraphText; - const res: any = await getLinkPreview(`https://www.example.com/`, { + const res: any = await getLinkPreview(`https://www.example.com/`, { onResponse: (result, doc) => { - firstParagraphText = doc('p').first().text().split('\n').map(x=> x.trim()).join(' '); + firstParagraphText = doc("p") + .first() + .text() + .split("\n") + .map((x) => x.trim()) + .join(" "); result.siteName = `SiteName has been overridden`; result.description = firstParagraphText; return result; - } + }, }); expect(res.siteName).toEqual("SiteName has been overridden"); diff --git a/index.ts 
b/index.ts index 5859614..36a2304 100644 --- a/index.ts +++ b/index.ts @@ -1,5 +1,4 @@ import cheerio from "cheerio"; -import urlObj from "url"; import { CONSTANTS } from "./constants"; interface ILinkPreviewResponse { @@ -16,12 +15,12 @@ interface ILinkPreviewResponse { } interface IVideoType { - url: string | undefined, - secureUrl: string | null | undefined, - type: string | null | undefined, - width: string | undefined, - height: string | undefined, -}; + url: string | undefined; + secureUrl: string | null | undefined; + type: string | null | undefined; + width: string | undefined; + height: string | undefined; +} interface ILinkPreviewOptions { headers?: Record; @@ -31,7 +30,11 @@ interface ILinkPreviewOptions { followRedirects?: `follow` | `error` | `manual`; resolveDNSHost?: (url: string) => Promise; handleRedirects?: (baseURL: string, forwardedURL: string) => boolean; - onResponse?: (response: ILinkPreviewResponse, doc: cheerio.Root, url?: URL) => ILinkPreviewResponse; + onResponse?: ( + response: ILinkPreviewResponse, + doc: cheerio.Root, + url?: URL, + ) => ILinkPreviewResponse; } interface IPreFetchedResource { @@ -79,7 +82,7 @@ function getAuthor(doc: cheerio.Root) { const author = metaTagContent(doc, `author`, `name`) || metaTagContent(doc, `article:author`, `property`); - return author; + return author; } function getDescription(doc: cheerio.Root) { @@ -105,7 +108,7 @@ function getMediaType(doc: cheerio.Root) { function getImages( doc: cheerio.Root, rootUrl: string, - imagesPropertyType?: string + imagesPropertyType?: string, ) { let images: string[] = []; let nodes: cheerio.Cheerio | null; @@ -122,7 +125,7 @@ function getImages( if (node.type === `tag`) { src = node.attribs.content; if (src) { - src = urlObj.resolve(rootUrl, src); + src = new URL(src, rootUrl).href; images.push(src); } } @@ -132,7 +135,7 @@ function getImages( if (images.length <= 0 && !imagesPropertyType) { src = doc(`link[rel=image_src]`).attr(`href`); if (src) { - src = 
urlObj.resolve(rootUrl, src); + src = new URL(src, rootUrl).href; images = [src]; } else { nodes = doc(`img`); @@ -146,7 +149,7 @@ function getImages( dic[src] = true; // width = node.attribs.width; // height = node.attribs.height; - images.push(urlObj.resolve(rootUrl, src)); + images.push(new URL(src, rootUrl).href); } }); } @@ -220,12 +223,12 @@ function getVideos(doc: cheerio.Root) { } // returns default favicon (//hostname/favicon.ico) for a url -function getDefaultFavicon(rootUrl: string) { - return urlObj.resolve(rootUrl, `/favicon.ico`); +function getDefaultFavicon(rootUrl: string): string { + return new URL(`/favicon.ico`, rootUrl).href; } // returns an array of URLs to favicon images -function getFavicons(doc: cheerio.Root, rootUrl: string) { +function getFavicons(doc: cheerio.Root, rootUrl: string): string[] { const images = []; let nodes: cheerio.Cheerio | never[] = []; let src: string | undefined; @@ -245,7 +248,7 @@ function getFavicons(doc: cheerio.Root, rootUrl: string) { nodes.each((_: number, node: cheerio.Element) => { if (node.type === `tag`) src = node.attribs.href; if (src) { - src = urlObj.resolve(rootUrl, src); + src = new URL(src, rootUrl).href; images.push(src); } }); @@ -300,7 +303,7 @@ function parseTextResponse( body: string, url: string, options: ILinkPreviewOptions = {}, - contentType?: string + contentType?: string, ): ILinkPreviewResponse { const doc = cheerio.load(body); @@ -318,35 +321,31 @@ function parseTextResponse( }; if (options?.onResponse && typeof options.onResponse !== `function`) { - throw new Error( - `link-preview-js onResponse option must be a function` - ); + throw new Error(`link-preview-js onResponse option must be a function`); } if (options?.onResponse) { - // send in a cloned response (to avoid mutation of original response reference) - const clonedResponse = structuredClone(response); - const urlObject = new URL(url) + // send in a cloned response (to avoid mutation of original response reference) + const 
clonedResponse = structuredClone(response); + const urlObject = new URL(url); response = options.onResponse(clonedResponse, doc, urlObject); } - return response; - } function parseUnknownResponse( body: string, url: string, options: ILinkPreviewOptions = {}, - contentType?: string + contentType?: string, ) { return parseTextResponse(body, url, options, contentType); } function parseResponse( response: IPreFetchedResource, - options?: ILinkPreviewOptions + options?: ILinkPreviewOptions, ) { try { // console.log("[link-preview-js] response", response); @@ -383,9 +382,8 @@ function parseResponse( } if (CONSTANTS.REGEX_CONTENT_TYPE_TEXT.test(contentType)) { - const htmlString = response.data; return { - ...parseTextResponse(htmlString, response.url, options, contentType), + ...parseTextResponse(response.data, response.url, options, contentType), charset, }; } @@ -407,7 +405,7 @@ function parseResponse( throw new Error( `link-preview-js could not fetch link information ${( e as any - ).toString()}` + ).toString()}`, ); } } @@ -421,7 +419,7 @@ function parseResponse( */ export async function getLinkPreview( text: string, - options?: ILinkPreviewOptions + options?: ILinkPreviewOptions, ) { if (!text || typeof text !== `string`) { throw new Error(`link-preview-js did not receive a valid url or text`); @@ -438,7 +436,7 @@ export async function getLinkPreview( if (options?.followRedirects === `manual` && !options?.handleRedirects) { throw new Error( - `link-preview-js followRedirects is set to manual, but no handleRedirects function was provided` + `link-preview-js followRedirects is set to manual, but no handleRedirects function was provided`, ); } @@ -462,9 +460,7 @@ export async function getLinkPreview( ? 
options.proxyUrl.concat(detectedUrl) : detectedUrl; - // Seems like fetchOptions type definition is out of date - // https://github.com/node-fetch/node-fetch/issues/741 - let response = await fetch(fetchUrl, fetchOptions as any).catch((e) => { + let response = await fetch(fetchUrl, fetchOptions).catch((e) => { if (e.name === `AbortError`) { throw new Error(`Request timeout`); } @@ -480,12 +476,14 @@ export async function getLinkPreview( options?.handleRedirects ) { const locationHeader = response.headers.get(`location`) || ``; - const isAbsoluteURI = locationHeader.startsWith('http://') || locationHeader.startsWith('https://'); + const isAbsoluteURI = + locationHeader.startsWith("http://") || + locationHeader.startsWith("https://"); // Resolve the URL, handling both absolute and relative URLs const forwardedUrl = isAbsoluteURI ? locationHeader - : urlObj.resolve(fetchUrl, locationHeader); + : new URL(locationHeader, fetchUrl).href; if (!options.handleRedirects(fetchUrl, forwardedUrl)) { throw new Error(`link-preview-js could not handle redirect`); @@ -527,7 +525,7 @@ export async function getLinkPreview( */ export async function getPreviewFromContent( response: IPreFetchedResource, - options?: ILinkPreviewOptions + options?: ILinkPreviewOptions, ) { if (!response || typeof response !== `object`) { throw new Error(`link-preview-js did not receive a valid response object`); @@ -539,4 +537,3 @@ export async function getPreviewFromContent( return parseResponse(response, options); } - diff --git a/mise.toml b/mise.toml new file mode 100644 index 0000000..6ea5a7e --- /dev/null +++ b/mise.toml @@ -0,0 +1,2 @@ +[tools] +node = "24" diff --git a/package.json b/package.json index 38a9636..ecec196 100644 --- a/package.json +++ b/package.json @@ -30,8 +30,7 @@ "license": "MIT", "repository": "https://github.com/ospfranco/link-preview-js", "dependencies": { - "cheerio": "1.0.0-rc.11", - "url": "0.11.0" + "cheerio": "1.0.0-rc.11" }, "files": [ "build" diff --git a/yarn.lock 
b/yarn.lock index 18a5cb9..23963d9 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2124,16 +2124,6 @@ prompts@^2.0.1: kleur "^3.0.3" sisteransi "^1.0.5" -punycode@1.3.2: - version "1.3.2" - resolved "https://registry.npmjs.org/punycode/-/punycode-1.3.2.tgz" - integrity sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0= - -querystring@0.2.0: - version "0.2.0" - resolved "https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz" - integrity sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA= - react-is@^18.0.0: version "18.2.0" resolved "https://registry.yarnpkg.com/react-is/-/react-is-18.2.0.tgz#199431eeaaa2e09f86427efbb4f1473edb47609b" @@ -2416,14 +2406,6 @@ update-browserslist-db@^1.0.4: escalade "^3.1.1" picocolors "^1.0.0" -url@0.11.0: - version "0.11.0" - resolved "https://registry.npmjs.org/url/-/url-0.11.0.tgz" - integrity sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE= - dependencies: - punycode "1.3.2" - querystring "0.2.0" - v8-to-istanbul@^9.0.1: version "9.0.1" resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-9.0.1.tgz#b6f994b0b5d4ef255e17a0d17dc444a9f5132fa4"