diff --git a/README.md b/README.md
index 4b18bb5..42dd636 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,9 @@ It's more than likely there is nothing wrong with the library:
- If the target website does not have OpenGraph tags **the preview will most likely fail**, there are some fallbacks but in general, it will not work
- **You cannot preview (fetch) another web page from YOUR web page. This is an intentional security feature of browsers called CORS**
+
DO NOT FETCH CONTENT DIRECTLY FROM A USERS DEVICE. ONLY RUN THIS IN YOUR SERVER AND SANDBOX IT IF YOU CAN
+Browsers block this via CORS, but you might be clever like a fox and run this in React Native. This is a bad idea: you are exposing the device's user to potentially malicious links
+
If you use this library and find it useful please consider [sponsoring me](https://github.com/sponsors/ospfranco), open source takes a lot of time and effort.
# Link Preview
diff --git a/__tests__/index.spec.ts b/__tests__/index.spec.ts
index 8d6da2d..1e79642 100644
--- a/__tests__/index.spec.ts
+++ b/__tests__/index.spec.ts
@@ -31,13 +31,13 @@ describe(`#getLinkPreview()`, () => {
expect(linkInfo.charset?.toLowerCase()).toEqual(`utf-8`);
});
- it("should extract author from news article", async () => {
+ xit("should extract author from news article", async () => {
const linkInfo: any = await getLinkPreview(
`https://www.usatoday.com/story/special/contributor-content/2025/10/15/why-chaos-engineering-is-more-important-than-ever-in-the-ai-era/86712877007/`
);
expect(linkInfo.author).toEqual(`Matt Emma`);
- })
+ });
it(`should extract link info from a URL with a newline`, async () => {
const linkInfo: any = await getLinkPreview(
@@ -251,17 +251,22 @@ describe(`#getLinkPreview()`, () => {
expect(response.mediaType).toEqual(`website`);
});
- it("should handle override response body using onResponse option", async () => {
+ it("should handle override response body using onResponse option", async () => {
let firstParagraphText;
- const res: any = await getLinkPreview(`https://www.example.com/`, {
+ const res: any = await getLinkPreview(`https://www.example.com/`, {
onResponse: (result, doc) => {
- firstParagraphText = doc('p').first().text().split('\n').map(x=> x.trim()).join(' ');
+ firstParagraphText = doc("p")
+ .first()
+ .text()
+ .split("\n")
+ .map((x) => x.trim())
+ .join(" ");
result.siteName = `SiteName has been overridden`;
result.description = firstParagraphText;
return result;
- }
+ },
});
expect(res.siteName).toEqual("SiteName has been overridden");
diff --git a/index.ts b/index.ts
index 5859614..36a2304 100644
--- a/index.ts
+++ b/index.ts
@@ -1,5 +1,4 @@
import cheerio from "cheerio";
-import urlObj from "url";
import { CONSTANTS } from "./constants";
interface ILinkPreviewResponse {
@@ -16,12 +15,12 @@ interface ILinkPreviewResponse {
}
interface IVideoType {
- url: string | undefined,
- secureUrl: string | null | undefined,
- type: string | null | undefined,
- width: string | undefined,
- height: string | undefined,
-};
+ url: string | undefined;
+ secureUrl: string | null | undefined;
+ type: string | null | undefined;
+ width: string | undefined;
+ height: string | undefined;
+}
interface ILinkPreviewOptions {
headers?: Record;
@@ -31,7 +30,11 @@ interface ILinkPreviewOptions {
followRedirects?: `follow` | `error` | `manual`;
resolveDNSHost?: (url: string) => Promise;
handleRedirects?: (baseURL: string, forwardedURL: string) => boolean;
- onResponse?: (response: ILinkPreviewResponse, doc: cheerio.Root, url?: URL) => ILinkPreviewResponse;
+ onResponse?: (
+ response: ILinkPreviewResponse,
+ doc: cheerio.Root,
+ url?: URL,
+ ) => ILinkPreviewResponse;
}
interface IPreFetchedResource {
@@ -79,7 +82,7 @@ function getAuthor(doc: cheerio.Root) {
const author =
metaTagContent(doc, `author`, `name`) ||
metaTagContent(doc, `article:author`, `property`);
- return author;
+ return author;
}
function getDescription(doc: cheerio.Root) {
@@ -105,7 +108,7 @@ function getMediaType(doc: cheerio.Root) {
function getImages(
doc: cheerio.Root,
rootUrl: string,
- imagesPropertyType?: string
+ imagesPropertyType?: string,
) {
let images: string[] = [];
let nodes: cheerio.Cheerio | null;
@@ -122,7 +125,7 @@ function getImages(
if (node.type === `tag`) {
src = node.attribs.content;
if (src) {
- src = urlObj.resolve(rootUrl, src);
+ src = new URL(src, rootUrl).href;
images.push(src);
}
}
@@ -132,7 +135,7 @@ function getImages(
if (images.length <= 0 && !imagesPropertyType) {
src = doc(`link[rel=image_src]`).attr(`href`);
if (src) {
- src = urlObj.resolve(rootUrl, src);
+ src = new URL(src, rootUrl).href;
images = [src];
} else {
nodes = doc(`img`);
@@ -146,7 +149,7 @@ function getImages(
dic[src] = true;
// width = node.attribs.width;
// height = node.attribs.height;
- images.push(urlObj.resolve(rootUrl, src));
+ images.push(new URL(src, rootUrl).href);
}
});
}
@@ -220,12 +223,12 @@ function getVideos(doc: cheerio.Root) {
}
// returns default favicon (//hostname/favicon.ico) for a url
-function getDefaultFavicon(rootUrl: string) {
- return urlObj.resolve(rootUrl, `/favicon.ico`);
+function getDefaultFavicon(rootUrl: string): string {
+ return new URL(`/favicon.ico`, rootUrl).href;
}
// returns an array of URLs to favicon images
-function getFavicons(doc: cheerio.Root, rootUrl: string) {
+function getFavicons(doc: cheerio.Root, rootUrl: string): string[] {
const images = [];
let nodes: cheerio.Cheerio | never[] = [];
let src: string | undefined;
@@ -245,7 +248,7 @@ function getFavicons(doc: cheerio.Root, rootUrl: string) {
nodes.each((_: number, node: cheerio.Element) => {
if (node.type === `tag`) src = node.attribs.href;
if (src) {
- src = urlObj.resolve(rootUrl, src);
+ src = new URL(src, rootUrl).href;
images.push(src);
}
});
@@ -300,7 +303,7 @@ function parseTextResponse(
body: string,
url: string,
options: ILinkPreviewOptions = {},
- contentType?: string
+ contentType?: string,
): ILinkPreviewResponse {
const doc = cheerio.load(body);
@@ -318,35 +321,31 @@ function parseTextResponse(
};
if (options?.onResponse && typeof options.onResponse !== `function`) {
- throw new Error(
- `link-preview-js onResponse option must be a function`
- );
+ throw new Error(`link-preview-js onResponse option must be a function`);
}
if (options?.onResponse) {
- // send in a cloned response (to avoid mutation of original response reference)
- const clonedResponse = structuredClone(response);
- const urlObject = new URL(url)
+ // send in a cloned response (to avoid mutation of original response reference)
+ const clonedResponse = structuredClone(response);
+ const urlObject = new URL(url);
response = options.onResponse(clonedResponse, doc, urlObject);
}
-
return response;
-
}
function parseUnknownResponse(
body: string,
url: string,
options: ILinkPreviewOptions = {},
- contentType?: string
+ contentType?: string,
) {
return parseTextResponse(body, url, options, contentType);
}
function parseResponse(
response: IPreFetchedResource,
- options?: ILinkPreviewOptions
+ options?: ILinkPreviewOptions,
) {
try {
// console.log("[link-preview-js] response", response);
@@ -383,9 +382,8 @@ function parseResponse(
}
if (CONSTANTS.REGEX_CONTENT_TYPE_TEXT.test(contentType)) {
- const htmlString = response.data;
return {
- ...parseTextResponse(htmlString, response.url, options, contentType),
+ ...parseTextResponse(response.data, response.url, options, contentType),
charset,
};
}
@@ -407,7 +405,7 @@ function parseResponse(
throw new Error(
`link-preview-js could not fetch link information ${(
e as any
- ).toString()}`
+ ).toString()}`,
);
}
}
@@ -421,7 +419,7 @@ function parseResponse(
*/
export async function getLinkPreview(
text: string,
- options?: ILinkPreviewOptions
+ options?: ILinkPreviewOptions,
) {
if (!text || typeof text !== `string`) {
throw new Error(`link-preview-js did not receive a valid url or text`);
@@ -438,7 +436,7 @@ export async function getLinkPreview(
if (options?.followRedirects === `manual` && !options?.handleRedirects) {
throw new Error(
- `link-preview-js followRedirects is set to manual, but no handleRedirects function was provided`
+ `link-preview-js followRedirects is set to manual, but no handleRedirects function was provided`,
);
}
@@ -462,9 +460,7 @@ export async function getLinkPreview(
? options.proxyUrl.concat(detectedUrl)
: detectedUrl;
- // Seems like fetchOptions type definition is out of date
- // https://github.com/node-fetch/node-fetch/issues/741
- let response = await fetch(fetchUrl, fetchOptions as any).catch((e) => {
+ let response = await fetch(fetchUrl, fetchOptions).catch((e) => {
if (e.name === `AbortError`) {
throw new Error(`Request timeout`);
}
@@ -480,12 +476,14 @@ export async function getLinkPreview(
options?.handleRedirects
) {
const locationHeader = response.headers.get(`location`) || ``;
- const isAbsoluteURI = locationHeader.startsWith('http://') || locationHeader.startsWith('https://');
+ const isAbsoluteURI =
+ locationHeader.startsWith("http://") ||
+ locationHeader.startsWith("https://");
// Resolve the URL, handling both absolute and relative URLs
const forwardedUrl = isAbsoluteURI
? locationHeader
- : urlObj.resolve(fetchUrl, locationHeader);
+ : new URL(locationHeader, fetchUrl).href;
if (!options.handleRedirects(fetchUrl, forwardedUrl)) {
throw new Error(`link-preview-js could not handle redirect`);
@@ -527,7 +525,7 @@ export async function getLinkPreview(
*/
export async function getPreviewFromContent(
response: IPreFetchedResource,
- options?: ILinkPreviewOptions
+ options?: ILinkPreviewOptions,
) {
if (!response || typeof response !== `object`) {
throw new Error(`link-preview-js did not receive a valid response object`);
@@ -539,4 +537,3 @@ export async function getPreviewFromContent(
return parseResponse(response, options);
}
-
diff --git a/mise.toml b/mise.toml
new file mode 100644
index 0000000..6ea5a7e
--- /dev/null
+++ b/mise.toml
@@ -0,0 +1,2 @@
+[tools]
+node = "24"
diff --git a/package.json b/package.json
index 38a9636..ecec196 100644
--- a/package.json
+++ b/package.json
@@ -30,8 +30,7 @@
"license": "MIT",
"repository": "https://github.com/ospfranco/link-preview-js",
"dependencies": {
- "cheerio": "1.0.0-rc.11",
- "url": "0.11.0"
+ "cheerio": "1.0.0-rc.11"
},
"files": [
"build"
diff --git a/yarn.lock b/yarn.lock
index 18a5cb9..23963d9 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2124,16 +2124,6 @@ prompts@^2.0.1:
kleur "^3.0.3"
sisteransi "^1.0.5"
-punycode@1.3.2:
- version "1.3.2"
- resolved "https://registry.npmjs.org/punycode/-/punycode-1.3.2.tgz"
- integrity sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0=
-
-querystring@0.2.0:
- version "0.2.0"
- resolved "https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz"
- integrity sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA=
-
react-is@^18.0.0:
version "18.2.0"
resolved "https://registry.yarnpkg.com/react-is/-/react-is-18.2.0.tgz#199431eeaaa2e09f86427efbb4f1473edb47609b"
@@ -2416,14 +2406,6 @@ update-browserslist-db@^1.0.4:
escalade "^3.1.1"
picocolors "^1.0.0"
-url@0.11.0:
- version "0.11.0"
- resolved "https://registry.npmjs.org/url/-/url-0.11.0.tgz"
- integrity sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE=
- dependencies:
- punycode "1.3.2"
- querystring "0.2.0"
-
v8-to-istanbul@^9.0.1:
version "9.0.1"
resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-9.0.1.tgz#b6f994b0b5d4ef255e17a0d17dc444a9f5132fa4"