diff --git a/config.local.js.SAMPLE b/config.local.js.SAMPLE index f8c8d6e77..29bbc0532 100644 --- a/config.local.js.SAMPLE +++ b/config.local.js.SAMPLE @@ -223,8 +223,8 @@ export default { // dnt: true, cache_ttl: 100 * 365 * 24 * 3600 // 100 Years. }, - readability: { - enabled: false + app: { + // allow_readability: true // allowPTagDescription: true // to enable description fallback to first paragraph }, images: { diff --git a/lib/core.js b/lib/core.js index 8c11cd9e1..79b18e817 100644 --- a/lib/core.js +++ b/lib/core.js @@ -1085,7 +1085,7 @@ return hasDomainData; } - const BIG_CONTEXT = ['htmlparser', 'readability', 'decode']; + const BIG_CONTEXT = ['htmlparser', 'readability', 'decode', 'cheerio']; function prepareResultData(uri, result, options) { @@ -1969,15 +1969,6 @@ } } - if (/* options.getProviderOptions('readability.enabled') === true */ - CONFIG.providerOptions && CONFIG.providerOptions.readability - && CONFIG.providerOptions.readability.enabled === true - || options.readability) { - context.__readabilityEnabled = true; - // Prevent force load readability plugin. - usedParams.__readabilityEnabled = true; - } - asyncMethodCb('initial'); }; diff --git a/lib/loader/utils.js b/lib/loader/utils.js index a6d82e2fd..cb78392dd 100644 --- a/lib/loader/utils.js +++ b/lib/loader/utils.js @@ -6,7 +6,6 @@ "request", "whitelistRecord", "iframelyRun", - "__readabilityEnabled", // Copy from `core.js` `utilsModules`. 
"utils", diff --git a/plugins/links/article/article.js b/plugins/links/article/article.js index 4d96effae..21b8770a2 100644 --- a/plugins/links/article/article.js +++ b/plugins/links/article/article.js @@ -1,9 +1,24 @@ +import * as cheerio from 'cheerio'; + export default { - getData: function(readability, meta, __is_general_article, utils) { + provides: 'articlebody', // if not yet provided from LD articlebody + + getData: function(__readabilityEnabled, readability, meta, utils) { + + const articleHtml = utils.encodeText(meta.charset, readability.getHTML()); + const $p = cheerio.load(articleHtml)('p'); + + if ($p.text()) { + return { + articlebody: articleHtml + } + } + }, + getVars: function(articlebody) { return { - safe_html: utils.encodeText(meta.charset, readability.getHTML()) + articlebody: articlebody }; } }; \ No newline at end of file diff --git a/plugins/links/article/check-article.js b/plugins/links/article/check-article.js index 11185238e..d4bfce1f4 100644 --- a/plugins/links/article/check-article.js +++ b/plugins/links/article/check-article.js @@ -1,14 +1,30 @@ export default { - provides: '__is_general_article', + provides: [ + "__readabilityEnabled", + "articlebody" + ], - getData: function(meta, __readabilityEnabled) { + getData: function(meta, options) { - if (meta.og && (meta.og.type === "article" || meta.og.type === "blog")) { + const ld = meta.ld?.newsarticle || meta.ld?.article || meta.ld?.blogposting || meta.ld?.reportagenewsarticle || meta.ld?.socialmediaposting; - return { - __is_general_article: true - }; + if ((ld + || (meta.og && (meta.og.type === "article" || meta.og.type === "blog" || meta.og.type === 'website') + || meta.twitter?.card === 'summary_large_image' + || meta.article)) + + && (options.getRequestOptions('readability.articlebody', false) || CONFIG.providerOptions?.app?.allow_readability === true)) { + + if (ld?.articlebody && /\/>/.test(ld.articlebody)) { + return { + articlebody: ld.articlebody + } + } else if 
(options.getProviderOptions('app.allow_readability')) { + return { + __readabilityEnabled: true + } + } } } }; \ No newline at end of file diff --git a/plugins/links/article/reader.js b/plugins/links/article/reader.js new file mode 100644 index 000000000..78c94a4f9 --- /dev/null +++ b/plugins/links/article/reader.js @@ -0,0 +1,10 @@ +export default { + + getData: function(articlebody) { + if (CONFIG.providerOptions?.app?.allow_readability === true && !CONFIG.SKIP_IFRAMELY_RENDERS) { + return { + safe_html: articlebody + } + } + } +}; \ No newline at end of file diff --git a/plugins/links/embedURL/embedURL.js b/plugins/links/embedURL/embedURL.js index 1a06b278e..f0f58e1c4 100644 --- a/plugins/links/embedURL/embedURL.js +++ b/plugins/links/embedURL/embedURL.js @@ -4,7 +4,7 @@ export default { provides: 'schemaVideoObject', - getData: function(url, cheerio, decode, __allowEmbedURL, utils) { + getData: function(url, __allowEmbedURL, cheerio, decode, utils) { /* Let's try to find ld+json in the body first. */ const ldSelector = 'script[type="application/ld+json"]:contains("VideoObject"), script[type="application/ld+json"]:contains("VideoObject")' diff --git a/plugins/meta/description-from-p-tag.js b/plugins/meta/description-from-p-tag.js index 89fbdafd8..ca3e11b2d 100644 --- a/plugins/meta/description-from-p-tag.js +++ b/plugins/meta/description-from-p-tag.js @@ -5,7 +5,7 @@ export default { lowestPriority: true, provides: '__allowPTagDescription', - getMeta: function(cheerio, decode, __allowPTagDescription) { + getMeta: function(__allowPTagDescription, cheerio, decode) { // Get the text from the first
<p> tag that's not in a header var description; cheerio("body p").each(function() { diff --git a/static/js/debug.js b/static/js/debug.js index 81b32f701..457751dac 100644 --- a/static/js/debug.js +++ b/static/js/debug.js @@ -385,7 +385,7 @@ function processUrl() { // Render context. var contexts = data.allData && data.allData .filter(function(d) { - return d.method.name === 'getData'; + return d.method.name === 'getData' || d.method.name === 'getVars'; }) .map(function(d) { return d.data;