Skip to content

Commit 29787c9

Browse files
committed
fix(llmo): resolve overrideBaseURL via canonical www detection
1 parent a45656c commit 29787c9

File tree

5 files changed

+186
-233
lines changed

5 files changed

+186
-233
lines changed

package-lock.json

Lines changed: 0 additions & 49 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@
7474
"@adobe/helix-status": "10.1.5",
7575
"@adobe/helix-universal-logger": "3.0.28",
7676
"@adobe/spacecat-helix-content-sdk": "1.4.33",
77-
"@adobe/spacecat-shared-ahrefs-client": "1.10.5",
7877
"@adobe/spacecat-shared-athena-client": "1.9.2",
7978
"@adobe/spacecat-shared-brand-client": "1.1.35",
8079
"@adobe/spacecat-shared-data-access": "2.104.0",

src/controllers/llmo/llmo-onboarding.js

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ import { createFrom } from '@adobe/spacecat-helix-content-sdk';
1515
import { Octokit } from '@octokit/rest';
1616
import { Entitlement as EntitlementModel } from '@adobe/spacecat-shared-data-access/src/models/entitlement/index.js';
1717
import TierClient from '@adobe/spacecat-shared-tier-client';
18-
import { composeBaseURL, tracingFetch as fetch, isNonEmptyArray } from '@adobe/spacecat-shared-utils';
19-
import AhrefsAPIClient from '@adobe/spacecat-shared-ahrefs-client';
18+
import { composeBaseURL, tracingFetch as fetch, resolveCanonicalUrl } from '@adobe/spacecat-shared-utils';
2019
import { parse as parseDomain } from 'tldts';
2120
import { postSlackMessage } from '../../utils/slack/base.js';
2221

@@ -707,29 +706,23 @@ function toggleWWW(url) {
707706
}
708707
}
709708

710-
/**
711-
* Tests a URL against the Ahrefs top pages endpoint to see if it returns data.
712-
* @param {string} url - The URL to test
713-
* @param {object} ahrefsClient - The Ahrefs API client
714-
* @param {object} log - Logger instance
715-
* @returns {Promise<boolean>} - True if the URL returns top pages data, false otherwise
716-
*/
717-
async function testAhrefsTopPages(url, ahrefsClient, log) {
718-
try {
719-
const { result } = await ahrefsClient.getTopPages(url, 1);
720-
const hasData = isNonEmptyArray(result?.pages);
721-
log.debug(`Ahrefs top pages test for ${url}: ${hasData ? 'SUCCESS' : 'NO DATA'}`);
722-
return hasData;
723-
} catch (error) {
724-
log.debug(`Ahrefs top pages test for ${url}: FAILED - ${error.message}`);
725-
return false;
709+
/**
 * Derives the overrideBaseURL from a resolved canonical URL.
 *
 * An override is produced only when the hostname of the resolved canonical URL equals the
 * hostname of the www-toggled variant. In that case the override is the alternate URL's
 * origin, with the base URL's pathname appended when that pathname is not the root ('/').
 * Query strings and fragments of the inputs are not carried over.
 *
 * @param {string} baseURL - The site's base URL
 * @param {string} alternateURL - The www-toggled variant of the base URL
 * @param {string} resolvedUrl - The canonical URL that was resolved for one of the variants
 * @returns {string|null} The override base URL, or null when the canonical hostname does
 * not match the alternate hostname
 * @throws {TypeError} If any of the arguments cannot be parsed as an absolute URL
 */
function deriveWwwOverrideBaseURL(baseURL, alternateURL, resolvedUrl) {
  const baseUrlObj = new URL(baseURL);
  const alternateUrlObj = new URL(alternateURL);
  const resolvedUrlObj = new URL(resolvedUrl);

  // Only set override when canonical hostname indicates the www-toggled variant.
  // URL parsing lowercases hostnames, so this comparison is effectively case-insensitive.
  if (resolvedUrlObj.hostname !== alternateUrlObj.hostname) {
    return null;
  }

  // Preserve a non-root base path so sub-path sites keep their path on the alternate origin.
  const basePathname = baseUrlObj.pathname;
  return basePathname !== '/' ? `${alternateUrlObj.origin}${basePathname}` : alternateUrlObj.origin;
}
728722

729723
/**
730-
* Determines if overrideBaseURL should be set based on Ahrefs top pages data.
731-
* Tests both the base URL and its www-variant. If only the alternate variation succeeds,
732-
* returns that variation as the overrideBaseURL.
724+
* Determines if overrideBaseURL should be set based on canonical URL resolution.
725+
* Resolves the base URL first and falls back to the www-toggled variant when needed.
733726
*
734727
* @param {string} baseURL - The site's base URL
735728
* @param {object} context - The request context
@@ -740,7 +733,6 @@ export async function determineOverrideBaseURL(baseURL, context) {
740733

741734
try {
742735
log.info(`Determining overrideBaseURL for ${baseURL}`);
743-
const ahrefsClient = AhrefsAPIClient.createFrom(context);
744736
const alternateURL = toggleWWW(baseURL);
745737

746738
// If toggleWWW returns the same URL, it means the URL has a subdomain
@@ -750,24 +742,34 @@ export async function determineOverrideBaseURL(baseURL, context) {
750742
return null;
751743
}
752744

753-
log.debug(`Testing base URL: ${baseURL} and alternate: ${alternateURL}`);
745+
const baseResolvedUrl = await resolveCanonicalUrl(baseURL);
746+
const baseOverride = baseResolvedUrl
747+
? deriveWwwOverrideBaseURL(baseURL, alternateURL, baseResolvedUrl)
748+
: null;
754749

755-
const [baseURLSuccess, alternateURLSuccess] = await Promise.all([
756-
testAhrefsTopPages(baseURL, ahrefsClient, log),
757-
testAhrefsTopPages(alternateURL, ahrefsClient, log),
758-
]);
750+
if (baseOverride) {
751+
log.info(`Setting overrideBaseURL to ${baseOverride} (base URL canonical resolved to alternate hostname)`);
752+
return baseOverride;
753+
}
759754

760-
if (!baseURLSuccess && alternateURLSuccess) {
761-
log.info(`Setting overrideBaseURL to ${alternateURL} (base URL failed, alternate succeeded)`);
762-
return alternateURL;
755+
if (baseResolvedUrl) {
756+
log.info('Base URL resolved, no overrideBaseURL needed');
757+
return null;
758+
}
759+
const alternateResolvedUrl = await resolveCanonicalUrl(alternateURL);
760+
const alternateOverride = alternateResolvedUrl
761+
? deriveWwwOverrideBaseURL(baseURL, alternateURL, alternateResolvedUrl)
762+
: null;
763+
764+
if (alternateOverride) {
765+
log.info(`Setting overrideBaseURL to ${alternateOverride} (base URL unresolved, alternate URL resolved)`);
766+
return alternateOverride;
763767
}
764768

765-
if (baseURLSuccess && alternateURLSuccess) {
766-
log.debug('Both URLs succeeded, no overrideBaseURL needed');
767-
} else if (baseURLSuccess && !alternateURLSuccess) {
768-
log.debug('Base URL succeeded, no overrideBaseURL needed');
769+
if (alternateResolvedUrl) {
770+
log.info('Alternate URL resolved but no hostname toggle detected, no overrideBaseURL needed');
769771
} else {
770-
log.warn('Both URLs failed Ahrefs test, no overrideBaseURL set');
772+
log.warn('Both URLs could not be resolved canonically, no overrideBaseURL set');
771773
}
772774

773775
return null;

0 commit comments

Comments
 (0)