Skip to content

Commit 0cdc6e2

Browse files
committed
refactor(llmo): resolve overrideBaseURL via canonical www detection
1 parent 2f92ac1 commit 0cdc6e2

File tree

3 files changed

+195
-183
lines changed

3 files changed

+195
-183
lines changed

src/controllers/llmo/llmo-onboarding.js

Lines changed: 35 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,7 @@ import { createFrom } from '@adobe/spacecat-helix-content-sdk';
1515
import { Octokit } from '@octokit/rest';
1616
import { Entitlement as EntitlementModel } from '@adobe/spacecat-shared-data-access/src/models/entitlement/index.js';
1717
import TierClient from '@adobe/spacecat-shared-tier-client';
18-
import { composeBaseURL, tracingFetch as fetch, isNonEmptyArray } from '@adobe/spacecat-shared-utils';
19-
import AhrefsAPIClient from '@adobe/spacecat-shared-ahrefs-client';
18+
import { composeBaseURL, tracingFetch as fetch, resolveCanonicalUrl } from '@adobe/spacecat-shared-utils';
2019
import { parse as parseDomain } from 'tldts';
2120
import { postSlackMessage } from '../../utils/slack/base.js';
2221

@@ -707,29 +706,23 @@ function toggleWWW(url) {
707706
}
708707
}
709708

710-
/**
711-
* Tests a URL against the Ahrefs top pages endpoint to see if it returns data.
712-
* @param {string} url - The URL to test
713-
* @param {object} ahrefsClient - The Ahrefs API client
714-
* @param {object} log - Logger instance
715-
* @returns {Promise<boolean>} - True if the URL returns top pages data, false otherwise
716-
*/
717-
async function testAhrefsTopPages(url, ahrefsClient, log) {
718-
try {
719-
const { result } = await ahrefsClient.getTopPages(url, 1);
720-
const hasData = isNonEmptyArray(result?.pages);
721-
log.debug(`Ahrefs top pages test for ${url}: ${hasData ? 'SUCCESS' : 'NO DATA'}`);
722-
return hasData;
723-
} catch (error) {
724-
log.debug(`Ahrefs top pages test for ${url}: FAILED - ${error.message}`);
725-
return false;
709+
function deriveWwwOverrideBaseURL(baseURL, alternateURL, resolvedUrl) {
710+
const baseUrlObj = new URL(baseURL);
711+
const alternateUrlObj = new URL(alternateURL);
712+
const resolvedUrlObj = new URL(resolvedUrl);
713+
714+
// Only set override when canonical hostname indicates the www-toggled variant.
715+
if (resolvedUrlObj.hostname !== alternateUrlObj.hostname) {
716+
return null;
726717
}
718+
719+
const basePathname = baseUrlObj.pathname;
720+
return basePathname !== '/' ? `${alternateUrlObj.origin}${basePathname}` : alternateUrlObj.origin;
727721
}
728722

729723
/**
730-
* Determines if overrideBaseURL should be set based on Ahrefs top pages data.
731-
* Tests both the base URL and its www-variant. If only the alternate variation succeeds,
732-
* returns that variation as the overrideBaseURL.
724+
* Determines if overrideBaseURL should be set based on canonical URL resolution.
725+
* Resolves the base URL first and falls back to the www-toggled variant when needed.
733726
*
734727
* @param {string} baseURL - The site's base URL
735728
* @param {object} context - The request context
@@ -740,7 +733,6 @@ export async function determineOverrideBaseURL(baseURL, context) {
740733

741734
try {
742735
log.info(`Determining overrideBaseURL for ${baseURL}`);
743-
const ahrefsClient = AhrefsAPIClient.createFrom(context);
744736
const alternateURL = toggleWWW(baseURL);
745737

746738
// If toggleWWW returns the same URL, it means the URL has a subdomain
@@ -750,24 +742,32 @@ export async function determineOverrideBaseURL(baseURL, context) {
750742
return null;
751743
}
752744

753-
log.debug(`Testing base URL: ${baseURL} and alternate: ${alternateURL}`);
745+
const baseResolvedUrl = await resolveCanonicalUrl(baseURL);
746+
const baseOverride = baseResolvedUrl
747+
? deriveWwwOverrideBaseURL(baseURL, alternateURL, baseResolvedUrl)
748+
: null;
749+
750+
if (baseOverride) {
751+
log.info(`Setting overrideBaseURL to ${baseOverride} (base URL canonical resolved to alternate hostname)`);
752+
return baseOverride;
753+
}
754754

755-
const [baseURLSuccess, alternateURLSuccess] = await Promise.all([
756-
testAhrefsTopPages(baseURL, ahrefsClient, log),
757-
testAhrefsTopPages(alternateURL, ahrefsClient, log),
758-
]);
755+
const alternateResolvedUrl = await resolveCanonicalUrl(alternateURL);
756+
const alternateOverride = alternateResolvedUrl
757+
? deriveWwwOverrideBaseURL(baseURL, alternateURL, alternateResolvedUrl)
758+
: null;
759759

760-
if (!baseURLSuccess && alternateURLSuccess) {
761-
log.info(`Setting overrideBaseURL to ${alternateURL} (base URL failed, alternate succeeded)`);
762-
return alternateURL;
760+
if (!baseResolvedUrl && alternateOverride) {
761+
log.info(`Setting overrideBaseURL to ${alternateOverride} (base URL unresolved, alternate URL resolved)`);
762+
return alternateOverride;
763763
}
764764

765-
if (baseURLSuccess && alternateURLSuccess) {
766-
log.debug('Both URLs succeeded, no overrideBaseURL needed');
767-
} else if (baseURLSuccess && !alternateURLSuccess) {
768-
log.debug('Base URL succeeded, no overrideBaseURL needed');
765+
if (baseResolvedUrl && alternateResolvedUrl) {
766+
log.debug('Both URLs resolved, no overrideBaseURL needed');
767+
} else if (baseResolvedUrl && !alternateResolvedUrl) {
768+
log.debug('Base URL resolved, no overrideBaseURL needed');
769769
} else {
770-
log.warn('Both URLs failed Ahrefs test, no overrideBaseURL set');
770+
log.warn('Both URLs could not be resolved canonically, no overrideBaseURL set');
771771
}
772772

773773
return null;

0 commit comments

Comments
 (0)