@@ -15,8 +15,7 @@ import { createFrom } from '@adobe/spacecat-helix-content-sdk';
1515import { Octokit } from '@octokit/rest' ;
1616import { Entitlement as EntitlementModel } from '@adobe/spacecat-shared-data-access/src/models/entitlement/index.js' ;
1717import TierClient from '@adobe/spacecat-shared-tier-client' ;
18- import { composeBaseURL , tracingFetch as fetch , isNonEmptyArray } from '@adobe/spacecat-shared-utils' ;
19- import AhrefsAPIClient from '@adobe/spacecat-shared-ahrefs-client' ;
18+ import { composeBaseURL , tracingFetch as fetch , resolveCanonicalUrl } from '@adobe/spacecat-shared-utils' ;
2019import { parse as parseDomain } from 'tldts' ;
2120import { postSlackMessage } from '../../utils/slack/base.js' ;
2221
@@ -707,29 +706,23 @@ function toggleWWW(url) {
707706 }
708707}
709708
710- /**
711- * Tests a URL against the Ahrefs top pages endpoint to see if it returns data.
712- * @param {string } url - The URL to test
713- * @param {object } ahrefsClient - The Ahrefs API client
714- * @param {object } log - Logger instance
715- * @returns {Promise<boolean> } - True if the URL returns top pages data, false otherwise
716- */
717- async function testAhrefsTopPages ( url , ahrefsClient , log ) {
718- try {
719- const { result } = await ahrefsClient . getTopPages ( url , 1 ) ;
720- const hasData = isNonEmptyArray ( result ?. pages ) ;
721- log . debug ( `Ahrefs top pages test for ${ url } : ${ hasData ? 'SUCCESS' : 'NO DATA' } ` ) ;
722- return hasData ;
723- } catch ( error ) {
724- log . debug ( `Ahrefs top pages test for ${ url } : FAILED - ${ error . message } ` ) ;
725- return false ;
/**
 * Derives the www-toggled override base URL from a canonical URL resolution result.
 *
 * An override is produced only when the hostname of the resolved URL equals the hostname
 * of the www-toggled alternate URL. The override is built from the alternate URL's origin
 * plus the base URL's path; the base URL's query string and fragment are not carried over.
 *
 * @param {string} baseURL - The site's base URL
 * @param {string} alternateURL - The www-toggled variant of the base URL
 * @param {string} resolvedUrl - The URL that canonical resolution returned
 * @returns {string|null} - The override base URL without a trailing slash, or null when
 * the resolved hostname differs from the alternate hostname
 * @throws {TypeError} - If any argument cannot be parsed by the URL constructor
 */
function deriveWwwOverrideBaseURL(baseURL, alternateURL, resolvedUrl) {
  const baseUrlObj = new URL(baseURL);
  const alternateUrlObj = new URL(alternateURL);
  const resolvedUrlObj = new URL(resolvedUrl);

  // Only set override when canonical hostname indicates the www-toggled variant.
  if (resolvedUrlObj.hostname !== alternateUrlObj.hostname) {
    return null;
  }

  // Strip trailing slashes so a sub-path such as "/blog/" yields the same slash-less
  // form as the root path, which collapses to the bare origin.
  const basePathname = baseUrlObj.pathname.replace(/\/+$/, '');
  return `${alternateUrlObj.origin}${basePathname}`;
}
728722
729723/**
730- * Determines if overrideBaseURL should be set based on Ahrefs top pages data.
731- * Tests both the base URL and its www-variant. If only the alternate variation succeeds,
732- * returns that variation as the overrideBaseURL.
724+ * Determines if overrideBaseURL should be set based on canonical URL resolution.
725+ * Resolves the base URL first and falls back to the www-toggled variant when needed.
733726 *
734727 * @param {string } baseURL - The site's base URL
735728 * @param {object } context - The request context
@@ -740,7 +733,6 @@ export async function determineOverrideBaseURL(baseURL, context) {
740733
741734 try {
742735 log . info ( `Determining overrideBaseURL for ${ baseURL } ` ) ;
743- const ahrefsClient = AhrefsAPIClient . createFrom ( context ) ;
744736 const alternateURL = toggleWWW ( baseURL ) ;
745737
746738 // If toggleWWW returns the same URL, it means the URL has a subdomain
@@ -750,24 +742,32 @@ export async function determineOverrideBaseURL(baseURL, context) {
750742 return null ;
751743 }
752744
753- log . debug ( `Testing base URL: ${ baseURL } and alternate: ${ alternateURL } ` ) ;
745+ const baseResolvedUrl = await resolveCanonicalUrl ( baseURL ) ;
746+ const baseOverride = baseResolvedUrl
747+ ? deriveWwwOverrideBaseURL ( baseURL , alternateURL , baseResolvedUrl )
748+ : null ;
749+
750+ if ( baseOverride ) {
751+ log . info ( `Setting overrideBaseURL to ${ baseOverride } (base URL canonical resolved to alternate hostname)` ) ;
752+ return baseOverride ;
753+ }
754754
755- const [ baseURLSuccess , alternateURLSuccess ] = await Promise . all ( [
756- testAhrefsTopPages ( baseURL , ahrefsClient , log ) ,
757- testAhrefsTopPages ( alternateURL , ahrefsClient , log ) ,
758- ] ) ;
755+ const alternateResolvedUrl = await resolveCanonicalUrl ( alternateURL ) ;
756+ const alternateOverride = alternateResolvedUrl
757+ ? deriveWwwOverrideBaseURL ( baseURL , alternateURL , alternateResolvedUrl )
758+ : null ;
759759
760- if ( ! baseURLSuccess && alternateURLSuccess ) {
761- log . info ( `Setting overrideBaseURL to ${ alternateURL } (base URL failed , alternate succeeded )` ) ;
762- return alternateURL ;
760+ if ( ! baseResolvedUrl && alternateOverride ) {
761+ log . info ( `Setting overrideBaseURL to ${ alternateOverride } (base URL unresolved , alternate URL resolved )` ) ;
762+ return alternateOverride ;
763763 }
764764
765- if ( baseURLSuccess && alternateURLSuccess ) {
766- log . debug ( 'Both URLs succeeded , no overrideBaseURL needed' ) ;
767- } else if ( baseURLSuccess && ! alternateURLSuccess ) {
768- log . debug ( 'Base URL succeeded , no overrideBaseURL needed' ) ;
765+ if ( baseResolvedUrl && alternateResolvedUrl ) {
766+ log . debug ( 'Both URLs resolved , no overrideBaseURL needed' ) ;
767+ } else if ( baseResolvedUrl && ! alternateResolvedUrl ) {
768+ log . debug ( 'Base URL resolved , no overrideBaseURL needed' ) ;
769769 } else {
770- log . warn ( 'Both URLs failed Ahrefs test , no overrideBaseURL set' ) ;
770+ log . warn ( 'Both URLs could not be resolved canonically , no overrideBaseURL set' ) ;
771771 }
772772
773773 return null ;
0 commit comments