@@ -15,8 +15,7 @@ import { createFrom } from '@adobe/spacecat-helix-content-sdk';
1515import { Octokit } from '@octokit/rest' ;
1616import { Entitlement as EntitlementModel } from '@adobe/spacecat-shared-data-access/src/models/entitlement/index.js' ;
1717import TierClient from '@adobe/spacecat-shared-tier-client' ;
18- import { composeBaseURL , tracingFetch as fetch , isNonEmptyArray } from '@adobe/spacecat-shared-utils' ;
19- import AhrefsAPIClient from '@adobe/spacecat-shared-ahrefs-client' ;
18+ import { composeBaseURL , tracingFetch as fetch , resolveCanonicalUrl } from '@adobe/spacecat-shared-utils' ;
2019import { parse as parseDomain } from 'tldts' ;
2120import { postSlackMessage } from '../../utils/slack/base.js' ;
2221
@@ -707,29 +706,23 @@ function toggleWWW(url) {
707706 }
708707}
709708
710- /**
711- * Tests a URL against the Ahrefs top pages endpoint to see if it returns data.
712- * @param {string } url - The URL to test
713- * @param {object } ahrefsClient - The Ahrefs API client
714- * @param {object } log - Logger instance
715- * @returns {Promise<boolean> } - True if the URL returns top pages data, false otherwise
716- */
717- async function testAhrefsTopPages ( url , ahrefsClient , log ) {
718- try {
719- const { result } = await ahrefsClient . getTopPages ( url , 1 ) ;
720- const hasData = isNonEmptyArray ( result ?. pages ) ;
721- log . debug ( `Ahrefs top pages test for ${ url } : ${ hasData ? 'SUCCESS' : 'NO DATA' } ` ) ;
722- return hasData ;
723- } catch ( error ) {
724- log . debug ( `Ahrefs top pages test for ${ url } : FAILED - ${ error . message } ` ) ;
725- return false ;
/**
 * Derives the overrideBaseURL to use when a canonical URL resolution landed on
 * the www-toggled variant of the site's base URL.
 *
 * The override is built from the alternate URL's origin plus the base URL's
 * pathname. Only the pathname of the base URL is carried over; its query string
 * and fragment are not. A root pathname ('/') is omitted, so the result is then
 * the bare origin without a trailing slash.
 *
 * @param {string} baseURL - The site's base URL (absolute)
 * @param {string} alternateURL - The www-toggled variant of the base URL (absolute)
 * @param {string} resolvedUrl - The canonical URL that resolution produced (absolute)
 * @returns {string|null} - The override base URL, or null when the resolved
 * hostname is not the alternate hostname
 * @throws {TypeError} - If any of the three arguments cannot be parsed by `new URL()`
 */
function deriveWwwOverrideBaseURL(baseURL, alternateURL, resolvedUrl) {
  const baseUrlObj = new URL(baseURL);
  const alternateUrlObj = new URL(alternateURL);
  const resolvedUrlObj = new URL(resolvedUrl);

  // Only set override when canonical hostname indicates the www-toggled variant.
  // Hostnames are compared exactly; scheme, port and path of the resolved URL are ignored.
  if (resolvedUrlObj.hostname !== alternateUrlObj.hostname) {
    return null;
  }

  // Keep the base URL's sub-path (if any) on top of the alternate origin.
  const basePathname = baseUrlObj.pathname;
  return basePathname !== '/' ? `${alternateUrlObj.origin}${basePathname}` : alternateUrlObj.origin;
}
728722
729723/**
730- * Determines if overrideBaseURL should be set based on Ahrefs top pages data.
731- * Tests both the base URL and its www-variant. If only the alternate variation succeeds,
732- * returns that variation as the overrideBaseURL.
724+ * Determines if overrideBaseURL should be set based on canonical URL resolution.
725+ * Resolves the base URL first and falls back to the www-toggled variant when needed.
733726 *
734727 * @param {string } baseURL - The site's base URL
735728 * @param {object } context - The request context
@@ -740,7 +733,6 @@ export async function determineOverrideBaseURL(baseURL, context) {
740733
741734 try {
742735 log . info ( `Determining overrideBaseURL for ${ baseURL } ` ) ;
743- const ahrefsClient = AhrefsAPIClient . createFrom ( context ) ;
744736 const alternateURL = toggleWWW ( baseURL ) ;
745737
746738 // If toggleWWW returns the same URL, it means the URL has a subdomain
@@ -750,24 +742,34 @@ export async function determineOverrideBaseURL(baseURL, context) {
750742 return null ;
751743 }
752744
753- log . debug ( `Testing base URL: ${ baseURL } and alternate: ${ alternateURL } ` ) ;
745+ const baseResolvedUrl = await resolveCanonicalUrl ( baseURL ) ;
746+ const baseOverride = baseResolvedUrl
747+ ? deriveWwwOverrideBaseURL ( baseURL , alternateURL , baseResolvedUrl )
748+ : null ;
754749
755- const [ baseURLSuccess , alternateURLSuccess ] = await Promise . all ( [
756- testAhrefsTopPages ( baseURL , ahrefsClient , log ) ,
757- testAhrefsTopPages ( alternateURL , ahrefsClient , log ) ,
758- ] ) ;
750+ if ( baseOverride ) {
751+ log . info ( `Setting overrideBaseURL to ${ baseOverride } (base URL canonical resolved to alternate hostname)` ) ;
752+ return baseOverride ;
753+ }
759754
760- if ( ! baseURLSuccess && alternateURLSuccess ) {
761- log . info ( `Setting overrideBaseURL to ${ alternateURL } (base URL failed, alternate succeeded)` ) ;
762- return alternateURL ;
755+ if ( baseResolvedUrl ) {
756+ log . debug ( 'Base URL resolved, no overrideBaseURL needed' ) ;
757+ return null ;
758+ }
759+ const alternateResolvedUrl = await resolveCanonicalUrl ( alternateURL ) ;
760+ const alternateOverride = alternateResolvedUrl
761+ ? deriveWwwOverrideBaseURL ( baseURL , alternateURL , alternateResolvedUrl )
762+ : null ;
763+
764+ if ( alternateOverride ) {
765+ log . info ( `Setting overrideBaseURL to ${ alternateOverride } (base URL unresolved, alternate URL resolved)` ) ;
766+ return alternateOverride ;
763767 }
764768
765- if ( baseURLSuccess && alternateURLSuccess ) {
766- log . debug ( 'Both URLs succeeded, no overrideBaseURL needed' ) ;
767- } else if ( baseURLSuccess && ! alternateURLSuccess ) {
768- log . debug ( 'Base URL succeeded, no overrideBaseURL needed' ) ;
769+ if ( alternateResolvedUrl ) {
770+ log . debug ( 'Alternate URL resolved but no hostname toggle detected, no overrideBaseURL needed' ) ;
769771 } else {
770- log . warn ( 'Both URLs failed Ahrefs test , no overrideBaseURL set' ) ;
772+ log . warn ( 'Both URLs could not be resolved canonically , no overrideBaseURL set' ) ;
771773 }
772774
773775 return null ;
0 commit comments