diff --git a/app/robots.txt/route.ts b/app/robots.txt/route.ts
new file mode 100644
index 00000000..d39fbc53
--- /dev/null
+++ b/app/robots.txt/route.ts
@@ -0,0 +1,31 @@
+// app/robots.txt/route.ts
+import { isSitemapEnabled } from '@/lib/utils';
+
+/**
+ * Serves `/robots.txt` and points crawlers at the sitemap index.
+ *
+ * @returns A plain-text robots file, or a 404 while sitemaps are disabled.
+ */
+export async function GET() {
+  // Check the feature flag first so no body is built for a 404.
+  if (!isSitemapEnabled()) {
+    return new Response('Sitemaps are not enabled', { status: 404 });
+  }
+
+  const baseUrl = process.env.NEXTAUTH_URL;
+
+  // Joined from an array so source indentation cannot leak into the file.
+  const robotsTxt = [
+    'User-agent: *',
+    'Allow: /',
+    '',
+    `Sitemap: ${baseUrl}/sitemap/main.xml`,
+    '',
+  ].join('\n');
+
+  return new Response(robotsTxt, {
+    headers: {
+      'Content-Type': 'text/plain',
+    },
+  });
+}
diff --git a/app/sitemap/[entityPage]/route.ts b/app/sitemap/[entityPage]/route.ts
new file mode 100644
index 00000000..2c6eccbc
--- /dev/null
+++ b/app/sitemap/[entityPage]/route.ts
@@ -0,0 +1,226 @@
+// app/sitemap/[entityPage]/route.ts
+import { type NextRequest } from 'next/server';
+
+import {
+  ENTITY_CONFIG,
+  getGraphqlEntityCount,
+  getSearchEntityCount,
+  getSiteMapConfig,
+  isSitemapEnabled,
+  type SitemapEntityItem,
+} from '@/lib/utils';
+
+const XML_HEADER = '<?xml version="1.0" encoding="UTF-8"?>';
+const URLSET_OPEN =
+  '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
+// Valid, empty document returned for unknown entities and on failure.
+const EMPTY_URLSET = `${XML_HEADER}\n${URLSET_OPEN}\n</urlset>`;
+
+// Name of the child sitemap that lists the fixed pages below.
+const STATIC_SITEMAP = 'static';
+
+const STATIC_PAGES = [
+  { path: '', priority: '1.0', changefreq: 'daily' },
+  { path: '/datasets', priority: '0.9', changefreq: 'daily' },
+  { path: '/usecases', priority: '0.8', changefreq: 'weekly' },
+  { path: '/publishers', priority: '0.7', changefreq: 'weekly' },
+  { path: '/sectors', priority: '0.7', changefreq: 'weekly' },
+];
+
+/** Escapes the characters that may not appear verbatim in XML text. */
+function escapeXml(value: string): string {
+  return value
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&apos;');
+}
+
+/**
+ * Converts an item's `updated_at` into a `<lastmod>` value.
+ *
+ * @returns An ISO-8601 timestamp, or `undefined` when the date is missing or
+ *   unparsable so the caller can omit the tag instead of inventing a date.
+ */
+function toLastmod(updatedAt?: string): string | undefined {
+  if (!updatedAt) return undefined;
+  const parsed = new Date(updatedAt);
+  return Number.isNaN(parsed.getTime()) ? undefined : parsed.toISOString();
+}
+
+/** Wraps already-rendered `<url>` entries in a `<urlset>` document. */
+function wrapUrlset(urls: string): string {
+  return `${XML_HEADER}\n${URLSET_OPEN}\n${urls}\n</urlset>`;
+}
+
+/**
+ * Loads the items that belong on one page of an entity sitemap.
+ *
+ * @param entity Key into `ENTITY_CONFIG`.
+ * @param page 1-based sitemap page number.
+ * @returns The items for that page; empty for an unknown entity or source.
+ */
+async function fetchEntityData(
+  entity: string,
+  page: number
+): Promise<SitemapEntityItem[]> {
+  const config = ENTITY_CONFIG[entity];
+
+  // If no config is found, return empty array
+  if (!config) return [];
+
+  const { itemsPerPage } = getSiteMapConfig();
+
+  if (config.source === 'search') {
+    // The search endpoint takes the page and size itself.
+    const response = await getSearchEntityCount(entity, itemsPerPage, page);
+    return response.list;
+  }
+
+  if (config.source === 'graphql') {
+    // The GraphQL helper returns the whole list, so cut out this page's
+    // window; without it every numbered sitemap would repeat all items.
+    const response = await getGraphqlEntityCount(entity, config);
+    const start = (page - 1) * itemsPerPage;
+    return response.list.slice(start, start + itemsPerPage);
+  }
+
+  return [];
+}
+
+/** Renders the sitemap for the fixed pages in `STATIC_PAGES`. */
+function generateStaticSitemap(): string {
+  const baseUrl = process.env.NEXTAUTH_URL;
+
+  const urls = STATIC_PAGES.map(
+    (page) => `
+  <url>
+    <loc>${escapeXml(`${baseUrl}${page.path}`)}</loc>
+    <changefreq>${page.changefreq}</changefreq>
+    <priority>${page.priority}</priority>
+  </url>`
+  ).join('');
+
+  return wrapUrlset(urls);
+}
+
+/**
+ * Renders one page of an entity sitemap.
+ *
+ * @param items Items to list; each becomes one `<url>` entry.
+ * @param entity Key into `ENTITY_CONFIG`, which supplies path and priority.
+ * @returns The XML document; an empty `<urlset>` for an unknown entity.
+ */
+function generateEntitySitemap(
+  items: SitemapEntityItem[],
+  entity: string
+): string {
+  const baseUrl = process.env.NEXTAUTH_URL;
+  const config = ENTITY_CONFIG[entity];
+
+  if (!config) return EMPTY_URLSET;
+
+  const urls = (items ?? [])
+    .map((item) => {
+      // Organizations and users (the contributor types) live under
+      // different paths; every other item prefers its slug over its id.
+      let loc: string;
+      if (item.__typename === 'TypeOrganization') {
+        loc = `${baseUrl}/${config.path}/organization/${item.id}`;
+      } else if (item.__typename === 'TypeUser') {
+        loc = `${baseUrl}/${config.path}/${item.id}`;
+      } else {
+        loc = `${baseUrl}/${config.path}/${item.slug || item.id}`;
+      }
+
+      // <lastmod> is optional; leave it out rather than stamping "now".
+      const lastmod = toLastmod(item.updated_at);
+      const lastmodTag = lastmod
+        ? `\n    <lastmod>${lastmod}</lastmod>`
+        : '';
+
+      return `
+  <url>
+    <loc>${escapeXml(loc)}</loc>${lastmodTag}
+    <changefreq>weekly</changefreq>
+    <priority>${config.priority}</priority>
+  </url>`;
+    })
+    .join('');
+
+  return wrapUrlset(urls);
+}
+
+/**
+ * Serves `/sitemap/<name>-<page>.xml`.
+ *
+ * `<name>` is `static` or an `ENTITY_CONFIG` key; `<page>` is 1-based.
+ *
+ * @returns The sitemap XML; 404 when disabled or the name is unknown, 400
+ *   for a bad page number, 500 with an empty `<urlset>` on failure.
+ */
+export async function GET(
+  request: NextRequest,
+  { params }: { params: { entityPage: string } }
+) {
+  // Check if sitemaps are enabled via feature flag
+  if (!isSitemapEnabled()) {
+    return new Response('Sitemaps are not enabled', { status: 404 });
+  }
+
+  try {
+    const match = params.entityPage.match(/^([a-zA-Z0-9_]+)-(\d+)\.xml$/);
+    if (!match) {
+      return new Response('Invalid Route', { status: 404 });
+    }
+
+    const entity = match[1];
+    const pageNumber = Number(match[2]);
+    const isStatic = entity === STATIC_SITEMAP;
+
+    // Own-property check: names such as "constructor" match the pattern
+    // and would otherwise resolve to inherited Object.prototype members.
+    if (
+      !isStatic &&
+      !Object.prototype.hasOwnProperty.call(ENTITY_CONFIG, entity)
+    ) {
+      return new Response('Entity not found', { status: 404 });
+    }
+
+    // The fixed pages fit on one page, so only `static-1.xml` exists.
+    if (
+      !Number.isSafeInteger(pageNumber) ||
+      pageNumber < 1 ||
+      (isStatic && pageNumber !== 1)
+    ) {
+      return new Response('Invalid page number', { status: 400 });
+    }
+
+    const sitemap = isStatic
+      ? generateStaticSitemap()
+      : generateEntitySitemap(
+          await fetchEntityData(entity, pageNumber),
+          entity
+        );
+
+    const { childCacheDuration } = getSiteMapConfig();
+    return new Response(sitemap, {
+      headers: {
+        'Content-Type': 'application/xml',
+        'Cache-Control': `public, max-age=${childCacheDuration}`,
+      },
+    });
+  } catch (error) {
+    console.error('Error generating entity sitemap:', error);
+
+    return new Response(EMPTY_URLSET, {
+      status: 500,
+      headers: {
+        'Content-Type': 'application/xml',
+      },
+    });
+  }
+}
+
+export const dynamic = 'force-dynamic';
diff --git a/app/sitemap/main.xml/route.ts b/app/sitemap/main.xml/route.ts
new file mode 100644
index 00000000..ee179bd4
--- /dev/null
+++ b/app/sitemap/main.xml/route.ts
@@ -0,0 +1,123 @@
+// app/sitemap/main.xml/route.ts
+import { type NextRequest } from 'next/server';
+
+import {
+  ENTITY_CONFIG,
+  getGraphqlEntityCount,
+  getSearchEntityCount,
+  getSiteMapConfig,
+  isSitemapEnabled,
+} from '@/lib/utils';
+
+const XML_HEADER = '<?xml version="1.0" encoding="UTF-8"?>';
+const SITEMAP_INDEX_OPEN =
+  '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
+
+/**
+ * Fetches the item count of every configured entity in parallel.
+ *
+ * @returns Map from entity name to item count. Entities whose config lacks
+ *   the query or endpoint required by its source are left out.
+ */
+const getAllEntityCounts = async (): Promise<Record<string, number>> => {
+  const countPromises: Promise<{ entityName: string; count: number }>[] = [];
+
+  Object.entries(ENTITY_CONFIG).forEach(([entityName, config]) => {
+    if (config.source === 'graphql' && config.graphqlQuery) {
+      countPromises.push(getGraphqlEntityCount(entityName, config));
+    }
+    if (config.source === 'search' && config.endpoint) {
+      // Only the reported total is used here, so a tiny page is enough.
+      countPromises.push(getSearchEntityCount(entityName, 5, 1));
+    }
+  });
+
+  const results = await Promise.all(countPromises);
+
+  const counts: Record<string, number> = {};
+  results.forEach(({ entityName, count }) => {
+    counts[entityName] = count;
+  });
+
+  return counts;
+};
+
+/**
+ * Renders the sitemap index that links to every child sitemap.
+ *
+ * @param sitemapUrls Absolute URLs of the child sitemaps.
+ * @returns The `<sitemapindex>` XML document.
+ */
+function generateSitemapIndex(sitemapUrls: string[]): string {
+  // One timestamp for the whole document instead of one per entry.
+  const generatedAt = new Date().toISOString();
+
+  const sitemapEntries = sitemapUrls
+    .map(
+      (url) => `
+  <sitemap>
+    <loc>${url}</loc>
+    <lastmod>${generatedAt}</lastmod>
+  </sitemap>`
+    )
+    .join('');
+
+  return `${XML_HEADER}\n${SITEMAP_INDEX_OPEN}\n${sitemapEntries}\n</sitemapindex>`;
+}
+
+/**
+ * Serves `/sitemap/main.xml`, the index of all child sitemaps.
+ *
+ * @returns The index XML; 404 while sitemaps are disabled, 500 with an empty
+ *   index if generation throws.
+ */
+export async function GET(request: NextRequest) {
+  // Check if sitemaps are enabled via feature flag
+  if (!isSitemapEnabled()) {
+    return new Response('Sitemaps are not enabled', { status: 404 });
+  }
+
+  try {
+    const { itemsPerPage, cacheDuration } = getSiteMapConfig();
+    const baseUrl = process.env.NEXTAUTH_URL;
+
+    const entityCounts = await getAllEntityCounts();
+
+    // An index may only list sitemaps, so the fixed pages get their own
+    // child sitemap instead of being inlined here as page entries.
+    const sitemapUrls: string[] = [`${baseUrl}/sitemap/static-1.xml`];
+
+    // One numbered child sitemap per page of each entity that has items.
+    for (const entity of Object.keys(ENTITY_CONFIG)) {
+      const count = entityCounts[entity];
+      if (!Number.isFinite(count) || count <= 0) continue;
+
+      const pages = Math.ceil(count / itemsPerPage);
+      for (let i = 1; i <= pages; i++) {
+        sitemapUrls.push(`${baseUrl}/sitemap/${entity}-${i}.xml`);
+      }
+    }
+
+    return new Response(generateSitemapIndex(sitemapUrls), {
+      status: 200,
+      headers: {
+        'Content-Type': 'application/xml',
+        'Cache-Control': `public, max-age=${cacheDuration}`,
+      },
+    });
+  } catch (error) {
+    console.error('Error generating sitemap index:', error);
+
+    return new Response(
+      `${XML_HEADER}\n${SITEMAP_INDEX_OPEN}\n</sitemapindex>`,
+      {
+        status: 500,
+        headers: {
+          'Content-Type': 'application/xml',
+        },
+      }
+    );
+  }
+}
+
+export const dynamic = 'force-dynamic';
diff --git a/lib/utils.ts b/lib/utils.ts
index 1f9bfe7b..500daf5d 100644
--- a/lib/utils.ts
+++ b/lib/utils.ts
@@ -1,5 +1,5 @@
-import { twMerge, type ClassNameValue } from 'tailwind-merge';
 import { Metadata } from 'next';
+import { twMerge, type ClassNameValue } from 'tailwind-merge';
 
 type MetadataOptions = {
   title?: string;
@@ -19,12 +19,10 @@ type MetadataOptions = {
   };
 };
 
-
 export function generatePageMetadata(options: MetadataOptions = {}): Metadata {
   return {
     title: options.title,
-    description:
-      options.description,
+    description: options.description,
     keywords: options.keywords,
     openGraph: {
       type: 'website',
@@ -43,11 +41,9 @@ export function generatePageMetadata(options: MetadataOptions = {}): Metadata {
       images: options.openGraph?.image,
       creator: 'CivicDataLab',
     },
-
   };
 }
 
-
 export interface JsonLdSchema {
   '@context': 'https://schema.org';
   '@type': string;
@@ -58,8 +54,6 @@ export function generateJsonLd(schema: JsonLdSchema): string {
   return JSON.stringify(schema, null, 2);
 }
 
-
-
 export function cn(...inputs: ClassNameValue[]) {
   return twMerge(inputs);
 }
@@ -128,8 +122,6 @@ export const range = (len: number) => {
   return arr;
 };
 
-
-
 export function handleRedirect(event: any, link: any) {
   event.preventDefault();
   const confirmation = window.confirm(
@@ -140,7 +132,6 @@ export function handleRedirect(event: any, link: any) {
   }
 }
 
-
 export function formatDateString(
   input: string | number | any,
   isHyphenated = false
@@ -149,22 +140,21 @@ export function formatDateString(
   // If hyphendated it would return date in this format - 2023-01-01 else in April 1, 2021
   return isHyphenated
     ? new Date(
-      date.toLocaleDateString('en-US', {
-        year: 'numeric',
-        month: 'numeric',
-        // day: 'numeric',
-      })
-    )
-      .toISOString()
-      .split('T')[0]
+        date.toLocaleDateString('en-US', {
+          year: 'numeric',
+          month: 'numeric',
+          // day: 'numeric',
+        })
+      )
+        .toISOString()
+        .split('T')[0]
     : date.toLocaleDateString('en-US', {
-      month: 'long',
-      // day: 'numeric',
-      year: 'numeric',
-    });
+        month: 'long',
+        // day: 'numeric',
+        year: 'numeric',
+      });
 }
 
-
 export async function getWebsiteTitle(url: string): Promise {
   try {
     const response = await fetch(url);
@@ -181,3 +171,224 @@ export async function getWebsiteTitle(url: string): Promise {
   }
 }
 
+// Feature Sitemaps
+
+/**
+ * Parses an integer setting from the environment.
+ *
+ * @param raw Raw environment value, possibly unset.
+ * @param min Smallest acceptable value.
+ * @param fallback Value used when `raw` is unset, non-numeric or below `min`.
+ */
+const readIntSetting = (
+  raw: string | undefined,
+  min: number,
+  fallback: number
+): number => {
+  const parsed = Number.parseInt(raw ?? '', 10);
+  return Number.isNaN(parsed) || parsed < min ? fallback : parsed;
+};
+
+// Get configuration from environment
+export const getSiteMapConfig = () => ({
+  // At least 1: used as the divisor when counting sitemap pages.
+  itemsPerPage: readIntSetting(
+    process.env.FEATURE_SITEMAP_ITEMS_PER_PAGE,
+    1,
+    1000
+  ),
+  // Cache lifetimes in seconds; 0 is allowed.
+  cacheDuration: readIntSetting(
+    process.env.FEATURE_SITEMAP_CACHE_DURATION,
+    0,
+    3600
+  ),
+  childCacheDuration: readIntSetting(
+    process.env.FEATURE_SITEMAP_CHILD_CACHE_DURATION,
+    0,
+    21600
+  ),
+});
+
+export type ENTITY_CONFIG_TYPE = Record<
+  string,
+  {
+    // search for Elasticsearch type queries
+    // graphql for GraphQL type queries
+    source: 'search' | 'graphql';
+    // For Elasticsearch type queries
+    endpoint?: string;
+    // For GraphQL type queries
+    graphqlQuery?: string;
+    queryResKey?: string;
+    path: string;
+    priority: string;
+  }
+>;
+
+// Sitemaps are served when FEATURE_SITEMAPS is 'true' or NODE_ENV is
+// 'production'.
+export const isSitemapEnabled = () => {
+  return (
+    process.env.FEATURE_SITEMAPS === 'true' ||
+    process.env.NODE_ENV === 'production'
+  );
+};
+
+// Entity Config
+export const ENTITY_CONFIG: ENTITY_CONFIG_TYPE = {
+  datasets: {
+    source: 'search',
+    endpoint: '/search/dataset/',
+    // ?=&size=9&page=1&sort=recent
+    path: 'datasets',
+    priority: '0.8',
+  },
+  usecases: {
+    source: 'graphql',
+    graphqlQuery: `query UseCasesList {
+      useCases {
+        id
+        slug
+      }
+    }`,
+    queryResKey: 'useCases',
+    path: 'usecases',
+    priority: '0.7',
+  },
+  contributors: {
+    source: 'graphql',
+    graphqlQuery: `query getContributors {
+      getPublishers {
+        __typename
+        ... on TypeOrganization {
+          id
+        }
+        ... on TypeUser {
+          id
+        }
+      }
+    }`,
+    queryResKey: 'getPublishers',
+    path: 'publishers',
+    priority: '0.6',
+  },
+  sectors: {
+    source: 'graphql',
+    graphqlQuery: `query SectorsLists {
+      activeSectors {
+        id
+        slug
+      }
+    }`,
+    queryResKey: 'activeSectors',
+    path: 'sectors',
+    priority: '0.6',
+  },
+};
+
+// One entry of an entity sitemap, as read from a backend response.
+export interface SitemapEntityItem {
+  id: string;
+  slug?: string;
+  updated_at?: string;
+  __typename?: 'TypeUser' | 'TypeOrganization';
+}
+
+export interface SitemapEntityResult {
+  entityName: string;
+  count: number;
+  list: SitemapEntityItem[];
+}
+
+// The two fetchers below live here rather than in a route.ts file because
+// Next.js rejects route modules that export anything besides handlers and
+// route segment config.
+
+/**
+ * Runs an entity's GraphQL query and returns the rows with their count.
+ *
+ * @param entity Entity name, echoed back as `entityName`.
+ * @param config The entity's config; supplies the query and result key.
+ * @returns The rows; an empty result when the request or parsing fails.
+ */
+export async function getGraphqlEntityCount(
+  entity: string,
+  config: ENTITY_CONFIG_TYPE[string]
+): Promise<SitemapEntityResult> {
+  try {
+    const response = await fetch(
+      `${process.env.FEATURE_SITEMAP_BACKEND_BASE_URL}/graphql`,
+      {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+        },
+        body: JSON.stringify({
+          query: config.graphqlQuery,
+          variables: {},
+        }),
+      }
+    );
+    if (!response.ok) {
+      throw new Error(`GraphQL request failed: ${response.status}`);
+    }
+    const data = await response.json();
+
+    // Anything that is not an array (missing key, error payload) counts as
+    // no rows.
+    const rows = data?.data?.[config.queryResKey as string];
+    const list: SitemapEntityItem[] = Array.isArray(rows) ? rows : [];
+
+    return { entityName: entity, count: list.length, list };
+  } catch (error) {
+    console.error(`Error fetching count for ${entity}:`, error);
+    return { entityName: entity, count: 0, list: [] };
+  }
+}
+
+/**
+ * Fetches one page of an entity from the search endpoint.
+ *
+ * @param entity Key into `ENTITY_CONFIG`; its `endpoint` is queried.
+ * @param size Number of items per page.
+ * @param page Page number passed to the endpoint.
+ * @returns The page's items plus the endpoint's reported total; an empty
+ *   result when the entity has no endpoint or the request fails.
+ */
+export async function getSearchEntityCount(
+  entity: string,
+  size: number,
+  page: number
+): Promise<SitemapEntityResult> {
+  try {
+    const endpoint = ENTITY_CONFIG[entity]?.endpoint;
+    if (!endpoint) {
+      throw new Error(`No search endpoint configured for ${entity}`);
+    }
+
+    const response = await fetch(
+      `${process.env.FEATURE_SITEMAP_BACKEND_BASE_URL}${endpoint}?sort=recent&size=${size}&page=${page}`,
+      {
+        method: 'GET',
+        headers: {
+          'Content-Type': 'application/json',
+        },
+        next: { revalidate: 3600 },
+      }
+    );
+    if (!response.ok) {
+      throw new Error(`Search request failed: ${response.status}`);
+    }
+    const data = await response.json();
+
+    return {
+      entityName: entity,
+      count: typeof data?.total === 'number' ? data.total : 0,
+      list: Array.isArray(data?.results) ? data.results : [],
+    };
+  } catch (error) {
+    console.error(`Error fetching count for ${entity}:`, error);
+    return { entityName: entity, count: 0, list: [] };
+  }
+}