@@ -26,7 +26,7 @@ import remarkStringify from 'remark-stringify';
2626import { unified } from 'unified' ;
2727import { remove } from 'unist-util-remove' ;
2828
29- const DOCS_BASE_URL = 'https://docs.sentry.io/ ' ;
29+ const DOCS_ORIGIN = 'https://docs.sentry.io' ;
3030const CACHE_VERSION = 3 ;
3131const CACHE_COMPRESS_LEVEL = 4 ;
3232const R2_BUCKET = process . env . NEXT_PUBLIC_DEVELOPER_DOCS
@@ -217,12 +217,15 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) {
217217 }
218218 }
219219 }
220-
220+ let baseUrl = DOCS_ORIGIN ;
221221 const data = String (
222222 await unified ( )
223223 . use ( rehypeParse )
224224 // Need the `head > title` selector for the headers
225- . use ( ( ) => tree => selectAll ( 'head > title, div#main' , tree ) )
225+ . use (
226+ ( ) => tree =>
227+ selectAll ( 'head > title, head > link[rel="canonical"], div#main' , tree )
228+ )
226229 // If we don't do this wrapping, rehypeRemark just returns an empty string -- yeah WTF?
227230 . use ( ( ) => tree => ( {
228231 type : 'element' ,
@@ -233,6 +236,12 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) {
233236 . use ( rehypeRemark , {
234237 document : false ,
235238 handlers : {
239+ // HACK: Extract the canonical URL during parsing
240+ link : ( _state , node ) => {
241+ if ( node . properties . rel . includes ( 'canonical' ) && node . properties . href ) {
242+ baseUrl = node . properties . href ;
243+ }
244+ } ,
236245 // Remove buttons as they usually get confusing in markdown, especially since we use them as tab headers
237246 button ( ) { } ,
238247 // Convert the title to the top level heading
@@ -254,15 +263,18 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) {
254263 // There's a chance we might be changing absolute URLs here
255264 // We'll check the code base and fix that later
256265 replacer : url => {
257- const mdUrl = new URL ( url , DOCS_BASE_URL ) ;
266+ const mdUrl = new URL ( url , baseUrl ) ;
267+ if ( mdUrl . origin !== DOCS_ORIGIN ) {
268+ return url ;
269+ }
258270 const newPathName = mdUrl . pathname . replace ( / \/ ? $ / , '' ) ;
259271 if ( path . extname ( newPathName ) === '' ) {
260272 mdUrl . pathname = `${ newPathName } .md` ;
261273 }
262274 return mdUrl ;
263275 } ,
264276 } )
265- . use ( imgLinks , { absolutePath : DOCS_BASE_URL } )
277+ . use ( imgLinks , { absolutePath : DOCS_ORIGIN } )
266278 // We end up with empty inline code blocks, probably from some tab logic in the HTML, remove them
267279 . use ( ( ) => tree => remove ( tree , { type : 'inlineCode' , value : '' } ) )
268280 . use ( remarkGfm )
0 commit comments