Skip to content

Commit 9fe9494

Browse files
committed
Divide sitemap into parts
1 parent ae4f14e commit 9fe9494

File tree

2 files changed

+67
-16
lines changed

2 files changed

+67
-16
lines changed

src/Distribution/Server/Features/Sitemap.hs

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,21 @@ import Data.ByteString.Lazy (ByteString)
2525
import Data.Time.Clock (UTCTime(..), getCurrentTime)
2626
import Data.Time.Calendar (showGregorian)
2727
import Network.URI
28+
import Control.DeepSeq
29+
import Text.Read
30+
import Data.List.Split
2831

32+
data Sitemap
33+
= Sitemap
34+
{ sitemapIndex :: XMLResponse
35+
, sitemaps :: [XMLResponse]
36+
}
37+
38+
instance NFData Sitemap where
39+
rnf (Sitemap i s) = rnf i `seq` rnf s
40+
41+
instance MemSize Sitemap where
42+
memSize (Sitemap i s) = memSize2 i s
2943

3044
data SitemapFeature = SitemapFeature {
3145
sitemapFeatureInterface :: HackageFeature
@@ -67,8 +81,8 @@ sitemapFeature :: ServerEnv
6781
-> DocumentationFeature
6882
-> TagsFeature
6983
-> UTCTime
70-
-> AsyncCache XMLResponse
71-
-> (SitemapFeature, IO XMLResponse)
84+
-> AsyncCache Sitemap
85+
-> (SitemapFeature, IO Sitemap)
7286
sitemapFeature ServerEnv{..}
7387
CoreFeature{..}
7488
DocumentationFeature{..}
@@ -79,50 +93,70 @@ sitemapFeature ServerEnv{..}
7993
where
8094

8195
sitemapFeatureInterface = (emptyHackageFeature "sitemap") {
82-
featureResources = [ xmlSitemapResource ]
96+
featureResources = [ xmlSitemapIndexResource, xmlSitemapResource ]
8397
, featureState = []
84-
, featureDesc = "Provides a sitemap.xml for search engines"
98+
, featureDesc = "Provides sitemap for search engines"
8599
, featureCaches =
86100
[ CacheComponent {
87-
cacheDesc = "sitemap.xml",
101+
cacheDesc = "sitemap",
88102
getCacheMemSize = memSize <$> readAsyncCache sitemapCache
89103
}
90104
]
91105
, featurePostInit = do
92106
syncAsyncCache sitemapCache
93107
addCronJob serverCron CronJob {
94-
cronJobName = "regenerate the cached sitemap.xml",
108+
cronJobName = "regenerate the cached sitemap",
95109
cronJobFrequency = DailyJobFrequency,
96110
cronJobOneShot = False,
97111
cronJobAction = prodAsyncCache sitemapCache "cron"
98112
}
99113
}
100114

115+
xmlSitemapIndexResource :: Resource
116+
xmlSitemapIndexResource = (resourceAt "/sitemap_index.xml") {
117+
resourceDesc = [(GET, "The dynamically generated sitemap index, in XML format")]
118+
, resourceGet = [("xml", serveSitemapIndex)]
119+
}
120+
101121
xmlSitemapResource :: Resource
102-
xmlSitemapResource = (resourceAt "/sitemap.xml") {
122+
xmlSitemapResource = (resourceAt "/sitemap/:filename") {
103123
resourceDesc = [(GET, "The dynamically generated sitemap, in XML format")]
104124
, resourceGet = [("xml", serveSitemap)]
105125
}
106126

107-
serveSitemap :: DynamicPath -> ServerPartE Response
108-
serveSitemap _ = do
109-
sitemapXML <- liftIO $ readAsyncCache sitemapCache
127+
serveSitemapIndex :: DynamicPath -> ServerPartE Response
128+
serveSitemapIndex _ = do
129+
Sitemap{..} <- liftIO $ readAsyncCache sitemapCache
110130
cacheControlWithoutETag [Public, maxAgeDays 1]
111-
return (toResponse sitemapXML)
131+
return (toResponse sitemapIndex)
132+
133+
serveSitemap :: DynamicPath -> ServerPartE Response
134+
serveSitemap dpath =
135+
case lookup "filename" dpath of
136+
Just filename
137+
| [basename, "xml"] <- splitOn "." filename
138+
, Just i <- readMaybe basename -> do
139+
Sitemap{..} <- liftIO $ readAsyncCache sitemapCache
140+
guard (i < length sitemaps)
141+
cacheControlWithoutETag [Public, maxAgeDays 1]
142+
return (toResponse (sitemaps !! i))
143+
_ -> mzero
112144

113145
-- Generates a list of sitemap entries corresponding to hackage pages, then
114146
-- builds and returns an XML sitemap.
115-
updateSitemapCache :: IO XMLResponse
147+
updateSitemapCache :: IO Sitemap
116148
updateSitemapCache = do
117149

118150
alltags <- queryGetTagList
119151
pkgIndex <- queryGetPackageIndex
120152
docIndex <- queryDocumentationIndex
121153

122-
let sitemap = generateSitemap serverBaseURI pageBuildDate
154+
let sitemaps = generateSitemap serverBaseURI pageBuildDate
123155
(map fst alltags)
124156
pkgIndex docIndex
125-
return (XMLResponse sitemap)
157+
uriScheme i = "/sitemap/" <> show i <> ".xml"
158+
sitemapIndex = renderSitemapIndex serverBaseURI (map uriScheme [0..(length sitemaps - 1)])
159+
return $ Sitemap (XMLResponse sitemapIndex) (map XMLResponse sitemaps)
126160

127161
pageBuildDate :: T.Text
128162
pageBuildDate = T.pack (showGregorian (utctDay initTime))
@@ -132,9 +166,9 @@ generateSitemap :: URI
132166
-> [Tag]
133167
-> PackageIndex.PackageIndex PkgInfo
134168
-> Map.Map PackageId a
135-
-> ByteString
169+
-> [ByteString]
136170
generateSitemap serverBaseURI pageBuildDate alltags pkgIndex docIndex =
137-
renderSitemap serverBaseURI allEntries
171+
renderSitemap serverBaseURI <$> chunksOf 50000 allEntries
138172
where
139173
-- Combine and build sitemap
140174
allEntries = miscEntries

src/Distribution/Server/Features/Sitemap/Functions.hs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
module Distribution.Server.Features.Sitemap.Functions (
2424
SitemapEntry
2525
, ChangeFreq(..)
26+
, renderSitemapIndex
2627
, renderSitemap
2728
, urlsToSitemapEntries
2829
, pathsAndDatesToSitemapEntries
@@ -47,6 +48,22 @@ data SitemapEntry = SitemapEntry {
4748

4849
data ChangeFreq = Monthly | Weekly | Daily
4950

51+
-- | Generate a sitemap index file from each sitemap uri.
52+
renderSitemapIndex :: URI -> [String] -> ByteString
53+
renderSitemapIndex serverBaseURI sitemaps =
54+
xrender $
55+
doc defaultDocInfo $
56+
xelem "sitemapindex" $
57+
xattr "xmlns" "http://www.sitemaps.org/schemas/sitemap/0.9"
58+
<#> map renderLink sitemaps
59+
where
60+
serverBaseURI' = T.pack (show serverBaseURI)
61+
renderLink :: String -> Xml Elem
62+
renderLink uri = xelem "sitemap" $
63+
xelems [
64+
xelem "loc" (xtext (serverBaseURI' <> T.pack (uri)))
65+
]
66+
5067
-- | Primary function - generates the XML file from a list of Nodes.
5168
renderSitemap :: URI -> [SitemapEntry] -> ByteString
5269
renderSitemap serverBaseURI entries =

0 commit comments

Comments
 (0)