@@ -25,7 +25,21 @@ import Data.ByteString.Lazy (ByteString)
25
25
import Data.Time.Clock (UTCTime (.. ), getCurrentTime )
26
26
import Data.Time.Calendar (showGregorian )
27
27
import Network.URI
28
+ import Control.DeepSeq
29
+ import Text.Read
30
+ import Data.List.Split
28
31
32
+ data Sitemap
33
+ = Sitemap
34
+ { sitemapIndex :: XMLResponse
35
+ , sitemaps :: [XMLResponse ]
36
+ }
37
+
38
+ instance NFData Sitemap where
39
+ rnf (Sitemap i s) = rnf i `seq` rnf s
40
+
41
+ instance MemSize Sitemap where
42
+ memSize (Sitemap i s) = memSize2 i s
29
43
30
44
data SitemapFeature = SitemapFeature {
31
45
sitemapFeatureInterface :: HackageFeature
@@ -67,8 +81,8 @@ sitemapFeature :: ServerEnv
67
81
-> DocumentationFeature
68
82
-> TagsFeature
69
83
-> UTCTime
70
- -> AsyncCache XMLResponse
71
- -> (SitemapFeature , IO XMLResponse )
84
+ -> AsyncCache Sitemap
85
+ -> (SitemapFeature , IO Sitemap )
72
86
sitemapFeature ServerEnv {.. }
73
87
CoreFeature {.. }
74
88
DocumentationFeature {.. }
@@ -79,50 +93,70 @@ sitemapFeature ServerEnv{..}
79
93
where
80
94
81
95
sitemapFeatureInterface = (emptyHackageFeature " sitemap" ) {
82
- featureResources = [ xmlSitemapResource ]
96
+ featureResources = [ xmlSitemapIndexResource, xmlSitemapResource ]
83
97
, featureState = []
84
- , featureDesc = " Provides a sitemap.xml for search engines"
98
+ , featureDesc = " Provides sitemap for search engines"
85
99
, featureCaches =
86
100
[ CacheComponent {
87
- cacheDesc = " sitemap.xml " ,
101
+ cacheDesc = " sitemap" ,
88
102
getCacheMemSize = memSize <$> readAsyncCache sitemapCache
89
103
}
90
104
]
91
105
, featurePostInit = do
92
106
syncAsyncCache sitemapCache
93
107
addCronJob serverCron CronJob {
94
- cronJobName = " regenerate the cached sitemap.xml " ,
108
+ cronJobName = " regenerate the cached sitemap" ,
95
109
cronJobFrequency = DailyJobFrequency ,
96
110
cronJobOneShot = False ,
97
111
cronJobAction = prodAsyncCache sitemapCache " cron"
98
112
}
99
113
}
100
114
115
+ xmlSitemapIndexResource :: Resource
116
+ xmlSitemapIndexResource = (resourceAt " /sitemap_index.xml" ) {
117
+ resourceDesc = [(GET , " The dynamically generated sitemap index, in XML format" )]
118
+ , resourceGet = [(" xml" , serveSitemapIndex)]
119
+ }
120
+
101
121
xmlSitemapResource :: Resource
102
- xmlSitemapResource = (resourceAt " /sitemap.xml " ) {
122
+ xmlSitemapResource = (resourceAt " /sitemap/:filename " ) {
103
123
resourceDesc = [(GET , " The dynamically generated sitemap, in XML format" )]
104
124
, resourceGet = [(" xml" , serveSitemap)]
105
125
}
106
126
107
- serveSitemap :: DynamicPath -> ServerPartE Response
108
- serveSitemap _ = do
109
- sitemapXML <- liftIO $ readAsyncCache sitemapCache
127
+ serveSitemapIndex :: DynamicPath -> ServerPartE Response
128
+ serveSitemapIndex _ = do
129
+ Sitemap { .. } <- liftIO $ readAsyncCache sitemapCache
110
130
cacheControlWithoutETag [Public , maxAgeDays 1 ]
111
- return (toResponse sitemapXML)
131
+ return (toResponse sitemapIndex)
132
+
133
+ serveSitemap :: DynamicPath -> ServerPartE Response
134
+ serveSitemap dpath =
135
+ case lookup " filename" dpath of
136
+ Just filename
137
+ | [basename, " xml" ] <- splitOn " ." filename
138
+ , Just i <- readMaybe basename -> do
139
+ Sitemap {.. } <- liftIO $ readAsyncCache sitemapCache
140
+ guard (i < length sitemaps)
141
+ cacheControlWithoutETag [Public , maxAgeDays 1 ]
142
+ return (toResponse (sitemaps !! i))
143
+ _ -> mzero
112
144
113
145
-- Generates a list of sitemap entries corresponding to hackage pages, then
114
146
-- builds and returns an XML sitemap.
115
- updateSitemapCache :: IO XMLResponse
147
+ updateSitemapCache :: IO Sitemap
116
148
updateSitemapCache = do
117
149
118
150
alltags <- queryGetTagList
119
151
pkgIndex <- queryGetPackageIndex
120
152
docIndex <- queryDocumentationIndex
121
153
122
- let sitemap = generateSitemap serverBaseURI pageBuildDate
154
+ let sitemaps = generateSitemap serverBaseURI pageBuildDate
123
155
(map fst alltags)
124
156
pkgIndex docIndex
125
- return (XMLResponse sitemap)
157
+ uriScheme i = " /sitemap/" <> show i <> " .xml"
158
+ sitemapIndex = renderSitemapIndex serverBaseURI (map uriScheme [0 .. (length sitemaps - 1 )])
159
+ return $ Sitemap (XMLResponse sitemapIndex) (map XMLResponse sitemaps)
126
160
127
161
pageBuildDate :: T. Text
128
162
pageBuildDate = T. pack (showGregorian (utctDay initTime))
@@ -132,9 +166,9 @@ generateSitemap :: URI
132
166
-> [Tag ]
133
167
-> PackageIndex. PackageIndex PkgInfo
134
168
-> Map. Map PackageId a
135
- -> ByteString
169
+ -> [ ByteString ]
136
170
generateSitemap serverBaseURI pageBuildDate alltags pkgIndex docIndex =
137
- renderSitemap serverBaseURI allEntries
171
+ renderSitemap serverBaseURI <$> chunksOf 50000 allEntries
138
172
where
139
173
-- Combine and build sitemap
140
174
allEntries = miscEntries
0 commit comments