@@ -11,15 +11,14 @@ import (
1111 "strconv"
1212 "strings"
1313
14+ "jaytaylor.com/html2text"
1415 "github.com/fxamacker/cbor/v2"
1516 "github.com/goccy/go-json"
1617 "github.com/shamaton/msgpack/v2"
1718 minify "github.com/tdewolff/minify/v2"
1819 mjson "github.com/tdewolff/minify/v2/json"
1920)
2021
21- const lang = ""
22-
2322type Tag struct {
2423 XMLName xml.Name
2524 Content string `xml:",innerxml"`
@@ -54,13 +53,21 @@ type Releases struct {
5453}
5554
5655type Component struct {
57- Name []Tag `xml:"name"`
58- Screenshots []Screenshot `xml:"screenshots>screenshot"`
59- Summary []Tag `xml:"summary"`
60- Description []Tag `xml:"description>p"`
61- Categories []Tag `xml:"categories>category"`
62- Keywords []Tag `xml:"keywords>keyword"`
63- Icons []struct {
56+ Names []struct {
57+ Lang string `xml:"lang,attr"`
58+ Content string `xml:",chardata"`
59+ } `xml:"name"`
60+ Summaries []struct {
61+ Lang string `xml:"lang,attr"`
62+ Content string `xml:",chardata"`
63+ } `xml:"summary"`
64+ Descriptions []struct {
65+ Lang string `xml:"lang,attr"`
66+ Content string `xml:",innerxml"`
67+ } `xml:"description"`
68+ Categories []Tag `xml:"categories>category"`
69+ Keywords []Tag `xml:"keywords>keyword"`
70+ Icons []struct {
6471 Type string `xml:"type,attr"`
6572 Width string `xml:"width,attr"`
6673 Height string `xml:"height,attr"`
@@ -78,6 +85,7 @@ type Component struct {
7885 } `xml:"launchable"`
7986 ContentRating []Tag `xml:"content_rating"`
8087 Releases Releases `xml:"releases"`
88+ Screenshots []Screenshot `xml:"screenshots>screenshot"`
8189}
8290
8391type AppStreamData struct {
@@ -126,24 +134,32 @@ func saveCBOR(filename string, metadata []AppStreamData) error {
126134 }
127135 return os .WriteFile (filename + ".cbor" , cborData , 0644 )
128136}
137+
129138func saveJSON (filename string , metadata []AppStreamData ) error {
130- jsonData , err := json .MarshalIndent (metadata , "" , " " )
131- if err != nil {
139+ var buffer strings.Builder
140+ encoder := json .NewEncoder (& buffer )
141+ encoder .SetEscapeHTML (false ) // Prevent escaping HTML tags
142+ encoder .SetIndent ("" , " " )
143+
144+ if err := encoder .Encode (metadata ); err != nil {
132145 return err
133146 }
147+
148+ jsonData := []byte (buffer .String ())
134149 if err := os .WriteFile (filename + ".json" , jsonData , 0644 ); err != nil {
135150 return err
136151 }
137- // Minify JSON
152+
138153 m := minify .New ()
139154 m .AddFunc ("application/json" , mjson .Minify )
140- if jsonData , err = m .Bytes ("application/json" , jsonData ); err != nil {
155+ if minifiedData , err : = m .Bytes ("application/json" , jsonData ); err != nil {
141156 return err
142- } else if err := os .WriteFile (filename + ".min.json" , jsonData , 0644 ); err != nil {
157+ } else if err := os .WriteFile (filename + ".min.json" , minifiedData , 0644 ); err != nil {
143158 return err
144159 }
145160 return nil
146161}
162+
147163func saveMsgp (filename string , metadata []AppStreamData ) error {
148164 msgpData , err := msgpack .Marshal (metadata )
149165 if err != nil {
@@ -162,15 +178,24 @@ func getCategoriesString(categories []Tag) string {
162178 return strings .Join (categoryStrings , "," )
163179}
164180
165- func getRichDescription (descriptions []Tag ) string {
181+ func getRichDescription (descriptions []struct {
182+ Lang string `xml:"lang,attr"`
183+ Content string `xml:",innerxml"`
184+ }) string {
166185 return getContentByLang (descriptions )
167186}
168187
169- func getName (names []Tag ) string {
188+ func getName (names []struct {
189+ Lang string `xml:"lang,attr"`
190+ Content string `xml:",chardata"`
191+ }) string {
170192 return getContentByLang (names )
171193}
172194
173- func getSummary (summaries []Tag ) string {
195+ func getSummary (summaries []struct {
196+ Lang string `xml:"lang,attr"`
197+ Content string `xml:",chardata"`
198+ }) string {
174199 return getContentByLang (summaries )
175200}
176201
@@ -182,12 +207,60 @@ func getContentRating(ratings []Tag) string {
182207 return contentRating .String ()
183208}
184209
// getContentByLang picks the content of the best-matching localized element.
//
// Elements are expected to be one of the two anonymous struct shapes used
// for localized fields: a Lang attribute plus a Content string tagged as
// either chardata or innerxml. Selection priority:
//
//  1. the first element whose Lang is "en", "en_US" or "en_GB";
//  2. otherwise the first element whose Lang is empty;
//  3. otherwise the first element of the slice.
//
// The selected content is returned with leading and trailing whitespace
// trimmed. An empty slice, or a slice of any other element type, yields "".
func getContentByLang[T any](elements []T) string {
	// Remember the first empty-Lang entry while scanning for an English one,
	// so a single pass covers the first two priority levels.
	noLangContent, haveNoLang := "", false

	for _, elem := range elements {
		lang, content, ok := langAndContent(elem)
		if !ok {
			continue
		}
		switch lang {
		case "en", "en_US", "en_GB":
			return strings.TrimSpace(content)
		case "":
			if !haveNoLang {
				noLangContent, haveNoLang = content, true
			}
		}
	}

	if haveNoLang {
		return strings.TrimSpace(noLangContent)
	}

	// Last resort: whatever comes first, regardless of its language.
	if len(elements) > 0 {
		if _, content, ok := langAndContent(elements[0]); ok {
			return strings.TrimSpace(content)
		}
	}

	return ""
}

// langAndContent extracts the Lang and Content fields from either supported
// localized-element shape; ok is false for any other type.
func langAndContent(elem any) (lang, content string, ok bool) {
	switch v := elem.(type) {
	case struct {
		Lang    string `xml:"lang,attr"`
		Content string `xml:",chardata"`
	}:
		return v.Lang, v.Content, true
	case struct {
		Lang    string `xml:"lang,attr"`
		Content string `xml:",innerxml"`
	}:
		return v.Lang, v.Content, true
	}
	return "", "", false
}
193266
@@ -231,7 +304,6 @@ func main() {
231304 }
232305
233306 for _ , screenshot := range component .Screenshots {
234- // Sort images by area (largest first)
235307 sort .Slice (screenshot .Images , func (i , j int ) bool {
236308 widthI , _ := strconv .Atoi (screenshot .Images [i ].Width )
237309 heightI , _ := strconv .Atoi (screenshot .Images [i ].Height )
@@ -250,9 +322,9 @@ func main() {
250322 }
251323
252324 categories := getCategoriesString (component .Categories )
253- richDescription := getRichDescription (component .Description )
254- name := getName (component .Name )
255- summary := getSummary (component .Summary )
325+ richDescription := getRichDescription (component .Descriptions )
326+ name := getName (component .Names )
327+ summary := getSummary (component .Summaries )
256328 contentRating := getContentRating (component .ContentRating )
257329 version := ""
258330 if len (component .Releases .Release ) > 0 {
0 commit comments