// xml.go

package epo_ops
import (
_ "embed"
"encoding/xml"
"fmt"
"reflect"
"sort"
"strings"
"sync"
)
// Embed XSD schemas into the binary at compile time.
// When users import this library, these files are compiled into their binary,
// so no filesystem access is needed at runtime. The blank "embed" import in
// this file's import block is what enables the //go:embed directives below.

// exchangeDocumentsXSD holds the contents of resources/exchange-documents.xsd.
//
//go:embed resources/exchange-documents.xsd
var exchangeDocumentsXSD string

// fulltextDocumentsXSD holds the contents of resources/fulltext-documents.xsd.
//
//go:embed resources/fulltext-documents.xsd
var fulltextDocumentsXSD string

// opsLegalXSD holds the contents of resources/ops_legal.xsd.
//
//go:embed resources/ops_legal.xsd
var opsLegalXSD string

// opsXSD holds the contents of resources/ops.xsd.
//
//go:embed resources/ops.xsd
var opsXSD string

// cpcSchemaXSD holds the contents of resources/CPCSchema.xsd.
//
//go:embed resources/CPCSchema.xsd
var cpcSchemaXSD string
// GetEmbeddedXSD looks up one of the XSD schemas compiled into the binary.
// Callers can use the returned text for their own validation.
//
// Recognized names: "exchange-documents", "fulltext-documents", "ops_legal",
// "ops" and "cpc". The boolean result is false (and the string empty) for any
// other name.
func GetEmbeddedXSD(name string) (string, bool) {
	switch name {
	case "exchange-documents":
		return exchangeDocumentsXSD, true
	case "fulltext-documents":
		return fulltextDocumentsXSD, true
	case "ops_legal":
		return opsLegalXSD, true
	case "ops":
		return opsXSD, true
	case "cpc":
		return cpcSchemaXSD, true
	default:
		return "", false
	}
}
// XML Parsing Structs and Functions
// AbstractData represents a parsed patent abstract, as produced by ParseAbstract.
type AbstractData struct {
XMLName xml.Name `xml:"world-patent-data"` // not filled in by ParseAbstract, which builds the value field by field
PatentNumber string // Country + DocNumber + Kind; left empty unless all three are non-empty
Country string // "country" attribute of the exchange-document element
DocNumber string // "doc-number" attribute of the exchange-document element
Kind string // "kind" attribute of the exchange-document element
Language string // "lang" attribute of the abstract element
Text string // text of the abstract's <p> element, whitespace-trimmed
}
// BiblioData represents parsed bibliographic data, as produced by ParseBiblio.
type BiblioData struct {
XMLName xml.Name `xml:"world-patent-data"` // not filled in by ParseBiblio, which builds the value field by field
PatentNumber string // Country + DocNumber + Kind; left empty unless all three are non-empty
Country string // "country" attribute of the exchange-document element
DocNumber string // "doc-number" attribute of the exchange-document element
Kind string // "kind" attribute of the exchange-document element
PublicationDate string // date of the first "docdb" publication document-id that carries a date
FamilyID string // "family-id" attribute of the exchange-document element
Titles map[string]string // lang -> title; always non-nil after ParseBiblio
Applicants []Party // applicants listed in "epodoc" data format only
Inventors []Party // inventors listed in "epodoc" data format only
IPCClasses []string // whitespace-trimmed IPCR classification texts
CPCClasses []CPCClass // one entry per patent-classification element
}
// ClaimsData represents parsed patent claims, as produced by ParseClaims.
type ClaimsData struct {
XMLName xml.Name `xml:"world-patent-data"` // not filled in by ParseClaims, which builds the value field by field
PatentNumber string // Country + DocNumber + Kind; left empty unless all three are non-empty
Country string // <country> of the fulltext document's publication document-id
DocNumber string // <doc-number> of the fulltext document's publication document-id
Kind string // <kind> of the fulltext document's publication document-id
Language string // "lang" attribute of the claims element
Claims []Claim // one entry per non-empty claim-text element, in document order
}
// Party represents an applicant or inventor.
// ParseBiblio fills it from names of the form "NAME [CC]".
type Party struct {
Name string // party name with any trailing "[CC]" suffix removed
Country string // text found between the last "[" and the following "]" of the raw name; empty if absent
}
// CPCClass represents a Cooperative Patent Classification.
// ParseBiblio copies the component fields verbatim from the
// patent-classification element and derives Full from them.
type CPCClass struct {
Section string // <section> element text
Class string // <class> element text
Subclass string // <subclass> element text
MainGroup string // <main-group> element text
Subgroup string // <subgroup> element text
Full string // Combined representation: Section+Class+Subclass, a space, then MainGroup "/" Subgroup (e.g., "H04W 84/20")
}
// Claim represents a single patent claim.
type Claim struct {
Number int // 1-based position of the claim-text element in the document (set by ParseClaims)
Text string // claim text with surrounding whitespace removed
}
// FamilyMember represents a single member of a patent family, as produced by
// ParseFamily from one family-member element.
type FamilyMember struct {
FamilyID string // "family-id" attribute of the family-member element
Country string // country of the member's first publication document-id
DocNumber string // doc-number of the member's first publication document-id
Kind string // kind of the member's first publication document-id
Date string // date of the member's first publication document-id
Title string // Invention title (populated from biblio data, English preferred)
Applicants []string // Applicant names (populated from biblio data, epodoc format)
ApplicationRef ApplicationReference // the member's application-reference
PriorityClaims []PriorityClaim // one entry per priority-claim element; nil when there are none
}
// ApplicationReference represents the application reference for a family member.
type ApplicationReference struct {
Country string // <country> of the application document-id
DocNumber string // <doc-number> of the application document-id
Kind string // <kind> of the application document-id
Date string // <date> of the application document-id
DocID string // "doc-id" attribute of the application-reference element
}
// PriorityClaim represents a priority claim for a family member.
type PriorityClaim struct {
Country string // <country> of the priority document-id
DocNumber string // <doc-number> of the priority document-id
Kind string // <kind> of the priority document-id (not the "kind" attribute of priority-claim)
Date string // <date> of the priority document-id
Sequence string // "sequence" attribute of the priority-claim element
Active string // text of the priority-active-indicator element, kept as a string
}
// FamilyData represents parsed patent family data, as produced by ParseFamily.
type FamilyData struct {
XMLName xml.Name `xml:"world-patent-data"` // not filled in by ParseFamily, which builds the value field by field
PatentNumber string // country + doc-number (no kind) of the top-level publication reference, else of the first member
FamilyID string // family-id of the last member that carries a non-empty one
TotalCount int // parsed from the "total-result-count" attribute; 0 if absent or unparsable
Legal bool // true only when the "legal" attribute is exactly "true"
Countries []string // Unique country codes from all members, sorted alphabetically
Members []FamilyMember // always non-nil after ParseFamily, even when empty
}
// LegalEvent represents a single legal event.
// NOTE(review): the code that populates this type is not visible in this part
// of the file; the field notes below restate the existing comments and the
// attribute names declared on legalEventXML. Confirm against ParseLegal.
type LegalEvent struct {
Code string // presumably the "code" attribute of the legal element
Description string // presumably the "desc" attribute of the legal element
Influence string // presumably the "infl" attribute of the legal element
DateMigr string // presumably the "dateMigr" attribute of the legal element
Country string // Country code from L001EP field
Date string // Gazette date from L007EP field
EventCode string // Legal event code from L008EP field
Fields map[string]string // All L-code field values
}
// LegalData represents parsed legal event data.
// NOTE(review): populated by ParseLegal, whose body is not fully visible here.
type LegalData struct {
XMLName xml.Name `xml:"world-patent-data"`
PatentNumber string // patent the legal events belong to
FamilyID string // patent family identifier
LegalEvents []LegalEvent // legal events found in the response
}
// RegisterEvent represents a single dossier event from the EPO Register.
// All fields are plain strings; dates are carried as text rather than time.Time.
type RegisterEvent struct {
ID string // Event identifier (e.g., "EVT_434577438")
Date string // Event date (YYYYMMDD)
EventCode string // Event code (e.g., "0009250", "EPIDOSNIGR1")
Description string // Human-readable description
Category string // Categorized: filing, publication, examination, grant, opposition, appeal, other
GazetteNum string // Gazette number (e.g., "2022/32"), empty if not available
GazetteDate string // Gazette date (YYYYMMDD), empty if not available
}
// PatentStatus represents a patent status entry from the EPO Register.
// All fields are plain strings; the date is carried as text rather than time.Time.
type PatentStatus struct {
Date string // Status change date (YYYYMMDD)
Code string // Status code (e.g., "7", "8")
Description string // Human-readable description
}
// RegisterEventsData represents parsed register events data.
// NOTE(review): the parser that fills this type is not visible in this part of
// the file; field meanings below follow from the names and element types only.
type RegisterEventsData struct {
PatentNumber string // patent the register data belongs to
Query string // presumably the query that produced the response - confirm against the parser
Statuses []PatentStatus // status entries
Events []RegisterEvent // dossier events
}
// NumberConversionData represents parsed patent number conversion data.
// NOTE(review): the parser that fills this type is not visible in this part of
// the file; confirm which document-id (input or output) the number parts come from.
type NumberConversionData struct {
InputFormat string // number format of the request
OutputFormat string // number format of the converted result
Country string // country part of the document number
DocNumber string // numeric part of the document number
Kind string // kind code
Date string // date, kept as text
}
// ClassificationChild represents a child classification item.
// ClassificationData uses it for both its Children and its Ancestors lists.
type ClassificationChild struct {
Symbol string // classification symbol
Title string // title text of the item
Level int // level of the item in the scheme (numbering convention not shown here)
}
// ClassificationData represents parsed CPC classification schema data.
// NOTE(review): the parser that fills this type is not visible in this part of
// the file.
type ClassificationData struct {
Symbol string // classification symbol of the item
Title string // title text of the item
Level int // level of the item in the scheme (numbering convention not shown here)
SchemeType string // scheme type reported for the item
Children []ClassificationChild // items below this one
Ancestors []ClassificationChild // items above this one
}
// Paragraph represents a description paragraph.
// DescriptionData holds these in its Paragraphs slice.
type Paragraph struct {
ID string // paragraph identifier
Num string // paragraph number, kept as text
Text string // paragraph text
}
// DescriptionData represents parsed description data.
// NOTE(review): the parser that fills this type is not visible in this part of
// the file; PatentNumber presumably follows the Country+DocNumber+Kind
// convention used by ParseAbstract/ParseBiblio/ParseClaims - confirm.
type DescriptionData struct {
XMLName xml.Name `xml:"world-patent-data"`
PatentNumber string // full patent number
Country string // country code of the document
DocNumber string // document number
Kind string // kind code
Language string // language of the description
Paragraphs []Paragraph // description paragraphs
}
// FulltextData represents complete fulltext document data, grouping the
// separately modeled biblio, abstract, description and claims sections.
// NOTE(review): the parser that fills this type is not visible in this part of
// the file; the section pointers are presumably nil when a section is absent -
// confirm before dereferencing.
type FulltextData struct {
XMLName xml.Name `xml:"world-patent-data"`
Country string // country code of the document
DocNumber string // document number
Kind string // kind code
Language string // language of the document
Status string // status reported for the document
Biblio *BiblioData // bibliographic section
Abstract *AbstractData // abstract section
Description *DescriptionData // description section
Claims *ClaimsData // claims section
}
// SearchResult represents a single search result.
// SearchResultData holds these in its Results slice.
type SearchResult struct {
System string // presumably the originating system reported for the hit - confirm against the parser
FamilyID string // patent family identifier
Country string // country code of the publication
DocNumber string // document number of the publication
Kind string // kind code of the publication
Title string // title of the publication, if available
}
// SearchResultData represents search results with pagination.
// NOTE(review): the parser that fills this type is not visible in this part of
// the file; whether RangeBegin/RangeEnd are 1-based and inclusive should be
// confirmed there.
type SearchResultData struct {
XMLName xml.Name `xml:"world-patent-data"`
Query string // query the results belong to
TotalCount int // total number of matches
RangeBegin int // first result position covered by this page
RangeEnd int // last result position covered by this page
Results []SearchResult // results on this page
}
// EquivalentPatent represents an equivalent patent, identified by the three
// parts of its publication number.
type EquivalentPatent struct {
Country string // country code
DocNumber string // document number
Kind string // kind code
}
// EquivalentsData represents published equivalents inquiry results.
// NOTE(review): the parser that fills this type is not visible in this part of
// the file.
type EquivalentsData struct {
XMLName xml.Name `xml:"world-patent-data"`
PatentNumber string // patent the inquiry was made for
Equivalents []EquivalentPatent // equivalents found
}
// Internal structs for XML unmarshaling

// abstractXML is the decoding target used by ParseAbstract. It maps the
// world-patent-data > exchange-documents > exchange-document path and keeps
// only the document attributes and the abstract.
type abstractXML struct {
XMLName xml.Name `xml:"world-patent-data"`
ExchangeDocument struct {
Country string `xml:"country,attr"`
DocNumber string `xml:"doc-number,attr"`
Kind string `xml:"kind,attr"`
// Abstract is a single struct, not a slice, so only one abstract and one
// <p> value are retained per document.
Abstract struct {
Lang string `xml:"lang,attr"`
P string `xml:"p"`
} `xml:"abstract"`
} `xml:"exchange-documents>exchange-document"`
}
// biblioXML is the decoding target used by ParseBiblio. It maps the
// world-patent-data > exchange-documents > exchange-document path and the
// parts of bibliographic-data that ParseBiblio reads.
type biblioXML struct {
XMLName xml.Name `xml:"world-patent-data"`
ExchangeDocument struct {
Country string `xml:"country,attr"`
DocNumber string `xml:"doc-number,attr"`
Kind string `xml:"kind,attr"`
FamilyID string `xml:"family-id,attr"`
BiblioData struct {
// PublicationRef lists every document-id of the publication reference;
// Type carries the document-id-type attribute (ParseBiblio looks for "docdb").
PublicationRef struct {
DocumentID []struct {
Type string `xml:"document-id-type,attr"`
Country string `xml:"country"`
DocNumber string `xml:"doc-number"`
Kind string `xml:"kind"`
Date string `xml:"date"`
} `xml:"document-id"`
} `xml:"publication-reference"`
// InventionTitles holds one entry per invention-title element, keyed later by Lang.
InventionTitles []struct {
Lang string `xml:"lang,attr"`
Text string `xml:",chardata"`
} `xml:"invention-title"`
// Parties holds applicants and inventors; DataFormat carries the
// data-format attribute (ParseBiblio keeps only "epodoc" entries).
Parties struct {
Applicants []struct {
Sequence string `xml:"sequence,attr"`
DataFormat string `xml:"data-format,attr"`
ApplicantName struct {
Name string `xml:"name"`
} `xml:"applicant-name"`
} `xml:"applicants>applicant"`
Inventors []struct {
Sequence string `xml:"sequence,attr"`
DataFormat string `xml:"data-format,attr"`
InventorName struct {
Name string `xml:"name"`
} `xml:"inventor-name"`
} `xml:"inventors>inventor"`
} `xml:"parties"`
// ClassificationsIPCR holds the <text> of each classification-ipcr element.
ClassificationsIPCR []struct {
Text string `xml:"text"`
} `xml:"classifications-ipcr>classification-ipcr"`
// PatentClassifications holds the components of each patent-classification
// element; ParseBiblio turns each into a CPCClass.
PatentClassifications []struct {
Section string `xml:"section"`
Class string `xml:"class"`
Subclass string `xml:"subclass"`
MainGroup string `xml:"main-group"`
Subgroup string `xml:"subgroup"`
} `xml:"patent-classifications>patent-classification"`
} `xml:"bibliographic-data"`
} `xml:"exchange-documents>exchange-document"`
}
// claimsXML is the decoding target used by ParseClaims. It maps the
// world-patent-data > fulltext-documents > fulltext-document path and keeps
// the publication document-id plus the claim texts.
type claimsXML struct {
XMLName xml.Name `xml:"world-patent-data"`
FulltextDocuments struct {
FulltextDocument struct {
BiblioData struct {
PublicationRef struct {
// DocumentID is a single struct, so one document-id is retained.
DocumentID struct {
Country string `xml:"country"`
DocNumber string `xml:"doc-number"`
Kind string `xml:"kind"`
} `xml:"document-id"`
} `xml:"publication-reference"`
} `xml:"bibliographic-data"`
Claims struct {
Lang string `xml:"lang,attr"`
// ClaimList models one <claim> element whose claim-text children each
// become one entry of ClaimTexts.
ClaimList struct {
ClaimTexts []struct {
Text string `xml:",chardata"`
} `xml:"claim-text"`
} `xml:"claim"`
} `xml:"claims"`
} `xml:"fulltext-document"`
} `xml:"fulltext-documents"`
}
// ParseAbstract decodes an abstract response into an AbstractData value.
//
// The document's country, doc-number and kind attributes, the abstract's
// language and its whitespace-trimmed paragraph text are copied over.
// PatentNumber is the concatenation of country, doc-number and kind, and is
// only set when all three are present. A decoding failure is returned
// unchanged from xml.Unmarshal.
func ParseAbstract(xmlData string) (*AbstractData, error) {
	var parsed abstractXML
	err := xml.Unmarshal([]byte(xmlData), &parsed)
	if err != nil {
		return nil, err
	}

	doc := &parsed.ExchangeDocument
	result := new(AbstractData)
	result.Country = doc.Country
	result.DocNumber = doc.DocNumber
	result.Kind = doc.Kind
	result.Language = doc.Abstract.Lang
	result.Text = strings.TrimSpace(doc.Abstract.P)

	// Without all three parts there is no meaningful patent number to build.
	if result.Country == "" || result.DocNumber == "" || result.Kind == "" {
		return result, nil
	}
	result.PatentNumber = fmt.Sprintf("%s%s%s", result.Country, result.DocNumber, result.Kind)
	return result, nil
}
// ParseBiblio parses bibliographic XML into structured data.
//
// It returns the decoding error from xml.Unmarshal unchanged when xmlData is
// not well-formed. On success the result always has a non-nil Titles map.
//
// Extraction rules:
//   - PatentNumber is Country+DocNumber+Kind and is only set when all three
//     are non-empty.
//   - PublicationDate is the date of the first "docdb" publication
//     document-id that carries one.
//   - Titles are keyed by language; entries without a language or with an
//     empty (after trimming) text are skipped.
//   - Applicants and inventors are taken from "epodoc" entries only, to avoid
//     the duplicates contributed by other data formats.
//   - IPC texts that are empty after trimming are skipped.
func ParseBiblio(xmlData string) (*BiblioData, error) {
	var raw biblioXML
	if err := xml.Unmarshal([]byte(xmlData), &raw); err != nil {
		return nil, err
	}

	doc := &raw.ExchangeDocument
	biblio := &doc.BiblioData

	data := &BiblioData{
		Country:   doc.Country,
		DocNumber: doc.DocNumber,
		Kind:      doc.Kind,
		FamilyID:  doc.FamilyID,
		Titles:    make(map[string]string, len(biblio.InventionTitles)),
	}

	// Construct patent number
	if data.Country != "" && data.DocNumber != "" && data.Kind != "" {
		data.PatentNumber = fmt.Sprintf("%s%s%s", data.Country, data.DocNumber, data.Kind)
	}

	// Extract publication date from first docdb document-id
	for _, docID := range biblio.PublicationRef.DocumentID {
		if docID.Type == "docdb" && docID.Date != "" {
			data.PublicationDate = docID.Date
			break
		}
	}

	// Extract titles (multilingual). Trim before the emptiness check so that
	// whitespace-only titles are not stored as empty strings.
	for _, title := range biblio.InventionTitles {
		text := strings.TrimSpace(title.Text)
		if title.Lang != "" && text != "" {
			data.Titles[title.Lang] = text
		}
	}

	// Extract applicants (only epodoc format to avoid duplicates)
	for _, applicant := range biblio.Parties.Applicants {
		if applicant.DataFormat != "epodoc" {
			continue
		}
		if party, ok := splitPartyNameCountry(applicant.ApplicantName.Name); ok {
			data.Applicants = append(data.Applicants, party)
		}
	}

	// Extract inventors (only epodoc format)
	for _, inventor := range biblio.Parties.Inventors {
		if inventor.DataFormat != "epodoc" {
			continue
		}
		if party, ok := splitPartyNameCountry(inventor.InventorName.Name); ok {
			data.Inventors = append(data.Inventors, party)
		}
	}

	// Extract IPC classifications, skipping texts that are blank once trimmed
	for _, ipc := range biblio.ClassificationsIPCR {
		if text := strings.TrimSpace(ipc.Text); text != "" {
			data.IPCClasses = append(data.IPCClasses, text)
		}
	}

	// Extract CPC classifications
	for _, cpc := range biblio.PatentClassifications {
		class := CPCClass{
			Section:   cpc.Section,
			Class:     cpc.Class,
			Subclass:  cpc.Subclass,
			MainGroup: cpc.MainGroup,
			Subgroup:  cpc.Subgroup,
		}
		// Build full representation, e.g. "H04W 84/20"
		class.Full = fmt.Sprintf("%s%s%s %s/%s", class.Section, class.Class, class.Subclass, class.MainGroup, class.Subgroup)
		data.CPCClasses = append(data.CPCClasses, class)
	}

	return data, nil
}

// splitPartyNameCountry turns a raw party name of the form "NAME [CC]" into a
// Party. The country is the text between the last "[" and the "]" that follows
// it; when no such suffix exists the whole trimmed name is kept and Country is
// empty. It reports false when the name is empty after trimming.
func splitPartyNameCountry(rawName string) (Party, bool) {
	name := strings.TrimSpace(rawName)
	if name == "" {
		return Party{}, false
	}

	country := ""
	// open > 0 (not >= 0): a name that starts with "[" has no name part to
	// split off, so it is kept whole.
	if open := strings.LastIndex(name, "["); open > 0 {
		if closeOff := strings.Index(name[open:], "]"); closeOff > 0 {
			country = strings.TrimSpace(name[open+1 : open+closeOff])
			name = strings.TrimSpace(name[:open])
		}
	}
	return Party{Name: name, Country: country}, true
}
// ParseClaims parses claims XML into structured data.
//
// It returns the decoding error from xml.Unmarshal unchanged when xmlData is
// not well-formed. Country, DocNumber and Kind come from the fulltext
// document's publication document-id; PatentNumber is their concatenation and
// is only set when all three are non-empty.
//
// Each claim-text element with non-blank text becomes one Claim. Number is the
// element's 1-based position in the document, so skipped blank elements leave
// gaps in the numbering rather than renumbering later claims.
func ParseClaims(xmlData string) (*ClaimsData, error) {
	var raw claimsXML
	if err := xml.Unmarshal([]byte(xmlData), &raw); err != nil {
		return nil, err
	}

	doc := &raw.FulltextDocuments.FulltextDocument
	docID := &doc.BiblioData.PublicationRef.DocumentID

	data := &ClaimsData{
		Country:   docID.Country,
		DocNumber: docID.DocNumber,
		Kind:      docID.Kind,
		Language:  doc.Claims.Lang,
	}

	// Construct patent number
	if data.Country != "" && data.DocNumber != "" && data.Kind != "" {
		data.PatentNumber = fmt.Sprintf("%s%s%s", data.Country, data.DocNumber, data.Kind)
	}

	// Extract claims. Trim before the emptiness check so that a claim-text
	// holding only whitespace does not yield a Claim with empty Text.
	for i, claimText := range doc.Claims.ClaimList.ClaimTexts {
		text := strings.TrimSpace(claimText.Text)
		if text == "" {
			continue
		}
		data.Claims = append(data.Claims, Claim{
			Number: i + 1,
			Text:   text,
		})
	}

	return data, nil
}
// imageInquiryXML is the internal structure for unmarshaling image inquiry XML.
// It is the decoding target used by ParseImageInquiry.
//
// Note on Link field structure:
// The EPO OPS API returns the image link as a nested element, not an attribute:
//
// <ops:document-instance desc="Drawing" ...>
// <ops:document-instance-link href="/rest-services/..."/>
// </ops:document-instance>
//
// Previous versions of this library incorrectly used:
//
// Link string `xml:"link,attr"`
//
// The correct structure is:
//
// Link struct { Href string `xml:"href,attr"` } `xml:"document-instance-link"`
type imageInquiryXML struct {
XMLName xml.Name `xml:"world-patent-data"`
DocumentInquiry struct {
InquiryResult struct {
// DocumentInstances holds one entry per document-instance element.
DocumentInstances []struct {
Desc string `xml:"desc,attr"`
NumberOfPages int `xml:"number-of-pages,attr"`
DocType string `xml:"doc-type,attr"`
// Link is the nested document-instance-link element; ParseImageInquiry
// rejects instances whose Href is empty.
Link struct {
Href string `xml:"href,attr"`
} `xml:"document-instance-link"`
// FormatOptions lists the text of each document-format element.
FormatOptions struct {
Formats []struct {
Value string `xml:",chardata"`
} `xml:"document-format"`
} `xml:"document-format-options"`
} `xml:"document-instance"`
} `xml:"inquiry-result"`
} `xml:"document-inquiry"`
}
// ParseImageInquiry converts a Published Images Inquiry response into an
// ImageInquiry value.
//
// For every document instance in the response (drawings, full document, ...)
// it records the description, document type, page count, download link and
// the list of non-blank, whitespace-trimmed formats.
//
// Errors:
//   - *XMLParseError when the XML cannot be decoded;
//   - *DataValidationError when an instance has no link href, or when the
//     response contains no document instances at all.
//
// Expected XML shape:
//
//	<ops:world-patent-data>
//	  <ops:document-inquiry>
//	    <ops:inquiry-result>
//	      <ops:document-instance desc="Drawing" number-of-pages="5" doc-type="Drawing">
//	        <ops:document-instance-link href="..."/>
//	        <ops:document-format-options>
//	          <ops:document-format>application/pdf</ops:document-format>
//	          <ops:document-format>image/tiff</ops:document-format>
//	        </ops:document-format-options>
//	      </ops:document-instance>
//	    </ops:inquiry-result>
//	  </ops:document-inquiry>
//	</ops:world-patent-data>
func ParseImageInquiry(xmlData string) (*ImageInquiry, error) {
	var parsed imageInquiryXML
	if decodeErr := xml.Unmarshal([]byte(xmlData), &parsed); decodeErr != nil {
		return nil, &XMLParseError{
			Parser:    "ParseImageInquiry",
			Element:   "root",
			XMLSample: truncateXML(xmlData, 200),
			Cause:     decodeErr,
		}
	}

	instances := parsed.DocumentInquiry.InquiryResult.DocumentInstances

	// Every instance is either converted or aborts the parse, so an empty
	// input list is the only way to end up with no document instances.
	if len(instances) == 0 {
		return nil, &DataValidationError{
			Parser:       "ParseImageInquiry",
			MissingField: "DocumentInstances",
			Message:      "no document instances found in response",
		}
	}

	converted := make([]DocumentInstance, 0, len(instances))
	for pos := range instances {
		src := &instances[pos]

		// The link is the one mandatory piece of each instance.
		if src.Link.Href == "" {
			return nil, &DataValidationError{
				Parser:       "ParseImageInquiry",
				MissingField: fmt.Sprintf("DocumentInstances[%d].Link.Href", pos),
				Message:      "document-instance-link href is required but was empty",
			}
		}

		available := make([]string, 0, len(src.FormatOptions.Formats))
		for _, entry := range src.FormatOptions.Formats {
			if trimmed := strings.TrimSpace(entry.Value); trimmed != "" {
				available = append(available, trimmed)
			}
		}

		converted = append(converted, DocumentInstance{
			Description:   src.Desc,
			Link:          src.Link.Href,
			NumberOfPages: src.NumberOfPages,
			Formats:       available,
			DocType:       src.DocType,
		})
	}

	return &ImageInquiry{DocumentInstances: converted}, nil
}
// Internal structs for Family XML unmarshaling

// familyXML is the decoding target used by ParseFamily. It maps the
// world-patent-data > patent-family element and its family-member children.
type familyXML struct {
XMLName xml.Name `xml:"world-patent-data"`
PatentFamily struct {
Legal string `xml:"legal,attr"` // kept as text; ParseFamily compares it with "true"
TotalResultCount string `xml:"total-result-count,attr"` // kept as text; ParseFamily converts it to an int
// PublicationRef is the optional top-level publication reference of the family.
PublicationRef struct {
DocumentID struct {
Country string `xml:"country"`
DocNumber string `xml:"doc-number"`
Kind string `xml:"kind"`
} `xml:"document-id"`
} `xml:"publication-reference"`
FamilyMembers []struct {
FamilyID string `xml:"family-id,attr"`
// PublicationRef lists every document-id of the member's publication
// reference; Type carries the document-id-type attribute.
PublicationRef struct {
DocumentIDs []struct {
Type string `xml:"document-id-type,attr"`
Country string `xml:"country"`
DocNumber string `xml:"doc-number"`
Kind string `xml:"kind"`
Date string `xml:"date"`
} `xml:"document-id"`
} `xml:"publication-reference"`
ApplicationRef struct {
DocID string `xml:"doc-id,attr"`
DocumentID struct {
Country string `xml:"country"`
DocNumber string `xml:"doc-number"`
Kind string `xml:"kind"`
Date string `xml:"date"`
} `xml:"document-id"`
} `xml:"application-reference"`
PriorityClaims []struct {
Sequence string `xml:"sequence,attr"`
Kind string `xml:"kind,attr"` // attribute on priority-claim; distinct from DocumentID.Kind below
DocumentID struct {
Country string `xml:"country"`
DocNumber string `xml:"doc-number"`
Kind string `xml:"kind"`
Date string `xml:"date"`
} `xml:"document-id"`
ActiveIndicator string `xml:"priority-active-indicator"`
} `xml:"priority-claim"`
// ExchangeDocuments carries per-member biblio data (titles and
// applicants) when the response includes it.
ExchangeDocuments []struct {
BiblioData struct {
Titles []struct {
Lang string `xml:"lang,attr"`
Title string `xml:",chardata"`
} `xml:"invention-title"`
Parties struct {
Applicants struct {
Applicants []struct {
DataFormat string `xml:"data-format,attr"`
Name struct {
Name string `xml:"name"`
} `xml:"applicant-name"`
} `xml:"applicant"`
} `xml:"applicants"`
} `xml:"parties"`
} `xml:"bibliographic-data"`
} `xml:"exchange-document"`
} `xml:"family-member"`
} `xml:"patent-family"`
}
// truncateXML shortens xmlData for inclusion in error messages.
//
// Input of at most maxLen bytes is returned unchanged. Longer input is cut to
// at most maxLen bytes and suffixed with "...". The cut is moved back to the
// nearest UTF-8 rune boundary so that a multi-byte character is never split,
// which would otherwise leave invalid UTF-8 in the message. A negative maxLen
// is treated as 0 instead of panicking.
func truncateXML(xmlData string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(xmlData) <= maxLen {
		return xmlData
	}

	// A UTF-8 continuation byte has the bit pattern 10xxxxxx. If the byte at
	// the cut position is one, the cut falls inside a rune, so step back to
	// that rune's first byte. A rune has at most three continuation bytes, so
	// stop after three steps even on malformed input.
	const maxContinuationBytes = 3
	cut := maxLen
	for back := 0; back < maxContinuationBytes && cut > 0 && xmlData[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return xmlData[:cut] + "..."
}
// ParseFamily parses patent family XML into structured data.
//
// It returns an *XMLParseError when xmlData cannot be decoded. On success,
// Members and Countries are always non-nil, even when empty.
//
// Extraction rules:
//   - Each member's Country, DocNumber, Kind and Date come from its "docdb"
//     publication document-id; if none is typed "docdb", the first
//     document-id is used.
//   - PatentNumber is country + doc-number (no kind) of the top-level
//     publication reference when it has both; otherwise it is taken from the
//     first family member.
//   - Legal is true only when the "legal" attribute is exactly "true".
//   - FamilyID is the family-id of the last member that carries one.
//   - Title and Applicants are filled from a member's exchange-document data
//     when present: the English title is preferred, otherwise the first
//     non-empty one; applicants are the de-duplicated "epodoc" names.
//   - Countries is the sorted set of member countries.
func ParseFamily(xmlData string) (*FamilyData, error) {
	var raw familyXML
	if err := xml.Unmarshal([]byte(xmlData), &raw); err != nil {
		return nil, &XMLParseError{
			Parser:    "ParseFamily",
			Element:   "root",
			XMLSample: truncateXML(xmlData, 200),
			Cause:     err,
		}
	}

	family := &raw.PatentFamily
	data := &FamilyData{
		Legal:   family.Legal == "true",
		Members: make([]FamilyMember, 0, len(family.FamilyMembers)), // always non-nil, even if empty
	}

	// Some family responses have a top-level publication-reference, others
	// don't; the fallback to the first member happens after the member loop.
	if pubRef := family.PublicationRef.DocumentID; pubRef.Country != "" && pubRef.DocNumber != "" {
		data.PatentNumber = pubRef.Country + pubRef.DocNumber
	}

	if family.TotalResultCount != "" {
		// Best effort: an unparsable count leaves TotalCount at 0 rather than
		// failing the whole parse, so the error is deliberately ignored.
		_, _ = fmt.Sscanf(family.TotalResultCount, "%d", &data.TotalCount)
	}

	countrySet := make(map[string]bool)
	for i := range family.FamilyMembers {
		member := &family.FamilyMembers[i]
		familyMember := FamilyMember{
			FamilyID: member.FamilyID,
		}

		// Publication reference: prefer the docdb document-id, falling back to
		// the first one listed when no entry is typed "docdb".
		if ids := member.PublicationRef.DocumentIDs; len(ids) > 0 {
			pubDoc := ids[0]
			for _, id := range ids {
				if id.Type == "docdb" {
					pubDoc = id
					break
				}
			}
			familyMember.Country = pubDoc.Country
			familyMember.DocNumber = pubDoc.DocNumber
			familyMember.Kind = pubDoc.Kind
			familyMember.Date = pubDoc.Date
		}

		// Application reference
		appDoc := member.ApplicationRef.DocumentID
		familyMember.ApplicationRef = ApplicationReference{
			Country:   appDoc.Country,
			DocNumber: appDoc.DocNumber,
			Kind:      appDoc.Kind,
			Date:      appDoc.Date,
			DocID:     member.ApplicationRef.DocID,
		}

		// Priority claims
		for _, pc := range member.PriorityClaims {
			familyMember.PriorityClaims = append(familyMember.PriorityClaims, PriorityClaim{
				Country:   pc.DocumentID.Country,
				DocNumber: pc.DocumentID.DocNumber,
				Kind:      pc.DocumentID.Kind,
				Date:      pc.DocumentID.Date,
				Sequence:  pc.Sequence,
				Active:    pc.ActiveIndicator,
			})
		}

		// Biblio data from exchange-document (present in GetFamilyWithBiblio responses)
		for _, exDoc := range member.ExchangeDocuments {
			// Title: prefer English, fall back to the first non-empty title.
			// Once any exchange-document has supplied a title, later ones are
			// not consulted.
			if familyMember.Title == "" {
				for _, t := range exDoc.BiblioData.Titles {
					title := strings.TrimSpace(t.Title)
					if title == "" {
						continue
					}
					if strings.EqualFold(t.Lang, "en") {
						familyMember.Title = title
						break
					}
					if familyMember.Title == "" {
						familyMember.Title = title
					}
				}
			}

			// Applicants: epodoc format only, de-duplicated by name. Only the
			// first exchange-document that yields applicants is used.
			if len(familyMember.Applicants) == 0 {
				seen := make(map[string]bool)
				for _, a := range exDoc.BiblioData.Parties.Applicants.Applicants {
					if !strings.EqualFold(a.DataFormat, "epodoc") {
						continue
					}
					name := strings.TrimSpace(a.Name.Name)
					if name != "" && !seen[name] {
						seen[name] = true
						familyMember.Applicants = append(familyMember.Applicants, name)
					}
				}
			}
		}

		if familyMember.FamilyID != "" {
			data.FamilyID = familyMember.FamilyID
		}
		if familyMember.Country != "" {
			countrySet[familyMember.Country] = true
		}
		data.Members = append(data.Members, familyMember)
	}

	// No usable top-level publication-reference: use the first family member.
	if data.PatentNumber == "" && len(data.Members) > 0 {
		first := data.Members[0]
		data.PatentNumber = first.Country + first.DocNumber
	}

	// Sorted unique countries; the slice is non-nil even when empty.
	countries := make([]string, 0, len(countrySet))
	for c := range countrySet {
		countries = append(countries, c)
	}
	sort.Strings(countries)
	data.Countries = countries

	return data, nil
}
// Internal structs for Legal XML unmarshaling

// legalXML is the decoding target used by ParseLegal. It maps the
// world-patent-data > patent-family element, keeping the top-level publication
// reference and, per family-member, its family-id and legal events.
type legalXML struct {
XMLName xml.Name `xml:"world-patent-data"`
PatentFamily struct {
PublicationRef struct {
DocumentID struct {
Country string `xml:"country"`
DocNumber string `xml:"doc-number"`
Kind string `xml:"kind"`
} `xml:"document-id"`
} `xml:"publication-reference"`
FamilyMembers []struct {
FamilyID string `xml:"family-id,attr"`
LegalEvents []legalEventXML `xml:"legal"` // one entry per <legal> child of the member
} `xml:"family-member"`
} `xml:"patent-family"`
}
// legalEventXML represents a single legal event with dynamic L*EP fields.
// The EPO OPS API returns legal event data with L-fields (L001EP, L002EP, etc.)
// that vary by event type and jurisdiction. We define a sufficient range
// (L001EP through L050EP) and use reflection to dynamically extract all non-empty fields.
//
// initLegalFieldsCache discovers the L-fields by name: any string field whose
// Go name starts with "L" and ends with "EP" is treated as one. Keep that in
// mind when adding fields to this struct.
type legalEventXML struct {
Code string `xml:"code,attr"`
Desc string `xml:"desc,attr"`
Infl string `xml:"infl,attr"`
DateMigr string `xml:"dateMigr,attr"`
Pre []string `xml:"pre"` // text of each <pre> child element
// L-fields: Extended to L050EP to support future EPO additions
L001EP string `xml:"L001EP"`
L002EP string `xml:"L002EP"`
L003EP string `xml:"L003EP"`
L004EP string `xml:"L004EP"`
L005EP string `xml:"L005EP"`
L006EP string `xml:"L006EP"`
L007EP string `xml:"L007EP"`
L008EP string `xml:"L008EP"`
L009EP string `xml:"L009EP"`
L010EP string `xml:"L010EP"`
L011EP string `xml:"L011EP"`
L012EP string `xml:"L012EP"`
L013EP string `xml:"L013EP"`
L014EP string `xml:"L014EP"`
L015EP string `xml:"L015EP"`
L016EP string `xml:"L016EP"`
L017EP string `xml:"L017EP"`
L018EP string `xml:"L018EP"`
L019EP string `xml:"L019EP"`
L020EP string `xml:"L020EP"`
L021EP string `xml:"L021EP"`
L022EP string `xml:"L022EP"`
L023EP string `xml:"L023EP"`
L024EP string `xml:"L024EP"`
L025EP string `xml:"L025EP"`
L026EP string `xml:"L026EP"`
L027EP string `xml:"L027EP"`
L028EP string `xml:"L028EP"`
L029EP string `xml:"L029EP"`
L030EP string `xml:"L030EP"`
L031EP string `xml:"L031EP"`
L032EP string `xml:"L032EP"`
L033EP string `xml:"L033EP"`
L034EP string `xml:"L034EP"`
L035EP string `xml:"L035EP"`
L036EP string `xml:"L036EP"`
L037EP string `xml:"L037EP"`
L038EP string `xml:"L038EP"`
L039EP string `xml:"L039EP"`
L040EP string `xml:"L040EP"`
L041EP string `xml:"L041EP"`
L042EP string `xml:"L042EP"`
L043EP string `xml:"L043EP"`
L044EP string `xml:"L044EP"`
L045EP string `xml:"L045EP"`
L046EP string `xml:"L046EP"`
L047EP string `xml:"L047EP"`
L048EP string `xml:"L048EP"`
L049EP string `xml:"L049EP"`
L050EP string `xml:"L050EP"`
}
// Cache for legal field metadata to avoid repeated reflection.
// legalFieldIndices and legalFieldNames are parallel slices: entry i of each
// describes the same struct field. Both are filled by initLegalFieldsCache,
// which extractLegalFields runs through legalFieldsOnce.
var (
legalFieldIndices []int // indices of L*EP fields in legalEventXML
legalFieldNames []string // corresponding field names
legalFieldsOnce sync.Once // ensures cache is initialized only once
)
// initLegalFieldsCache fills legalFieldIndices and legalFieldNames by scanning
// the fields of legalEventXML once. A field is recorded when it is of string
// kind and its Go name starts with "L" and ends with "EP". It is meant to be
// run through legalFieldsOnce so the reflection cost is paid a single time.
func initLegalFieldsCache() {
	eventType := reflect.TypeOf(legalEventXML{})
	total := eventType.NumField()
	for idx := 0; idx < total; idx++ {
		candidate := eventType.Field(idx)

		// Only string fields can hold an L-code value.
		if candidate.Type.Kind() != reflect.String {
			continue
		}
		// L-code fields follow the L*EP naming pattern.
		if !strings.HasPrefix(candidate.Name, "L") || !strings.HasSuffix(candidate.Name, "EP") {
			continue
		}

		legalFieldIndices = append(legalFieldIndices, idx)
		legalFieldNames = append(legalFieldNames, candidate.Name)
	}
}
// extractLegalFields collects every non-empty L*EP field of a legalEventXML
// value into a map keyed by the Go field name (e.g. "L001EP"). The returned
// map is never nil; it is empty when no L-field is set.
//
// Which fields count as L-fields is decided once, by initLegalFieldsCache, on
// the first call; later calls only read values through the cached indices, so
// new L-fields added to the struct are picked up without changes here.
func extractLegalFields(legal legalEventXML) map[string]string {
	legalFieldsOnce.Do(initLegalFieldsCache)

	event := reflect.ValueOf(legal)
	collected := map[string]string{}
	for pos := range legalFieldIndices {
		text := event.Field(legalFieldIndices[pos]).String()
		if text == "" {
			continue
		}
		collected[legalFieldNames[pos]] = text
	}
	return collected
}
// ParseLegal parses legal event XML into structured data
func ParseLegal(xmlData string) (*LegalData, error) {
var raw legalXML
if err := xml.Unmarshal([]byte(xmlData), &raw); err != nil {
return nil, &XMLParseError{
Parser: "ParseLegal",
Element: "root",
XMLSample: truncateXML(xmlData, 200),
Cause: err,
}
}