Skip to content

Commit 3fbc0ad

Browse files
authored
Merge pull request #84 from michaeladler/perf/improve-n-quads-2
perf: improve n-quad parser using map-based lookups
2 parents 5eec423 + ce7ba6e commit 3fbc0ad

File tree

4 files changed, with 39 additions and 38 deletions.

ld/api_normalize.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -244,13 +244,13 @@ func (na *NormalisationAlgorithm) Normalize(dataset *RDFDataset) {
244244
// node identifiers using the canonical identifiers previously issued by
245245
// canonical issuer.
246246
// Note: We optimize away the copy here.
247-
for _, attrNode := range []Node{quad.Subject, quad.Object, quad.Graph} {
248-
if attrNode != nil {
249-
attrValue := attrNode.GetValue()
250-
if IsBlankNode(attrNode) && strings.Index(attrValue, "_:c14n") != 0 {
251-
bn := attrNode.(*BlankNode)
252-
bn.Attribute = na.canonicalIssuer.GetId(attrValue)
253-
}
247+
for _, nodePtr := range []*Node{&quad.Subject, &quad.Object, &quad.Graph} {
248+
if *nodePtr == nil {
249+
continue
250+
}
251+
attrValue := (*nodePtr).GetValue()
252+
if IsBlankNode(*nodePtr) && !strings.HasPrefix(attrValue, "_:c14n") {
253+
*nodePtr = NewBlankNode(na.canonicalIssuer.GetId(attrValue))
254254
}
255255
}
256256

ld/node.go

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ type Node interface {
3131
// GetValue returns the node's value.
3232
GetValue() string
3333

34-
// Equal returns true id this node is equal to the given node.
34+
// Equal returns true if this node is equal to the given node.
3535
Equal(n Node) bool
3636
}
3737

@@ -43,8 +43,8 @@ type Literal struct {
4343
}
4444

4545
// NewLiteral creates a new instance of Literal.
46-
func NewLiteral(value string, datatype string, language string) *Literal {
47-
l := &Literal{
46+
func NewLiteral(value string, datatype string, language string) Literal {
47+
l := Literal{
4848
Value: value,
4949
Language: language,
5050
}
@@ -59,12 +59,12 @@ func NewLiteral(value string, datatype string, language string) *Literal {
5959
}
6060

6161
// GetValue returns the node's value.
62-
func (l *Literal) GetValue() string {
62+
func (l Literal) GetValue() string {
6363
return l.Value
6464
}
6565

66-
// Equal returns true id this node is equal to the given node.
67-
func (l *Literal) Equal(n Node) bool {
66+
// Equal returns true if this node is equal to the given node.
67+
func (l Literal) Equal(n Node) bool {
6868
ol, ok := n.(*Literal)
6969
if !ok {
7070
return false
@@ -91,21 +91,21 @@ type IRI struct {
9191
}
9292

9393
// NewIRI creates a new instance of IRI.
94-
func NewIRI(iri string) *IRI {
95-
i := &IRI{
94+
func NewIRI(iri string) IRI {
95+
i := IRI{
9696
Value: iri,
9797
}
9898

9999
return i
100100
}
101101

102102
// GetValue returns the node's value.
103-
func (iri *IRI) GetValue() string {
103+
func (iri IRI) GetValue() string {
104104
return iri.Value
105105
}
106106

107-
// Equal returns true id this node is equal to the given node.
108-
func (iri *IRI) Equal(n Node) bool {
107+
// Equal returns true if this node is equal to the given node.
108+
func (iri IRI) Equal(n Node) bool {
109109
if oiri, ok := n.(*IRI); ok {
110110
return iri.Value == oiri.Value
111111
}
@@ -119,21 +119,21 @@ type BlankNode struct {
119119
}
120120

121121
// NewBlankNode creates a new instance of BlankNode.
122-
func NewBlankNode(attribute string) *BlankNode {
123-
bn := &BlankNode{
122+
func NewBlankNode(attribute string) BlankNode {
123+
bn := BlankNode{
124124
Attribute: attribute,
125125
}
126126

127127
return bn
128128
}
129129

130130
// GetValue returns the node's value.
131-
func (bn *BlankNode) GetValue() string {
131+
func (bn BlankNode) GetValue() string {
132132
return bn.Attribute
133133
}
134134

135-
// Equal returns true id this node is equal to the given node.
136-
func (bn *BlankNode) Equal(n Node) bool {
135+
// Equal returns true if this node is equal to the given node.
136+
func (bn BlankNode) Equal(n Node) bool {
137137
if obn, ok := n.(*BlankNode); ok {
138138
return bn.Attribute == obn.Attribute
139139
}
@@ -143,19 +143,19 @@ func (bn *BlankNode) Equal(n Node) bool {
143143

144144
// IsBlankNode returns true if the given node is a blank node
145145
func IsBlankNode(node Node) bool {
146-
_, isBlankNode := node.(*BlankNode)
146+
_, isBlankNode := node.(BlankNode)
147147
return isBlankNode
148148
}
149149

150150
// IsIRI returns true if the given node is an IRI node
151151
func IsIRI(node Node) bool {
152-
_, isIRI := node.(*IRI)
152+
_, isIRI := node.(IRI)
153153
return isIRI
154154
}
155155

156156
// IsLiteral returns true if the given node is a literal node
157157
func IsLiteral(node Node) bool {
158-
_, isLiteral := node.(*Literal)
158+
_, isLiteral := node.(Literal)
159159
return isLiteral
160160
}
161161

@@ -173,7 +173,7 @@ func RdfToObject(n Node, useNativeTypes bool) (map[string]interface{}, error) {
173173
}, nil
174174
}
175175

176-
literal := n.(*Literal)
176+
literal := n.(Literal)
177177

178178
// convert literal object to JSON-LD
179179
rval := map[string]interface{}{

ld/rdf_dataset.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,11 +306,11 @@ var (
306306
func InvalidNode(node Node) bool {
307307

308308
switch v := node.(type) {
309-
case *IRI:
309+
case IRI:
310310
if !validIRI(v.Value) {
311311
return true
312312
}
313-
case *Literal:
313+
case Literal:
314314
if v.Language != "" && !validLanguageRegex.MatchString(v.Language) {
315315
return true
316316
}

ld/serialize_nquads.go

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ func toNQuad(triple *Quad, graphName string) string {
8484
} else if IsBlankNode(o) {
8585
quad += o.GetValue()
8686
} else {
87-
literal := o.(*Literal)
87+
literal := o.(Literal)
8888
escaped := escape(literal.GetValue())
8989
quad += "\"" + escaped + "\""
9090
if literal.Datatype == RDFLangString {
@@ -237,6 +237,9 @@ func ParseNQuadsFrom(o interface{}) (*RDFDataset, error) {
237237
// build RDF dataset
238238
dataset := NewRDFDataset()
239239

240+
// maintain a set of triples for each graph to check for duplicates
241+
triplesByGraph := make(map[string]map[Quad]struct{})
242+
240243
scanner, err := newScannerFor(o)
241244
if err != nil {
242245
return nil, err
@@ -302,21 +305,19 @@ func ParseNQuadsFrom(o interface{}) (*RDFDataset, error) {
302305

303306
// initialise graph in dataset
304307
triples, present := dataset.Graphs[name]
308+
if triplesByGraph[name] == nil {
309+
triplesByGraph[name] = make(map[Quad]struct{})
310+
}
311+
305312
if !present {
306313
dataset.Graphs[name] = []*Quad{triple}
307314
} else {
308315
// add triple if unique to its graph
309-
containsTriple := false
310-
for _, elem := range triples {
311-
if triple.Equal(elem) {
312-
containsTriple = true
313-
break
314-
}
315-
}
316-
if !containsTriple {
316+
if _, hasTriple := triplesByGraph[name][*triple]; !hasTriple {
317317
dataset.Graphs[name] = append(triples, triple)
318318
}
319319
}
320+
triplesByGraph[name][*triple] = struct{}{}
320321
}
321322
if err := scanner.Err(); err != nil {
322323
return nil, NewJsonLdError(IOError, err)

0 commit comments

Comments
 (0)