Skip to content

Commit aa6c493

Browse files
authored
Merge pull request #677 from moov-io/refactor-cleanup-ingest-merge
ingest: speed up merging and dedup
2 parents 22f4f1c + f1400e2 commit aa6c493

File tree

7 files changed

+226
-72
lines changed

7 files changed

+226
-72
lines changed

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ toolchain go1.25.5
77
require (
88
fyne.io/fyne/v2 v2.7.1
99
github.com/abadojack/whatlanggo v1.0.1
10-
github.com/adamdecaf/merge v0.1.1
10+
github.com/adamdecaf/merge v0.2.1
1111
github.com/antchfx/htmlquery v1.3.5
1212
github.com/bbalet/stopwords v1.0.0
1313
github.com/dongri/phonenumber v0.1.12
@@ -141,7 +141,7 @@ require (
141141
go.yaml.in/yaml/v2 v2.4.2 // indirect
142142
go.yaml.in/yaml/v3 v3.0.4 // indirect
143143
golang.org/x/crypto v0.45.0 // indirect
144-
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
144+
golang.org/x/exp v0.0.0-20251209150349-8475f28825e9 // indirect
145145
golang.org/x/image v0.24.0 // indirect
146146
golang.org/x/net v0.47.0 // indirect
147147
golang.org/x/oauth2 v0.31.0 // indirect

go.sum

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -651,19 +651,15 @@ github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrd
651651
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=
652652
github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4=
653653
github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc=
654-
github.com/adamdecaf/merge v0.1.1 h1:nUnmRTiDK+yP2F09ih8/qwX3zv2D6SJed4fUO5VLgrw=
655-
github.com/adamdecaf/merge v0.1.1/go.mod h1:2oBFKg0m+01A8hHqA3C5oompJ6gt1YbDOaFp0PrGzy0=
654+
github.com/adamdecaf/merge v0.2.1 h1:bUJkYl217UNcHWf1vefryRU84i0VNkKVbfCoN1Khq08=
655+
github.com/adamdecaf/merge v0.2.1/go.mod h1:I0HNGd/7LuyUOa2G/eOB+r1+s9Dvr5JfNyZl7SxNujA=
656656
github.com/ajstarks/deck v0.0.0-20200831202436-30c9fc6549a9/go.mod h1:JynElWSGnm/4RlzPXRlREEwqTHAN3T56Bv2ITsFT3gY=
657657
github.com/ajstarks/deck/generate v0.0.0-20210309230005-c3f852c02e19/go.mod h1:T13YZdzov6OU0A1+RfKZiZN9ca6VeKdBdyDV+BY97Tk=
658658
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
659659
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM=
660660
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
661-
github.com/antchfx/htmlquery v1.3.4 h1:Isd0srPkni2iNTWCwVj/72t7uCphFeor5Q8nCzj1jdQ=
662-
github.com/antchfx/htmlquery v1.3.4/go.mod h1:K9os0BwIEmLAvTqaNSua8tXLWRWZpocZIH73OzWQbwM=
663661
github.com/antchfx/htmlquery v1.3.5 h1:aYthDDClnG2a2xePf6tys/UyyM/kRcsFRm+ifhFKoU0=
664662
github.com/antchfx/htmlquery v1.3.5/go.mod h1:5oyIPIa3ovYGtLqMPNjBF2Uf25NPCKsMjCnQ8lvjaoA=
665-
github.com/antchfx/xpath v1.3.3 h1:tmuPQa1Uye0Ym1Zn65vxPgfltWb/Lxu2jeqIGteJSRs=
666-
github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
667663
github.com/antchfx/xpath v1.3.5 h1:PqbXLC3TkfeZyakF5eeh3NTWEbYl4VHNVeufANzDbKQ=
668664
github.com/antchfx/xpath v1.3.5/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
669665
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
@@ -1153,8 +1149,6 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu
11531149
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
11541150
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
11551151
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
1156-
github.com/urfave/cli/v3 v3.5.0 h1:qCuFMmdayTF3zmjG8TSsoBzrDqszNrklYg2x3g4MSgw=
1157-
github.com/urfave/cli/v3 v3.5.0/go.mod h1:ysVLtOEmg2tOy6PknnYVhDoouyC/6N42TMeoMzskhso=
11581152
github.com/urfave/cli/v3 v3.6.1 h1:j8Qq8NyUawj/7rTYdBGrxcH7A/j7/G8Q5LhWEW4G3Mo=
11591153
github.com/urfave/cli/v3 v3.6.1/go.mod h1:ysVLtOEmg2tOy6PknnYVhDoouyC/6N42TMeoMzskhso=
11601154
github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8=
@@ -1252,8 +1246,8 @@ golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u0
12521246
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
12531247
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
12541248
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
1255-
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
1256-
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
1249+
golang.org/x/exp v0.0.0-20251209150349-8475f28825e9 h1:MDfG8Cvcqlt9XXrmEiD4epKn7VJHZO84hejP9Jmp0MM=
1250+
golang.org/x/exp v0.0.0-20251209150349-8475f28825e9/go.mod h1:EPRbTFwzwjXj9NpYyyrvenVh9Y+GFeEvMNh7Xuz7xgU=
12571251
golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
12581252
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
12591253
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
@@ -1416,8 +1410,6 @@ golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
14161410
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
14171411
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
14181412
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
1419-
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
1420-
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
14211413
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
14221414
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
14231415
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -1538,8 +1530,6 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
15381530
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
15391531
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
15401532
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
1541-
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
1542-
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
15431533
golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
15441534
golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
15451535
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=

internal/entitytest/entitytest.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
package entitytest
2+
3+
import (
4+
"testing"
5+
6+
"github.com/moov-io/watchman/pkg/search"
7+
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
func Equal[T any](tb testing.TB, e1, e2 search.Entity[T]) {
12+
require.Equal(tb, e1.Name, e2.Name)
13+
require.Equal(tb, e1.Type, e2.Type)
14+
require.Equal(tb, e1.Source, e2.Source)
15+
16+
require.Equal(tb, e1.SourceID, e2.SourceID)
17+
18+
if e1.Person != nil && e2.Person != nil {
19+
require.Equal(tb, e1.Person.Name, e2.Person.Name)
20+
require.ElementsMatch(tb, e1.Person.AltNames, e2.Person.AltNames)
21+
22+
require.Equal(tb, e1.Person.Gender, e2.Person.Gender)
23+
24+
require.Equal(tb, e1.Person.BirthDate, e2.Person.BirthDate)
25+
require.Equal(tb, e1.Person.PlaceOfBirth, e2.Person.PlaceOfBirth)
26+
require.Equal(tb, e1.Person.DeathDate, e2.Person.DeathDate)
27+
28+
require.ElementsMatch(tb, e1.Person.Titles, e2.Person.Titles)
29+
require.ElementsMatch(tb, e1.Person.GovernmentIDs, e2.Person.GovernmentIDs)
30+
}
31+
32+
if e1.Business != nil && e2.Business != nil {
33+
require.Equal(tb, e1.Business.Name, e2.Business.Name)
34+
require.ElementsMatch(tb, e1.Business.AltNames, e2.Business.AltNames)
35+
36+
require.Equal(tb, e1.Business.Created, e2.Business.Created)
37+
require.Equal(tb, e1.Business.Dissolved, e2.Business.Dissolved)
38+
39+
require.ElementsMatch(tb, e1.Business.GovernmentIDs, e2.Business.GovernmentIDs)
40+
}
41+
42+
if e1.Organization != nil && e2.Organization != nil {
43+
require.Equal(tb, e1.Organization.Name, e2.Organization.Name)
44+
require.ElementsMatch(tb, e1.Organization.AltNames, e2.Organization.AltNames)
45+
46+
require.Equal(tb, e1.Organization.Created, e2.Organization.Created)
47+
require.Equal(tb, e1.Organization.Dissolved, e2.Organization.Dissolved)
48+
49+
require.ElementsMatch(tb, e1.Organization.GovernmentIDs, e2.Organization.GovernmentIDs)
50+
}
51+
52+
if e1.Aircraft != nil && e2.Aircraft != nil {
53+
require.Equal(tb, e1.Aircraft.Name, e2.Aircraft.Name)
54+
require.ElementsMatch(tb, e1.Aircraft.AltNames, e2.Aircraft.AltNames)
55+
56+
require.Equal(tb, e1.Aircraft.Type, e2.Aircraft.Type)
57+
require.Equal(tb, e1.Aircraft.Flag, e2.Aircraft.Flag)
58+
require.Equal(tb, e1.Aircraft.Built, e2.Aircraft.Built)
59+
require.Equal(tb, e1.Aircraft.ICAOCode, e2.Aircraft.ICAOCode)
60+
require.Equal(tb, e1.Aircraft.Model, e2.Aircraft.Model)
61+
require.Equal(tb, e1.Aircraft.SerialNumber, e2.Aircraft.SerialNumber)
62+
}
63+
64+
if e1.Vessel != nil && e2.Vessel != nil {
65+
require.Equal(tb, e1.Vessel.Name, e2.Vessel.Name)
66+
require.ElementsMatch(tb, e1.Vessel.AltNames, e2.Vessel.AltNames)
67+
68+
require.Equal(tb, e1.Vessel.IMONumber, e2.Vessel.IMONumber)
69+
require.Equal(tb, e1.Vessel.Type, e2.Vessel.Type)
70+
require.Equal(tb, e1.Vessel.Flag, e2.Vessel.Flag)
71+
require.Equal(tb, e1.Vessel.Built, e2.Vessel.Built)
72+
require.Equal(tb, e1.Vessel.Model, e2.Vessel.Model)
73+
require.Equal(tb, e1.Vessel.Tonnage, e2.Vessel.Tonnage)
74+
require.Equal(tb, e1.Vessel.MMSI, e2.Vessel.MMSI)
75+
require.Equal(tb, e1.Vessel.CallSign, e2.Vessel.CallSign)
76+
require.Equal(tb, e1.Vessel.GrossRegisteredTonnage, e2.Vessel.GrossRegisteredTonnage)
77+
require.Equal(tb, e1.Vessel.Owner, e2.Vessel.Owner)
78+
}
79+
80+
require.ElementsMatch(tb, e1.Contact.EmailAddresses, e2.Contact.EmailAddresses)
81+
require.ElementsMatch(tb, e1.Contact.PhoneNumbers, e2.Contact.PhoneNumbers)
82+
require.ElementsMatch(tb, e1.Contact.FaxNumbers, e2.Contact.FaxNumbers)
83+
require.ElementsMatch(tb, e1.Contact.Websites, e2.Contact.Websites)
84+
85+
require.ElementsMatch(tb, e1.Addresses, e2.Addresses)
86+
require.ElementsMatch(tb, e1.CryptoAddresses, e2.CryptoAddresses)
87+
88+
require.ElementsMatch(tb, e1.Affiliations, e2.Affiliations)
89+
require.Equal(tb, e1.SanctionsInfo, e2.SanctionsInfo)
90+
require.ElementsMatch(tb, e1.HistoricalInfo, e2.HistoricalInfo)
91+
92+
// require.Equal(tb, e1.PreparedFields, e2.PreparedFields) // TODO(adam): want to check these?
93+
require.Equal(tb, e1.SourceData, e2.SourceData)
94+
}

internal/ingest/service.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func (s *service) ReadEntitiesFromFile(ctx context.Context, name string, content
5757
}
5858
}
5959
if err != nil {
60-
return out, err
60+
return out, fmt.Errorf("reading entities from %s failed: %w", name, err)
6161
}
6262

6363
// Merge the entities

internal/ingest/service_test.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"os"
66
"path/filepath"
7+
"strings"
78
"testing"
89
"time"
910

@@ -323,9 +324,18 @@ func TestService_ReadEntitiesFromFile_FincenPerson(t *testing.T) {
323324

324325
require.Len(t, parsedFile.Entities, len(expected))
325326

326-
require.Equal(t, expected[0], parsedFile.Entities[0])
327-
require.Equal(t, expected[1], parsedFile.Entities[1])
328-
require.Equal(t, expected[2], parsedFile.Entities[2])
327+
for _, exp := range expected {
328+
var found bool
329+
for _, parsed := range parsedFile.Entities {
330+
if strings.EqualFold(exp.Name, parsed.Name) {
331+
found = true
332+
require.Equal(t, exp, parsed, exp.Name)
333+
}
334+
}
335+
if !found {
336+
t.Fatalf("no matching parsed record for %#v", exp.Name)
337+
}
338+
}
329339
})
330340
}
331341

0 commit comments

Comments
 (0)