Skip to content

Commit 6189192

Browse files
committed
updated field names for matrixprofile, removed cac from analyze default, removed discord and motif parameters from analyze, added stamp for sample <1
1 parent c561d1e commit 6189192

File tree

10 files changed

+83
-81
lines changed

10 files changed

+83
-81
lines changed

analyze.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,18 @@ package matrixprofile
33
// AnalyzeOpts contains all the parameters needed for basic features to discover from
44
// a matrix profile. This is currently limited to motif, discord, and segmentation discovery.
55
type AnalyzeOpts struct {
6-
KMotifs int // the top k motifs to find
7-
RMotifs float64 // the max radius to find motifs
8-
KDiscords int // the top k discords to find
6+
kMotifs int // the top k motifs to find
7+
rMotifs float64 // the max radius to find motifs
8+
kDiscords int // the top k discords to find
99
OutputFilename string // relative or absolute filepath for the visualization output
1010
}
1111

1212
// NewAnalyzeOpts creates a default set of parameters to analyze the matrix profile.
1313
func NewAnalyzeOpts() *AnalyzeOpts {
1414
return &AnalyzeOpts{
15-
KMotifs: 3,
16-
RMotifs: 2,
17-
KDiscords: 3,
15+
kMotifs: 3,
16+
rMotifs: 2,
17+
kDiscords: 3,
1818
OutputFilename: "mp.png",
1919
}
2020
}

example_caseStudy_test.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,14 @@ func Example_caseStudy() {
1616
noise := siggen.Noise(0.1, len(sig))
1717
sig = siggen.Add(sig, noise)
1818

19-
var m, k int
20-
var r float64
19+
var m int
2120
m = 32
22-
k = 6
23-
r = 3
2421
mp, err := New(sig, nil, m)
2522
if err != nil {
2623
panic(err)
2724
}
2825

2926
ao := NewAnalyzeOpts()
30-
ao.KMotifs = k
31-
ao.RMotifs = r
3227
ao.OutputFilename = "mp_sine.png"
3328

3429
if err = mp.Analyze(nil, ao); err != nil {

example_matrixprofile_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ func ExampleMatrixProfile_DiscoverMotifs() {
8686
// finds the top 3 motifs in the signal. Motif groups include
8787
// all subsequences that are within 2 times the distance of the
8888
// original motif pair
89-
motifs, err := mp.DiscoverMotifs(2, 2)
89+
motifs, err := mp.DiscoverMotifs(2, 2, 10, mp.W/2)
9090
if err != nil {
9191
panic(err)
9292
}

matrixprofile.go

Lines changed: 64 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ type MatrixProfile struct {
4242
IdxB []int `json:"pi_ba"` // matrix profile index for the BA join
4343
AV av.AV `json:"annotation_vector"` // type of annotation vector which defaults to all ones
4444
Opts *MPOpts `json:"options"` // options used for the computation
45+
Motifs []MotifGroup
46+
Discords []int
4547
}
4648

4749
// New creates a matrix profile struct with a given timeseries length n and
@@ -347,8 +349,8 @@ const (
347349
// MPOpts are parameters to vary the algorithm to compute the matrix profile.
348350
type MPOpts struct {
349351
Algorithm Algo `json:"algorithm"` // choose which algorithm to compute the matrix profile
350-
Sample float64 `json:"sample_pct"` // only applicable to algorithm STAMP
351-
Parallelism int `json:"parallelism"`
352+
SamplePct float64 `json:"sample_pct"` // only applicable to algorithm STAMP
353+
NumJobs int `json:"num_jobs"`
352354
Euclidean bool `json:"euclidean"` // defaults to using euclidean distance instead of pearson correlation for matrix profile
353355
RemapNegCorr bool `json:"remap_negative_correlation"` // defaults to no remapping. This is used so that highly negatively correlated sequences will show a low distance as well.
354356
}
@@ -360,10 +362,10 @@ func NewMPOpts() *MPOpts {
360362
p = 1
361363
}
362364
return &MPOpts{
363-
Algorithm: AlgoMPX,
364-
Sample: 1.0,
365-
Parallelism: p,
366-
Euclidean: true,
365+
Algorithm: AlgoMPX,
366+
SamplePct: 1.0,
367+
NumJobs: p,
368+
Euclidean: true,
367369
}
368370
}
369371

@@ -374,6 +376,10 @@ func (mp *MatrixProfile) Compute(o *MPOpts) error {
374376
}
375377
mp.Opts = o
376378

379+
if o.SamplePct < 1 {
380+
return mp.stamp()
381+
}
382+
377383
switch o.Algorithm {
378384
case AlgoSTOMP:
379385
return mp.stomp()
@@ -383,6 +389,8 @@ func (mp *MatrixProfile) Compute(o *MPOpts) error {
383389
return mp.stmp()
384390
case AlgoMPX:
385391
return mp.mpx()
392+
default:
393+
return fmt.Errorf("Unsupported algorithm for matrix profile, %s", o.Algorithm)
386394
}
387395
return nil
388396
}
@@ -658,8 +666,8 @@ func (mp *MatrixProfile) mergeMPResults(results []chan *mpResult, euclidean bool
658666
// profile. This should compute far faster at the cost of an approximation of the
659667
// matrix profile. Stores the matrix profile and matrix profile index in the struct.
660668
func (mp *MatrixProfile) stamp() error {
661-
if mp.Opts.Sample <= 0.0 {
662-
return fmt.Errorf("must provide a sampling greater than 0 and at most 1, sample: %.3f", mp.Opts.Sample)
669+
if mp.Opts.SamplePct <= 0.0 {
670+
return fmt.Errorf("must provide a sampling greater than 0 and at most 1, sample: %.3f", mp.Opts.SamplePct)
663671
}
664672

665673
if err := mp.initCaches(); err != nil {
@@ -675,9 +683,9 @@ func (mp *MatrixProfile) stamp() error {
675683

676684
randIdx := rand.Perm(len(mp.A) - mp.W + 1)
677685

678-
batchSize := (len(mp.A)-mp.W+1)/mp.Opts.Parallelism + 1
679-
results := make([]chan *mpResult, mp.Opts.Parallelism)
680-
for i := 0; i < mp.Opts.Parallelism; i++ {
686+
batchSize := (len(mp.A)-mp.W+1)/mp.Opts.NumJobs + 1
687+
results := make([]chan *mpResult, mp.Opts.NumJobs)
688+
for i := 0; i < mp.Opts.NumJobs; i++ {
681689
results[i] = make(chan *mpResult)
682690
}
683691

@@ -695,10 +703,10 @@ func (mp *MatrixProfile) stamp() error {
695703
// kick off multiple go routines to process a batch of rows returning back
696704
// the matrix profile for that batch and any error encountered
697705
var wg sync.WaitGroup
698-
wg.Add(mp.Opts.Parallelism)
699-
for batch := 0; batch < mp.Opts.Parallelism; batch++ {
706+
wg.Add(mp.Opts.NumJobs)
707+
for batch := 0; batch < mp.Opts.NumJobs; batch++ {
700708
go func(idx int) {
701-
results[idx] <- mp.stampBatch(idx, batchSize, mp.Opts.Sample, randIdx, &wg)
709+
results[idx] <- mp.stampBatch(idx, batchSize, mp.Opts.SamplePct, randIdx, &wg)
702710
}(batch)
703711
}
704712
wg.Wait()
@@ -764,9 +772,9 @@ func (mp *MatrixProfile) stomp() error {
764772
mp.Idx[i] = math.MaxInt64
765773
}
766774

767-
batchSize := (len(mp.A)-mp.W+1)/mp.Opts.Parallelism + 1
768-
results := make([]chan *mpResult, mp.Opts.Parallelism)
769-
for i := 0; i < mp.Opts.Parallelism; i++ {
775+
batchSize := (len(mp.A)-mp.W+1)/mp.Opts.NumJobs + 1
776+
results := make([]chan *mpResult, mp.Opts.NumJobs)
777+
for i := 0; i < mp.Opts.NumJobs; i++ {
770778
results[i] = make(chan *mpResult)
771779
}
772780

@@ -784,8 +792,8 @@ func (mp *MatrixProfile) stomp() error {
784792
// kick off multiple go routines to process a batch of rows returning back
785793
// the matrix profile for that batch and any error encountered
786794
var wg sync.WaitGroup
787-
wg.Add(mp.Opts.Parallelism)
788-
for batch := 0; batch < mp.Opts.Parallelism; batch++ {
795+
wg.Add(mp.Opts.NumJobs)
796+
for batch := 0; batch < mp.Opts.NumJobs; batch++ {
789797
go func(idx int) {
790798
results[idx] <- mp.stompBatch(idx, batchSize, &wg)
791799
}(batch)
@@ -911,9 +919,9 @@ func (mp *MatrixProfile) mpx() error {
911919
}
912920

913921
// setup for AB join
914-
batchScheme := util.DiagBatchingScheme(lenA, mp.Opts.Parallelism)
915-
results := make([]chan *mpResult, mp.Opts.Parallelism)
916-
for i := 0; i < mp.Opts.Parallelism; i++ {
922+
batchScheme := util.DiagBatchingScheme(lenA, mp.Opts.NumJobs)
923+
results := make([]chan *mpResult, mp.Opts.NumJobs)
924+
for i := 0; i < mp.Opts.NumJobs; i++ {
917925
results[i] = make(chan *mpResult)
918926
}
919927

@@ -931,8 +939,8 @@ func (mp *MatrixProfile) mpx() error {
931939
// kick off multiple go routines to process a batch of rows returning back
932940
// the matrix profile for that batch and any error encountered
933941
var wg sync.WaitGroup
934-
wg.Add(mp.Opts.Parallelism)
935-
for batch := 0; batch < mp.Opts.Parallelism; batch++ {
942+
wg.Add(mp.Opts.NumJobs)
943+
for batch := 0; batch < mp.Opts.NumJobs; batch++ {
936944
go func(batchNum int) {
937945
b := batchScheme[batchNum]
938946
if mp.SelfJoin {
@@ -952,9 +960,9 @@ func (mp *MatrixProfile) mpx() error {
952960
}
953961

954962
// setup for BA join
955-
batchScheme = util.DiagBatchingScheme(lenB, mp.Opts.Parallelism)
956-
results = make([]chan *mpResult, mp.Opts.Parallelism)
957-
for i := 0; i < mp.Opts.Parallelism; i++ {
963+
batchScheme = util.DiagBatchingScheme(lenB, mp.Opts.NumJobs)
964+
results = make([]chan *mpResult, mp.Opts.NumJobs)
965+
for i := 0; i < mp.Opts.NumJobs; i++ {
958966
results[i] = make(chan *mpResult)
959967
}
960968

@@ -969,8 +977,8 @@ func (mp *MatrixProfile) mpx() error {
969977

970978
// kick off multiple go routines to process a batch of rows returning back
971979
// the matrix profile for that batch and any error encountered
972-
wg.Add(mp.Opts.Parallelism)
973-
for batch := 0; batch < mp.Opts.Parallelism; batch++ {
980+
wg.Add(mp.Opts.NumJobs)
981+
for batch := 0; batch < mp.Opts.NumJobs; batch++ {
974982
go func(batchNum int) {
975983
b := batchScheme[batchNum]
976984
results[batchNum] <- mp.mpxbaBatch(b.Idx, mua, siga, dfa, dga, mub, sigb, dfb, dgb, b.Size, &wg)
@@ -1203,24 +1211,22 @@ func (mp MatrixProfile) Analyze(mo *MPOpts, ao *AnalyzeOpts) error {
12031211
ao = NewAnalyzeOpts()
12041212
}
12051213

1206-
_, _, cac := mp.DiscoverSegments()
1207-
1208-
motifs, err := mp.DiscoverMotifs(ao.KMotifs, ao.RMotifs)
1214+
_, err = mp.DiscoverMotifs(ao.kMotifs, ao.rMotifs, 10, mp.W/2)
12091215
if err != nil {
12101216
return err
12111217
}
12121218

1213-
discords, err := mp.DiscoverDiscords(ao.KDiscords, mp.W/2)
1219+
_, err = mp.DiscoverDiscords(ao.kDiscords, mp.W/2)
12141220
if err != nil {
12151221
return err
12161222
}
12171223

1218-
return mp.Visualize(ao.OutputFilename, motifs, discords, cac)
1224+
return mp.Visualize(ao.OutputFilename)
12191225
}
12201226

12211227
// DiscoverMotifs will iteratively go through the matrix profile to find the
12221228
// top k motifs with a given radius. Only applies to self joins.
1223-
func (mp MatrixProfile) DiscoverMotifs(k int, r float64) ([]MotifGroup, error) {
1229+
func (mp *MatrixProfile) DiscoverMotifs(k int, r float64, neighborCount, exclusionZone int) ([]MotifGroup, error) {
12241230
if !mp.SelfJoin {
12251231
return nil, errors.New("can only find top motifs if a self join is performed")
12261232
}
@@ -1272,12 +1278,12 @@ func (mp MatrixProfile) DiscoverMotifs(k int, r float64) ([]MotifGroup, error) {
12721278

12731279
// kill off any indices around the initial motif pair since they are
12741280
// trivial solutions
1275-
util.ApplyExclusionZone(prof, initialMotif[0], mp.W/2)
1276-
util.ApplyExclusionZone(prof, initialMotif[1], mp.W/2)
1281+
util.ApplyExclusionZone(prof, initialMotif[0], exclusionZone)
1282+
util.ApplyExclusionZone(prof, initialMotif[1], exclusionZone)
12771283
if j > 0 {
12781284
for k := j; k >= 0; k-- {
12791285
for _, idx := range motifs[k].Idx {
1280-
util.ApplyExclusionZone(prof, idx, mp.W/2)
1286+
util.ApplyExclusionZone(prof, idx, exclusionZone)
12811287
}
12821288
}
12831289
}
@@ -1290,12 +1296,16 @@ func (mp MatrixProfile) DiscoverMotifs(k int, r float64) ([]MotifGroup, error) {
12901296

12911297
if prof[minDistIdx] < motifDistance*r {
12921298
motifSet[minDistIdx] = struct{}{}
1293-
util.ApplyExclusionZone(prof, minDistIdx, mp.W/2)
1299+
util.ApplyExclusionZone(prof, minDistIdx, exclusionZone)
12941300
} else {
12951301
// the closest distance in the profile is greater than the desired
12961302
// distance so break
12971303
break
12981304
}
1305+
// we hit our limit of neighborCount so stop searching
1306+
if len(motifSet) == neighborCount {
1307+
break
1308+
}
12991309
}
13001310

13011311
// store the found motif indexes and create an exclusion zone around
@@ -1306,20 +1316,21 @@ func (mp MatrixProfile) DiscoverMotifs(k int, r float64) ([]MotifGroup, error) {
13061316
}
13071317
for idx := range motifSet {
13081318
motifs[j].Idx = append(motifs[j].Idx, idx)
1309-
util.ApplyExclusionZone(mpCurrent, idx, mp.W/2)
1319+
util.ApplyExclusionZone(mpCurrent, idx, exclusionZone)
13101320
}
13111321

13121322
// sorts the indices in ascending order
13131323
sort.IntSlice(motifs[j].Idx).Sort()
13141324
}
1325+
mp.Motifs = motifs[:j]
13151326

13161327
return motifs[:j], nil
13171328
}
13181329

13191330
// DiscoverDiscords finds the top k time series discords starting indexes from a computed
13201331
// matrix profile. Each discovery of a discord will apply an exclusion zone around
13211332
// the found index so that new discords can be discovered.
1322-
func (mp MatrixProfile) DiscoverDiscords(k int, exclusionZone int) ([]int, error) {
1333+
func (mp *MatrixProfile) DiscoverDiscords(k int, exclusionZone int) ([]int, error) {
13231334
mpCurrent, _, err := mp.ApplyAV()
13241335
if err != nil {
13251336
return nil, err
@@ -1352,6 +1363,8 @@ func (mp MatrixProfile) DiscoverDiscords(k int, exclusionZone int) ([]int, error
13521363
discords[i] = maxIdx
13531364
util.ApplyExclusionZone(mpCurrent, maxIdx, exclusionZone)
13541365
}
1366+
mp.Discords = discords[:i]
1367+
13551368
return discords[:i], nil
13561369
}
13571370

@@ -1385,28 +1398,27 @@ func (mp MatrixProfile) DiscoverSegments() (int, float64, []float64) {
13851398
}
13861399

13871400
// Visualize creates a png of the matrix profile given a matrix profile.
1388-
func (mp MatrixProfile) Visualize(fn string, motifs []MotifGroup, discords []int, cac []float64) error {
1401+
func (mp MatrixProfile) Visualize(fn string) error {
13891402
sigPts := points(mp.A, len(mp.A))
13901403
mpPts := points(mp.MP, len(mp.A))
1391-
cacPts := points(cac, len(mp.A))
1392-
motifPts := make([][]plotter.XYs, len(motifs))
1393-
discordPts := make([]plotter.XYs, len(discords))
1394-
discordLabels := make([]string, len(discords))
1404+
motifPts := make([][]plotter.XYs, len(mp.Motifs))
1405+
discordPts := make([]plotter.XYs, len(mp.Discords))
1406+
discordLabels := make([]string, len(mp.Discords))
13951407

1396-
for i := 0; i < len(motifs); i++ {
1397-
motifPts[i] = make([]plotter.XYs, len(motifs[i].Idx))
1408+
for i := 0; i < len(mp.Motifs); i++ {
1409+
motifPts[i] = make([]plotter.XYs, len(mp.Motifs[i].Idx))
13981410
}
13991411

1400-
for i := 0; i < len(motifs); i++ {
1401-
for j, idx := range motifs[i].Idx {
1412+
for i := 0; i < len(mp.Motifs); i++ {
1413+
for j, idx := range mp.Motifs[i].Idx {
14021414
motifPts[i][j] = points(mp.A[idx:idx+mp.W], mp.W)
14031415
}
14041416
}
14051417

1406-
for i, idx := range discords {
1418+
for i, idx := range mp.Discords {
14071419
discordPts[i] = points(mp.A[idx:idx+mp.W], mp.W)
14081420
discordLabels[i] = strconv.Itoa(idx)
14091421
}
14101422

1411-
return plotMP(sigPts, mpPts, cacPts, motifPts, discordPts, discordLabels, fn)
1423+
return plotMP(sigPts, mpPts, motifPts, discordPts, discordLabels, fn)
14121424
}

matrixprofile_bench_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,8 @@ func BenchmarkStamp(b *testing.B) {
207207

208208
o := NewMPOpts()
209209
o.Algorithm = AlgoSTAMP
210-
o.Sample = 1.0
211-
o.Parallelism = 2
210+
o.SamplePct = 1.0
211+
o.NumJobs = 2
212212

213213
b.Run("m32_p2_pts1k", func(b *testing.B) {
214214
for i := 0; i < b.N; i++ {
@@ -248,7 +248,7 @@ func BenchmarkStomp(b *testing.B) {
248248
b.Error(err)
249249
}
250250

251-
o.Parallelism = bm.parallelism
251+
o.NumJobs = bm.parallelism
252252
for i := 0; i < b.N; i++ {
253253
err = mp.Compute(o)
254254
if err != nil {
@@ -287,7 +287,7 @@ func BenchmarkMpx(b *testing.B) {
287287
b.Error(err)
288288
}
289289

290-
o.Parallelism = bm.parallelism
290+
o.NumJobs = bm.parallelism
291291
for i := 0; i < b.N; i++ {
292292
err = mp.Compute(o)
293293
if err != nil {

0 commit comments

Comments
 (0)