@@ -42,6 +42,8 @@ type MatrixProfile struct {
4242 IdxB []int `json:"pi_ba"` // matrix profile index for the BA join
4343 AV av.AV `json:"annotation_vector"` // type of annotation vector which defaults to all ones
4444 Opts * MPOpts `json:"options"` // options used for the computation
45+ Motifs []MotifGroup
46+ Discords []int
4547}
4648
4749// New creates a matrix profile struct with a given timeseries length n and
@@ -347,8 +349,8 @@ const (
347349// MPOpts are parameters to vary the algorithm to compute the matrix profile.
348350type MPOpts struct {
349351 Algorithm Algo `json:"algorithm"` // choose which algorithm to compute the matrix profile
350- Sample float64 `json:"sample_pct"` // only applicable to algorithm STAMP
351- Parallelism int `json:"parallelism "`
352+ SamplePct float64 `json:"sample_pct"` // only applicable to algorithm STAMP
353+ NumJobs int `json:"num_jobs "`
352354 Euclidean bool `json:"euclidean"` // defaults to using euclidean distance instead of pearson correlation for matrix profile
353355 RemapNegCorr bool `json:"remap_negative_correlation"` // defaults to no remapping. This is used so that highly negatively correlated sequences will show a low distance as well.
354356}
@@ -360,10 +362,10 @@ func NewMPOpts() *MPOpts {
360362 p = 1
361363 }
362364 return & MPOpts {
363- Algorithm : AlgoMPX ,
364- Sample : 1.0 ,
365- Parallelism : p ,
366- Euclidean : true ,
365+ Algorithm : AlgoMPX ,
366+ SamplePct : 1.0 ,
367+ NumJobs : p ,
368+ Euclidean : true ,
367369 }
368370}
369371
@@ -374,6 +376,10 @@ func (mp *MatrixProfile) Compute(o *MPOpts) error {
374376 }
375377 mp .Opts = o
376378
379+ if o .SamplePct < 1 {
380+ return mp .stamp ()
381+ }
382+
377383 switch o .Algorithm {
378384 case AlgoSTOMP :
379385 return mp .stomp ()
@@ -383,6 +389,8 @@ func (mp *MatrixProfile) Compute(o *MPOpts) error {
383389 return mp .stmp ()
384390 case AlgoMPX :
385391 return mp .mpx ()
392+ default :
393+ return fmt .Errorf ("Unsupported algorithm for matrix profile, %s" , o .Algorithm )
386394 }
387395 return nil
388396}
@@ -658,8 +666,8 @@ func (mp *MatrixProfile) mergeMPResults(results []chan *mpResult, euclidean bool
658666// profile. This should compute far faster at the cost of an approximation of the
659667// matrix profile. Stores the matrix profile and matrix profile index in the struct.
660668func (mp * MatrixProfile ) stamp () error {
661- if mp .Opts .Sample <= 0.0 {
662- return fmt .Errorf ("must provide a sampling greater than 0 and at most 1, sample: %.3f" , mp .Opts .Sample )
669+ if mp .Opts .SamplePct <= 0.0 {
670+ return fmt .Errorf ("must provide a sampling greater than 0 and at most 1, sample: %.3f" , mp .Opts .SamplePct )
663671 }
664672
665673 if err := mp .initCaches (); err != nil {
@@ -675,9 +683,9 @@ func (mp *MatrixProfile) stamp() error {
675683
676684 randIdx := rand .Perm (len (mp .A ) - mp .W + 1 )
677685
678- batchSize := (len (mp .A )- mp .W + 1 )/ mp .Opts .Parallelism + 1
679- results := make ([]chan * mpResult , mp .Opts .Parallelism )
680- for i := 0 ; i < mp .Opts .Parallelism ; i ++ {
686+ batchSize := (len (mp .A )- mp .W + 1 )/ mp .Opts .NumJobs + 1
687+ results := make ([]chan * mpResult , mp .Opts .NumJobs )
688+ for i := 0 ; i < mp .Opts .NumJobs ; i ++ {
681689 results [i ] = make (chan * mpResult )
682690 }
683691
@@ -695,10 +703,10 @@ func (mp *MatrixProfile) stamp() error {
695703 // kick off multiple go routines to process a batch of rows returning back
696704 // the matrix profile for that batch and any error encountered
697705 var wg sync.WaitGroup
698- wg .Add (mp .Opts .Parallelism )
699- for batch := 0 ; batch < mp .Opts .Parallelism ; batch ++ {
706+ wg .Add (mp .Opts .NumJobs )
707+ for batch := 0 ; batch < mp .Opts .NumJobs ; batch ++ {
700708 go func (idx int ) {
701- results [idx ] <- mp .stampBatch (idx , batchSize , mp .Opts .Sample , randIdx , & wg )
709+ results [idx ] <- mp .stampBatch (idx , batchSize , mp .Opts .SamplePct , randIdx , & wg )
702710 }(batch )
703711 }
704712 wg .Wait ()
@@ -764,9 +772,9 @@ func (mp *MatrixProfile) stomp() error {
764772 mp .Idx [i ] = math .MaxInt64
765773 }
766774
767- batchSize := (len (mp .A )- mp .W + 1 )/ mp .Opts .Parallelism + 1
768- results := make ([]chan * mpResult , mp .Opts .Parallelism )
769- for i := 0 ; i < mp .Opts .Parallelism ; i ++ {
775+ batchSize := (len (mp .A )- mp .W + 1 )/ mp .Opts .NumJobs + 1
776+ results := make ([]chan * mpResult , mp .Opts .NumJobs )
777+ for i := 0 ; i < mp .Opts .NumJobs ; i ++ {
770778 results [i ] = make (chan * mpResult )
771779 }
772780
@@ -784,8 +792,8 @@ func (mp *MatrixProfile) stomp() error {
784792 // kick off multiple go routines to process a batch of rows returning back
785793 // the matrix profile for that batch and any error encountered
786794 var wg sync.WaitGroup
787- wg .Add (mp .Opts .Parallelism )
788- for batch := 0 ; batch < mp .Opts .Parallelism ; batch ++ {
795+ wg .Add (mp .Opts .NumJobs )
796+ for batch := 0 ; batch < mp .Opts .NumJobs ; batch ++ {
789797 go func (idx int ) {
790798 results [idx ] <- mp .stompBatch (idx , batchSize , & wg )
791799 }(batch )
@@ -911,9 +919,9 @@ func (mp *MatrixProfile) mpx() error {
911919 }
912920
913921 // setup for AB join
914- batchScheme := util .DiagBatchingScheme (lenA , mp .Opts .Parallelism )
915- results := make ([]chan * mpResult , mp .Opts .Parallelism )
916- for i := 0 ; i < mp .Opts .Parallelism ; i ++ {
922+ batchScheme := util .DiagBatchingScheme (lenA , mp .Opts .NumJobs )
923+ results := make ([]chan * mpResult , mp .Opts .NumJobs )
924+ for i := 0 ; i < mp .Opts .NumJobs ; i ++ {
917925 results [i ] = make (chan * mpResult )
918926 }
919927
@@ -931,8 +939,8 @@ func (mp *MatrixProfile) mpx() error {
931939 // kick off multiple go routines to process a batch of rows returning back
932940 // the matrix profile for that batch and any error encountered
933941 var wg sync.WaitGroup
934- wg .Add (mp .Opts .Parallelism )
935- for batch := 0 ; batch < mp .Opts .Parallelism ; batch ++ {
942+ wg .Add (mp .Opts .NumJobs )
943+ for batch := 0 ; batch < mp .Opts .NumJobs ; batch ++ {
936944 go func (batchNum int ) {
937945 b := batchScheme [batchNum ]
938946 if mp .SelfJoin {
@@ -952,9 +960,9 @@ func (mp *MatrixProfile) mpx() error {
952960 }
953961
954962 // setup for BA join
955- batchScheme = util .DiagBatchingScheme (lenB , mp .Opts .Parallelism )
956- results = make ([]chan * mpResult , mp .Opts .Parallelism )
957- for i := 0 ; i < mp .Opts .Parallelism ; i ++ {
963+ batchScheme = util .DiagBatchingScheme (lenB , mp .Opts .NumJobs )
964+ results = make ([]chan * mpResult , mp .Opts .NumJobs )
965+ for i := 0 ; i < mp .Opts .NumJobs ; i ++ {
958966 results [i ] = make (chan * mpResult )
959967 }
960968
@@ -969,8 +977,8 @@ func (mp *MatrixProfile) mpx() error {
969977
970978 // kick off multiple go routines to process a batch of rows returning back
971979 // the matrix profile for that batch and any error encountered
972- wg .Add (mp .Opts .Parallelism )
973- for batch := 0 ; batch < mp .Opts .Parallelism ; batch ++ {
980+ wg .Add (mp .Opts .NumJobs )
981+ for batch := 0 ; batch < mp .Opts .NumJobs ; batch ++ {
974982 go func (batchNum int ) {
975983 b := batchScheme [batchNum ]
976984 results [batchNum ] <- mp .mpxbaBatch (b .Idx , mua , siga , dfa , dga , mub , sigb , dfb , dgb , b .Size , & wg )
@@ -1203,24 +1211,22 @@ func (mp MatrixProfile) Analyze(mo *MPOpts, ao *AnalyzeOpts) error {
12031211 ao = NewAnalyzeOpts ()
12041212 }
12051213
1206- _ , _ , cac := mp .DiscoverSegments ()
1207-
1208- motifs , err := mp .DiscoverMotifs (ao .KMotifs , ao .RMotifs )
1214+ _ , err = mp .DiscoverMotifs (ao .kMotifs , ao .rMotifs , 10 , mp .W / 2 )
12091215 if err != nil {
12101216 return err
12111217 }
12121218
1213- discords , err : = mp .DiscoverDiscords (ao .KDiscords , mp .W / 2 )
1219+ _ , err = mp .DiscoverDiscords (ao .kDiscords , mp .W / 2 )
12141220 if err != nil {
12151221 return err
12161222 }
12171223
1218- return mp .Visualize (ao .OutputFilename , motifs , discords , cac )
1224+ return mp .Visualize (ao .OutputFilename )
12191225}
12201226
12211227// DiscoverMotifs will iteratively go through the matrix profile to find the
12221228// top k motifs with a given radius. Only applies to self joins.
1223- func (mp MatrixProfile ) DiscoverMotifs (k int , r float64 ) ([]MotifGroup , error ) {
1229+ func (mp * MatrixProfile ) DiscoverMotifs (k int , r float64 , neighborCount , exclusionZone int ) ([]MotifGroup , error ) {
12241230 if ! mp .SelfJoin {
12251231 return nil , errors .New ("can only find top motifs if a self join is performed" )
12261232 }
@@ -1272,12 +1278,12 @@ func (mp MatrixProfile) DiscoverMotifs(k int, r float64) ([]MotifGroup, error) {
12721278
12731279 // kill off any indices around the initial motif pair since they are
12741280 // trivial solutions
1275- util .ApplyExclusionZone (prof , initialMotif [0 ], mp . W / 2 )
1276- util .ApplyExclusionZone (prof , initialMotif [1 ], mp . W / 2 )
1281+ util .ApplyExclusionZone (prof , initialMotif [0 ], exclusionZone )
1282+ util .ApplyExclusionZone (prof , initialMotif [1 ], exclusionZone )
12771283 if j > 0 {
12781284 for k := j ; k >= 0 ; k -- {
12791285 for _ , idx := range motifs [k ].Idx {
1280- util .ApplyExclusionZone (prof , idx , mp . W / 2 )
1286+ util .ApplyExclusionZone (prof , idx , exclusionZone )
12811287 }
12821288 }
12831289 }
@@ -1290,12 +1296,16 @@ func (mp MatrixProfile) DiscoverMotifs(k int, r float64) ([]MotifGroup, error) {
12901296
12911297 if prof [minDistIdx ] < motifDistance * r {
12921298 motifSet [minDistIdx ] = struct {}{}
1293- util .ApplyExclusionZone (prof , minDistIdx , mp . W / 2 )
1299+ util .ApplyExclusionZone (prof , minDistIdx , exclusionZone )
12941300 } else {
12951301 // the closest distance in the profile is greater than the desired
12961302 // distance so break
12971303 break
12981304 }
1305+ // we hit our limit of neighborCount so stop searching
1306+ if len (motifSet ) == neighborCount {
1307+ break
1308+ }
12991309 }
13001310
13011311 // store the found motif indexes and create an exclusion zone around
@@ -1306,20 +1316,21 @@ func (mp MatrixProfile) DiscoverMotifs(k int, r float64) ([]MotifGroup, error) {
13061316 }
13071317 for idx := range motifSet {
13081318 motifs [j ].Idx = append (motifs [j ].Idx , idx )
1309- util .ApplyExclusionZone (mpCurrent , idx , mp . W / 2 )
1319+ util .ApplyExclusionZone (mpCurrent , idx , exclusionZone )
13101320 }
13111321
13121322 // sorts the indices in ascending order
13131323 sort .IntSlice (motifs [j ].Idx ).Sort ()
13141324 }
1325+ mp .Motifs = motifs [:j ]
13151326
13161327 return motifs [:j ], nil
13171328}
13181329
13191330// DiscoverDiscords finds the top k time series discords starting indexes from a computed
13201331// matrix profile. Each discovery of a discord will apply an exclusion zone around
13211332// the found index so that new discords can be discovered.
1322- func (mp MatrixProfile ) DiscoverDiscords (k int , exclusionZone int ) ([]int , error ) {
1333+ func (mp * MatrixProfile ) DiscoverDiscords (k int , exclusionZone int ) ([]int , error ) {
13231334 mpCurrent , _ , err := mp .ApplyAV ()
13241335 if err != nil {
13251336 return nil , err
@@ -1352,6 +1363,8 @@ func (mp MatrixProfile) DiscoverDiscords(k int, exclusionZone int) ([]int, error
13521363 discords [i ] = maxIdx
13531364 util .ApplyExclusionZone (mpCurrent , maxIdx , exclusionZone )
13541365 }
1366+ mp .Discords = discords [:i ]
1367+
13551368 return discords [:i ], nil
13561369}
13571370
@@ -1385,28 +1398,27 @@ func (mp MatrixProfile) DiscoverSegments() (int, float64, []float64) {
13851398}
13861399
13871400// Visualize creates a png of the matrix profile given a matrix profile.
1388- func (mp MatrixProfile ) Visualize (fn string , motifs [] MotifGroup , discords [] int , cac [] float64 ) error {
1401+ func (mp MatrixProfile ) Visualize (fn string ) error {
13891402 sigPts := points (mp .A , len (mp .A ))
13901403 mpPts := points (mp .MP , len (mp .A ))
1391- cacPts := points (cac , len (mp .A ))
1392- motifPts := make ([][]plotter.XYs , len (motifs ))
1393- discordPts := make ([]plotter.XYs , len (discords ))
1394- discordLabels := make ([]string , len (discords ))
1404+ motifPts := make ([][]plotter.XYs , len (mp .Motifs ))
1405+ discordPts := make ([]plotter.XYs , len (mp .Discords ))
1406+ discordLabels := make ([]string , len (mp .Discords ))
13951407
1396- for i := 0 ; i < len (motifs ); i ++ {
1397- motifPts [i ] = make ([]plotter.XYs , len (motifs [i ].Idx ))
1408+ for i := 0 ; i < len (mp . Motifs ); i ++ {
1409+ motifPts [i ] = make ([]plotter.XYs , len (mp . Motifs [i ].Idx ))
13981410 }
13991411
1400- for i := 0 ; i < len (motifs ); i ++ {
1401- for j , idx := range motifs [i ].Idx {
1412+ for i := 0 ; i < len (mp . Motifs ); i ++ {
1413+ for j , idx := range mp . Motifs [i ].Idx {
14021414 motifPts [i ][j ] = points (mp .A [idx :idx + mp .W ], mp .W )
14031415 }
14041416 }
14051417
1406- for i , idx := range discords {
1418+ for i , idx := range mp . Discords {
14071419 discordPts [i ] = points (mp .A [idx :idx + mp .W ], mp .W )
14081420 discordLabels [i ] = strconv .Itoa (idx )
14091421 }
14101422
1411- return plotMP (sigPts , mpPts , cacPts , motifPts , discordPts , discordLabels , fn )
1423+ return plotMP (sigPts , mpPts , motifPts , discordPts , discordLabels , fn )
14121424}
0 commit comments