88 "sync"
99 "time"
1010
11- "github.com/getsentry/sentry-go"
1211 "github.com/sirupsen/logrus"
1312
1413 "github.com/element-hq/dendrite/setup/config"
@@ -36,17 +35,20 @@ func DeleteAllStreams(js natsclient.JetStreamContext, cfg *config.JetStream) {
3635func (s * NATSInstance ) Prepare (process * process.ProcessContext , cfg * config.JetStream ) (natsclient.JetStreamContext , * natsclient.Conn ) {
3736 natsLock .Lock ()
3837 defer natsLock .Unlock ()
39- // check if we need an in-process NATS Server
40- if len (cfg .Addresses ) != 0 {
41- // reuse existing connections
42- if s .nc != nil {
43- return s .js , s .nc
44- }
38+ var err error
39+
40+ // If a connection already exists, return it.
41+ if s .nc != nil && s .js != nil {
42+ return s .js , s .nc
43+ }
44+
45+ // For connecting to an external NATS server.
46+ if len (cfg .Addresses ) > 0 {
4547 s .js , s .nc = setupNATS (process , cfg , nil )
4648 return s .js , s .nc
4749 }
48- if s . Server == nil {
49- var err error
50+
51+ if len ( cfg . Addresses ) == 0 && s . Server == nil {
5052 opts := & natsserver.Options {
5153 ServerName : "monolith" ,
5254 DontListen : true ,
@@ -58,8 +60,7 @@ func (s *NATSInstance) Prepare(process *process.ProcessContext, cfg *config.JetS
5860 NoLog : cfg .NoLog ,
5961 SyncAlways : true ,
6062 }
61- s .Server , err = natsserver .NewServer (opts )
62- if err != nil {
63+ if s .Server , err = natsserver .NewServer (opts ); err != nil {
6364 panic (err )
6465 }
6566 if ! cfg .NoLog {
@@ -75,29 +76,42 @@ func (s *NATSInstance) Prepare(process *process.ProcessContext, cfg *config.JetS
7576 s .WaitForShutdown ()
7677 process .ComponentFinished ()
7778 }()
79+ if ! s .ReadyForConnections (time .Second * 60 ) {
80+ logrus .Fatalln ("NATS did not start in time" )
81+ }
7882 }
79- if ! s .ReadyForConnections (time .Second * 60 ) {
80- logrus .Fatalln ("NATS did not start in time" )
81- }
82- // reuse existing connections
83- if s .nc != nil {
84- return s .js , s .nc
85- }
86- nc , err := natsclient .Connect ("" , natsclient .InProcessServer (s ))
87- if err != nil {
83+
84+ // No existing process connection, create a new one.
85+ if s .nc , err = natsclient .Connect ("" , natsclient .InProcessServer (s .Server )); err != nil {
8886 logrus .Fatalln ("Failed to create NATS client" )
8987 }
90- js , _ := setupNATS (process , cfg , nc )
91- s .js = js
92- s .nc = nc
93- return js , nc
88+ s .js , s .nc = setupNATS (process , cfg , s .nc )
89+ return s .js , s .nc
9490}
9591
9692// nolint:gocyclo
9793func setupNATS (process * process.ProcessContext , cfg * config.JetStream , nc * natsclient.Conn ) (natsclient.JetStreamContext , * natsclient.Conn ) {
94+ jsOpts := []natsclient.JSOpt {}
95+ if cfg .JetStreamDomain != "" {
96+ jsOpts = append (jsOpts , natsclient .Domain (cfg .JetStreamDomain ))
97+ }
98+
9899 if nc == nil {
99100 var err error
100- opts := []natsclient.Option {}
101+ opts := []natsclient.Option {
102+ natsclient .Name ("Dendrite" ),
103+ natsclient .MaxReconnects (- 1 ), // Try forever
104+ natsclient .ReconnectJitter (time .Second , time .Second ),
105+ natsclient .ReconnectWait (time .Second * 10 ),
106+ natsclient .ReconnectHandler (func (c * natsclient.Conn ) {
107+ js , jerr := c .JetStream (jsOpts ... )
108+ if jerr != nil {
109+ logrus .WithError (jerr ).Panic ("Unable to get JetStream context in reconnect handler" )
110+ return
111+ }
112+ checkAndConfigureStreams (process , cfg , js )
113+ }),
114+ }
101115 if cfg .DisableTLSValidation {
102116 opts = append (opts , natsclient .Secure (& tls.Config {
103117 InsecureSkipVerify : true ,
@@ -113,15 +127,19 @@ func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsc
113127 }
114128 }
115129
116- s , err := nc .JetStream ()
130+ js , err := nc .JetStream (jsOpts ... )
117131 if err != nil {
118132 logrus .WithError (err ).Panic ("Unable to get JetStream context" )
119133 return nil , nil
120134 }
135+ checkAndConfigureStreams (process , cfg , js )
136+ return js , nc
137+ }
121138
139+ func checkAndConfigureStreams (process * process.ProcessContext , cfg * config.JetStream , js natsclient.JetStreamContext ) {
122140 for _ , stream := range streams { // streams are defined in streams.go
123141 name := cfg .Prefixed (stream .Name )
124- info , err := s .StreamInfo (name )
142+ info , err := js .StreamInfo (name )
125143 if err != nil && err != natsclient .ErrStreamNotFound {
126144 logrus .WithError (err ).Fatal ("Unable to get stream info" )
127145 }
@@ -153,11 +171,11 @@ func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsc
153171 case info .Config .MaxAge != stream .MaxAge :
154172 // Try updating the stream first, as many things can be updated
155173 // non-destructively.
156- if info , err = s .UpdateStream (stream ); err != nil {
174+ if info , err = js .UpdateStream (stream ); err != nil {
157175 logrus .WithError (err ).Warnf ("Unable to update stream %q, recreating..." , name )
158176 // We failed to update the stream, this is a last attempt to get
159177 // things working but may result in data loss.
160- if err = s .DeleteStream (name ); err != nil {
178+ if err = js .DeleteStream (name ); err != nil {
161179 logrus .WithError (err ).Fatalf ("Unable to delete stream %q" , name )
162180 }
163181 info = nil
@@ -176,7 +194,7 @@ func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsc
176194 namespaced := * stream
177195 namespaced .Name = name
178196 namespaced .Subjects = subjects
179- if _ , err = s .AddStream (& namespaced ); err != nil {
197+ if _ , err = js .AddStream (& namespaced ); err != nil {
180198 logger := logrus .WithError (err ).WithFields (logrus.Fields {
181199 "stream" : namespaced .Name ,
182200 "subjects" : namespaced .Subjects ,
@@ -193,10 +211,9 @@ func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsc
193211 // we can't recover anything that was queued on the disk but we
194212 // will still be able to start and run hopefully in the meantime.
195213 logger .WithError (err ).Error ("Unable to add stream" )
196- sentry .CaptureException (fmt .Errorf ("Unable to add stream %q: %w" , namespaced .Name , err ))
197214
198215 namespaced .Storage = natsclient .MemoryStorage
199- if _ , err = s .AddStream (& namespaced ); err != nil {
216+ if _ , err = js .AddStream (& namespaced ); err != nil {
200217 // We tried to add the stream in-memory instead but something
201218 // went wrong. That's an unrecoverable situation so we will
202219 // give up at this point.
@@ -208,7 +225,6 @@ func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsc
208225 // disk will be left alone, but our ability to recover from a
209226 // future crash will be limited. Yell about it.
210227 err := fmt .Errorf ("Stream %q is running in-memory; this may be due to data corruption in the JetStream storage directory" , namespaced .Name )
211- sentry .CaptureException (err )
212228 process .Degraded (err )
213229 }
214230 }
@@ -229,15 +245,13 @@ func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsc
229245 streamName := cfg .Matrix .JetStream .Prefixed (stream )
230246 for _ , consumer := range consumers {
231247 consumerName := cfg .Matrix .JetStream .Prefixed (consumer ) + "Pull"
232- consumerInfo , err := s .ConsumerInfo (streamName , consumerName )
248+ consumerInfo , err := js .ConsumerInfo (streamName , consumerName )
233249 if err != nil || consumerInfo == nil {
234250 continue
235251 }
236- if err = s .DeleteConsumer (streamName , consumerName ); err != nil {
252+ if err = js .DeleteConsumer (streamName , consumerName ); err != nil {
237253 logrus .WithError (err ).Errorf ("Unable to clean up old consumer %q for stream %q" , consumer , stream )
238254 }
239255 }
240256 }
241-
242- return s , nc
243257}
0 commit comments