99 "google.golang.org/api/option"
1010 "google.golang.org/grpc"
1111 "google.golang.org/grpc/credentials/insecure"
12+ "google.golang.org/grpc/keepalive"
1213 "k8s.io/klog/v2"
1314
1415 "open-cluster-management.io/sdk-go/pkg/cloudevents/generic/options"
@@ -30,8 +31,7 @@ type pubsubAgentTransport struct {
3031 subscriber * pubsub.Subscriber
3132 // Subscriber for resync broadcasts
3233 resyncSubscriber * pubsub.Subscriber
33- // TODO: handle error channel
34- errorChan chan error
34+ errorChan chan error
3535}
3636
3737// NewAgentOptions creates a new CloudEventsAgentOptions for Pub/Sub.
@@ -49,23 +49,28 @@ func NewAgentOptions(pubsubOptions *PubSubOptions,
4949}
5050
5151func (o * pubsubAgentTransport ) Connect (ctx context.Context ) error {
52- options := []option.ClientOption {}
52+ clientOptions := []option.ClientOption {}
5353 if o .CredentialsFile != "" {
54- options = append (options , option .WithCredentialsFile (o .CredentialsFile ))
54+ clientOptions = append (clientOptions , option .WithCredentialsFile (o .CredentialsFile ))
5555 }
5656 if o .Endpoint != "" {
57- options = append (options , option .WithEndpoint (o .Endpoint ))
57+ clientOptions = append (clientOptions , option .WithEndpoint (o .Endpoint ))
5858 if o .CredentialsFile == "" {
5959 // use the insecure connection for local development/testing when no credentials are provided
6060 pubsubConn , err := grpc .NewClient (o .Endpoint , grpc .WithTransportCredentials (insecure .NewCredentials ()))
6161 if err != nil {
6262 return err
6363 }
64- options = append (options , option .WithGRPCConn (pubsubConn ))
64+ clientOptions = append (clientOptions , option .WithGRPCConn (pubsubConn ))
6565 }
6666 }
6767
68- client , err := pubsub .NewClient (ctx , o .ProjectID , options ... )
68+ if o .KeepaliveSettings != nil {
69+ // config keepalive parameters for pubsub client
70+ clientOptions = append (clientOptions , option .WithGRPCDialOption (grpc .WithKeepaliveParams (toGRPCKeepaliveParamater (o .KeepaliveSettings ))))
71+ }
72+
73+ client , err := pubsub .NewClient (ctx , o .ProjectID , clientOptions ... )
6974 if err != nil {
7075 return err
7176 }
@@ -77,6 +82,13 @@ func (o *pubsubAgentTransport) Connect(ctx context.Context) error {
7782 o .subscriber = client .Subscriber (o .Subscriptions .SourceEvents )
7883 o .resyncSubscriber = client .Subscriber (o .Subscriptions .SourceBroadcast )
7984
85+ // configure receive settings if provided
86+ if o .ReceiveSettings != nil {
87+ receiveSettings := toPubSubReceiveSettings (o .ReceiveSettings )
88+ o .subscriber .ReceiveSettings = receiveSettings
89+ o .resyncSubscriber .ReceiveSettings = receiveSettings
90+ }
91+
8092 return nil
8193}
8294
@@ -88,7 +100,7 @@ func (o *pubsubAgentTransport) Send(ctx context.Context, evt cloudevents.Event)
88100
89101 eventType , err := types .ParseCloudEventsType (evt .Context .GetType ())
90102 if err != nil {
91- return fmt .Errorf ("unsupported event type %s, %v" , eventType , err )
103+ return fmt .Errorf ("unsupported event type %s, %v" , evt . Context . GetType () , err )
92104 }
93105
94106 // determine publisher based on event type
@@ -105,10 +117,6 @@ func (o *pubsubAgentTransport) Send(ctx context.Context, evt cloudevents.Event)
105117}
106118
107119func (o * pubsubAgentTransport ) Receive (ctx context.Context , fn options.ReceiveHandlerFn ) error {
108- // create error channels for both subscribers
109- subscriberErrChan := make (chan error , 1 )
110- resyncSubscriberErrChan := make (chan error , 1 )
111-
112120 // start the subscriber for spec updates
113121 go func () {
114122 err := o .subscriber .Receive (ctx , func (ctx context.Context , msg * pubsub.Message ) {
@@ -123,7 +131,15 @@ func (o *pubsubAgentTransport) Receive(ctx context.Context, fn options.ReceiveHa
123131 msg .Ack ()
124132 })
125133 if err != nil {
126- subscriberErrChan <- fmt .Errorf ("subscriber is interrupted by error: %w" , err )
134+ // When keepalive is enabled, the Pub/Sub client's Receive call automatically retries on retryable errors.
135+ // See: https://github.com/googleapis/google-cloud-go/blob/b8e70aa0056a3e126bc36cb7bf242d987f32c0bd/pubsub/service.go#L51
136+ // If Receive returns an error, it’s usually due to a non-retryable issue or service outage.
137+ // In such cases, we send the error to the error channel to signal the source/agent client to reconnect later.
138+ select {
139+ case o .errorChan <- fmt .Errorf ("subscriber is interrupted by error: %w" , err ):
140+ default :
141+ klog .Warningf ("error channel full, dropping subscriber error: %v" , err )
142+ }
127143 }
128144 }()
129145
@@ -141,19 +157,21 @@ func (o *pubsubAgentTransport) Receive(ctx context.Context, fn options.ReceiveHa
141157 msg .Ack ()
142158 })
143159 if err != nil {
144- resyncSubscriberErrChan <- fmt .Errorf ("resync subscriber is interrupted by error: %w" , err )
160+ // When keepalive is enabled, the Pub/Sub client's Receive call automatically retries on retryable errors.
161+ // See: https://github.com/googleapis/google-cloud-go/blob/b8e70aa0056a3e126bc36cb7bf242d987f32c0bd/pubsub/service.go#L51
162+ // If Receive returns an error, it’s usually due to a non-retryable issue or service outage.
163+ // In such cases, we send the error to the error channel to signal the source/agent client to reconnect later.
164+ select {
165+ case o .errorChan <- fmt .Errorf ("resync subscriber is interrupted by error: %w" , err ):
166+ default :
167+ klog .Warningf ("error channel full, dropping resync subscriber error: %v" , err )
168+ }
145169 }
146170 }()
147171
148- // wait for either subscriber to error or context cancellation
149- select {
150- case err := <- subscriberErrChan :
151- return err
152- case err := <- resyncSubscriberErrChan :
153- return err
154- case <- ctx .Done ():
155- return ctx .Err ()
156- }
172+ // wait for context cancellation or timeout
173+ <- ctx .Done ()
174+ return ctx .Err ()
157175}
158176
159177func (o * pubsubAgentTransport ) Close (ctx context.Context ) error {
@@ -163,3 +181,44 @@ func (o *pubsubAgentTransport) Close(ctx context.Context) error {
163181func (o * pubsubAgentTransport ) ErrorChan () <- chan error {
164182 return o .errorChan
165183}
184+
185+ // toGRPCKeepaliveParamater converts our KeepaliveSettings to GRPC ClientParameters.
186+ func toGRPCKeepaliveParamater (settings * KeepaliveSettings ) keepalive.ClientParameters {
187+ keepaliveParamater := keepalive.ClientParameters {
188+ PermitWithoutStream : settings .PermitWithoutStream ,
189+ }
190+ if settings .Time > 0 {
191+ keepaliveParamater .Time = settings .Time
192+ }
193+ if settings .Timeout > 0 {
194+ keepaliveParamater .Timeout = settings .Timeout
195+ }
196+
197+ return keepaliveParamater
198+ }
199+
200+ // toPubSubReceiveSettings converts our ReceiveSettings to Pub/Sub ReceiveSettings.
201+ func toPubSubReceiveSettings (settings * ReceiveSettings ) pubsub.ReceiveSettings {
202+ receiveSettings := pubsub.ReceiveSettings {}
203+
204+ if settings .MaxExtension > 0 {
205+ receiveSettings .MaxExtension = settings .MaxExtension
206+ }
207+ if settings .MaxDurationPerAckExtension > 0 {
208+ receiveSettings .MaxDurationPerAckExtension = settings .MaxDurationPerAckExtension
209+ }
210+ if settings .MinDurationPerAckExtension > 0 {
211+ receiveSettings .MinDurationPerAckExtension = settings .MinDurationPerAckExtension
212+ }
213+ if settings .MaxOutstandingMessages > 0 {
214+ receiveSettings .MaxOutstandingMessages = settings .MaxOutstandingMessages
215+ }
216+ if settings .MaxOutstandingBytes > 0 {
217+ receiveSettings .MaxOutstandingBytes = settings .MaxOutstandingBytes
218+ }
219+ if settings .NumGoroutines > 0 {
220+ receiveSettings .NumGoroutines = settings .NumGoroutines
221+ }
222+
223+ return receiveSettings
224+ }
0 commit comments