1515import org .apache .pulsar .client .api .MessageId ;
1616import org .apache .pulsar .client .api .Messages ;
1717import org .apache .pulsar .client .api .PulsarClientException ;
18+ import org .apache .pulsar .client .api .SchemaSerializationException ;
19+ import org .apache .pulsar .client .api .schema .GenericRecord ;
20+ import org .apache .pulsar .client .api .schema .Field ;
1821import org .apache .pulsar .client .admin .PulsarAdmin ;
1922import org .apache .pulsar .client .admin .PulsarAdminException ;
2023import org .apache .pulsar .common .policies .data .TopicStats ;
2427import org .springframework .stereotype .Component ;
2528
2629import javax .annotation .PostConstruct ;
30+ import java .io .IOException ;
2731import java .nio .charset .StandardCharsets ;
2832import java .time .Instant ;
2933import java .util .ArrayList ;
3741@ ConditionalOnProperty (prefix = "spring.pulsar.consumer" , name = "enabled" , havingValue = "true" )
3842public class PulsarSource extends Sourcer {
3943
40- // Map tracking received messages (keyed by topicName + messageId for multi-topic support)
41- private final Map <String , org .apache .pulsar .client .api .Message <byte []>> messagesToAck = new HashMap <>();
44+ // Map tracking received messages (keyed by topicName + messageId for multi-topic support).
45+ // Value is Message<?> because we hold either Message<byte[]> or Message<GenericRecord> depending on
46+ // useAutoConsumeSchema. only use the Message interface from the map (getMessageId, getTopicName, etc.
47+ // for ack and buildHeaders), never the payload type, so a single map with Message<?> is appropriate;
48+ private final Map <String , org .apache .pulsar .client .api .Message <?>> messagesToAck = new HashMap <>();
4249
4350 private Server server ;
4451
@@ -60,49 +67,112 @@ public void startServer() throws Exception {
6067
6168 @ Override
6269 public void read (ReadRequest request , OutputObserver observer ) {
63- // If there are messages not acknowledged, return
6470 if (!messagesToAck .isEmpty ()) {
6571 log .trace ("messagesToAck not empty: {}" , messagesToAck );
6672 return ;
6773 }
6874
69- Consumer <byte []> consumer = null ;
70-
7175 try {
72- // Obtain a consumer with the desired settings.
73- consumer = pulsarConsumerManager .getOrCreateConsumer (request .getCount (), request .getTimeout ().toMillis ());
76+ if (pulsarConsumerProperties .isUseAutoConsumeSchema ()) {
77+ readWithAutoConsume (request , observer );
78+ } else {
79+ readWithBytes (request , observer );
80+ }
81+ } catch (Exception e ) {
82+ log .error ("Failed to read from Pulsar" , e );
83+ throw new RuntimeException (e );
84+ }
85+ }
7486
75- Messages <byte []> batchMessages = consumer .batchReceive ();
87+ private void readWithBytes (ReadRequest request , OutputObserver observer ) throws PulsarClientException {
88+ Consumer <byte []> consumer = pulsarConsumerManager .getOrCreateBytesConsumer (request .getCount (), request .getTimeout ().toMillis ());
89+ Messages <byte []> batchMessages = consumer .batchReceive ();
7690
77- if (batchMessages == null || batchMessages .size () == 0 ) {
78- log .trace ("Received 0 messages, return early." );
79- return ;
80- }
91+ if (batchMessages == null || batchMessages .size () == 0 ) {
92+ log .trace ("Received 0 messages, return early." );
93+ return ;
94+ }
8195
82- // Process each message in the batch.
83- for (org .apache .pulsar .client .api .Message <byte []> pMsg : batchMessages ) {
84- String topicName = pMsg .getTopicName ();
85- String msgId = pMsg .getMessageId ().toString ();
86- String topicMessageIdKey = topicName + msgId ;
87-
88- // TODO : change to .debug or .trace to reduce log noise
89- log .info ("Consumed Pulsar message [topic: {}, id: {}]: {}" , topicName , pMsg .getMessageId (),
90- new String (pMsg .getValue (), StandardCharsets .UTF_8 ));
96+ for (org .apache .pulsar .client .api .Message <byte []> pMsg : batchMessages ) {
97+
98+ // TODO : change to .debug or .trace to reduce log noise
99+ log .info ("Consumed Pulsar message [topic: {}, id: {}]: {}" , pMsg .getTopicName (), pMsg .getMessageId (),
100+ new String (pMsg .getValue (), StandardCharsets .UTF_8 ));
101+ sendMessage (pMsg , pMsg .getValue (), observer );
102+ }
103+ }
104+
105+ private void readWithAutoConsume (ReadRequest request , OutputObserver observer ) throws PulsarClientException {
106+ Consumer <GenericRecord > consumer = pulsarConsumerManager .getOrCreateGenericRecordConsumer (request .getCount (), request .getTimeout ().toMillis ());
107+ Messages <GenericRecord > batchMessages = consumer .batchReceive ();
108+
109+ if (batchMessages == null || batchMessages .size () == 0 ) {
110+ log .trace ("Received 0 messages, return early." );
111+ return ;
112+ }
113+
114+ for (org .apache .pulsar .client .api .Message <GenericRecord > pMsg : batchMessages ) {
91115
92- byte [] offsetBytes = topicMessageIdKey .getBytes (StandardCharsets .UTF_8 );
93- Offset offset = new Offset (offsetBytes );
116+ try {
117+ GenericRecord record = pMsg .getValue (); // This will throw SchemaSerializationException if the message is not valid based on the topic schema
118+ byte [] payloadBytes = pMsg .getData ();
94119
95- Map <String , String > headers = buildHeaders (pMsg );
120+ String decoded = recordToLogString (record );
121+ // TODO : change to .debug or .trace to reduce log noise
122+ log .info ("Consumed Pulsar message (AUTO_CONSUME) [topic: {}, id: {}]: {} bytes, decoded={}" , pMsg .getTopicName (), pMsg .getMessageId (), payloadBytes .length , decoded );
123+ sendMessage (pMsg , payloadBytes , observer );
124+ } catch (Exception e ) {
125+ if (isSchemaValidationFailure (e )) {
126+ throw new RuntimeException ("Schema validation failure" , e );
127+ }
128+ throw new RuntimeException (e );
129+ }
130+ }
131+ }
96132
97- Message message = new Message (pMsg .getValue (), offset , Instant .now (), headers );
98- observer .send (message );
133+ /** Builds offset and headers, sends the message to the observer, and records it for ack. */
134+ private void sendMessage (org .apache .pulsar .client .api .Message <?> pMsg , byte [] payloadBytes , OutputObserver observer ) {
135+ String topicMessageIdKey = pMsg .getTopicName () + pMsg .getMessageId ().toString ();
136+ byte [] offsetBytes = topicMessageIdKey .getBytes (StandardCharsets .UTF_8 );
137+ Offset offset = new Offset (offsetBytes );
138+ Map <String , String > headers = buildHeaders (pMsg );
139+ observer .send (new Message (payloadBytes , offset , Instant .now (), headers ));
140+ messagesToAck .put (topicMessageIdKey , pMsg );
141+ }
99142
100- messagesToAck .put (topicMessageIdKey , pMsg );
143+ /** True if the exception indicates schema deserialization failure. The Pulsar client throws
144+ * SchemaSerializationException when decoding fails (e.g. schema mismatch, bad payload)
145+ * and wraps underlying IOException from the decoder as the cause. */
146+ private static boolean isSchemaValidationFailure (Throwable e ) {
147+ for (Throwable t = e ; t != null ; t = t .getCause ()) {
148+ if (t instanceof SchemaSerializationException ) {
149+ return true ;
101150 }
102- } catch (PulsarClientException e ) {
103- log .error ("Failed to get consumer or receive messages from Pulsar" , e );
104- throw new RuntimeException ("Failed to get consumer or receive messages from Pulsar" , e );
105151 }
152+ return false ;
153+ }
154+
155+ // TODO : remove this logging later to reduce log noise
156+ /** Builds a log-friendly string of the decoded record (field names and values). */
157+ private static String recordToLogString (GenericRecord record ) {
158+ if (record == null ) {
159+ return "null" ;
160+ }
161+ List <Field > fields = record .getFields ();
162+ if (fields == null || fields .isEmpty ()) {
163+ return record .getSchemaType () + ":{}" ;
164+ }
165+ StringBuilder sb = new StringBuilder ();
166+ sb .append (record .getSchemaType ()).append (":{" );
167+ for (int i = 0 ; i < fields .size (); i ++) {
168+ if (i > 0 ) sb .append (", " );
169+ Field f = fields .get (i );
170+ String name = f .getName ();
171+ Object value = record .getField (f );
172+ sb .append (name ).append ("=" ).append (value );
173+ }
174+ sb .append ("}" );
175+ return sb .toString ();
106176 }
107177
108178 @ Override
@@ -128,8 +198,11 @@ public void ack(AckRequest request) {
128198 .toList ();
129199
130200 try {
131- Consumer <byte []> consumer = pulsarConsumerManager .getOrCreateConsumer (0 , 0 );
132- consumer .acknowledge (messageIds );
201+ if (pulsarConsumerProperties .isUseAutoConsumeSchema ()) {
202+ pulsarConsumerManager .getOrCreateGenericRecordConsumer (0 , 0 ).acknowledge (messageIds );
203+ } else {
204+ pulsarConsumerManager .getOrCreateBytesConsumer (0 , 0 ).acknowledge (messageIds );
205+ }
133206 log .info ("Successfully acknowledged {} messages" , messageIds .size ());
134207 } catch (PulsarClientException e ) {
135208 log .error ("Failed to acknowledge Pulsar messages" , e );
@@ -138,9 +211,9 @@ public void ack(AckRequest request) {
138211 }
139212
140213 /**
141- * Builds headers from Pulsar message metadata
214+ * Builds headers from Pulsar message metadata. Works for both Message<?> (byte[] or GenericRecord).
142215 */
143- private Map <String , String > buildHeaders (org .apache .pulsar .client .api .Message <byte [] > pulsarMessage ) {
216+ private Map <String , String > buildHeaders (org .apache .pulsar .client .api .Message <? > pulsarMessage ) {
144217 Map <String , String > headers = new HashMap <>();
145218
146219 headers .put (NumaHeaderKeys .PULSAR_PRODUCER_NAME , pulsarMessage .getProducerName ());
@@ -150,7 +223,6 @@ private Map<String, String> buildHeaders(org.apache.pulsar.client.api.Message<by
150223 headers .put (NumaHeaderKeys .PULSAR_EVENT_TIME , String .valueOf (pulsarMessage .getEventTime ()));
151224 headers .put (NumaHeaderKeys .PULSAR_REDELIVERY_COUNT , String .valueOf (pulsarMessage .getRedeliveryCount ()));
152225
153- // Add message properties as headers
154226 if (pulsarMessage .getProperties () != null && !pulsarMessage .getProperties ().isEmpty ()) {
155227 pulsarMessage .getProperties ().forEach ((key , value ) -> {
156228 if (key != null && value != null ) {
0 commit comments