From 99e973155cb94800d1f2f91c86b276be8d974ef4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reto=20Sch=C3=BCttel?= Date: Tue, 14 Jun 2016 17:53:39 +0200 Subject: [PATCH] Correctly handle reads by the input module that are not aligned to a newline The input plugin might not align the reads by the newlines (e.g. stdin). For example stdin reads 16384 bytes and passes them to the codec, regardless of whether that read ends with a newline. This commit fixes this by keeping the last 'partial' line and prepending it to the input of the next decode call. In our environment this change didn't have any noticeable effect on the performance. Fixes Issue #37. --- lib/logstash/codecs/multiline.rb | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/logstash/codecs/multiline.rb b/lib/logstash/codecs/multiline.rb index 9f633c9..83b222d 100644 --- a/lib/logstash/codecs/multiline.rb +++ b/lib/logstash/codecs/multiline.rb @@ -187,8 +187,25 @@ def accept(listener) end def decode(text, &block) - text = @converter.convert(text) - text.split("\n").each do |line| + parts = text.split("\n") + + # oftentimes the sysread done by the input plugin + # is not perfectly aligned with newlines. In these + # cases (no newline at string ending) we keep that part + # as a carry over and will reattach it at the next + # decode call + + if @carry_over + parts[0] = @carry_over + parts[0] + @carry_over = nil + end + + unless text =~ /\n\Z/ + @carry_over = parts.pop + end + + parts.each do |line| + line = @converter.convert(line) match = @grok.match(line) @logger.debug("Multiline", :pattern => @pattern, :text => line, :match => !match.nil?, :negate => @negate)