Skip to content

Commit 6cba2bf

Browse files
committed
Merge pull request #22 from abligh/automatic-format-detection
Automatic format detection
2 parents 97e2282 + 96e0277 commit 6cba2bf

File tree

3 files changed

+173
-3
lines changed

3 files changed

+173
-3
lines changed

format/automatic.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package format
2+
3+
import (
4+
"bufio"
5+
"bytes"
6+
"errors"
7+
"strconv"
8+
9+
"github.com/jeromer/syslogparser"
10+
"github.com/jeromer/syslogparser/rfc3164"
11+
"github.com/jeromer/syslogparser/rfc5424"
12+
)
13+
14+
/* Selecting an 'Automatic' format detects incoming format (i.e. RFC3164 vs RFC5424) and Framing
15+
* (i.e. RFC6587 s3.4.1 octet counting as described here as RFC6587, and either no framing or
16+
* RFC6587 s3.4.2 octet stuffing / non-transparent framing, described here as either RFC3164
17+
* or RFC6587).
18+
*
19+
* In essence if you don't know which format to select, or have multiple incoming formats, this
20+
* is the one to go for. There is a theoretical performance penalty (it has to look at a few bytes
21+
* at the start of the frame), and a risk that you may parse things you don't want to parse
22+
* (rogue syslog clients using other formats), so if you can be absolutely sure of your syslog
23+
* format, it would be best to select it explicitly.
24+
*/
25+
26+
// Automatic is a format that inspects each incoming message to decide how it
// is framed and which parser should handle it. It carries no state.
type Automatic struct{}
27+
28+
// Possible outcomes of detect.
const (
	detectedUnknown = iota // nothing could be decided from the data seen so far
	detectedRFC3164        // "<...>" before the first space, not followed by a lone digit
	detectedRFC5424        // "<...>" followed by a single digit and then a space
	detectedRFC6587        // data starts with a decimal length followed by a space
)

// detect looks at the first bytes of data and reports which syslog
// format/framing it appears to use.
//
// It returns detectedUnknown with a nil error when data holds no space at a
// positive index (nothing can be decided yet), and detectedUnknown with a
// non-nil error when the first space is not preceded by a '>'.
func detect(data []byte) (detected int, err error) {
	// All formats have a space somewhere; until one shows up (after at least
	// one other byte) there is nothing to decide.
	i := bytes.IndexByte(data, ' ')
	if i <= 0 {
		return detectedUnknown, nil
	}

	// An octet count is made of decimal digits only. strconv.Atoi also accepts
	// a leading '+' or '-', so insist on a leading digit before trusting it;
	// otherwise "-5 ..." would be mistaken for a (negative) frame length.
	if first := data[0]; first >= '0' && first <= '9' {
		if _, convErr := strconv.Atoi(string(data[:i])); convErr == nil {
			return detectedRFC6587, nil
		}
	}

	// Is there a close angle bracket before the ' '? There should be.
	angle := bytes.IndexByte(data, '>')
	if angle < 0 || angle >= i {
		return detectedUnknown, errors.New("No close angle bracket before space")
	}

	// If a single digit immediately follows the angle bracket, then a space,
	// it is RFC5424, as RFC3164 must begin with a letter (month name).
	if angle+2 == i && data[angle+1] >= '0' && data[angle+1] <= '9' {
		return detectedRFC5424, nil
	}
	return detectedRFC3164, nil
}
59+
60+
func (f *Automatic) GetParser(line []byte) syslogparser.LogParser {
61+
switch format, _ := detect(line); format {
62+
case detectedRFC3164:
63+
return rfc3164.NewParser(line)
64+
case detectedRFC5424:
65+
return rfc5424.NewParser(line)
66+
default:
67+
// If the line was an RFC6587 line, the splitter should already have removed the length,
68+
// so one of the above two will be chosen if the line is correctly formed. However, it
69+
// may have a second length illegally placed at the start, in which case the detector
70+
// will return detectedRFC6587. The line may also simply be malformed after the length in
71+
// which case we will have detectedUnknown. In this case we return the simplest parser so
72+
// the illegally formatted line is properly handled
73+
return rfc3164.NewParser(line)
74+
}
75+
}
76+
77+
func (f *Automatic) GetSplitFunc() bufio.SplitFunc {
78+
return f.automaticScannerSplit
79+
}
80+
81+
func (f *Automatic) automaticScannerSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
82+
if atEOF && len(data) == 0 {
83+
return 0, nil, nil
84+
}
85+
86+
switch format, err := detect(data); format {
87+
case detectedRFC6587:
88+
return rfc6587ScannerSplit(data, atEOF)
89+
case detectedRFC3164, detectedRFC5424:
90+
// the default
91+
return bufio.ScanLines(data, atEOF)
92+
default:
93+
if err != nil {
94+
return 0, nil, err
95+
}
96+
// Request more data
97+
return 0, nil, nil
98+
}
99+
}

server.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ import (
1111
)
1212

1313
var (
14-
RFC3164 = &format.RFC3164{} // RFC3164: http://www.ietf.org/rfc/rfc3164.txt
15-
RFC5424 = &format.RFC5424{} // RFC5424: http://www.ietf.org/rfc/rfc5424.txt
16-
RFC6587 = &format.RFC6587{} // RFC6587: http://www.ietf.org/rfc/rfc6587.txt
14+
RFC3164 = &format.RFC3164{} // RFC3164: http://www.ietf.org/rfc/rfc3164.txt
15+
RFC5424 = &format.RFC5424{} // RFC5424: http://www.ietf.org/rfc/rfc5424.txt
16+
RFC6587 = &format.RFC6587{} // RFC6587: http://www.ietf.org/rfc/rfc6587.txt - octet counting variant
17+
Automatic = &format.Automatic{} // Automatically identify the format
1718
)
1819

1920
const (

server_test.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,3 +183,73 @@ func (s *ServerSuite) TestUDP6587(c *C) {
183183
c.Check(handler.LastMessageLength, Equals, int64(len(exampleRFC5424Syslog)))
184184
c.Check(handler.LastError, IsNil)
185185
}
186+
187+
func (s *ServerSuite) TestUDPAutomatic3164(c *C) {
188+
handler := new(HandlerMock)
189+
server := NewServer()
190+
server.SetFormat(Automatic)
191+
server.SetHandler(handler)
192+
server.SetTimeout(10)
193+
server.goParseDatagrams()
194+
server.datagramChannel <- DatagramMessage{[]byte(exampleSyslog), "0.0.0.0"}
195+
close(server.datagramChannel)
196+
server.Wait()
197+
c.Check(handler.LastLogParts["hostname"], Equals, "hostname")
198+
c.Check(handler.LastLogParts["tag"], Equals, "tag")
199+
c.Check(handler.LastLogParts["content"], Equals, "content")
200+
c.Check(handler.LastMessageLength, Equals, int64(len(exampleSyslog)))
201+
c.Check(handler.LastError, IsNil)
202+
}
203+
204+
func (s *ServerSuite) TestUDPAutomatic5424(c *C) {
205+
handler := new(HandlerMock)
206+
server := NewServer()
207+
server.SetFormat(Automatic)
208+
server.SetHandler(handler)
209+
server.SetTimeout(10)
210+
server.goParseDatagrams()
211+
server.datagramChannel <- DatagramMessage{[]byte(exampleRFC5424Syslog), "0.0.0.0"}
212+
close(server.datagramChannel)
213+
server.Wait()
214+
c.Check(handler.LastLogParts["hostname"], Equals, "mymachine.example.com")
215+
c.Check(handler.LastLogParts["facility"], Equals, 4)
216+
c.Check(handler.LastLogParts["message"], Equals, "'su root' failed for lonvick on /dev/pts/8")
217+
c.Check(handler.LastMessageLength, Equals, int64(len(exampleRFC5424Syslog)))
218+
c.Check(handler.LastError, IsNil)
219+
}
220+
221+
func (s *ServerSuite) TestUDPAutomatic3164Plus6587OctetCount(c *C) {
222+
handler := new(HandlerMock)
223+
server := NewServer()
224+
server.SetFormat(Automatic)
225+
server.SetHandler(handler)
226+
server.SetTimeout(10)
227+
server.goParseDatagrams()
228+
framedSyslog := []byte(fmt.Sprintf("%d %s", len(exampleSyslog), exampleSyslog))
229+
server.datagramChannel <- DatagramMessage{[]byte(framedSyslog), "0.0.0.0"}
230+
close(server.datagramChannel)
231+
server.Wait()
232+
c.Check(handler.LastLogParts["hostname"], Equals, "hostname")
233+
c.Check(handler.LastLogParts["tag"], Equals, "tag")
234+
c.Check(handler.LastLogParts["content"], Equals, "content")
235+
c.Check(handler.LastMessageLength, Equals, int64(len(exampleSyslog)))
236+
c.Check(handler.LastError, IsNil)
237+
}
238+
239+
func (s *ServerSuite) TestUDPAutomatic5424Plus6587OctetCount(c *C) {
240+
handler := new(HandlerMock)
241+
server := NewServer()
242+
server.SetFormat(Automatic)
243+
server.SetHandler(handler)
244+
server.SetTimeout(10)
245+
server.goParseDatagrams()
246+
framedSyslog := []byte(fmt.Sprintf("%d %s", len(exampleRFC5424Syslog), exampleRFC5424Syslog))
247+
server.datagramChannel <- DatagramMessage{[]byte(framedSyslog), "0.0.0.0"}
248+
close(server.datagramChannel)
249+
server.Wait()
250+
c.Check(handler.LastLogParts["hostname"], Equals, "mymachine.example.com")
251+
c.Check(handler.LastLogParts["facility"], Equals, 4)
252+
c.Check(handler.LastLogParts["message"], Equals, "'su root' failed for lonvick on /dev/pts/8")
253+
c.Check(handler.LastMessageLength, Equals, int64(len(exampleRFC5424Syslog)))
254+
c.Check(handler.LastError, IsNil)
255+
}

0 commit comments

Comments
 (0)