Skip to content

Commit a16b0cf

Browse files
committed
Add automatic format detection.
Detect the format of incoming syslog packets automatically by using a format type of 'Automatic'. This patch somewhat accidentally adds support for octet count framing (i.e. RFC6587 section 3.4.1) for RFC3164 syslog messages in addition to the currently supported RFC6587 syslog messages, if the automatic option is selected. This would appear useful. This has been tested with all 8 combinations of UDP/TCP, RFC3164/RFC5424 message format, and both framing types specified by RFC6587 (i.e. s3.4.1 octet counting, and s3.4.2 non-transparent framing). A version of util-linux that has been adapted to test these 8 options is here: https://github.com/abligh/util-linux/tree/RFC6587-octet-counting Note that the RFC6587 'format' option as currently presented in fact selects RFC5424 format messages with RFC6587 s3.4.1 framing, whereas the RFC5424 'format' option selects RFC5424 format messages with RFC6587 s3.4.2 framing. I.e. the RFC6587 format option is somewhat confusingly named, as it should be called RFC5424WithRFC6587OctetCounting or similar. Signed-off-by: Alex Bligh <[email protected]>
1 parent 31402d4 commit a16b0cf

File tree

2 files changed

+103
-3
lines changed

2 files changed

+103
-3
lines changed

format/automatic.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package format
2+
3+
import (
4+
"bufio"
5+
"bytes"
6+
"errors"
7+
"strconv"
8+
9+
"github.com/jeromer/syslogparser"
10+
"github.com/jeromer/syslogparser/rfc3164"
11+
"github.com/jeromer/syslogparser/rfc5424"
12+
)
13+
14+
/* Selecting an 'Automatic' format detects incoming format (i.e. RFC3164 vs RFC5424) and Framing
15+
* (i.e. RFC6587 s3.4.1 octet counting as described here as RFC6587, and either no framing or
16+
* RFC6587 s3.4.2 octet stuffing / non-transparent framing, described here as either RFC3164
17+
* or RFC5424).
18+
*
19+
* In essence if you don't know which format to select, or have multiple incoming formats, this
20+
* is the one to go for. There is a theoretical performance penalty (it has to look at a few bytes
21+
* at the start of the frame), and a risk that you may parse things you don't want to parse
22+
* (rogue syslog clients using other formats), so if you can be absolutely sure of your syslog
23+
* format, it would be best to select it explicitly.
24+
*/
25+
26+
// Automatic is the format selector that inspects the start of the incoming
// data instead of assuming a fixed syslog format. It carries no state; its
// GetParser and GetSplitFunc methods choose a parser and a framing split
// function based on what the data looks like.
type Automatic struct{}
27+
28+
// Classifications returned by detect.
const (
	detectedUnknown = iota // no decision possible (yet), or the data is malformed
	detectedRFC3164        // '>' precedes the first space and no single-digit version follows it
	detectedRFC5424        // '>' is followed by exactly one digit and then the first space
	detectedRFC6587        // the data starts with a decimal octet count followed by a space
)

// isOctetCount reports whether b is usable as an octet-counting length
// prefix: one or more ASCII digits whose value fits in an int.
//
// strconv.Atoi on its own is too permissive here because it also accepts a
// leading '+' or '-'; a signed (in particular negative) "length" must never be
// treated as a frame size.
func isOctetCount(b []byte) bool {
	if len(b) == 0 {
		return false
	}
	for _, c := range b {
		if c < '0' || c > '9' {
			return false
		}
	}
	// Atoi is still consulted so that digit runs too large for an int are
	// rejected rather than classified as a length.
	_, convErr := strconv.Atoi(string(b))
	return convErr == nil
}

// detect classifies the beginning of data as one of the detected* constants.
//
// Everything up to the first space is examined:
//   - an unsigned decimal number means an octet-count prefix (detectedRFC6587);
//   - otherwise a '>' must appear before the space, and if exactly one digit
//     sits between that '>' and the space the data is detectedRFC5424 (the
//     digit being the version), else detectedRFC3164 (which must begin with a
//     letter, the month name).
//
// It returns detectedUnknown with a nil error when data contains no space or
// starts with one, and detectedUnknown with a non-nil error when there is no
// '>' before the first space.
func detect(data []byte) (detected int, err error) {
	// All formats have a space somewhere; without one (or with nothing in
	// front of it) no decision can be made.
	i := bytes.IndexByte(data, ' ')
	if i <= 0 {
		return detectedUnknown, nil
	}

	if isOctetCount(data[:i]) {
		return detectedRFC6587, nil
	}

	// Is there a close angle bracket before the ' '? There should be.
	angle := bytes.IndexByte(data, '>')
	if angle < 0 || angle >= i {
		return detectedUnknown, errors.New("No close angle bracket before space")
	}

	// A single digit immediately after the angle bracket, then the space,
	// means RFC5424; RFC3164 must begin with a letter (month name).
	if angle+2 == i && data[angle+1] >= '0' && data[angle+1] <= '9' {
		return detectedRFC5424, nil
	}
	return detectedRFC3164, nil
}
59+
60+
func (f *Automatic) GetParser(line []byte) syslogparser.LogParser {
61+
switch format, _ := detect(line); format {
62+
case detectedRFC3164:
63+
return rfc3164.NewParser(line)
64+
case detectedRFC5424:
65+
return rfc5424.NewParser(line)
66+
default:
67+
// If the line was an RFC6587 line, the splitter should already have removed the length,
68+
// so one of the above two will be chosen if the line is correctly formed. However, it
69+
// may have a second length illegally placed at the start, in which case the detector
70+
// will return detectedRFC6587. The line may also simply be malformed after the length in
71+
// which case we will have detectedUnknown. In this case we return the simplest parser so
72+
// the illegally formatted line is properly handled
73+
return rfc3164.NewParser(line)
74+
}
75+
}
76+
77+
func (f *Automatic) GetSplitFunc() bufio.SplitFunc {
78+
return f.automaticScannerSplit
79+
}
80+
81+
func (f *Automatic) automaticScannerSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
82+
if atEOF && len(data) == 0 {
83+
return 0, nil, nil
84+
}
85+
86+
switch format, err := detect(data); format {
87+
case detectedRFC6587:
88+
return rfc6587ScannerSplit(data, atEOF)
89+
case detectedRFC3164, detectedRFC5424:
90+
// the default
91+
return bufio.ScanLines(data, atEOF)
92+
default:
93+
if err != nil {
94+
return 0, nil, err
95+
}
96+
// Request more data
97+
return 0, nil, nil
98+
}
99+
}

server.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ import (
1111
)
1212

1313
var (
14-
RFC3164 = &format.RFC3164{} // RFC3164: http://www.ietf.org/rfc/rfc3164.txt
15-
RFC5424 = &format.RFC5424{} // RFC5424: http://www.ietf.org/rfc/rfc5424.txt
16-
RFC6587 = &format.RFC6587{} // RFC6587: http://www.ietf.org/rfc/rfc6587.txt
14+
RFC3164 = &format.RFC3164{} // RFC3164: http://www.ietf.org/rfc/rfc3164.txt
15+
RFC5424 = &format.RFC5424{} // RFC5424: http://www.ietf.org/rfc/rfc5424.txt
16+
RFC6587 = &format.RFC6587{} // RFC6587: http://www.ietf.org/rfc/rfc6587.txt - octet counting variant
17+
Automatic = &format.Automatic{} // Automatically identify the format
1718
)
1819

1920
const (

0 commit comments

Comments
 (0)