New syslog parser for syslog datasource (#1554)

This commit is contained in:
blotus 2022-06-08 15:16:58 +02:00 committed by GitHub
parent 4b311684ab
commit bdda8691ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 1615 additions and 92 deletions

View file

@ -0,0 +1,255 @@
package rfc3164
import (
"fmt"
"time"
"github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal/parser/utils"
)
// RFC3164Option is a functional option applied to an RFC3164 parser.
type RFC3164Option func(*RFC3164)

// RFC3164 carries both the fields extracted from a syslog line and the
// scanning state used while extracting them.
type RFC3164 struct {
	// Parsed fields.
	PRI       int
	Timestamp time.Time
	Hostname  string
	Tag       string
	Message   string
	PID       string

	// Scanning state.
	len      int    // number of bytes in buf
	position int    // index of the next byte to read from buf
	buf      []byte // line being parsed

	// Behaviour switches.
	useCurrentYear bool // If no year is specified in the timestamp, use the current year
	strictHostname bool // If the hostname contains invalid characters or is not an IP, return an error
}
// PRI_MAX_LEN is the maximum length of the PRI value.
const PRI_MAX_LEN = 3

// VALID_TIMESTAMPS lists the layouts tried, in order, against the timestamp
// field. Order is important: the layout with the most information must come
// first because matching stops at the first layout that parses.
var VALID_TIMESTAMPS = []string{
	time.RFC3339,
	"Jan 02 15:04:05 2006",
	time.Stamp + " 2006",
	"Jan 02 15:04:05",
	time.Stamp,
}
// WithCurrentYear returns an option that sets the parser's useCurrentYear
// flag, so a timestamp parsed without a year is given the current one.
func WithCurrentYear() RFC3164Option {
	enable := func(parser *RFC3164) {
		parser.useCurrentYear = true
	}
	return enable
}
// WithStrictHostname returns an option that sets the parser's strictHostname
// flag, so hostnames failing validation are rejected.
func WithStrictHostname() RFC3164Option {
	enable := func(parser *RFC3164) {
		parser.strictHostname = true
	}
	return enable
}
// parsePRI reads the "<NNN>" priority header at the current position and
// stores its numeric value in r.PRI.
//
// It returns an error when the header does not start with '<', contains a
// byte that is not a digit, holds a value above 999, or is not closed by '>'.
func (r *RFC3164) parsePRI() error {
	// An exhausted buffer cannot hold a PRI; report it instead of indexing
	// past the end.
	if r.position >= r.len || r.buf[r.position] != '<' {
		return fmt.Errorf("PRI must start with '<'")
	}
	r.position++

	pri := 0
	for r.position < r.len {
		c := r.buf[r.position]
		if c == '>' {
			r.position++
			break
		}
		if c < '0' || c > '9' {
			return fmt.Errorf("PRI must be a number")
		}
		// Stop accumulating once the value is already out of range. Scanning
		// continues so the order in which errors are reported is unchanged,
		// but an arbitrarily long run of digits can no longer overflow int
		// and wrap back into the accepted range.
		if pri <= 999 {
			pri = pri*10 + int(c-'0')
		}
		r.position++
	}
	if pri > 999 {
		return fmt.Errorf("PRI must be up to 3 characters long")
	}
	// Reaching the end of the buffer without having consumed '>' means the
	// header was never closed.
	if r.position == r.len && r.buf[r.position-1] != '>' {
		return fmt.Errorf("PRI must end with '>'")
	}
	r.PRI = pri
	return nil
}
// parseTimestamp tries each layout of VALID_TIMESTAMPS, in order, against
// the bytes at the current position and keeps the first one that parses.
// The candidate text is always exactly as long as the layout string.
//
// When useCurrentYear is set and the parsed year is 0, the year is replaced
// by the current one. On success the cursor is moved past the timestamp and
// one further byte (the separator, which is not inspected).
func (r *RFC3164) parseTimestamp() error {
	matched := false
	for _, layout := range VALID_TIMESTAMPS {
		end := r.position + len(layout)
		if end > r.len {
			// Not enough input left for this layout.
			continue
		}
		parsed, err := time.Parse(layout, string(r.buf[r.position:end]))
		if err != nil {
			continue
		}
		r.Timestamp, r.position, matched = parsed, end, true
		break
	}
	if !matched {
		return fmt.Errorf("timestamp is not valid")
	}

	if r.useCurrentYear && r.Timestamp.Year() == 0 {
		ts := r.Timestamp
		r.Timestamp = time.Date(time.Now().Year(), ts.Month(), ts.Day(), ts.Hour(), ts.Minute(), ts.Second(), ts.Nanosecond(), ts.Location())
	}

	r.position++
	return nil
}
// parseHostname reads bytes up to the next space (or the end of the buffer)
// as the hostname and consumes the space when there is one.
//
// With strictHostname set the value must pass utils.IsValidHostnameOrIP.
// An empty hostname is always an error.
func (r *RFC3164) parseHostname() error {
	var name string
	// The cursor may already sit past the end of the buffer; in that case
	// nothing is read and the hostname stays empty.
	if start := r.position; start < r.len {
		end := start
		for end < r.len && r.buf[end] != ' ' {
			end++
		}
		name = string(r.buf[start:end])
		r.position = end
		if end < r.len {
			r.position++ // consume the separating space
		}
	}

	if r.strictHostname && !utils.IsValidHostnameOrIP(name) {
		return fmt.Errorf("hostname is not valid")
	}
	if name == "" {
		return fmt.Errorf("hostname is empty")
	}
	r.Hostname = name
	return nil
}
// parseTag reads the tag (a run of bytes accepted by utils.IsAlphaNumeric)
// and, when it is directly followed by "[digits]", the PID.
//
// We do not enforce tag len as quite a lot of syslog clients send tags with
// more than 32 chars.
func (r *RFC3164) parseTag() error {
	tagStart := r.position
	for r.position < r.len && utils.IsAlphaNumeric(r.buf[r.position]) {
		r.position++
	}
	if r.position == tagStart {
		return fmt.Errorf("tag is empty")
	}
	r.Tag = string(r.buf[tagStart:r.position])

	// No PID section: either the input ended or the next byte is not '['.
	if r.position == r.len || r.buf[r.position] != '[' {
		return nil
	}
	r.position++ // skip '['

	pidStart := r.position
	for r.position < r.len {
		c := r.buf[r.position]
		if c == ']' {
			r.PID = string(r.buf[pidStart:r.position])
			r.position++
			return nil
		}
		if c < '0' || c > '9' {
			return fmt.Errorf("pid inside tag must be a number")
		}
		r.position++
	}
	// The buffer ended before the closing bracket.
	return fmt.Errorf("pid inside tag must be closed with ']'")
}
// parseMessage parses the tag (and optional PID) through parseTag, skips an
// optional ':' and any following spaces, and stores the rest of the buffer
// in r.Message. It fails when nothing is left for the message.
func (r *RFC3164) parseMessage() error {
	if err := r.parseTag(); err != nil {
		return err
	}
	if r.position == r.len {
		return fmt.Errorf("message is empty")
	}

	if r.buf[r.position] == ':' {
		r.position++
	}
	for r.position < r.len && r.buf[r.position] == ' ' {
		r.position++
	}
	if r.position >= r.len {
		return fmt.Errorf("message is empty")
	}

	r.Message = string(r.buf[r.position:r.len])
	return nil
}
// Parse parses message and fills the exported fields of r (PRI, Timestamp,
// Hostname, Tag, PID and Message). It returns the error of the first
// section that fails; fields parsed before the failure keep their values.
//
// The cursor and the PID are reset on every call so a parser can be reused:
// without the reset a second call would resume at the offset where the
// previous one stopped, and a line without a PID would keep the previous
// line's PID.
func (r *RFC3164) Parse(message []byte) error {
	r.position = 0
	r.PID = ""
	r.len = len(message)
	if r.len == 0 {
		return fmt.Errorf("message is empty")
	}
	r.buf = message

	if err := r.parsePRI(); err != nil {
		return err
	}
	if err := r.parseTimestamp(); err != nil {
		return err
	}
	if err := r.parseHostname(); err != nil {
		return err
	}
	// parseMessage also takes care of the tag and the PID.
	return r.parseMessage()
}
// NewRFC3164Parser allocates a parser and applies opts to it in the order
// they were given.
func NewRFC3164Parser(opts ...RFC3164Option) *RFC3164 {
	parser := new(RFC3164)
	for i := range opts {
		opts[i](parser)
	}
	return parser
}

View file

@ -0,0 +1,370 @@
package rfc3164
import (
"testing"
"time"
)
// TestPri feeds PRI headers to parsePRI and checks either the parsed value
// or the exact error text.
func TestPri(t *testing.T) {
	cases := []struct {
		input       string
		expected    int
		expectedErr string
	}{
		{"<0>", 0, ""},
		{"<19>", 19, ""},
		{"<200>", 200, ""},
		{"<4999>", 0, "PRI must be up to 3 characters long"},
		{"<123", 0, "PRI must end with '>'"},
		{"123>", 0, "PRI must start with '<'"},
		{"<abc>", 0, "PRI must be a number"},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			r := &RFC3164{}
			r.buf = []byte(tc.input)
			r.len = len(r.buf)

			err := r.parsePRI()
			if err != nil {
				if tc.expectedErr == "" {
					t.Errorf("unexpected error: %s", err.Error())
					return
				}
				if err.Error() != tc.expectedErr {
					t.Errorf("expected error %s, got %s", tc.expectedErr, err.Error())
				}
				return
			}
			if tc.expectedErr != "" {
				t.Errorf("expected error %s, got no error", tc.expectedErr)
				return
			}
			if r.PRI != tc.expected {
				t.Errorf("expected %d, got %d", tc.expected, r.PRI)
			}
		})
	}
}
// TestTimestamp checks parseTimestamp against the supported layouts, with
// and without the WithCurrentYear option.
func TestTimestamp(t *testing.T) {
	// Derived from the wall clock: a hard-coded year only passes during
	// that year when WithCurrentYear is in effect.
	currentYearTs := time.Date(time.Now().Year(), time.May, 20, 9, 33, 54, 0, time.UTC).Format(time.RFC3339)

	tests := []struct {
		input       string
		expected    string
		expectedErr string
		currentYear bool
	}{
		{"May 20 09:33:54", "0000-05-20T09:33:54Z", "", false},
		{"May 20 09:33:54", currentYearTs, "", true},
		{"May 20 09:33:54 2022", "2022-05-20T09:33:54Z", "", false},
		// A single-digit day is padded with a space, hence the two spaces:
		// none of the layouts can match "May 1 ..." because the candidate
		// text must be exactly as long as the layout.
		{"May  1 09:33:54 2022", "2022-05-01T09:33:54Z", "", false},
		{"May 01 09:33:54 2021", "2021-05-01T09:33:54Z", "", true},
		{"foobar", "", "timestamp is not valid", false},
	}
	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			opts := []RFC3164Option{}
			if test.currentYear {
				opts = append(opts, WithCurrentYear())
			}
			r := NewRFC3164Parser(opts...)
			r.buf = []byte(test.input)
			r.len = len(r.buf)

			err := r.parseTimestamp()
			if err != nil {
				if test.expectedErr == "" {
					t.Errorf("unexpected error: %s", err.Error())
					return
				}
				if err.Error() != test.expectedErr {
					t.Errorf("expected error %s, got %s", test.expectedErr, err.Error())
				}
				return
			}
			if test.expectedErr != "" {
				t.Errorf("expected error %s, got no error", test.expectedErr)
				return
			}
			if got := r.Timestamp.Format(time.RFC3339); got != test.expected {
				t.Errorf("expected %s, got %s", test.expected, got)
			}
		})
	}
}
// TestHostname runs parseHostname over plain, malformed and over-long
// hostnames, with and without the WithStrictHostname option.
func TestHostname(t *testing.T) {
	cases := []struct {
		input          string
		expected       string
		expectedErr    string
		strictHostname bool
	}{
		{"127.0.0.1", "127.0.0.1", "", false},
		{"::1", "::1", "", false},
		{"foo.-bar", "", "hostname is not valid", true},
		{"foo-.bar", "", "hostname is not valid", true},
		{"foo123.bar", "foo123.bar", "", true},
		{"a..", "", "hostname is not valid", true},
		{"foo.bar", "foo.bar", "", false},
		{"foo,bar", "foo,bar", "", false},
		{"foo,bar", "", "hostname is not valid", true},
		{"", "", "hostname is empty", false},
		{".", ".", "", true},
		{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "", "hostname is not valid", true},
		{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "", "hostname is not valid", true},
		{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "", false},
		{"a.foo-", "", "hostname is not valid", true},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			opts := []RFC3164Option{}
			if tc.strictHostname {
				opts = append(opts, WithStrictHostname())
			}
			r := NewRFC3164Parser(opts...)
			r.buf = []byte(tc.input)
			r.len = len(r.buf)

			err := r.parseHostname()
			if err != nil {
				if tc.expectedErr == "" {
					t.Errorf("unexpected error: %s", err.Error())
					return
				}
				if err.Error() != tc.expectedErr {
					t.Errorf("expected error %s, got %s", tc.expectedErr, err.Error())
				}
				return
			}
			if tc.expectedErr != "" {
				t.Errorf("expected error %s, got no error", tc.expectedErr)
				return
			}
			if r.Hostname != tc.expected {
				t.Errorf("expected %s, got %s", tc.expected, r.Hostname)
			}
		})
	}
}
// TestTag checks the tag and PID extracted by parseTag, and the errors
// reported for a missing tag or a malformed PID section.
func TestTag(t *testing.T) {
	tests := []struct {
		input       string
		expected    string
		expectedPID string
		expectedErr string
	}{
		{"foobar", "foobar", "", ""},
		{"foobar[42]", "foobar", "42", ""},
		{"", "", "", "tag is empty"},
		{"foobar[", "", "", "pid inside tag must be closed with ']'"},
		{"foobar[42", "", "", "pid inside tag must be closed with ']'"},
		{"foobar[asd]", "foobar", "", "pid inside tag must be a number"},
	}
	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			r := &RFC3164{}
			r.buf = []byte(test.input)
			r.len = len(r.buf)

			err := r.parseTag()
			if err != nil {
				if test.expectedErr == "" {
					t.Errorf("unexpected error: %s", err.Error())
					return
				}
				if err.Error() != test.expectedErr {
					t.Errorf("expected error %s, got %s", test.expectedErr, err.Error())
				}
				return
			}
			if test.expectedErr != "" {
				t.Errorf("expected error %s, got no error", test.expectedErr)
				return
			}
			if r.Tag != test.expected {
				t.Errorf("expected %s, got %s", test.expected, r.Tag)
			}
			if r.PID != test.expectedPID {
				// Report the PID values under comparison, not the tag and message.
				t.Errorf("expected %s, got %s", test.expectedPID, r.PID)
			}
		})
	}
}
// TestMessage checks the message text extracted by parseMessage after the
// tag, the optional PID and the optional ':' separator.
func TestMessage(t *testing.T) {
	tests := []struct {
		input       string
		expected    string
		expectedErr string
	}{
		{"foobar: pouet", "pouet", ""},
		{"foobar[42]: test", "test", ""},
		{"foobar[123]: this is a test", "this is a test", ""},
		{"foobar[123]: ", "", "message is empty"},
		{"foobar[123]:", "", "message is empty"},
	}
	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			r := &RFC3164{}
			r.buf = []byte(test.input)
			r.len = len(r.buf)

			err := r.parseMessage()
			if err != nil {
				if test.expectedErr == "" {
					t.Errorf("unexpected error: %s", err.Error())
					return
				}
				if err.Error() != test.expectedErr {
					t.Errorf("expected error %s, got %s", test.expectedErr, err.Error())
				}
				return
			}
			if test.expectedErr != "" {
				t.Errorf("expected error %s, got no error", test.expectedErr)
				return
			}
			if r.Message != test.expected {
				// Report the message that was compared, not the tag.
				t.Errorf("expected message %s, got %s", test.expected, r.Message)
			}
		})
	}
}
// TestParse runs complete lines through Parse and checks every extracted
// field, or the exact error text for malformed input.
func TestParse(t *testing.T) {
	type expected struct {
		Timestamp time.Time
		Hostname  string
		Tag       string
		PID       string
		Message   string
		PRI       int
	}
	tests := []struct {
		input       string
		expected    expected
		expectedErr string
		opts        []RFC3164Option
	}{
		{
			"<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: foo", expected{
				Timestamp: time.Date(0, time.May, 20, 9, 33, 54, 0, time.UTC),
				Hostname:  "UDMPRO,a2edd0c6ae48,udm-1.10.0.3686",
				Tag:       "kernel",
				PID:       "",
				Message:   "foo",
				PRI:       12,
			}, "", []RFC3164Option{},
		},
		{
			// WithCurrentYear fills in the year from the wall clock, so the
			// expectation must do the same instead of hard-coding a year.
			"<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: foo", expected{
				Timestamp: time.Date(time.Now().Year(), time.May, 20, 9, 33, 54, 0, time.UTC),
				Hostname:  "UDMPRO,a2edd0c6ae48,udm-1.10.0.3686",
				Tag:       "kernel",
				PID:       "",
				Message:   "foo",
				PRI:       12,
			}, "", []RFC3164Option{WithCurrentYear()},
		},
		{
			"<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: foo", expected{}, "hostname is not valid", []RFC3164Option{WithStrictHostname()},
		},
		{
			"foobar", expected{}, "PRI must start with '<'", []RFC3164Option{},
		},
		{
			"<12>", expected{}, "timestamp is not valid", []RFC3164Option{},
		},
		{
			"<12 May 02 09:33:54 foo.bar", expected{}, "PRI must be a number", []RFC3164Option{},
		},
		{
			"<12>May 02 09:33:54", expected{}, "hostname is empty", []RFC3164Option{},
		},
		{
			"<12>May 02 09:33:54 foo.bar", expected{}, "tag is empty", []RFC3164Option{},
		},
		{
			"<12>May 02 09:33:54 foo.bar bla[42", expected{}, "pid inside tag must be closed with ']'", []RFC3164Option{},
		},
		{
			"<12>May 02 09:33:54 foo.bar bla[42]", expected{}, "message is empty", []RFC3164Option{},
		},
		{
			"<12>May 02 09:33:54 foo.bar bla[42]: ", expected{}, "message is empty", []RFC3164Option{},
		},
		{
			"<12>May 02 09:33:54 foo.bar bla", expected{}, "message is empty", []RFC3164Option{},
		},
		{
			"<12>May 02 09:33:54 foo.bar bla:", expected{}, "message is empty", []RFC3164Option{},
		},
		{
			"", expected{}, "message is empty", []RFC3164Option{},
		},
		{
			`<13>1 2021-05-18T11:58:40.828081+02:00 mantis sshd 49340 - [timeQuality isSynced="0" tzKnown="1"] blabla`, expected{}, "timestamp is not valid", []RFC3164Option{},
		},
		{
			// A single-digit day is space padded ("Jun  2"): with a single
			// space no layout can match the timestamp.
			`<46>Jun  2 06:55:39 localhost haproxy[27213]: Connect from 100.100.100.99:52611 to 100.100.100.99:443 (https_shared-merged/HTTP)\\n 10.0.0.1}`, expected{
				Timestamp: time.Date(time.Now().Year(), time.June, 2, 6, 55, 39, 0, time.UTC),
				Hostname:  "localhost",
				Tag:       "haproxy",
				PID:       "27213",
				Message:   `Connect from 100.100.100.99:52611 to 100.100.100.99:443 (https_shared-merged/HTTP)\\n 10.0.0.1}`,
				PRI:       46,
			}, "", []RFC3164Option{WithCurrentYear()},
		},
		{
			`<46>Jun  2 06:55:39 2022 localhost haproxy[27213]: Connect from 100.100.100.99:52611 to 100.100.100.99:443 (https_shared-merged/HTTP)\\n 10.0.0.1}`, expected{
				Timestamp: time.Date(2022, time.June, 2, 6, 55, 39, 0, time.UTC),
				Hostname:  "localhost",
				Tag:       "haproxy",
				PID:       "27213",
				Message:   `Connect from 100.100.100.99:52611 to 100.100.100.99:443 (https_shared-merged/HTTP)\\n 10.0.0.1}`,
				PRI:       46,
			}, "", []RFC3164Option{},
		},
	}
	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			r := NewRFC3164Parser(test.opts...)
			err := r.Parse([]byte(test.input))
			if err != nil {
				if test.expectedErr == "" {
					t.Errorf("unexpected error: '%s'", err.Error())
					return
				}
				if err.Error() != test.expectedErr {
					t.Errorf("expected error '%s', got '%s'", test.expectedErr, err.Error())
				}
				return
			}
			if test.expectedErr != "" {
				t.Errorf("expected error '%s', got no error", test.expectedErr)
				return
			}
			// Equal compares instants; != on time.Time also compares the
			// internal location pointer.
			if !r.Timestamp.Equal(test.expected.Timestamp) {
				t.Errorf("expected timestamp '%s', got '%s'", test.expected.Timestamp, r.Timestamp)
			}
			if r.Hostname != test.expected.Hostname {
				t.Errorf("expected hostname '%s', got '%s'", test.expected.Hostname, r.Hostname)
			}
			if r.Tag != test.expected.Tag {
				t.Errorf("expected tag '%s', got '%s'", test.expected.Tag, r.Tag)
			}
			if r.PID != test.expected.PID {
				t.Errorf("expected pid '%s', got '%s'", test.expected.PID, r.PID)
			}
			if r.Message != test.expected.Message {
				t.Errorf("expected message '%s', got '%s'", test.expected.Message, r.Message)
			}
			if r.PRI != test.expected.PRI {
				t.Errorf("expected pri '%d', got '%d'", test.expected.PRI, r.PRI)
			}
		})
	}
}

View file

@ -0,0 +1,62 @@
package rfc3164
import "testing"
// e receives the last benchmark result so the Parse calls have an
// observable effect outside the benchmark loop.
var e error

// BenchmarkParse measures parser construction plus Parse over valid and
// invalid lines; each input is run as its own sub-benchmark.
func BenchmarkParse(b *testing.B) {
	benchmarks := []struct {
		input []byte
		opts  []RFC3164Option
	}{
		{
			[]byte("<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: [1670546.400229] IN=eth9 OUT= MAC=24:5a:4c:7b:0a:4c:34:27:92:67:0f:2b:08:00 SRC=79.124.62.34 DST=x.x.x.x LEN=44 TOS=0x00 PREC=0x00 TTL=243 ID=37520 PROTO=TCP SPT=55055 DPT=51443 WINDOW=1024 RES=0x00 SYN URGP=0"), []RFC3164Option{},
		},
		{
			[]byte("<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: foo"), []RFC3164Option{WithCurrentYear()},
		},
		{
			[]byte("<12>May 20 09:33:54 UDMPRO,a2edd0c6ae48,udm-1.10.0.3686 kernel: foo"), []RFC3164Option{WithStrictHostname()},
		},
		{
			[]byte("foobar"), []RFC3164Option{},
		},
		{
			[]byte("<12>"), []RFC3164Option{},
		},
		{
			[]byte("<12>May 02 09:33:54"), []RFC3164Option{},
		},
		{
			[]byte("<12>May 02 09:33:54 foo.bar"), []RFC3164Option{},
		},
		{
			[]byte("<12>May 02 09:33:54 foo.bar bla[42"), []RFC3164Option{},
		},
		{
			[]byte("<12>May 02 09:33:54 foo.bar bla[42]"), []RFC3164Option{},
		},
		{
			[]byte("<12>May 02 09:33:54 foo.bar bla[42]: "), []RFC3164Option{},
		},
		{
			[]byte("<12>May 02 09:33:54 foo.bar bla"), []RFC3164Option{},
		},
		{
			[]byte("<12>May 02 09:33:54 foo.bar bla:"), []RFC3164Option{},
		},
		{
			[]byte(""), []RFC3164Option{},
		},
	}

	var lastErr error
	for _, bm := range benchmarks {
		b.Run(string(bm.input), func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				parser := NewRFC3164Parser(bm.opts...)
				lastErr = parser.Parse(bm.input)
			}
		})
	}
	e = lastErr
}

View file

@ -0,0 +1,398 @@
package rfc5424
import (
"fmt"
"time"
"github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal/parser/utils"
)
// RFC5424Option is a functional option applied to an RFC5424 parser.
type RFC5424Option func(*RFC5424)

// RFC5424 carries both the fields extracted from a syslog line and the
// scanning state used while extracting them.
type RFC5424 struct {
	// Parsed fields.
	PRI       int
	Timestamp time.Time
	Hostname  string
	Tag       string
	Message   string
	PID       string
	MsgID     string

	// Scanning state.
	len      int    // number of bytes in buf
	position int    // index of the next byte to read from buf
	buf      []byte // line being parsed

	// Behaviour switches.
	useCurrentYear bool // If no year is specified in the timestamp, use the current year
	strictHostname bool // If the hostname contains invalid characters or is not an IP, return an error
}
const (
	// PRI_MAX_LEN is the maximum length of the PRI value.
	PRI_MAX_LEN = 3
	// NIL_VALUE is the byte that stands for an absent field.
	NIL_VALUE = '-'
	// VALID_TIMESTAMP is the layout the timestamp field is parsed with.
	VALID_TIMESTAMP = time.RFC3339Nano
)

// VALID_TIMESTAMPS is a list of accepted layouts.
// NOTE(review): not referenced by the visible parser code, which uses
// VALID_TIMESTAMP — confirm whether it is still needed.
var VALID_TIMESTAMPS = []string{time.RFC3339}
// WithCurrentYear returns an option that sets the parser's useCurrentYear
// flag.
func WithCurrentYear() RFC5424Option {
	enable := func(parser *RFC5424) {
		parser.useCurrentYear = true
	}
	return enable
}
// WithStrictHostname returns an option that sets the parser's strictHostname
// flag, so hostnames failing validation are rejected.
func WithStrictHostname() RFC5424Option {
	enable := func(parser *RFC5424) {
		parser.strictHostname = true
	}
	return enable
}
// parsePRI reads the "<NNN>" priority header at the current position and
// stores its numeric value in r.PRI.
//
// It returns an error when the header does not start with '<', contains a
// byte that is not a digit, holds a value above 999, or is not closed by '>'.
func (r *RFC5424) parsePRI() error {
	// An exhausted buffer cannot hold a PRI; report it instead of indexing
	// past the end.
	if r.position >= r.len || r.buf[r.position] != '<' {
		return fmt.Errorf("PRI must start with '<'")
	}
	r.position++

	pri := 0
	for r.position < r.len {
		c := r.buf[r.position]
		if c == '>' {
			r.position++
			break
		}
		if c < '0' || c > '9' {
			return fmt.Errorf("PRI must be a number")
		}
		// Stop accumulating once the value is already out of range. Scanning
		// continues so the order in which errors are reported is unchanged,
		// but an arbitrarily long run of digits can no longer overflow int
		// and wrap back into the accepted range.
		if pri <= 999 {
			pri = pri*10 + int(c-'0')
		}
		r.position++
	}
	if pri > 999 {
		return fmt.Errorf("PRI must be up to 3 characters long")
	}
	// Reaching the end of the buffer without having consumed '>' means the
	// header was never closed.
	if r.position == r.len && r.buf[r.position-1] != '>' {
		return fmt.Errorf("PRI must end with '>'")
	}
	r.PRI = pri
	return nil
}
// parseVersion checks that the version at the current position is "1"
// followed by a space, and moves the cursor to the byte after that space.
//
// It returns "version must be 1" for any other leading byte and "version
// must be followed by a space" when the separator is missing or when
// nothing follows it.
func (r *RFC5424) parseVersion() error {
	if r.buf[r.position] != '1' {
		return fmt.Errorf("version must be 1")
	}
	// Inspect the separator instead of skipping it blindly, so that input
	// such as "11" or "1X" is no longer accepted as version 1.
	if r.position+1 >= r.len || r.buf[r.position+1] != ' ' {
		return fmt.Errorf("version must be followed by a space")
	}
	r.position += 2
	// The line ends right after the separator; the message is kept as it
	// was so existing callers see the same error text.
	if r.position >= r.len {
		return fmt.Errorf("version must be followed by a space")
	}
	return nil
}
// parseTimestamp reads the timestamp field at the current position.
//
// A field starting with NIL_VALUE is replaced by the current UTC time and
// the cursor skips two bytes. Otherwise the bytes up to the next space are
// parsed with VALID_TIMESTAMP and the cursor moves past that space. The
// buffer ending at or right after the timestamp is an error.
func (r *RFC5424) parseTimestamp() error {
	if r.buf[r.position] == NIL_VALUE {
		r.Timestamp = time.Now().UTC().Round(0)
		r.position += 2
		return nil
	}

	begin := r.position
	for r.position < r.len && r.buf[r.position] != ' ' {
		r.position++
	}
	raw := r.buf[begin:r.position]

	switch {
	case len(raw) == 0:
		return fmt.Errorf("timestamp is empty")
	case r.position == r.len:
		return fmt.Errorf("EOL after timestamp")
	}

	parsed, err := time.Parse(VALID_TIMESTAMP, string(raw))
	if err != nil {
		return fmt.Errorf("timestamp is not valid")
	}
	r.Timestamp = parsed

	r.position++ // step over the separating space
	if r.position >= r.len {
		return fmt.Errorf("EOL after timestamp")
	}
	return nil
}
// parseHostname reads the hostname field at the current position.
//
// A field that is exactly NIL_VALUE yields an empty hostname. Otherwise the
// bytes up to the next space (or the end of the buffer) become the hostname
// and the space, if any, is consumed. With strictHostname set the value must
// pass utils.IsValidHostnameOrIP. An empty hostname is an error.
func (r *RFC5424) parseHostname() error {
	// The field is NIL only when '-' is the whole field. Testing the first
	// byte alone would treat any value starting with '-' as NIL and swallow
	// its second byte.
	if r.buf[r.position] == NIL_VALUE && (r.position+1 >= r.len || r.buf[r.position+1] == ' ') {
		r.Hostname = ""
		r.position += 2
		return nil
	}

	start := r.position
	for r.position < r.len && r.buf[r.position] != ' ' {
		r.position++
	}
	hostname := string(r.buf[start:r.position])
	if r.position < r.len {
		r.position++ // consume the separating space
	}

	if r.strictHostname && !utils.IsValidHostnameOrIP(hostname) {
		return fmt.Errorf("hostname is not valid")
	}
	if len(hostname) == 0 {
		return fmt.Errorf("hostname is empty")
	}
	r.Hostname = hostname
	return nil
}
// parseAppName reads the app-name field at the current position into r.Tag.
//
// A field that is exactly NIL_VALUE yields an empty tag. Otherwise the bytes
// up to the next space (or the end of the buffer) become the tag and the
// space, if any, is consumed. An empty value or one longer than 48 bytes is
// an error.
func (r *RFC5424) parseAppName() error {
	// The field is NIL only when '-' is the whole field. Testing the first
	// byte alone would treat a name such as "-bash" as NIL and push the rest
	// of it into the next field.
	if r.buf[r.position] == NIL_VALUE && (r.position+1 >= r.len || r.buf[r.position+1] == ' ') {
		r.Tag = ""
		r.position += 2
		return nil
	}

	start := r.position
	for r.position < r.len && r.buf[r.position] != ' ' {
		r.position++
	}
	appname := r.buf[start:r.position]
	if r.position < r.len {
		r.position++ // consume the separating space
	}

	if len(appname) == 0 {
		return fmt.Errorf("appname is empty")
	}
	if len(appname) > 48 {
		return fmt.Errorf("appname is too long")
	}
	r.Tag = string(appname)
	return nil
}
// parseProcID reads the procid field at the current position into r.PID.
//
// A field that is exactly NIL_VALUE yields an empty PID. Otherwise the bytes
// up to the next space (or the end of the buffer) become the PID and the
// space, if any, is consumed. An empty value or one longer than 128 bytes is
// an error.
func (r *RFC5424) parseProcID() error {
	// The field is NIL only when '-' is the whole field. Testing the first
	// byte alone would treat any value starting with '-' as NIL and swallow
	// its second byte.
	if r.buf[r.position] == NIL_VALUE && (r.position+1 >= r.len || r.buf[r.position+1] == ' ') {
		r.PID = ""
		r.position += 2
		return nil
	}

	start := r.position
	for r.position < r.len && r.buf[r.position] != ' ' {
		r.position++
	}
	procid := r.buf[start:r.position]
	if r.position < r.len {
		r.position++ // consume the separating space
	}

	if len(procid) == 0 {
		return fmt.Errorf("procid is empty")
	}
	if len(procid) > 128 {
		return fmt.Errorf("procid is too long")
	}
	r.PID = string(procid)
	return nil
}
// parseMsgID reads the msgid field at the current position into r.MsgID.
//
// A field that is exactly NIL_VALUE yields an empty MsgID. Otherwise the
// bytes up to the next space (or the end of the buffer) become the MsgID and
// the space, if any, is consumed. An empty value or one longer than 32 bytes
// is an error.
func (r *RFC5424) parseMsgID() error {
	// The field is NIL only when '-' is the whole field. Testing the first
	// byte alone would treat any value starting with '-' as NIL and swallow
	// its second byte.
	if r.buf[r.position] == NIL_VALUE && (r.position+1 >= r.len || r.buf[r.position+1] == ' ') {
		r.MsgID = ""
		r.position += 2
		return nil
	}

	start := r.position
	for r.position < r.len && r.buf[r.position] != ' ' {
		r.position++
	}
	msgid := r.buf[start:r.position]
	if r.position < r.len {
		r.position++ // consume the separating space
	}

	if len(msgid) == 0 {
		return fmt.Errorf("msgid is empty")
	}
	if len(msgid) > 32 {
		return fmt.Errorf("msgid is too long")
	}
	r.MsgID = string(msgid)
	return nil
}
// parseStructuredData moves the cursor past the structured-data field; the
// content of the field is not stored.
//
// A field starting with NIL_VALUE is skipped together with the following
// byte. Otherwise the field must start with '[' and is scanned until a ']'
// that is not preceded by a backslash and is immediately followed by a
// space. It returns an error when the field starts with any other byte or
// when the buffer ends on a byte that is not such a ']'.
func (r *RFC5424) parseStructuredData() error {
// done reports whether the last byte examined closed an element.
done := false
if r.buf[r.position] == NIL_VALUE {
r.position += 2
return nil
}
if r.buf[r.position] != '[' {
return fmt.Errorf("structured data must start with '[' or be '-'")
}
// prev holds the previously examined byte so an escaped "\]" is not taken
// for a closing bracket.
prev := byte(0)
for r.position < r.len {
done = false
c := r.buf[r.position]
if c == ']' && prev != '\\' {
done = true
r.position++
// A closing bracket followed by a space ends the whole field.
if r.position < r.len && r.buf[r.position] == ' ' {
break
}
}
prev = c
// When an unescaped ']' was not followed by a space, the cursor was already
// advanced above, so this increment also skips the byte right after the
// bracket.
r.position++
}
// Step past the byte the scan stopped on (the space after the final ']'
// when the loop ended through break).
r.position++
if !done {
return fmt.Errorf("structured data must end with ']'")
}
return nil
}
// parseMessage stores everything from the current position to the end of
// the buffer in r.Message and leaves the cursor at the end. It fails when
// the cursor is exactly at the end of the buffer.
func (r *RFC5424) parseMessage() error {
	if r.position == r.len {
		return fmt.Errorf("message is empty")
	}

	var rest []byte
	// A cursor already past the end leaves the message empty.
	if r.position < r.len {
		rest = r.buf[r.position:r.len]
		r.position = r.len
	}
	r.Message = string(rest)
	return nil
}
// Parse parses message and fills the exported fields of r. The sections are
// read in order: PRI, version, timestamp, hostname, app-name, procid, msgid,
// structured data and message. It returns the error of the first section
// that fails, or an "EOL after ..." error when the line ends before the
// next section.
//
// The cursor is reset on every call so a parser can be reused: without the
// reset a second call would resume at the offset where the previous one
// stopped.
func (r *RFC5424) Parse(message []byte) error {
	r.position = 0
	r.len = len(message)
	if r.len == 0 {
		return fmt.Errorf("syslog line is empty")
	}
	r.buf = message

	if err := r.parsePRI(); err != nil {
		return err
	}
	if r.position >= r.len {
		return fmt.Errorf("EOL after PRI")
	}

	if err := r.parseVersion(); err != nil {
		return err
	}
	if r.position >= r.len {
		return fmt.Errorf("EOL after Version")
	}

	if err := r.parseTimestamp(); err != nil {
		return err
	}
	if r.position >= r.len {
		return fmt.Errorf("EOL after Timestamp")
	}

	if err := r.parseHostname(); err != nil {
		return err
	}
	if r.position >= r.len {
		return fmt.Errorf("EOL after hostname")
	}

	if err := r.parseAppName(); err != nil {
		return err
	}
	if r.position >= r.len {
		return fmt.Errorf("EOL after appname")
	}

	if err := r.parseProcID(); err != nil {
		return err
	}
	if r.position >= r.len {
		return fmt.Errorf("EOL after ProcID")
	}

	if err := r.parseMsgID(); err != nil {
		return err
	}
	if r.position >= r.len {
		return fmt.Errorf("EOL after MSGID")
	}

	if err := r.parseStructuredData(); err != nil {
		return err
	}
	if r.position >= r.len {
		return fmt.Errorf("EOL after SD")
	}

	return r.parseMessage()
}
// NewRFC5424Parser allocates a parser and applies opts to it in the order
// they were given.
func NewRFC5424Parser(opts ...RFC5424Option) *RFC5424 {
	parser := new(RFC5424)
	for i := range opts {
		opts[i](parser)
	}
	return parser
}

View file

@ -0,0 +1,269 @@
package rfc5424
import (
"testing"
"time"
)
// TestPri feeds PRI headers to parsePRI and checks either the parsed value
// or the exact error text.
func TestPri(t *testing.T) {
	cases := []struct {
		input       string
		expected    int
		expectedErr string
	}{
		{"<0>", 0, ""},
		{"<19>", 19, ""},
		{"<200>", 200, ""},
		{"<4999>", 0, "PRI must be up to 3 characters long"},
		{"<123", 0, "PRI must end with '>'"},
		{"123>", 0, "PRI must start with '<'"},
		{"<abc>", 0, "PRI must be a number"},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			r := &RFC5424{}
			r.buf = []byte(tc.input)
			r.len = len(r.buf)

			err := r.parsePRI()
			if err != nil {
				if tc.expectedErr == "" {
					t.Errorf("unexpected error: %s", err.Error())
					return
				}
				if err.Error() != tc.expectedErr {
					t.Errorf("expected error %s, got %s", tc.expectedErr, err.Error())
				}
				return
			}
			if tc.expectedErr != "" {
				t.Errorf("expected error %s, got no error", tc.expectedErr)
				return
			}
			if r.PRI != tc.expected {
				t.Errorf("expected %d, got %d", tc.expected, r.PRI)
			}
		})
	}
}
// TestHostname runs parseHostname over plain, NIL, malformed and over-long
// hostnames, with and without the WithStrictHostname option.
func TestHostname(t *testing.T) {
	cases := []struct {
		input          string
		expected       string
		expectedErr    string
		strictHostname bool
	}{
		{"127.0.0.1", "127.0.0.1", "", false},
		{"::1", "::1", "", false},
		{"-", "", "", false},
		{"foo.-bar", "", "hostname is not valid", true},
		{"foo-.bar", "", "hostname is not valid", true},
		{"foo123.bar", "foo123.bar", "", true},
		{"a..", "", "hostname is not valid", true},
		{"foo.bar", "foo.bar", "", false},
		{"foo,bar", "foo,bar", "", false},
		{"foo,bar", "", "hostname is not valid", true},
		{".", ".", "", true},
		{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "", "hostname is not valid", true},
		{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "", "hostname is not valid", true},
		{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bla", "", false},
		{"a.foo-", "", "hostname is not valid", true},
	}

	for _, tc := range cases {
		t.Run(tc.input, func(t *testing.T) {
			opts := []RFC5424Option{}
			if tc.strictHostname {
				opts = append(opts, WithStrictHostname())
			}
			r := NewRFC5424Parser(opts...)
			r.buf = []byte(tc.input)
			r.len = len(r.buf)

			err := r.parseHostname()
			if err != nil {
				if tc.expectedErr == "" {
					t.Errorf("unexpected error: %s", err.Error())
					return
				}
				if err.Error() != tc.expectedErr {
					t.Errorf("expected error %s, got %s", tc.expectedErr, err.Error())
				}
				return
			}
			if tc.expectedErr != "" {
				t.Errorf("expected error %s, got no error", tc.expectedErr)
				return
			}
			if r.Hostname != tc.expected {
				t.Errorf("expected %s, got %s", tc.expected, r.Hostname)
			}
		})
	}
}
// TestParse runs complete lines through Parse and checks every extracted
// field, or the exact error text for malformed or truncated input.
func TestParse(t *testing.T) {
// expected mirrors the exported fields of RFC5424 that the test compares.
type expected struct {
Timestamp time.Time
Hostname string
Tag string
PID string
Message string
PRI int
MsgID string
}
tests := []struct {
name string
input string
expected expected
expectedErr string
opts []RFC5424Option
}{
{
"valid msg",
`<13>1 2021-05-18T11:58:40.828081+02:42 mantis sshd 49340 - [timeQuality isSynced="0" tzKnown="1"] blabla`, expected{
Timestamp: time.Date(2021, 5, 18, 11, 58, 40, 828081000, time.FixedZone("+0242", 9720)),
Hostname: "mantis",
Tag: "sshd",
PID: "49340",
MsgID: "",
Message: "blabla",
PRI: 13,
}, "", []RFC5424Option{},
},
{
"valid msg with msgid",
`<13>1 2021-05-18T11:58:40.828081+02:42 mantis foobar 49340 123123 [timeQuality isSynced="0" tzKnown="1"] blabla`, expected{
Timestamp: time.Date(2021, 5, 18, 11, 58, 40, 828081000, time.FixedZone("+0242", 9720)),
Hostname: "mantis",
Tag: "foobar",
PID: "49340",
MsgID: "123123",
Message: "blabla",
PRI: 13,
}, "", []RFC5424Option{},
},
{
"valid msg with repeating SD",
`<13>1 2021-05-18T11:58:40.828081+02:42 mantis foobar 49340 123123 [timeQuality isSynced="0" tzKnown="1"][foo="bar][a] blabla`, expected{
Timestamp: time.Date(2021, 5, 18, 11, 58, 40, 828081000, time.FixedZone("+0242", 9720)),
Hostname: "mantis",
Tag: "foobar",
PID: "49340",
MsgID: "123123",
Message: "blabla",
PRI: 13,
}, "", []RFC5424Option{},
},
{
"invalid SD",
`<13>1 2021-05-18T11:58:40.828081+02:00 mantis foobar 49340 123123 [timeQuality asd`, expected{}, "structured data must end with ']'", []RFC5424Option{},
},
{
"invalid version",
`<13>42 2021-05-18T11:58:40.828081+02:00 mantis foobar 49340 123123 [timeQuality isSynced="0" tzKnown="1"] blabla`, expected{}, "version must be 1", []RFC5424Option{},
},
{
"invalid message",
`<13>1`, expected{}, "version must be followed by a space", []RFC5424Option{},
},
{
// A NIL timestamp is replaced by the current time, so the expectation uses
// the current time as well.
"valid msg with empty fields",
`<13>1 - foo - - - - blabla`, expected{
Timestamp: time.Now().UTC().Round(0),
Hostname: "foo",
PRI: 13,
Message: "blabla",
}, "", []RFC5424Option{},
},
{
"valid msg with empty fields",
`<13>1 - - - - - - blabla`, expected{
Timestamp: time.Now().UTC().Round(0),
PRI: 13,
Message: "blabla",
}, "", []RFC5424Option{},
},
{
"valid msg with escaped SD",
`<13>1 2022-05-24T10:57:39Z testhostname unknown - sn="msgid" [foo="\]" bar="a\""][a b="[\]" c] testmessage`,
expected{
PRI: 13,
Timestamp: time.Date(2022, 5, 24, 10, 57, 39, 0, time.UTC),
Tag: "unknown",
Hostname: "testhostname",
MsgID: `sn="msgid"`,
Message: `testmessage`,
}, "", []RFC5424Option{},
},
{
"valid complex msg",
`<13>1 2022-05-24T10:57:39Z myhostname unknown - sn="msgid" [all@0 request="/dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js" src_ip_geo_country="DE" MONTH="May" COMMONAPACHELOG="1.1.1.1 - - [24/May/2022:10:57:37 +0200\] \"GET /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js HTTP/2.0\" 304 0" auth="-" HOUR="10" gl2_remote_ip="172.31.32.142" ident="-" gl2_remote_port="43375" BASE10NUM="[2.0, 304, 0\]" pid="-1" program="nginx" gl2_source_input="623ed3440183476d61cff974" INT="+0200" is_private_ip="false" YEAR="2022" src_ip_geo_city="Achern" clientip="1.1.1.1" USERNAME="-" src_ip_geo_location="48.6306,8.0743" gl2_source_node="8620c2bb-dbb7-4535-b1ce-83df223acd8d" MINUTE="57" timestamp="2022-05-24T08:57:37.000Z" src_ip_asn="3320" level="5" IP="1.1.1.1" IPV4="1.1.1.1" verb="GET" gl2_message_id="01G3TMJFAMFS4H60QSF7M029R0" TIME="10:57:37" USER="-" src_ip_asn_owner="Deutsche Telekom AG" response="304" bytes="0" SECOND="37" httpversion="2.0" _id="906ce155-db3f-11ec-b25f-0a189ba2c64e" facility="user" MONTHDAY="24"] source: sn="www.foobar.com" | message: 1.1.1.1 - - [24/May/2022:10:57:37 +0200] "GET /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js HTTP/2.0" 304 0 "https://www.foobar.com/sw.js" "Mozilla/5.0 (Linux; Android 9; ANE-LX1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.61 Mobile Safari/537.36" "-" "www.foobar.com" sn="www.foobar.com" rt=0.000 ua="-" us="-" ut="-" ul="-" cs=HIT { request: /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js | src_ip_geo_country: DE | MONTH: May | COMMONAPACHELOG: 1.1.1.1 - - [24/May/2022:10:57:37 +0200] "GET /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js HTTP/2.0" 304 0 | auth: - | HOUR: 10 | gl2_remote_ip: 172.31.32.142 | ident: - | gl2_remote_port: 43375 | BASE10NUM: [2.0, 304, 0] | pid: -1 | program: nginx | gl2_source_input: 623ed3440183476d61cff974 | INT: +0200 | is_private_ip: false | YEAR: 2022 | src_ip_geo_city: Achern | clientip: 1.1.1.1 | USERNAME:`,
expected{
Timestamp: time.Date(2022, 5, 24, 10, 57, 39, 0, time.UTC),
Hostname: "myhostname",
Tag: "unknown",
PRI: 13,
MsgID: `sn="msgid"`,
Message: `source: sn="www.foobar.com" | message: 1.1.1.1 - - [24/May/2022:10:57:37 +0200] "GET /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js HTTP/2.0" 304 0 "https://www.foobar.com/sw.js" "Mozilla/5.0 (Linux; Android 9; ANE-LX1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.61 Mobile Safari/537.36" "-" "www.foobar.com" sn="www.foobar.com" rt=0.000 ua="-" us="-" ut="-" ul="-" cs=HIT { request: /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js | src_ip_geo_country: DE | MONTH: May | COMMONAPACHELOG: 1.1.1.1 - - [24/May/2022:10:57:37 +0200] "GET /dist/precache-manifest.58b57debe6bc4f96698da0dc314461e9.js HTTP/2.0" 304 0 | auth: - | HOUR: 10 | gl2_remote_ip: 172.31.32.142 | ident: - | gl2_remote_port: 43375 | BASE10NUM: [2.0, 304, 0] | pid: -1 | program: nginx | gl2_source_input: 623ed3440183476d61cff974 | INT: +0200 | is_private_ip: false | YEAR: 2022 | src_ip_geo_city: Achern | clientip: 1.1.1.1 | USERNAME:`,
}, "", []RFC5424Option{},
},
{
"partial message",
`<13>1 2022-05-24T10:57:39Z foo bar -`,
expected{},
"EOL after ProcID",
[]RFC5424Option{},
},
{
"partial message",
`<13>1 2022-05-24T10:57:39Z foo bar `,
expected{},
"EOL after appname",
[]RFC5424Option{},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
r := NewRFC5424Parser(test.opts...)
err := r.Parse([]byte(test.input))
if err != nil {
if test.expectedErr != "" {
if err.Error() != test.expectedErr {
t.Errorf("expected error '%s', got '%s'", test.expectedErr, err.Error())
}
} else {
t.Errorf("unexpected error: '%s'", err.Error())
}
} else {
if test.expectedErr != "" {
t.Errorf("expected error '%s', got no error", test.expectedErr)
} else {
// Timestamps are compared after rounding to the second because the NIL
// timestamp cases are built from time.Now() at different moments.
if r.Timestamp.Round(time.Second).String() != test.expected.Timestamp.Round(time.Second).String() {
t.Errorf("expected timestamp '%s', got '%s'", test.expected.Timestamp, r.Timestamp)
}
if r.Hostname != test.expected.Hostname {
t.Errorf("expected hostname '%s', got '%s'", test.expected.Hostname, r.Hostname)
}
if r.Tag != test.expected.Tag {
t.Errorf("expected tag '%s', got '%s'", test.expected.Tag, r.Tag)
}
if r.PID != test.expected.PID {
t.Errorf("expected pid '%s', got '%s'", test.expected.PID, r.PID)
}
if r.Message != test.expected.Message {
t.Errorf("expected message '%s', got '%s'", test.expected.Message, r.Message)
}
if r.PRI != test.expected.PRI {
t.Errorf("expected pri '%d', got '%d'", test.expected.PRI, r.PRI)
}
if r.MsgID != test.expected.MsgID {
t.Errorf("expected msgid '%s', got '%s'", test.expected.MsgID, r.MsgID)
}
}
}
})
}
}

View file

@ -0,0 +1,103 @@
package rfc5424
import "testing"
// e is a package-level sink: BenchmarkParse stores the last error returned by
// Parse in it once all sub-benchmarks have run (presumably so the compiler
// cannot treat the benchmarked call as dead code).
var e error
// BOM holds the three bytes EF BB BF (the UTF-8 byte order mark). Some
// benchmark inputs prepend it to the message part.
const BOM = "\xEF\xBB\xBF"
// BenchmarkParse runs one sub-benchmark per sample input. Each iteration
// builds a fresh parser with NewRFC5424Parser and calls Parse on the input.
// The outcome of Parse is not checked; the last error is only kept in the
// package-level variable e.
//
// Test cases are from https://github.com/influxdata/go-syslog (the parser we used previously).
// NOTE(review): the "[no]"/"[ok]" prefixes presumably mark inputs that the
// upstream suite expected to be rejected/accepted - confirm before relying on them.
func BenchmarkParse(b *testing.B) {
	cases := []struct {
		name string
		data []byte
	}{
		{
			name: "[no] empty input",
			data: []byte(``),
		},
		{
			name: "[no] multiple syslog messages on multiple lines",
			data: []byte("<1>1 - - - - - -\x0A<2>1 - - - - - -"),
		},
		{
			name: "[no] impossible timestamp",
			data: []byte(`<101>11 2003-09-31T22:14:15.003Z`),
		},
		{
			name: "[no] malformed structured data",
			data: []byte("<1>1 - - - - - X"),
		},
		{
			name: "[no] with duplicated structured data id",
			data: []byte("<165>3 2003-10-11T22:14:15.003Z example.com evnts - ID27 [id1][id1]"),
		},
		{
			name: "[ok] minimal",
			data: []byte(`<1>1 - - - - - -`),
		},
		{
			name: "[ok] average message",
			data: []byte(`<29>1 2016-02-21T04:32:57+00:00 web1 someservice - - [origin x-service="someservice"][meta sequenceId="14125553"] 127.0.0.1 - - 1456029177 "GET /v1/ok HTTP/1.1" 200 145 "-" "hacheck 0.9.0" 24306 127.0.0.1:40124 575`),
		},
		{
			name: "[ok] complicated message",
			data: []byte(`<78>1 2016-01-15T00:04:01Z host1 CROND 10391 - [meta sequenceId="29" sequenceBlah="foo"][my key="value"] some_message`),
		},
		{
			name: "[ok] very long message",
			data: []byte(`<190>1 2016-02-21T01:19:11+00:00 batch6sj - - - [meta sequenceId="21881798" x-group="37051387"][origin x-service="tracking"] metascutellar conversationalist nephralgic exogenetic graphy streng outtaken acouasm amateurism prenotice Lyonese bedull antigrammatical diosphenol gastriloquial bayoneteer sweetener naggy roughhouser dighter addend sulphacid uneffectless ferroprussiate reveal Mazdaist plaudite Australasian distributival wiseman rumness Seidel topazine shahdom sinsion mesmerically pinguedinous ophthalmotonometer scuppler wound eciliate expectedly carriwitchet dictatorialism bindweb pyelitic idic atule kokoon poultryproof rusticial seedlip nitrosate splenadenoma holobenthic uneternal Phocaean epigenic doubtlessly indirection torticollar robomb adoptedly outspeak wappenschawing talalgia Goop domitic savola unstrafed carded unmagnified mythologically orchester obliteration imperialine undisobeyed galvanoplastical cycloplegia quinquennia foremean umbonal marcgraviaceous happenstance theoretical necropoles wayworn Igbira pseudoangelic raising unfrounced lamasary centaurial Japanolatry microlepidoptera`),
		},
		{
			name: "[ok] all max length and complete",
			data: []byte(`<191>999 2018-12-31T23:59:59.999999-23:59 abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabc abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdef abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzab abcdefghilmnopqrstuvzabcdefghilm [an@id key1="val1" key2="val2"][another@id key1="val1"] Some message "GET"`),
		},
		{
			name: "[ok] all max length except structured data and message",
			data: []byte(`<191>999 2018-12-31T23:59:59.999999-23:59 abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabc abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdef abcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzabcdefghilmnopqrstuvzab abcdefghilmnopqrstuvzabcdefghilm -`),
		},
		{
			name: "[ok] minimal with message containing newline",
			data: []byte("<1>1 - - - - - - x\x0Ay"),
		},
		{
			name: "[ok] w/o procid, w/o structured data, with message starting with BOM",
			data: []byte("<34>1 2003-10-11T22:14:15.003Z mymachine.example.com su - ID47 - " + BOM + "'su root' failed for lonvick on /dev/pts/8"),
		},
		{
			name: "[ok] minimal with UTF-8 message",
			data: []byte("<0>1 - - - - - - ⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑"),
		},
		{
			name: "[ok] minimal with UTF-8 message starting with BOM",
			data: []byte("<0>1 - - - - - - " + BOM + "⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑"),
		},
		{
			name: "[ok] with structured data id, w/o structured data params",
			data: []byte(`<29>50 2016-01-15T01:00:43Z hn S - - [my@id]`),
		},
		{
			name: "[ok] with multiple structured data",
			data: []byte(`<29>50 2016-01-15T01:00:43Z hn S - - [my@id1 k="v"][my@id2 c="val"]`),
		},
		{
			name: "[ok] with escaped backslash within structured data param value, with message",
			data: []byte(`<29>50 2016-01-15T01:00:43Z hn S - - [meta es="\\valid"] 1452819643`),
		},
		{
			name: "[ok] with UTF-8 structured data param value, with message",
			data: []byte(`<78>1 2016-01-15T00:04:01+00:00 host1 CROND 10391 - [sdid x="⌘"] some_message`),
		},
	}

	// lastErr outlives the sub-benchmarks so the final result can be handed
	// to the package-level sink below.
	var lastErr error
	for _, tc := range cases {
		b.Run(tc.name, func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				parser := NewRFC5424Parser()
				lastErr = parser.Parse(tc.data)
			}
		})
	}
	e = lastErr
}

View file

@ -0,0 +1,76 @@
package utils
import "net"
// isValidIP reports whether net.ParseIP accepts ip as an IP address literal.
func isValidIP(ip string) bool {
	if parsed := net.ParseIP(ip); parsed == nil {
		return false
	}
	return true
}
// IsAlphaNumeric reports whether c is an ASCII letter (a-z, A-Z) or an ASCII
// decimal digit (0-9).
func IsAlphaNumeric(c byte) bool {
	switch {
	case c >= '0' && c <= '9':
		return true
	case c >= 'A' && c <= 'Z':
		return true
	case c >= 'a' && c <= 'z':
		return true
	default:
		return false
	}
}
// isValidHostname reports whether s has the shape of a DNS host name:
// dot-separated labels of 1 to 63 bytes made of ASCII letters, digits, '_'
// and '-', where a label neither starts nor ends with '-', and where at least
// one letter, underscore or hyphen appears somewhere in the name.
//
// This function is lifted from the Go source.
// See https://github.com/golang/go/blob/master/src/net/dnsclient.go#L75
func isValidHostname(s string) bool {
	// The root domain name is valid. See golang.org/issue/45715.
	if s == "." {
		return true
	}

	// See RFC 1035, RFC 3696.
	// In wire format every label carries a length octet and the total is
	// capped at 255, which leaves 253 bytes for the presentation form, or
	// 254 when the optional trailing dot is written out.
	size := len(s)
	if size == 0 || size > 254 {
		return false
	}
	if size == 254 && s[size-1] != '.' {
		return false
	}

	var (
		prev        byte = '.' // byte seen just before the current one
		sawNonDigit bool       // set once a letter, underscore or hyphen is seen
		labelLen    int        // length of the label currently being scanned
	)
	for _, ch := range []byte(s) {
		isLetter := (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_'
		isDigit := ch >= '0' && ch <= '9'

		if isLetter {
			sawNonDigit = true
			labelLen++
		} else if isDigit {
			labelLen++
		} else if ch == '-' {
			// A label cannot begin with a hyphen.
			if prev == '.' {
				return false
			}
			sawNonDigit = true
			labelLen++
		} else if ch == '.' {
			// A dot closes a label: the label must be non-empty, at most
			// 63 bytes long, and must not end with a hyphen.
			if prev == '.' || prev == '-' || labelLen == 0 || labelLen > 63 {
				return false
			}
			labelLen = 0
		} else {
			return false
		}
		prev = ch
	}

	// The final label gets the same hyphen/length checks as the others.
	if prev == '-' || labelLen > 63 {
		return false
	}
	return sawNonDigit
}
// IsValidHostnameOrIP reports whether hostname is accepted by isValidIP or,
// failing that, by isValidHostname.
func IsValidHostnameOrIP(hostname string) bool {
	if isValidIP(hostname) {
		return true
	}
	return isValidHostname(hostname)
}

View file

@ -3,15 +3,15 @@ package syslogacquisition
import (
"fmt"
"net"
"strconv"
"strings"
"time"
"github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration"
syslogserver "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal"
"github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal/parser/rfc3164"
"github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal/parser/rfc5424"
syslogserver "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog/internal/server"
leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket"
"github.com/crowdsecurity/crowdsec/pkg/types"
"github.com/influxdata/go-syslog/v3/rfc3164"
"github.com/influxdata/go-syslog/v3/rfc5424"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"
@ -133,49 +133,33 @@ func (s *SyslogSource) StreamingAcquisition(out chan types.Event, t *tomb.Tomb)
return nil
}
func (s *SyslogSource) buildLogFromSyslog(ts *time.Time, hostname *string,
appname *string, pid *string, msg *string) (string, error) {
func (s *SyslogSource) buildLogFromSyslog(ts time.Time, hostname string,
appname string, pid string, msg string) string {
ret := ""
if msg == nil {
return "", errors.Errorf("missing message field in syslog message")
}
if ts != nil {
if !ts.IsZero() {
ret += ts.Format("Jan 2 15:04:05")
} else {
s.logger.Tracef("%s - missing TS", *msg)
s.logger.Tracef("%s - missing TS", msg)
ret += time.Now().UTC().Format("Jan 2 15:04:05")
}
if hostname != nil {
ret += " " + *hostname
if hostname != "" {
ret += " " + hostname
} else {
s.logger.Tracef("%s - missing host", *msg)
s.logger.Tracef("%s - missing host", msg)
ret += " unknownhost"
}
if appname != nil {
ret += " " + *appname
} else {
return "", errors.Errorf("missing appname field in syslog message")
if appname != "" {
ret += " " + appname
}
if pid != nil {
/*
!!! ugly hack !!!
Due to a bug in the syslog parser we use (https://github.com/influxdata/go-syslog/issues/31),
the ProcID field will contain garbage if the message as a ] anywhere in it.
Assume that a correctly formatted ProcID only contains number, and if this is not the case, set it to an arbitrary value
*/
_, err := strconv.Atoi(*pid)
if err != nil {
ret += "[1]: "
} else {
ret += "[" + *pid + "]: "
}
if pid != "" {
ret += "[" + pid + "]: "
} else {
ret += ": "
}
if msg != nil {
ret += *msg
if msg != "" {
ret += msg
}
return ret, nil
return ret
}
@ -199,38 +183,24 @@ func (s *SyslogSource) handleSyslogMsg(out chan types.Event, t *tomb.Tomb, c cha
logger := s.logger.WithField("client", syslogLine.Client)
logger.Tracef("raw: %s", syslogLine)
linesReceived.With(prometheus.Labels{"source": syslogLine.Client}).Inc()
p := rfc5424.NewParser()
m, err := p.Parse(syslogLine.Message)
p := rfc3164.NewRFC3164Parser(rfc3164.WithCurrentYear())
err := p.Parse(syslogLine.Message)
if err != nil {
logger.Debugf("could not parse as RFC5424 (%s)", err)
p = rfc3164.NewParser(rfc3164.WithYear(rfc3164.CurrentYear{}))
m, err = p.Parse(syslogLine.Message)
logger.Debugf("could not parse as RFC3164 (%s)", err)
p2 := rfc5424.NewRFC5424Parser()
err = p2.Parse(syslogLine.Message)
if err != nil {
logger.Errorf("could not parse message: %s", err)
logger.Debugf("could not parse as RFC3164 (%s) : %s", err, syslogLine.Message)
logger.Debugf("could not parse as RFC5424 (%s) : %s", err, syslogLine.Message)
continue
}
msg := m.(*rfc3164.SyslogMessage)
line, err = s.buildLogFromSyslog(msg.Timestamp, msg.Hostname, msg.Appname, msg.ProcID, msg.Message)
if err != nil {
logger.Debugf("could not parse as RFC3164 (%s) : %s", err, syslogLine.Message)
logger.Error(err)
continue
}
linesParsed.With(prometheus.Labels{"source": syslogLine.Client,
"type": "RFC3164"}).Inc()
line = s.buildLogFromSyslog(p2.Timestamp, p2.Hostname, p2.Tag, p2.PID, p2.Message)
} else {
msg := m.(*rfc5424.SyslogMessage)
line, err = s.buildLogFromSyslog(msg.Timestamp, msg.Hostname, msg.Appname, msg.ProcID, msg.Message)
if err != nil {
log.Debugf("could not parse message as RFC5424 (%s) : %s", err, syslogLine.Message)
logger.Error(err)
continue
}
linesParsed.With(prometheus.Labels{"source": syslogLine.Client,
"type": "RFC5424"}).Inc()
line = s.buildLogFromSyslog(p.Timestamp, p.Hostname, p.Tag, p.PID, p.Message)
}
line = strings.TrimSuffix(line, "\n")
l := types.Line{}
l.Raw = line
l.Module = s.GetName()

View file

@ -67,27 +67,36 @@ func writeToSyslog(logs []string) {
return
}
for _, log := range logs {
fmt.Fprint(conn, log)
n, err := fmt.Fprint(conn, log)
if err != nil {
fmt.Printf("could not write to syslog server : %s", err)
return
}
if n != len(log) {
fmt.Printf("could not write to syslog server : %s", err)
return
}
}
}
func TestStreamingAcquisition(t *testing.T) {
tests := []struct {
name string
config string
expectedErr string
logs []string
expectedLines int
}{
{
config: `
source: syslog
name: "invalid msgs",
config: `source: syslog
listen_port: 4242
listen_addr: 127.0.0.1`,
logs: []string{"foobar", "bla", "pouet"},
},
{
config: `
source: syslog
name: "RFC5424",
config: `source: syslog
listen_port: 4242
listen_addr: 127.0.0.1`,
expectedLines: 2,
@ -95,8 +104,8 @@ listen_addr: 127.0.0.1`,
`<13>1 2021-05-18T12:12:37.560695+02:00 mantis sshd 49340 - [timeQuality isSynced="0" tzKnown="1"] blabla2[foobar]`},
},
{
config: `
source: syslog
name: "RFC3164",
config: `source: syslog
listen_port: 4242
listen_addr: 127.0.0.1`,
expectedLines: 3,
@ -108,43 +117,54 @@ listen_addr: 127.0.0.1`,
}
if runtime.GOOS != "windows" {
tests = append(tests, struct {
name string
config string
expectedErr string
logs []string
expectedLines int
}{
name: "privileged port",
config: `source: syslog`,
expectedErr: "could not start syslog server: could not listen on port 514: listen udp 127.0.0.1:514: bind: permission denied",
})
}
for _, ts := range tests {
subLogger := log.WithFields(log.Fields{
"type": "syslog",
})
s := SyslogSource{}
_ = s.Configure([]byte(ts.config), subLogger)
tomb := tomb.Tomb{}
out := make(chan types.Event)
err := s.StreamingAcquisition(out, &tomb)
cstest.AssertErrorContains(t, err, ts.expectedErr)
if err != nil {
continue
}
actualLines := 0
go writeToSyslog(ts.logs)
READLOOP:
for {
select {
case <-out:
actualLines++
case <-time.After(2 * time.Second):
break READLOOP
t.Run(ts.name, func(t *testing.T) {
subLogger := log.WithFields(log.Fields{
"type": "syslog",
})
s := SyslogSource{}
err := s.Configure([]byte(ts.config), subLogger)
if err != nil {
t.Fatalf("could not configure syslog source : %s", err)
}
}
assert.Equal(t, ts.expectedLines, actualLines)
tomb.Kill(nil)
tomb.Wait()
tomb := tomb.Tomb{}
out := make(chan types.Event)
err = s.StreamingAcquisition(out, &tomb)
cstest.AssertErrorContains(t, err, ts.expectedErr)
if ts.expectedErr != "" {
return
}
if err != nil && ts.expectedErr == "" {
t.Fatalf("unexpected error while starting syslog server: %s", err)
return
}
actualLines := 0
go writeToSyslog(ts.logs)
READLOOP:
for {
select {
case <-out:
actualLines++
case <-time.After(2 * time.Second):
break READLOOP
}
}
assert.Equal(t, ts.expectedLines, actualLines)
tomb.Kill(nil)
tomb.Wait()
})
}
}