fix #1733 : add support for exclusion regexps (#1735)

* allow to specify a list of regular expressions to skip some specific files
This commit is contained in:
Thibault "bui" Koechlin 2022-09-06 14:58:37 +02:00 committed by GitHub
parent 414282a2c9
commit 9d199fd4a9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 78 additions and 0 deletions

View file

@ -9,6 +9,7 @@ import (
"os"
"path"
"path/filepath"
"regexp"
"strings"
"time"
@ -33,6 +34,7 @@ var linesRead = prometheus.NewCounterVec(
type FileConfiguration struct {
Filenames []string
ExcludeRegexps []string `yaml:"exclude_regexps"`
Filename string
ForceInotify bool `yaml:"force_inotify"`
configuration.DataSourceCommonCfg `yaml:",inline"`
@ -45,6 +47,7 @@ type FileSource struct {
tails map[string]bool
logger *log.Entry
files []string
exclude_regexps []*regexp.Regexp
}
func (f *FileSource) Configure(Config []byte, logger *log.Entry) error {
@ -74,6 +77,13 @@ func (f *FileSource) Configure(Config []byte, logger *log.Entry) error {
if err != nil {
return errors.Wrapf(err, "Could not create fsnotify watcher")
}
for _, exclude := range f.config.ExcludeRegexps {
re, err := regexp.Compile(exclude)
if err != nil {
return errors.Wrapf(err, "Could not compile regexp %s", exclude)
}
f.exclude_regexps = append(f.exclude_regexps, re)
}
f.logger.Tracef("Actual FileAcquisition Configuration %+v", f.config)
for _, pattern := range f.config.Filenames {
if f.config.ForceInotify {
@ -97,6 +107,19 @@ func (f *FileSource) Configure(Config []byte, logger *log.Entry) error {
continue
}
for _, file := range files {
//check if file is excluded
excluded := false
for _, pattern := range f.exclude_regexps {
if pattern.MatchString(file) {
excluded = true
f.logger.Infof("Skipping file %s as it matches exclude pattern %s", file, pattern)
break
}
}
if excluded {
continue
}
if files[0] != pattern && f.config.Mode == configuration.TAIL_MODE { //we have a glob pattern
directory := filepath.Dir(file)
f.logger.Debugf("Will add watch to directory: %s", directory)
@ -232,6 +255,19 @@ func (f *FileSource) StreamingAcquisition(out chan types.Event, t *tomb.Tomb) er
return f.monitorNewFiles(out, t)
})
for _, file := range f.files {
//before opening the file, check if we need to specifically avoid it. (XXX)
skip := false
for _, pattern := range f.exclude_regexps {
if pattern.MatchString(file) {
f.logger.Infof("file %s matches exclusion pattern %s, skipping", file, pattern.String())
skip = true
break
}
}
if skip {
continue
}
//cf. https://github.com/crowdsecurity/crowdsec/issues/1168
//do not rely on stat, reclose file immediately as it's opened by Tail
fd, err := os.Open(file)
@ -252,6 +288,7 @@ func (f *FileSource) StreamingAcquisition(out chan types.Event, t *tomb.Tomb) er
f.logger.Warnf("%s is a directory, ignoring it.", file)
continue
}
tail, err := tail.TailFile(file, tail.Config{ReOpen: true, Follow: true, Poll: true, Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}})
if err != nil {
f.logger.Errorf("Could not start tailing file %s : %s", file, err)
@ -304,6 +341,20 @@ func (f *FileSource) monitorNewFiles(out chan types.Event, t *tomb.Tomb) error {
if !matched {
continue
}
//before opening the file, check if we need to specifically avoid it. (XXX)
skip := false
for _, pattern := range f.exclude_regexps {
if pattern.MatchString(event.Name) {
f.logger.Infof("file %s matches exclusion pattern %s, skipping", event.Name, pattern.String())
skip = true
break
}
}
if skip {
continue
}
if f.tails[event.Name] {
//we already have a tail on it, do not start a new one
logger.Debugf("Already tailing file %s, not creating a new tail", event.Name)

View file

@ -32,6 +32,11 @@ func TestBadConfiguration(t *testing.T) {
config: `filename: "[asd-.log"`,
expectedErr: "Glob failure: syntax error in pattern",
},
{
config: `filenames: ["asd.log"]
exclude_regexps: ["as[a-$d"]`,
expectedErr: "Could not compile regexp as",
},
}
subLogger := log.WithFields(log.Fields{
@ -440,3 +445,25 @@ force_inotify: true`, testPattern),
tomb.Kill(nil)
}
}
func TestExclusion(t *testing.T) {
config := `filenames: ["test_files/*.log*"]
exclude_regexps: ["\\.gz$"]`
logger, hook := test.NewNullLogger()
//logger.SetLevel(ts.logLevel)
subLogger := logger.WithFields(log.Fields{
"type": "file",
})
f := FileSource{}
err := f.Configure([]byte(config), subLogger)
if err != nil {
subLogger.Fatalf("unexpected error: %s", err)
}
expectedLogOutput := "Skipping file test_files/test.log.gz as it matches exclude pattern"
if hook.LastEntry() == nil {
t.Fatalf("expected output %s, but got nothing", expectedLogOutput)
}
assert.Contains(t, hook.LastEntry().Message, expectedLogOutput)
hook.Reset()
}