From 9d199fd4a916ce1445725c28eff9badff7f47554 Mon Sep 17 00:00:00 2001 From: "Thibault \"bui\" Koechlin" Date: Tue, 6 Sep 2022 14:58:37 +0200 Subject: [PATCH] fix #1733 : add support for exclusion regexps (#1735) * allow to specify a list of regular expressions to skip some specific files --- pkg/acquisition/modules/file/file.go | 51 +++++++++++++++++++++++ pkg/acquisition/modules/file/file_test.go | 27 ++++++++++++ 2 files changed, 78 insertions(+) diff --git a/pkg/acquisition/modules/file/file.go b/pkg/acquisition/modules/file/file.go index e9f9bb189..fb125c5f2 100644 --- a/pkg/acquisition/modules/file/file.go +++ b/pkg/acquisition/modules/file/file.go @@ -9,6 +9,7 @@ import ( "os" "path" "path/filepath" + "regexp" "strings" "time" @@ -33,6 +34,7 @@ var linesRead = prometheus.NewCounterVec( type FileConfiguration struct { Filenames []string + ExcludeRegexps []string `yaml:"exclude_regexps"` Filename string ForceInotify bool `yaml:"force_inotify"` configuration.DataSourceCommonCfg `yaml:",inline"` @@ -45,6 +47,7 @@ type FileSource struct { tails map[string]bool logger *log.Entry files []string + exclude_regexps []*regexp.Regexp } func (f *FileSource) Configure(Config []byte, logger *log.Entry) error { @@ -74,6 +77,13 @@ func (f *FileSource) Configure(Config []byte, logger *log.Entry) error { if err != nil { return errors.Wrapf(err, "Could not create fsnotify watcher") } + for _, exclude := range f.config.ExcludeRegexps { + re, err := regexp.Compile(exclude) + if err != nil { + return errors.Wrapf(err, "Could not compile regexp %s", exclude) + } + f.exclude_regexps = append(f.exclude_regexps, re) + } f.logger.Tracef("Actual FileAcquisition Configuration %+v", f.config) for _, pattern := range f.config.Filenames { if f.config.ForceInotify { @@ -97,6 +107,19 @@ func (f *FileSource) Configure(Config []byte, logger *log.Entry) error { continue } for _, file := range files { + + //check if file is excluded + excluded := false + for _, pattern := range f.exclude_regexps 
{ + if pattern.MatchString(file) { + excluded = true + f.logger.Infof("Skipping file %s as it matches exclude pattern %s", file, pattern) + break + } + } + if excluded { + continue + } if files[0] != pattern && f.config.Mode == configuration.TAIL_MODE { //we have a glob pattern directory := filepath.Dir(file) f.logger.Debugf("Will add watch to directory: %s", directory) @@ -232,6 +255,19 @@ func (f *FileSource) StreamingAcquisition(out chan types.Event, t *tomb.Tomb) er return f.monitorNewFiles(out, t) }) for _, file := range f.files { + //before opening the file, check if we need to specifically avoid it. (XXX) + skip := false + for _, pattern := range f.exclude_regexps { + if pattern.MatchString(file) { + f.logger.Infof("file %s matches exclusion pattern %s, skipping", file, pattern.String()) + skip = true + break + } + } + if skip { + continue + } + //cf. https://github.com/crowdsecurity/crowdsec/issues/1168 //do not rely on stat, reclose file immediately as it's opened by Tail fd, err := os.Open(file) @@ -252,6 +288,7 @@ func (f *FileSource) StreamingAcquisition(out chan types.Event, t *tomb.Tomb) er f.logger.Warnf("%s is a directory, ignoring it.", file) continue } + tail, err := tail.TailFile(file, tail.Config{ReOpen: true, Follow: true, Poll: true, Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}}) if err != nil { f.logger.Errorf("Could not start tailing file %s : %s", file, err) @@ -304,6 +341,20 @@ func (f *FileSource) monitorNewFiles(out chan types.Event, t *tomb.Tomb) error { if !matched { continue } + + //before opening the file, check if we need to specifically avoid it. 
(XXX) + skip := false + for _, pattern := range f.exclude_regexps { + if pattern.MatchString(event.Name) { + f.logger.Infof("file %s matches exclusion pattern %s, skipping", event.Name, pattern.String()) + skip = true + break + } + } + if skip { + continue + } + if f.tails[event.Name] { //we already have a tail on it, do not start a new one logger.Debugf("Already tailing file %s, not creating a new tail", event.Name) diff --git a/pkg/acquisition/modules/file/file_test.go b/pkg/acquisition/modules/file/file_test.go index 45ebdb031..d0a20fec1 100644 --- a/pkg/acquisition/modules/file/file_test.go +++ b/pkg/acquisition/modules/file/file_test.go @@ -32,6 +32,11 @@ func TestBadConfiguration(t *testing.T) { config: `filename: "[asd-.log"`, expectedErr: "Glob failure: syntax error in pattern", }, + { + config: `filenames: ["asd.log"] +exclude_regexps: ["as[a-$d"]`, + expectedErr: "Could not compile regexp as", + }, } subLogger := log.WithFields(log.Fields{ @@ -440,3 +445,25 @@ force_inotify: true`, testPattern), tomb.Kill(nil) } } + +func TestExclusion(t *testing.T) { + + config := `filenames: ["test_files/*.log*"] +exclude_regexps: ["\\.gz$"]` + logger, hook := test.NewNullLogger() + // Configure logs each excluded file at Info level; the null logger's hook captures that entry for the assertion below. + subLogger := logger.WithFields(log.Fields{ + "type": "file", + }) + f := FileSource{} + err := f.Configure([]byte(config), subLogger) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + expectedLogOutput := "Skipping file test_files/test.log.gz as it matches exclude pattern" + if hook.LastEntry() == nil { + t.Fatalf("expected output %s, but got nothing", expectedLogOutput) + } + assert.Contains(t, hook.LastEntry().Message, expectedLogOutput) + hook.Reset() +}