From 5b0fe4b7f13d5373065ac5764e0bb6523fff20a3 Mon Sep 17 00:00:00 2001 From: "Thibault \"bui\" Koechlin" Date: Wed, 8 Mar 2023 16:07:49 +0100 Subject: [PATCH] support for regexps result cache (#2104) * support for regexps result cache : gcache + xxhash Co-authored-by: Marco Mariani --- cmd/crowdsec/metrics.go | 18 ++++--- go.mod | 2 +- pkg/cache/cache.go | 6 +-- pkg/exprhelpers/exprlib.go | 87 ++++++++++++++++++++++++++++++++- pkg/exprhelpers/exprlib_test.go | 72 ++++++++++++++++++--------- pkg/leakybucket/manager_load.go | 23 +++++---- pkg/parser/stage.go | 12 +++-- pkg/types/dataset.go | 6 +++ 8 files changed, 173 insertions(+), 53 deletions(-) diff --git a/cmd/crowdsec/metrics.go b/cmd/crowdsec/metrics.go index 6b9c1e53a..6b549603f 100644 --- a/cmd/crowdsec/metrics.go +++ b/cmd/crowdsec/metrics.go @@ -2,22 +2,22 @@ package main import ( "fmt" + "net/http" "time" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + log "github.com/sirupsen/logrus" + v1 "github.com/crowdsecurity/crowdsec/pkg/apiserver/controllers/v1" "github.com/crowdsecurity/crowdsec/pkg/cache" "github.com/crowdsecurity/crowdsec/pkg/csconfig" "github.com/crowdsecurity/crowdsec/pkg/cwversion" "github.com/crowdsecurity/crowdsec/pkg/database" + "github.com/crowdsecurity/crowdsec/pkg/exprhelpers" leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket" "github.com/crowdsecurity/crowdsec/pkg/parser" "github.com/crowdsecurity/crowdsec/pkg/types" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - - "net/http" - - log "github.com/sirupsen/logrus" ) /*prometheus*/ @@ -103,6 +103,8 @@ func computeDynamicMetrics(next http.Handler, dbClient *database.Client) http.Ha return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { //update cache metrics (stash) cache.UpdateCacheMetrics() + //update cache metrics (regexp) + exprhelpers.UpdateRegexpCacheMetrics() //decision metrics 
are only relevant for LAPI if dbClient == nil { @@ -166,7 +168,7 @@ func registerPrometheus(config *csconfig.PrometheusCfg) { leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow, v1.LapiRouteHits, leaky.BucketsCurrentCount, - cache.CacheMetrics) + cache.CacheMetrics, exprhelpers.RegexpCacheMetrics) } else { log.Infof("Loading prometheus collectors") prometheus.MustRegister(globalParserHits, globalParserHitsOk, globalParserHitsKo, @@ -175,7 +177,7 @@ func registerPrometheus(config *csconfig.PrometheusCfg) { v1.LapiRouteHits, v1.LapiMachineHits, v1.LapiBouncerHits, v1.LapiNilDecisions, v1.LapiNonNilDecisions, v1.LapiResponseTime, leaky.BucketsPour, leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow, leaky.BucketsCurrentCount, globalActiveDecisions, globalAlerts, - cache.CacheMetrics) + cache.CacheMetrics, exprhelpers.RegexpCacheMetrics) } } diff --git a/go.mod b/go.mod index 6852b9a13..6b7a378b4 100644 --- a/go.mod +++ b/go.mod @@ -71,6 +71,7 @@ require ( github.com/beevik/etree v1.1.0 github.com/blackfireio/osinfo v1.0.3 github.com/bluele/gcache v0.0.2 + github.com/cespare/xxhash/v2 v2.1.2 github.com/goccy/go-yaml v1.9.7 github.com/gofrs/uuid v4.0.0+incompatible github.com/golang-jwt/jwt/v4 v4.2.0 @@ -98,7 +99,6 @@ require ( github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/containerd/containerd v1.6.18 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/docker/distribution v2.8.0+incompatible // indirect diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go index f575ea9aa..0f3b9c4a0 100644 --- a/pkg/cache/cache.go +++ b/pkg/cache/cache.go @@ -4,10 +4,10 @@ import ( "time" "github.com/bluele/gcache" - "github.com/crowdsecurity/crowdsec/pkg/types" 
"github.com/prometheus/client_golang/prometheus" - "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus" + + "github.com/crowdsecurity/crowdsec/pkg/types" ) var Caches []gcache.Cache @@ -52,7 +52,7 @@ func CacheInit(cfg CacheCfg) error { cfg.LogLevel = new(log.Level) *cfg.LogLevel = log.InfoLevel } - var clog = logrus.New() + var clog = log.New() if err := types.ConfigureLogger(clog); err != nil { log.Fatalf("While creating cache logger : %s", err) } diff --git a/pkg/exprhelpers/exprlib.go b/pkg/exprhelpers/exprlib.go index b830507af..5921b4c10 100644 --- a/pkg/exprhelpers/exprlib.go +++ b/pkg/exprhelpers/exprlib.go @@ -12,16 +12,33 @@ import ( "strings" "time" + "github.com/bluele/gcache" "github.com/c-robinson/iplib" + "github.com/cespare/xxhash/v2" + "github.com/davecgh/go-spew/spew" + "github.com/prometheus/client_golang/prometheus" + log "github.com/sirupsen/logrus" "github.com/crowdsecurity/crowdsec/pkg/cache" "github.com/crowdsecurity/crowdsec/pkg/database" - "github.com/davecgh/go-spew/spew" - log "github.com/sirupsen/logrus" + "github.com/crowdsecurity/crowdsec/pkg/types" ) var dataFile map[string][]string var dataFileRegex map[string][]*regexp.Regexp + +// This is used to (optionally) cache regexp results for RegexpInFile operations +var dataFileRegexCache map[string]gcache.Cache = make(map[string]gcache.Cache) + +/*prometheus*/ +var RegexpCacheMetrics = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "cs_regexp_cache_size", + Help: "Entries per regexp cache.", + }, + []string{"name"}, +) + var dbClient *database.Client func Get(arr []string, index int) string { @@ -116,6 +133,54 @@ func Init(databaseClient *database.Client) error { return nil } +func RegexpCacheInit(filename string, CacheCfg types.DataSource) error { + + //cache is explicitly disabled + if CacheCfg.Cache != nil && !*CacheCfg.Cache { + return nil + } + //cache is implicitly disabled if no cache config is provided + if CacheCfg.Strategy == nil && CacheCfg.TTL == nil && 
CacheCfg.Size == nil { + return nil + } + //cache is enabled + + if CacheCfg.Size == nil { + CacheCfg.Size = types.IntPtr(50) + } + + gc := gcache.New(*CacheCfg.Size) + + if CacheCfg.Strategy == nil { + CacheCfg.Strategy = types.StrPtr("LRU") + } + switch *CacheCfg.Strategy { + case "LRU": + gc = gc.LRU() + case "LFU": + gc = gc.LFU() + case "ARC": + gc = gc.ARC() + default: + return fmt.Errorf("unknown cache strategy '%s'", *CacheCfg.Strategy) + } + + if CacheCfg.TTL != nil { + gc.Expiration(*CacheCfg.TTL) + } + cache := gc.Build() + dataFileRegexCache[filename] = cache + return nil +} + +// UpdateRegexpCacheMetrics is called directly by the prom handler +func UpdateRegexpCacheMetrics() { + RegexpCacheMetrics.Reset() + for name := range dataFileRegexCache { + RegexpCacheMetrics.With(prometheus.Labels{"name": name}).Set(float64(dataFileRegexCache[name].Len(true))) + } +} + func FileInit(fileFolder string, filename string, fileType string) error { log.Debugf("init (folder:%s) (file:%s) (type:%s)", fileFolder, filename, fileType) filepath := path.Join(fileFolder, filename) @@ -192,9 +257,24 @@ func File(filename string) []string { } func RegexpInFile(data string, filename string) bool { + + var hash uint64 + hasCache := false + + if _, ok := dataFileRegexCache[filename]; ok { + hasCache = true + hash = xxhash.Sum64String(data) + if val, err := dataFileRegexCache[filename].Get(hash); err == nil { + return val.(bool) + } + } + if _, ok := dataFileRegex[filename]; ok { for _, re := range dataFileRegex[filename] { if re.Match([]byte(data)) { + if hasCache { + dataFileRegexCache[filename].Set(hash, true) + } return true } } @@ -202,6 +282,9 @@ func RegexpInFile(data string, filename string) bool { log.Errorf("file '%s' (type:regexp) not found in expr library", filename) log.Errorf("expr library : %s", spew.Sdump(dataFileRegex)) } + if hasCache { + dataFileRegexCache[filename].Set(hash, false) + } return false } diff --git a/pkg/exprhelpers/exprlib_test.go 
b/pkg/exprhelpers/exprlib_test.go index 2f7f9de4e..bba3339a6 100644 --- a/pkg/exprhelpers/exprlib_test.go +++ b/pkg/exprhelpers/exprlib_test.go @@ -4,22 +4,20 @@ import ( "context" "fmt" "os" + "testing" "time" + "github.com/antonmedv/expr" "github.com/pkg/errors" + log "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/crowdsecurity/crowdsec/pkg/csconfig" "github.com/crowdsecurity/crowdsec/pkg/cstest" "github.com/crowdsecurity/crowdsec/pkg/database" "github.com/crowdsecurity/crowdsec/pkg/models" "github.com/crowdsecurity/crowdsec/pkg/types" - log "github.com/sirupsen/logrus" - - "testing" - - "github.com/antonmedv/expr" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) var ( @@ -29,24 +27,21 @@ var ( func getDBClient(t *testing.T) *database.Client { t.Helper() dbPath, err := os.CreateTemp("", "*sqlite") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) + testDbClient, err := database.NewClient(&csconfig.DatabaseCfg{ Type: "sqlite", DbName: "crowdsec", DbPath: dbPath.Name(), }) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) + return testDbClient } func TestVisitor(t *testing.T) { - if err := Init(nil); err != nil { - log.Fatal(err) - } + err := Init(nil) + require.NoError(t, err) tests := []struct { name string @@ -130,6 +125,39 @@ func TestVisitor(t *testing.T) { } } +func TestRegexpCacheBehavior(t *testing.T) { + err := Init(nil) + require.NoError(t, err) + + filename := "test_data_re.txt" + err = FileInit(TestFolder, filename, "regex") + require.NoError(t, err) + + //cache with no TTL + err = RegexpCacheInit(filename, types.DataSource{Type: "regex", Size: types.IntPtr(1)}) + require.NoError(t, err) + + ret := RegexpInFile("crowdsec", filename) + assert.False(t, ret) + assert.Equal(t, 1, dataFileRegexCache[filename].Len(false)) + + ret = RegexpInFile("Crowdsec", filename) + assert.True(t, ret) + assert.Equal(t, 1, 
dataFileRegexCache[filename].Len(false)) + + //cache with TTL + ttl := 500 * time.Millisecond + err = RegexpCacheInit(filename, types.DataSource{Type: "regex", Size: types.IntPtr(2), TTL: &ttl}) + require.NoError(t, err) + + ret = RegexpInFile("crowdsec", filename) + assert.False(t, ret) + assert.Equal(t, 1, dataFileRegexCache[filename].Len(true)) + + time.Sleep(1 * time.Second) + assert.Equal(t, 0, dataFileRegexCache[filename].Len(true)) +} + func TestRegexpInFile(t *testing.T) { if err := Init(nil); err != nil { log.Fatal(err) @@ -449,7 +477,7 @@ func TestAtof(t *testing.T) { expectedFloat := 1.5 if Atof(testFloat) != expectedFloat { - t.Fatalf("Atof should returned 1.5 as a float") + t.Fatalf("Atof should return 1.5 as a float") } log.Printf("test 'Atof()' : OK") @@ -459,7 +487,7 @@ func TestAtof(t *testing.T) { expectedFloat = 0.0 if Atof(testFloat) != expectedFloat { - t.Fatalf("Atof should returned a negative value (error) as a float got") + t.Fatalf("Atof should return a negative value (error) as a float got") } log.Printf("test 'Atof()' : OK") @@ -470,7 +498,7 @@ func TestUpper(t *testing.T) { expectedStr := "TEST" if Upper(testStr) != expectedStr { - t.Fatalf("Upper() should returned test in upper case") + t.Fatalf("Upper() should return test in upper case") } log.Printf("test 'Upper()' : OK") @@ -503,7 +531,7 @@ func TestParseUri(t *testing.T) { "ParseUri": ParseUri, }, code: "ParseUri(uri)", - result: map[string][]string{"a": []string{"1"}, "b": []string{"2"}}, + result: map[string][]string{"a": {"1"}, "b": {"2"}}, err: "", }, { @@ -523,7 +551,7 @@ func TestParseUri(t *testing.T) { "ParseUri": ParseUri, }, code: "ParseUri(uri)", - result: map[string][]string{"a": []string{"1"}, "b": []string{"2?"}}, + result: map[string][]string{"a": {"1"}, "b": {"2?"}}, err: "", }, { @@ -533,7 +561,7 @@ func TestParseUri(t *testing.T) { "ParseUri": ParseUri, }, code: "ParseUri(uri)", - result: map[string][]string{"?": []string{"", "123"}}, + result: 
map[string][]string{"?": {"", "123"}}, err: "", }, { diff --git a/pkg/leakybucket/manager_load.go b/pkg/leakybucket/manager_load.go index 462a665c0..dfcdcee4f 100644 --- a/pkg/leakybucket/manager_load.go +++ b/pkg/leakybucket/manager_load.go @@ -11,24 +11,20 @@ import ( "sync" "time" - "github.com/crowdsecurity/crowdsec/pkg/alertcontext" - - "github.com/crowdsecurity/crowdsec/pkg/csconfig" - "github.com/crowdsecurity/crowdsec/pkg/cwhub" - "github.com/crowdsecurity/crowdsec/pkg/cwversion" - "github.com/crowdsecurity/crowdsec/pkg/types" - - "github.com/davecgh/go-spew/spew" - "github.com/sirupsen/logrus" - log "github.com/sirupsen/logrus" - "github.com/antonmedv/expr" "github.com/antonmedv/expr/vm" + "github.com/davecgh/go-spew/spew" "github.com/goombaio/namegenerator" + log "github.com/sirupsen/logrus" "gopkg.in/tomb.v2" yaml "gopkg.in/yaml.v2" + "github.com/crowdsecurity/crowdsec/pkg/alertcontext" + "github.com/crowdsecurity/crowdsec/pkg/csconfig" + "github.com/crowdsecurity/crowdsec/pkg/cwhub" + "github.com/crowdsecurity/crowdsec/pkg/cwversion" "github.com/crowdsecurity/crowdsec/pkg/exprhelpers" + "github.com/crowdsecurity/crowdsec/pkg/types" ) // BucketFactory struct holds all fields for any bucket configuration. This is to have a @@ -254,7 +250,7 @@ func LoadBuckets(cscfg *csconfig.CrowdsecServiceCfg, files []string, tomb *tomb. 
func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error { var err error if bucketFactory.Debug { - var clog = logrus.New() + var clog = log.New() if err := types.ConfigureLogger(clog); err != nil { log.Fatalf("While creating bucket-specific logger : %s", err) } @@ -374,6 +370,9 @@ func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error { if err != nil { bucketFactory.logger.Errorf("unable to init data for file '%s': %s", data.DestPath, err) } + if data.Type == "regexp" { //cache only makes sense for regexp + exprhelpers.RegexpCacheInit(data.DestPath, *data) + } } } diff --git a/pkg/parser/stage.go b/pkg/parser/stage.go index a66c4e3b5..4ffa50f2f 100644 --- a/pkg/parser/stage.go +++ b/pkg/parser/stage.go @@ -16,13 +16,12 @@ import ( "strings" "time" + "github.com/goombaio/namegenerator" + log "github.com/sirupsen/logrus" + yaml "gopkg.in/yaml.v2" + "github.com/crowdsecurity/crowdsec/pkg/cwversion" "github.com/crowdsecurity/crowdsec/pkg/exprhelpers" - - log "github.com/sirupsen/logrus" - - "github.com/goombaio/namegenerator" - yaml "gopkg.in/yaml.v2" ) var seed namegenerator.Generator = namegenerator.NewNameGenerator(time.Now().UTC().UnixNano()) @@ -116,6 +115,9 @@ func LoadStages(stageFiles []Stagefile, pctx *UnixParserCtx, ectx EnricherCtx) ( if err != nil { log.Error(err) } + if data.Type == "regexp" { //cache only makes sense for regexp + exprhelpers.RegexpCacheInit(data.DestPath, *data) + } } } nodes = append(nodes, node) diff --git a/pkg/types/dataset.go b/pkg/types/dataset.go index 3074f18ac..2684342a9 100644 --- a/pkg/types/dataset.go +++ b/pkg/types/dataset.go @@ -6,6 +6,7 @@ import ( "net/http" "os" "path" + "time" log "github.com/sirupsen/logrus" ) @@ -14,6 +15,11 @@ type DataSource struct { SourceURL string `yaml:"source_url"` DestPath string `yaml:"dest_file"` Type string `yaml:"type"` + //Control cache strategy on expensive regexps + Cache *bool `yaml:"cache"` + Strategy *string `yaml:"strategy"` + Size *int `yaml:"size"` + TTL 
*time.Duration `yaml:"ttl"` } type DataSet struct {