support for regexps result cache (#2104)

* support for regexps result cache : gcache + xxhash

Co-authored-by: Marco Mariani <marco@crowdsec.net>
Thibault "bui" Koechlin 2023-03-08 16:07:49 +01:00 committed by GitHub
parent e71d146a2d
commit 5b0fe4b7f1
8 changed files with 173 additions and 53 deletions
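
The core of the change: RegexpInFile results are memoized per data file. The input string is hashed with xxhash and the boolean match result is stored in a gcache cache, so repeated evaluations of the same input against an expensive regexp list are served from memory. Below is a minimal standalone sketch of that pattern, using only the two libraries this commit pulls in; it is an illustration, not the crowdsec code itself.

package main

import (
	"fmt"
	"regexp"

	"github.com/bluele/gcache"
	"github.com/cespare/xxhash/v2"
)

func main() {
	re := regexp.MustCompile(`(?i)crowdsec`)
	// LRU cache of at most 50 entries, keyed by the xxhash of the input string.
	cache := gcache.New(50).LRU().Build()

	match := func(s string) bool {
		key := xxhash.Sum64String(s)
		if v, err := cache.Get(key); err == nil {
			return v.(bool) // cache hit: the regexp is not evaluated again
		}
		res := re.MatchString(s)
		cache.Set(key, res) // cache miss: evaluate once, remember the result
		return res
	}

	fmt.Println(match("Crowdsec")) // evaluates the regexp
	fmt.Println(match("Crowdsec")) // served from the cache
}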


@@ -2,22 +2,22 @@ package main
import (
"fmt"
"net/http"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
log "github.com/sirupsen/logrus"
v1 "github.com/crowdsecurity/crowdsec/pkg/apiserver/controllers/v1"
"github.com/crowdsecurity/crowdsec/pkg/cache"
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
"github.com/crowdsecurity/crowdsec/pkg/cwversion"
"github.com/crowdsecurity/crowdsec/pkg/database"
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket"
"github.com/crowdsecurity/crowdsec/pkg/parser"
"github.com/crowdsecurity/crowdsec/pkg/types"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"net/http"
log "github.com/sirupsen/logrus"
)
/*prometheus*/
@@ -103,6 +103,8 @@ func computeDynamicMetrics(next http.Handler, dbClient *database.Client) http.Ha
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
//update cache metrics (stash)
cache.UpdateCacheMetrics()
//update cache metrics (regexp)
exprhelpers.UpdateRegexpCacheMetrics()
//decision metrics are only relevant for LAPI
if dbClient == nil {
@@ -166,7 +168,7 @@ func registerPrometheus(config *csconfig.PrometheusCfg) {
leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow,
v1.LapiRouteHits,
leaky.BucketsCurrentCount,
cache.CacheMetrics)
cache.CacheMetrics, exprhelpers.RegexpCacheMetrics)
} else {
log.Infof("Loading prometheus collectors")
prometheus.MustRegister(globalParserHits, globalParserHitsOk, globalParserHitsKo,
@@ -175,7 +177,7 @@ func registerPrometheus(config *csconfig.PrometheusCfg) {
v1.LapiRouteHits, v1.LapiMachineHits, v1.LapiBouncerHits, v1.LapiNilDecisions, v1.LapiNonNilDecisions, v1.LapiResponseTime,
leaky.BucketsPour, leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow, leaky.BucketsCurrentCount,
globalActiveDecisions, globalAlerts,
cache.CacheMetrics)
cache.CacheMetrics, exprhelpers.RegexpCacheMetrics)
}
}
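
Once registered, the new gauge is exposed on the Prometheus endpoint next to the existing stash cache metric. A hypothetical scrape could contain lines like the following; the file name and value are made up, only the metric name, help text and label come from this commit:

# HELP cs_regexp_cache_size Entries per regexp cache.
# TYPE cs_regexp_cache_size gauge
cs_regexp_cache_size{name="bad_user_agents.regex"} 42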

go.mod (2 changed lines)

@@ -71,6 +71,7 @@ require (
github.com/beevik/etree v1.1.0
github.com/blackfireio/osinfo v1.0.3
github.com/bluele/gcache v0.0.2
github.com/cespare/xxhash/v2 v2.1.2
github.com/goccy/go-yaml v1.9.7
github.com/gofrs/uuid v4.0.0+incompatible
github.com/golang-jwt/jwt/v4 v4.2.0
@@ -98,7 +99,6 @@ require (
github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect
github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/containerd/containerd v1.6.18 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/docker/distribution v2.8.0+incompatible // indirect

pkg/cache/cache.go (6 changed lines)

@@ -4,10 +4,10 @@ import (
"time"
"github.com/bluele/gcache"
"github.com/crowdsecurity/crowdsec/pkg/types"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
"github.com/crowdsecurity/crowdsec/pkg/types"
)
var Caches []gcache.Cache
@@ -52,7 +52,7 @@ func CacheInit(cfg CacheCfg) error {
cfg.LogLevel = new(log.Level)
*cfg.LogLevel = log.InfoLevel
}
var clog = logrus.New()
var clog = log.New()
if err := types.ConfigureLogger(clog); err != nil {
log.Fatalf("While creating cache logger : %s", err)
}


@@ -12,16 +12,33 @@ import (
"strings"
"time"
"github.com/bluele/gcache"
"github.com/c-robinson/iplib"
"github.com/cespare/xxhash/v2"
"github.com/davecgh/go-spew/spew"
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"
"github.com/crowdsecurity/crowdsec/pkg/cache"
"github.com/crowdsecurity/crowdsec/pkg/database"
"github.com/davecgh/go-spew/spew"
log "github.com/sirupsen/logrus"
"github.com/crowdsecurity/crowdsec/pkg/types"
)
var dataFile map[string][]string
var dataFileRegex map[string][]*regexp.Regexp
// This is used to (optionally) cache regexp results for RegexpInFile operations
var dataFileRegexCache map[string]gcache.Cache = make(map[string]gcache.Cache)
/*prometheus*/
var RegexpCacheMetrics = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "cs_regexp_cache_size",
Help: "Entries per regexp cache.",
},
[]string{"name"},
)
var dbClient *database.Client
func Get(arr []string, index int) string {
@@ -116,6 +133,54 @@ func Init(databaseClient *database.Client) error {
return nil
}
func RegexpCacheInit(filename string, CacheCfg types.DataSource) error {
//cache is explicitly disabled
if CacheCfg.Cache != nil && !*CacheCfg.Cache {
return nil
}
//cache is implicitly disabled if no cache config is provided
if CacheCfg.Strategy == nil && CacheCfg.TTL == nil && CacheCfg.Size == nil {
return nil
}
//cache is enabled
if CacheCfg.Size == nil {
CacheCfg.Size = types.IntPtr(50)
}
gc := gcache.New(*CacheCfg.Size)
if CacheCfg.Strategy == nil {
CacheCfg.Strategy = types.StrPtr("LRU")
}
switch *CacheCfg.Strategy {
case "LRU":
gc = gc.LRU()
case "LFU":
gc = gc.LFU()
case "ARC":
gc = gc.ARC()
default:
return fmt.Errorf("unknown cache strategy '%s'", *CacheCfg.Strategy)
}
if CacheCfg.TTL != nil {
gc.Expiration(*CacheCfg.TTL)
}
cache := gc.Build()
dataFileRegexCache[filename] = cache
return nil
}
// UpdateRegexpCacheMetrics is called directly by the prom handler
func UpdateRegexpCacheMetrics() {
RegexpCacheMetrics.Reset()
for name := range dataFileRegexCache {
RegexpCacheMetrics.With(prometheus.Labels{"name": name}).Set(float64(dataFileRegexCache[name].Len(true)))
}
}
func FileInit(fileFolder string, filename string, fileType string) error {
log.Debugf("init (folder:%s) (file:%s) (type:%s)", fileFolder, filename, fileType)
filepath := path.Join(fileFolder, filename)
@@ -192,9 +257,24 @@ func File(filename string) []string {
}
func RegexpInFile(data string, filename string) bool {
var hash uint64
hasCache := false
if _, ok := dataFileRegexCache[filename]; ok {
hasCache = true
hash = xxhash.Sum64String(data)
if val, err := dataFileRegexCache[filename].Get(hash); err == nil {
return val.(bool)
}
}
if _, ok := dataFileRegex[filename]; ok {
for _, re := range dataFileRegex[filename] {
if re.Match([]byte(data)) {
if hasCache {
dataFileRegexCache[filename].Set(hash, true)
}
return true
}
}
@@ -202,6 +282,9 @@ func RegexpInFile(data string, filename string) bool {
log.Errorf("file '%s' (type:regexp) not found in expr library", filename)
log.Errorf("expr library : %s", spew.Sdump(dataFileRegex))
}
if hasCache {
dataFileRegexCache[filename].Set(hash, false)
}
return false
}
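
For reference, here is roughly how these helpers could be driven from calling code. This is a sketch under assumptions: the folder, file name and input string are placeholders, and the data file is expected to exist and contain one regexp per line.

package main

import (
	"fmt"
	"time"

	"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
	"github.com/crowdsecurity/crowdsec/pkg/types"
)

func main() {
	if err := exprhelpers.Init(nil); err != nil { // no database client needed here
		panic(err)
	}

	// Load ./data/bad_patterns.txt as a regexp data file.
	if err := exprhelpers.FileInit("./data", "bad_patterns.txt", "regex"); err != nil {
		panic(err)
	}

	// Enable the result cache for that file: LRU, 100 entries, 5 minute TTL.
	ttl := 5 * time.Minute
	err := exprhelpers.RegexpCacheInit("bad_patterns.txt", types.DataSource{
		Type:     "regex",
		Strategy: types.StrPtr("LRU"),
		Size:     types.IntPtr(100),
		TTL:      &ttl,
	})
	if err != nil {
		panic(err)
	}

	// The first call evaluates every regexp; identical inputs afterwards hit the cache.
	fmt.Println(exprhelpers.RegexpInFile("GET /wp-login.php", "bad_patterns.txt"))
	fmt.Println(exprhelpers.RegexpInFile("GET /wp-login.php", "bad_patterns.txt"))
}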


@@ -4,22 +4,20 @@ import (
"context"
"fmt"
"os"
"testing"
"time"
"github.com/antonmedv/expr"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
"github.com/crowdsecurity/crowdsec/pkg/cstest"
"github.com/crowdsecurity/crowdsec/pkg/database"
"github.com/crowdsecurity/crowdsec/pkg/models"
"github.com/crowdsecurity/crowdsec/pkg/types"
log "github.com/sirupsen/logrus"
"testing"
"github.com/antonmedv/expr"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
var (
@@ -29,24 +27,21 @@ var (
func getDBClient(t *testing.T) *database.Client {
t.Helper()
dbPath, err := os.CreateTemp("", "*sqlite")
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)
testDbClient, err := database.NewClient(&csconfig.DatabaseCfg{
Type: "sqlite",
DbName: "crowdsec",
DbPath: dbPath.Name(),
})
if err != nil {
t.Fatal(err)
}
require.NoError(t, err)
return testDbClient
}
func TestVisitor(t *testing.T) {
if err := Init(nil); err != nil {
log.Fatal(err)
}
err := Init(nil)
require.NoError(t, err)
tests := []struct {
name string
@@ -130,6 +125,39 @@ func TestVisitor(t *testing.T) {
}
}
func TestRegexpCacheBehavior(t *testing.T) {
err := Init(nil)
require.NoError(t, err)
filename := "test_data_re.txt"
err = FileInit(TestFolder, filename, "regex")
require.NoError(t, err)
//cache with no TTL
err = RegexpCacheInit(filename, types.DataSource{Type: "regex", Size: types.IntPtr(1)})
require.NoError(t, err)
ret := RegexpInFile("crowdsec", filename)
assert.False(t, ret)
assert.Equal(t, 1, dataFileRegexCache[filename].Len(false))
ret = RegexpInFile("Crowdsec", filename)
assert.True(t, ret)
assert.Equal(t, 1, dataFileRegexCache[filename].Len(false))
//cache with TTL
ttl := 500 * time.Millisecond
err = RegexpCacheInit(filename, types.DataSource{Type: "regex", Size: types.IntPtr(2), TTL: &ttl})
require.NoError(t, err)
ret = RegexpInFile("crowdsec", filename)
assert.False(t, ret)
assert.Equal(t, 1, dataFileRegexCache[filename].Len(true))
time.Sleep(1 * time.Second)
assert.Equal(t, 0, dataFileRegexCache[filename].Len(true))
}
func TestRegexpInFile(t *testing.T) {
if err := Init(nil); err != nil {
log.Fatal(err)
@@ -449,7 +477,7 @@ func TestAtof(t *testing.T) {
expectedFloat := 1.5
if Atof(testFloat) != expectedFloat {
t.Fatalf("Atof should returned 1.5 as a float")
t.Fatalf("Atof should return 1.5 as a float")
}
log.Printf("test 'Atof()' : OK")
@@ -459,7 +487,7 @@ func TestAtof(t *testing.T) {
expectedFloat = 0.0
if Atof(testFloat) != expectedFloat {
t.Fatalf("Atof should returned a negative value (error) as a float got")
t.Fatalf("Atof should return a negative value (error) as a float got")
}
log.Printf("test 'Atof()' : OK")
@@ -470,7 +498,7 @@ func TestUpper(t *testing.T) {
expectedStr := "TEST"
if Upper(testStr) != expectedStr {
t.Fatalf("Upper() should returned test in upper case")
t.Fatalf("Upper() should return test in upper case")
}
log.Printf("test 'Upper()' : OK")
@@ -503,7 +531,7 @@ func TestParseUri(t *testing.T) {
"ParseUri": ParseUri,
},
code: "ParseUri(uri)",
result: map[string][]string{"a": []string{"1"}, "b": []string{"2"}},
result: map[string][]string{"a": {"1"}, "b": {"2"}},
err: "",
},
{
@@ -523,7 +551,7 @@ func TestParseUri(t *testing.T) {
"ParseUri": ParseUri,
},
code: "ParseUri(uri)",
result: map[string][]string{"a": []string{"1"}, "b": []string{"2?"}},
result: map[string][]string{"a": {"1"}, "b": {"2?"}},
err: "",
},
{
@@ -533,7 +561,7 @@ func TestParseUri(t *testing.T) {
"ParseUri": ParseUri,
},
code: "ParseUri(uri)",
result: map[string][]string{"?": []string{"", "123"}},
result: map[string][]string{"?": {"", "123"}},
err: "",
},
{


@@ -11,24 +11,20 @@ import (
"sync"
"time"
"github.com/crowdsecurity/crowdsec/pkg/alertcontext"
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
"github.com/crowdsecurity/crowdsec/pkg/cwhub"
"github.com/crowdsecurity/crowdsec/pkg/cwversion"
"github.com/crowdsecurity/crowdsec/pkg/types"
"github.com/davecgh/go-spew/spew"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
"github.com/antonmedv/expr"
"github.com/antonmedv/expr/vm"
"github.com/davecgh/go-spew/spew"
"github.com/goombaio/namegenerator"
log "github.com/sirupsen/logrus"
"gopkg.in/tomb.v2"
yaml "gopkg.in/yaml.v2"
"github.com/crowdsecurity/crowdsec/pkg/alertcontext"
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
"github.com/crowdsecurity/crowdsec/pkg/cwhub"
"github.com/crowdsecurity/crowdsec/pkg/cwversion"
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
"github.com/crowdsecurity/crowdsec/pkg/types"
)
// BucketFactory struct holds all fields for any bucket configuration. This is to have a
@@ -254,7 +250,7 @@ func LoadBuckets(cscfg *csconfig.CrowdsecServiceCfg, files []string, tomb *tomb.
func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
var err error
if bucketFactory.Debug {
var clog = logrus.New()
var clog = log.New()
if err := types.ConfigureLogger(clog); err != nil {
log.Fatalf("While creating bucket-specific logger : %s", err)
}
@@ -374,6 +370,9 @@ func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
if err != nil {
bucketFactory.logger.Errorf("unable to init data for file '%s': %s", data.DestPath, err)
}
if data.Type == "regexp" { //cache only makes sense for regexp
exprhelpers.RegexpCacheInit(data.DestPath, *data)
}
}
}


@@ -16,13 +16,12 @@ import (
"strings"
"time"
"github.com/goombaio/namegenerator"
log "github.com/sirupsen/logrus"
yaml "gopkg.in/yaml.v2"
"github.com/crowdsecurity/crowdsec/pkg/cwversion"
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
log "github.com/sirupsen/logrus"
"github.com/goombaio/namegenerator"
yaml "gopkg.in/yaml.v2"
)
var seed namegenerator.Generator = namegenerator.NewNameGenerator(time.Now().UTC().UnixNano())
@@ -116,6 +115,9 @@ func LoadStages(stageFiles []Stagefile, pctx *UnixParserCtx, ectx EnricherCtx) (
if err != nil {
log.Error(err)
}
if data.Type == "regexp" { //cache only makes sense for regexp
exprhelpers.RegexpCacheInit(data.DestPath, *data)
}
}
}
nodes = append(nodes, node)


@@ -6,6 +6,7 @@ import (
"net/http"
"os"
"path"
"time"
log "github.com/sirupsen/logrus"
)
@@ -14,6 +15,11 @@ type DataSource struct {
SourceURL string `yaml:"source_url"`
DestPath string `yaml:"dest_file"`
Type string `yaml:"type"`
//Control cache strategy on expensive regexps
Cache *bool `yaml:"cache"`
Strategy *string `yaml:"strategy"`
Size *int `yaml:"size"`
TTL *time.Duration `yaml:"ttl"`
}
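
These fields map onto the data entries of hub items, as wired up in LoadBucket and LoadStages above. The following is a hedged illustration of what such a YAML entry could look like and how it decodes into this struct; the URL, file name and values are placeholders, and the struct is mirrored locally only to keep the sketch self-contained.

package main

import (
	"fmt"
	"time"

	"gopkg.in/yaml.v2"
)

// Local mirror of types.DataSource, with the same yaml tags.
type DataSource struct {
	SourceURL string         `yaml:"source_url"`
	DestPath  string         `yaml:"dest_file"`
	Type      string         `yaml:"type"`
	Cache     *bool          `yaml:"cache"`
	Strategy  *string        `yaml:"strategy"`
	Size      *int           `yaml:"size"`
	TTL       *time.Duration `yaml:"ttl"`
}

func main() {
	// Hypothetical data entry enabling the regexp result cache.
	doc := []byte(`
source_url: https://example.com/bad_user_agents.regex
dest_file: bad_user_agents.regex
type: regexp
cache: true
strategy: LRU
size: 100
ttl: 10m
`)
	var ds DataSource
	if err := yaml.Unmarshal(doc, &ds); err != nil {
		panic(err)
	}
	fmt.Printf("%s: cache=%v strategy=%s size=%d ttl=%s\n",
		ds.DestPath, *ds.Cache, *ds.Strategy, *ds.Size, *ds.TTL)
}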
type DataSet struct {