Add bayesian bucket type (#2290)

This commit is contained in:
Emanuel Seemann 2023-06-21 15:08:27 +02:00 committed by GitHub
parent da6106bd23
commit 40e6b205bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 471 additions and 0 deletions

View file

@ -48,6 +48,15 @@ infinite leakspeed (it never overflows, nor leaks). Nevertheless,
the event is raised after a fixed duration. The option is called the event is raised after a fixed duration. The option is called
duration. duration.
## Bayesian
A Bayesian bucket is a special bucket that runs Bayesian inference instead of
counting events. Each condition must have its likelihoods specified in the
yaml file under `prob_given_benign` and `prob_given_evil`. The bucket
will continue evaluating events until the posterior goes above the
threshold (triggering the overflow) or the duration (specified by leakspeed)
expires.
## Available configuration options for buckets ## Available configuration options for buckets
### Fields for standard buckets ### Fields for standard buckets
@ -102,6 +111,22 @@ Capacity and leakspeed are not relevant for this kind of bucket.
Nevertheless, this kind of bucket is often used with an infinite Nevertheless, this kind of bucket is often used with an infinite
leakspeed and an infinite capacity [capacity set to -1 for now]. leakspeed and an infinite capacity [capacity set to -1 for now].
#### Bayesian
* bayesian_prior: The prior to start with
* bayesian_threshold: The threshold for the posterior to trigger the overflow.
* bayesian_conditions: List of Bayesian conditions with likelihoods
Bayesian Conditions are built from:
* condition: The expr for this specific condition to be true
* prob_given_evil: The likelihood an IP satisfies the condition given the fact
that it is a malicious IP
* prob_given_benign: The likelihood an IP satisfies the condition given the fact
that it is a benign IP
* guillotine: Bool to stop the condition from getting evaluated if it has
evaluated to true once. This should be used if evaluating the condition is
computationally expensive.
## Add examples here ## Add examples here

163
pkg/leakybucket/bayesian.go Normal file
View file

@ -0,0 +1,163 @@
package leakybucket
import (
"fmt"
"github.com/antonmedv/expr"
"github.com/antonmedv/expr/vm"
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
"github.com/crowdsecurity/crowdsec/pkg/types"
)
// RawBayesianCondition is one entry of the `bayesian_conditions` list, in the
// form it is read from the bucket YAML definition.
type RawBayesianCondition struct {
// ConditionalFilterName is the expr source text of the condition (YAML key
// `condition`); it is what gets compiled and is also used as the cache key.
ConditionalFilterName string `yaml:"condition"`
// ProbGivenEvil is the likelihood applied to the "evil" hypothesis when the
// condition evaluates to true (its complement is used when it is false).
ProbGivenEvil float32 `yaml:"prob_given_evil"`
// ProbGivenBenign is the likelihood applied to the "benign" hypothesis when
// the condition evaluates to true (its complement is used when it is false).
ProbGivenBenign float32 `yaml:"prob_given_benign"`
// Guillotine, when set, makes the condition stop being re-evaluated once it
// has evaluated to true; the "true" update is then applied directly.
Guillotine bool `yaml:"guillotine,omitempty"`
}
// BayesianEvent is the runtime form of a single bayesian condition: the raw
// YAML settings plus the compiled expression and the guillotine latch.
type BayesianEvent struct {
// rawCondition is the configuration this event was built from.
rawCondition RawBayesianCondition
// conditionalFilterRuntime is the compiled condition, set by
// compileCondition; a nil value makes bayesianUpdate a no-op.
conditionalFilterRuntime *vm.Program
// guillotineState is set to true by triggerGuillotine once the condition has
// evaluated to true; it is only honoured when rawCondition.Guillotine is set.
guillotineState bool
}
// BayesianBucket is the processor implementing the bayesian bucket type. It
// embeds DumbProcessor and overrides OnBucketInit and AfterBucketPour.
type BayesianBucket struct {
// bayesianEventArray holds one compiled event per configured condition, in
// configuration order.
bayesianEventArray []*BayesianEvent
// prior is the starting probability, copied from the factory's BayesianPrior.
prior float32
// threshold is the posterior value above which the bucket overflows, copied
// from the factory's BayesianThreshold.
threshold float32
// posterior is the working probability: it is reset to prior at the start of
// every pour and then updated by each condition in turn.
// NOTE(review): this lives on the processor rather than on the Leaky; if one
// processor instance serves several buckets this is shared state - confirm.
posterior float32
DumbProcessor
}
// updateProbability performs a single Bayes' rule update and returns the new
// probability of the "evil" hypothesis:
//
//	P(evil | obs) = P(obs | evil) * P(evil) /
//	                (P(obs | evil) * P(evil) + P(obs | benign) * (1 - P(evil)))
//
// prior is P(evil) before the observation, probGivenEvil is P(obs | evil) and
// probGivenBenign is P(obs | benign).
//
// When the denominator is zero (the observation is impossible under both
// hypotheses, e.g. both likelihoods are 0) the observation carries no usable
// information, so the prior is returned unchanged instead of letting the
// division produce NaN or Inf.
func updateProbability(prior, probGivenEvil, probGivenBenign float32) float32 {
	numerator := probGivenEvil * prior
	denominator := numerator + probGivenBenign*(1-prior)

	if denominator == 0 {
		return prior
	}

	return numerator / denominator
}
// OnBucketInit compiles every bayesian condition declared on the factory and
// copies the configured prior and threshold onto the processor.
//
// Locking: conditionalExprCacheLock is taken here and handed to
// compileCondition, which returns with the lock held on success and with the
// lock released when compilation fails. For that reason the error path below
// returns without unlocking.
//
// It returns the first compilation error encountered, or nil.
func (c *BayesianBucket) OnBucketInit(g *BucketFactory) error {
	events := make([]*BayesianEvent, len(g.BayesianConditions))

	conditionalExprCacheLock.Lock()

	// Create the shared cache lazily, but only while holding the lock, so the
	// nil check and the allocation are covered by the same mutex that guards
	// every other access to the map.
	if conditionalExprCache == nil {
		conditionalExprCache = make(map[string]vm.Program)
	}

	for index, bcond := range g.BayesianConditions {
		event := &BayesianEvent{rawCondition: bcond}

		if err := event.compileCondition(); err != nil {
			// compileCondition has already released the lock on this path.
			return err
		}

		events[index] = event
	}

	conditionalExprCacheLock.Unlock()

	c.bayesianEventArray = events
	c.prior = g.BayesianPrior
	c.threshold = g.BayesianThreshold

	return nil
}
// AfterBucketPour builds the per-event callback of the bayesian processor.
//
// The callback restarts inference from the configured prior, lets every
// condition update the posterior in configuration order (a failing condition
// is logged and skipped), and then compares the posterior to the threshold.
// Above the threshold it records the overflow timestamp, sends the bucket
// queue on l.Out and returns nil; otherwise it returns a pointer to the event.
func (c *BayesianBucket) AfterBucketPour(b *BucketFactory) func(types.Event, *Leaky) *types.Event {
	evaluate := func(msg types.Event, l *Leaky) *types.Event {
		// Every pour starts again from the prior.
		c.posterior = c.prior
		l.logger.Debugf("starting bayesian evaluation with prior: %v", c.posterior)

		for i := range c.bayesianEventArray {
			cond := c.bayesianEventArray[i]
			if err := cond.bayesianUpdate(c, msg, l); err != nil {
				l.logger.Errorf("bayesian update failed for %s with %s", cond.rawCondition.ConditionalFilterName, err)
			}
		}

		l.logger.Debugf("value of posterior after events : %v", c.posterior)

		// Written as a negated ">" so a NaN posterior never overflows.
		if overflow := c.posterior > c.threshold; !overflow {
			return &msg
		}

		l.logger.Debugf("Bayesian bucket overflow")
		l.Ovflw_ts = l.Last_ts
		l.Out <- l.Queue

		return nil
	}

	return evaluate
}
// bayesianUpdate folds this condition into the bucket's posterior.
//
// c is the bucket whose posterior is updated in place, msg is the event being
// poured and l is the leaky bucket, whose queue and logger are used.
//
// Behaviour:
//   - no compiled program: nothing happens;
//   - guillotine already triggered: the "condition true" update is applied
//     without re-running the expression;
//   - otherwise the expression is run against {evt, queue, leaky}; a true
//     result applies the likelihoods as given and arms the guillotine latch, a
//     false result applies their complements.
//
// It returns an error when the expression fails to run or does not return a
// bool; the posterior is left untouched in that case.
func (b *BayesianEvent) bayesianUpdate(c *BayesianBucket, msg types.Event, l *Leaky) error {
	name := b.rawCondition.ConditionalFilterName

	if b.conditionalFilterRuntime == nil {
		l.logger.Tracef("empty conditional filter runtime for %s", name)
		return nil
	}

	guillotined := b.getGuillotineState()
	l.logger.Tracef("guillotine value for %s : %v", name, guillotined)

	if guillotined {
		l.logger.Tracef("guillotine already triggered for %s", name)
		l.logger.Tracef("condition true updating prior for: %s", name)
		c.posterior = updateProbability(c.posterior, b.rawCondition.ProbGivenEvil, b.rawCondition.ProbGivenBenign)
		l.logger.Tracef("new value of posterior : %v", c.posterior)

		return nil
	}

	l.logger.Debugf("running condition expression: %s", name)

	ret, err := expr.Run(b.conditionalFilterRuntime, map[string]interface{}{"evt": &msg, "queue": l.Queue, "leaky": l})
	if err != nil {
		// %w keeps the underlying error available to errors.Is / errors.As.
		return fmt.Errorf("unable to run conditional filter: %w", err)
	}

	l.logger.Tracef("bayesian bucket expression %s returned : %v", name, ret)

	condition, ok := ret.(bool)
	if !ok {
		return fmt.Errorf("bayesian condition unexpected non-bool return: %T", ret)
	}

	// %v logs the outcome of the condition; %T would only ever print "bool".
	l.logger.Tracef("condition %v updating prior for: %s", condition, name)

	if condition {
		c.posterior = updateProbability(c.posterior, b.rawCondition.ProbGivenEvil, b.rawCondition.ProbGivenBenign)
		// The latch is always set; it only takes effect when Guillotine is enabled.
		b.triggerGuillotine()
	} else {
		c.posterior = updateProbability(c.posterior, 1-b.rawCondition.ProbGivenEvil, 1-b.rawCondition.ProbGivenBenign)
	}

	l.logger.Tracef("new value of posterior: %v", c.posterior)

	return nil
}
// getGuillotineState reports whether evaluation of this condition should be
// short-circuited: the guillotine option must be enabled in the configuration
// and the latch must already have been triggered.
func (b *BayesianEvent) getGuillotineState() bool {
	return b.rawCondition.Guillotine && b.guillotineState
}
// triggerGuillotine latches the guillotine state to true. The flag is never
// reset in this file, and getGuillotineState ignores it unless the condition
// was configured with guillotine enabled.
func (b *BayesianEvent) triggerGuillotine() {
b.guillotineState = true
}
// compileCondition sets b.conditionalFilterRuntime to the compiled form of the
// condition, reusing the shared expression cache when the same source text has
// already been compiled.
//
// Locking contract: the caller must hold conditionalExprCacheLock. On success
// the lock is held again when the function returns. On a compile error the
// function returns with the lock released.
func (b *BayesianEvent) compileCondition() error {
	source := b.rawCondition.ConditionalFilterName

	if cached, found := conditionalExprCache[source]; found {
		b.conditionalFilterRuntime = &cached
		return nil
	}

	// Release the lock while compiling, same as the conditional bucket does.
	conditionalExprCacheLock.Unlock()

	env := map[string]interface{}{"queue": &Queue{}, "leaky": &Leaky{}, "evt": &types.Event{}}

	program, err := expr.Compile(source, exprhelpers.GetExprOptions(env)...)
	if err != nil {
		return fmt.Errorf("bayesian condition compile error: %w", err)
	}

	b.conditionalFilterRuntime = program

	// Take the lock back before publishing the program in the cache.
	conditionalExprCacheLock.Lock()
	conditionalExprCache[source] = *program

	return nil
}

View file

@ -191,6 +191,10 @@ func FromFactory(bucketFactory BucketFactory) *Leaky {
l.conditionalOverflow = true l.conditionalOverflow = true
l.Duration = l.BucketConfig.leakspeed l.Duration = l.BucketConfig.leakspeed
} }
if l.BucketConfig.Type == "bayesian" {
l.Duration = l.BucketConfig.leakspeed
}
return l return l
} }

View file

@ -51,6 +51,9 @@ type BucketFactory struct {
Profiling bool `yaml:"profiling"` //Profiling, if true, will make the bucket record pours/overflows/etc. Profiling bool `yaml:"profiling"` //Profiling, if true, will make the bucket record pours/overflows/etc.
OverflowFilter string `yaml:"overflow_filter"` //OverflowFilter if present, is a filter that must return true for the overflow to go through OverflowFilter string `yaml:"overflow_filter"` //OverflowFilter if present, is a filter that must return true for the overflow to go through
ConditionalOverflow string `yaml:"condition"` //condition if present, is an expression that must return true for the bucket to overflow ConditionalOverflow string `yaml:"condition"` //condition if present, is an expression that must return true for the bucket to overflow
BayesianPrior float32 `yaml:"bayesian_prior"`
BayesianThreshold float32 `yaml:"bayesian_threshold"`
BayesianConditions []RawBayesianCondition `yaml:"bayesian_conditions"` //conditions for the bayesian bucket
ScopeType types.ScopeType `yaml:"scope,omitempty"` //to enforce a different remediation than blocking an IP. Will default this to IP ScopeType types.ScopeType `yaml:"scope,omitempty"` //to enforce a different remediation than blocking an IP. Will default this to IP
BucketName string `yaml:"-"` BucketName string `yaml:"-"`
Filename string `yaml:"-"` Filename string `yaml:"-"`
@ -120,6 +123,25 @@ func ValidateFactory(bucketFactory *BucketFactory) error {
if bucketFactory.leakspeed == 0 { if bucketFactory.leakspeed == 0 {
return fmt.Errorf("bad leakspeed for conditional bucket '%s'", bucketFactory.LeakSpeed) return fmt.Errorf("bad leakspeed for conditional bucket '%s'", bucketFactory.LeakSpeed)
} }
} else if bucketFactory.Type == "bayesian" {
if bucketFactory.BayesianConditions == nil {
return fmt.Errorf("bayesian bucket must have bayesian conditions")
}
if bucketFactory.BayesianPrior == 0 {
return fmt.Errorf("bayesian bucket must have a valid, non-zero prior")
}
if bucketFactory.BayesianThreshold == 0 {
return fmt.Errorf("bayesian bucket must have a valid, non-zero threshold")
}
if bucketFactory.BayesianPrior > 1 {
return fmt.Errorf("bayesian bucket must have a valid, non-zero prior")
}
if bucketFactory.BayesianThreshold > 1 {
return fmt.Errorf("bayesian bucket must have a valid, non-zero threshold")
}
if bucketFactory.Capacity != -1 {
return fmt.Errorf("bayesian bucket must have capacity -1")
}
} else { } else {
return fmt.Errorf("unknown bucket type '%s'", bucketFactory.Type) return fmt.Errorf("unknown bucket type '%s'", bucketFactory.Type)
} }
@ -316,6 +338,8 @@ func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{}) bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{})
case "conditional": case "conditional":
bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{}) bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{})
case "bayesian":
bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{})
default: default:
return fmt.Errorf("invalid type '%s' in %s : %v", bucketFactory.Type, bucketFactory.Filename, err) return fmt.Errorf("invalid type '%s' in %s : %v", bucketFactory.Type, bucketFactory.Filename, err)
} }
@ -355,6 +379,11 @@ func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
bucketFactory.processors = append(bucketFactory.processors, &ConditionalOverflow{}) bucketFactory.processors = append(bucketFactory.processors, &ConditionalOverflow{})
} }
if bucketFactory.BayesianThreshold != 0 {
bucketFactory.logger.Tracef("Adding bayesian processor")
bucketFactory.processors = append(bucketFactory.processors, &BayesianBucket{})
}
if len(bucketFactory.Data) > 0 { if len(bucketFactory.Data) > 0 {
for _, data := range bucketFactory.Data { for _, data := range bucketFactory.Data {
if data.DestPath == "" { if data.DestPath == "" {

View file

@ -119,3 +119,25 @@ func TestCounterBucketsConfig(t *testing.T) {
} }
} }
func TestBayesianBucketsConfig(t *testing.T) {
var CfgTests = []cfgTest{
//basic valid counter
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 0.5, BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, true, true},
//bad capacity
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: 1, Filter: "true", BayesianPrior: 0.5, BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false},
//missing prior
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false},
//missing threshold
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false},
//bad prior
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 1.5, BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false},
//bad threshold
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 0.5, BayesianThreshold: 1.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false},
}
if err := runTest(CfgTests); err != nil {
t.Fatalf("%s", err)
}
}

View file

@ -0,0 +1,21 @@
type: bayesian
name: test/guillotine-bayesian
debug: true
description: "bayesian bucket"
filter: "evt.Meta.log_type == 'http_access-log' || evt.Meta.log_type == 'ssh_access-log'"
groupby: evt.Meta.source_ip
bayesian_prior: 0.5
bayesian_threshold: 0.8
bayesian_conditions:
- condition: evt.Meta.http_path == "/"
prob_given_evil: 0.8
prob_given_benign: 0.2
guillotine : true
- condition: evt.Meta.ssh_user == "admin"
prob_given_evil: 0.9
prob_given_benign: 0.5
guillotine : true
leakspeed: 30s
capacity: -1
labels:
type: overflow_1

View file

@ -0,0 +1 @@
- filename: {{.TestDirectory}}/bucket.yaml

View file

@ -0,0 +1,50 @@
{
"lines": [
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "http_access-log",
"http_path": "/"
}
},
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "ssh_access-log",
"ssh_user": "admin"
}
}
],
"results": [
{
"Type" : 1,
"Alert": {
"sources" : {
"2a00:1450:4007:816::200e": {
"ip": "2a00:1450:4007:816::200e",
"scope": "Ip",
"value": "2a00:1450:4007:816::200e"
}
},
"Alert" : {
"scenario": "test/guillotine-bayesian",
"events_count": 2
}
}
}
]
}

View file

@ -0,0 +1,21 @@
type: bayesian
name: test/multiple-bayesian
debug: true
description: "bayesian bucket"
filter: "evt.Meta.log_type == 'http_access-log' || evt.Meta.log_type == 'ssh_access-log'"
groupby: evt.Meta.source_ip
bayesian_prior: 0.5
bayesian_threshold: 0.8
bayesian_conditions:
- condition: evt.Meta.http_path == "/"
prob_given_evil: 0.8
prob_given_benign: 0.2
guillotine : true
- condition: evt.Meta.ssh_user == "admin"
prob_given_evil: 0.9
prob_given_benign: 0.5
guillotine : true
leakspeed: 30s
capacity: -1
labels:
type: overflow_1

View file

@ -0,0 +1 @@
- filename: {{.TestDirectory}}/bucket.yaml

View file

@ -0,0 +1,64 @@
{
"lines": [
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "http_access-log",
"http_path": "/"
}
},
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "1.2.3.4",
"log_type": "ssh_access-log",
"ssh_user": "admin"
}
},
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "ssh_access-log",
"ssh_user": "admin"
}
}
],
"results": [
{
"Type" : 1,
"Alert": {
"sources" : {
"2a00:1450:4007:816::200e": {
"ip": "2a00:1450:4007:816::200e",
"scope": "Ip",
"value": "2a00:1450:4007:816::200e"
}
},
"Alert" : {
"scenario": "test/multiple-bayesian",
"events_count": 2
}
}
}
]
}

View file

@ -0,0 +1,19 @@
type: bayesian
name: test/simple-bayesian
debug: true
description: "bayesian bucket"
filter: "evt.Meta.log_type == 'http_access-log' || evt.Meta.log_type == 'ssh_access-log'"
groupby: evt.Meta.source_ip
bayesian_prior: 0.5
bayesian_threshold: 0.8
bayesian_conditions:
- condition: any(queue.Queue, {.Meta.http_path == "/"})
prob_given_evil: 0.8
prob_given_benign: 0.2
- condition: any(queue.Queue, {.Meta.ssh_user == "admin"})
prob_given_evil: 0.9
prob_given_benign: 0.5
leakspeed: 30s
capacity: -1
labels:
type: overflow_1

View file

@ -0,0 +1 @@
- filename: {{.TestDirectory}}/bucket.yaml

View file

@ -0,0 +1,50 @@
{
"lines": [
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "http_access-log",
"http_path": "/"
}
},
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "ssh_access-log",
"ssh_user": "admin"
}
}
],
"results": [
{
"Type" : 1,
"Alert": {
"sources" : {
"2a00:1450:4007:816::200e": {
"ip": "2a00:1450:4007:816::200e",
"scope": "Ip",
"value": "2a00:1450:4007:816::200e"
}
},
"Alert" : {
"scenario": "test/simple-bayesian",
"events_count": 2
}
}
}
]
}