diff --git a/pkg/leakybucket/README.md b/pkg/leakybucket/README.md index 3fd6714ef..1d3f31575 100644 --- a/pkg/leakybucket/README.md +++ b/pkg/leakybucket/README.md @@ -48,6 +48,15 @@ infinite leakspeed (it never overflows, nor leaks). Nevertheless, the event is raised after a fixed duration. The option is called duration. +## Bayesian + +A Bayesian is a special bucket that runs Bayesian inference instead of +counting events. Each event must have its likelihoods specified in the +yaml file under `prob_given_benign` and `prob_given_evil`. The bucket +will continue evaluating events until the posterior goes above the +threshold (triggering the overflow) or the duration (specified by leakspeed) +expires. + ## Available configuration options for buckets ### Fields for standard buckets @@ -102,6 +111,22 @@ Capacity and leakspeed are not relevant for this kind of bucket. Nevertheless, this kind of bucket is often used with an infinite leakspeed and an infinite capacity [capacity set to -1 for now]. +#### Bayesian + + * bayesian_prior: The prior to start with + * bayesian_threshold: The threshold for the posterior to trigger the overflow. + * bayesian_conditions: List of Bayesian conditions with likelihoods + +Bayesian Conditions are built from: + * condition: The expr for this specific condition to be true + * prob_given_evil: The likelihood an IP satisfies the condition given the fact + that it is a malicious IP + * prob_given_benign: The likelihood an IP satisfies the condition given the fact + that it is a benign IP + * guillotine: Bool to stop the condition from getting evaluated if it has + evaluated to true once. This should be used if evaluating the condition is + computationally expensive. 
+ ## Add examples here diff --git a/pkg/leakybucket/bayesian.go b/pkg/leakybucket/bayesian.go new file mode 100644 index 000000000..bd9aaed96 --- /dev/null +++ b/pkg/leakybucket/bayesian.go @@ -0,0 +1,163 @@ +package leakybucket + +import ( + "fmt" + + "github.com/antonmedv/expr" + "github.com/antonmedv/expr/vm" + "github.com/crowdsecurity/crowdsec/pkg/exprhelpers" + "github.com/crowdsecurity/crowdsec/pkg/types" +) + +type RawBayesianCondition struct { + ConditionalFilterName string `yaml:"condition"` + ProbGivenEvil float32 `yaml:"prob_given_evil"` + ProbGivenBenign float32 `yaml:"prob_given_benign"` + Guillotine bool `yaml:"guillotine,omitempty"` +} + +type BayesianEvent struct { + rawCondition RawBayesianCondition + conditionalFilterRuntime *vm.Program + guillotineState bool +} + +type BayesianBucket struct { + bayesianEventArray []*BayesianEvent + prior float32 + threshold float32 + posterior float32 + DumbProcessor +} + +func updateProbability(prior, probGivenEvil, ProbGivenBenign float32) float32 { + numerator := probGivenEvil * prior + denominator := numerator + ProbGivenBenign*(1-prior) + + return numerator / denominator +} + +func (c *BayesianBucket) OnBucketInit(g *BucketFactory) error { + var err error + BayesianEventArray := make([]*BayesianEvent, len(g.BayesianConditions)) + + if conditionalExprCache == nil { + conditionalExprCache = make(map[string]vm.Program) + } + conditionalExprCacheLock.Lock() + + for index, bcond := range g.BayesianConditions { + var bayesianEvent BayesianEvent + bayesianEvent.rawCondition = bcond + err = bayesianEvent.compileCondition() + if err != nil { + return err + } + BayesianEventArray[index] = &bayesianEvent + } + conditionalExprCacheLock.Unlock() + c.bayesianEventArray = BayesianEventArray + + c.prior = g.BayesianPrior + c.threshold = g.BayesianThreshold + + return err +} + +func (c *BayesianBucket) AfterBucketPour(b *BucketFactory) func(types.Event, *Leaky) *types.Event { + return func(msg types.Event, l *Leaky) 
*types.Event { + c.posterior = c.prior + l.logger.Debugf("starting bayesian evaluation with prior: %v", c.posterior) + + for _, bevent := range c.bayesianEventArray { + err := bevent.bayesianUpdate(c, msg, l) + if err != nil { + l.logger.Errorf("bayesian update failed for %s with %s", bevent.rawCondition.ConditionalFilterName, err) + } + } + + l.logger.Debugf("value of posterior after events : %v", c.posterior) + + if c.posterior > c.threshold { + l.logger.Debugf("Bayesian bucket overflow") + l.Ovflw_ts = l.Last_ts + l.Out <- l.Queue + return nil + } + + return &msg + } +} + +func (b *BayesianEvent) bayesianUpdate(c *BayesianBucket, msg types.Event, l *Leaky) error { + var condition, ok bool + + if b.conditionalFilterRuntime == nil { + l.logger.Tracef("empty conditional filter runtime for %s", b.rawCondition.ConditionalFilterName) + return nil + } + + l.logger.Tracef("guillotine value for %s : %v", b.rawCondition.ConditionalFilterName, b.getGuillotineState()) + if b.getGuillotineState() { + l.logger.Tracef("guillotine already triggered for %s", b.rawCondition.ConditionalFilterName) + l.logger.Tracef("condition true updating prior for: %s", b.rawCondition.ConditionalFilterName) + c.posterior = updateProbability(c.posterior, b.rawCondition.ProbGivenEvil, b.rawCondition.ProbGivenBenign) + l.logger.Tracef("new value of posterior : %v", c.posterior) + return nil + } + + l.logger.Debugf("running condition expression: %s", b.rawCondition.ConditionalFilterName) + ret, err := expr.Run(b.conditionalFilterRuntime, map[string]interface{}{"evt": &msg, "queue": l.Queue, "leaky": l}) + if err != nil { + return fmt.Errorf("unable to run conditional filter: %s", err) + } + + l.logger.Tracef("bayesian bucket expression %s returned : %v", b.rawCondition.ConditionalFilterName, ret) + if condition, ok = ret.(bool); !ok { + return fmt.Errorf("bayesian condition unexpected non-bool return: %T", ret) + } + + l.logger.Tracef("condition %T updating prior for: %s", condition, 
b.rawCondition.ConditionalFilterName) + if condition { + c.posterior = updateProbability(c.posterior, b.rawCondition.ProbGivenEvil, b.rawCondition.ProbGivenBenign) + b.triggerGuillotine() + } else { + c.posterior = updateProbability(c.posterior, 1-b.rawCondition.ProbGivenEvil, 1-b.rawCondition.ProbGivenBenign) + } + l.logger.Tracef("new value of posterior: %v", c.posterior) + + return nil +} + +func (b *BayesianEvent) getGuillotineState() bool { + if b.rawCondition.Guillotine { + return b.guillotineState + } + return false +} + +func (b *BayesianEvent) triggerGuillotine() { + b.guillotineState = true +} + +func (b *BayesianEvent) compileCondition() error { + var err error + var compiledExpr *vm.Program + + if compiled, ok := conditionalExprCache[b.rawCondition.ConditionalFilterName]; ok { + b.conditionalFilterRuntime = &compiled + return nil + } + + conditionalExprCacheLock.Unlock() + //release the lock during compile same as coditional bucket + compiledExpr, err = expr.Compile(b.rawCondition.ConditionalFilterName, exprhelpers.GetExprOptions(map[string]interface{}{"queue": &Queue{}, "leaky": &Leaky{}, "evt": &types.Event{}})...) 
+ if err != nil { + return fmt.Errorf("bayesian condition compile error: %w", err) + } + b.conditionalFilterRuntime = compiledExpr + conditionalExprCacheLock.Lock() + conditionalExprCache[b.rawCondition.ConditionalFilterName] = *compiledExpr + + return nil +} diff --git a/pkg/leakybucket/bucket.go b/pkg/leakybucket/bucket.go index 004d5b9d8..286c51f11 100644 --- a/pkg/leakybucket/bucket.go +++ b/pkg/leakybucket/bucket.go @@ -191,6 +191,10 @@ func FromFactory(bucketFactory BucketFactory) *Leaky { l.conditionalOverflow = true l.Duration = l.BucketConfig.leakspeed } + + if l.BucketConfig.Type == "bayesian" { + l.Duration = l.BucketConfig.leakspeed + } return l } diff --git a/pkg/leakybucket/manager_load.go b/pkg/leakybucket/manager_load.go index 1e212f815..dc1f4ed51 100644 --- a/pkg/leakybucket/manager_load.go +++ b/pkg/leakybucket/manager_load.go @@ -51,6 +51,9 @@ type BucketFactory struct { Profiling bool `yaml:"profiling"` //Profiling, if true, will make the bucket record pours/overflows/etc. OverflowFilter string `yaml:"overflow_filter"` //OverflowFilter if present, is a filter that must return true for the overflow to go through ConditionalOverflow string `yaml:"condition"` //condition if present, is an expression that must return true for the bucket to overflow + BayesianPrior float32 `yaml:"bayesian_prior"` + BayesianThreshold float32 `yaml:"bayesian_threshold"` + BayesianConditions []RawBayesianCondition `yaml:"bayesian_conditions"` //conditions for the bayesian bucket ScopeType types.ScopeType `yaml:"scope,omitempty"` //to enforce a different remediation than blocking an IP. 
Will default this to IP BucketName string `yaml:"-"` Filename string `yaml:"-"` @@ -120,6 +123,25 @@ func ValidateFactory(bucketFactory *BucketFactory) error { if bucketFactory.leakspeed == 0 { return fmt.Errorf("bad leakspeed for conditional bucket '%s'", bucketFactory.LeakSpeed) } + } else if bucketFactory.Type == "bayesian" { + if bucketFactory.BayesianConditions == nil { + return fmt.Errorf("bayesian bucket must have bayesian conditions") + } + if bucketFactory.BayesianPrior == 0 { + return fmt.Errorf("bayesian bucket must have a valid, non-zero prior") + } + if bucketFactory.BayesianThreshold == 0 { + return fmt.Errorf("bayesian bucket must have a valid, non-zero threshold") + } + if bucketFactory.BayesianPrior > 1 { + return fmt.Errorf("bayesian bucket must have a valid, non-zero prior") + } + if bucketFactory.BayesianThreshold > 1 { + return fmt.Errorf("bayesian bucket must have a valid, non-zero threshold") + } + if bucketFactory.Capacity != -1 { + return fmt.Errorf("bayesian bucket must have capacity -1") + } } else { return fmt.Errorf("unknown bucket type '%s'", bucketFactory.Type) } @@ -316,6 +338,8 @@ func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error { bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{}) case "conditional": bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{}) + case "bayesian": + bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{}) default: return fmt.Errorf("invalid type '%s' in %s : %v", bucketFactory.Type, bucketFactory.Filename, err) } @@ -355,6 +379,11 @@ func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error { bucketFactory.processors = append(bucketFactory.processors, &ConditionalOverflow{}) } + if bucketFactory.BayesianThreshold != 0 { + bucketFactory.logger.Tracef("Adding bayesian processor") + bucketFactory.processors = append(bucketFactory.processors, &BayesianBucket{}) + } + if len(bucketFactory.Data) > 0 { for _, data 
:= range bucketFactory.Data { if data.DestPath == "" { diff --git a/pkg/leakybucket/manager_load_test.go b/pkg/leakybucket/manager_load_test.go index bb3df75cd..513f11ff3 100644 --- a/pkg/leakybucket/manager_load_test.go +++ b/pkg/leakybucket/manager_load_test.go @@ -119,3 +119,25 @@ func TestCounterBucketsConfig(t *testing.T) { } } + +func TestBayesianBucketsConfig(t *testing.T) { + var CfgTests = []cfgTest{ + + //basic valid counter + {BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 0.5, BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, true, true}, + //bad capacity + {BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: 1, Filter: "true", BayesianPrior: 0.5, BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false}, + //missing prior + {BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false}, + //missing threshold + {BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false}, + //bad prior + {BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 1.5, BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false}, + //bad threshold + {BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 
0.5, BayesianThreshold: 1.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false}, + } + if err := runTest(CfgTests); err != nil { + t.Fatalf("%s", err) + } + +} diff --git a/pkg/leakybucket/tests/guillotine-bayesian-bucket/bucket.yaml b/pkg/leakybucket/tests/guillotine-bayesian-bucket/bucket.yaml new file mode 100644 index 000000000..8e8c26e6f --- /dev/null +++ b/pkg/leakybucket/tests/guillotine-bayesian-bucket/bucket.yaml @@ -0,0 +1,21 @@ +type: bayesian +name: test/guillotine-bayesian +debug: true +description: "bayesian bucket" +filter: "evt.Meta.log_type == 'http_access-log' || evt.Meta.log_type == 'ssh_access-log'" +groupby: evt.Meta.source_ip +bayesian_prior: 0.5 +bayesian_threshold: 0.8 +bayesian_conditions: +- condition: evt.Meta.http_path == "/" + prob_given_evil: 0.8 + prob_given_benign: 0.2 + guillotine : true +- condition: evt.Meta.ssh_user == "admin" + prob_given_evil: 0.9 + prob_given_benign: 0.5 + guillotine : true +leakspeed: 30s +capacity: -1 +labels: + type: overflow_1 \ No newline at end of file diff --git a/pkg/leakybucket/tests/guillotine-bayesian-bucket/scenarios.yaml b/pkg/leakybucket/tests/guillotine-bayesian-bucket/scenarios.yaml new file mode 100644 index 000000000..05e1557cf --- /dev/null +++ b/pkg/leakybucket/tests/guillotine-bayesian-bucket/scenarios.yaml @@ -0,0 +1 @@ + - filename: {{.TestDirectory}}/bucket.yaml \ No newline at end of file diff --git a/pkg/leakybucket/tests/guillotine-bayesian-bucket/test.json b/pkg/leakybucket/tests/guillotine-bayesian-bucket/test.json new file mode 100644 index 000000000..07b7b6a6e --- /dev/null +++ b/pkg/leakybucket/tests/guillotine-bayesian-bucket/test.json @@ -0,0 +1,50 @@ +{ + "lines": [ + { + "Line": { + "Labels": { + "type": "nginx" + }, + "Raw": "don't care" + }, + "MarshaledTime": "2020-01-01T10:00:00.000Z", + "Meta": { + "source_ip": "2a00:1450:4007:816::200e", + "log_type": "http_access-log", + "http_path": 
"/" + } + }, + { + "Line": { + "Labels": { + "type": "nginx" + }, + "Raw": "don't care" + }, + "MarshaledTime": "2020-01-01T10:00:00.000Z", + "Meta": { + "source_ip": "2a00:1450:4007:816::200e", + "log_type": "ssh_access-log", + "ssh_user": "admin" + } + } + ], + "results": [ + { + "Type" : 1, + "Alert": { + "sources" : { + "2a00:1450:4007:816::200e": { + "ip": "2a00:1450:4007:816::200e", + "scope": "Ip", + "value": "2a00:1450:4007:816::200e" + } + }, + "Alert" : { + "scenario": "test/guillotine-bayesian", + "events_count": 2 + } + } + } + ] + } \ No newline at end of file diff --git a/pkg/leakybucket/tests/multiple-bayesian-bucket/bucket.yaml b/pkg/leakybucket/tests/multiple-bayesian-bucket/bucket.yaml new file mode 100644 index 000000000..1110fb7b8 --- /dev/null +++ b/pkg/leakybucket/tests/multiple-bayesian-bucket/bucket.yaml @@ -0,0 +1,21 @@ +type: bayesian +name: test/multiple-bayesian +debug: true +description: "bayesian bucket" +filter: "evt.Meta.log_type == 'http_access-log' || evt.Meta.log_type == 'ssh_access-log'" +groupby: evt.Meta.source_ip +bayesian_prior: 0.5 +bayesian_threshold: 0.8 +bayesian_conditions: +- condition: evt.Meta.http_path == "/" + prob_given_evil: 0.8 + prob_given_benign: 0.2 + guillotine : true +- condition: evt.Meta.ssh_user == "admin" + prob_given_evil: 0.9 + prob_given_benign: 0.5 + guillotine : true +leakspeed: 30s +capacity: -1 +labels: + type: overflow_1 \ No newline at end of file diff --git a/pkg/leakybucket/tests/multiple-bayesian-bucket/scenarios.yaml b/pkg/leakybucket/tests/multiple-bayesian-bucket/scenarios.yaml new file mode 100644 index 000000000..05e1557cf --- /dev/null +++ b/pkg/leakybucket/tests/multiple-bayesian-bucket/scenarios.yaml @@ -0,0 +1 @@ + - filename: {{.TestDirectory}}/bucket.yaml \ No newline at end of file diff --git a/pkg/leakybucket/tests/multiple-bayesian-bucket/test.json b/pkg/leakybucket/tests/multiple-bayesian-bucket/test.json new file mode 100644 index 000000000..69454a6ed --- /dev/null +++ 
b/pkg/leakybucket/tests/multiple-bayesian-bucket/test.json @@ -0,0 +1,64 @@ +{ + "lines": [ + { + "Line": { + "Labels": { + "type": "nginx" + }, + "Raw": "don't care" + }, + "MarshaledTime": "2020-01-01T10:00:00.000Z", + "Meta": { + "source_ip": "2a00:1450:4007:816::200e", + "log_type": "http_access-log", + "http_path": "/" + } + }, + { + "Line": { + "Labels": { + "type": "nginx" + }, + "Raw": "don't care" + }, + "MarshaledTime": "2020-01-01T10:00:00.000Z", + "Meta": { + "source_ip": "1.2.3.4", + "log_type": "ssh_access-log", + "ssh_user": "admin" + } + }, + { + "Line": { + "Labels": { + "type": "nginx" + }, + "Raw": "don't care" + }, + "MarshaledTime": "2020-01-01T10:00:00.000Z", + "Meta": { + "source_ip": "2a00:1450:4007:816::200e", + "log_type": "ssh_access-log", + "ssh_user": "admin" + } + } + ], + "results": [ + { + "Type" : 1, + "Alert": { + "sources" : { + "2a00:1450:4007:816::200e": { + "ip": "2a00:1450:4007:816::200e", + "scope": "Ip", + "value": "2a00:1450:4007:816::200e" + } + }, + "Alert" : { + "scenario": "test/multiple-bayesian", + "events_count": 2 + } + } + } + ] + } \ No newline at end of file diff --git a/pkg/leakybucket/tests/simple-bayesian-bucket/bucket.yaml b/pkg/leakybucket/tests/simple-bayesian-bucket/bucket.yaml new file mode 100644 index 000000000..21a4ab074 --- /dev/null +++ b/pkg/leakybucket/tests/simple-bayesian-bucket/bucket.yaml @@ -0,0 +1,19 @@ +type: bayesian +name: test/simple-bayesian +debug: true +description: "bayesian bucket" +filter: "evt.Meta.log_type == 'http_access-log' || evt.Meta.log_type == 'ssh_access-log'" +groupby: evt.Meta.source_ip +bayesian_prior: 0.5 +bayesian_threshold: 0.8 +bayesian_conditions: +- condition: any(queue.Queue, {.Meta.http_path == "/"}) + prob_given_evil: 0.8 + prob_given_benign: 0.2 +- condition: any(queue.Queue, {.Meta.ssh_user == "admin"}) + prob_given_evil: 0.9 + prob_given_benign: 0.5 +leakspeed: 30s +capacity: -1 +labels: + type: overflow_1 \ No newline at end of file diff --git 
a/pkg/leakybucket/tests/simple-bayesian-bucket/scenarios.yaml b/pkg/leakybucket/tests/simple-bayesian-bucket/scenarios.yaml new file mode 100644 index 000000000..05e1557cf --- /dev/null +++ b/pkg/leakybucket/tests/simple-bayesian-bucket/scenarios.yaml @@ -0,0 +1 @@ + - filename: {{.TestDirectory}}/bucket.yaml \ No newline at end of file diff --git a/pkg/leakybucket/tests/simple-bayesian-bucket/test.json b/pkg/leakybucket/tests/simple-bayesian-bucket/test.json new file mode 100644 index 000000000..a5807c4b6 --- /dev/null +++ b/pkg/leakybucket/tests/simple-bayesian-bucket/test.json @@ -0,0 +1,50 @@ +{ + "lines": [ + { + "Line": { + "Labels": { + "type": "nginx" + }, + "Raw": "don't care" + }, + "MarshaledTime": "2020-01-01T10:00:00.000Z", + "Meta": { + "source_ip": "2a00:1450:4007:816::200e", + "log_type": "http_access-log", + "http_path": "/" + } + }, + { + "Line": { + "Labels": { + "type": "nginx" + }, + "Raw": "don't care" + }, + "MarshaledTime": "2020-01-01T10:00:00.000Z", + "Meta": { + "source_ip": "2a00:1450:4007:816::200e", + "log_type": "ssh_access-log", + "ssh_user": "admin" + } + } + ], + "results": [ + { + "Type" : 1, + "Alert": { + "sources" : { + "2a00:1450:4007:816::200e": { + "ip": "2a00:1450:4007:816::200e", + "scope": "Ip", + "value": "2a00:1450:4007:816::200e" + } + }, + "Alert" : { + "scenario": "test/simple-bayesian", + "events_count": 2 + } + } + } + ] + } \ No newline at end of file