add inference mode

2023-09-13 14:57:58 +02:00 · 2023-09-13 14:57:58 +02:00 · e69ed4db6a
parent 20d81d3eed
commit e69ed4db6a
3 changed files with 158 additions and 28 deletions
--- a/pkg/bayesiantrain/inference.go
+++ b/pkg/bayesiantrain/inference.go
@ -0,0 +1,68 @@
+package bayesiantrain
+
+import (
+	"fmt"
+
+	"github.com/antonmedv/expr"
+	"github.com/antonmedv/expr/vm"
+	"github.com/crowdsecurity/crowdsec/pkg/leakybucket"
+	"github.com/crowdsecurity/crowdsec/pkg/types"
+)
+
+type fakeBucket struct {
+	events []types.Event
+	leaky  *leakybucket.Leaky
+	label  int
+}
+
+func (f *fakeBucket) scoreTrainedClassifier(results map[string]BayesianResult, exprCache map[string]vm.Program, prior float32, threshold float32) int {
+	var posterior float32
+	var queue leakybucket.Queue
+	var program vm.Program
+	var hypothesisValue bool
+	var ok bool
+	var guillotinecache map[string]bool
+
+	guillotinecache = make(map[string]bool)
+
+	for index, evt := range f.events {
+		queue = leakybucket.Queue{Queue: f.events[:index], L: index + 1}
+		posterior = prior
+
+		for hypothesis, result := range results {
+
+			if !result.Attached {
+				continue
+			}
+			if guillotinecache[hypothesis] {
+				posterior = leakybucket.UpdateBayesianProbability(posterior, result.ProbGivenEvil, result.ProbGivenBenign)
+				continue
+			}
+
+			program = exprCache[hypothesis]
+			ret, err := expr.Run(&program, map[string]interface{}{"evt": &evt, "queue": &queue, "leaky": f.leaky})
+			if err != nil {
+				fmt.Errorf("ran into error while evaluating %v on bucket", hypothesis)
+				return -1
+			}
+
+			if hypothesisValue, ok = ret.(bool); !ok {
+				fmt.Errorf("bayesion hypothesis, unexpected non-bool return : %T", ret)
+				return -1
+			}
+
+			if hypothesisValue {
+				guillotinecache[hypothesis] = true
+				posterior = leakybucket.UpdateBayesianProbability(posterior, result.ProbGivenEvil, result.ProbGivenBenign)
+			} else {
+				posterior = leakybucket.UpdateBayesianProbability(posterior, 1-result.ProbGivenEvil, 1-result.ProbGivenBenign)
+			}
+		}
+
+		if posterior >= threshold {
+			return 1
+		}
+	}
+
+	return 0
+}
--- a/pkg/bayesiantrain/parser.go
+++ b/pkg/bayesiantrain/parser.go
@ -4,6 +4,7 @@ import (
 	"bufio"
 	"fmt"
 	"os"
+	"runtime"
 	"strings"
 	"time"

@ -75,9 +76,10 @@ func collectingRoutine(outputChan <-chan parseResult, resultChan chan<- LogEvent
 	totalIps := 0

 	storage = LogEventStorage{
-		ParsedIpEvents: make(map[string]fakeBucket),
-		exprCache:      make(map[string]vm.Program),
-		total:          0,
+		ParsedIpEvents:   make(map[string]fakeBucket),
+		CachedHypotheses: make(map[string]BayesianResult),
+		exprCache:        make(map[string]vm.Program),
+		total:            0,
 	}
 LOOP:
 	for {
@ -174,6 +176,8 @@ func (p *ParserAndFileStorage) ParseLogs(routinesCount int) (LogEventStorage, er
 	fmt.Println("Sent total to collecting routine: %v", progress)
 	storage := <-resultChan

+	runtime.GC()
+
 	return storage, nil
 }

--- a/pkg/bayesiantrain/trainer.go
+++ b/pkg/bayesiantrain/trainer.go
@ -12,25 +12,21 @@ import (
 )

 type LogEventStorage struct {
-	ParsedIpEvents map[string]fakeBucket
-	labeled        bool
-	total          int
-	nEvilIps       int
-	nBenignIps     int
-	exprCache      map[string]vm.Program
-	exprCacheLock  sync.Mutex
+	ParsedIpEvents   map[string]fakeBucket
+	CachedHypotheses map[string]BayesianResult
+	labeled          bool
+	total            int
+	nEvilIps         int
+	nBenignIps       int
+	exprCache        map[string]vm.Program
+	exprCacheLock    sync.Mutex
 }

 type BayesianResult struct {
 	condition       string
-	probGivenEvil   float32
-	probGivenBenign float32
-}
-
-type fakeBucket struct {
-	events []types.Event
-	leaky  *leakybucket.Leaky
-	label  int
+	Attached        bool
+	ProbGivenEvil   float32
+	ProbGivenBenign float32
 }

 type evalIpResult struct {
@ -141,6 +137,47 @@ func (s *LogEventStorage) InsertLabels(evilIps []string) {
 	s.labeled = true
 }

+func (s *LogEventStorage) GenerateBucketMetrics(threshold float32) error {
+	var res int
+	var falsePositive int
+	var falseNegative int
+	var truePositive int
+	var trueNegative int
+
+	prior := float32(s.nEvilIps) / float32(s.total)
+
+	for _, bucket := range s.ParsedIpEvents {
+
+		res = bucket.scoreTrainedClassifier(s.CachedHypotheses, s.exprCache, prior, threshold)
+		if res < 0 {
+			return fmt.Errorf("generatebucketmetrics returned an error, aborting")
+		}
+		if bucket.label == 1 {
+			if res == 1 {
+				truePositive += 1
+			} else {
+				falseNegative += 1
+			}
+		} else {
+			if res == 1 {
+				falsePositive += 1
+			} else {
+				trueNegative += 1
+			}
+		}
+	}
+
+	fmt.Println("raw : ", falsePositive, falseNegative, truePositive, trueNegative)
+	printBucketMetrics(falsePositive, falseNegative, truePositive, trueNegative)
+
+	return nil
+}
+
+func printBucketMetrics(falsePositive, falseNegative, truePositive, trueNegative int) {
+	fmt.Printf("Precision: %v\n", float32(truePositive)/float32(truePositive+falsePositive))
+	fmt.Printf("Recall: %v\n", float32(truePositive)/float32(truePositive+falseNegative))
+}
+
 func (s *LogEventStorage) TestHypothesisSingle(hypothesis string) error {
 	var result evalIpResult
 	var evilIpsWithHypothesis = 0
@ -149,7 +186,7 @@ func (s *LogEventStorage) TestHypothesisSingle(hypothesis string) error {
 	if !s.labeled {
 		return fmt.Errorf("LogEventStorage has not been labeled yet, add labels using InsertLabels")
 	}
-	err := compileAndCacheHypothesis(hypothesis, s)
+	err := compileAndCacheExpr(hypothesis, s)
 	if err != nil {
 		return err
 	}
@ -170,21 +207,42 @@ func (s *LogEventStorage) TestHypothesisSingle(hypothesis string) error {
 		}
 	}

-	bayesian := BayesianResult{condition: hypothesis, probGivenEvil: float32(evilIpsWithHypothesis) / float32(s.nEvilIps), probGivenBenign: float32(benignIpsWithHypothesis) / float32(s.nBenignIps)}
+	bayesian := BayesianResult{condition: hypothesis, ProbGivenEvil: float32(evilIpsWithHypothesis) / float32(s.nEvilIps), ProbGivenBenign: float32(benignIpsWithHypothesis) / float32(s.nBenignIps)}

 	bayesian.printResults()
+	s.CachedHypotheses[hypothesis] = bayesian

-	fmt.Printf("Finished Hypothesis testing, evil ips with hypothesis %v benign ips with hypothesis %v", evilIpsWithHypothesis, benignIpsWithHypothesis)
+	fmt.Printf("Finished Hypothesis testing, evil ips with hypothesis %v benign ips with hypothesis %v\n", evilIpsWithHypothesis, benignIpsWithHypothesis)

 	return nil
 }

+func (s *LogEventStorage) AttachHypothesis(hypothesis string) error {
+
+	var res BayesianResult
+	var ok bool
+
+	res, ok = s.CachedHypotheses[hypothesis]
+	if !ok {
+		fmt.Printf("Hypothesis %s was not found in cache, running a training loop\n", hypothesis)
+		err := s.TestHypothesisSingle(hypothesis)
+		if err != nil {
+			return fmt.Errorf("failed to train hypothesis, train loop returned %s", err)
+		}
+		res = s.CachedHypotheses[hypothesis]
+	}
+
+	res.Attached = true
+	s.CachedHypotheses[hypothesis] = res
+	return nil
+}
+
 func (s *LogEventStorage) TestHypothesis(hypothesis string) error {

 	if !s.labeled {
 		return fmt.Errorf("LogEventStorage has not been labeled yet, add labels using InsertLabels")
 	}
-	err := compileAndCacheHypothesis(hypothesis, s)
+	err := compileAndCacheExpr(hypothesis, s)
 	if err != nil {
 		return err
 	}
@ -205,7 +263,7 @@ func (s *LogEventStorage) TestHypothesis(hypothesis string) error {
 		return result.Error
 	}

-	bayesian := BayesianResult{condition: hypothesis, probGivenEvil: float32(result.EvilIpsWithHypothesis) / float32(s.nEvilIps), probGivenBenign: float32(result.BenignIpsWithHypothesis) / float32(s.nBenignIps)}
+	bayesian := BayesianResult{condition: hypothesis, ProbGivenEvil: float32(result.EvilIpsWithHypothesis) / float32(s.nEvilIps), ProbGivenBenign: float32(result.BenignIpsWithHypothesis) / float32(s.nBenignIps)}

 	bayesian.printResults()

@ -214,7 +272,7 @@ func (s *LogEventStorage) TestHypothesis(hypothesis string) error {
 	return nil
 }

-func compileAndCacheHypothesis(hypothesis string, s *LogEventStorage) error {
+func compileAndCacheExpr(hypothesis string, s *LogEventStorage) error {

 	var compiledExpr *vm.Program
 	var err error
@ -236,8 +294,8 @@ func compileAndCacheHypothesis(hypothesis string, s *LogEventStorage) error {
 }

 func (b *BayesianResult) printResults() {
-	fmt.Printf("- condition: %s", b.condition)
-	fmt.Printf("  prob_given_evil: %v", b.probGivenEvil)
-	fmt.Printf("  prob_given_benign: %v", b.probGivenBenign)
-	fmt.Printf("  guillotine: true")
+	fmt.Printf("- condition: %s\n", b.condition)
+	fmt.Printf("  prob_given_evil: %v\n", b.ProbGivenEvil)
+	fmt.Printf("  prob_given_benign: %v\n", b.ProbGivenBenign)
+	fmt.Printf("  guillotine: true\n")
 }