From c4473839c43dad25727f8cd341d94b9baa5b1702 Mon Sep 17 00:00:00 2001
From: mmetc <92726601+mmetc@users.noreply.github.com>
Date: Thu, 25 Apr 2024 17:53:10 +0200
Subject: [PATCH] Refact pkg/parser/node (#2953)

* extract method processFilter()
* extract method processWhitelist()
* lint (whitespace, errors)
---
 .golangci.yml      |   4 +-
 pkg/parser/node.go | 236 +++++++++++++++++++++++++++++----------------
 2 files changed, 156 insertions(+), 84 deletions(-)

diff --git a/.golangci.yml b/.golangci.yml
index cc6551310..9f059aa8a 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -3,7 +3,7 @@ linters-settings:
 
   cyclop:
     # lower this after refactoring
-    max-complexity: 53
+    max-complexity: 48
 
   gci:
     sections:
@@ -22,7 +22,7 @@ linters-settings:
 
   gocyclo:
     # lower this after refactoring
-    min-complexity: 49
+    min-complexity: 48
 
   funlen:
     # Checks the number of lines in a function.

diff --git a/pkg/parser/node.go b/pkg/parser/node.go
index fe5432ce9..0906f0b51 100644
--- a/pkg/parser/node.go
+++ b/pkg/parser/node.go
@@ -22,69 +22,70 @@ import (
 type Node struct {
 	FormatVersion string `yaml:"format"`
-	//Enable config + runtime debug of node via config o/
+	// Enable config + runtime debug of node via config o/
 	Debug bool `yaml:"debug,omitempty"`
-	//If enabled, the node (and its child) will report their own statistics
+	// If enabled, the node (and its child) will report their own statistics
 	Profiling bool `yaml:"profiling,omitempty"`
-	//Name, author, description and reference(s) for parser pattern
+	// Name, author, description and reference(s) for parser pattern
 	Name        string   `yaml:"name,omitempty"`
 	Author      string   `yaml:"author,omitempty"`
 	Description string   `yaml:"description,omitempty"`
 	References  []string `yaml:"references,omitempty"`
-	//if debug is present in the node, keep its specific Logger in runtime structure
+	// if debug is present in the node, keep its specific Logger in runtime structure
 	Logger *log.Entry `yaml:"-"`
-	//This is mostly a hack to make writing less repetitive.
-	//relying on stage, we know which field to parse, and we
-	//can also promote log to next stage on success
+	// This is mostly a hack to make writing less repetitive.
+	// relying on stage, we know which field to parse, and we
+	// can also promote log to next stage on success
 	Stage string `yaml:"stage,omitempty"`
-	//OnSuccess allows to tag a node to be able to move log to next stage on success
+	// OnSuccess allows to tag a node to be able to move log to next stage on success
 	OnSuccess string `yaml:"onsuccess,omitempty"`
-	rn        string //this is only for us in debug, a random generated name for each node
-	//Filter is executed at runtime (with current log line as context)
-	//and must succeed or node is exited
+	rn        string // this is only for us in debug, a random generated name for each node
+	// Filter is executed at runtime (with current log line as context)
+	// and must succeed or node is exited
 	Filter        string      `yaml:"filter,omitempty"`
-	RunTimeFilter *vm.Program `yaml:"-" json:"-"` //the actual compiled filter
-	//If node has leafs, execute all of them until one asks for a 'break'
+	RunTimeFilter *vm.Program `yaml:"-" json:"-"` // the actual compiled filter
+	// If node has leafs, execute all of them until one asks for a 'break'
 	LeavesNodes []Node `yaml:"nodes,omitempty"`
-	//Flag used to describe when to 'break' or return an 'error'
+	// Flag used to describe when to 'break' or return an 'error'
 	EnrichFunctions EnricherCtx
 
 	/* If the node is actually a leaf, it can have : grok, enrich, statics */
-	//pattern_syntax are named grok patterns that are re-utilized over several grok patterns
+	// pattern_syntax are named grok patterns that are re-utilized over several grok patterns
 	SubGroks yaml.MapSlice `yaml:"pattern_syntax,omitempty"`
-	//Holds a grok pattern
+	// Holds a grok pattern
 	Grok GrokPattern `yaml:"grok,omitempty"`
-	//Statics can be present in any type of node and is executed last
+	// Statics can be present in any type of node and is executed last
 	Statics []ExtraField `yaml:"statics,omitempty"`
-	//Stash allows to capture data from the log line and store it in an accessible cache
+	// Stash allows to capture data from the log line and store it in an accessible cache
 	Stash []DataCapture `yaml:"stash,omitempty"`
-	//Whitelists
+	// Whitelists
 	Whitelist Whitelist           `yaml:"whitelist,omitempty"`
 	Data      []*types.DataSource `yaml:"data,omitempty"`
 }
 
 func (n *Node) validate(pctx *UnixParserCtx, ectx EnricherCtx) error {
-
-	//stage is being set automagically
+	// stage is being set automagically
 	if n.Stage == "" {
-		return fmt.Errorf("stage needs to be an existing stage")
+		return errors.New("stage needs to be an existing stage")
 	}
 
 	/* "" behaves like continue */
 	if n.OnSuccess != "continue" && n.OnSuccess != "next_stage" && n.OnSuccess != "" {
 		return fmt.Errorf("onsuccess '%s' not continue,next_stage", n.OnSuccess)
 	}
+
 	if n.Filter != "" && n.RunTimeFilter == nil {
 		return fmt.Errorf("non-empty filter '%s' was not compiled", n.Filter)
 	}
 
 	if n.Grok.RunTimeRegexp != nil || n.Grok.TargetField != "" {
 		if n.Grok.TargetField == "" && n.Grok.ExpValue == "" {
-			return fmt.Errorf("grok requires 'expression' or 'apply_on'")
+			return errors.New("grok requires 'expression' or 'apply_on'")
 		}
+
 		if n.Grok.RegexpName == "" && n.Grok.RegexpValue == "" {
-			return fmt.Errorf("grok needs 'pattern' or 'name'")
+			return errors.New("grok needs 'pattern' or 'name'")
 		}
 	}
@@ -93,6 +94,7 @@ func (n *Node) validate(pctx *UnixParserCtx, ectx EnricherCtx) error {
 		if static.ExpValue == "" {
 			return fmt.Errorf("static %d : when method is set, expression must be present", idx)
 		}
+
 		if _, ok := ectx.Registered[static.Method]; !ok {
 			log.Warningf("the method '%s' doesn't exist or the plugin has not been initialized", static.Method)
 		}
@@ -100,6 +102,7 @@ func (n *Node) validate(pctx *UnixParserCtx, ectx EnricherCtx) error {
 		if static.Meta == "" && static.Parsed == "" && static.TargetByName == "" {
 			return fmt.Errorf("static %d : at least one of meta/event/target must be set", idx)
 		}
+
 		if static.Value == "" && static.RunTimeValue == nil {
 			return fmt.Errorf("static %d value or expression must be set", idx)
 		}
@@ -110,72 +113,76 @@ func (n *Node) validate(pctx *UnixParserCtx, ectx EnricherCtx) error {
 		if stash.Name == "" {
 			return fmt.Errorf("stash %d : name must be set", idx)
 		}
+
 		if stash.Value == "" {
 			return fmt.Errorf("stash %s : value expression must be set", stash.Name)
 		}
+
 		if stash.Key == "" {
 			return fmt.Errorf("stash %s : key expression must be set", stash.Name)
 		}
+
 		if stash.TTL == "" {
 			return fmt.Errorf("stash %s : ttl must be set", stash.Name)
 		}
+
 		if stash.Strategy == "" {
 			stash.Strategy = "LRU"
 		}
-		//should be configurable
+		// should be configurable
 		if stash.MaxMapSize == 0 {
 			stash.MaxMapSize = 100
 		}
 	}
+
 	return nil
 }
 
-func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[string]interface{}) (bool, error) {
-	var NodeState bool
-	var NodeHasOKGrok bool
+func (n *Node) processFilter(cachedExprEnv map[string]interface{}) (bool, error) {
 	clog := n.Logger
-
-	cachedExprEnv := expressionEnv
-
-	clog.Tracef("Event entering node")
-	if n.RunTimeFilter != nil {
-		//Evaluate node's filter
-		output, err := exprhelpers.Run(n.RunTimeFilter, cachedExprEnv, clog, n.Debug)
-		if err != nil {
-			clog.Warningf("failed to run filter : %v", err)
-			clog.Debugf("Event leaving node : ko")
-			return false, nil
-		}
-
-		switch out := output.(type) {
-		case bool:
-			if !out {
-				clog.Debugf("Event leaving node : ko (failed filter)")
-				return false, nil
-			}
-		default:
-			clog.Warningf("Expr '%s' returned non-bool, abort : %T", n.Filter, output)
-			clog.Debugf("Event leaving node : ko")
-			return false, nil
-		}
-		NodeState = true
-	} else {
+	if n.RunTimeFilter == nil {
 		clog.Tracef("Node has not filter, enter")
-		NodeState = true
+		return true, nil
 	}
 
-	if n.Name != "" {
-		NodesHits.With(prometheus.Labels{"source": p.Line.Src, "type": p.Line.Module, "name": n.Name}).Inc()
+	// Evaluate node's filter
+	output, err := exprhelpers.Run(n.RunTimeFilter, cachedExprEnv, clog, n.Debug)
+	if err != nil {
+		clog.Warningf("failed to run filter : %v", err)
+		clog.Debugf("Event leaving node : ko")
+
+		return false, nil
 	}
-	exprErr := error(nil)
+
+	switch out := output.(type) {
+	case bool:
+		if !out {
+			clog.Debugf("Event leaving node : ko (failed filter)")
+			return false, nil
+		}
+	default:
+		clog.Warningf("Expr '%s' returned non-bool, abort : %T", n.Filter, output)
+		clog.Debugf("Event leaving node : ko")
+
+		return false, nil
+	}
+
+	return true, nil
+}
+
+func (n *Node) processWhitelist(cachedExprEnv map[string]interface{}, p *types.Event) (bool, error) {
+	var exprErr error
+
 	isWhitelisted := n.CheckIPsWL(p)
 	if !isWhitelisted {
 		isWhitelisted, exprErr = n.CheckExprWL(cachedExprEnv, p)
 	}
+
 	if exprErr != nil {
 		// Previous code returned nil if there was an error, so we keep this behavior
 		return false, nil //nolint:nilerr
 	}
+
 	if isWhitelisted && !p.Whitelisted {
 		p.Whitelisted = true
 		p.WhitelistReason = n.Whitelist.Reason
@@ -185,18 +192,51 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[stri
 			for k := range p.Overflow.Sources {
 				ips = append(ips, k)
 			}
-			clog.Infof("Ban for %s whitelisted, reason [%s]", strings.Join(ips, ","), n.Whitelist.Reason)
+
+			n.Logger.Infof("Ban for %s whitelisted, reason [%s]", strings.Join(ips, ","), n.Whitelist.Reason)
+
 			p.Overflow.Whitelisted = true
 		}
 	}
-	//Process grok if present, should be exclusive with nodes :)
+
+	return isWhitelisted, nil
+}
+
+func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[string]interface{}) (bool, error) {
+	var NodeHasOKGrok bool
+
+	clog := n.Logger
+
+	cachedExprEnv := expressionEnv
+
+	clog.Tracef("Event entering node")
+
+	NodeState, err := n.processFilter(cachedExprEnv)
+	if err != nil {
+		return false, err
+	}
+
+	if !NodeState {
+		return false, nil
+	}
+
+	if n.Name != "" {
+		NodesHits.With(prometheus.Labels{"source": p.Line.Src, "type": p.Line.Module, "name": n.Name}).Inc()
+	}
+
+	isWhitelisted, err := n.processWhitelist(cachedExprEnv, p)
+	if err != nil {
+		return false, err
+	}
+
+	// Process grok if present, should be exclusive with nodes :)
 	gstr := ""
+
 	if n.Grok.RunTimeRegexp != nil {
 		clog.Tracef("Processing grok pattern : %s : %p", n.Grok.RegexpName, n.Grok.RunTimeRegexp)
-		//for unparsed, parsed etc. set sensible defaults to reduce user hassle
+		// for unparsed, parsed etc. set sensible defaults to reduce user hassle
 		if n.Grok.TargetField != "" {
-			//it's a hack to avoid using real reflect
+			// it's a hack to avoid using real reflect
 			if n.Grok.TargetField == "Line.Raw" {
 				gstr = p.Line.Raw
 			} else if val, ok := p.Parsed[n.Grok.TargetField]; ok {
@@ -211,6 +251,7 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[stri
 				clog.Warningf("failed to run RunTimeValue : %v", err)
 				NodeState = false
 			}
+
 			switch out := output.(type) {
 			case string:
 				gstr = out
@@ -229,12 +270,14 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[stri
 		} else {
 			groklabel = n.Grok.RegexpName
 		}
+
 		grok := n.Grok.RunTimeRegexp.Parse(gstr)
 		if len(grok) > 0 {
 			/*tag explicitly that the *current* node had a successful grok pattern.
 			it's important to know success state*/
 			NodeHasOKGrok = true
+
 			clog.Debugf("+ Grok '%s' returned %d entries to merge in Parsed", groklabel, len(grok))
-			//We managed to grok stuff, merged into parse
+			// We managed to grok stuff, merged into parse
 			for k, v := range grok {
 				clog.Debugf("\t.Parsed['%s'] = '%s'", k, v)
 				p.Parsed[k] = v
@@ -246,34 +289,37 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[stri
 				return false, err
 			}
 		} else {
-			//grok failed, node failed
+			// grok failed, node failed
 			clog.Debugf("+ Grok '%s' didn't return data on '%s'", groklabel, gstr)
 			NodeState = false
 		}
-
 	} else {
 		clog.Tracef("! No grok pattern : %p", n.Grok.RunTimeRegexp)
 	}
 
-	//Process the stash (data collection) if : a grok was present and succeeded, or if there is no grok
+	// Process the stash (data collection) if : a grok was present and succeeded, or if there is no grok
 	if NodeHasOKGrok || n.Grok.RunTimeRegexp == nil {
 		for idx, stash := range n.Stash {
-			var value string
-			var key string
+			var (
+				key   string
+				value string
+			)
+
 			if stash.ValueExpression == nil {
 				clog.Warningf("Stash %d has no value expression, skipping", idx)
 				continue
 			}
+
 			if stash.KeyExpression == nil {
 				clog.Warningf("Stash %d has no key expression, skipping", idx)
 				continue
 			}
-			//collect the data
+			// collect the data
 			output, err := exprhelpers.Run(stash.ValueExpression, cachedExprEnv, clog, n.Debug)
 			if err != nil {
 				clog.Warningf("Error while running stash val expression : %v", err)
 			}
-			//can we expect anything else than a string ?
+			// can we expect anything else than a string ?
 			switch output := output.(type) {
 			case string:
 				value = output
@@ -282,12 +328,12 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[stri
 				continue
 			}
 
-			//collect the key
+			// collect the key
 			output, err = exprhelpers.Run(stash.KeyExpression, cachedExprEnv, clog, n.Debug)
 			if err != nil {
 				clog.Warningf("Error while running stash key expression : %v", err)
 			}
-			//can we expect anything else than a string ?
+			// can we expect anything else than a string ?
 			switch output := output.(type) {
 			case string:
 				key = output
@@ -299,7 +345,7 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[stri
 		}
 	}
 
-	//Iterate on leafs
+	// Iterate on leafs
 	for _, leaf := range n.LeavesNodes {
 		ret, err := leaf.process(p, ctx, cachedExprEnv)
 		if err != nil {
@@ -307,7 +353,9 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[stri
 			clog.Debugf("Event leaving node : ko")
 			return false, err
 		}
+
 		clog.Tracef("\tsub-node (%s) ret : %v (strategy:%s)", leaf.rn, ret, n.OnSuccess)
+
 		if ret {
 			NodeState = true
 			/* if child is successful, stop processing */
@@ -328,12 +376,14 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[stri
 	clog.Tracef("State after nodes : %v", NodeState)
 
-	//grok or leafs failed, don't process statics
+	// grok or leafs failed, don't process statics
 	if !NodeState {
 		if n.Name != "" {
 			NodesHitsKo.With(prometheus.Labels{"source": p.Line.Src, "type": p.Line.Module, "name": n.Name}).Inc()
 		}
+
 		clog.Debugf("Event leaving node : ko")
+
 		return NodeState, nil
 	}
@@ -360,9 +410,10 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[stri
 	if NodeState {
 		clog.Debugf("Event leaving node : ok")
 		log.Tracef("node is successful, check strategy")
+
 		if n.OnSuccess == "next_stage" {
 			idx := stageidx(p.Stage, ctx.Stages)
-			//we're at the last stage
+			// we're at the last stage
 			if idx+1 == len(ctx.Stages) {
 				clog.Debugf("node reached the last stage : %s", p.Stage)
 			} else {
@@ -375,15 +426,16 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx, expressionEnv map[stri
 	} else {
 		clog.Debugf("Event leaving node : ko")
 	}
+
 	clog.Tracef("Node successful, continue")
+
 	return NodeState, nil
 }
 
 func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 	var err error
-	var valid bool
-	valid = false
+	valid := false
 
 	dumpr := spew.ConfigState{MaxDepth: 1, DisablePointerAddresses: true}
 	n.rn = seed.Generate()
@@ -393,10 +445,11 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 	/* if the node has debugging enabled, create a specific logger with debug
 	that will be used only for processing this node ;) */
 	if n.Debug {
-		var clog = log.New()
+		clog := log.New()
 		if err = types.ConfigureLogger(clog); err != nil {
 			log.Fatalf("While creating bucket-specific logger : %s", err)
 		}
+
 		clog.SetLevel(log.DebugLevel)
 		n.Logger = clog.WithFields(log.Fields{
 			"id": n.rn,
@@ -414,7 +467,7 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 	n.Logger.Tracef("Compiling : %s", dumpr.Sdump(n))
 
-	//compile filter if present
+	// compile filter if present
 	if n.Filter != "" {
 		n.RunTimeFilter, err = expr.Compile(n.Filter, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...)
 		if err != nil {
@@ -425,12 +478,15 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 	/* handle pattern_syntax and groks */
 	for _, pattern := range n.SubGroks {
 		n.Logger.Tracef("Adding subpattern '%s' : '%s'", pattern.Key, pattern.Value)
+
 		if err = pctx.Grok.Add(pattern.Key.(string), pattern.Value.(string)); err != nil {
 			if errors.Is(err, grokky.ErrAlreadyExist) {
 				n.Logger.Warningf("grok '%s' already registred", pattern.Key)
 				continue
 			}
+
 			n.Logger.Errorf("Unable to compile subpattern %s : %v", pattern.Key, err)
+
 			return err
 		}
 	}
@@ -438,28 +494,36 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 	/* load grok by name or compile in-place */
 	if n.Grok.RegexpName != "" {
 		n.Logger.Tracef("+ Regexp Compilation '%s'", n.Grok.RegexpName)
+
 		n.Grok.RunTimeRegexp, err = pctx.Grok.Get(n.Grok.RegexpName)
 		if err != nil {
 			return fmt.Errorf("unable to find grok '%s' : %v", n.Grok.RegexpName, err)
 		}
+
 		if n.Grok.RunTimeRegexp == nil {
 			return fmt.Errorf("empty grok '%s'", n.Grok.RegexpName)
 		}
+
 		n.Logger.Tracef("%s regexp: %s", n.Grok.RegexpName, n.Grok.RunTimeRegexp.String())
+
 		valid = true
 	} else if n.Grok.RegexpValue != "" {
 		if strings.HasSuffix(n.Grok.RegexpValue, "\n") {
 			n.Logger.Debugf("Beware, pattern ends with \\n : '%s'", n.Grok.RegexpValue)
 		}
+
 		n.Grok.RunTimeRegexp, err = pctx.Grok.Compile(n.Grok.RegexpValue)
 		if err != nil {
 			return fmt.Errorf("failed to compile grok '%s': %v", n.Grok.RegexpValue, err)
 		}
+
 		if n.Grok.RunTimeRegexp == nil {
 			// We shouldn't be here because compilation succeeded, so regexp shouldn't be nil
 			return fmt.Errorf("grok compilation failure: %s", n.Grok.RegexpValue)
 		}
+
 		n.Logger.Tracef("%s regexp : %s", n.Grok.RegexpValue, n.Grok.RunTimeRegexp.String())
+
 		valid = true
 	}
@@ -473,7 +537,7 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 	}
 
 	/* load grok statics */
-	//compile expr statics if present
+	// compile expr statics if present
 	for idx := range n.Grok.Statics {
 		if n.Grok.Statics[idx].ExpValue != "" {
 			n.Grok.Statics[idx].RunTimeValue, err = expr.Compile(n.Grok.Statics[idx].ExpValue,
@@ -482,6 +546,7 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 				return err
 			}
 		}
+
 		valid = true
 	}
@@ -505,7 +570,7 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 		}
 
 		logLvl := n.Logger.Logger.GetLevel()
-		//init the cache, does it make sense to create it here just to be sure everything is fine ?
+		// init the cache, does it make sense to create it here just to be sure everything is fine ?
 		if err = cache.CacheInit(cache.CacheCfg{
 			Size: n.Stash[i].MaxMapSize,
 			TTL:  n.Stash[i].TTLVal,
@@ -526,14 +591,18 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 		if !n.LeavesNodes[idx].Debug && n.Debug {
 			n.LeavesNodes[idx].Debug = true
 		}
+
 		if !n.LeavesNodes[idx].Profiling && n.Profiling {
 			n.LeavesNodes[idx].Profiling = true
 		}
+
 		n.LeavesNodes[idx].Stage = n.Stage
+
 		err = n.LeavesNodes[idx].compile(pctx, ectx)
 		if err != nil {
 			return err
 		}
+
 		valid = true
 	}
@@ -546,6 +615,7 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 				return err
 			}
 		}
+
 		valid = true
 	}
@@ -554,13 +624,15 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 	if err != nil {
 		return err
 	}
+
 	valid = valid || whitelistValid
 
 	if !valid {
 		/* node is empty, error force return */
 		n.Logger.Error("Node is empty or invalid, abort")
 		n.Stage = ""
-		return fmt.Errorf("Node is empty")
+
+		return errors.New("Node is empty")
 	}
 
 	if err := n.validate(pctx, ectx); err != nil {
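
A minimal, self-contained sketch of the extract-method shape this patch applies: the filter and whitelist checks are pulled out of process() into helpers returning (bool, error), the caller short-circuits on their results, and constant error messages use errors.New rather than fmt.Errorf. The node and event types below are simplified, hypothetical stand-ins, not the real crowdsec structures.

// Hypothetical sketch of the refactoring pattern used above.
package main

import (
	"errors"
	"fmt"
)

type event struct {
	line        string
	whitelisted bool
}

type node struct {
	filter    func(*event) (bool, error) // stand-in for the compiled expr filter
	whitelist func(*event) bool          // stand-in for the IP/expr whitelist checks
}

// processFilter mirrors the helper's shape: no filter means the event enters the node.
func (n *node) processFilter(e *event) (bool, error) {
	if n.filter == nil {
		return true, nil
	}
	return n.filter(e)
}

// processWhitelist mirrors the helper's shape: it flags the event and reports the result.
func (n *node) processWhitelist(e *event) (bool, error) {
	if n.whitelist != nil && n.whitelist(e) {
		e.whitelisted = true
	}
	return e.whitelisted, nil
}

func (n *node) process(e *event) (bool, error) {
	ok, err := n.processFilter(e)
	if err != nil {
		return false, err
	}
	if !ok {
		return false, nil // filter rejected the event, leave the node
	}
	if _, err := n.processWhitelist(e); err != nil {
		return false, err
	}
	// grok, stash, leaf nodes and statics would follow here, as in node.go.
	return true, nil
}

func main() {
	n := &node{
		filter: func(e *event) (bool, error) {
			if e.line == "" {
				// errors.New for constant messages, matching the lint pass.
				return false, errors.New("empty line")
			}
			return true, nil
		},
	}

	ok, err := n.process(&event{line: "GET /index.html"})
	fmt.Println(ok, err)
}

The real processFilter and processWhitelist additionally take the cached expression environment and the *types.Event, and process() still records the Prometheus hit counters before the grok, stash, leaf and statics steps shown in the diff.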