add support for 'expression' (fix #822) in grok patterns (#830)

* add support for 'expression' (fix #822) in grok patterns

* add tests
This commit is contained in:
Thibault "bui" Koechlin 2021-06-21 09:07:33 +02:00 committed by GitHub
parent c026fc7f16
commit 7f0cac8ee6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 73 additions and 7 deletions

View file

@ -7,6 +7,7 @@ import (
"github.com/antonmedv/expr"
"github.com/logrusorgru/grokky"
"github.com/pkg/errors"
yaml "gopkg.in/yaml.v2"
"github.com/antonmedv/expr/vm"
@ -77,8 +78,8 @@ func (n *Node) validate(pctx *UnixParserCtx, ectx []EnricherCtx) error {
}
if n.Grok.RunTimeRegexp != nil || n.Grok.TargetField != "" {
if n.Grok.TargetField == "" {
return fmt.Errorf("grok's apply_on can't be empty")
if n.Grok.TargetField == "" && n.Grok.ExpValue == "" {
return fmt.Errorf("grok requires 'expression' or 'apply_on'")
}
if n.Grok.RegexpName == "" && n.Grok.RegexpValue == "" {
return fmt.Errorf("grok needs 'pattern' or 'name'")
@ -232,10 +233,7 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx) (bool, error) {
if n.Grok.RunTimeRegexp != nil {
clog.Tracef("Processing grok pattern : %s : %p", n.Grok.RegexpName, n.Grok.RunTimeRegexp)
//for unparsed, parsed etc. set sensible defaults to reduce user hassle
if n.Grok.TargetField == "" {
clog.Fatalf("not default field and no specified on stage '%s'", n.Stage)
} else {
if n.Grok.TargetField != "" {
//it's a hack to avoid using real reflect
if n.Grok.TargetField == "Line.Raw" {
gstr = p.Line.Raw
@ -244,9 +242,21 @@ func (n *Node) process(p *types.Event, ctx UnixParserCtx) (bool, error) {
} else {
clog.Debugf("(%s) target field '%s' doesn't exist in %v", n.rn, n.Grok.TargetField, p.Parsed)
NodeState = false
//return false, nil
}
} else if n.Grok.RunTimeValue != nil {
output, err := expr.Run(n.Grok.RunTimeValue, exprhelpers.GetExprEnv(map[string]interface{}{"evt": p}))
if err != nil {
clog.Warningf("failed to run RunTimeValue : %v", err)
NodeState = false
}
switch out := output.(type) {
case string:
gstr = out
default:
clog.Errorf("unexpected return type for RunTimeValue : %T", output)
}
}
var groklabel string
if n.Grok.RegexpName == "" {
groklabel = fmt.Sprintf("%5.5s...", n.Grok.RegexpValue)
@ -444,6 +454,16 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx []EnricherCtx) error {
n.Logger.Tracef("%s regexp : %s", n.Grok.RegexpValue, n.Grok.RunTimeRegexp.Regexp.String())
valid = true
}
/*if grok source is an expression*/
if n.Grok.ExpValue != "" {
n.Grok.RunTimeValue, err = expr.Compile(n.Grok.ExpValue,
expr.Env(exprhelpers.GetExprEnv(map[string]interface{}{"evt": &types.Event{}})))
if err != nil {
return errors.Wrap(err, "while compiling grok's expression")
}
}
/* load grok statics */
if len(n.Grok.Statics) > 0 {
//compile expr statics if present

View file

@ -0,0 +1,13 @@
filter: "evt.Line.Labels.type == 'testlog'"
debug: true
onsuccess: next_stage
name: tests/base-grok
pattern_syntax:
MYCAP1: ".*"
nodes:
- grok:
pattern: ^xxheader %{MYCAP1:extracted_value} trailing stuff$
expression: evt.Line.Raw
statics:
- meta: log_type
value: parsed_testlog

View file

@ -0,0 +1,2 @@
- filename: {{.TestDirectory}}/base-grok.yaml
stage: s00-raw

View file

@ -0,0 +1,28 @@
#these are the events we input into parser
lines:
- Line:
Labels:
#this one will be checked by a filter
type: testlog
Raw: xxheader VALUE1 trailing stuff
- Line:
#second event: same layout as the first one, only the captured value differs
Labels:
#this one will be checked by a filter
type: testlog
Raw: xxheader VALUE2 trailing stuff
#these are the results we expect from the parser
results:
- Meta:
log_type: parsed_testlog
Parsed:
extracted_value: VALUE1
Process: true
Stage: s00-raw
- Meta:
log_type: parsed_testlog
Parsed:
extracted_value: VALUE2
Process: true
Stage: s00-raw

View file

@ -33,6 +33,9 @@ type GrokPattern struct {
RegexpValue string `yaml:"pattern,omitempty"`
//the runtime form of regexpname / regexpvalue
RunTimeRegexp *grokky.Pattern `json:"-"` //the actual regexp
//the output of the expression is going to be the source for regexp
ExpValue string `yaml:"expression,omitempty"`
RunTimeValue *vm.Program `json:"-"` //the actual compiled filter
//a grok can contain statics that apply if pattern is successful
Statics []ExtraField `yaml:"statics,omitempty"`
}