From 8e7e7993042e3dc4a45b6b02a4ed8d5a49bf7b2c Mon Sep 17 00:00:00 2001 From: mmetc <92726601+mmetc@users.noreply.github.com> Date: Fri, 24 Jun 2022 15:55:21 +0200 Subject: [PATCH] [wip] serve metrics only after agent and/or lapi are ready; fixed some func tests (#1613) --- .github/workflows/bats.yml | 3 -- cmd/crowdsec/api.go | 6 ++-- cmd/crowdsec/crowdsec.go | 6 +++- cmd/crowdsec/main.go | 10 ++++-- cmd/crowdsec/metrics.go | 6 +++- cmd/crowdsec/run_in_svc.go | 11 ++++-- cmd/crowdsec/run_in_svc_windows.go | 7 ++-- cmd/crowdsec/serve.go | 17 +++++++--- pkg/apiserver/apiserver.go | 3 +- tests/bats/01_base.bats | 1 - tests/bats/05_config_yaml_local.bats | 13 ++++--- tests/bats/11_bouncers_tls.bats | 14 ++++---- tests/bats/30_machines.bats | 1 - tests/bats/30_machines_tls.bats | 51 +++++++++++----------------- tests/bats/40_live-ban.bats | 3 +- 15 files changed, 85 insertions(+), 67 deletions(-) diff --git a/.github/workflows/bats.yml b/.github/workflows/bats.yml index 8d70d77ba..96924813b 100644 --- a/.github/workflows/bats.yml +++ b/.github/workflows/bats.yml @@ -26,7 +26,6 @@ jobs: # disable them here by default. Remove the if..false to enable them. mariadb: - if: ${{ false }} uses: ./.github/workflows/bats-mysql.yml with: database_image: mariadb:latest @@ -34,7 +33,6 @@ jobs: DATABASE_PASSWORD: ${{ secrets.DATABASE_PASSWORD}} mysql: - if: ${{ false }} uses: ./.github/workflows/bats-mysql.yml with: database_image: mysql:latest @@ -42,7 +40,6 @@ jobs: DATABASE_PASSWORD: ${{ secrets.DATABASE_PASSWORD}} postgres: - if: ${{ false }} uses: ./.github/workflows/bats-postgres.yml secrets: DATABASE_PASSWORD: ${{ secrets.DATABASE_PASSWORD}} diff --git a/cmd/crowdsec/api.go b/cmd/crowdsec/api.go index 0fb04e85d..6f8e3969d 100644 --- a/cmd/crowdsec/api.go +++ b/cmd/crowdsec/api.go @@ -2,6 +2,7 @@ package main import ( "runtime" + "time" "github.com/crowdsecurity/crowdsec/pkg/apiserver" "github.com/crowdsecurity/crowdsec/pkg/csconfig" @@ -44,12 +45,13 @@ func initAPIServer(cConfig *csconfig.Config) (*apiserver.APIServer, error) { return apiServer, nil } -func serveAPIServer(apiServer *apiserver.APIServer) { +func serveAPIServer(apiServer *apiserver.APIServer, apiReady chan bool) { apiTomb.Go(func() error { defer types.CatchPanic("crowdsec/serveAPIServer") go func() { defer types.CatchPanic("crowdsec/runAPIServer") - if err := apiServer.Run(); err != nil { + log.Debugf("serving API after %s ms", time.Since(crowdsecT0)) + if err := apiServer.Run(apiReady); err != nil { log.Fatalf(err.Error()) } }() diff --git a/cmd/crowdsec/crowdsec.go b/cmd/crowdsec/crowdsec.go index 1b96dfa20..84cf0838b 100644 --- a/cmd/crowdsec/crowdsec.go +++ b/cmd/crowdsec/crowdsec.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "sync" + "time" "path/filepath" @@ -129,11 +130,14 @@ func runCrowdsec(cConfig *csconfig.Config, parsers *parser.Parsers) error { return nil } -func serveCrowdsec(parsers *parser.Parsers, cConfig *csconfig.Config) { +func serveCrowdsec(parsers *parser.Parsers, cConfig *csconfig.Config, agentReady chan bool) { crowdsecTomb.Go(func() error { defer types.CatchPanic("crowdsec/serveCrowdsec") go func() { defer types.CatchPanic("crowdsec/runCrowdsec") + // this logs every time, even at config reload + log.Debugf("running agent after %s ms", time.Since(crowdsecT0)) + agentReady <- true if err := runCrowdsec(cConfig, parsers); err != nil { log.Fatalf("unable to start crowdsec routines: %s", err) } diff --git a/cmd/crowdsec/main.go b/cmd/crowdsec/main.go index 76fd53a0d..e2a9515f2 100644 --- a/cmd/crowdsec/main.go +++ b/cmd/crowdsec/main.go @@ -266,8 +266,8 @@ func LoadConfig(cConfig *csconfig.Config) error { return nil } -// This must be called right before the program termination, to allow -// measuring functional test coverage in case of abnormal exit. +// exitWithCode must be called right before the program termination, +// to allow measuring functional test coverage in case of abnormal exit. // // without bincover: log error and exit with code // with bincover: log error and tell bincover the exit code, then return @@ -285,7 +285,13 @@ func exitWithCode(exitCode int, err error) { bincover.ExitCode = exitCode } +// crowdsecT0 can be used to measure start time of services, +// or uptime of the application +var crowdsecT0 time.Time + func main() { + crowdsecT0 = time.Now() + defer types.CatchPanic("crowdsec/main") log.Debugf("os.Args: %v", os.Args) diff --git a/cmd/crowdsec/metrics.go b/cmd/crowdsec/metrics.go index ed927aff3..1c64d414b 100644 --- a/cmd/crowdsec/metrics.go +++ b/cmd/crowdsec/metrics.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "time" v1 "github.com/crowdsecurity/crowdsec/pkg/apiserver/controllers/v1" "github.com/crowdsecurity/crowdsec/pkg/csconfig" @@ -172,7 +173,7 @@ func registerPrometheus(config *csconfig.PrometheusCfg) { } } -func servePrometheus(config *csconfig.PrometheusCfg, dbClient *database.Client) { +func servePrometheus(config *csconfig.PrometheusCfg, dbClient *database.Client, apiReady chan bool, agentReady chan bool) { if !config.Enabled { return } @@ -180,6 +181,9 @@ func servePrometheus(config *csconfig.PrometheusCfg, dbClient *database.Client) defer types.CatchPanic("crowdsec/servePrometheus") http.Handle("/metrics", computeDynamicMetrics(promhttp.Handler(), dbClient)) + <-apiReady + <-agentReady + log.Debugf("serving metrics after %s ms", time.Since(crowdsecT0)) if err := http.ListenAndServe(fmt.Sprintf("%s:%d", config.ListenAddr, config.ListenPort), nil); err != nil { log.Warningf("prometheus: %s", err) } diff --git a/cmd/crowdsec/run_in_svc.go b/cmd/crowdsec/run_in_svc.go index 17aeeecab..40df6c227 100644 --- a/cmd/crowdsec/run_in_svc.go +++ b/cmd/crowdsec/run_in_svc.go @@ -20,7 +20,9 @@ func StartRunSvc() error { err error ) - log.AddHook(&writer.Hook{ // Send logs with level higher than warning to stderr + // Set a default logger with level=fatal on stderr, + // in addition to the one we configure afterwards + log.AddHook(&writer.Hook{ Writer: os.Stderr, LogLevels: []log.Level{ log.PanicLevel, @@ -47,6 +49,9 @@ func StartRunSvc() error { log.Debug("coverage report is enabled") } + apiReady := make(chan bool, 1) + agentReady := make(chan bool, 1) + // Enable profiling early if cConfig.Prometheus != nil { var dbClient *database.Client @@ -60,7 +65,7 @@ func StartRunSvc() error { } } registerPrometheus(cConfig.Prometheus) - go servePrometheus(cConfig.Prometheus, dbClient) + go servePrometheus(cConfig.Prometheus, dbClient, apiReady, agentReady) } - return Serve(cConfig) + return Serve(cConfig, apiReady, agentReady) } diff --git a/cmd/crowdsec/run_in_svc_windows.go b/cmd/crowdsec/run_in_svc_windows.go index efb0ac5c2..d69f532b4 100644 --- a/cmd/crowdsec/run_in_svc_windows.go +++ b/cmd/crowdsec/run_in_svc_windows.go @@ -87,6 +87,9 @@ func WindowsRun() error { log.Debug("coverage report is enabled") } + apiReady := make(chan bool, 1) + agentReady := make(chan bool, 1) + // Enable profiling early if cConfig.Prometheus != nil { var dbClient *database.Client @@ -100,7 +103,7 @@ func WindowsRun() error { } } registerPrometheus(cConfig.Prometheus) - go servePrometheus(cConfig.Prometheus, dbClient) + go servePrometheus(cConfig.Prometheus, dbClient, apiReady, agentReady) } - return Serve(cConfig) + return Serve(cConfig, apiReady, agentReady) } diff --git a/cmd/crowdsec/serve.go b/cmd/crowdsec/serve.go index 54044f20a..41133ef45 100644 --- a/cmd/crowdsec/serve.go +++ b/cmd/crowdsec/serve.go @@ -72,7 +72,8 @@ func reloadHandler(sig os.Signal, cConfig *csconfig.Config) error { return errors.Wrap(err, "unable to init api server") } - serveAPIServer(apiServer) + apiReady := make(chan bool, 1) + serveAPIServer(apiServer, apiReady) } if !cConfig.DisableAgent { @@ -89,7 +90,8 @@ func reloadHandler(sig os.Signal, cConfig *csconfig.Config) error { if err := cConfig.LoadSimulation(); err != nil { log.Errorf("reload error (simulation) : %s", err) } - serveCrowdsec(csParsers, cConfig) + agentReady := make(chan bool, 1) + serveCrowdsec(csParsers, cConfig, agentReady) } log.Printf("Reload is finished") @@ -221,7 +223,7 @@ func HandleSignals(cConfig *csconfig.Config) error { return err } -func Serve(cConfig *csconfig.Config) error { +func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) error { acquisTomb = tomb.Tomb{} parsersTomb = tomb.Tomb{} bucketsTomb = tomb.Tomb{} @@ -253,8 +255,10 @@ func Serve(cConfig *csconfig.Config) error { return errors.Wrap(err, "api server init") } if !flags.TestMode { - serveAPIServer(apiServer) + serveAPIServer(apiServer, apiReady) } + } else { + apiReady <- true } if !cConfig.DisableAgent { @@ -264,9 +268,12 @@ func Serve(cConfig *csconfig.Config) error { } /* if it's just linting, we're done */ if !flags.TestMode { - serveCrowdsec(csParsers, cConfig) + serveCrowdsec(csParsers, cConfig, agentReady) } + } else { + agentReady <- true } + if flags.TestMode { log.Infof("test done") pluginBroker.Kill() diff --git a/pkg/apiserver/apiserver.go b/pkg/apiserver/apiserver.go index 9655f1dbd..ef2baff4e 100644 --- a/pkg/apiserver/apiserver.go +++ b/pkg/apiserver/apiserver.go @@ -284,7 +284,7 @@ func (s *APIServer) GetTLSConfig() (*tls.Config, error) { }, nil } -func (s *APIServer) Run() error { +func (s *APIServer) Run(apiReady chan bool) error { defer types.CatchPanic("lapi/runServer") tlsCfg, err := s.GetTLSConfig() if err != nil { @@ -322,6 +322,7 @@ func (s *APIServer) Run() error { s.httpServerTomb.Go(func() error { go func() { + apiReady <- true if s.TLS != nil && s.TLS.CertFilePath != "" && s.TLS.KeyFilePath != "" { if err := s.httpServer.ListenAndServeTLS(s.TLS.CertFilePath, s.TLS.KeyFilePath); err != nil { log.Fatal(err) diff --git a/tests/bats/01_base.bats b/tests/bats/01_base.bats index 7ec68877f..0673f1b4b 100644 --- a/tests/bats/01_base.bats +++ b/tests/bats/01_base.bats @@ -183,7 +183,6 @@ declare stderr } @test "cscli lapi status" { - if is_db_postgres; then sleep 4; fi run -0 --separate-stderr cscli lapi status run -0 echo "${stderr}" diff --git a/tests/bats/05_config_yaml_local.bats b/tests/bats/05_config_yaml_local.bats index 2598f9942..311978983 100644 --- a/tests/bats/05_config_yaml_local.bats +++ b/tests/bats/05_config_yaml_local.bats @@ -20,6 +20,9 @@ teardown_file() { setup() { load "../lib/setup.sh" ./instance-data load + run -0 yq e '.api.client.credentials_path' "${CONFIG_YAML}" + LOCAL_API_CREDENTIALS="${output}" + export LOCAL_API_CREDENTIALS } teardown() { @@ -50,11 +53,15 @@ teardown() { } @test "${FILE} config.yaml.local - crowdsec (listen_url)" { + # disable the agent or we'll need to patch api client credentials too + run -0 yq e 'del(.crowdsec_service)' -i "${CONFIG_YAML}" ./instance-crowdsec start run -0 ./lib/util/wait-for-port -q 8080 ./instance-crowdsec stop + run -1 ./lib/util/wait-for-port -q 8080 echo "{'api':{'server':{'listen_uri':127.0.0.1:8083}}}" >"${CONFIG_YAML}.local" + ./instance-crowdsec start run -0 ./lib/util/wait-for-port -q 8083 run -1 ./lib/util/wait-for-port -q 8080 @@ -67,15 +74,14 @@ teardown() { } @test "${FILE} local_api_credentials.yaml.local" { + run -0 yq e 'del(.crowdsec_service)' -i "${CONFIG_YAML}" echo "{'api':{'server':{'listen_uri':127.0.0.1:8083}}}" >"${CONFIG_YAML}.local" ./instance-crowdsec start run -0 ./lib/util/wait-for-port -q 8083 - run -0 yq e '.api.client.credentials_path' "${CONFIG_YAML}" - LOCAL_API_CREDENTIALS="${output}" - run -1 cscli decisions list echo "{'url':'http://127.0.0.1:8083'}" >"${LOCAL_API_CREDENTIALS}.local" + run -0 cscli decisions list } @@ -122,7 +128,6 @@ teardown() { echo -e "---\nfilename: ${tmpfile}\nlabels:\n type: syslog\n" >>"${ACQUIS_YAML}" ./instance-crowdsec start - sleep 1 fake_log >>"${tmpfile}" sleep 1 rm -f -- "${tmpfile}" diff --git a/tests/bats/11_bouncers_tls.bats b/tests/bats/11_bouncers_tls.bats index 80c08d112..39fa8027f 100644 --- a/tests/bats/11_bouncers_tls.bats +++ b/tests/bats/11_bouncers_tls.bats @@ -10,7 +10,8 @@ config_disable_agent() { setup_file() { load "../lib/setup_file.sh" ./instance-data load - tmpdir=$(mktemp -d) + + tmpdir="${BATS_FILE_TMPDIR}" export tmpdir CFDIR="${BATS_TEST_DIRNAME}/testdata/cfssl" @@ -48,7 +49,6 @@ setup_file() { teardown_file() { load "../lib/teardown_file.sh" - rm -rf "${tmpdir}" } setup() { @@ -62,12 +62,12 @@ teardown() { #---------- -@test "${FILE} there are 0 bouncers" { +@test "there are 0 bouncers" { run -0 cscli bouncers list -o json assert_output "[]" } -@test "${FILE} simulate one bouncer request with a valid cert" { +@test "simulate one bouncer request with a valid cert" { run -0 curl -s --cert "${tmpdir}/bouncer.pem" --key "${tmpdir}/bouncer-key.pem" --cacert "${tmpdir}/inter.pem" https://localhost:8080/v1/decisions\?ip=42.42.42.42 assert_output "null" run -0 cscli bouncers list -o json @@ -79,19 +79,19 @@ teardown() { run cscli bouncers delete localhost@127.0.0.1 } -@test "${FILE} simulate one bouncer request with an invalid cert" { +@test "simulate one bouncer request with an invalid cert" { run curl -s --cert "${tmpdir}/bouncer_invalid.pem" --key "${tmpdir}/bouncer_invalid-key.pem" --cacert "${tmpdir}/ca-key.pem" https://localhost:8080/v1/decisions\?ip=42.42.42.42 run -0 cscli bouncers list -o json assert_output "[]" } -@test "${FILE} simulate one bouncer request with an invalid OU" { +@test "simulate one bouncer request with an invalid OU" { run curl -s --cert "${tmpdir}/bouncer_bad_ou.pem" --key "${tmpdir}/bouncer_bad_ou-key.pem" --cacert "${tmpdir}/inter.pem" https://localhost:8080/v1/decisions\?ip=42.42.42.42 run -0 cscli bouncers list -o json assert_output "[]" } -@test "${FILE} simulate one bouncer request with a revoked certificate" { +@test "simulate one bouncer request with a revoked certificate" { run -0 curl -i -s --cert "${tmpdir}/bouncer_revoked.pem" --key "${tmpdir}/bouncer_revoked-key.pem" --cacert "${tmpdir}/inter.pem" https://localhost:8080/v1/decisions\?ip=42.42.42.42 assert_output --partial "access forbidden" run -0 cscli bouncers list -o json diff --git a/tests/bats/30_machines.bats b/tests/bats/30_machines.bats index 41a0cb68c..39d8386c2 100644 --- a/tests/bats/30_machines.bats +++ b/tests/bats/30_machines.bats @@ -60,7 +60,6 @@ teardown() { } @test "$FILE register, validate and then remove a machine" { - if is_db_postgres; then sleep 4; fi run -0 cscli lapi register --machine CiTestMachineRegister -f /dev/null -o human assert_output --partial "Successfully registered to Local API (LAPI)" assert_output --partial "Local API credentials dumped to '/dev/null'" diff --git a/tests/bats/30_machines_tls.bats b/tests/bats/30_machines_tls.bats index 574506d43..2b5d0f63b 100644 --- a/tests/bats/30_machines_tls.bats +++ b/tests/bats/30_machines_tls.bats @@ -3,11 +3,18 @@ set -u +config_disable_agent() { + yq e 'del(.crowdsec_service)' -i "${CONFIG_YAML}" +} + setup_file() { load "../lib/setup_file.sh" ./instance-data load - tmpdir=$(mktemp -d) + CONFIG_DIR=$(dirname "${CONFIG_YAML}") + export CONFIG_DIR + + tmpdir="${BATS_FILE_TMPDIR}" export tmpdir CFDIR="${BATS_TEST_DIRNAME}/testdata/cfssl" @@ -40,6 +47,8 @@ setup_file() { .api.server.tls.agents_allowed_ou=["agent-ou"] ' -i "${CONFIG_YAML}" + run -0 cscli machines delete githubciXXXXXXXXXXXXXXXXXXXXXXXX + config_disable_agent } teardown_file() { @@ -48,7 +57,6 @@ teardown_file() { setup() { load "../lib/setup.sh" - cscli machines delete githubciXXXXXXXXXXXXXXXXXXXXXXXX } teardown() { @@ -57,9 +65,7 @@ teardown() { #---------- -@test "${FILE} invalid OU for agent" { - CONFIG_DIR=$(dirname "${CONFIG_YAML}") - +@test "invalid OU for agent" { yq e ' .ca_cert_path=strenv(tmpdir) + "/inter.pem" | .key_path=strenv(tmpdir) + "/agent_bad_ou-key.pem" | @@ -67,18 +73,13 @@ teardown() { .url="https://127.0.0.1:8080" ' -i "${CONFIG_DIR}/local_api_credentials.yaml" - yq e 'del(.login)' -i "${CONFIG_DIR}/local_api_credentials.yaml" - yq e 'del(.password)' -i "${CONFIG_DIR}/local_api_credentials.yaml" + yq e 'del(.login,.password)' -i "${CONFIG_DIR}/local_api_credentials.yaml" ./instance-crowdsec start - #let the agent start - sleep 2 run -0 cscli machines list -o json assert_output '[]' } -@test "${FILE} we have exactly one machine registered with TLS" { - CONFIG_DIR=$(dirname "${CONFIG_YAML}") - +@test "we have exactly one machine registered with TLS" { yq e ' .ca_cert_path=strenv(tmpdir) + "/inter.pem" | .key_path=strenv(tmpdir) + "/agent-key.pem" | @@ -86,23 +87,17 @@ teardown() { .url="https://127.0.0.1:8080" ' -i "${CONFIG_DIR}/local_api_credentials.yaml" - yq e 'del(.login)' -i "${CONFIG_DIR}/local_api_credentials.yaml" - yq e 'del(.password)' -i "${CONFIG_DIR}/local_api_credentials.yaml" + yq e 'del(.login,.password)' -i "${CONFIG_DIR}/local_api_credentials.yaml" ./instance-crowdsec start - #let the agent start - sleep 2 + run -0 cscli lapi status run -0 cscli machines list -o json run -0 jq -c '[. | length, .[0].machineId[0:32], .[0].isValidated, .[0].ipAddress, .[0].auth_type]' <(output) assert_output '[1,"localhost@127.0.0.1",true,"127.0.0.1","tls"]' cscli machines delete localhost@127.0.0.1 - - ./instance-crowdsec stop } -@test "${FILE} invalid cert for agent" { - CONFIG_DIR=$(dirname "${CONFIG_YAML}") - +@test "invalid cert for agent" { yq e ' .ca_cert_path=strenv(tmpdir) + "/inter.pem" | .key_path=strenv(tmpdir) + "/agent_invalid-key.pem" | @@ -110,18 +105,13 @@ teardown() { .url="https://127.0.0.1:8080" ' -i "${CONFIG_DIR}/local_api_credentials.yaml" - yq e 'del(.login)' -i "${CONFIG_DIR}/local_api_credentials.yaml" - yq e 'del(.password)' -i "${CONFIG_DIR}/local_api_credentials.yaml" + yq e 'del(.login,.password)' -i "${CONFIG_DIR}/local_api_credentials.yaml" ./instance-crowdsec start - #let the agent start - sleep 2 run -0 cscli machines list -o json assert_output '[]' } -@test "${FILE} revoked cert for agent" { - CONFIG_DIR=$(dirname "${CONFIG_YAML}") - +@test "revoked cert for agent" { yq e ' .ca_cert_path=strenv(tmpdir) + "/inter.pem" | .key_path=strenv(tmpdir) + "/agent_revoked-key.pem" | @@ -129,11 +119,8 @@ teardown() { .url="https://127.0.0.1:8080" ' -i "${CONFIG_DIR}/local_api_credentials.yaml" - yq e 'del(.login)' -i "${CONFIG_DIR}/local_api_credentials.yaml" - yq e 'del(.password)' -i "${CONFIG_DIR}/local_api_credentials.yaml" + yq e 'del(.login,.password)' -i "${CONFIG_DIR}/local_api_credentials.yaml" ./instance-crowdsec start - #let the agent start - sleep 2 run -0 cscli machines list -o json assert_output '[]' } diff --git a/tests/bats/40_live-ban.bats b/tests/bats/40_live-ban.bats index d78f43869..4c9c9eccf 100644 --- a/tests/bats/40_live-ban.bats +++ b/tests/bats/40_live-ban.bats @@ -36,9 +36,8 @@ teardown() { echo -e "---\nfilename: $tmpfile\nlabels:\n type: syslog\n" >>"${ACQUIS_YAML}" ./instance-crowdsec start - sleep 2 fake_log >>"${tmpfile}" - sleep 2 + sleep 1 rm -f -- "${tmpfile}" run -0 cscli decisions list -o json run -0 jq -r '.[].decisions[0].value' <(output)