Sync data files with hub index (#1153)

* Allow versioning, updating data files
This commit is contained in:
Shivam Sandbhor 2022-02-03 22:55:49 +05:30 committed by GitHub
parent dd53d19777
commit 44d2eaad51
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 199 additions and 92 deletions

View file

@ -56,7 +56,7 @@ cscli hub update # Download list of available configurations from the hub
log.Info(v)
}
cwhub.DisplaySummary()
ListItems([]string{cwhub.PARSERS, cwhub.COLLECTIONS, cwhub.SCENARIOS, cwhub.PARSERS_OVFLW}, args, true, false)
ListItems([]string{cwhub.PARSERS, cwhub.COLLECTIONS, cwhub.SCENARIOS, cwhub.PARSERS_OVFLW, cwhub.DATA_FILES}, args, true, false)
},
}
cmdHubList.PersistentFlags().BoolVarP(&all, "all", "a", false, "List disabled items as well")

View file

@ -250,22 +250,21 @@ func UpgradeConfig(itemType string, name string, force bool) {
continue
}
found = true
if v.UpToDate || v.Tainted {
if v.UpToDate {
log.Infof("%s : up-to-date", v.Name)
}
if err = cwhub.DownloadDataIfNeeded(csConfig.Hub, v, force); err != nil {
log.Fatalf("%s : download failed : %v", v.Name, err)
}
}
if !v.Downloaded {
log.Warningf("%s : not downloaded, please install.", v.Name)
continue
}
found = true
if v.UpToDate {
log.Infof("%s : up-to-date", v.Name)
if !force {
if err = cwhub.DownloadDataIfNeeded(csConfig.Hub, v, false); err != nil {
log.Fatalf("%s : download failed : %v", v.Name, err)
}
continue
}
}
v, err = cwhub.DownloadLatest(csConfig.Hub, v, force, true)
if err != nil {
log.Fatalf("%s : download failed : %v", v.Name, err)

View file

@ -22,11 +22,12 @@ var PARSERS = "parsers"
var PARSERS_OVFLW = "postoverflows"
var SCENARIOS = "scenarios"
var COLLECTIONS = "collections"
var ItemTypes = []string{PARSERS, PARSERS_OVFLW, SCENARIOS, COLLECTIONS}
var DATA_FILES = "data_files"
var ItemTypes = []string{PARSERS, PARSERS_OVFLW, SCENARIOS, COLLECTIONS, DATA_FILES}
var hubIdx map[string]map[string]Item
var RawFileURLTemplate = "https://hub-cdn.crowdsec.net/%s/%s"
var RawFileURLTemplate = "https://raw.githubusercontent.com/sbs2001/hub/%s/%s"
var HubBranch = "master"
var HubIndexFile = ".index.json"
@ -57,11 +58,10 @@ type Item struct {
BelongsToCollections []string `yaml:"belongs_to_collections,omitempty" json:"belongs_to_collections,omitempty"` /*if it's part of collections, track name here*/
/*remote (hub) infos*/
RemoteURL string `yaml:"remoteURL,omitempty" json:"remoteURL,omitempty"` //the full remote uri of file in http
RemotePath string `json:"path,omitempty" yaml:"remote_path,omitempty"` //the path relative to git ie. /parsers/stage/author/file.yaml
RemoteHash string `yaml:"hash,omitempty" json:"hash,omitempty"` //the meow
Version string `json:"version,omitempty"` //the last version
Versions map[string]ItemVersion `json:"versions,omitempty" yaml:"-"` //the list of existing versions
RemotePath string `json:"path,omitempty" yaml:"remote_path,omitempty"` //the path relative to git ie. /parsers/stage/author/file.yaml
RemoteHash string `yaml:"hash,omitempty" json:"hash,omitempty"` //the meow
Version string `json:"version,omitempty"` //the last version
Versions map[string]ItemVersion `json:"versions,omitempty" yaml:"-"` //the list of existing versions
/*local (deployed) infos*/
LocalPath string `yaml:"local_path,omitempty" json:"local_path,omitempty"` //the local path relative to ${CFG_DIR}

View file

@ -383,8 +383,9 @@ func (t *mockTransport) RoundTrip(req *http.Request) (*http.Response, error) {
responseBody := ""
log.Printf("---> %s", req.URL.Path)
/*FAKE PARSER*/
if req.URL.Path == "/master/parsers/s01-parse/crowdsecurity/foobar_parser.yaml" {
if strings.HasSuffix(req.URL.Path, "/master/parsers/s01-parse/crowdsecurity/foobar_parser.yaml") {
responseBody = `onsuccess: next_stage
filter: evt.Parsed.program == 'foobar_parser'
name: crowdsecurity/foobar_parser
@ -395,7 +396,7 @@ grok:
apply_on: message
`
} else if req.URL.Path == "/master/parsers/s01-parse/crowdsecurity/foobar_subparser.yaml" {
} else if strings.HasSuffix(req.URL.Path, "/master/parsers/s01-parse/crowdsecurity/foobar_subparser.yaml") {
responseBody = `onsuccess: next_stage
filter: evt.Parsed.program == 'foobar_parser'
name: crowdsecurity/foobar_parser
@ -407,19 +408,19 @@ grok:
`
/*FAKE SCENARIO*/
} else if req.URL.Path == "/master/scenarios/crowdsecurity/foobar_scenario.yaml" {
} else if strings.HasSuffix(req.URL.Path, "/master/scenarios/crowdsecurity/foobar_scenario.yaml") {
responseBody = `filter: true
name: crowdsecurity/foobar_scenario`
/*FAKE COLLECTIONS*/
} else if req.URL.Path == "/master/collections/crowdsecurity/foobar.yaml" {
} else if strings.HasSuffix(req.URL.Path, "/master/collections/crowdsecurity/foobar.yaml") {
responseBody = `
blah: blalala
qwe: jejwejejw`
} else if req.URL.Path == "/master/collections/crowdsecurity/foobar_subcollection.yaml" {
} else if strings.HasSuffix(req.URL.Path, "/master/collections/crowdsecurity/foobar_subcollection.yaml") {
responseBody = `
blah: blalala
qwe: jejwejejw`
} else if req.URL.Path == "/master/.index.json" {
} else if strings.HasSuffix(req.URL.Path, "/master/.index.json") {
responseBody =
`{
"collections": {

View file

@ -3,15 +3,17 @@ package cwhub
import (
"bytes"
"crypto/sha256"
"io"
"path"
"path/filepath"
"sort"
//"errors"
"github.com/pkg/errors"
"gopkg.in/yaml.v2"
//"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
@ -20,7 +22,6 @@ import (
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
"github.com/crowdsecurity/crowdsec/pkg/types"
log "github.com/sirupsen/logrus"
"gopkg.in/yaml.v2"
)
func UpdateHubIdx(hub *csconfig.Hub) error {
@ -79,7 +80,6 @@ func DownloadHubIdx(hub *csconfig.Hub) ([]byte, error) {
func DownloadLatest(hub *csconfig.Hub, target Item, overwrite bool, updateOnly bool) (Item, error) {
var err error
log.Debugf("Downloading %s %s", target.Type, target.Name)
if target.Type == COLLECTIONS {
var tmp = [][]string{target.Parsers, target.PostOverflows, target.Scenarios, target.Collections}
for idx, ptr := range tmp {
@ -143,10 +143,19 @@ func DownloadItem(hub *csconfig.Hub, target Item, overwrite bool) (Item, error)
return target, nil
}
if target.UpToDate {
log.Debugf("%s : up-to-date, not updated", target.Name)
// We still have to check if data files are present
log.Debugf("%s : up-to-date, not updated", target.Name)
data, err := os.ReadFile(target.LocalPath)
if err != nil {
return target, err
}
if err := downloadData(dataFolder, target.Author, overwrite, bytes.NewReader(data)); err != nil {
return target, errors.Wrapf(err, "while downloading data for %s", target.FileName)
}
return target, nil
}
}
log.Debugf("Downloading %s %s", target.Type, target.Name)
req, err := http.NewRequest("GET", fmt.Sprintf(RawFileURLTemplate, HubBranch, target.RemotePath), nil)
if err != nil {
return target, errors.Wrap(err, fmt.Sprintf("while downloading %s", req.URL.String()))
@ -214,7 +223,7 @@ func DownloadItem(hub *csconfig.Hub, target Item, overwrite bool) (Item, error)
target.Tainted = false
target.UpToDate = true
if err = downloadData(dataFolder, overwrite, bytes.NewReader(body)); err != nil {
if err = downloadData(dataFolder, target.Author, overwrite, bytes.NewReader(body)); err != nil {
return target, errors.Wrapf(err, "while downloading data for %s", target.FileName)
}
@ -232,16 +241,15 @@ func DownloadDataIfNeeded(hub *csconfig.Hub, target Item, force bool) error {
if itemFile, err = os.Open(itemFilePath); err != nil {
return errors.Wrapf(err, "while opening %s", itemFilePath)
}
if err = downloadData(dataFolder, force, itemFile); err != nil {
if err = downloadData(dataFolder, target.Author, force, itemFile); err != nil {
return errors.Wrapf(err, "while downloading data for %s", itemFilePath)
}
return nil
}
func downloadData(dataFolder string, force bool, reader io.Reader) error {
func downloadData(dataFolder string, parentItemAuthor string, force bool, reader io.Reader) error {
var err error
dec := yaml.NewDecoder(reader)
for {
data := &types.DataSet{}
err = dec.Decode(data)
@ -252,20 +260,59 @@ func downloadData(dataFolder string, force bool, reader io.Reader) error {
break
}
download := false
if !force {
for _, dataS := range data.Data {
if _, err := os.Stat(path.Join(dataFolder, dataS.DestPath)); os.IsNotExist(err) {
download = true
for _, dataS := range data.Data {
download := false
dfPath := path.Join(dataFolder, dataS.DestPath)
dataFileName := strings.Split(dataS.DestPath, ".")[0]
_, downloadFromHub := hubIdx[DATA_FILES][dataFileName]
if downloadFromHub {
dataS.SourceURL = fmt.Sprintf(RawFileURLTemplate, HubBranch, hubIdx[DATA_FILES][dataFileName].RemotePath)
}
if _, err := os.Stat(dfPath); os.IsNotExist(err) {
download = true
} else if downloadFromHub {
sha, err := getSHA256(dfPath)
if err != nil {
return err
}
download = dataFileHasUpdates(sha, dataFileName)
}
log.Infof("%v has updates=%v", dataFileName, download)
if download || force {
err = types.GetData(dataS, dataFolder)
if err != nil {
return errors.Wrap(err, "while getting data")
}
}
}
if download || force {
err = types.GetData(data.Data, dataFolder)
if err != nil {
return errors.Wrap(err, "while getting data")
}
}
}
return nil
}
// dataFileHasUpdates reports whether the local data file dataFileName, whose
// SHA256 digest is fileSha, corresponds to a version listed in the hub index
// that is older than the newest listed version.
//
// It returns false when the digest matches the newest version and also when
// it matches no listed version at all (for example a locally modified file).
// Only files present in hubIdx[DATA_FILES] should be checked; an unknown name
// has no versions and therefore yields false.
func dataFileHasUpdates(fileSha string, dataFileName string) bool {
	dataItem := hubIdx[DATA_FILES][dataFileName]
	versions := make([]string, 0, len(dataItem.Versions))
	for k := range dataItem.Versions {
		versions = append(versions, k)
	}
	// Newest first. Components are compared numerically: a plain string sort
	// would rank "0.9" above "0.10" and pick the wrong "latest" version.
	sort.Slice(versions, func(i, j int) bool {
		if c := compareDottedVersions(versions[i], versions[j]); c != 0 {
			return c > 0
		}
		// Numerically equal spellings ("1" vs "1.0"): keep the order deterministic.
		return versions[i] > versions[j]
	})
	for i, version := range versions {
		if fileSha != dataItem.Versions[version].Digest {
			continue
		}
		log.Debugf("data file %s matched sha with version %s", dataFileName, version)
		if i != 0 {
			log.Debugf("data file %s is outdated, updating to version %s", dataFileName, versions[0])
			return true
		}
		break
	}
	return false
}

// compareDottedVersions compares two dot-separated version strings component
// by component and returns -1, 0 or 1. Components made only of digits are
// compared as numbers (a missing component counts as zero); any other
// component falls back to a plain string comparison.
func compareDottedVersions(a, b string) int {
	isNumeric := func(s string) bool {
		for i := 0; i < len(s); i++ {
			if s[i] < '0' || s[i] > '9' {
				return false
			}
		}
		return true
	}
	as, bs := strings.Split(a, "."), strings.Split(b, ".")
	for i := 0; i < len(as) || i < len(bs); i++ {
		var x, y string
		if i < len(as) {
			x = as[i]
		}
		if i < len(bs) {
			y = bs[i]
		}
		if !isNumeric(x) || !isNumeric(y) {
			if c := strings.Compare(x, y); c != 0 {
				return c
			}
			continue
		}
		// With leading zeros stripped, the longer digit string is the larger
		// number; equal lengths compare correctly as strings.
		x, y = strings.TrimLeft(x, "0"), strings.TrimLeft(y, "0")
		if len(x) != len(y) {
			if len(x) < len(y) {
				return -1
			}
			return 1
		}
		if c := strings.Compare(x, y); c != 0 {
			return c
		}
	}
	return 0
}

View file

@ -40,3 +40,29 @@ func TestDownloadHubIdx(t *testing.T) {
RawFileURLTemplate = back
fmt.Printf("->%+v", ret)
}
// TestDataFileIsLatest checks dataFileHasUpdates against a hub entry that
// lists two versions: a digest matching the older version must report an
// update, while a digest matching the newest version, or matching no version
// at all, must not.
func TestDataFileIsLatest(t *testing.T) {
	dataFileName := "crowdsecurity/sensitive-files"

	// Swap in a minimal index and restore the previous one on exit so other
	// tests in the package are not affected.
	back := hubIdx
	defer func() { hubIdx = back }()
	hubIdx = map[string]map[string]Item{
		DATA_FILES: {
			dataFileName: {
				Versions: map[string]ItemVersion{
					"0.1": {Digest: "1"},
					"0.2": {Digest: "2"},
				},
			},
		},
	}

	cases := []struct {
		sha  string
		want bool
	}{
		{sha: "1", want: true},  // matches 0.1, which is older than 0.2
		{sha: "2", want: false}, // matches the newest version
		{sha: "3", want: false}, // data file is tainted: matches no known version
	}
	for _, tc := range cases {
		// t.Errorf (not a logger call) so that a mismatch actually fails the test.
		if got := dataFileHasUpdates(tc.sha, dataFileName); got != tc.want {
			t.Errorf("dataFileHasUpdates(%q, %s) = %v, expected %v", tc.sha, dataFileName, got, tc.want)
		}
	}
}

View file

@ -21,10 +21,10 @@ import (
)
/*the walk/parser_visit function can't receive extra args*/
var hubdir, installdir string
var hubdir, installdir, datadir string
// TODO: Break this function into smaller functions.
func parser_visit(path string, f os.FileInfo, err error) error {
var target Item
var local bool
var hubpath string
@ -42,16 +42,17 @@ func parser_visit(path string, f os.FileInfo, err error) error {
if f == nil || f.IsDir() {
return nil
}
//we only care about yaml files
if !strings.HasSuffix(f.Name(), ".yaml") && !strings.HasSuffix(f.Name(), ".yml") {
// yamls -> collections, parsers, overflows etc. txt, mmdb -> data files
if !strings.HasSuffix(f.Name(), ".yaml") && !strings.HasSuffix(f.Name(), ".yml") && !strings.HasSuffix(f.Name(), ".txt") && !strings.HasSuffix(f.Name(), ".mmdb") {
return nil
}
subs := strings.Split(path, "/")
log.Tracef("path:%s, hubdir:%s, installdir:%s", path, hubdir, installdir)
log.Tracef("path:%s, hubdir:%s, installdir:%s datadir%s", path, hubdir, installdir, datadir)
/*we're in hub (~/.hub/hub/)*/
if strings.HasPrefix(path, hubdir) {
hubDirSetter := func() {
log.Tracef("in hub dir")
inhub = true
//.../hub/parsers/s00-raw/crowdsec/skip-pretag.yaml
@ -64,7 +65,18 @@ func parser_visit(path string, f os.FileInfo, err error) error {
fauthor = subs[len(subs)-2]
stage = subs[len(subs)-3]
ftype = subs[len(subs)-4]
} else if strings.HasPrefix(path, installdir) { /*we're in install /etc/crowdsec/<type>/... */
}
// dataDirSetter fills the item descriptors for a file found under the data
// directory: the type is forced to DATA_FILES, the name is the last path
// component, and stage and author are left empty.
dataDirSetter := func() {
	log.Tracef("in data dir")
	fname = subs[len(subs)-1]
	ftype = DATA_FILES
	stage = ""
	fauthor = ""
}
installDirSetter := func() {
log.Tracef("in install dir")
if len(subs) < 3 {
log.Fatalf("path is too short : %s (%d)", path, len(subs))
@ -76,23 +88,42 @@ func parser_visit(path string, f os.FileInfo, err error) error {
fname = subs[len(subs)-1]
stage = subs[len(subs)-2]
ftype = subs[len(subs)-3]
fauthor = ""
} else {
return fmt.Errorf("File '%s' is not from hub '%s' nor from the configuration directory '%s'", path, hubdir, installdir)
}
setterByPath := map[string]func(){
installdir: installDirSetter,
hubdir: hubDirSetter,
datadir: dataDirSetter,
}
paths := []string{installdir, hubdir, datadir}
sort.Slice(paths, func(i, j int) bool {
return len(paths[i]) > len(paths[j])
})
foundMatch := false
for _, p := range paths {
if strings.HasPrefix(path, p) {
setterByPath[p]()
foundMatch = true
break
}
}
if !foundMatch {
return fmt.Errorf("file '%s' is not from hub '%s' nor from the configuration directory '%s'", path, hubdir, installdir)
}
log.Tracef("stage:%s ftype:%s", stage, ftype)
//log.Printf("%s -> name:%s stage:%s", path, fname, stage)
// correct the stage and type for non-stage stuff.
if stage == SCENARIOS {
ftype = SCENARIOS
stage = ""
} else if stage == COLLECTIONS {
ftype = COLLECTIONS
stage = ""
} else if ftype != PARSERS && ftype != PARSERS_OVFLW /*its a PARSER / PARSER_OVFLW with a stage */ {
} else if ftype != PARSERS && ftype != PARSERS_OVFLW && ftype != DATA_FILES { /*its a PARSER / PARSER_OVFLW with a stage */
return fmt.Errorf("unknown configuration type for file '%s'", path)
}
log.Tracef("CORRECTED [%s] by [%s] in stage [%s] of type [%s]", fname, fauthor, stage, ftype)
/*
@ -100,7 +131,7 @@ func parser_visit(path string, f os.FileInfo, err error) error {
/etc/crowdsec/.../collections/linux.yaml -> ~/.hub/hub/collections/.../linux.yaml
when the collection is installed, both files are created
*/
//non symlinks are local user files or hub files
//non symlinks are local user files or hub files or data files
if f.Mode()&os.ModeSymlink == 0 {
local = true
log.Tracef("%s isn't a symlink", path)
@ -122,8 +153,8 @@ func parser_visit(path string, f os.FileInfo, err error) error {
log.Tracef("%s points to %s", path, hubpath)
}
//if it's not a symlink and not in hub, it's a local file, don't bother
if local && !inhub {
//if it's not a symlink, not in the hub and not a data file, it's a purely local file: don't check it against the index
if local && !inhub && ftype != DATA_FILES {
log.Tracef("%s is a local file, skip", path)
skippedLocal++
// log.Printf("local scenario, skip.")
@ -144,7 +175,7 @@ func parser_visit(path string, f os.FileInfo, err error) error {
log.Tracef("check [%s] of %s", fname, ftype)
match := false
for k, v := range hubIdx[ftype] {
for k, v := range hubIdx[ftype] { // eg ftype = "collections", k = crowdsecurity/nginx, v is an Item struct
log.Tracef("check [%s] vs [%s] : %s", fname, v.RemotePath, ftype+"/"+stage+"/"+fname+".yaml")
if fname != v.FileName {
log.Tracef("%s != %s (filename)", fname, v.FileName)
@ -152,6 +183,7 @@ func parser_visit(path string, f os.FileInfo, err error) error {
}
//wrong stage
if v.Stage != stage {
log.Tracef("%s != %s (stage)", v.Stage, stage)
continue
}
/*if we are walking hub dir, just mark present files as downloaded*/
@ -168,14 +200,6 @@ func parser_visit(path string, f os.FileInfo, err error) error {
log.Tracef("marking %s as downloaded", v.Name)
v.Downloaded = true
}
} else {
//wrong file
//<type>/<stage>/<author>/<name>.yaml
if !strings.HasSuffix(hubpath, v.RemotePath) {
//log.Printf("wrong file %s %s", hubpath, spew.Sdump(v))
continue
}
}
sha, err := getSHA256(path)
if err != nil {
@ -305,11 +329,18 @@ func CollecDepsCheck(v *Item) error {
func SyncDir(hub *csconfig.Hub, dir string) (error, []string) {
hubdir = hub.HubDir
installdir = hub.ConfigDir
datadir = hub.DataDir
warnings := []string{}
/*For each, scan PARSERS, PARSERS_OVFLW, SCENARIOS and COLLECTIONS last*/
/*For each, scan PARSERS, PARSERS_OVFLW, DATA_FILES, SCENARIOS and COLLECTIONS last*/
for _, scan := range ItemTypes {
cpath, err := filepath.Abs(fmt.Sprintf("%s/%s", dir, scan))
var cpath string
var err error
if scan == DATA_FILES {
cpath, err = filepath.Abs(hub.DataDir)
} else {
cpath, err = filepath.Abs(fmt.Sprintf("%s/%s", dir, scan))
}
if err != nil {
log.Errorf("failed %s : %s", cpath, err)
}
@ -317,24 +348,24 @@ func SyncDir(hub *csconfig.Hub, dir string) (error, []string) {
if err != nil {
return err, warnings
}
}
for k, v := range hubIdx[COLLECTIONS] {
if v.Installed {
versStat := GetVersionStatus(&v)
if versStat == 0 { //latest
if err := CollecDepsCheck(&v); err != nil {
warnings = append(warnings, fmt.Sprintf("dependency of %s : %s", v.Name, err))
hubIdx[COLLECTIONS][k] = v
}
} else if versStat == 1 { //not up-to-date
warnings = append(warnings, fmt.Sprintf("update for collection %s available (currently:%s, latest:%s)", v.Name, v.LocalVersion, v.Version))
} else { //version is higher than the highest available from hub?
warnings = append(warnings, fmt.Sprintf("collection %s is in the future (currently:%s, latest:%s)", v.Name, v.LocalVersion, v.Version))
}
log.Debugf("installed (%s) - status:%d | installed:%s | latest : %s | full : %+v", v.Name, semver.Compare("v"+v.Version, "v"+v.LocalVersion), v.LocalVersion, v.Version, v.Versions)
if !v.Installed {
continue
}
versStat := GetVersionStatus(&v)
if versStat == 0 { //latest
if err := CollecDepsCheck(&v); err != nil {
warnings = append(warnings, fmt.Sprintf("dependency of %s : %s", v.Name, err))
hubIdx[COLLECTIONS][k] = v
}
} else if versStat == 1 { //not up-to-date
warnings = append(warnings, fmt.Sprintf("update for collection %s available (currently:%s, latest:%s)", v.Name, v.LocalVersion, v.Version))
} else { //version is higher than the highest available from hub?
warnings = append(warnings, fmt.Sprintf("collection %s is in the future (currently:%s, latest:%s)", v.Name, v.LocalVersion, v.Version))
}
log.Debugf("installed (%s) - status:%d | installed:%s | latest : %s | full : %+v", v.Name, semver.Compare("v"+v.Version, "v"+v.LocalVersion), v.LocalVersion, v.Version, v.Versions)
}
return nil, warnings
}

View file

@ -6,6 +6,7 @@ import (
"net/http"
"os"
"path"
"path/filepath"
log "github.com/sirupsen/logrus"
)
@ -42,6 +43,10 @@ func downloadFile(url string, destPath string) error {
return fmt.Errorf("download response 'HTTP %d' : %s", resp.StatusCode, string(body))
}
// Directories need the execute (search) bit to be entered: with 0666 a
// non-root process could not create or open files inside the directories it
// just made, so the OpenFile below would fail.
if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
	return err
}
file, err := os.OpenFile(destPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
return err
@ -60,14 +65,12 @@ func downloadFile(url string, destPath string) error {
return nil
}
func GetData(data []*DataSource, dataDir string) error {
for _, dataS := range data {
destPath := path.Join(dataDir, dataS.DestPath)
log.Infof("downloading data '%s' in '%s'", dataS.SourceURL, destPath)
err := downloadFile(dataS.SourceURL, destPath)
if err != nil {
return err
}
func GetData(dataS *DataSource, dataDir string) error {
destPath := path.Join(dataDir, dataS.DestPath)
log.Infof("downloading data '%s' in '%s'", dataS.SourceURL, destPath)
err := downloadFile(dataS.SourceURL, destPath)
if err != nil {
return err
}
return nil