diff --git a/cmd/crowdsec-cli/itemcli.go b/cmd/crowdsec-cli/itemcli.go index cdaf78770..8be9d9b20 100644 --- a/cmd/crowdsec-cli/itemcli.go +++ b/cmd/crowdsec-cli/itemcli.go @@ -473,11 +473,22 @@ func (cli cliItem) itemDiff(item *cwhub.Item, reverse bool) (string, error) { return "", fmt.Errorf("'%s' is not installed", item.FQName()) } - latestContent, remoteURL, err := item.FetchLatest() + dest, err := os.CreateTemp("", "cscli-diff-*") + if err != nil { + return "", fmt.Errorf("while creating temporary file: %w", err) + } + defer os.Remove(dest.Name()) + + _, remoteURL, err := item.FetchContentTo(dest.Name()) if err != nil { return "", err } + latestContent, err := os.ReadFile(dest.Name()) + if err != nil { + return "", fmt.Errorf("while reading %s: %w", dest.Name(), err) + } + localContent, err := os.ReadFile(item.State.LocalPath) if err != nil { return "", fmt.Errorf("while reading %s: %w", item.State.LocalPath, err) diff --git a/go.mod b/go.mod index 70d819a40..c7a161aa9 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/corazawaf/libinjection-go v0.1.2 github.com/crowdsecurity/coraza/v3 v3.0.0-20240108124027-a62b8d8e5607 github.com/crowdsecurity/dlog v0.0.0-20170105205344-4fb5f8204f26 - github.com/crowdsecurity/go-cs-lib v0.0.10 + github.com/crowdsecurity/go-cs-lib v0.0.11-0.20240422215546-8104b9078bfd github.com/crowdsecurity/grokky v0.2.1 github.com/crowdsecurity/machineid v1.0.2 github.com/davecgh/go-spew v1.1.1 @@ -219,3 +219,5 @@ require ( ) replace golang.org/x/time/rate => github.com/crowdsecurity/crowdsec/pkg/time/rate v0.0.0 + +replace github.com/crowdsecurity/go-cs-lib => /home/marco/src/go-cs-lib diff --git a/go.sum b/go.sum index 750439e4f..44e0b063e 100644 --- a/go.sum +++ b/go.sum @@ -102,8 +102,6 @@ github.com/crowdsecurity/coraza/v3 v3.0.0-20240108124027-a62b8d8e5607 h1:hyrYw3h github.com/crowdsecurity/coraza/v3 v3.0.0-20240108124027-a62b8d8e5607/go.mod h1:br36fEqurGYZQGit+iDYsIzW0FF6VufMbDzyyLxEuPA= github.com/crowdsecurity/dlog v0.0.0-20170105205344-4fb5f8204f26 h1:r97WNVC30Uen+7WnLs4xDScS/Ex988+id2k6mDf8psU= github.com/crowdsecurity/dlog v0.0.0-20170105205344-4fb5f8204f26/go.mod h1:zpv7r+7KXwgVUZnUNjyP22zc/D7LKjyoY02weH2RBbk= -github.com/crowdsecurity/go-cs-lib v0.0.10 h1:Twt/y/rYCUspGY1zxDnGurL2svRSREAz+2+puLepd9c= -github.com/crowdsecurity/go-cs-lib v0.0.10/go.mod h1:8FMKNGsh3hMZi2SEv6P15PURhEJnZV431XjzzBSuf0k= github.com/crowdsecurity/grokky v0.2.1 h1:t4VYnDlAd0RjDM2SlILalbwfCrQxtJSMGdQOR0zwkE4= github.com/crowdsecurity/grokky v0.2.1/go.mod h1:33usDIYzGDsgX1kHAThCbseso6JuWNJXOzRQDGXHtWM= github.com/crowdsecurity/machineid v1.0.2 h1:wpkpsUghJF8Khtmn/tg6GxgdhLA1Xflerh5lirI+bdc= diff --git a/pkg/cwhub/cwhub.go b/pkg/cwhub/cwhub.go index a7864d4c0..0496834e4 100644 --- a/pkg/cwhub/cwhub.go +++ b/pkg/cwhub/cwhub.go @@ -23,7 +23,7 @@ func (t *hubTransport) RoundTrip(req *http.Request) (*http.Response, error) { // hubClient is the HTTP client used to communicate with the CrowdSec Hub. var hubClient = &http.Client{ - Timeout: 120 * time.Second, + Timeout: 120 * time.Second, Transport: &hubTransport{http.DefaultTransport}, } diff --git a/pkg/cwhub/dataset.go b/pkg/cwhub/dataset.go index 921361e3f..ec582592c 100644 --- a/pkg/cwhub/dataset.go +++ b/pkg/cwhub/dataset.go @@ -1,19 +1,17 @@ package cwhub import ( + "context" "errors" "fmt" "io" - "io/fs" - "net/http" - "os" - "path/filepath" - "runtime" "time" "github.com/sirupsen/logrus" "gopkg.in/yaml.v3" + "github.com/crowdsecurity/go-cs-lib/downloader" + "github.com/crowdsecurity/crowdsec/pkg/types" ) @@ -22,128 +20,6 @@ type DataSet struct { Data []types.DataSource `yaml:"data,omitempty"` } -// downloadFile downloads a file and writes it to disk, with no hash verification. -func downloadFile(url string, destPath string) error { - resp, err := hubClient.Get(url) - if err != nil { - return fmt.Errorf("while downloading %s: %w", url, err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("bad http code %d for %s", resp.StatusCode, url) - } - - // Download to a temporary location to avoid corrupting files - // that are currently in use or memory mapped. - - tmpFile, err := os.CreateTemp(filepath.Dir(destPath), filepath.Base(destPath)+".*.tmp") - if err != nil { - return err - } - - tmpFileName := tmpFile.Name() - defer func() { - tmpFile.Close() - os.Remove(tmpFileName) - }() - - // avoid reading the whole file in memory - _, err = io.Copy(tmpFile, resp.Body) - if err != nil { - return err - } - - if err = tmpFile.Sync(); err != nil { - return err - } - - if err = tmpFile.Close(); err != nil { - return err - } - - // a check on stdout is used while scripting to know if the hub has been upgraded - // and a configuration reload is required - // TODO: use a better way to communicate this - fmt.Printf("updated %s\n", filepath.Base(destPath)) - - if runtime.GOOS == "windows" { - // On Windows, rename will fail if the destination file already exists - // so we remove it first. - err = os.Remove(destPath) - switch { - case errors.Is(err, fs.ErrNotExist): - break - case err != nil: - return err - } - } - - if err = os.Rename(tmpFileName, destPath); err != nil { - return err - } - - return nil -} - -// needsUpdate checks if a data file has to be downloaded (or updated). -// if the local file doesn't exist, update. -// if the remote is newer than the local file, update. -// if the remote has no modification date, but local file has been modified > a week ago, update. -func needsUpdate(destPath string, url string, logger *logrus.Logger) bool { - fileInfo, err := os.Stat(destPath) - - switch { - case os.IsNotExist(err): - return true - case err != nil: - logger.Errorf("while getting %s: %s", destPath, err) - return true - } - - resp, err := hubClient.Head(url) - if err != nil { - logger.Errorf("while getting %s: %s", url, err) - // Head failed, Get would likely fail too -> no update - return false - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - logger.Errorf("bad http code %d for %s", resp.StatusCode, url) - return false - } - - // update if local file is older than this - shelfLife := 7 * 24 * time.Hour - - lastModify := fileInfo.ModTime() - - localIsOld := lastModify.Add(shelfLife).Before(time.Now()) - - remoteLastModified := resp.Header.Get("Last-Modified") - if remoteLastModified == "" { - if localIsOld { - logger.Infof("no last modified date for %s, but local file is older than %s", url, shelfLife) - } - - return localIsOld - } - - lastAvailable, err := time.Parse(time.RFC1123, remoteLastModified) - if err != nil { - logger.Warningf("while parsing last modified date for %s: %s", url, err) - return localIsOld - } - - if lastModify.Before(lastAvailable) { - logger.Infof("new version available, updating %s", destPath) - return true - } - - return false -} - // downloadDataSet downloads all the data files for an item. func downloadDataSet(dataFolder string, force bool, reader io.Reader, logger *logrus.Logger) error { dec := yaml.NewDecoder(reader) @@ -165,12 +41,32 @@ func downloadDataSet(dataFolder string, force bool, reader io.Reader, logger *lo return err } - if force || needsUpdate(destPath, dataS.SourceURL, logger) { - logger.Debugf("downloading %s in %s", dataS.SourceURL, destPath) + d := downloader. + New(dataS.SourceURL). + WithHTTPClient(hubClient). + ToFile(destPath). + CompareContent(). + WithLogger(logrus.WithFields(logrus.Fields{"url": dataS.SourceURL})) - if err := downloadFile(dataS.SourceURL, destPath); err != nil { - return fmt.Errorf("while getting data: %w", err) - } + if !force { + d = d.WithLastModified(). + WithShelfLife(7 * 24 * time.Hour) + } + + // TODO: real context + ctx := context.Background() + + downloaded, err := d.Download(ctx) + if err != nil { + return fmt.Errorf("while getting data: %w", err) + } + + if downloaded { + logger.Infof("Downloaded %s", destPath) + // a check on stdout is used while scripting to know if the hub has been upgraded + // and a configuration reload is required + // TODO: use a better way to communicate this + fmt.Printf("updated %s\n", destPath) } } } diff --git a/pkg/cwhub/dataset_test.go b/pkg/cwhub/dataset_test.go deleted file mode 100644 index e48202e48..000000000 --- a/pkg/cwhub/dataset_test.go +++ /dev/null @@ -1,56 +0,0 @@ -package cwhub - -import ( - "io" - "net/http" - "net/http/httptest" - "os" - "path/filepath" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/crowdsecurity/go-cs-lib/cstest" -) - -func TestDownloadFile(t *testing.T) { - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - switch r.URL.Path { - case "/xx": - w.WriteHeader(http.StatusOK) - _, _ = io.WriteString(w, "example content oneoneone") - default: - w.WriteHeader(http.StatusNotFound) - _, _ = io.WriteString(w, "not found") - } - })) - defer ts.Close() - - dest := filepath.Join(t.TempDir(), "example.txt") - defer os.Remove(dest) - - err := downloadFile(ts.URL+"/xx", dest) - require.NoError(t, err) - - content, err := os.ReadFile(dest) - assert.Equal(t, "example content oneoneone", string(content)) - require.NoError(t, err) - - // bad uri - err = downloadFile("https://zz.com", dest) - cstest.RequireErrorContains(t, err, "lookup zz.com") - cstest.RequireErrorContains(t, err, "no such host") - - // 404 - err = downloadFile(ts.URL+"/x", dest) - cstest.RequireErrorContains(t, err, "bad http code 404") - - // bad target - err = downloadFile(ts.URL+"/xx", "") - cstest.RequireErrorContains(t, err, cstest.PathNotFoundMessage) - - // destination directory does not exist - err = downloadFile(ts.URL+"/xx", filepath.Join(t.TempDir(), "missing/example.txt")) - cstest.RequireErrorContains(t, err, cstest.PathNotFoundMessage) -} diff --git a/pkg/cwhub/hub.go b/pkg/cwhub/hub.go index 6b9f56b2e..87a6644bc 100644 --- a/pkg/cwhub/hub.go +++ b/pkg/cwhub/hub.go @@ -1,7 +1,6 @@ package cwhub import ( - "bytes" "encoding/json" "errors" "fmt" @@ -21,8 +20,8 @@ type Hub struct { items HubItems // Items read from HubDir and InstallDir local *csconfig.LocalHubCfg remote *RemoteHubCfg - Warnings []string // Warnings encountered during sync logger *logrus.Logger + Warnings []string // Warnings encountered during sync } // GetDataDir returns the data directory, where data sets are installed. @@ -150,27 +149,17 @@ func (h *Hub) ItemStats() []string { // updateIndex downloads the latest version of the index and writes it to disk if it changed. func (h *Hub) updateIndex() error { - body, err := h.remote.fetchIndex() + downloaded, err := h.remote.fetchIndex(h.local.HubIndexFile) if err != nil { return err } - oldContent, err := os.ReadFile(h.local.HubIndexFile) - if err != nil { - if !os.IsNotExist(err) { - h.logger.Warningf("failed to read hub index: %s", err) - } - } else if bytes.Equal(body, oldContent) { + if downloaded { + h.logger.Infof("Wrote index to %s", h.local.HubIndexFile) + } else { h.logger.Info("hub index is up to date") - return nil } - if err = os.WriteFile(h.local.HubIndexFile, body, 0o644); err != nil { - return fmt.Errorf("failed to write hub index: %w", err) - } - - h.logger.Infof("Wrote index to %s, %d bytes", h.local.HubIndexFile, len(body)) - return nil } diff --git a/pkg/cwhub/item.go b/pkg/cwhub/item.go index 6cdb5cadc..4249a20e1 100644 --- a/pkg/cwhub/item.go +++ b/pkg/cwhub/item.go @@ -29,10 +29,8 @@ const ( versionFuture // local version is higher latest, but is included in the index: should not happen ) -var ( - // The order is important, as it is used to range over sub-items in collections. - ItemTypes = []string{PARSERS, POSTOVERFLOWS, SCENARIOS, CONTEXTS, APPSEC_CONFIGS, APPSEC_RULES, COLLECTIONS} -) +// The order is important, as it is used to range over sub-items in collections. +var ItemTypes = []string{PARSERS, POSTOVERFLOWS, SCENARIOS, CONTEXTS, APPSEC_CONFIGS, APPSEC_RULES, COLLECTIONS} type HubItems map[string]map[string]*Item diff --git a/pkg/cwhub/iteminstall.go b/pkg/cwhub/iteminstall.go index ceae36491..cf331c05f 100644 --- a/pkg/cwhub/iteminstall.go +++ b/pkg/cwhub/iteminstall.go @@ -48,16 +48,21 @@ func (i *Item) Install(force bool, downloadOnly bool) error { } } - filePath, err := i.downloadLatest(force, true) + downloaded, err := i.downloadLatest(force, true) if err != nil { return err } - if downloadOnly { - i.hub.logger.Infof("Downloaded %s to %s", i.Name, filePath) + if downloadOnly && downloaded { + i.hub.logger.Infof("Downloaded %s", i.Name) return nil } + // a check on stdout is used while scripting to know if the hub has been upgraded + // and a configuration reload is required + // TODO: use a better way to communicate this + fmt.Printf("installed %s\n", i.Name) + if err := i.enable(); err != nil { return fmt.Errorf("while enabling %s: %w", i.Name, err) } diff --git a/pkg/cwhub/itemupgrade.go b/pkg/cwhub/itemupgrade.go index 6a8dc2f44..29b681be1 100644 --- a/pkg/cwhub/itemupgrade.go +++ b/pkg/cwhub/itemupgrade.go @@ -3,15 +3,14 @@ package cwhub // Install, upgrade and remove items from the hub to the local configuration import ( - "bytes" - "crypto/sha256" - "encoding/hex" + "context" "errors" "fmt" - "io" - "net/http" "os" - "path/filepath" + + "github.com/sirupsen/logrus" + + "github.com/crowdsecurity/go-cs-lib/downloader" "github.com/crowdsecurity/crowdsec/pkg/emoji" ) @@ -68,7 +67,7 @@ func (i *Item) Upgrade(force bool) (bool, error) { } // downloadLatest downloads the latest version of the item to the hub directory. -func (i *Item) downloadLatest(overwrite bool, updateOnly bool) (string, error) { +func (i *Item) downloadLatest(overwrite bool, updateOnly bool) (bool, error) { i.hub.logger.Debugf("Downloading %s %s", i.Type, i.Name) for _, sub := range i.SubItems() { @@ -84,98 +83,85 @@ func (i *Item) downloadLatest(overwrite bool, updateOnly bool) (string, error) { i.hub.logger.Tracef("collection, recurse") if _, err := sub.downloadLatest(overwrite, updateOnly); err != nil { - return "", err + return false, err } } downloaded := sub.State.Downloaded if _, err := sub.download(overwrite); err != nil { - return "", err + return false, err } // We need to enable an item when it has been added to a collection since latest release of the collection. // We check if sub.Downloaded is false because maybe the item has been disabled by the user. if !sub.State.Installed && !downloaded { if err := sub.enable(); err != nil { - return "", fmt.Errorf("enabling '%s': %w", sub.Name, err) + return false, fmt.Errorf("enabling '%s': %w", sub.Name, err) } } } if !i.State.Installed && updateOnly && i.State.Downloaded && !overwrite { i.hub.logger.Debugf("skipping upgrade of %s: not installed", i.Name) - return "", nil + return false, nil } - ret, err := i.download(overwrite) - if err != nil { - return "", err - } - - return ret, nil + return i.download(overwrite) } -// FetchLatest downloads the latest item from the hub, verifies the hash and returns the content and the used url. -func (i *Item) FetchLatest() ([]byte, string, error) { - if i.latestHash() == "" { - return nil, "", errors.New("latest hash missing from index") - } - +// FetchContentTo downloads the last version of the item's YAML file to the specified path. +func (i *Item) FetchContentTo(destPath string) (bool, string, error) { url, err := i.hub.remote.urlTo(i.RemotePath) if err != nil { - return nil, "", fmt.Errorf("failed to build request: %w", err) + return false, "", fmt.Errorf("failed to build request: %w", err) } - resp, err := hubClient.Get(url) + wantHash := i.latestHash() + if wantHash == "" { + return false, "", errors.New("latest hash missing from index") + } + + d := downloader. + New(url). + WithHTTPClient(hubClient). + ToFile(destPath). + WithMakeDirs(true). + WithLogger(logrus.WithFields(logrus.Fields{"url": url})). + CompareContent(). + VerifyHash("sha256", wantHash) + + // TODO: recommend hub update if hash does not match + // TODO: use real context + + ctx := context.Background() + + downloaded, err := d.Download(ctx) if err != nil { - return nil, "", err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, "", fmt.Errorf("bad http code %d", resp.StatusCode) + return false, "", fmt.Errorf("while downloading %s to %s: %w", i.Name, url, err) } - body, err := io.ReadAll(resp.Body) - if err != nil { - return nil, "", err - } - - hash := sha256.New() - if _, err = hash.Write(body); err != nil { - return nil, "", fmt.Errorf("while hashing %s: %w", i.Name, err) - } - - meow := hex.EncodeToString(hash.Sum(nil)) - if meow != i.Versions[i.Version].Digest { - i.hub.logger.Errorf("Downloaded version doesn't match index, please 'hub update'") - i.hub.logger.Debugf("got %s, expected %s", meow, i.Versions[i.Version].Digest) - - return nil, "", errors.New("invalid download hash") - } - - return body, url, nil + return downloaded, url, nil } // download downloads the item from the hub and writes it to the hub directory. -func (i *Item) download(overwrite bool) (string, error) { +func (i *Item) download(overwrite bool) (bool, error) { // ensure that target file is within target dir finalPath, err := i.downloadPath() if err != nil { - return "", err + return false, err } if i.State.IsLocal() { i.hub.logger.Warningf("%s is local, can't download", i.Name) - return finalPath, nil + return false, nil } // if user didn't --force, don't overwrite local, tainted, up-to-date files if !overwrite { if i.State.Tainted { i.hub.logger.Debugf("%s: tainted, not updated", i.Name) - return "", nil + return false, nil } if i.State.UpToDate { @@ -184,45 +170,30 @@ func (i *Item) download(overwrite bool) (string, error) { } } - body, url, err := i.FetchLatest() + downloaded, _, err := i.FetchContentTo(finalPath) if err != nil { - what := i.Name - if url != "" { - what += " from " + url - } - - return "", fmt.Errorf("while downloading %s: %w", what, err) + return false, fmt.Errorf("while downloading %s: %w", i.Name, err) } - // all good, install - - parentDir := filepath.Dir(finalPath) - - if err = os.MkdirAll(parentDir, os.ModePerm); err != nil { - return "", fmt.Errorf("while creating %s: %w", parentDir, err) - } - - // check actual file - if _, err = os.Stat(finalPath); !os.IsNotExist(err) { - i.hub.logger.Warningf("%s: overwrite", i.Name) - i.hub.logger.Debugf("target: %s", finalPath) - } else { - i.hub.logger.Infof("%s: OK", i.Name) - } - - if err = os.WriteFile(finalPath, body, 0o644); err != nil { - return "", fmt.Errorf("while writing %s: %w", finalPath, err) + if downloaded { + i.hub.logger.Infof("Downloaded %s", i.Name) } i.State.Downloaded = true i.State.Tainted = false i.State.UpToDate = true - if err = downloadDataSet(i.hub.local.InstallDataDir, overwrite, bytes.NewReader(body), i.hub.logger); err != nil { - return "", fmt.Errorf("while downloading data for %s: %w", i.FileName, err) + // read content to get the list of data files + reader, err := os.Open(finalPath) + if err != nil { + return false, fmt.Errorf("while opening %s: %w", finalPath, err) } - return finalPath, nil + if err = downloadDataSet(i.hub.local.InstallDataDir, overwrite, reader, i.hub.logger); err != nil { + return false, fmt.Errorf("while downloading data for %s: %w", i.FileName, err) + } + + return true, nil } // DownloadDataIfNeeded downloads the data set for the item. diff --git a/pkg/cwhub/remote.go b/pkg/cwhub/remote.go index c1eb5a708..4839e4971 100644 --- a/pkg/cwhub/remote.go +++ b/pkg/cwhub/remote.go @@ -1,9 +1,12 @@ package cwhub import ( + "context" "fmt" - "io" - "net/http" + + "github.com/sirupsen/logrus" + + "github.com/crowdsecurity/go-cs-lib/downloader" ) // RemoteHubCfg is used to retrieve index and items from the remote hub. @@ -28,34 +31,28 @@ func (r *RemoteHubCfg) urlTo(remotePath string) (string, error) { } // fetchIndex downloads the index from the hub and returns the content. -func (r *RemoteHubCfg) fetchIndex() ([]byte, error) { +func (r *RemoteHubCfg) fetchIndex(destPath string) (bool, error) { if r == nil { - return nil, ErrNilRemoteHub + return false, ErrNilRemoteHub } url, err := r.urlTo(r.IndexPath) if err != nil { - return nil, fmt.Errorf("failed to build hub index request: %w", err) + return false, fmt.Errorf("failed to build hub index request: %w", err) } - resp, err := hubClient.Get(url) + ctx := context.Background() + + downloaded, err := downloader. + New(url). + WithHTTPClient(hubClient). + ToFile(destPath). + CompareContent(). + WithLogger(logrus.WithFields(logrus.Fields{"url": url})). + Download(ctx) if err != nil { - return nil, fmt.Errorf("failed http request for hub index: %w", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - if resp.StatusCode == http.StatusNotFound { - return nil, IndexNotFoundError{url, r.Branch} - } - - return nil, fmt.Errorf("bad http code %d for %s", resp.StatusCode, url) + return false, err } - body, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read request answer for hub index: %w", err) - } - - return body, nil + return downloaded, nil } diff --git a/pkg/cwhub/sync.go b/pkg/cwhub/sync.go index cb7bf3786..756b97d86 100644 --- a/pkg/cwhub/sync.go +++ b/pkg/cwhub/sync.go @@ -56,11 +56,11 @@ func getSHA256(filepath string) (string, error) { // information used to create a new Item, from a file path. type itemFileInfo struct { - inhub bool fname string stage string ftype string fauthor string + inhub bool } func (h *Hub) getItemFileInfo(path string, logger *logrus.Logger) (*itemFileInfo, error) {