use go-cs-lib/downloader for data files, hub index, item YAML

This commit is contained in:
marco 2024-04-05 15:39:06 +02:00
parent 0f942a95f1
commit 688fd64a93
12 changed files with 132 additions and 321 deletions

View file

@ -473,11 +473,22 @@ func (cli cliItem) itemDiff(item *cwhub.Item, reverse bool) (string, error) {
return "", fmt.Errorf("'%s' is not installed", item.FQName())
}
latestContent, remoteURL, err := item.FetchLatest()
dest, err := os.CreateTemp("", "cscli-diff-*")
if err != nil {
return "", fmt.Errorf("while creating temporary file: %w", err)
}
defer os.Remove(dest.Name())
_, remoteURL, err := item.FetchContentTo(dest.Name())
if err != nil {
return "", err
}
latestContent, err := os.ReadFile(dest.Name())
if err != nil {
return "", fmt.Errorf("while reading %s: %w", dest.Name(), err)
}
localContent, err := os.ReadFile(item.State.LocalPath)
if err != nil {
return "", fmt.Errorf("while reading %s: %w", item.State.LocalPath, err)

4
go.mod
View file

@ -27,7 +27,7 @@ require (
github.com/corazawaf/libinjection-go v0.1.2
github.com/crowdsecurity/coraza/v3 v3.0.0-20240108124027-a62b8d8e5607
github.com/crowdsecurity/dlog v0.0.0-20170105205344-4fb5f8204f26
github.com/crowdsecurity/go-cs-lib v0.0.10
github.com/crowdsecurity/go-cs-lib v0.0.11-0.20240422215546-8104b9078bfd
github.com/crowdsecurity/grokky v0.2.1
github.com/crowdsecurity/machineid v1.0.2
github.com/davecgh/go-spew v1.1.1
@ -219,3 +219,5 @@ require (
)
replace golang.org/x/time/rate => github.com/crowdsecurity/crowdsec/pkg/time/rate v0.0.0
replace github.com/crowdsecurity/go-cs-lib => /home/marco/src/go-cs-lib

2
go.sum
View file

@ -102,8 +102,6 @@ github.com/crowdsecurity/coraza/v3 v3.0.0-20240108124027-a62b8d8e5607 h1:hyrYw3h
github.com/crowdsecurity/coraza/v3 v3.0.0-20240108124027-a62b8d8e5607/go.mod h1:br36fEqurGYZQGit+iDYsIzW0FF6VufMbDzyyLxEuPA=
github.com/crowdsecurity/dlog v0.0.0-20170105205344-4fb5f8204f26 h1:r97WNVC30Uen+7WnLs4xDScS/Ex988+id2k6mDf8psU=
github.com/crowdsecurity/dlog v0.0.0-20170105205344-4fb5f8204f26/go.mod h1:zpv7r+7KXwgVUZnUNjyP22zc/D7LKjyoY02weH2RBbk=
github.com/crowdsecurity/go-cs-lib v0.0.10 h1:Twt/y/rYCUspGY1zxDnGurL2svRSREAz+2+puLepd9c=
github.com/crowdsecurity/go-cs-lib v0.0.10/go.mod h1:8FMKNGsh3hMZi2SEv6P15PURhEJnZV431XjzzBSuf0k=
github.com/crowdsecurity/grokky v0.2.1 h1:t4VYnDlAd0RjDM2SlILalbwfCrQxtJSMGdQOR0zwkE4=
github.com/crowdsecurity/grokky v0.2.1/go.mod h1:33usDIYzGDsgX1kHAThCbseso6JuWNJXOzRQDGXHtWM=
github.com/crowdsecurity/machineid v1.0.2 h1:wpkpsUghJF8Khtmn/tg6GxgdhLA1Xflerh5lirI+bdc=

View file

@ -23,7 +23,7 @@ func (t *hubTransport) RoundTrip(req *http.Request) (*http.Response, error) {
// hubClient is the HTTP client used to communicate with the CrowdSec Hub.
var hubClient = &http.Client{
Timeout: 120 * time.Second,
Timeout: 120 * time.Second,
Transport: &hubTransport{http.DefaultTransport},
}

View file

@ -1,19 +1,17 @@
package cwhub
import (
"context"
"errors"
"fmt"
"io"
"io/fs"
"net/http"
"os"
"path/filepath"
"runtime"
"time"
"github.com/sirupsen/logrus"
"gopkg.in/yaml.v3"
"github.com/crowdsecurity/go-cs-lib/downloader"
"github.com/crowdsecurity/crowdsec/pkg/types"
)
@ -22,128 +20,6 @@ type DataSet struct {
Data []types.DataSource `yaml:"data,omitempty"`
}
// downloadFile fetches url with hubClient and stores the response body at
// destPath, with no hash verification.
//
// The body is streamed into a temporary file created next to destPath, synced,
// closed, and only then renamed to destPath. Any response other than 200 OK,
// and any failure while creating, writing, syncing, closing or renaming the
// file, is returned as an error; the temporary file is removed on every exit
// path.
//
// On success a line "updated <base name of destPath>" is written to stdout.
func downloadFile(url string, destPath string) error {
	resp, err := hubClient.Get(url)
	if err != nil {
		return fmt.Errorf("while downloading %s: %w", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("bad http code %d for %s", resp.StatusCode, url)
	}

	// Download to a temporary location to avoid corrupting files
	// that are currently in use or memory mapped.
	tmpFile, err := os.CreateTemp(filepath.Dir(destPath), filepath.Base(destPath)+".*.tmp")
	if err != nil {
		return err
	}

	tmpFileName := tmpFile.Name()

	defer func() {
		// Best-effort cleanup. On the success path the file is already closed
		// and renamed, so both calls fail; those errors are deliberately ignored.
		_ = tmpFile.Close()
		_ = os.Remove(tmpFileName)
	}()

	// avoid reading the whole file in memory
	if _, err = io.Copy(tmpFile, resp.Body); err != nil {
		return err
	}

	if err = tmpFile.Sync(); err != nil {
		return err
	}

	if err = tmpFile.Close(); err != nil {
		return err
	}

	if runtime.GOOS == "windows" {
		// On Windows, rename will fail if the destination file already exists
		// so we remove it first. A missing destination is not an error.
		if err = os.Remove(destPath); err != nil && !errors.Is(err, fs.ErrNotExist) {
			return err
		}
	}

	if err = os.Rename(tmpFileName, destPath); err != nil {
		return err
	}

	// a check on stdout is used while scripting to know if the hub has been upgraded
	// and a configuration reload is required, so this must only be printed once
	// the new file is actually in place.
	// TODO: use a better way to communicate this
	fmt.Printf("updated %s\n", filepath.Base(destPath))

	return nil
}
// needsUpdate reports whether the data file at destPath should be downloaded
// again from url.
//
// It returns true when:
//   - the local file cannot be stat'ed (missing, or any other stat error, which
//     is logged unless the file simply does not exist);
//   - the remote Last-Modified date is later than the local modification time;
//   - the remote sends no (or an unparsable) Last-Modified header and the local
//     file was modified more than a week ago.
//
// It returns false when the HEAD request fails or answers with anything other
// than 200 OK, and when the local file is not older than the remote one.
func needsUpdate(destPath string, url string, logger *logrus.Logger) bool {
	// update if local file is older than this
	const maxAge = 7 * 24 * time.Hour

	info, statErr := os.Stat(destPath)
	if statErr != nil {
		// A missing file is the normal "first download" case and is not logged.
		if !os.IsNotExist(statErr) {
			logger.Errorf("while getting %s: %s", destPath, statErr)
		}

		return true
	}

	head, headErr := hubClient.Head(url)
	if headErr != nil {
		logger.Errorf("while getting %s: %s", url, headErr)
		// Head failed, Get would likely fail too -> no update
		return false
	}
	defer head.Body.Close()

	if head.StatusCode != http.StatusOK {
		logger.Errorf("bad http code %d for %s", head.StatusCode, url)
		return false
	}

	localModTime := info.ModTime()
	localExpired := time.Now().After(localModTime.Add(maxAge))

	headerValue := head.Header.Get("Last-Modified")
	if headerValue == "" {
		if localExpired {
			logger.Infof("no last modified date for %s, but local file is older than %s", url, maxAge)
		}

		return localExpired
	}

	remoteModTime, parseErr := time.Parse(time.RFC1123, headerValue)
	if parseErr != nil {
		// Without a usable remote date, fall back to the age of the local file.
		logger.Warningf("while parsing last modified date for %s: %s", url, parseErr)
		return localExpired
	}

	remoteIsNewer := localModTime.Before(remoteModTime)
	if remoteIsNewer {
		logger.Infof("new version available, updating %s", destPath)
	}

	return remoteIsNewer
}
// downloadDataSet downloads all the data files for an item.
func downloadDataSet(dataFolder string, force bool, reader io.Reader, logger *logrus.Logger) error {
dec := yaml.NewDecoder(reader)
@ -165,12 +41,32 @@ func downloadDataSet(dataFolder string, force bool, reader io.Reader, logger *lo
return err
}
if force || needsUpdate(destPath, dataS.SourceURL, logger) {
logger.Debugf("downloading %s in %s", dataS.SourceURL, destPath)
d := downloader.
New(dataS.SourceURL).
WithHTTPClient(hubClient).
ToFile(destPath).
CompareContent().
WithLogger(logrus.WithFields(logrus.Fields{"url": dataS.SourceURL}))
if err := downloadFile(dataS.SourceURL, destPath); err != nil {
return fmt.Errorf("while getting data: %w", err)
}
if !force {
d = d.WithLastModified().
WithShelfLife(7 * 24 * time.Hour)
}
// TODO: real context
ctx := context.Background()
downloaded, err := d.Download(ctx)
if err != nil {
return fmt.Errorf("while getting data: %w", err)
}
if downloaded {
logger.Infof("Downloaded %s", destPath)
// a check on stdout is used while scripting to know if the hub has been upgraded
// and a configuration reload is required
// TODO: use a better way to communicate this
fmt.Printf("updated %s\n", destPath)
}
}
}

View file

@ -1,56 +0,0 @@
package cwhub
import (
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/crowdsecurity/go-cs-lib/cstest"
)
// TestDownloadFile exercises downloadFile against a local test server that
// answers 200 with a fixed body on "/xx" and 404 on every other path.
// It covers a successful download, an unresolvable host, a 404 response,
// an empty destination path and a destination whose directory does not exist.
func TestDownloadFile(t *testing.T) {
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/xx":
			w.WriteHeader(http.StatusOK)
			_, _ = io.WriteString(w, "example content oneoneone")
		default:
			w.WriteHeader(http.StatusNotFound)
			_, _ = io.WriteString(w, "not found")
		}
	}))
	defer ts.Close()

	// The directory returned by t.TempDir is removed automatically when the
	// test completes, so dest needs no explicit cleanup.
	dest := filepath.Join(t.TempDir(), "example.txt")

	err := downloadFile(ts.URL+"/xx", dest)
	require.NoError(t, err)

	// Check the read error before looking at the content: on failure the
	// content is empty and a mismatch message would hide the real cause.
	content, err := os.ReadFile(dest)
	require.NoError(t, err)
	assert.Equal(t, "example content oneoneone", string(content))

	// bad uri
	err = downloadFile("https://zz.com", dest)
	cstest.RequireErrorContains(t, err, "lookup zz.com")
	cstest.RequireErrorContains(t, err, "no such host")

	// 404
	err = downloadFile(ts.URL+"/x", dest)
	cstest.RequireErrorContains(t, err, "bad http code 404")

	// bad target
	err = downloadFile(ts.URL+"/xx", "")
	cstest.RequireErrorContains(t, err, cstest.PathNotFoundMessage)

	// destination directory does not exist
	err = downloadFile(ts.URL+"/xx", filepath.Join(t.TempDir(), "missing/example.txt"))
	cstest.RequireErrorContains(t, err, cstest.PathNotFoundMessage)
}

View file

@ -1,7 +1,6 @@
package cwhub
import (
"bytes"
"encoding/json"
"errors"
"fmt"
@ -21,8 +20,8 @@ type Hub struct {
items HubItems // Items read from HubDir and InstallDir
local *csconfig.LocalHubCfg
remote *RemoteHubCfg
Warnings []string // Warnings encountered during sync
logger *logrus.Logger
Warnings []string // Warnings encountered during sync
}
// GetDataDir returns the data directory, where data sets are installed.
@ -150,27 +149,17 @@ func (h *Hub) ItemStats() []string {
// updateIndex downloads the latest version of the index and writes it to disk if it changed.
func (h *Hub) updateIndex() error {
body, err := h.remote.fetchIndex()
downloaded, err := h.remote.fetchIndex(h.local.HubIndexFile)
if err != nil {
return err
}
oldContent, err := os.ReadFile(h.local.HubIndexFile)
if err != nil {
if !os.IsNotExist(err) {
h.logger.Warningf("failed to read hub index: %s", err)
}
} else if bytes.Equal(body, oldContent) {
if downloaded {
h.logger.Infof("Wrote index to %s", h.local.HubIndexFile)
} else {
h.logger.Info("hub index is up to date")
return nil
}
if err = os.WriteFile(h.local.HubIndexFile, body, 0o644); err != nil {
return fmt.Errorf("failed to write hub index: %w", err)
}
h.logger.Infof("Wrote index to %s, %d bytes", h.local.HubIndexFile, len(body))
return nil
}

View file

@ -29,10 +29,8 @@ const (
versionFuture // local version is higher latest, but is included in the index: should not happen
)
var (
// The order is important, as it is used to range over sub-items in collections.
ItemTypes = []string{PARSERS, POSTOVERFLOWS, SCENARIOS, CONTEXTS, APPSEC_CONFIGS, APPSEC_RULES, COLLECTIONS}
)
// The order is important, as it is used to range over sub-items in collections.
var ItemTypes = []string{PARSERS, POSTOVERFLOWS, SCENARIOS, CONTEXTS, APPSEC_CONFIGS, APPSEC_RULES, COLLECTIONS}
type HubItems map[string]map[string]*Item

View file

@ -48,16 +48,21 @@ func (i *Item) Install(force bool, downloadOnly bool) error {
}
}
filePath, err := i.downloadLatest(force, true)
downloaded, err := i.downloadLatest(force, true)
if err != nil {
return err
}
if downloadOnly {
i.hub.logger.Infof("Downloaded %s to %s", i.Name, filePath)
if downloadOnly && downloaded {
i.hub.logger.Infof("Downloaded %s", i.Name)
return nil
}
// a check on stdout is used while scripting to know if the hub has been upgraded
// and a configuration reload is required
// TODO: use a better way to communicate this
fmt.Printf("installed %s\n", i.Name)
if err := i.enable(); err != nil {
return fmt.Errorf("while enabling %s: %w", i.Name, err)
}

View file

@ -3,15 +3,14 @@ package cwhub
// Install, upgrade and remove items from the hub to the local configuration
import (
"bytes"
"crypto/sha256"
"encoding/hex"
"context"
"errors"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"github.com/sirupsen/logrus"
"github.com/crowdsecurity/go-cs-lib/downloader"
"github.com/crowdsecurity/crowdsec/pkg/emoji"
)
@ -68,7 +67,7 @@ func (i *Item) Upgrade(force bool) (bool, error) {
}
// downloadLatest downloads the latest version of the item to the hub directory.
func (i *Item) downloadLatest(overwrite bool, updateOnly bool) (string, error) {
func (i *Item) downloadLatest(overwrite bool, updateOnly bool) (bool, error) {
i.hub.logger.Debugf("Downloading %s %s", i.Type, i.Name)
for _, sub := range i.SubItems() {
@ -84,98 +83,85 @@ func (i *Item) downloadLatest(overwrite bool, updateOnly bool) (string, error) {
i.hub.logger.Tracef("collection, recurse")
if _, err := sub.downloadLatest(overwrite, updateOnly); err != nil {
return "", err
return false, err
}
}
downloaded := sub.State.Downloaded
if _, err := sub.download(overwrite); err != nil {
return "", err
return false, err
}
// We need to enable an item when it has been added to a collection since latest release of the collection.
// We check if sub.Downloaded is false because maybe the item has been disabled by the user.
if !sub.State.Installed && !downloaded {
if err := sub.enable(); err != nil {
return "", fmt.Errorf("enabling '%s': %w", sub.Name, err)
return false, fmt.Errorf("enabling '%s': %w", sub.Name, err)
}
}
}
if !i.State.Installed && updateOnly && i.State.Downloaded && !overwrite {
i.hub.logger.Debugf("skipping upgrade of %s: not installed", i.Name)
return "", nil
return false, nil
}
ret, err := i.download(overwrite)
if err != nil {
return "", err
}
return ret, nil
return i.download(overwrite)
}
// FetchLatest downloads the latest item from the hub, verifies the hash and returns the content and the used url.
func (i *Item) FetchLatest() ([]byte, string, error) {
if i.latestHash() == "" {
return nil, "", errors.New("latest hash missing from index")
}
// FetchContentTo downloads the last version of the item's YAML file to the specified path.
func (i *Item) FetchContentTo(destPath string) (bool, string, error) {
url, err := i.hub.remote.urlTo(i.RemotePath)
if err != nil {
return nil, "", fmt.Errorf("failed to build request: %w", err)
return false, "", fmt.Errorf("failed to build request: %w", err)
}
resp, err := hubClient.Get(url)
wantHash := i.latestHash()
if wantHash == "" {
return false, "", errors.New("latest hash missing from index")
}
d := downloader.
New(url).
WithHTTPClient(hubClient).
ToFile(destPath).
WithMakeDirs(true).
WithLogger(logrus.WithFields(logrus.Fields{"url": url})).
CompareContent().
VerifyHash("sha256", wantHash)
// TODO: recommend hub update if hash does not match
// TODO: use real context
ctx := context.Background()
downloaded, err := d.Download(ctx)
if err != nil {
return nil, "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, "", fmt.Errorf("bad http code %d", resp.StatusCode)
return false, "", fmt.Errorf("while downloading %s to %s: %w", i.Name, url, err)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, "", err
}
hash := sha256.New()
if _, err = hash.Write(body); err != nil {
return nil, "", fmt.Errorf("while hashing %s: %w", i.Name, err)
}
meow := hex.EncodeToString(hash.Sum(nil))
if meow != i.Versions[i.Version].Digest {
i.hub.logger.Errorf("Downloaded version doesn't match index, please 'hub update'")
i.hub.logger.Debugf("got %s, expected %s", meow, i.Versions[i.Version].Digest)
return nil, "", errors.New("invalid download hash")
}
return body, url, nil
return downloaded, url, nil
}
// download downloads the item from the hub and writes it to the hub directory.
func (i *Item) download(overwrite bool) (string, error) {
func (i *Item) download(overwrite bool) (bool, error) {
// ensure that target file is within target dir
finalPath, err := i.downloadPath()
if err != nil {
return "", err
return false, err
}
if i.State.IsLocal() {
i.hub.logger.Warningf("%s is local, can't download", i.Name)
return finalPath, nil
return false, nil
}
// if user didn't --force, don't overwrite local, tainted, up-to-date files
if !overwrite {
if i.State.Tainted {
i.hub.logger.Debugf("%s: tainted, not updated", i.Name)
return "", nil
return false, nil
}
if i.State.UpToDate {
@ -184,45 +170,30 @@ func (i *Item) download(overwrite bool) (string, error) {
}
}
body, url, err := i.FetchLatest()
downloaded, _, err := i.FetchContentTo(finalPath)
if err != nil {
what := i.Name
if url != "" {
what += " from " + url
}
return "", fmt.Errorf("while downloading %s: %w", what, err)
return false, fmt.Errorf("while downloading %s: %w", i.Name, err)
}
// all good, install
parentDir := filepath.Dir(finalPath)
if err = os.MkdirAll(parentDir, os.ModePerm); err != nil {
return "", fmt.Errorf("while creating %s: %w", parentDir, err)
}
// check actual file
if _, err = os.Stat(finalPath); !os.IsNotExist(err) {
i.hub.logger.Warningf("%s: overwrite", i.Name)
i.hub.logger.Debugf("target: %s", finalPath)
} else {
i.hub.logger.Infof("%s: OK", i.Name)
}
if err = os.WriteFile(finalPath, body, 0o644); err != nil {
return "", fmt.Errorf("while writing %s: %w", finalPath, err)
if downloaded {
i.hub.logger.Infof("Downloaded %s", i.Name)
}
i.State.Downloaded = true
i.State.Tainted = false
i.State.UpToDate = true
if err = downloadDataSet(i.hub.local.InstallDataDir, overwrite, bytes.NewReader(body), i.hub.logger); err != nil {
return "", fmt.Errorf("while downloading data for %s: %w", i.FileName, err)
// read content to get the list of data files
reader, err := os.Open(finalPath)
if err != nil {
return false, fmt.Errorf("while opening %s: %w", finalPath, err)
}
return finalPath, nil
if err = downloadDataSet(i.hub.local.InstallDataDir, overwrite, reader, i.hub.logger); err != nil {
return false, fmt.Errorf("while downloading data for %s: %w", i.FileName, err)
}
return true, nil
}
// DownloadDataIfNeeded downloads the data set for the item.

View file

@ -1,9 +1,12 @@
package cwhub
import (
"context"
"fmt"
"io"
"net/http"
"github.com/sirupsen/logrus"
"github.com/crowdsecurity/go-cs-lib/downloader"
)
// RemoteHubCfg is used to retrieve index and items from the remote hub.
@ -28,34 +31,28 @@ func (r *RemoteHubCfg) urlTo(remotePath string) (string, error) {
}
// fetchIndex downloads the index from the hub and returns the content.
func (r *RemoteHubCfg) fetchIndex() ([]byte, error) {
func (r *RemoteHubCfg) fetchIndex(destPath string) (bool, error) {
if r == nil {
return nil, ErrNilRemoteHub
return false, ErrNilRemoteHub
}
url, err := r.urlTo(r.IndexPath)
if err != nil {
return nil, fmt.Errorf("failed to build hub index request: %w", err)
return false, fmt.Errorf("failed to build hub index request: %w", err)
}
resp, err := hubClient.Get(url)
ctx := context.Background()
downloaded, err := downloader.
New(url).
WithHTTPClient(hubClient).
ToFile(destPath).
CompareContent().
WithLogger(logrus.WithFields(logrus.Fields{"url": url})).
Download(ctx)
if err != nil {
return nil, fmt.Errorf("failed http request for hub index: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
if resp.StatusCode == http.StatusNotFound {
return nil, IndexNotFoundError{url, r.Branch}
}
return nil, fmt.Errorf("bad http code %d for %s", resp.StatusCode, url)
return false, err
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read request answer for hub index: %w", err)
}
return body, nil
return downloaded, nil
}

View file

@ -56,11 +56,11 @@ func getSHA256(filepath string) (string, error) {
// information used to create a new Item, from a file path.
type itemFileInfo struct {
inhub bool
fname string
stage string
ftype string
fauthor string
inhub bool
}
func (h *Hub) getItemFileInfo(path string, logger *logrus.Logger) (*itemFileInfo, error) {