Skip to content

[VC-43753] CyberArk Discovery and Context: Upload data in the JSON format required by the API #684

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 35 additions & 2 deletions api/datareading.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package api

import (
"bytes"
"encoding/json"
"time"

"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/version"
)

// DataReadingsPost is the payload in the upload request.
Expand All @@ -28,8 +32,8 @@ type DataReading struct {
type GatheredResource struct {
// Resource is a reference to a k8s object that was found by the informer
// should be of type unstructured.Unstructured, raw Object
Resource interface{}
DeletedAt Time
Resource interface{} `json:"resource"`
DeletedAt Time `json:"deleted_at,omitempty"`
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added json annotations here so that I can unmarshal date readings from a file, for testing.

The agent already has an --input-file option, but stops decoding the input at api.DataReading.Data, leaving the actual data as interface{}.

In the test in this PR I need to decode the Data, so that it has the same types as the DataGatherer.Fetch return values.


func (v GatheredResource) MarshalJSON() ([]byte, error) {
Expand All @@ -48,3 +52,32 @@ func (v GatheredResource) MarshalJSON() ([]byte, error) {

return json.Marshal(data)
}

func (v *GatheredResource) UnmarshalJSON(data []byte) error {
var tmpResource struct {
Resource *unstructured.Unstructured `json:"resource"`
DeletedAt Time `json:"deleted_at,omitempty"`
}

d := json.NewDecoder(bytes.NewReader(data))
d.DisallowUnknownFields()

if err := d.Decode(&tmpResource); err != nil {
return err
}
v.Resource = tmpResource.Resource
v.DeletedAt = tmpResource.DeletedAt
return nil
}

// DynamicData is the DataReading.Data returned by the k8s.DataGathererDynamic
// gatherer
type DynamicData struct {
Items []*GatheredResource `json:"items"`
}

// DiscoveryData is the DataReading.Data returned by the k8s.ConfigDiscovery
// gatherer
type DiscoveryData struct {
ServerVersion *version.Info `json:"server_version"`
}
8 changes: 8 additions & 0 deletions pkg/agent/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,14 @@ func gatherAndOutputData(ctx context.Context, eventf Eventf, config CombinedConf
var readings []*api.DataReading

if config.InputPath != "" {
// TODO(wallrj): The datareadings read from disk can not yet be pushed
// to the CyberArk Discovery and Context API. Why? Because they have
// simple data types such as map[string]interface{}. In contrast, the
// data from data gatherers can be cast to rich types like DynamicData
// or DiscoveryData The CyberArk dataupload client requires the data to
// have rich types to convert it to the Discovery and Context snapshots
// format. Consider refactoring testutil.ParseDataReadings so that it
// can be used here.
log.V(logs.Debug).Info("Reading data from local file", "inputPath", config.InputPath)
data, err := os.ReadFile(config.InputPath)
if err != nil {
Expand Down
9 changes: 0 additions & 9 deletions pkg/client/client_cyberark.go

This file was deleted.

12 changes: 5 additions & 7 deletions pkg/datagatherer/k8s/discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"k8s.io/client-go/discovery"

"github.com/jetstack/preflight/api"
"github.com/jetstack/preflight/pkg/datagatherer"
)

Expand Down Expand Up @@ -59,15 +60,12 @@ func (g *DataGathererDiscovery) WaitForCacheSync(ctx context.Context) error {

// Fetch will fetch discovery data from the apiserver, or return an error
func (g *DataGathererDiscovery) Fetch() (interface{}, int, error) {
data, err := g.cl.ServerVersion()
serverVersion, err := g.cl.ServerVersion()
if err != nil {
return nil, -1, fmt.Errorf("failed to get server version: %v", err)
}

response := map[string]interface{}{
// data has type Info: https://godoc.org/k8s.io/apimachinery/pkg/version#Info
"server_version": data,
}

return response, len(response), nil
return &api.DiscoveryData{
ServerVersion: serverVersion,
}, 1, nil
}
8 changes: 3 additions & 5 deletions pkg/datagatherer/k8s/dynamic.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,6 @@ func (g *DataGathererDynamic) Fetch() (interface{}, int, error) {
return nil, -1, fmt.Errorf("resource type must be specified")
}

var list = map[string]interface{}{}
var items = []*api.GatheredResource{}

fetchNamespaces := g.namespaces
Expand Down Expand Up @@ -344,10 +343,9 @@ func (g *DataGathererDynamic) Fetch() (interface{}, int, error) {
return nil, -1, err
}

// add gathered resources to items
list["items"] = items

return list, len(items), nil
return &api.DynamicData{
Items: items,
}, len(items), nil
}

func redactList(list []*api.GatheredResource, excludeAnnotKeys, excludeLabelKeys []*regexp.Regexp) error {
Expand Down
16 changes: 6 additions & 10 deletions pkg/datagatherer/k8s/dynamic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -730,15 +730,12 @@ func TestDynamicGatherer_Fetch(t *testing.T) {
}

if tc.expected != nil {
items, ok := res.(map[string]interface{})
data, ok := res.(*api.DynamicData)
if !ok {
t.Errorf("expected result be an map[string]interface{} but wasn't")
t.Errorf("expected result be *api.DynamicData but wasn't")
}

list, ok := items["items"].([]*api.GatheredResource)
if !ok {
t.Errorf("expected result be an []*api.GatheredResource but wasn't")
}
list := data.Items
// sorting list of results by name
sortGatheredResources(list)
// sorting list of expected results by name
Expand Down Expand Up @@ -1045,10 +1042,9 @@ func TestDynamicGathererNativeResources_Fetch(t *testing.T) {
}

if tc.expected != nil {
res, ok := rawRes.(map[string]interface{})
require.Truef(t, ok, "expected result be an map[string]interface{} but wasn't")
actual := res["items"].([]*api.GatheredResource)
require.Truef(t, ok, "expected result be an []*api.GatheredResource but wasn't")
res, ok := rawRes.(*api.DynamicData)
require.Truef(t, ok, "expected result be an *api.DynamicData but wasn't")
actual := res.Items

// sorting list of results by name
sortGatheredResources(actual)
Expand Down
3 changes: 3 additions & 0 deletions pkg/datagatherer/k8s/fieldfilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ var SecretSelectedFields = []FieldPath{
{"metadata", "ownerReferences"},
{"metadata", "selfLink"},
{"metadata", "uid"},
{"metadata", "creationTimestamp"},
{"metadata", "deletionTimestamp"},
{"metadata", "resourceVersion"},
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Cyberark backend needs this extra metadata to produce its reports.
I think it makes sense to upload this metadata for TLSPK too.
I can't see any harm in it and it will be difficult to implement different field filters for the TLSPK vs CyberArk uploaded data.


{"type"},
{"data", "tls.crt"},
Expand Down
9 changes: 7 additions & 2 deletions pkg/internal/cyberark/dataupload/dataupload.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,19 @@ func NewCyberArkClient(trustedCAs *x509.CertPool, baseURL string, authenticateRe
// PostDataReadingsWithOptions PUTs the supplied payload to an [AWS presigned URL] which it obtains via the CyberArk inventory API.
//
// [AWS presigned URL]: https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html
func (c *CyberArkClient) PostDataReadingsWithOptions(ctx context.Context, payload api.DataReadingsPost, opts Options) error {
func (c *CyberArkClient) PostDataReadingsWithOptions(ctx context.Context, readings []*api.DataReading, opts Options) error {
if opts.ClusterName == "" {
return fmt.Errorf("programmer mistake: the cluster name (aka `cluster_id` in the config file) cannot be left empty")
}

snapshot, err := convertDataReadingsToCyberarkSnapshot(readings)
if err != nil {
return fmt.Errorf("while converting datareadings to Cyberark snapshot format: %s", err)
}

encodedBody := &bytes.Buffer{}
checksum := sha3.New256()
if err := json.NewEncoder(io.MultiWriter(encodedBody, checksum)).Encode(payload); err != nil {
if err := json.NewEncoder(io.MultiWriter(encodedBody, checksum)).Encode(snapshot); err != nil {
return err
}

Expand Down
71 changes: 43 additions & 28 deletions pkg/internal/cyberark/dataupload/dataupload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package dataupload_test
import (
"crypto/x509"
"encoding/pem"
"errors"
"fmt"
"net/http"
"os"
Expand All @@ -17,28 +18,23 @@ import (
"github.com/jetstack/preflight/pkg/internal/cyberark/dataupload"
"github.com/jetstack/preflight/pkg/internal/cyberark/identity"
"github.com/jetstack/preflight/pkg/internal/cyberark/servicediscovery"
"github.com/jetstack/preflight/pkg/testutil"

_ "k8s.io/klog/v2/ktesting/init"
)

func TestCyberArkClient_PostDataReadingsWithOptions(t *testing.T) {
func TestCyberArkClient_PostDataReadingsWithOptions_MockAPI(t *testing.T) {
fakeTime := time.Unix(123, 0)
defaultPayload := api.DataReadingsPost{
AgentMetadata: &api.AgentMetadata{
Version: "test-version",
ClusterID: "test",
},
DataGatherTime: fakeTime,
DataReadings: []*api.DataReading{
{
ClusterID: "success-cluster-id",
DataGatherer: "test-gatherer",
Timestamp: api.Time{Time: fakeTime},
Data: map[string]interface{}{"test": "data"},
SchemaVersion: "v1",
},
defaultDataReadings := []*api.DataReading{
{
ClusterID: "success-cluster-id",
DataGatherer: "test-gatherer",
Timestamp: api.Time{Time: fakeTime},
Data: map[string]interface{}{"test": "data"},
SchemaVersion: "v1",
},
}

defaultOpts := dataupload.Options{
ClusterName: "success-cluster-id",
}
Expand All @@ -52,14 +48,14 @@ func TestCyberArkClient_PostDataReadingsWithOptions(t *testing.T) {

tests := []struct {
name string
payload api.DataReadingsPost
readings []*api.DataReading
authenticate func(req *http.Request) error
opts dataupload.Options
requireFn func(t *testing.T, err error)
}{
{
name: "successful upload",
payload: defaultPayload,
readings: defaultDataReadings,
opts: defaultOpts,
authenticate: setToken("success-token"),
requireFn: func(t *testing.T, err error) {
Expand All @@ -68,7 +64,7 @@ func TestCyberArkClient_PostDataReadingsWithOptions(t *testing.T) {
},
{
name: "error when cluster name is empty",
payload: defaultPayload,
readings: defaultDataReadings,
opts: dataupload.Options{ClusterName: ""},
authenticate: setToken("success-token"),
requireFn: func(t *testing.T, err error) {
Expand All @@ -77,16 +73,27 @@ func TestCyberArkClient_PostDataReadingsWithOptions(t *testing.T) {
},
{
name: "error when bearer token is incorrect",
payload: defaultPayload,
readings: defaultDataReadings,
opts: defaultOpts,
authenticate: setToken("fail-token"),
requireFn: func(t *testing.T, err error) {
require.ErrorContains(t, err, "while retrieving snapshot upload URL: received response with status code 500: should authenticate using the correct bearer token")
},
},
{
name: "error contains authenticate error",
readings: defaultDataReadings,
opts: defaultOpts,
authenticate: func(_ *http.Request) error {
return errors.New("simulated-authenticate-error")
},
requireFn: func(t *testing.T, err error) {
require.ErrorContains(t, err, "while retrieving snapshot upload URL: failed to authenticate request: simulated-authenticate-error")
},
},
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a drive-by additional test for when identityclient.Authenticate fails.

{
name: "invalid JSON from server (RetrievePresignedUploadURL step)",
payload: defaultPayload,
readings: defaultDataReadings,
opts: dataupload.Options{ClusterName: "invalid-json-retrieve-presigned"},
authenticate: setToken("success-token"),
requireFn: func(t *testing.T, err error) {
Expand All @@ -95,7 +102,7 @@ func TestCyberArkClient_PostDataReadingsWithOptions(t *testing.T) {
},
{
name: "500 from server (RetrievePresignedUploadURL step)",
payload: defaultPayload,
readings: defaultDataReadings,
opts: dataupload.Options{ClusterName: "invalid-response-post-data"},
authenticate: setToken("success-token"),
requireFn: func(t *testing.T, err error) {
Expand All @@ -106,6 +113,9 @@ func TestCyberArkClient_PostDataReadingsWithOptions(t *testing.T) {

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
logger := ktesting.NewLogger(t, ktesting.DefaultConfig)
ctx := klog.NewContext(t.Context(), logger)

server := dataupload.MockDataUploadServer()
defer server.Close()

Expand All @@ -118,22 +128,22 @@ func TestCyberArkClient_PostDataReadingsWithOptions(t *testing.T) {
cyberArkClient, err := dataupload.NewCyberArkClient(certPool, server.Server.URL, tc.authenticate)
require.NoError(t, err)

err = cyberArkClient.PostDataReadingsWithOptions(t.Context(), tc.payload, tc.opts)
err = cyberArkClient.PostDataReadingsWithOptions(ctx, tc.readings, tc.opts)
tc.requireFn(t, err)
})
}
}

// TestPostDataReadingsWithOptionsWithRealAPI demonstrates that the dataupload code works with the real inventory API.
// TestCyberArkClient_PostDataReadingsWithOptions_RealAPI demonstrates that the dataupload code works with the real inventory API.
// An API token is obtained by authenticating with the ARK_USERNAME and ARK_SECRET from the environment.
// ARK_SUBDOMAIN should be your tenant subdomain.
// ARK_PLATFORM_DOMAIN should be either integration-cyberark.cloud or cyberark.cloud
//
// To enable verbose request logging:
//
// go test ./pkg/internal/cyberark/dataupload/... \
// -v -count 1 -run TestPostDataReadingsWithOptionsWithRealAPI -args -testing.v 6
func TestPostDataReadingsWithOptionsWithRealAPI(t *testing.T) {
// -v -count 1 -run TestCyberArkClient_PostDataReadingsWithOptions_RealAPI -args -testing.v 6
func TestCyberArkClient_PostDataReadingsWithOptions_RealAPI(t *testing.T) {
platformDomain := os.Getenv("ARK_PLATFORM_DOMAIN")
subdomain := os.Getenv("ARK_SUBDOMAIN")
username := os.Getenv("ARK_USERNAME")
Expand Down Expand Up @@ -172,8 +182,13 @@ func TestPostDataReadingsWithOptionsWithRealAPI(t *testing.T) {
cyberArkClient, err := dataupload.NewCyberArkClient(nil, serviceURL, identityClient.AuthenticateRequest)
require.NoError(t, err)

err = cyberArkClient.PostDataReadingsWithOptions(ctx, api.DataReadingsPost{}, dataupload.Options{
ClusterName: "bb068932-c80d-460d-88df-34bc7f3f3297",
})
dataReadings := testutil.ParseDataReadings(t, testutil.ReadGZIP(t, "testdata/example-1/datareadings.json.gz"))
err = cyberArkClient.PostDataReadingsWithOptions(
ctx,
dataReadings,
dataupload.Options{
ClusterName: "bb068932-c80d-460d-88df-34bc7f3f3297",
},
)
require.NoError(t, err)
}
Loading