Skip to content
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,14 @@ Note: most of the resource outputs are given in 3 file types: `.json`, `.yaml`,

## Testing

You can run the script locally from your workstation.
To do that you need an OpenShift cluster and you will have to install the Red Hat GitOps Operator.
Then you can run the script like this:
To do that you need an OpenShift cluster, and you will have to install the Red Hat GitOps Operator.
Then you can test how your changes affect the gathered data:

```shell
chmod +x ./gather_gitops.sh
./gather_gitops.sh --base-collection-path .
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does not work since #23.

# You may need to create the repository on quay.io manually to make sure it is public
make REGISTRY_USERNAME=my-non-production-org CONTAINER_IMAGE_TAG="$(git rev-parse HEAD)" push
# Note that some differences are expected, such as a few lines in rapidly populated logs
./test/compare.sh registry.redhat.io/openshift-gitops-1/must-gather-rhel8:"$SOME_OLD_VERSION" quay.io/my-non-production-org/gitops-must-gather:"$(git rev-parse HEAD)"
```

Last but not least, please make sure you run `make lint` before pushing new changes.
Expand Down
26 changes: 19 additions & 7 deletions gather_gitops.sh
Original file line number Diff line number Diff line change
@@ -1,23 +1,32 @@
#!/usr/bin/env bash

set -eu -o pipefail
s=declare_out_of_trap_script # Workaround for https://github.com/koalaman/shellcheck/issues/3287
trap 's=$?; echo >&2 "$0: Error on line "$LINENO": $BASH_COMMAND"; exit $s' ERR

LOGS_DIR="/must-gather"

mkdir -p ${LOGS_DIR}

# Name of the CSV currently referenced by the "openshift-gitops-operator" subscription (empty when no such subscription is found).
GITOPS_CURRENT_CSV=$(oc get subscription.operators.coreos.com --ignore-not-found -A -o json | jq '.items[] | select(.metadata.name=="openshift-gitops-operator") | .status.currentCSV' -r)
# Names of the CRDs owned by that CSV whose name does not contain "argoproj.io", one array element per CRD.
if [ -z "$GITOPS_CURRENT_CSV" ]; then
  NON_ARGO_CRD_NAMES=()
else
  # jq has to print one name per line (-r, not -j): readarray splits its input on newlines,
  # so joined output would collapse all the names into a single, non-existent CRD name.
  readarray -t NON_ARGO_CRD_NAMES < <(oc get csv --ignore-not-found "$GITOPS_CURRENT_CSV" -o json | jq '.spec.customresourcedefinitions.owned[] | select(.name | contains("argoproj.io") | not) | .name' -r)
fi

# Gathering the cluster version and all the CRDs related to operators.coreos.com and argoproj.io
echo "gather_gitops:$LINENO] inspecting crd, clusterversion .." | tee -a ${LOGS_DIR}/gather_gitops.log
readarray -t UPSTREAM_CRDS < <(oc get crd -o name | grep -Ei "argoproj.io|operators.coreos.com")
# Getting non.existent.crd is a hack to avoid getting all available crds in the cluster in case there are no owned resources that do not contain "argoproj.io"
oc adm inspect --dest-dir=${LOGS_DIR} "$(oc get crd -o name | grep -Ei "argoproj.io|operators.coreos.com")" "$(oc get crd non.existent.crd --ignore-not-found "$(oc get csv --ignore-not-found "$GITOPS_CURRENT_CSV" -o json | jq '.spec.customresourcedefinitions.owned[] | select(.name | contains("argoproj.io") | not) | " " + .name' -rj)" -o name)" clusterversion/version > /dev/null
readarray -t NON_ARGO_CRDS < <(oc get crd non.existent.crd --ignore-not-found "${NON_ARGO_CRD_NAMES[@]}" -o name)
oc adm inspect --dest-dir=${LOGS_DIR} "${UPSTREAM_CRDS[@]}" "${NON_ARGO_CRDS[@]}" clusterversion/version > /dev/null

# Gathering all namespaced custom resources across the cluster that contains "argoproj.io" related custom resources
oc get crd -o json | jq -r '.items[] | select((.spec.group | contains ("argoproj.io")) and .spec.scope=="Namespaced") | .spec.group + " " + .metadata.name + " " + .spec.names.plural' |
while read -r API_GROUP APIRESOURCE API_PLURAL_NAME; do
echo "gather_gitops:$LINENO] collecting ${APIRESOURCE} .." | tee -a ${LOGS_DIR}/gather_gitops.log
NAMESPACES=$(oc get "${APIRESOURCE}" --all-namespaces=true --ignore-not-found -o jsonpath='{range .items[*]}{@.metadata.namespace}{"\n"}{end}' | uniq)
readarray -t NAMESPACES < <(oc get "${APIRESOURCE}" --all-namespaces=true --ignore-not-found -o jsonpath='{range .items[*]}{@.metadata.namespace}{"\n"}{end}' | uniq)
for NAMESPACE in "${NAMESPACES[@]}"; do
mkdir -p "${LOGS_DIR}/namespaces/${NAMESPACE}/${API_GROUP}"
oc get "${APIRESOURCE}" -n "${NAMESPACE}" -o=yaml >"${LOGS_DIR}/namespaces/${NAMESPACE}/${API_GROUP}/${API_PLURAL_NAME}.yaml"
Expand All @@ -26,10 +35,10 @@ done

# Gathering all namespaced custom resources across the cluster that are owned by gitops-operator but do not contain "argoproj.io" related custom resources
# Getting "non.existent.crd" is a hack to be sure that the output is a list of items even if it only contains zero or a single item
oc get crd --ignore-not-found non.existent.crd "$(oc get csv --ignore-not-found "$GITOPS_CURRENT_CSV" -o json | jq '.spec.customresourcedefinitions.owned[] | select(.name | contains("argoproj.io") | not) | " " + .name' -rj)" -o json | jq -r '.items[] | select((.spec.group | contains ("argoproj.io")) and .spec.scope=="Namespaced") | .spec.group + " " + .metadata.name + " " + .spec.names.plural' |
oc get crd --ignore-not-found non.existent.crd "${NON_ARGO_CRD_NAMES[@]}" -o json | jq -r '.items[] | select((.spec.group | contains ("argoproj.io")) and .spec.scope=="Namespaced") | .spec.group + " " + .metadata.name + " " + .spec.names.plural' |
while read -r API_GROUP APIRESOURCE API_PLURAL_NAME; do
echo "gather_gitops:$LINENO] collecting ${APIRESOURCE} .." | tee -a ${LOGS_DIR}/gather_gitops.log
NAMESPACES=$(oc get "${APIRESOURCE}" --all-namespaces=true --ignore-not-found -o jsonpath='{range .items[*]}{@.metadata.namespace}{"\n"}{end}' | uniq)
readarray -t NAMESPACES < <(oc get "${APIRESOURCE}" --all-namespaces=true --ignore-not-found -o jsonpath='{range .items[*]}{@.metadata.namespace}{"\n"}{end}' | uniq)
for NAMESPACE in "${NAMESPACES[@]}"; do
mkdir -p "${LOGS_DIR}/namespaces/${NAMESPACE}/${API_GROUP}"
oc get "${APIRESOURCE}" -n "${NAMESPACE}" -o=yaml >"${LOGS_DIR}/namespaces/${NAMESPACE}/${API_GROUP}/${API_PLURAL_NAME}.yaml"
Expand All @@ -46,7 +55,7 @@ done

# Gathering all cluster-scoped custom resources across the cluster that are owned by gitops-operator but do not contain "argoproj.io"
# Getting "non.existent.crd" is a hack to be sure that the output is a list of items even if it only contains zero or a single item
oc get crd --ignore-not-found non.existent.crd "$(oc get csv --ignore-not-found "$GITOPS_CURRENT_CSV" -o json | jq '.spec.customresourcedefinitions.owned[] | select(.name | contains("argoproj.io") | not) | " " + .name' -rj)" -o json | jq -r '.items[] | select((.spec.group | contains ("argoproj.io")) and .spec.scope=="Namespaced") | .spec.group + " " + .metadata.name + " " + .spec.names.plural' |
oc get crd --ignore-not-found non.existent.crd "${NON_ARGO_CRD_NAMES[@]}" -o json | jq -r '.items[] | select((.spec.group | contains ("argoproj.io")) and .spec.scope=="Namespaced") | .spec.group + " " + .metadata.name + " " + .spec.names.plural' |
while read -r API_GROUP APIRESOURCE API_PLURAL_NAME; do
mkdir -p "${LOGS_DIR}/cluster-scoped-resources/${API_GROUP}"
echo "gather_gitops:$LINENO] collecting ${APIRESOURCE} .." | tee -a ${LOGS_DIR}/gather_gitops.log
Expand All @@ -55,13 +64,16 @@ done

# Inspecting namespaces reported in ARGOCD_CLUSTER_CONFIG_NAMESPACES, openshift-gitops and openshift-gitops-operator, and namespaces containing ArgoCD instances
echo "gather_gitops:$LINENO] inspecting \$ARGOCD_CLUSTER_CONFIG_NAMESPACES, openshift-gitops and openshift-gitops-operator namespaces and namespaces containing ArgoCD instances .." | tee -a ${LOGS_DIR}/gather_gitops.log
oc get ns --ignore-not-found "$(oc get subs -A --ignore-not-found -o json | jq '.items[] | select(.metadata.name=="openshift-gitops-operator") | .spec.config.env[]?|select(.name=="ARGOCD_CLUSTER_CONFIG_NAMESPACES")| " " + .value | sub(","; " ")' -rj)" "$(oc get ArgoCD,Rollout,RolloutManager -A -o json | jq '.items[] | " " + .metadata.namespace' -rj)" openshift-gitops openshift-gitops-operator -o json \
# Namespaces listed in the ARGOCD_CLUSTER_CONFIG_NAMESPACES variable of the "openshift-gitops-operator" subscription, one array element per namespace.
# The value is a comma-separated list, so it is split and printed one entry per line (-r, not -j):
# readarray splits on newlines, and joined output would end up as a single element holding every name.
readarray -t SUBSCRIPTIONS < <(oc get subs -A --ignore-not-found -o json | jq '.items[] | select(.metadata.name=="openshift-gitops-operator") | .spec.config.env[]?|select(.name=="ARGOCD_CLUSTER_CONFIG_NAMESPACES")| (.value // "") | split(",")[] | select(length > 0)' -r)
# Namespaces of all ArgoCD, Rollout and RolloutManager resources, one array element per resource (duplicates are removed later by the consumer's "unique").
readarray -t ARGO_CRDS < <(oc get ArgoCD,Rollout,RolloutManager -A -o json | jq '.items[] | .metadata.namespace' -r)
oc get ns --ignore-not-found "${SUBSCRIPTIONS[@]}" "${ARGO_CRDS[@]}" openshift-gitops openshift-gitops-operator -o json \
| jq '.items | unique |.[] | .metadata.name' -r |
while read -r NAMESPACE; do
echo "gather_gitops:$LINENO] inspecting namespace $NAMESPACE .." | tee -a ${LOGS_DIR}/gather_gitops.log
oc adm inspect --dest-dir=${LOGS_DIR} "ns/$NAMESPACE" > /dev/null
echo "gather_gitops:$LINENO] inspecting csv,sub,ip for namespace $NAMESPACE .." | tee -a ${LOGS_DIR}/gather_gitops.log
oc adm inspect --dest-dir=${LOGS_DIR} "$(oc get --ignore-not-found clusterserviceversions.operators.coreos.com,installplans.operators.coreos.com,subscriptions.operators.coreos.com -o name -n "$NAMESPACE")" -n "$NAMESPACE" &> /dev/null \
readarray -t CSVS_SUBS_IPS < <(oc get --ignore-not-found clusterserviceversions.operators.coreos.com,installplans.operators.coreos.com,subscriptions.operators.coreos.com -o name -n "$NAMESPACE")
oc adm inspect --dest-dir=${LOGS_DIR} "${CSVS_SUBS_IPS[@]}" -n "$NAMESPACE" &> /dev/null \
|| echo "gather_gitops:$LINENO] no csv,sub,ip found in namespace $NAMESPACE .." | tee -a ${LOGS_DIR}/gather_gitops.log
done

Expand Down
63 changes: 63 additions & 0 deletions test/compare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env bash
# https://github.com/olivergondza/bash-strict-mode
set -eEuo pipefail
s=declare_out_of_trap_script # Workaround for https://github.com/koalaman/shellcheck/issues/3287
trap 's=$?; echo >&2 "$0: Error on line "$LINENO": $BASH_COMMAND"; exit $s' ERR

# Entry point: gather data with two must-gather images and diff the results.
# Arguments:
#   $1 - first image reference
#   $2 - second image reference
# Globals:
#   inv_a, inv_b (written) - temporary output directories; left global because
#   the single-quoted EXIT trap expands them only when it fires.
# Returns: the exit status of diff; exits 1 on wrong usage.
main() {
  if (( $# != 2 )); then
    echo >&2 "Usage: $0 [IMAGE_A] [IMAGE_B]"
    exit 1
  fi
  local first_image=$1 second_image=$2
  # resourceVersion lines are excluded from the comparison
  local -a diff_opts=(--color=auto --recursive --ignore-matching-lines="resourceVersion: ")

  inv_a="$(mktemp -d gitops-must-gather-A-XXXX)"
  inv_b="$(mktemp -d gitops-must-gather-B-XXXX)"
  trap 'rm -rf "${inv_a}" "${inv_b}"' EXIT

  gather "$first_image" "$inv_a"
  gather "$second_image" "$inv_b"

  diff "${diff_opts[@]}" "${inv_a}" "${inv_b}"
}

# Run "oc adm must-gather" with the given image and sanitize what it produced.
# Arguments:
#   $1 - must-gather image reference
#   $2 - existing directory to gather into; the command output is also saved
#        there as oc-adm-output.log
# Returns: 1 when the must-gather pipeline fails, otherwise the status of sanitize.
function gather() {
  # Keep these local so that they do not leak into (or clobber) the caller's scope
  local image=$1
  local dir=$2

  # Detecting a failure of "oc" through the pipe relies on "pipefail" set at the top of the script
  if ! oc adm must-gather --image="$image" --dest-dir="${dir}" 2>&1 | tee "${dir}/oc-adm-output.log"; then
    echo >&2 "Failed gathering for $image"
    return 1
  fi

  sanitize "$image" "$dir"
}

# Normalize a must-gather output directory so that two runs can be compared with diff.
# Arguments:
#   $1 - image reference the data was gathered with
#   $2 - must-gather destination directory (modified in place)
function sanitize() {
  # Keep these local so that they do not leak into (or clobber) the caller's scope
  local image=$1
  local dir=$2
  local image_re ts

  # The name of the resource directory is based on the image name; give it a fixed name
  mv "$dir"/*-sha256-* "$dir/__RESOURCES__"

  # Escape regex metacharacters (and the "~" delimiter used below) so that the image
  # reference is matched literally - otherwise e.g. "." would match any character
  image_re="$(printf '%s' "$image" | sed -e 's/[][\\.^$*+?(){}|~]/\\&/g')"

  # In log files, drop image name, generated resource names, timestamp, line numbers, and transfer metrics
  sed -i -r \
    -e "s~${image_re}~__IMAGE_TAG__~g" \
    -e "s~must-gather-[a-z0-9]{5}~must-gather-XXXXX~g" \
    -e 's~[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+Z~__TIMESTAMP__~g' \
    -e 's~gather_gitops:[0-9]+~gather_gitops:LL~g' \
    -e '/total size is .* speedup is .*/d' \
    -e '/sent .* received .* bytes\/sec/d' \
    "$dir/oc-adm-output.log" "$dir/must-gather.logs" "$dir/__RESOURCES__/gather.logs" "$dir/__RESOURCES__/gather_gitops.log"

  # Timestamps are not going to match, just test there is the same number of them:
  # each timestamp file is replaced by its own line count
  ts="$(wc -l < "$dir/timestamp")"
  echo "$ts" > "$dir/timestamp"
  ts="$(wc -l < "$dir/__RESOURCES__/timestamp")"
  echo "$ts" > "$dir/__RESOURCES__/timestamp"
}

main "$@"