Add Trivy integration to noble cluster setup, including namespace and application configurations. Update README and playbook tags to reflect new security scanning capabilities. Enhance Longhorn and kube-prometheus-stack deployment reliability with increased timeout settings and retry mechanisms.
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
# Ansible — noble cluster
|
# Ansible — noble cluster
|
||||||
|
|
||||||
Automates [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md): optional **Talos Phase A** (genconfig → apply → bootstrap → kubeconfig), then **Phase B+** (CNI → add-ons → ingress → Argo CD → Kyverno → observability, etc.). **Argo CD** does not reconcile core charts — optional GitOps starts from an empty [`clusters/noble/apps/kustomization.yaml`](../clusters/noble/apps/kustomization.yaml).
|
Automates [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md): optional **Talos Phase A** (genconfig → apply → bootstrap → kubeconfig), then **Phase B+** (CNI → add-ons → ingress → Argo CD → Kyverno → observability → Trivy, etc.). **Argo CD** does not reconcile core charts — optional GitOps starts from an empty [`clusters/noble/apps/kustomization.yaml`](../clusters/noble/apps/kustomization.yaml).
|
||||||
|
|
||||||
## Order of operations
|
## Order of operations
|
||||||
|
|
||||||
@@ -73,6 +73,7 @@ Override with `-e` when needed, e.g. **`-e noble_talos_skip_bootstrap=true`** if
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
ansible-playbook playbooks/noble.yml --tags cilium,metallb
|
ansible-playbook playbooks/noble.yml --tags cilium,metallb
|
||||||
|
ansible-playbook playbooks/noble.yml --tags trivy
|
||||||
ansible-playbook playbooks/noble.yml --skip-tags newt
|
ansible-playbook playbooks/noble.yml --skip-tags newt
|
||||||
ansible-playbook playbooks/noble.yml --tags velero -e noble_velero_install=true -e noble_velero_s3_bucket=... -e noble_velero_s3_url=...
|
ansible-playbook playbooks/noble.yml --tags velero -e noble_velero_install=true -e noble_velero_s3_bucket=... -e noble_velero_s3_url=...
|
||||||
```
|
```
|
||||||
@@ -88,7 +89,7 @@ ansible-playbook playbooks/noble.yml --tags velero -e noble_velero_install=true
|
|||||||
|------|----------|
|
|------|----------|
|
||||||
| `talos_phase_a` | Talos genconfig, apply-config, bootstrap, kubeconfig |
|
| `talos_phase_a` | Talos genconfig, apply-config, bootstrap, kubeconfig |
|
||||||
| `helm_repos` | `helm repo add` / `update` |
|
| `helm_repos` | `helm repo add` / `update` |
|
||||||
| `noble_*` | Cilium, CSI Volume Snapshot CRDs + controller, metrics-server, Longhorn, MetalLB (20m Helm wait), kube-vip, Traefik, cert-manager, Newt, Argo CD, Kyverno, platform stack, Velero (optional) |
|
| `noble_*` | Cilium, CSI Volume Snapshot CRDs + controller, metrics-server, Longhorn, MetalLB (20m Helm wait), kube-vip, Traefik, cert-manager, Newt, Argo CD, Kyverno, platform stack, **Trivy Operator**, Velero (optional) |
|
||||||
| `noble_landing_urls` | Writes **`ansible/output/noble-lab-ui-urls.md`** — URLs, service names, and (optional) Argo/Grafana passwords from Secrets |
|
| `noble_landing_urls` | Writes **`ansible/output/noble-lab-ui-urls.md`** — URLs, service names, and (optional) Argo/Grafana passwords from Secrets |
|
||||||
| `noble_post_deploy` | Post-install reminders |
|
| `noble_post_deploy` | Post-install reminders |
|
||||||
| `talos_bootstrap` | Genconfig-only (used by older playbook) |
|
| `talos_bootstrap` | Genconfig-only (used by older playbook) |
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
# Run from repo **ansible/** directory: ansible-playbook playbooks/noble.yml
|
# Run from repo **ansible/** directory: ansible-playbook playbooks/noble.yml
|
||||||
#
|
#
|
||||||
# Tags: repos, cilium, csi_snapshot, metrics, longhorn, metallb, kube_vip, traefik, cert_manager, newt,
|
# Tags: repos, cilium, csi_snapshot, metrics, longhorn, metallb, kube_vip, traefik, cert_manager, newt,
|
||||||
# argocd, kyverno, kyverno_policies, platform, velero, all (default)
|
# argocd, kyverno, kyverno_policies, platform, trivy, velero, all (default)
|
||||||
- name: Noble cluster — platform stack (Ansible-managed)
|
- name: Noble cluster — platform stack (Ansible-managed)
|
||||||
hosts: localhost
|
hosts: localhost
|
||||||
connection: local
|
connection: local
|
||||||
@@ -206,6 +206,12 @@
|
|||||||
tags: [csi_snapshot, snapshot, storage]
|
tags: [csi_snapshot, snapshot, storage]
|
||||||
- role: noble_metrics_server
|
- role: noble_metrics_server
|
||||||
tags: [metrics, metrics_server]
|
tags: [metrics, metrics_server]
|
||||||
|
# Kyverno before Longhorn: Longhorn post-upgrade Job is admitted through Kyverno; policies use
|
||||||
|
# failurePolicy Ignore so webhook transport timeouts do not fail Helm (see policies-values.yaml).
|
||||||
|
- role: noble_kyverno
|
||||||
|
tags: [kyverno, policy]
|
||||||
|
- role: noble_kyverno_policies
|
||||||
|
tags: [kyverno_policies, policy]
|
||||||
- role: noble_longhorn
|
- role: noble_longhorn
|
||||||
tags: [longhorn, storage]
|
tags: [longhorn, storage]
|
||||||
- role: noble_metallb
|
- role: noble_metallb
|
||||||
@@ -220,12 +226,10 @@
|
|||||||
tags: [newt]
|
tags: [newt]
|
||||||
- role: noble_argocd
|
- role: noble_argocd
|
||||||
tags: [argocd, gitops]
|
tags: [argocd, gitops]
|
||||||
- role: noble_kyverno
|
|
||||||
tags: [kyverno, policy]
|
|
||||||
- role: noble_kyverno_policies
|
|
||||||
tags: [kyverno_policies, policy]
|
|
||||||
- role: noble_platform
|
- role: noble_platform
|
||||||
tags: [platform, observability, apps]
|
tags: [platform, observability, apps]
|
||||||
|
- role: noble_trivy
|
||||||
|
tags: [trivy, security, scanning]
|
||||||
- role: noble_velero
|
- role: noble_velero
|
||||||
tags: [velero, backups]
|
tags: [velero, backups]
|
||||||
- role: noble_landing_urls
|
- role: noble_landing_urls
|
||||||
|
|||||||
@@ -14,3 +14,4 @@ noble_helm_repos:
|
|||||||
- { name: headlamp, url: "https://kubernetes-sigs.github.io/headlamp/" }
|
- { name: headlamp, url: "https://kubernetes-sigs.github.io/headlamp/" }
|
||||||
- { name: kyverno, url: "https://kyverno.github.io/kyverno/" }
|
- { name: kyverno, url: "https://kyverno.github.io/kyverno/" }
|
||||||
- { name: vmware-tanzu, url: "https://vmware-tanzu.github.io/helm-charts" }
|
- { name: vmware-tanzu, url: "https://vmware-tanzu.github.io/helm-charts" }
|
||||||
|
- { name: aqua, url: "https://aquasecurity.github.io/helm-charts/" }
|
||||||
|
|||||||
@@ -2,3 +2,7 @@
|
|||||||
# Helm --wait default (5m) is often too short for first Longhorn install on several nodes
|
# Helm --wait default (5m) is often too short for first Longhorn install on several nodes
|
||||||
# (image pulls + manager/driver ordering). See ansible/roles/noble_metallb/defaults/main.yml.
|
# (image pulls + manager/driver ordering). See ansible/roles/noble_metallb/defaults/main.yml.
|
||||||
noble_helm_longhorn_wait_timeout: 20m
|
noble_helm_longhorn_wait_timeout: 20m
|
||||||
|
|
||||||
|
# Retry the Longhorn Helm install on transient Kyverno webhook timeouts (post-upgrade hooks / admission storms).
|
||||||
|
noble_helm_longhorn_retries: 8
|
||||||
|
noble_helm_longhorn_retry_delay: 25
|
||||||
|
|||||||
@@ -31,4 +31,8 @@
|
|||||||
- "{{ noble_helm_longhorn_wait_timeout }}"
|
- "{{ noble_helm_longhorn_wait_timeout }}"
|
||||||
environment:
|
environment:
|
||||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||||
|
register: noble_longhorn_helm
|
||||||
|
retries: "{{ noble_helm_longhorn_retries | int }}"
|
||||||
|
delay: "{{ noble_helm_longhorn_retry_delay | int }}"
|
||||||
|
until: noble_longhorn_helm.rc == 0
|
||||||
changed_when: true
|
changed_when: true
|
||||||
|
|||||||
@@ -4,6 +4,14 @@ noble_platform_kubectl_request_timeout: 120s
|
|||||||
noble_platform_kustomize_retries: 5
|
noble_platform_kustomize_retries: 5
|
||||||
noble_platform_kustomize_delay: 20
|
noble_platform_kustomize_delay: 20
|
||||||
|
|
||||||
|
# kube-prometheus-stack: operator Deployment uses Kubernetes default progressDeadlineSeconds (600s).
|
||||||
|
# First install (image pulls + cert-manager webhook TLS) can exceed that, so the role patches the deadline, restarts the operator only if the deadline was already exceeded, then runs Helm --wait.
|
||||||
|
noble_platform_kube_prometheus_operator_progress_deadline_seconds: 1800
|
||||||
|
noble_platform_kube_prometheus_operator_wait_retries: 60
|
||||||
|
noble_platform_kube_prometheus_operator_wait_delay: 5
|
||||||
|
# Longhorn PVCs + full stack often need 45-60m; node-exporter DaemonSet can be last at 3/4 until one node catches up.
|
||||||
|
noble_platform_kube_prometheus_helm_wait_timeout: 60m
|
||||||
|
|
||||||
# Decrypt **clusters/noble/secrets/*.yaml** with SOPS and kubectl apply (requires **sops**, **age**, and **age-key.txt**).
|
# Decrypt **clusters/noble/secrets/*.yaml** with SOPS and kubectl apply (requires **sops**, **age**, and **age-key.txt**).
|
||||||
noble_apply_sops_secrets: true
|
noble_apply_sops_secrets: true
|
||||||
noble_sops_age_key_file: "{{ noble_repo_root }}/age-key.txt"
|
noble_sops_age_key_file: "{{ noble_repo_root }}/age-key.txt"
|
||||||
|
|||||||
@@ -38,7 +38,78 @@
|
|||||||
- noble_sops_age_key_stat.stat.exists
|
- noble_sops_age_key_stat.stat.exists
|
||||||
changed_when: true
|
changed_when: true
|
||||||
|
|
||||||
- name: Install kube-prometheus-stack
|
# Helm --wait alone cannot extend the operator Deployment's progressDeadlineSeconds (default 10m).
|
||||||
|
- name: Install kube-prometheus-stack (apply without Helm wait)
|
||||||
|
ansible.builtin.command:
|
||||||
|
argv:
|
||||||
|
- helm
|
||||||
|
- upgrade
|
||||||
|
- --install
|
||||||
|
- kube-prometheus
|
||||||
|
- prometheus-community/kube-prometheus-stack
|
||||||
|
- -n
|
||||||
|
- monitoring
|
||||||
|
- --version
|
||||||
|
- "82.15.1"
|
||||||
|
- -f
|
||||||
|
- "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml"
|
||||||
|
- --force-conflicts
|
||||||
|
- --wait=false
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||||
|
changed_when: true
|
||||||
|
|
||||||
|
- name: Wait for prometheus-operator Deployment object
|
||||||
|
ansible.builtin.command:
|
||||||
|
argv:
|
||||||
|
- kubectl
|
||||||
|
- get
|
||||||
|
- deployment/kube-prometheus-kube-prome-operator
|
||||||
|
- -n
|
||||||
|
- monitoring
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||||
|
register: noble_kube_prom_operator_deploy
|
||||||
|
until: noble_kube_prom_operator_deploy.rc == 0
|
||||||
|
retries: "{{ noble_platform_kube_prometheus_operator_wait_retries | int }}"
|
||||||
|
delay: "{{ noble_platform_kube_prometheus_operator_wait_delay | int }}"
|
||||||
|
changed_when: false
|
||||||
|
|
||||||
|
- name: Extend prometheus-operator Deployment progress deadline
|
||||||
|
ansible.builtin.command:
|
||||||
|
argv:
|
||||||
|
- kubectl
|
||||||
|
- patch
|
||||||
|
- deployment/kube-prometheus-kube-prome-operator
|
||||||
|
- -n
|
||||||
|
- monitoring
|
||||||
|
- --type=merge
|
||||||
|
- -p
|
||||||
|
- "{{ {'spec': {'progressDeadlineSeconds': (noble_platform_kube_prometheus_operator_progress_deadline_seconds | int)}} | to_json }}"
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||||
|
changed_when: true
|
||||||
|
|
||||||
|
- name: Restart prometheus-operator if Deployment already hit progress deadline
|
||||||
|
ansible.builtin.shell: |
|
||||||
|
set -euo pipefail
|
||||||
|
dep=kube-prometheus-kube-prome-operator
|
||||||
|
msg=$(kubectl get deployment "$dep" -n monitoring -o jsonpath='{.status.conditions[?(@.type=="Progressing")].message}' 2>/dev/null || true)
|
||||||
|
reason=$(kubectl get deployment "$dep" -n monitoring -o jsonpath='{.status.conditions[?(@.type=="Progressing")].reason}' 2>/dev/null || true)
|
||||||
|
combined="${reason}${msg}"
|
||||||
|
if printf '%s' "$combined" | grep -qiE 'ProgressDeadlineExceeded|progress[[:space:]]*deadline[[:space:]]*exceeded'; then
|
||||||
|
kubectl rollout restart deployment/"$dep" -n monitoring
|
||||||
|
echo restarted
|
||||||
|
fi
|
||||||
|
args:
|
||||||
|
executable: /bin/bash
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||||
|
register: noble_kube_prom_operator_restart
|
||||||
|
changed_when: "'restarted' in noble_kube_prom_operator_restart.stdout"
|
||||||
|
|
||||||
|
# Helm --wait prints nothing until done or timeout; override noble_platform_kube_prometheus_helm_wait_timeout if needed.
|
||||||
|
- name: Install kube-prometheus-stack (Helm wait for full release; often 30-60m silent - watch kubectl -n monitoring get pods,ds,pvc)
|
||||||
ansible.builtin.command:
|
ansible.builtin.command:
|
||||||
argv:
|
argv:
|
||||||
- helm
|
- helm
|
||||||
@@ -55,7 +126,7 @@
|
|||||||
- --force-conflicts
|
- --force-conflicts
|
||||||
- --wait
|
- --wait
|
||||||
- --timeout
|
- --timeout
|
||||||
- 30m
|
- "{{ noble_platform_kube_prometheus_helm_wait_timeout }}"
|
||||||
environment:
|
environment:
|
||||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||||
changed_when: true
|
changed_when: true
|
||||||
|
|||||||
3
ansible/roles/noble_trivy/defaults/main.yml
Normal file
3
ansible/roles/noble_trivy/defaults/main.yml
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
---
|
||||||
|
noble_trivy_chart_version: "0.32.1"
|
||||||
|
noble_helm_trivy_wait_timeout: 15m
|
||||||
33
ansible/roles/noble_trivy/tasks/main.yml
Normal file
33
ansible/roles/noble_trivy/tasks/main.yml
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
---
|
||||||
|
- name: Apply trivy-system namespace (PSA)
|
||||||
|
ansible.builtin.command:
|
||||||
|
argv:
|
||||||
|
- kubectl
|
||||||
|
- apply
|
||||||
|
- -f
|
||||||
|
- "{{ noble_repo_root }}/clusters/noble/bootstrap/trivy/namespace.yaml"
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||||
|
changed_when: true
|
||||||
|
|
||||||
|
- name: Install Trivy Operator
|
||||||
|
ansible.builtin.command:
|
||||||
|
argv:
|
||||||
|
- helm
|
||||||
|
- upgrade
|
||||||
|
- --install
|
||||||
|
- trivy-operator
|
||||||
|
- aqua/trivy-operator
|
||||||
|
- -n
|
||||||
|
- trivy-system
|
||||||
|
- --version
|
||||||
|
- "{{ noble_trivy_chart_version }}"
|
||||||
|
- -f
|
||||||
|
- "{{ noble_repo_root }}/clusters/noble/bootstrap/trivy/values.yaml"
|
||||||
|
- --force-conflicts
|
||||||
|
- --wait
|
||||||
|
- --timeout
|
||||||
|
- "{{ noble_helm_trivy_wait_timeout }}"
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||||
|
changed_when: true
|
||||||
@@ -18,3 +18,4 @@ resources:
|
|||||||
- loki-application.yaml
|
- loki-application.yaml
|
||||||
- fluent-bit-application.yaml
|
- fluent-bit-application.yaml
|
||||||
- headlamp-application.yaml
|
- headlamp-application.yaml
|
||||||
|
- trivy-operator-application.yaml
|
||||||
|
|||||||
@@ -0,0 +1,29 @@
|
|||||||
|
# Bootstrap app-of-apps leaf: Trivy Operator (vulnerability + config audit reports).
|
||||||
|
apiVersion: argoproj.io/v1alpha1
|
||||||
|
kind: Application
|
||||||
|
metadata:
|
||||||
|
name: noble-trivy-operator
|
||||||
|
namespace: argocd
|
||||||
|
finalizers:
|
||||||
|
- resources-finalizer.argocd.argoproj.io/background
|
||||||
|
spec:
|
||||||
|
project: default
|
||||||
|
sources:
|
||||||
|
- repoURL: https://aquasecurity.github.io/helm-charts/
|
||||||
|
chart: trivy-operator
|
||||||
|
targetRevision: 0.32.1
|
||||||
|
helm:
|
||||||
|
releaseName: trivy-operator
|
||||||
|
valueFiles:
|
||||||
|
- $values/clusters/noble/bootstrap/trivy/values.yaml
|
||||||
|
- repoURL: https://gitea.pcenicni.ca/gsdavidp/home-server.git
|
||||||
|
targetRevision: HEAD
|
||||||
|
ref: values
|
||||||
|
destination:
|
||||||
|
server: https://kubernetes.default.svc
|
||||||
|
namespace: trivy-system
|
||||||
|
# Manual sync: Ansible helm runs first; enable automation after cutover (see ../README.md §5).
|
||||||
|
syncPolicy:
|
||||||
|
syncOptions:
|
||||||
|
- CreateNamespace=true
|
||||||
|
- ServerSideApply=true
|
||||||
@@ -7,10 +7,10 @@
|
|||||||
# kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml
|
# kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml
|
||||||
# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
|
# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
|
||||||
# helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \
|
# helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \
|
||||||
# --version 82.15.1 -f clusters/noble/bootstrap/kube-prometheus-stack/values.yaml --wait --timeout 30m
|
# --version 82.15.1 -f clusters/noble/bootstrap/kube-prometheus-stack/values.yaml --wait --timeout 60m
|
||||||
#
|
#
|
||||||
# Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes).
|
# Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes).
|
||||||
# Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 15–30m. To watch progress,
|
# Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 30–60m. To watch progress,
|
||||||
# open a second terminal: kubectl -n monitoring get pods,sts,ds -w
|
# open a second terminal: kubectl -n monitoring get pods,sts,ds -w
|
||||||
# To apply manifest changes without blocking: omit --wait, then kubectl -n monitoring get pods -w
|
# To apply manifest changes without blocking: omit --wait, then kubectl -n monitoring get pods -w
|
||||||
#
|
#
|
||||||
@@ -36,6 +36,11 @@ crds:
|
|||||||
upgradeJob:
|
upgradeJob:
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|
||||||
|
# Subchart defaults only tolerate NoSchedule; a node with other taints leaves node-exporter at 3/4 and Helm --wait times out.
|
||||||
|
prometheus-node-exporter:
|
||||||
|
tolerations:
|
||||||
|
- operator: Exists
|
||||||
|
|
||||||
# --- Longhorn-backed persistence (default chart storage is emptyDir) ---
|
# --- Longhorn-backed persistence (default chart storage is emptyDir) ---
|
||||||
alertmanager:
|
alertmanager:
|
||||||
alertmanagerSpec:
|
alertmanagerSpec:
|
||||||
|
|||||||
@@ -17,4 +17,5 @@ resources:
|
|||||||
- velero/namespace.yaml
|
- velero/namespace.yaml
|
||||||
- velero/longhorn-volumesnapshotclass.yaml
|
- velero/longhorn-volumesnapshotclass.yaml
|
||||||
- headlamp/namespace.yaml
|
- headlamp/namespace.yaml
|
||||||
|
- trivy/namespace.yaml
|
||||||
- grafana-loki-datasource/loki-datasource.yaml
|
- grafana-loki-datasource/loki-datasource.yaml
|
||||||
|
|||||||
@@ -9,6 +9,12 @@
|
|||||||
# outside baseline (see namespace PSA labels under clusters/noble/bootstrap/*/namespace.yaml)
|
# outside baseline (see namespace PSA labels under clusters/noble/bootstrap/*/namespace.yaml)
|
||||||
# plus core Kubernetes namespaces and every Ansible-managed app namespace on noble.
|
# plus core Kubernetes namespaces and every Ansible-managed app namespace on noble.
|
||||||
#
|
#
|
||||||
|
# failurePolicy **Ignore** (chart default is Fail): when the apiserver cannot reach Kyverno
|
||||||
|
# within the webhook timeout (e.g. admission overloaded during Helm hooks / Longhorn
|
||||||
|
# post-upgrade Job), Fail denies the request and breaks installs. Ignore allows the request
|
||||||
|
# through on transport failure only — policy violations are still handled per
|
||||||
|
# validationFailureAction when Kyverno responds.
|
||||||
|
#
|
||||||
# After widening excludes, Kyverno does not always prune old PolicyReport rows; refresh:
|
# After widening excludes, Kyverno does not always prune old PolicyReport rows; refresh:
|
||||||
# kubectl delete clusterpolicyreport --all
|
# kubectl delete clusterpolicyreport --all
|
||||||
# kubectl delete policyreport -A --all
|
# kubectl delete policyreport -A --all
|
||||||
@@ -22,10 +28,10 @@ policyType: ClusterPolicy
|
|||||||
podSecurityStandard: baseline
|
podSecurityStandard: baseline
|
||||||
podSecuritySeverity: medium
|
podSecuritySeverity: medium
|
||||||
validationFailureAction: Audit
|
validationFailureAction: Audit
|
||||||
failurePolicy: Fail
|
failurePolicy: Ignore
|
||||||
validationAllowExistingViolations: true
|
validationAllowExistingViolations: true
|
||||||
|
|
||||||
# All platform namespaces on noble (ansible/playbooks/noble.yml + clusters/noble/bootstrap).
|
# All platform namespaces on noble (ansible/playbooks/noble.yml + clusters/noble/bootstrap). Includes **trivy-system**.
|
||||||
x-kyverno-exclude-infra: &kyverno_exclude_infra
|
x-kyverno-exclude-infra: &kyverno_exclude_infra
|
||||||
any:
|
any:
|
||||||
- resources:
|
- resources:
|
||||||
@@ -44,6 +50,7 @@ x-kyverno-exclude-infra: &kyverno_exclude_infra
|
|||||||
- monitoring
|
- monitoring
|
||||||
- newt
|
- newt
|
||||||
- traefik
|
- traefik
|
||||||
|
- trivy-system
|
||||||
|
|
||||||
policyExclude:
|
policyExclude:
|
||||||
disallow-capabilities: *kyverno_exclude_infra
|
disallow-capabilities: *kyverno_exclude_infra
|
||||||
|
|||||||
10
clusters/noble/bootstrap/trivy/namespace.yaml
Normal file
10
clusters/noble/bootstrap/trivy/namespace.yaml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Trivy Operator — apply before Helm (Ansible **noble_trivy**).
|
||||||
|
# Scan jobs may use elevated capabilities, so Pod Security Admission is set to privileged here, in line with other operator namespaces.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: trivy-system
|
||||||
|
labels:
|
||||||
|
pod-security.kubernetes.io/enforce: privileged
|
||||||
|
pod-security.kubernetes.io/audit: privileged
|
||||||
|
pod-security.kubernetes.io/warn: privileged
|
||||||
28
clusters/noble/bootstrap/trivy/values.yaml
Normal file
28
clusters/noble/bootstrap/trivy/values.yaml
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Trivy Operator — in-cluster image vulnerability + config reports (Aqua trivy-operator Helm chart).
|
||||||
|
#
|
||||||
|
# helm repo add aqua https://aquasecurity.github.io/helm-charts/ && helm repo update
|
||||||
|
# kubectl apply -f clusters/noble/bootstrap/trivy/namespace.yaml
|
||||||
|
# helm upgrade --install trivy-operator aqua/trivy-operator -n trivy-system \
|
||||||
|
# --version 0.32.1 -f clusters/noble/bootstrap/trivy/values.yaml --wait --timeout 15m
|
||||||
|
#
|
||||||
|
# Inspect: kubectl get vulnerabilityreports,configauditreports -A
|
||||||
|
# Docs: https://aquasecurity.github.io/trivy-operator/
|
||||||
|
|
||||||
|
# Skip platform/system namespaces (mirrors Kyverno excludes; reduces scan load).
|
||||||
|
excludeNamespaces: "argocd,cert-manager,headlamp,kyverno,local-path-storage,logging,longhorn-system,loki,metallb-system,monitoring,newt,traefik,trivy-system,velero,kube-node-lease,kube-public,kube-system"
|
||||||
|
|
||||||
|
operator:
|
||||||
|
scanJobsConcurrentLimit: 5
|
||||||
|
# SBOM / cluster compliance add CPU and CR volume; keep vulnerability + config audit.
|
||||||
|
sbomGenerationEnabled: false
|
||||||
|
clusterSbomCacheEnabled: false
|
||||||
|
clusterComplianceEnabled: false
|
||||||
|
|
||||||
|
trivyOperator:
|
||||||
|
# Run scan Jobs on every node (Talos / mixed taints).
|
||||||
|
scanJobTolerations:
|
||||||
|
- operator: Exists
|
||||||
|
|
||||||
|
serviceMonitor:
|
||||||
|
enabled: true
|
||||||
|
namespace: monitoring
|
||||||
Reference in New Issue
Block a user