Add Trivy integration to noble cluster setup, including namespace and application configurations. Update README and playbook tags to reflect new security scanning capabilities. Enhance Longhorn and kube-prometheus-stack deployment reliability with increased timeout settings and retry mechanisms.

This commit is contained in:
Nikholas Pcenicni
2026-05-13 23:48:12 -04:00
parent 774b97894c
commit 663ebc5001
16 changed files with 223 additions and 13 deletions

View File

@@ -38,7 +38,78 @@
- noble_sops_age_key_stat.stat.exists
changed_when: true
- name: Install kube-prometheus-stack
# Helm --wait alone cannot extend the operator Deployment's progressDeadlineSeconds (default 10m).
# First pass: create or upgrade the release and return immediately
# (--wait=false), so the following tasks can act on the operator Deployment
# while its rollout is still in progress.
- name: Install kube-prometheus-stack (apply without Helm wait)
  environment:
    KUBECONFIG: "{{ noble_kubeconfig }}"
  ansible.builtin.command:
    argv:
      - helm
      - upgrade
      - --install
      # Release name, then chart reference.
      - kube-prometheus
      - prometheus-community/kube-prometheus-stack
      - -n
      - monitoring
      # Chart version is pinned; quoted so it stays a string.
      - --version
      - "82.15.1"
      - -f
      - "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml"
      - --force-conflicts
      - --wait=false
  # This task is unconditionally reported as changed.
  changed_when: true
# Poll until `kubectl get` on the operator Deployment exits 0, i.e. the object
# exists. Only existence is checked here, not rollout status.
- name: Wait for prometheus-operator Deployment object
  environment:
    KUBECONFIG: "{{ noble_kubeconfig }}"
  ansible.builtin.command:
    argv:
      - kubectl
      - get
      - deployment/kube-prometheus-kube-prome-operator
      - -n
      - monitoring
  register: noble_kube_prom_operator_deploy
  # Read-only query: never counts as a change.
  changed_when: false
  # Retry budget and pause between attempts come from variables cast to int.
  until: noble_kube_prom_operator_deploy.rc == 0
  retries: "{{ noble_platform_kube_prometheus_operator_wait_retries | int }}"
  delay: "{{ noble_platform_kube_prometheus_operator_wait_delay | int }}"
# Set spec.progressDeadlineSeconds on the operator Deployment with a merge
# patch. The patch body is built as a dict and serialized with to_json, with
# the value cast to int before serialization.
- name: Extend prometheus-operator Deployment progress deadline
  ansible.builtin.command:
    argv:
      - kubectl
      - patch
      - deployment/kube-prometheus-kube-prome-operator
      - -n
      - monitoring
      - --type=merge
      - -p
      - "{{ {'spec': {'progressDeadlineSeconds': (noble_platform_kube_prometheus_operator_progress_deadline_seconds | int)}} | to_json }}"
  environment:
    KUBECONFIG: "{{ noble_kubeconfig }}"
  register: noble_kube_prom_operator_deadline_patch
  # kubectl prints "... patched (no change)" when the Deployment already had
  # this value; report a change only when the patch actually modified it.
  changed_when: "'(no change)' not in noble_kube_prom_operator_deadline_patch.stdout"
# Inspect the Deployment's Progressing condition and trigger a rollout restart
# only when its reason or message indicates the progress deadline was exceeded.
# The script echoes "restarted" as the marker that changed_when looks for.
- name: Restart prometheus-operator if Deployment already hit progress deadline
  environment:
    KUBECONFIG: "{{ noble_kubeconfig }}"
  ansible.builtin.shell: |
    set -euo pipefail
    deploy=kube-prometheus-kube-prome-operator
    # A failed lookup is tolerated (stderr dropped, "|| true") and yields an empty string.
    cond_reason=$(kubectl get deployment "$deploy" -n monitoring -o jsonpath='{.status.conditions[?(@.type=="Progressing")].reason}' 2>/dev/null || true)
    cond_message=$(kubectl get deployment "$deploy" -n monitoring -o jsonpath='{.status.conditions[?(@.type=="Progressing")].message}' 2>/dev/null || true)
    # Case-insensitive match against reason and message concatenated.
    if printf '%s' "${cond_reason}${cond_message}" | grep -qiE 'ProgressDeadlineExceeded|progress[[:space:]]*deadline[[:space:]]*exceeded'; then
      kubectl rollout restart deployment/"$deploy" -n monitoring
      echo restarted
    fi
  args:
    # The script sets "pipefail", so it is run with bash explicitly.
    executable: /bin/bash
  register: noble_kube_prom_operator_restart
  changed_when: "'restarted' in noble_kube_prom_operator_restart.stdout"
# Helm --wait prints nothing until done or timeout; override noble_platform_kube_prometheus_helm_wait_timeout if needed.
- name: Install kube-prometheus-stack (Helm wait for full release; often 30-60m silent - watch kubectl -n monitoring get pods,ds,pvc)
ansible.builtin.command:
argv:
- helm
@@ -55,7 +126,7 @@
- --force-conflicts
- --wait
- --timeout
- 30m
- "{{ noble_platform_kube_prometheus_helm_wait_timeout }}"
environment:
KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true