Add Trivy integration to noble cluster setup, including namespace and application configurations. Update README and playbook tags to reflect new security scanning capabilities. Enhance Longhorn and kube-prometheus-stack deployment reliability with increased timeout settings and retry mechanisms.
This commit is contained in:
@@ -38,7 +38,78 @@
|
||||
- noble_sops_age_key_stat.stat.exists
|
||||
changed_when: true
|
||||
|
||||
- name: Install kube-prometheus-stack
|
||||
# Helm --wait alone cannot extend the operator Deployment's progressDeadlineSeconds (default 10m).
|
||||
- name: Install kube-prometheus-stack (apply without Helm wait)
  # First pass: submit the release with --wait=false so this task does not
  # block on resource readiness. Always reported as changed because the
  # helm output is not inspected.
  changed_when: true
  environment:
    KUBECONFIG: "{{ noble_kubeconfig }}"
  ansible.builtin.command:
    argv:
      # helm upgrade --install <release> <chart>
      - helm
      - upgrade
      - --install
      - kube-prometheus
      - prometheus-community/kube-prometheus-stack
      # Target namespace
      - -n
      - monitoring
      # Pinned chart version
      - --version
      - "82.15.1"
      # Values file from the repository checkout
      - -f
      - "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml"
      - --force-conflicts
      - --wait=false
|
||||
|
||||
- name: Wait for prometheus-operator Deployment object
  # Polls `kubectl get` until it exits 0, i.e. until the Deployment object
  # can be fetched from the monitoring namespace. Read-only, so the task
  # never reports a change.
  changed_when: false
  environment:
    KUBECONFIG: "{{ noble_kubeconfig }}"
  ansible.builtin.command:
    argv:
      - kubectl
      - get
      - deployment/kube-prometheus-kube-prome-operator
      - -n
      - monitoring
  register: noble_kube_prom_operator_deploy
  # Retry budget and pause between attempts come from role variables.
  retries: "{{ noble_platform_kube_prometheus_operator_wait_retries | int }}"
  delay: "{{ noble_platform_kube_prometheus_operator_wait_delay | int }}"
  until: noble_kube_prom_operator_deploy.rc == 0
|
||||
|
||||
- name: Extend prometheus-operator Deployment progress deadline
  # Merge-patches spec.progressDeadlineSeconds on the operator Deployment to
  # the value of noble_platform_kube_prometheus_operator_progress_deadline_seconds.
  ansible.builtin.command:
    argv:
      - kubectl
      - patch
      - deployment/kube-prometheus-kube-prome-operator
      - -n
      - monitoring
      - --type=merge
      - -p
      - "{{ {'spec': {'progressDeadlineSeconds': (noble_platform_kube_prometheus_operator_progress_deadline_seconds | int)}} | to_json }}"
  environment:
    KUBECONFIG: "{{ noble_kubeconfig }}"
  register: noble_kube_prom_operator_deadline_patch
  # kubectl prints "... patched (no change)" when the object already matches
  # the patch; report a change only when the Deployment was actually modified
  # so repeated runs stay idempotent in the play recap.
  changed_when: "'(no change)' not in noble_kube_prom_operator_deadline_patch.stdout"
|
||||
|
||||
- name: Restart prometheus-operator if Deployment already hit progress deadline
  # Reads the reason and message of the Deployment's "Progressing" condition
  # and, if either mentions an exceeded progress deadline, triggers a rollout
  # restart. The script echoes "restarted" only in that case, which is what
  # changed_when keys on.
  environment:
    KUBECONFIG: "{{ noble_kubeconfig }}"
  ansible.builtin.shell:
    # bash is required for `set -o pipefail`.
    executable: /bin/bash
    cmd: |
      set -euo pipefail
      dep=kube-prometheus-kube-prome-operator
      msg=$(kubectl get deployment "$dep" -n monitoring -o jsonpath='{.status.conditions[?(@.type=="Progressing")].message}' 2>/dev/null || true)
      reason=$(kubectl get deployment "$dep" -n monitoring -o jsonpath='{.status.conditions[?(@.type=="Progressing")].reason}' 2>/dev/null || true)
      combined="${reason}${msg}"
      if printf '%s' "$combined" | grep -qiE 'ProgressDeadlineExceeded|progress[[:space:]]*deadline[[:space:]]*exceeded'; then
        kubectl rollout restart deployment/"$dep" -n monitoring
        echo restarted
      fi
  register: noble_kube_prom_operator_restart
  changed_when: "'restarted' in noble_kube_prom_operator_restart.stdout"
|
||||
|
||||
# Helm --wait prints nothing until done or timeout; override noble_platform_kube_prometheus_helm_wait_timeout if needed.
|
||||
- name: Install kube-prometheus-stack (Helm wait for full release; often 30-60m silent - watch kubectl -n monitoring get pods,ds,pvc)
|
||||
ansible.builtin.command:
|
||||
argv:
|
||||
- helm
|
||||
@@ -55,7 +126,7 @@
|
||||
- --force-conflicts
|
||||
- --wait
|
||||
- --timeout
|
||||
- 30m
|
||||
- "{{ noble_platform_kube_prometheus_helm_wait_timeout }}"
|
||||
environment:
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
changed_when: true
|
||||
|
||||
Reference in New Issue
Block a user