From 1e6d84f0f3cb80c1d58399251a0638b8df5cc4d0 Mon Sep 17 00:00:00 2001 From: Nikholas Pcenicni <82239765+nikpcenicni@users.noreply.github.com> Date: Thu, 14 May 2026 16:46:45 -0400 Subject: [PATCH] Refactor noble.yml playbook to apply Argo CD Application manifests after all Helm roles, ensuring proper resource ownership and avoiding SSA conflicts. Update related documentation to reflect the new execution order and clarify the role of Argo CD in the deployment process. --- ansible/playbooks/noble.yml | 19 +++++-- .../tasks/applications_post_platform.yml | 5 +- ansible/roles/noble_platform/tasks/main.yml | 5 -- .../roles/noble_post_deploy/tasks/main.yml | 7 +-- ansible/roles/noble_trivy/tasks/main.yml | 51 +++++++++++++++++++ clusters/noble/bootstrap/argocd/README.md | 8 +-- .../argocd/bootstrap-root-application.yaml | 2 +- clusters/noble/bootstrap/kustomization.yaml | 6 +-- 8 files changed, 82 insertions(+), 21 deletions(-) diff --git a/ansible/playbooks/noble.yml b/ansible/playbooks/noble.yml index 6e67e73..8b04e81 100644 --- a/ansible/playbooks/noble.yml +++ b/ansible/playbooks/noble.yml @@ -4,7 +4,8 @@ # Run from repo **ansible/** directory: ansible-playbook playbooks/noble.yml # # Tags: repos, cilium, csi_snapshot, metrics, longhorn, metallb, kube_vip, traefik, cert_manager, newt, -# argocd, kyverno, kyverno_policies, platform, authentik, trivy, velero, all (default) +# argocd, kyverno, kyverno_policies, platform, authentik, trivy, velero, landing, all (default) +# Argo leaf **Application** CRs are applied in play **tasks:** after **noble_velero** (Ansible Helm first, then GitOps). 
- name: Noble cluster — platform stack (Ansible-managed) hosts: localhost connection: local
@@ -234,5 +235,19 @@
       tags: [trivy, security, scanning]
     - role: noble_velero
       tags: [velero, backups]
-    - role: noble_landing_urls
-      tags: [landing, platform, observability, apps]
+
+  tasks:
+    # Leaf Application CRs must exist only after all Ansible Helm in this play (platform, authentik, trivy, …)
+    # so argocd-controller does not SSA resources before Helm owns them; then Argo can take over (manual → auto).
+    # import_role (static), not include_role: task-level tags on a dynamic include select only the include
+    # itself, so `--tags argocd` / `--tags landing` would load the role and then skip every task inside it.
+    - name: Apply Argo CD root / bootstrap / leaf Application manifests (post–Ansible Helm)
+      ansible.builtin.import_role:
+        name: noble_argocd
+        tasks_from: applications_post_platform
+      tags: [argocd, gitops, platform, apps, observability, all]
+
+    - name: Noble landing URLs (+ optional token fetch)
+      ansible.builtin.import_role:
+        name: noble_landing_urls
+      tags: [landing, platform, observability, apps, all]
diff --git a/ansible/roles/noble_argocd/tasks/applications_post_platform.yml b/ansible/roles/noble_argocd/tasks/applications_post_platform.yml
index 79d792e..1729705 100644
--- a/ansible/roles/noble_argocd/tasks/applications_post_platform.yml
+++ b/ansible/roles/noble_argocd/tasks/applications_post_platform.yml
@@ -1,6 +1,7 @@
 ---
-# Run after **noble_platform** Helm + `kubectl apply -k clusters/noble/bootstrap` so leaf **Application**
-# CRs are not reconciled by Argo before **helm upgrade** (avoids SSA conflicts with **argocd-controller**).
+# Run from **ansible/playbooks/noble.yml** *after* roles **noble_platform**, **noble_authentik**, **noble_trivy**,
+# **noble_velero** (see play **tasks:**). Leaf **Application** CRs must not be reconciled before Ansible Helm
+# finishes, or **argocd-controller** can SSA resources without Helm release metadata (e.g. Trivy ServiceAccount).
- name: Apply Argo CD root Application (app-of-apps) ansible.builtin.command: argv: diff --git a/ansible/roles/noble_platform/tasks/main.yml b/ansible/roles/noble_platform/tasks/main.yml index 3770033..9ac55a7 100644 --- a/ansible/roles/noble_platform/tasks/main.yml +++ b/ansible/roles/noble_platform/tasks/main.yml @@ -218,8 +218,3 @@ environment: KUBECONFIG: "{{ noble_kubeconfig }}" changed_when: true - -- name: Argo CD — apply Application manifests after platform Helm - ansible.builtin.include_role: - name: noble_argocd - tasks_from: applications_post_platform diff --git a/ansible/roles/noble_post_deploy/tasks/main.yml b/ansible/roles/noble_post_deploy/tasks/main.yml index 910950d..d1d8540 100644 --- a/ansible/roles/noble_post_deploy/tasks/main.yml +++ b/ansible/roles/noble_post_deploy/tasks/main.yml @@ -9,8 +9,9 @@ - name: Argo CD optional root Application (empty app-of-apps) ansible.builtin.debug: msg: >- - App-of-apps: after **noble_platform**, noble.yml runs **noble_argocd** `applications_post_platform.yml`: - root-application.yaml when noble_argocd_apply_root_application is true; bootstrap-root + **kubectl apply -k - argocd/app-of-apps** when noble_argocd_apply_bootstrap_root_application is true (inventory/group_vars/all.yml). + App-of-apps: at the **end** of **noble.yml** (after **noble_platform**, **noble_authentik**, **noble_trivy**, + **noble_velero**), **noble_argocd** `applications_post_platform.yml` runs: root-application.yaml when + noble_argocd_apply_root_application is true; bootstrap-root + **kubectl apply -k argocd/app-of-apps** + when noble_argocd_apply_bootstrap_root_application is true (inventory/group_vars/all.yml). noble-bootstrap-root uses manual sync until you enable automation after the playbook — clusters/noble/bootstrap/argocd/README.md §5. See clusters/noble/apps/README.md and that README. 
diff --git a/ansible/roles/noble_trivy/tasks/main.yml b/ansible/roles/noble_trivy/tasks/main.yml
index d6a1059..1ace8be 100644
--- a/ansible/roles/noble_trivy/tasks/main.yml
+++ b/ansible/roles/noble_trivy/tasks/main.yml
@@ -1,4 +1,63 @@
 ---
+# Argo CD (Helm source + SSA) or raw kubectl can leave Trivy objects without **meta.helm.sh/** ownership.
+# Namespace-scoped resources go away when **trivy-system** is deleted; **ClusterRole** / **ClusterRoleBinding** /
+# **ClusterComplianceReport** do not. If there is no Helm release **trivy-operator**, reset namespace + cluster scope
+# so **helm upgrade --install** can adopt cleanly.
+- name: Check whether trivy-operator Helm release exists in trivy-system
+  ansible.builtin.command:
+    argv:
+      - helm
+      - status
+      - trivy-operator
+      - -n
+      - trivy-system
+  environment:
+    KUBECONFIG: "{{ noble_kubeconfig }}"
+  register: noble_trivy_helm_release_status
+  # Only "release: not found" may fall through to the destructive resets below; any other helm failure
+  # (API unreachable, bad kubeconfig, helm missing) must stop the play instead of wiping a healthy release.
+  failed_when:
+    - noble_trivy_helm_release_status.rc != 0
+    - "'release: not found' not in (noble_trivy_helm_release_status.stderr | default(''))"
+  changed_when: false
+
+- name: Remove trivy-system namespace when Helm release is absent (orphan SSA / kubectl vs Ansible Helm)
+  ansible.builtin.command:
+    argv:
+      - kubectl
+      - delete
+      - namespace
+      - trivy-system
+      - --ignore-not-found=true
+      - --wait=true
+  environment:
+    KUBECONFIG: "{{ noble_kubeconfig }}"
+  when: noble_trivy_helm_release_status.rc != 0
+  register: noble_trivy_ns_reset
+  changed_when: "'deleted' in (noble_trivy_ns_reset.stdout | default(''))"
+
+- name: Remove orphan Trivy cluster-scoped resources when Helm release is absent
+  ansible.builtin.shell: |
+    set -euo pipefail
+    # Prefer label selector (matches chart); then explicit names for objects Argo may have created without labels.
+    kubectl delete clusterrolebinding -l app.kubernetes.io/instance=trivy-operator --ignore-not-found=true --wait=true 2>/dev/null || true
+    kubectl delete clusterrolebinding trivy-operator --ignore-not-found=true --wait=true
+    kubectl delete clusterrole -l app.kubernetes.io/instance=trivy-operator --ignore-not-found=true --wait=true 2>/dev/null || true
+    kubectl delete clusterrole trivy-operator aggregate-config-audit-reports-view aggregate-exposed-secret-reports-view aggregate-vulnerability-reports-view --ignore-not-found=true --wait=true
+    # Plain grep (not -q): -q exits on the first match and, under pipefail, a SIGPIPE'd kubectl would make the test false.
+    if kubectl api-resources --api-group=aquasecurity.github.io -o name 2>/dev/null | grep '^clustercompliancereports\.' >/dev/null; then
+      kubectl delete clustercompliancereports.aquasecurity.github.io -l app.kubernetes.io/instance=trivy-operator --ignore-not-found=true --wait=true 2>/dev/null || true
+      kubectl delete clustercompliancereports.aquasecurity.github.io k8s-cis-1.23 k8s-nsa-1.0 k8s-pss-baseline-0.1 k8s-pss-restricted-0.1 --ignore-not-found=true --wait=true 2>/dev/null || true
+    fi
+  args:
+    # `set -o pipefail` is a bash feature; the module's default /bin/sh (dash on Debian/Ubuntu) rejects it.
+    executable: /bin/bash
+  environment:
+    KUBECONFIG: "{{ noble_kubeconfig }}"
+  when: noble_trivy_helm_release_status.rc != 0
+  register: noble_trivy_cluster_reset
+  changed_when: "'deleted' in (noble_trivy_cluster_reset.stdout | default(''))"
+
 - name: Apply trivy-system namespace (PSA)
   ansible.builtin.command:
     argv:
diff --git a/clusters/noble/bootstrap/argocd/README.md b/clusters/noble/bootstrap/argocd/README.md
index 618c5d4..f3a74fe 100644
--- a/clusters/noble/bootstrap/argocd/README.md
+++ b/clusters/noble/bootstrap/argocd/README.md
@@ -52,13 +52,13 @@ Use **Settings → Repositories** in the UI, or `argocd repo add` / a `Secret` o
 
 ## 4. 
App-of-apps (GitOps) -**Ansible** (`ansible/playbooks/noble.yml`) runs **`kubectl apply -k clusters/noble/bootstrap`** (namespaces + static YAML) from **`noble_platform`**, then Helm installs, then **`noble_argocd`** `applications_post_platform.yml` applies **`root-application.yaml`**, **`bootstrap-root-application.yaml`**, and **`kubectl apply -k clusters/noble/bootstrap/argocd/app-of-apps`** so Argo **Application** CRs appear only **after** Helm (no SSA fights with **argocd-controller**). +**Ansible** (`ansible/playbooks/noble.yml`) runs **`kubectl apply -k clusters/noble/bootstrap`** from **`noble_platform`**, then Helm for the platform stack, **then** **`noble_authentik`**, **`noble_trivy`**, **`noble_velero`**, and **only then** (play **`tasks:`**) **`noble_argocd`** `applications_post_platform.yml` applies **`root-application.yaml`**, **`bootstrap-root-application.yaml`**, and **`kubectl apply -k clusters/noble/bootstrap/argocd/app-of-apps`**. That order keeps **Ansible Helm first** and lets Argo **take ownership** when you sync or enable automation (no premature SSA vs Helm). 1. Edit **`root-application.yaml`** and **`bootstrap-root-application.yaml`**: set **`repoURL`** and **`targetRevision`**. The **`resources-finalizer.argocd.argoproj.io/background`** finalizer uses Argo’s path-qualified form so **`kubectl apply`** does not warn about finalizer names. 2. Optional add-on apps: add **`Application`** manifests under **`clusters/noble/apps/`** (see **`clusters/noble/apps/README.md`**). -3. **Bootstrap kustomize** (namespaces, datasource, etc.): **`noble-bootstrap-root`** syncs **`clusters/noble/bootstrap`** (no **`argocd/app-of-apps/`** in that kustomization). Leaf **`Application`** manifests live under **`argocd/app-of-apps/`**; Ansible applies that directory **after** **`noble_platform`** Helm so Argo does not SSA charts first. The root app uses **manual** sync; each leaf app is **manual** until you enable automation (see **§5**). +3. 
**Bootstrap kustomize** (namespaces, datasource, etc.): **`noble-bootstrap-root`** syncs **`clusters/noble/bootstrap`** (no **`argocd/app-of-apps/`** in that kustomization). Leaf **`Application`** manifests live under **`argocd/app-of-apps/`**; Ansible applies that directory **after** all **`noble_*`** Helm roles in **`noble.yml`** (see §4) so Argo does not SSA charts before Helm. The root app uses **manual** sync; each leaf app is **manual** until you enable automation (see **§5**). - **`ansible/playbooks/noble.yml`** (roles **`noble_argocd`** Helm, then **`noble_platform`** — which **include_role**s **`noble_argocd/applications_post_platform`** after Helm) when **`noble_argocd_apply_*`** flags are set in **`ansible/inventory/group_vars/all.yml`**. + **`ansible/playbooks/noble.yml`**: roles **`noble_argocd`** (Argo Helm only), **`noble_platform`**, **`noble_authentik`**, **`noble_trivy`**, **`noble_velero`**, then play **`tasks`** run **`applications_post_platform`** when **`noble_argocd_apply_*`** flags are set in **`ansible/inventory/group_vars/all.yml`**. ```bash kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml @@ -69,7 +69,7 @@ If you migrated from older GitOps **`Application`** names, delete stale **`Appli ## 5. After Ansible: enable automated sync for **noble-bootstrap-root** -Do this only after **`ansible-playbook playbooks/noble.yml`** has finished successfully (including **`noble_platform`** `kubectl apply -k` and any Helm stages you rely on). Until then, leave **manual** sync so Argo does not fight the playbook. +Do this only after **`ansible-playbook playbooks/noble.yml`** has finished successfully (including **`noble_platform`** / **`noble_authentik`** / **`noble_trivy`** Helm and the final **`applications_post_platform`** `kubectl apply` of leaf **Application** CRs). Until then, leave **manual** sync so Argo does not fight the playbook. 
**Required steps** diff --git a/clusters/noble/bootstrap/argocd/bootstrap-root-application.yaml b/clusters/noble/bootstrap/argocd/bootstrap-root-application.yaml index 08be5a4..4dcfc9c 100644 --- a/clusters/noble/bootstrap/argocd/bootstrap-root-application.yaml +++ b/clusters/noble/bootstrap/argocd/bootstrap-root-application.yaml @@ -1,6 +1,6 @@ # **noble-bootstrap-root** — Kustomize for **clusters/noble/bootstrap** (namespaces, Grafana datasource, # VolumeSnapshotClass, etc.). Leaf **Application** CRs under **argocd/app-of-apps/** are **not** in this -# path; Ansible applies them after Helm (see **noble_argocd** `applications_post_platform.yml`). +# path; Ansible applies them after all **noble.yml** Helm roles (see play **tasks:** → **applications_post_platform.yml**). # # **Initial deploy:** Ansible is the only writer; **automated sync is off** so Argo does not reconcile # during **noble.yml**. **After** the playbook finishes, enable automated sync (see **README.md** §5) diff --git a/clusters/noble/bootstrap/kustomization.yaml b/clusters/noble/bootstrap/kustomization.yaml index 3048667..f6a12d9 100644 --- a/clusters/noble/bootstrap/kustomization.yaml +++ b/clusters/noble/bootstrap/kustomization.yaml @@ -1,7 +1,7 @@ # Ansible **noble_platform**: `kubectl apply -k` this directory (namespaces + static YAML only). -# Leaf Argo **Application** manifests live under **argocd/app-of-apps/** and are applied **after** Helm -# by **noble_argocd** `applications_post_platform.yml` so **argocd-controller** does not SSA the chart -# before **helm upgrade** runs. +# Leaf Argo **Application** manifests live under **argocd/app-of-apps/** and are applied at the **end** +# of **ansible/playbooks/noble.yml** (play **tasks:** → **noble_argocd** `applications_post_platform.yml`) so +# **argocd-controller** does not SSA chart resources before **helm upgrade** (platform, authentik, trivy, …). 
# # **noble-bootstrap-root** syncs this same path for GitOps on namespaces/datasource/VolumeSnapshotClass. # Per-chart GitOps: each **noble-*** app under **argocd/app-of-apps/** (manual sync until you cut over).