diff --git a/ansible/playbooks/noble.yml b/ansible/playbooks/noble.yml
index 6e67e73..8b04e81 100644
--- a/ansible/playbooks/noble.yml
+++ b/ansible/playbooks/noble.yml
@@ -4,7 +4,8 @@
 # Run from repo **ansible/** directory: ansible-playbook playbooks/noble.yml
 #
 # Tags: repos, cilium, csi_snapshot, metrics, longhorn, metallb, kube_vip, traefik, cert_manager, newt,
-# argocd, kyverno, kyverno_policies, platform, authentik, trivy, velero, all (default)
+# argocd, kyverno, kyverno_policies, platform, authentik, trivy, velero, landing, all (default)
+# Argo leaf **Application** CRs are applied in play **tasks:** after **noble_velero** (Ansible Helm first, then GitOps).
 - name: Noble cluster — platform stack (Ansible-managed)
   hosts: localhost
   connection: local
@@ -234,5 +235,19 @@
       tags: [trivy, security, scanning]
     - role: noble_velero
       tags: [velero, backups]
-    - role: noble_landing_urls
-      tags: [landing, platform, observability, apps]
+
+  tasks:
+    # Leaf Application CRs must exist only after all Ansible Helm in this play (platform, authentik, trivy, …)
+    # so argocd-controller does not SSA resources before Helm owns them; then Argo can take over (manual → auto).
+    # Static import_role (not include_role): tags set on a dynamic include are not inherited by the role's
+    # tasks, so `--tags argocd` / `--tags landing` would run the include but skip everything inside it.
+    - name: Apply Argo CD root / bootstrap / leaf Application manifests (post–Ansible Helm)
+      ansible.builtin.import_role:
+        name: noble_argocd
+        tasks_from: applications_post_platform
+      tags: [argocd, gitops, platform, apps, observability, all]
+
+    - name: Noble landing URLs (+ optional token fetch)
+      ansible.builtin.import_role:
+        name: noble_landing_urls
+      tags: [landing, platform, observability, apps, all]
diff --git a/ansible/roles/noble_argocd/tasks/applications_post_platform.yml b/ansible/roles/noble_argocd/tasks/applications_post_platform.yml
index 79d792e..1729705 100644
--- a/ansible/roles/noble_argocd/tasks/applications_post_platform.yml
+++ b/ansible/roles/noble_argocd/tasks/applications_post_platform.yml
@@ -1,6 +1,7 @@
 ---
-# Run after **noble_platform** Helm + `kubectl apply -k clusters/noble/bootstrap` so leaf **Application**
-# CRs are not reconciled by Argo before **helm upgrade** (avoids SSA conflicts with **argocd-controller**).
+# Run from **ansible/playbooks/noble.yml** *after* roles **noble_platform**, **noble_authentik**, **noble_trivy**,
+# **noble_velero** (see play **tasks:**). Leaf **Application** CRs must not be reconciled before Ansible Helm
+# finishes, or **argocd-controller** can SSA resources without Helm release metadata (e.g. Trivy ServiceAccount).
- name: Apply Argo CD root Application (app-of-apps) ansible.builtin.command: argv: diff --git a/ansible/roles/noble_platform/tasks/main.yml b/ansible/roles/noble_platform/tasks/main.yml index 3770033..9ac55a7 100644 --- a/ansible/roles/noble_platform/tasks/main.yml +++ b/ansible/roles/noble_platform/tasks/main.yml @@ -218,8 +218,3 @@ environment: KUBECONFIG: "{{ noble_kubeconfig }}" changed_when: true - -- name: Argo CD — apply Application manifests after platform Helm - ansible.builtin.include_role: - name: noble_argocd - tasks_from: applications_post_platform diff --git a/ansible/roles/noble_post_deploy/tasks/main.yml b/ansible/roles/noble_post_deploy/tasks/main.yml index 910950d..d1d8540 100644 --- a/ansible/roles/noble_post_deploy/tasks/main.yml +++ b/ansible/roles/noble_post_deploy/tasks/main.yml @@ -9,8 +9,9 @@ - name: Argo CD optional root Application (empty app-of-apps) ansible.builtin.debug: msg: >- - App-of-apps: after **noble_platform**, noble.yml runs **noble_argocd** `applications_post_platform.yml`: - root-application.yaml when noble_argocd_apply_root_application is true; bootstrap-root + **kubectl apply -k - argocd/app-of-apps** when noble_argocd_apply_bootstrap_root_application is true (inventory/group_vars/all.yml). + App-of-apps: at the **end** of **noble.yml** (after **noble_platform**, **noble_authentik**, **noble_trivy**, + **noble_velero**), **noble_argocd** `applications_post_platform.yml` runs: root-application.yaml when + noble_argocd_apply_root_application is true; bootstrap-root + **kubectl apply -k argocd/app-of-apps** + when noble_argocd_apply_bootstrap_root_application is true (inventory/group_vars/all.yml). noble-bootstrap-root uses manual sync until you enable automation after the playbook — clusters/noble/bootstrap/argocd/README.md §5. See clusters/noble/apps/README.md and that README. 
diff --git a/ansible/roles/noble_trivy/tasks/main.yml b/ansible/roles/noble_trivy/tasks/main.yml
index d6a1059..1ace8be 100644
--- a/ansible/roles/noble_trivy/tasks/main.yml
+++ b/ansible/roles/noble_trivy/tasks/main.yml
@@ -1,4 +1,62 @@
 ---
+# Argo CD (Helm source + SSA) or raw kubectl can leave Trivy objects without **meta.helm.sh/** ownership.
+# Namespace-scoped resources go away when **trivy-system** is deleted; **ClusterRole** / **ClusterRoleBinding** /
+# **ClusterComplianceReport** do not. If there is no Helm release **trivy-operator**, reset namespace + cluster scope
+# so **helm upgrade --install** can adopt cleanly.
+# Only a definite "release: not found" counts as absent; any other helm failure (unreachable API, bad kubeconfig,
+# missing helm binary) fails the play here instead of triggering the destructive reset below.
+- name: Check whether trivy-operator Helm release exists in trivy-system
+  ansible.builtin.command:
+    argv:
+      - helm
+      - status
+      - trivy-operator
+      - -n
+      - trivy-system
+  environment:
+    KUBECONFIG: "{{ noble_kubeconfig }}"
+  register: noble_trivy_helm_release_status
+  failed_when:
+    - noble_trivy_helm_release_status.rc != 0
+    - "'release: not found' not in (noble_trivy_helm_release_status.stderr | default(''))"
+  changed_when: false
+
+- name: Remove trivy-system namespace when Helm release is absent (orphan SSA / kubectl vs Ansible Helm)
+  ansible.builtin.command:
+    argv:
+      - kubectl
+      - delete
+      - namespace
+      - trivy-system
+      - --ignore-not-found=true
+      - --wait=true
+  environment:
+    KUBECONFIG: "{{ noble_kubeconfig }}"
+  when: noble_trivy_helm_release_status.rc != 0
+  register: noble_trivy_ns_reset
+  changed_when: "'deleted' in (noble_trivy_ns_reset.stdout | default(''))"
+
+- name: Remove orphan Trivy cluster-scoped resources when Helm release is absent
+  ansible.builtin.shell: |
+    set -euo pipefail
+    # Prefer label selector (matches chart); then explicit names for objects Argo may have created without labels.
+    kubectl delete clusterrolebinding -l app.kubernetes.io/instance=trivy-operator --ignore-not-found=true --wait=true 2>/dev/null || true
+    kubectl delete clusterrolebinding trivy-operator --ignore-not-found=true --wait=true
+    kubectl delete clusterrole -l app.kubernetes.io/instance=trivy-operator --ignore-not-found=true --wait=true 2>/dev/null || true
+    kubectl delete clusterrole trivy-operator aggregate-config-audit-reports-view aggregate-exposed-secret-reports-view aggregate-vulnerability-reports-view --ignore-not-found=true --wait=true
+    if kubectl api-resources --api-group=aquasecurity.github.io -o name 2>/dev/null | grep -q '^clustercompliancereports\.'; then
+      kubectl delete clustercompliancereports.aquasecurity.github.io -l app.kubernetes.io/instance=trivy-operator --ignore-not-found=true --wait=true 2>/dev/null || true
+      kubectl delete clustercompliancereports.aquasecurity.github.io k8s-cis-1.23 k8s-nsa-1.0 k8s-pss-baseline-0.1 k8s-pss-restricted-0.1 --ignore-not-found=true --wait=true 2>/dev/null || true
+    fi
+  # `set -o pipefail` is a bash option; the shell module defaults to /bin/sh (dash on Debian/Ubuntu rejects it).
+  args:
+    executable: /bin/bash
+  environment:
+    KUBECONFIG: "{{ noble_kubeconfig }}"
+  when: noble_trivy_helm_release_status.rc != 0
+  register: noble_trivy_cluster_reset
+  changed_when: "'deleted' in (noble_trivy_cluster_reset.stdout | default(''))"
+
 - name: Apply trivy-system namespace (PSA)
   ansible.builtin.command:
     argv:
diff --git a/clusters/noble/bootstrap/argocd/README.md b/clusters/noble/bootstrap/argocd/README.md
index 618c5d4..f3a74fe 100644
--- a/clusters/noble/bootstrap/argocd/README.md
+++ b/clusters/noble/bootstrap/argocd/README.md
@@ -52,13 +52,13 @@ Use **Settings → Repositories** in the UI, or `argocd repo add` / a `Secret` o
 ## 4.
App-of-apps (GitOps) -**Ansible** (`ansible/playbooks/noble.yml`) runs **`kubectl apply -k clusters/noble/bootstrap`** (namespaces + static YAML) from **`noble_platform`**, then Helm installs, then **`noble_argocd`** `applications_post_platform.yml` applies **`root-application.yaml`**, **`bootstrap-root-application.yaml`**, and **`kubectl apply -k clusters/noble/bootstrap/argocd/app-of-apps`** so Argo **Application** CRs appear only **after** Helm (no SSA fights with **argocd-controller**). +**Ansible** (`ansible/playbooks/noble.yml`) runs **`kubectl apply -k clusters/noble/bootstrap`** from **`noble_platform`**, then Helm for the platform stack, **then** **`noble_authentik`**, **`noble_trivy`**, **`noble_velero`**, and **only then** (play **`tasks:`**) **`noble_argocd`** `applications_post_platform.yml` applies **`root-application.yaml`**, **`bootstrap-root-application.yaml`**, and **`kubectl apply -k clusters/noble/bootstrap/argocd/app-of-apps`**. That order keeps **Ansible Helm first** and lets Argo **take ownership** when you sync or enable automation (no premature SSA vs Helm). 1. Edit **`root-application.yaml`** and **`bootstrap-root-application.yaml`**: set **`repoURL`** and **`targetRevision`**. The **`resources-finalizer.argocd.argoproj.io/background`** finalizer uses Argo’s path-qualified form so **`kubectl apply`** does not warn about finalizer names. 2. Optional add-on apps: add **`Application`** manifests under **`clusters/noble/apps/`** (see **`clusters/noble/apps/README.md`**). -3. **Bootstrap kustomize** (namespaces, datasource, etc.): **`noble-bootstrap-root`** syncs **`clusters/noble/bootstrap`** (no **`argocd/app-of-apps/`** in that kustomization). Leaf **`Application`** manifests live under **`argocd/app-of-apps/`**; Ansible applies that directory **after** **`noble_platform`** Helm so Argo does not SSA charts first. The root app uses **manual** sync; each leaf app is **manual** until you enable automation (see **§5**). +3. 
**Bootstrap kustomize** (namespaces, datasource, etc.): **`noble-bootstrap-root`** syncs **`clusters/noble/bootstrap`** (no **`argocd/app-of-apps/`** in that kustomization). Leaf **`Application`** manifests live under **`argocd/app-of-apps/`**; Ansible applies that directory **after** all **`noble_*`** Helm roles in **`noble.yml`** (see the role order at the top of this section) so Argo does not SSA charts before Helm. The root app uses **manual** sync; each leaf app is **manual** until you enable automation (see **§5**). - **`ansible/playbooks/noble.yml`** (roles **`noble_argocd`** Helm, then **`noble_platform`** — which **include_role**s **`noble_argocd/applications_post_platform`** after Helm) when **`noble_argocd_apply_*`** flags are set in **`ansible/inventory/group_vars/all.yml`**. + **`ansible/playbooks/noble.yml`**: roles **`noble_argocd`** (Argo Helm only), **`noble_platform`**, **`noble_authentik`**, **`noble_trivy`**, **`noble_velero`**, then play **`tasks`** run **`applications_post_platform`** when **`noble_argocd_apply_*`** flags are set in **`ansible/inventory/group_vars/all.yml`**. ```bash kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml @@ -69,7 +69,7 @@ If you migrated from older GitOps **`Application`** names, delete stale **`Appli ## 5. After Ansible: enable automated sync for **noble-bootstrap-root** -Do this only after **`ansible-playbook playbooks/noble.yml`** has finished successfully (including **`noble_platform`** `kubectl apply -k` and any Helm stages you rely on). Until then, leave **manual** sync so Argo does not fight the playbook. +Do this only after **`ansible-playbook playbooks/noble.yml`** has finished successfully (including **`noble_platform`** / **`noble_authentik`** / **`noble_trivy`** / **`noble_velero`** Helm and the final **`applications_post_platform`** `kubectl apply` of leaf **Application** CRs). Until then, leave **manual** sync so Argo does not fight the playbook.
**Required steps** diff --git a/clusters/noble/bootstrap/argocd/bootstrap-root-application.yaml b/clusters/noble/bootstrap/argocd/bootstrap-root-application.yaml index 08be5a4..4dcfc9c 100644 --- a/clusters/noble/bootstrap/argocd/bootstrap-root-application.yaml +++ b/clusters/noble/bootstrap/argocd/bootstrap-root-application.yaml @@ -1,6 +1,6 @@ # **noble-bootstrap-root** — Kustomize for **clusters/noble/bootstrap** (namespaces, Grafana datasource, # VolumeSnapshotClass, etc.). Leaf **Application** CRs under **argocd/app-of-apps/** are **not** in this -# path; Ansible applies them after Helm (see **noble_argocd** `applications_post_platform.yml`). +# path; Ansible applies them after all **noble.yml** Helm roles (see play **tasks:** → **applications_post_platform.yml**). # # **Initial deploy:** Ansible is the only writer; **automated sync is off** so Argo does not reconcile # during **noble.yml**. **After** the playbook finishes, enable automated sync (see **README.md** §5) diff --git a/clusters/noble/bootstrap/kustomization.yaml b/clusters/noble/bootstrap/kustomization.yaml index 3048667..f6a12d9 100644 --- a/clusters/noble/bootstrap/kustomization.yaml +++ b/clusters/noble/bootstrap/kustomization.yaml @@ -1,7 +1,7 @@ # Ansible **noble_platform**: `kubectl apply -k` this directory (namespaces + static YAML only). -# Leaf Argo **Application** manifests live under **argocd/app-of-apps/** and are applied **after** Helm -# by **noble_argocd** `applications_post_platform.yml` so **argocd-controller** does not SSA the chart -# before **helm upgrade** runs. +# Leaf Argo **Application** manifests live under **argocd/app-of-apps/** and are applied at the **end** +# of **ansible/playbooks/noble.yml** (play **tasks:** → **noble_argocd** `applications_post_platform.yml`) so +# **argocd-controller** does not SSA chart resources before **helm upgrade** (platform, authentik, trivy, …). 
# # **noble-bootstrap-root** syncs this same path for GitOps on namespaces/datasource/VolumeSnapshotClass. # Per-chart GitOps: each **noble-*** app under **argocd/app-of-apps/** (manual sync until you cut over).