From 46cedc965fc7618ddfa764c26ac88fc4abd8bd97 Mon Sep 17 00:00:00 2001 From: Nikholas Pcenicni <82239765+nikpcenicni@users.noreply.github.com> Date: Sat, 28 Mar 2026 15:17:54 -0400 Subject: [PATCH] Refactor Argo CD application management by removing noble-kyverno and noble-platform configurations, transitioning to Ansible-driven installations. Update documentation to clarify the optional nature of app-of-apps and the role of kustomization.yaml as an empty resource holder. Ensure users are informed about the need to delete stale Applications when migrating from previous configurations. --- ansible/.gitignore | 1 + ansible/README.md | 87 +++++++ ansible/ansible.cfg | 10 + ansible/group_vars/all.yml | 23 ++ ansible/inventory/localhost.yml | 6 + ansible/playbooks/deploy.yml | 5 + ansible/playbooks/noble.yml | 215 ++++++++++++++++++ ansible/playbooks/post_deploy.yml | 12 + ansible/playbooks/talos_bootstrap.yml | 11 + ansible/playbooks/talos_phase_a.yml | 15 ++ .../playbooks/templates/api_health_hint.j2 | 22 ++ ansible/roles/helm_repos/defaults/main.yml | 18 ++ ansible/roles/helm_repos/tasks/main.yml | 16 ++ ansible/roles/noble_argocd/tasks/main.yml | 20 ++ .../roles/noble_cert_manager/tasks/main.yml | 65 ++++++ ansible/roles/noble_cilium/tasks/main.yml | 25 ++ ansible/roles/noble_kube_vip/tasks/main.yml | 11 + ansible/roles/noble_kyverno/tasks/main.yml | 32 +++ .../noble_kyverno_policies/tasks/main.yml | 21 ++ ansible/roles/noble_longhorn/tasks/main.yml | 29 +++ ansible/roles/noble_metallb/tasks/main.yml | 37 +++ .../roles/noble_metrics_server/tasks/main.yml | 19 ++ ansible/roles/noble_newt/tasks/main.yml | 37 +++ ansible/roles/noble_platform/tasks/main.yml | 147 ++++++++++++ .../roles/noble_post_deploy/tasks/main.yml | 27 +++ ansible/roles/noble_traefik/tasks/main.yml | 30 +++ .../roles/talos_bootstrap/defaults/main.yml | 3 + ansible/roles/talos_bootstrap/tasks/main.yml | 36 +++ ansible/roles/talos_phase_a/defaults/main.yml | 38 ++++ 
ansible/roles/talos_phase_a/tasks/main.yml | 209 +++++++++++++++++ clusters/noble/apps/kustomization.yaml | 4 +- clusters/noble/bootstrap/argocd/README.md | 10 +- .../noble/bootstrap/argocd/apps/README.md | 22 +- .../bootstrap/argocd/apps/kustomization.yaml | 6 + .../argocd/apps/noble-kyverno-policies.yaml | 40 ---- .../bootstrap/argocd/apps/noble-kyverno.yaml | 35 --- .../bootstrap/argocd/apps/noble-platform.yaml | 91 -------- .../bootstrap/argocd/root-application.yaml | 7 +- docs/architecture.md | 4 +- talos/CLUSTER-BUILD.md | 5 +- 40 files changed, 1264 insertions(+), 187 deletions(-) create mode 100644 ansible/.gitignore create mode 100644 ansible/README.md create mode 100644 ansible/ansible.cfg create mode 100644 ansible/group_vars/all.yml create mode 100644 ansible/inventory/localhost.yml create mode 100644 ansible/playbooks/deploy.yml create mode 100644 ansible/playbooks/noble.yml create mode 100644 ansible/playbooks/post_deploy.yml create mode 100644 ansible/playbooks/talos_bootstrap.yml create mode 100644 ansible/playbooks/talos_phase_a.yml create mode 100644 ansible/playbooks/templates/api_health_hint.j2 create mode 100644 ansible/roles/helm_repos/defaults/main.yml create mode 100644 ansible/roles/helm_repos/tasks/main.yml create mode 100644 ansible/roles/noble_argocd/tasks/main.yml create mode 100644 ansible/roles/noble_cert_manager/tasks/main.yml create mode 100644 ansible/roles/noble_cilium/tasks/main.yml create mode 100644 ansible/roles/noble_kube_vip/tasks/main.yml create mode 100644 ansible/roles/noble_kyverno/tasks/main.yml create mode 100644 ansible/roles/noble_kyverno_policies/tasks/main.yml create mode 100644 ansible/roles/noble_longhorn/tasks/main.yml create mode 100644 ansible/roles/noble_metallb/tasks/main.yml create mode 100644 ansible/roles/noble_metrics_server/tasks/main.yml create mode 100644 ansible/roles/noble_newt/tasks/main.yml create mode 100644 ansible/roles/noble_platform/tasks/main.yml create mode 100644 
ansible/roles/noble_post_deploy/tasks/main.yml create mode 100644 ansible/roles/noble_traefik/tasks/main.yml create mode 100644 ansible/roles/talos_bootstrap/defaults/main.yml create mode 100644 ansible/roles/talos_bootstrap/tasks/main.yml create mode 100644 ansible/roles/talos_phase_a/defaults/main.yml create mode 100644 ansible/roles/talos_phase_a/tasks/main.yml create mode 100644 clusters/noble/bootstrap/argocd/apps/kustomization.yaml delete mode 100644 clusters/noble/bootstrap/argocd/apps/noble-kyverno-policies.yaml delete mode 100644 clusters/noble/bootstrap/argocd/apps/noble-kyverno.yaml delete mode 100644 clusters/noble/bootstrap/argocd/apps/noble-platform.yaml diff --git a/ansible/.gitignore b/ansible/.gitignore new file mode 100644 index 0000000..4c9b96c --- /dev/null +++ b/ansible/.gitignore @@ -0,0 +1 @@ +.ansible-tmp/ diff --git a/ansible/README.md b/ansible/README.md new file mode 100644 index 0000000..bb78743 --- /dev/null +++ b/ansible/README.md @@ -0,0 +1,87 @@ +# Ansible — noble cluster + +Automates [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md): optional **Talos Phase A** (genconfig → apply → bootstrap → kubeconfig), then **Phase B+** (CNI → add-ons → ingress → Argo CD → Kyverno → observability, etc.). **Argo CD** does not reconcile core charts — optional GitOps starts from an empty [`clusters/noble/bootstrap/argocd/apps/kustomization.yaml`](../clusters/noble/bootstrap/argocd/apps/kustomization.yaml). + +## Order of operations + +1. **From `talos/`:** `talhelper gensecret` / `talsecret` as in [`talos/README.md`](../talos/README.md) §1 (if not already done). +2. **Talos Phase A (automated):** run [`playbooks/talos_phase_a.yml`](playbooks/talos_phase_a.yml) **or** the full pipeline [`playbooks/deploy.yml`](playbooks/deploy.yml). This runs **`talhelper genconfig -o out`**, **`talosctl apply-config`** on each node, **`talosctl bootstrap`**, and **`talosctl kubeconfig`** → **`talos/kubeconfig`**. +3. 
**Platform stack:** [`playbooks/noble.yml`](playbooks/noble.yml) (included at the end of **`deploy.yml`**). + +Your workstation must be able to reach **node IPs on the lab LAN** (Talos API **:50000** for `talosctl`, Kubernetes **:6443** for `kubectl` / Helm). If `kubectl` cannot reach the VIP (`192.168.50.230`), use `-e 'noble_k8s_api_server_override=https://<node-ip>:6443'` on **`noble.yml`** (see `group_vars/all.yml`). + +**One-shot full deploy** (after nodes are booted and reachable): + +```bash +cd ansible +ansible-playbook playbooks/deploy.yml +``` + +## Prerequisites + +- `talosctl` (matches node Talos version), `talhelper`, `helm`, `kubectl`. +- **Phase A:** same LAN/VPN as nodes so **Talos :50000** and **Kubernetes :6443** are reachable (see [`talos/README.md`](../talos/README.md) §3). +- **noble.yml:** bootstrapped cluster and **`talos/kubeconfig`** (or `KUBECONFIG`). + +## Playbooks + +| Playbook | Purpose | +|----------|---------| +| [`playbooks/deploy.yml`](playbooks/deploy.yml) | **Talos Phase A** then **`noble.yml`** (full automation). | +| [`playbooks/talos_phase_a.yml`](playbooks/talos_phase_a.yml) | `genconfig` → `apply-config` → `bootstrap` → `kubeconfig` only. | +| [`playbooks/noble.yml`](playbooks/noble.yml) | Helm + `kubectl` platform (after Phase A). | +| [`playbooks/post_deploy.yml`](playbooks/post_deploy.yml) | Vault / ESO reminders (`noble_apply_vault_cluster_secret_store`). | +| [`playbooks/talos_bootstrap.yml`](playbooks/talos_bootstrap.yml) | **`talhelper genconfig` only** (legacy shortcut; prefer **`talos_phase_a.yml`**).
| + +```bash +cd ansible +export KUBECONFIG=/absolute/path/to/home-server/talos/kubeconfig + +# noble.yml only — if VIP is unreachable from this host: +# ansible-playbook playbooks/noble.yml -e 'noble_k8s_api_server_override=https://192.168.50.20:6443' + +ansible-playbook playbooks/noble.yml +ansible-playbook playbooks/post_deploy.yml +``` + +### Talos Phase A variables (role `talos_phase_a` defaults) + +Override with `-e` when needed, e.g. **`-e noble_talos_skip_bootstrap=true`** if etcd is already initialized. + +| Variable | Default | Meaning | +|----------|---------|---------| +| `noble_talos_genconfig` | `true` | Run **`talhelper genconfig -o out`** first. | +| `noble_talos_apply_mode` | `auto` | **`auto`** — **`talosctl apply-config --dry-run`** on the first node picks maintenance (**`--insecure`**) vs joined (**`TALOSCONFIG`**). **`insecure`** / **`secure`** force talos/README §2 A or B. | +| `noble_talos_skip_bootstrap` | `false` | Skip **`talosctl bootstrap`**. If etcd is **already** initialized, bootstrap is treated as a no-op (same as **`talosctl`** “etcd data directory is not empty”). | +| `noble_talos_apid_wait_delay` / `noble_talos_apid_wait_timeout` | `20` / `900` | Seconds to wait for **apid :50000** on the bootstrap node after **apply-config** (nodes reboot). Increase if bootstrap hits **connection refused** to `:50000`. | +| `noble_talos_nodes` | neon/argon/krypton/helium | IP + **`out/*.yaml`** filename — align with **`talos/talconfig.yaml`**. | + +### Tags (partial runs) + +```bash +ansible-playbook playbooks/noble.yml --tags cilium,metallb +ansible-playbook playbooks/noble.yml --skip-tags newt +``` + +### Variables — `group_vars/all.yml` + +- **`noble_newt_install`**, **`noble_cert_manager_require_cloudflare_secret`**, **`noble_apply_vault_cluster_secret_store`**, **`noble_k8s_api_server_override`**, **`noble_k8s_api_server_auto_fallback`**, **`noble_k8s_api_server_fallback`**, **`noble_skip_k8s_health_check`**. 
+ +## Roles + +| Role | Contents | +|------|----------| +| `talos_phase_a` | Talos genconfig, apply-config, bootstrap, kubeconfig | +| `helm_repos` | `helm repo add` / `update` | +| `noble_*` | Cilium, metrics-server, Longhorn, MetalLB, kube-vip, Traefik, cert-manager, Newt, Argo CD, Kyverno, platform stack | +| `noble_post_deploy` | Post-install reminders | +| `talos_bootstrap` | Genconfig-only (used by older playbook) | + +## Migrating from Argo-managed `noble-platform` + +```bash +kubectl delete application -n argocd noble-platform noble-kyverno noble-kyverno-policies --ignore-not-found +kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml +``` + +Then run `playbooks/noble.yml` so Helm state matches git values. diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 0000000..8bb8e4f --- /dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,10 @@ +[defaults] +inventory = inventory/localhost.yml +roles_path = roles +retry_files_enabled = False +stdout_callback = default +callback_result_format = yaml +local_tmp = .ansible-tmp + +[privilege_escalation] +become = False diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml new file mode 100644 index 0000000..5388b40 --- /dev/null +++ b/ansible/group_vars/all.yml @@ -0,0 +1,23 @@ +--- +# noble_repo_root / noble_kubeconfig are set in playbooks (use **playbook_dir** magic var). + +# When kubeconfig points at the API VIP but this workstation cannot reach the lab LAN (VPN off, etc.), +# set a reachable control-plane URL — same as: kubectl config set-cluster noble --server=https://<node-ip>:6443 +# Example: ansible-playbook playbooks/noble.yml -e 'noble_k8s_api_server_override=https://192.168.50.20:6443' +noble_k8s_api_server_override: "" + +# When /healthz fails with **network unreachable** to the VIP and **override** is empty, retry using this URL (neon).
+noble_k8s_api_server_auto_fallback: true +noble_k8s_api_server_fallback: "https://192.168.50.20:6443" + +# Only if you must skip the kubectl /healthz preflight (not recommended). +noble_skip_k8s_health_check: false + +# Pangolin / Newt — set true only after creating newt-pangolin-auth Secret (see clusters/noble/apps/newt/README.md) +noble_newt_install: false + +# cert-manager needs Secret cloudflare-dns-api-token in cert-manager namespace before ClusterIssuers work +noble_cert_manager_require_cloudflare_secret: true + +# post_deploy.yml — apply Vault ClusterSecretStore only after Vault is initialized and K8s auth is configured +noble_apply_vault_cluster_secret_store: false diff --git a/ansible/inventory/localhost.yml b/ansible/inventory/localhost.yml new file mode 100644 index 0000000..1b63739 --- /dev/null +++ b/ansible/inventory/localhost.yml @@ -0,0 +1,6 @@ +--- +all: + hosts: + localhost: + ansible_connection: local + ansible_python_interpreter: "{{ ansible_playbook_python }}" diff --git a/ansible/playbooks/deploy.yml b/ansible/playbooks/deploy.yml new file mode 100644 index 0000000..143cfac --- /dev/null +++ b/ansible/playbooks/deploy.yml @@ -0,0 +1,5 @@ +--- +# Full bring-up: Talos Phase A then platform stack. +# Run from **ansible/**: ansible-playbook playbooks/deploy.yml +- import_playbook: talos_phase_a.yml +- import_playbook: noble.yml diff --git a/ansible/playbooks/noble.yml b/ansible/playbooks/noble.yml new file mode 100644 index 0000000..9479482 --- /dev/null +++ b/ansible/playbooks/noble.yml @@ -0,0 +1,215 @@ +--- +# Full platform install — **after** Talos bootstrap (`talosctl bootstrap` + working kubeconfig). +# Do not run until `kubectl get --raw /healthz` returns ok (see talos/README.md §3, CLUSTER-BUILD Phase A). 
+# Run from repo **ansible/** directory: ansible-playbook playbooks/noble.yml +# +# Tags: repos, cilium, metrics, longhorn, metallb, kube_vip, traefik, cert_manager, newt, +# argocd, kyverno, kyverno_policies, platform, all (default) +- name: Noble cluster — platform stack (Ansible-managed) + hosts: localhost + connection: local + gather_facts: false + vars: + noble_repo_root: "{{ playbook_dir | dirname | dirname }}" + noble_kubeconfig: "{{ lookup('env', 'KUBECONFIG') | default(noble_repo_root + '/talos/kubeconfig', true) }}" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + pre_tasks: + # Helm/kubectl use $KUBECONFIG; a missing file yields "connection refused" to localhost:8080. + - name: Stat kubeconfig path from KUBECONFIG or default + ansible.builtin.stat: + path: "{{ noble_kubeconfig }}" + register: noble_kubeconfig_stat + tags: [always] + + - name: Fall back to repo talos/kubeconfig when $KUBECONFIG is unset or not a file + ansible.builtin.set_fact: + noble_kubeconfig: "{{ noble_repo_root }}/talos/kubeconfig" + when: not noble_kubeconfig_stat.stat.exists | default(false) + tags: [always] + + - name: Stat kubeconfig after fallback + ansible.builtin.stat: + path: "{{ noble_kubeconfig }}" + register: noble_kubeconfig_stat2 + tags: [always] + + - name: Require a real kubeconfig file + ansible.builtin.assert: + that: + - noble_kubeconfig_stat2.stat.exists | default(false) + - noble_kubeconfig_stat2.stat.isreg | default(false) + fail_msg: >- + No kubeconfig file at {{ noble_kubeconfig }}. + Fix: export KUBECONFIG=/actual/path/from/talosctl-kubeconfig (see talos/README.md), + or copy the admin kubeconfig to {{ noble_repo_root }}/talos/kubeconfig. + Do not use documentation placeholders as the path. 
+ tags: [always] + + - name: Ensure temp dir for kubeconfig API override + ansible.builtin.file: + path: "{{ noble_repo_root }}/ansible/.ansible-tmp" + state: directory + mode: "0700" + when: noble_k8s_api_server_override | default('') | length > 0 + tags: [always] + + - name: Copy kubeconfig for API server override (original file unchanged) + ansible.builtin.copy: + src: "{{ noble_kubeconfig }}" + dest: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.patched" + mode: "0600" + when: noble_k8s_api_server_override | default('') | length > 0 + tags: [always] + + - name: Resolve current cluster name (for set-cluster) + ansible.builtin.command: + argv: + - kubectl + - config + - view + - --minify + - -o + - jsonpath={.clusters[0].name} + environment: + KUBECONFIG: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.patched" + register: noble_k8s_cluster_name + changed_when: false + when: noble_k8s_api_server_override | default('') | length > 0 + tags: [always] + + - name: Point patched kubeconfig at reachable apiserver + ansible.builtin.command: + argv: + - kubectl + - config + - set-cluster + - "{{ noble_k8s_cluster_name.stdout }}" + - --server={{ noble_k8s_api_server_override }} + - --kubeconfig={{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.patched + when: noble_k8s_api_server_override | default('') | length > 0 + changed_when: true + tags: [always] + + - name: Use patched kubeconfig for this play + ansible.builtin.set_fact: + noble_kubeconfig: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.patched" + when: noble_k8s_api_server_override | default('') | length > 0 + tags: [always] + + - name: Verify Kubernetes API is reachable from this host + ansible.builtin.command: + argv: + - kubectl + - get + - --raw + - /healthz + - --request-timeout=15s + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + register: noble_k8s_health + failed_when: false + changed_when: false + tags: [always] + + # talosctl kubeconfig often sets server to the VIP; 
off-LAN you can reach a control-plane IP but not 192.168.50.230. + - name: Auto-fallback API server when VIP is unreachable (temp kubeconfig) + tags: [always] + when: + - noble_k8s_api_server_auto_fallback | default(true) | bool + - noble_k8s_api_server_override | default('') | length == 0 + - not (noble_skip_k8s_health_check | default(false) | bool) + - noble_k8s_health.rc != 0 or (noble_k8s_health.stdout | default('') | trim) != 'ok' + - ('network is unreachable' in (noble_k8s_health.stderr | default('') | lower)) or + ('no route to host' in (noble_k8s_health.stderr | default('') | lower)) + block: + - name: Ensure temp dir for kubeconfig auto-fallback + ansible.builtin.file: + path: "{{ noble_repo_root }}/ansible/.ansible-tmp" + state: directory + mode: "0700" + + - name: Copy kubeconfig for API auto-fallback + ansible.builtin.copy: + src: "{{ noble_kubeconfig }}" + dest: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.auto-fallback" + mode: "0600" + + - name: Resolve cluster name for kubectl set-cluster + ansible.builtin.command: + argv: + - kubectl + - config + - view + - --minify + - -o + - jsonpath={.clusters[0].name} + environment: + KUBECONFIG: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.auto-fallback" + register: noble_k8s_cluster_fb + changed_when: false + + - name: Point temp kubeconfig at fallback apiserver + ansible.builtin.command: + argv: + - kubectl + - config + - set-cluster + - "{{ noble_k8s_cluster_fb.stdout }}" + - --server={{ noble_k8s_api_server_fallback | default('https://192.168.50.20:6443', true) }} + - --kubeconfig={{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.auto-fallback + changed_when: true + + - name: Use kubeconfig with fallback API server for this play + ansible.builtin.set_fact: + noble_kubeconfig: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.auto-fallback" + + - name: Re-verify Kubernetes API after auto-fallback + ansible.builtin.command: + argv: + - kubectl + - get + - --raw + - /healthz + - 
--request-timeout=15s + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + register: noble_k8s_health + failed_when: false + changed_when: false + + - name: Fail when API check did not return ok + ansible.builtin.fail: + msg: "{{ lookup('template', 'templates/api_health_hint.j2') }}" + when: + - not (noble_skip_k8s_health_check | default(false) | bool) + - noble_k8s_health.rc != 0 or (noble_k8s_health.stdout | default('') | trim) != 'ok' + tags: [always] + + roles: + - role: helm_repos + tags: [repos, helm] + - role: noble_cilium + tags: [cilium, cni] + - role: noble_metrics_server + tags: [metrics, metrics_server] + - role: noble_longhorn + tags: [longhorn, storage] + - role: noble_metallb + tags: [metallb, lb] + - role: noble_kube_vip + tags: [kube_vip, vip] + - role: noble_traefik + tags: [traefik, ingress] + - role: noble_cert_manager + tags: [cert_manager, certs] + - role: noble_newt + tags: [newt] + - role: noble_argocd + tags: [argocd, gitops] + - role: noble_kyverno + tags: [kyverno, policy] + - role: noble_kyverno_policies + tags: [kyverno_policies, policy] + - role: noble_platform + tags: [platform, observability, apps] diff --git a/ansible/playbooks/post_deploy.yml b/ansible/playbooks/post_deploy.yml new file mode 100644 index 0000000..90d1a6c --- /dev/null +++ b/ansible/playbooks/post_deploy.yml @@ -0,0 +1,12 @@ +--- +# Manual follow-ups after **noble.yml**: Vault init/unseal, Kubernetes auth for Vault, ESO ClusterSecretStore. 
+# Run: ansible-playbook playbooks/post_deploy.yml +- name: Noble cluster — post-install reminders + hosts: localhost + connection: local + gather_facts: false + vars: + noble_repo_root: "{{ playbook_dir | dirname | dirname }}" + noble_kubeconfig: "{{ lookup('env', 'KUBECONFIG') | default(noble_repo_root + '/talos/kubeconfig', true) }}" + roles: + - role: noble_post_deploy diff --git a/ansible/playbooks/talos_bootstrap.yml b/ansible/playbooks/talos_bootstrap.yml new file mode 100644 index 0000000..38461b1 --- /dev/null +++ b/ansible/playbooks/talos_bootstrap.yml @@ -0,0 +1,11 @@ +--- +# Genconfig only — for full Talos Phase A (apply, bootstrap, kubeconfig) use **playbooks/talos_phase_a.yml** +# or **playbooks/deploy.yml**. Run: ansible-playbook playbooks/talos_bootstrap.yml -e noble_talos_genconfig=true +- name: Talos — optional genconfig helper + hosts: localhost + connection: local + gather_facts: false + vars: + noble_repo_root: "{{ playbook_dir | dirname | dirname }}" + roles: + - role: talos_bootstrap diff --git a/ansible/playbooks/talos_phase_a.yml b/ansible/playbooks/talos_phase_a.yml new file mode 100644 index 0000000..5b41e7f --- /dev/null +++ b/ansible/playbooks/talos_phase_a.yml @@ -0,0 +1,15 @@ +--- +# Talos Phase A — **talhelper genconfig** → **apply-config** (all nodes) → **bootstrap** → **kubeconfig**. +# Requires: **talosctl**, **talhelper**, reachable node IPs (same LAN as nodes for Talos API :50000). +# See **talos/README.md** §1–§3. Then run **playbooks/noble.yml** or **deploy.yml**. 
+- name: Talos — genconfig, apply, bootstrap, kubeconfig + hosts: localhost + connection: local + gather_facts: false + vars: + noble_repo_root: "{{ playbook_dir | dirname | dirname }}" + noble_talos_dir: "{{ noble_repo_root }}/talos" + noble_talos_kubeconfig_out: "{{ noble_repo_root }}/talos/kubeconfig" + roles: + - role: talos_phase_a + tags: [talos, phase_a] diff --git a/ansible/playbooks/templates/api_health_hint.j2 b/ansible/playbooks/templates/api_health_hint.j2 new file mode 100644 index 0000000..53d157a --- /dev/null +++ b/ansible/playbooks/templates/api_health_hint.j2 @@ -0,0 +1,22 @@ +{# Error output for noble.yml API preflight when kubectl /healthz fails #} +Cannot use the Kubernetes API from this host (kubectl get --raw /healthz). +rc={{ noble_k8s_health.rc }} +stderr: {{ noble_k8s_health.stderr | default('') | trim }} + +{% set err = (noble_k8s_health.stderr | default('')) | lower %} +{% if 'connection refused' in err %} +Connection refused: the TCP path to that host works, but nothing is accepting HTTPS on port 6443 there. + • **Not bootstrapped yet?** Finish Talos first: `talosctl bootstrap` (once on a control plane), then `talosctl kubeconfig`, then confirm `kubectl get nodes`. See talos/README.md §2–§3 and CLUSTER-BUILD.md Phase A. **Do not run this playbook before the Kubernetes API exists.** + • If bootstrap is done: try another control-plane IP (CLUSTER-BUILD inventory: neon 192.168.50.20, argon .30, krypton .40), or the VIP if kube-vip is up and you are on the LAN: + -e 'noble_k8s_api_server_override=https://192.168.50.230:6443' + • Do not point the API URL at a worker-only node. + • `talosctl health` / `kubectl get nodes` from a working client. +{% elif 'network is unreachable' in err or 'no route to host' in err %} +Network unreachable / no route: this machine cannot route to the API IP. Join the lab LAN or VPN, or set a reachable API server URL (talos/README.md §3). 
+{% else %} +If kubeconfig used the VIP from off-LAN, try a reachable control-plane IP, e.g.: + -e 'noble_k8s_api_server_override=https://192.168.50.20:6443' +See talos/README.md §3. +{% endif %} + +To skip this check (not recommended): -e noble_skip_k8s_health_check=true diff --git a/ansible/roles/helm_repos/defaults/main.yml b/ansible/roles/helm_repos/defaults/main.yml new file mode 100644 index 0000000..90a33cb --- /dev/null +++ b/ansible/roles/helm_repos/defaults/main.yml @@ -0,0 +1,18 @@ +--- +noble_helm_repos: + - { name: cilium, url: "https://helm.cilium.io/" } + - { name: metallb, url: "https://metallb.github.io/metallb" } + - { name: longhorn, url: "https://charts.longhorn.io" } + - { name: traefik, url: "https://traefik.github.io/charts" } + - { name: jetstack, url: "https://charts.jetstack.io" } + - { name: fossorial, url: "https://charts.fossorial.io" } + - { name: argo, url: "https://argoproj.github.io/argo-helm" } + - { name: metrics-server, url: "https://kubernetes-sigs.github.io/metrics-server/" } + - { name: sealed-secrets, url: "https://bitnami-labs.github.io/sealed-secrets" } + - { name: external-secrets, url: "https://charts.external-secrets.io" } + - { name: hashicorp, url: "https://helm.releases.hashicorp.com" } + - { name: prometheus-community, url: "https://prometheus-community.github.io/helm-charts" } + - { name: grafana, url: "https://grafana.github.io/helm-charts" } + - { name: fluent, url: "https://fluent.github.io/helm-charts" } + - { name: headlamp, url: "https://kubernetes-sigs.github.io/headlamp/" } + - { name: kyverno, url: "https://kyverno.github.io/kyverno/" } diff --git a/ansible/roles/helm_repos/tasks/main.yml b/ansible/roles/helm_repos/tasks/main.yml new file mode 100644 index 0000000..4b398a7 --- /dev/null +++ b/ansible/roles/helm_repos/tasks/main.yml @@ -0,0 +1,16 @@ +--- +- name: Add Helm repositories + ansible.builtin.command: + cmd: "helm repo add {{ item.name }} {{ item.url }}" + loop: "{{ noble_helm_repos }}" + 
loop_control: + label: "{{ item.name }}" + register: helm_repo_add + changed_when: helm_repo_add.rc == 0 + failed_when: >- + helm_repo_add.rc != 0 and + ('already exists' not in (helm_repo_add.stderr | default(''))) + +- name: helm repo update + ansible.builtin.command: helm repo update + changed_when: true diff --git a/ansible/roles/noble_argocd/tasks/main.yml b/ansible/roles/noble_argocd/tasks/main.yml new file mode 100644 index 0000000..2f7649d --- /dev/null +++ b/ansible/roles/noble_argocd/tasks/main.yml @@ -0,0 +1,20 @@ +--- +- name: Install Argo CD + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - argocd + - argo/argo-cd + - --namespace + - argocd + - --create-namespace + - --version + - "9.4.17" + - -f + - "{{ noble_repo_root }}/clusters/noble/bootstrap/argocd/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true diff --git a/ansible/roles/noble_cert_manager/tasks/main.yml b/ansible/roles/noble_cert_manager/tasks/main.yml new file mode 100644 index 0000000..fd0a88c --- /dev/null +++ b/ansible/roles/noble_cert_manager/tasks/main.yml @@ -0,0 +1,65 @@ +--- +- name: Create cert-manager namespace + ansible.builtin.command: + argv: + - kubectl + - apply + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/cert-manager/namespace.yaml" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install cert-manager + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - cert-manager + - jetstack/cert-manager + - --namespace + - cert-manager + - --version + - v1.20.0 + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/cert-manager/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Check Cloudflare DNS API token Secret (required for ClusterIssuers) + ansible.builtin.command: + argv: + - kubectl + - -n + - cert-manager + - get + - secret + - cloudflare-dns-api-token + environment: + KUBECONFIG: 
"{{ noble_kubeconfig }}" + register: noble_cf_secret + failed_when: false + changed_when: false + +- name: Warn when Cloudflare Secret is missing + ansible.builtin.debug: + msg: >- + Secret cert-manager/cloudflare-dns-api-token not found. + Create it per clusters/noble/apps/cert-manager/README.md before ClusterIssuers can succeed. + when: + - noble_cert_manager_require_cloudflare_secret | bool + - noble_cf_secret.rc != 0 + +- name: Apply ClusterIssuers (staging + prod) + ansible.builtin.command: + argv: + - kubectl + - apply + - -k + - "{{ noble_repo_root }}/clusters/noble/apps/cert-manager" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true diff --git a/ansible/roles/noble_cilium/tasks/main.yml b/ansible/roles/noble_cilium/tasks/main.yml new file mode 100644 index 0000000..25fb9e0 --- /dev/null +++ b/ansible/roles/noble_cilium/tasks/main.yml @@ -0,0 +1,25 @@ +--- +- name: Install Cilium (required CNI for Talos cni:none) + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - cilium + - cilium/cilium + - --namespace + - kube-system + - --version + - "1.16.6" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/cilium/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Wait for Cilium DaemonSet + ansible.builtin.command: kubectl -n kube-system rollout status ds/cilium --timeout=300s + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: false diff --git a/ansible/roles/noble_kube_vip/tasks/main.yml b/ansible/roles/noble_kube_vip/tasks/main.yml new file mode 100644 index 0000000..058ef2f --- /dev/null +++ b/ansible/roles/noble_kube_vip/tasks/main.yml @@ -0,0 +1,11 @@ +--- +- name: Apply kube-vip (Kubernetes API VIP) + ansible.builtin.command: + argv: + - kubectl + - apply + - -k + - "{{ noble_repo_root }}/clusters/noble/apps/kube-vip" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true diff --git 
a/ansible/roles/noble_kyverno/tasks/main.yml b/ansible/roles/noble_kyverno/tasks/main.yml new file mode 100644 index 0000000..62cc4b9 --- /dev/null +++ b/ansible/roles/noble_kyverno/tasks/main.yml @@ -0,0 +1,32 @@ +--- +- name: Create Kyverno namespace + ansible.builtin.command: + argv: + - kubectl + - apply + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/kyverno/namespace.yaml" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install Kyverno operator + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - kyverno + - kyverno/kyverno + - -n + - kyverno + - --version + - "3.7.1" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/kyverno/values.yaml" + - --wait + - --timeout + - 15m + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true diff --git a/ansible/roles/noble_kyverno_policies/tasks/main.yml b/ansible/roles/noble_kyverno_policies/tasks/main.yml new file mode 100644 index 0000000..831fabe --- /dev/null +++ b/ansible/roles/noble_kyverno_policies/tasks/main.yml @@ -0,0 +1,21 @@ +--- +- name: Install Kyverno policy chart (PSS baseline, Audit) + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - kyverno-policies + - kyverno/kyverno-policies + - -n + - kyverno + - --version + - "3.7.1" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/kyverno/policies-values.yaml" + - --wait + - --timeout + - 10m + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true diff --git a/ansible/roles/noble_longhorn/tasks/main.yml b/ansible/roles/noble_longhorn/tasks/main.yml new file mode 100644 index 0000000..3c84148 --- /dev/null +++ b/ansible/roles/noble_longhorn/tasks/main.yml @@ -0,0 +1,29 @@ +--- +- name: Apply Longhorn namespace (PSA) from kustomization + ansible.builtin.command: + argv: + - kubectl + - apply + - -k + - "{{ noble_repo_root }}/clusters/noble/apps/longhorn" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- 
name: Install Longhorn chart + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - longhorn + - longhorn/longhorn + - -n + - longhorn-system + - --create-namespace + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/longhorn/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true diff --git a/ansible/roles/noble_metallb/tasks/main.yml b/ansible/roles/noble_metallb/tasks/main.yml new file mode 100644 index 0000000..5ef2a29 --- /dev/null +++ b/ansible/roles/noble_metallb/tasks/main.yml @@ -0,0 +1,37 @@ +--- +- name: Apply MetalLB namespace (Pod Security labels) + ansible.builtin.command: + argv: + - kubectl + - apply + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/metallb/namespace.yaml" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install MetalLB chart + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - metallb + - metallb/metallb + - --namespace + - metallb-system + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Apply IPAddressPool and L2Advertisement + ansible.builtin.command: + argv: + - kubectl + - apply + - -k + - "{{ noble_repo_root }}/clusters/noble/apps/metallb" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true diff --git a/ansible/roles/noble_metrics_server/tasks/main.yml b/ansible/roles/noble_metrics_server/tasks/main.yml new file mode 100644 index 0000000..b226a41 --- /dev/null +++ b/ansible/roles/noble_metrics_server/tasks/main.yml @@ -0,0 +1,19 @@ +--- +- name: Install metrics-server + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - metrics-server + - metrics-server/metrics-server + - -n + - kube-system + - --version + - "3.13.0" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/metrics-server/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true diff --git 
a/ansible/roles/noble_newt/tasks/main.yml b/ansible/roles/noble_newt/tasks/main.yml new file mode 100644 index 0000000..4393504 --- /dev/null +++ b/ansible/roles/noble_newt/tasks/main.yml @@ -0,0 +1,37 @@ +--- +- name: Skip Newt when not enabled + ansible.builtin.debug: + msg: "noble_newt_install is false — create newt-pangolin-auth Secret and set noble_newt_install=true to deploy Newt." + when: not (noble_newt_install | bool) + +- name: Create Newt namespace + ansible.builtin.command: + argv: + - kubectl + - apply + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/newt/namespace.yaml" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + when: noble_newt_install | bool + changed_when: true + +- name: Install Newt chart + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - newt + - fossorial/newt + - --namespace + - newt + - --version + - "1.2.0" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/newt/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + when: noble_newt_install | bool + changed_when: true diff --git a/ansible/roles/noble_platform/tasks/main.yml b/ansible/roles/noble_platform/tasks/main.yml new file mode 100644 index 0000000..5bc6f08 --- /dev/null +++ b/ansible/roles/noble_platform/tasks/main.yml @@ -0,0 +1,147 @@ +--- +# Mirrors former **noble-platform** Argo Application: Helm releases + plain manifests under clusters/noble/apps. 
+- name: Apply clusters/noble/apps kustomize (namespaces, Grafana Loki datasource, Vault extras) + ansible.builtin.command: + argv: + - kubectl + - apply + - -k + - "{{ noble_repo_root }}/clusters/noble/apps" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install Sealed Secrets + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - sealed-secrets + - sealed-secrets/sealed-secrets + - --namespace + - sealed-secrets + - --version + - "2.18.4" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/sealed-secrets/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install External Secrets Operator + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - external-secrets + - external-secrets/external-secrets + - --namespace + - external-secrets + - --version + - "2.2.0" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/external-secrets/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install Vault + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - vault + - hashicorp/vault + - --namespace + - vault + - --version + - "0.32.0" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/vault/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install kube-prometheus-stack + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - kube-prometheus + - prometheus-community/kube-prometheus-stack + - -n + - monitoring + - --version + - "82.15.1" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/kube-prometheus-stack/values.yaml" + - --wait + - --timeout + - 30m + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install Loki + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - loki + - grafana/loki + - -n + - loki + - --version + - 
"6.55.0" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/loki/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install Fluent Bit + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - fluent-bit + - fluent/fluent-bit + - -n + - logging + - --version + - "0.56.0" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/fluent-bit/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install Headlamp + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - headlamp + - headlamp/headlamp + - --version + - "0.40.1" + - -n + - headlamp + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/headlamp/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true diff --git a/ansible/roles/noble_post_deploy/tasks/main.yml b/ansible/roles/noble_post_deploy/tasks/main.yml new file mode 100644 index 0000000..0aebc40 --- /dev/null +++ b/ansible/roles/noble_post_deploy/tasks/main.yml @@ -0,0 +1,27 @@ +--- +- name: Vault — manual steps (not automated) + ansible.builtin.debug: + msg: | + 1. kubectl -n vault get pods (wait for Running) + 2. kubectl -n vault exec -it vault-0 -- vault operator init (once; save keys) + 3. Unseal per clusters/noble/apps/vault/README.md + 4. ./clusters/noble/apps/vault/configure-kubernetes-auth.sh + 5. 
kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml + +- name: Optional — apply Vault ClusterSecretStore for External Secrets + ansible.builtin.command: + argv: + - kubectl + - apply + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + when: noble_apply_vault_cluster_secret_store | default(false) | bool + changed_when: true + +- name: Argo CD optional root Application (empty app-of-apps) + ansible.builtin.debug: + msg: >- + Optional: kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml + after editing repoURL. Core workloads are not synced by Argo — see bootstrap/argocd/apps/README.md diff --git a/ansible/roles/noble_traefik/tasks/main.yml b/ansible/roles/noble_traefik/tasks/main.yml new file mode 100644 index 0000000..915e892 --- /dev/null +++ b/ansible/roles/noble_traefik/tasks/main.yml @@ -0,0 +1,30 @@ +--- +- name: Create Traefik namespace + ansible.builtin.command: + argv: + - kubectl + - apply + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/traefik/namespace.yaml" + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true + +- name: Install Traefik + ansible.builtin.command: + argv: + - helm + - upgrade + - --install + - traefik + - traefik/traefik + - --namespace + - traefik + - --version + - "39.0.6" + - -f + - "{{ noble_repo_root }}/clusters/noble/apps/traefik/values.yaml" + - --wait + environment: + KUBECONFIG: "{{ noble_kubeconfig }}" + changed_when: true diff --git a/ansible/roles/talos_bootstrap/defaults/main.yml b/ansible/roles/talos_bootstrap/defaults/main.yml new file mode 100644 index 0000000..86f4405 --- /dev/null +++ b/ansible/roles/talos_bootstrap/defaults/main.yml @@ -0,0 +1,3 @@ +--- +# Set **true** to run `talhelper genconfig -o out` under **talos/** (requires talhelper + talconfig). 
+noble_talos_genconfig: false diff --git a/ansible/roles/talos_bootstrap/tasks/main.yml b/ansible/roles/talos_bootstrap/tasks/main.yml new file mode 100644 index 0000000..5a4eb0f --- /dev/null +++ b/ansible/roles/talos_bootstrap/tasks/main.yml @@ -0,0 +1,36 @@ +--- +- name: Generate Talos machine configs (talhelper genconfig) + when: noble_talos_genconfig | bool + block: + - name: Validate talconfig + ansible.builtin.command: + argv: + - talhelper + - validate + - talconfig + - talconfig.yaml + args: + chdir: "{{ noble_repo_root }}/talos" + changed_when: false + + - name: Generate Talos configs (out/) + ansible.builtin.command: + argv: + - talhelper + - genconfig + - -o + - out + args: + chdir: "{{ noble_repo_root }}/talos" + changed_when: true + + - name: Post genconfig — next steps + ansible.builtin.debug: + msg: >- + Configs are in talos/out/. Apply to nodes, bootstrap, and kubeconfig per talos/README.md + before running playbooks/noble.yml. + +- name: Skip when noble_talos_genconfig is false + ansible.builtin.debug: + msg: "No-op: pass -e noble_talos_genconfig=true to run talhelper genconfig." + when: not (noble_talos_genconfig | bool) diff --git a/ansible/roles/talos_phase_a/defaults/main.yml b/ansible/roles/talos_phase_a/defaults/main.yml new file mode 100644 index 0000000..fdb547f --- /dev/null +++ b/ansible/roles/talos_phase_a/defaults/main.yml @@ -0,0 +1,38 @@ +--- +# **noble_repo_root** and **noble_talos_dir** are set by **playbooks/talos_phase_a.yml** (repo root and **talos/**). + +# Run **talhelper genconfig -o out** before apply (needs talhelper + talsecret per talos/README.md §1). +noble_talos_genconfig: true + +# **auto** — probe nodes (maintenance vs joined TLS); **insecure** — always **--insecure**; **secure** — always **TALOSCONFIG** (Phase A already done / talos/README §2 B). +noble_talos_apply_mode: auto + +# Skip if cluster is already bootstrapped (re-run playbook safely). 
+noble_talos_skip_bootstrap: false + +# After **apply-config**, nodes often reboot — wait for Talos **apid** (:50000) before **bootstrap** / **kubeconfig**. +noble_talos_wait_for_apid: true +noble_talos_apid_wait_delay: 20 +noble_talos_apid_wait_timeout: 900 + +# **talosctl bootstrap -n** — first control plane (neon). +noble_talos_bootstrap_node_ip: "192.168.50.20" + +# **talosctl kubeconfig -n** (node that answers Talos/K8s for cert fetch). +noble_talos_kubeconfig_node: "192.168.50.20" + +# **talosctl kubeconfig -e** — Talos endpoint (node IP before VIP is reachable; VIP when LAN works). +noble_talos_kubeconfig_endpoint: "192.168.50.20" + +# After kubeconfig, patch **kubectl** server if VIP in file is unreachable (**group_vars** / same as noble.yml). +# noble_k8s_api_server_override: "" + +# Must match **cluster.name** / kubeconfig cluster entry (often **noble**). +noble_talos_kubectl_cluster_name: noble + +# Inventory: IP + filename under **talos/out/** — align with **talos/talconfig.yaml**. +noble_talos_nodes: + - { ip: "192.168.50.20", machine: "noble-neon.yaml" } + - { ip: "192.168.50.30", machine: "noble-argon.yaml" } + - { ip: "192.168.50.40", machine: "noble-krypton.yaml" } + - { ip: "192.168.50.10", machine: "noble-helium.yaml" } diff --git a/ansible/roles/talos_phase_a/tasks/main.yml b/ansible/roles/talos_phase_a/tasks/main.yml new file mode 100644 index 0000000..d292ab6 --- /dev/null +++ b/ansible/roles/talos_phase_a/tasks/main.yml @@ -0,0 +1,209 @@ +--- +# Order matches talos/README.md: genconfig → apply all nodes → bootstrap → kubeconfig. 
+ +- name: Validate talconfig and generate **out/** (talhelper genconfig) + when: noble_talos_genconfig | bool + block: + - name: talhelper validate + ansible.builtin.command: + argv: + - talhelper + - validate + - talconfig + - talconfig.yaml + args: + chdir: "{{ noble_talos_dir }}" + changed_when: false + + - name: talhelper genconfig -o out + ansible.builtin.command: + argv: + - talhelper + - genconfig + - -o + - out + args: + chdir: "{{ noble_talos_dir }}" + changed_when: true + +- name: Stat talos/out/talosconfig + ansible.builtin.stat: + path: "{{ noble_talos_dir }}/out/talosconfig" + register: noble_talos_talosconfig + +- name: Require talos/out/talosconfig + ansible.builtin.assert: + that: + - noble_talos_talosconfig.stat.exists | default(false) + fail_msg: >- + Missing {{ noble_talos_dir }}/out/talosconfig. Run **talhelper genconfig -o out** in **talos/** (talsecret per talos/README.md §1), + or set **noble_talos_genconfig=true** on this playbook. + +# Maintenance API (**--insecure**) vs joined cluster (**tls: certificate required**) — talos/README §2 A vs B. 
+- name: Set apply path from noble_talos_apply_mode (manual) + ansible.builtin.set_fact: + noble_talos_apply_insecure: "{{ noble_talos_apply_mode == 'insecure' }}" + when: noble_talos_apply_mode | default('auto') in ['insecure', 'secure'] + +- name: Probe Talos API — apply-config dry-run (insecure / maintenance) + ansible.builtin.command: + argv: + - talosctl + - apply-config + - --insecure + - -n + - "{{ noble_talos_nodes[0].ip }}" + - -f + - "{{ noble_talos_dir }}/out/{{ noble_talos_nodes[0].machine }}" + - --dry-run + register: noble_talos_probe_insecure + failed_when: false + changed_when: false + when: noble_talos_apply_mode | default('auto') == 'auto' + +- name: Probe Talos API — apply-config dry-run (TLS / joined) + ansible.builtin.command: + argv: + - talosctl + - apply-config + - -n + - "{{ noble_talos_nodes[0].ip }}" + - -f + - "{{ noble_talos_dir }}/out/{{ noble_talos_nodes[0].machine }}" + - --dry-run + environment: + TALOSCONFIG: "{{ noble_talos_dir }}/out/talosconfig" + register: noble_talos_probe_secure + failed_when: false + changed_when: false + when: + - noble_talos_apply_mode | default('auto') == 'auto' + - noble_talos_probe_insecure.rc != 0 + +- name: Resolve apply mode — maintenance (insecure) + ansible.builtin.set_fact: + noble_talos_apply_insecure: true + when: + - noble_talos_apply_mode | default('auto') == 'auto' + - noble_talos_probe_insecure.rc == 0 + +- name: Resolve apply mode — joined (TALOSCONFIG, no insecure) + ansible.builtin.set_fact: + noble_talos_apply_insecure: false + when: + - noble_talos_apply_mode | default('auto') == 'auto' + - noble_talos_probe_insecure.rc != 0 + - noble_talos_probe_secure.rc == 0 + +- name: Fail when Talos API mode cannot be determined + ansible.builtin.fail: + msg: >- + Cannot run **talosctl apply-config --dry-run** on {{ noble_talos_nodes[0].ip }}. + Insecure: rc={{ noble_talos_probe_insecure.rc }} {{ noble_talos_probe_insecure.stderr | default('') }}. 
+ TLS: rc={{ noble_talos_probe_secure.rc | default('n/a') }} {{ noble_talos_probe_secure.stderr | default('') }}. + Check LAN to :50000, node power, and that **out/talosconfig** matches these nodes. + Override: **-e noble_talos_apply_mode=secure** (joined) or **insecure** (maintenance ISO). + when: + - noble_talos_apply_mode | default('auto') == 'auto' + - noble_talos_probe_insecure.rc != 0 + - noble_talos_probe_secure is not defined or noble_talos_probe_secure.rc != 0 + +- name: Show resolved Talos apply-config mode + ansible.builtin.debug: + msg: >- + apply-config: {{ 'maintenance (--insecure)' if noble_talos_apply_insecure | bool else 'joined (TALOSCONFIG)' }} + (noble_talos_apply_mode={{ noble_talos_apply_mode | default('auto') }}) + +- name: Apply machine config to each node (first install — insecure) + ansible.builtin.command: + argv: + - talosctl + - apply-config + - --insecure + - -n + - "{{ item.ip }}" + - --file + - "{{ noble_talos_dir }}/out/{{ item.machine }}" + loop: "{{ noble_talos_nodes }}" + loop_control: + label: "{{ item.ip }}" + when: noble_talos_apply_insecure | bool + changed_when: true + +- name: Apply machine config to each node (cluster already has TLS — no insecure) + ansible.builtin.command: + argv: + - talosctl + - apply-config + - -n + - "{{ item.ip }}" + - --file + - "{{ noble_talos_dir }}/out/{{ item.machine }}" + environment: + TALOSCONFIG: "{{ noble_talos_dir }}/out/talosconfig" + loop: "{{ noble_talos_nodes }}" + loop_control: + label: "{{ item.ip }}" + when: not (noble_talos_apply_insecure | bool) + changed_when: true + +# apply-config triggers reboots; apid on :50000 must accept connections before talosctl bootstrap / kubeconfig. 
+- name: Wait for Talos machine API (apid) on bootstrap node + ansible.builtin.wait_for: + host: "{{ noble_talos_bootstrap_node_ip }}" + port: 50000 + delay: "{{ noble_talos_apid_wait_delay | int }}" + timeout: "{{ noble_talos_apid_wait_timeout | int }}" + state: started + when: noble_talos_wait_for_apid | default(true) | bool + +- name: Bootstrap cluster (once per cluster) + ansible.builtin.command: + argv: + - talosctl + - bootstrap + - -n + - "{{ noble_talos_bootstrap_node_ip }}" + environment: + TALOSCONFIG: "{{ noble_talos_dir }}/out/talosconfig" + register: noble_talos_bootstrap_cmd + when: not (noble_talos_skip_bootstrap | bool) + changed_when: noble_talos_bootstrap_cmd.rc == 0 + failed_when: >- + noble_talos_bootstrap_cmd.rc != 0 and + ('etcd data directory is not empty' not in (noble_talos_bootstrap_cmd.stderr | default(''))) + +- name: Write Kubernetes admin kubeconfig + ansible.builtin.command: + argv: + - talosctl + - kubeconfig + - "{{ noble_talos_kubeconfig_out }}" + - --force + - -n + - "{{ noble_talos_kubeconfig_node }}" + - -e + - "{{ noble_talos_kubeconfig_endpoint }}" + - --merge=false + environment: + TALOSCONFIG: "{{ noble_talos_dir }}/out/talosconfig" + changed_when: true + +- name: Optional — set kubectl cluster server to reachable API (VIP unreachable from this host) + ansible.builtin.command: + argv: + - kubectl + - config + - set-cluster + - "{{ noble_talos_kubectl_cluster_name }}" + - --server={{ noble_k8s_api_server_override }} + - --kubeconfig={{ noble_talos_kubeconfig_out }} + when: noble_k8s_api_server_override | default('') | length > 0 + changed_when: true + +- name: Next — platform stack + ansible.builtin.debug: + msg: >- + Kubeconfig written to {{ noble_talos_kubeconfig_out }}. + Export KUBECONFIG={{ noble_talos_kubeconfig_out }} and run: ansible-playbook playbooks/noble.yml + (or: ansible-playbook playbooks/deploy.yml for the full pipeline). 
diff --git a/clusters/noble/apps/kustomization.yaml b/clusters/noble/apps/kustomization.yaml index bacd2dd..7ed8a4f 100644 --- a/clusters/noble/apps/kustomization.yaml +++ b/clusters/noble/apps/kustomization.yaml @@ -1,5 +1,5 @@ -# Plain Kustomize only (namespaces + extra YAML). Helm charts are **Application** sources in -# **bootstrap/argocd/apps/noble-platform.yaml** — avoids **kustomize --enable-helm** on repo-server. +# Plain Kustomize only (namespaces + extra YAML). Helm installs are driven by **ansible/playbooks/noble.yml** +# (role **noble_platform**) — avoids **kustomize --enable-helm** in-repo. apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/clusters/noble/bootstrap/argocd/README.md b/clusters/noble/bootstrap/argocd/README.md index 413298f..edae846 100644 --- a/clusters/noble/bootstrap/argocd/README.md +++ b/clusters/noble/bootstrap/argocd/README.md @@ -39,17 +39,21 @@ Change the password in the UI or via `argocd account update-password`. Use **Settings → Repositories** in the UI, or `argocd repo add` / a `Secret` of type `repository`. -## 4. App-of-apps (optional) +## 4. App-of-apps (optional GitOps only) + +Bootstrap **platform** workloads (CNI, ingress, cert-manager, Kyverno, observability, Vault, etc.) are installed by +**`ansible/playbooks/noble.yml`** — not by Argo. **`apps/kustomization.yaml`** is empty by default. 1. Edit **`root-application.yaml`**: set **`repoURL`** and **`targetRevision`** to this repository. The **`resources-finalizer.argocd.argoproj.io/background`** finalizer uses Argo’s path-qualified form so **`kubectl apply`** does not warn about finalizer names. -2. Commit **`Application`** manifests under **`apps/`** (see **`apps/README.md`**). +2. When you want Argo to manage specific apps, add **`Application`** manifests under **`apps/`** (see **`apps/README.md`**). 3. 
Apply the root: ```bash kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml ``` -**`apps/noble-platform.yaml`** uses **multiple sources** (Helm repos + Git **`ref`/`path`**); **`clusters/noble/apps/kustomization.yaml`** is plain resources only — chart installs are **Helm** sources, not Kustomize **helmCharts**. +If you migrated from GitOps-managed **`noble-platform`** / **`noble-kyverno`**, delete stale **`Application`** objects on +the cluster (see **`apps/README.md`**) then re-apply the root. ## Versions diff --git a/clusters/noble/bootstrap/argocd/apps/README.md b/clusters/noble/bootstrap/argocd/apps/README.md index 8c83d56..706feeb 100644 --- a/clusters/noble/bootstrap/argocd/apps/README.md +++ b/clusters/noble/bootstrap/argocd/apps/README.md @@ -1,11 +1,17 @@ -# Argo CD — app-of-apps children +# Argo CD — app-of-apps children (optional GitOps only) -**`noble-root`** syncs this directory. Order matters for **Kyverno** webhooks: +**Core platform is Ansible-managed** — see repository **`ansible/README.md`** and **`ansible/playbooks/noble.yml`**. -| Application | Sync wave | Role | -|-------------|-----------|------| -| **`noble-kyverno`** | `0` | Kyverno operator only | -| **`noble-kyverno-policies`** | `1` | `kyverno-policies` chart (after operator) | -| **`noble-platform`** | `2` | Sealed Secrets, ESO, Vault, observability, Headlamp + Git/Kustomize | +This directory’s **`kustomization.yaml`** has **`resources: []`** so **`noble-root`** (if applied) does not reconcile Helm charts or cluster add-ons. **Add `Application` manifests here only** for apps you want Argo to manage (for example, sample workloads or third-party charts not covered by the bootstrap playbook). -**`noble-platform`** uses **Helm** `sources` + Git **`ref`/`path`**; **`clusters/noble/apps/kustomization.yaml`** is plain resources (no **`helmCharts`**). 
+| Previous (removed) | Now | +|--------------------|-----| +| **`noble-kyverno`**, **`noble-kyverno-policies`**, **`noble-platform`** | Installed by Ansible roles **`noble_kyverno`**, **`noble_kyverno_policies`**, **`noble_platform`** | + +If you previously synced **`noble-root`** with the old child manifests, delete stale Applications on the cluster: + +```bash +kubectl delete application -n argocd noble-platform noble-kyverno noble-kyverno-policies --ignore-not-found +``` + +Then re-apply **`root-application.yaml`** so Argo matches this repo. diff --git a/clusters/noble/bootstrap/argocd/apps/kustomization.yaml b/clusters/noble/bootstrap/argocd/apps/kustomization.yaml new file mode 100644 index 0000000..dc245a5 --- /dev/null +++ b/clusters/noble/bootstrap/argocd/apps/kustomization.yaml @@ -0,0 +1,6 @@ +# Intentionally empty: core platform (CNI, ingress, storage, observability, policy, etc.) is +# installed by **ansible/playbooks/noble.yml** — not by Argo CD. Add optional Application +# manifests here only for workloads you want GitOps-managed. +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: [] diff --git a/clusters/noble/bootstrap/argocd/apps/noble-kyverno-policies.yaml b/clusters/noble/bootstrap/argocd/apps/noble-kyverno-policies.yaml deleted file mode 100644 index d69b41d..0000000 --- a/clusters/noble/bootstrap/argocd/apps/noble-kyverno-policies.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Kyverno policy chart — after operator is up (sync wave 1). Retries help if webhooks were still starting. 
-apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: noble-kyverno-policies - namespace: argocd - annotations: - argocd.argoproj.io/sync-wave: "1" - finalizers: - - resources-finalizer.argocd.argoproj.io/background -spec: - project: default - sources: - - repoURL: https://kyverno.github.io/kyverno/ - chart: kyverno-policies - targetRevision: "3.7.1" - helm: - releaseName: kyverno-policies - namespace: kyverno - valueFiles: - - $values/clusters/noble/apps/kyverno/policies-values.yaml - - repoURL: https://gitea.pcenicni.ca/gsdavidp/home-server.git - targetRevision: HEAD - ref: values - destination: - server: https://kubernetes.default.svc - namespace: default - syncPolicy: - automated: - prune: true - selfHeal: true - syncOptions: - - CreateNamespace=true - - ServerSideApply=true - retry: - limit: 10 - backoff: - duration: 15s - factor: 2 - maxDuration: 5m diff --git a/clusters/noble/bootstrap/argocd/apps/noble-kyverno.yaml b/clusters/noble/bootstrap/argocd/apps/noble-kyverno.yaml deleted file mode 100644 index 13473a9..0000000 --- a/clusters/noble/bootstrap/argocd/apps/noble-kyverno.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Kyverno operator only — must apply before policies and before workloads that hit webhooks. -# Sync wave 0 under **noble-root** (lower number runs first). 
-apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: noble-kyverno - namespace: argocd - annotations: - argocd.argoproj.io/sync-wave: "0" - finalizers: - - resources-finalizer.argocd.argoproj.io/background -spec: - project: default - sources: - - repoURL: https://kyverno.github.io/kyverno/ - chart: kyverno - targetRevision: "3.7.1" - helm: - releaseName: kyverno - namespace: kyverno - valueFiles: - - $values/clusters/noble/apps/kyverno/values.yaml - - repoURL: https://gitea.pcenicni.ca/gsdavidp/home-server.git - targetRevision: HEAD - ref: values - destination: - server: https://kubernetes.default.svc - namespace: default - syncPolicy: - automated: - prune: true - selfHeal: true - syncOptions: - - CreateNamespace=true - - ServerSideApply=true diff --git a/clusters/noble/bootstrap/argocd/apps/noble-platform.yaml b/clusters/noble/bootstrap/argocd/apps/noble-platform.yaml deleted file mode 100644 index 472994a..0000000 --- a/clusters/noble/bootstrap/argocd/apps/noble-platform.yaml +++ /dev/null @@ -1,91 +0,0 @@ -# Platform workloads (no Kyverno — those are **noble-kyverno** + **noble-kyverno-policies**). Sync wave 2. 
-# -# https://argo-cd.readthedocs.io/en/stable/user-guide/multiple_sources/ -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: noble-platform - namespace: argocd - annotations: - argocd.argoproj.io/sync-wave: "2" - finalizers: - - resources-finalizer.argocd.argoproj.io/background -spec: - project: default - sources: - - repoURL: https://bitnami-labs.github.io/sealed-secrets - chart: sealed-secrets - targetRevision: "2.18.4" - helm: - releaseName: sealed-secrets - namespace: sealed-secrets - valueFiles: - - $values/clusters/noble/apps/sealed-secrets/values.yaml - - repoURL: https://charts.external-secrets.io - chart: external-secrets - targetRevision: "2.2.0" - helm: - releaseName: external-secrets - namespace: external-secrets - valueFiles: - - $values/clusters/noble/apps/external-secrets/values.yaml - - repoURL: https://helm.releases.hashicorp.com - chart: vault - targetRevision: "0.32.0" - helm: - releaseName: vault - namespace: vault - valueFiles: - - $values/clusters/noble/apps/vault/values.yaml - - repoURL: https://prometheus-community.github.io/helm-charts - chart: kube-prometheus-stack - targetRevision: "82.15.1" - helm: - releaseName: kube-prometheus - namespace: monitoring - valueFiles: - - $values/clusters/noble/apps/kube-prometheus-stack/values.yaml - - repoURL: https://grafana.github.io/helm-charts - chart: loki - targetRevision: "6.55.0" - helm: - releaseName: loki - namespace: loki - valueFiles: - - $values/clusters/noble/apps/loki/values.yaml - - repoURL: https://fluent.github.io/helm-charts - chart: fluent-bit - targetRevision: "0.56.0" - helm: - releaseName: fluent-bit - namespace: logging - valueFiles: - - $values/clusters/noble/apps/fluent-bit/values.yaml - - repoURL: https://kubernetes-sigs.github.io/headlamp/ - chart: headlamp - targetRevision: "0.40.1" - helm: - releaseName: headlamp - namespace: headlamp - valueFiles: - - $values/clusters/noble/apps/headlamp/values.yaml - - repoURL: 
https://gitea.pcenicni.ca/gsdavidp/home-server.git - targetRevision: HEAD - ref: values - path: clusters/noble/apps - destination: - server: https://kubernetes.default.svc - namespace: default - syncPolicy: - automated: - prune: true - selfHeal: true - syncOptions: - - CreateNamespace=true - - ServerSideApply=true - retry: - limit: 10 - backoff: - duration: 15s - factor: 2 - maxDuration: 5m diff --git a/clusters/noble/bootstrap/argocd/root-application.yaml b/clusters/noble/bootstrap/argocd/root-application.yaml index d5a8c25..1df2225 100644 --- a/clusters/noble/bootstrap/argocd/root-application.yaml +++ b/clusters/noble/bootstrap/argocd/root-application.yaml @@ -1,11 +1,10 @@ -# App-of-apps root — apply after Argo CD is running. +# App-of-apps root — apply after Argo CD is running (optional). # # 1. Set spec.source.repoURL (and targetRevision — **HEAD** tracks the remote default branch) to this repo. # 2. kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml # -# Syncs **Application** YAMLs under **apps/** (**noble-kyverno**, **noble-kyverno-policies**, **noble-platform**). Cluster -# workloads: **apps/noble-platform.yaml** (Helm chart sources + Git/Kustomize); **clusters/noble/apps/** -# holds **values.yaml** and plain **kustomization.yaml** (resources only). Per-app READMEs stay the source of truth for versions. +# **apps/kustomization.yaml** is intentionally empty: core platform is installed by **ansible/playbooks/noble.yml**, +# not Argo. Add **Application** manifests under **apps/** only for optional GitOps-managed workloads. 
# apiVersion: argoproj.io/v1alpha1 kind: Application diff --git a/docs/architecture.md b/docs/architecture.md index 280b3af..3871677 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -74,7 +74,7 @@ flowchart TB ## Platform stack (bootstrap → workloads) -Order: **Talos** → **Cilium** (cluster uses `cni: none` until CNI is installed) → **metrics-server**, **Longhorn**, **MetalLB** + pool manifests, **kube-vip** → **Traefik**, **cert-manager** → **Argo CD** (Helm + app-of-apps under `clusters/noble/bootstrap/argocd/`). Platform namespaces include `cert-manager`, `traefik`, `metallb-system`, `longhorn-system`, `monitoring`, `loki`, `logging`, `argocd`, `vault`, `external-secrets`, `sealed-secrets`, `kyverno`, `newt`, and others as deployed. +Order: **Talos** → **Cilium** (cluster uses `cni: none` until CNI is installed) → **metrics-server**, **Longhorn**, **MetalLB** + pool manifests, **kube-vip** → **Traefik**, **cert-manager** → **Argo CD** (Helm only; optional empty app-of-apps). **Automated install:** `ansible/playbooks/noble.yml` (see `ansible/README.md`). Platform namespaces include `cert-manager`, `traefik`, `metallb-system`, `longhorn-system`, `monitoring`, `loki`, `logging`, `argocd`, `vault`, `external-secrets`, `sealed-secrets`, `kyverno`, `newt`, and others as deployed. ```mermaid flowchart TB @@ -95,7 +95,7 @@ flowchart TB CM["cert-manager + ClusterIssuers"] end subgraph L4["GitOps"] - Argo["Argo CD
app-of-apps under bootstrap/argocd/"] + Argo["Argo CD
(optional app-of-apps; platform via Ansible)"] end subgraph L5["Platform namespaces (examples)"] NS["cert-manager, traefik, metallb-system,
longhorn-system, monitoring, loki, logging,
argocd, vault, external-secrets, sealed-secrets,
kyverno, newt, …"] diff --git a/talos/CLUSTER-BUILD.md b/talos/CLUSTER-BUILD.md index fb00e94..7ea2920 100644 --- a/talos/CLUSTER-BUILD.md +++ b/talos/CLUSTER-BUILD.md @@ -125,6 +125,7 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vaul ## Phase A — Talos bootstrap + API VIP +- Optional: **Ansible** runs the same steps — [`ansible/playbooks/talos_phase_a.yml`](../ansible/playbooks/talos_phase_a.yml) (genconfig → apply → bootstrap → kubeconfig) or [`ansible/playbooks/deploy.yml`](../ansible/playbooks/deploy.yml) (Phase A + **`noble.yml`**); see [`ansible/README.md`](../ansible/README.md). - [x] `talhelper gensecret` → `talhelper genconfig -o out` (re-run `genconfig` after every `talconfig` edit) - [x] `apply-config` all nodes (`talos/README.md` §2 — **no** `--insecure` after nodes join; use `TALOSCONFIG`) - [x] `talosctl bootstrap` once; other control planes and worker join @@ -148,9 +149,9 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vaul ## Phase C — GitOps -- [x] **Argo CD** bootstrap — `clusters/noble/bootstrap/argocd/` (`helm upgrade --install argocd …`) +- [x] **Argo CD** bootstrap — `clusters/noble/bootstrap/argocd/` (`helm upgrade --install argocd …`) — also covered by **`ansible/playbooks/noble.yml`** (role **`noble_argocd`**) - [x] Argo CD server **LoadBalancer** — **`192.168.50.210`** (see `values.yaml`) -- [X] **App-of-apps** — set **`repoURL`** in **`root-application.yaml`**, add **`Application`** manifests under **`bootstrap/argocd/apps/`**, apply **`root-application.yaml`** +- [x] **App-of-apps** — optional; **`apps/kustomization.yaml`** is **empty** (core stack is **Ansible**-managed, not Argo). 
Set **`repoURL`** in **`root-application.yaml`** and add **`Application`** manifests only for optional GitOps workloads — see **`bootstrap/argocd/apps/README.md`** - [x] **Renovate** — **`renovate.json`** at repo root ([Renovate](https://docs.renovatebot.com/) — **Kubernetes** manager for **`clusters/noble/**/*.yaml`** image pins; grouped minor/patch PRs). **Activate PRs:** install **[Mend Renovate](https://github.com/apps/renovate)** on the Git repo (**Option A**), or **Option B:** self-hosted chart per [Helm charts](https://docs.renovatebot.com/helm-charts/) + token from **Sealed Secrets** / **ESO**. Helm **chart** versions pinned only in comments still need manual bumps or extra **regex** `customManagers` — extend **`renovate.json`** as needed. - [ ] SSO — later