Refactor Argo CD application management by removing noble-kyverno and noble-platform configurations, transitioning to Ansible-driven installations. Update documentation to clarify the optional nature of app-of-apps and the role of kustomization.yaml as an empty resource holder. Ensure users are informed about the need to delete stale Applications when migrating from previous configurations.

This commit is contained in:
Nikholas Pcenicni
2026-03-28 15:17:54 -04:00
parent 207cdca0cf
commit 46cedc965f
40 changed files with 1264 additions and 187 deletions

View File

@@ -0,0 +1,5 @@
---
# Full bring-up: Talos Phase A then platform stack.
# Run from **ansible/**: ansible-playbook playbooks/deploy.yml
- import_playbook: talos_phase_a.yml
- import_playbook: noble.yml

215
ansible/playbooks/noble.yml Normal file
View File

@@ -0,0 +1,215 @@
---
# Full platform install — **after** Talos bootstrap (`talosctl bootstrap` + working kubeconfig).
# Do not run until `kubectl get --raw /healthz` returns ok (see talos/README.md §3, CLUSTER-BUILD Phase A).
# Run from repo **ansible/** directory: ansible-playbook playbooks/noble.yml
#
# Tags: repos, cilium, metrics, longhorn, metallb, kube_vip, traefik, cert_manager, newt,
# argocd, kyverno, kyverno_policies, platform, all (default)
- name: Noble cluster — platform stack (Ansible-managed)
hosts: localhost
connection: local
gather_facts: false
vars:
noble_repo_root: "{{ playbook_dir | dirname | dirname }}"
noble_kubeconfig: "{{ lookup('env', 'KUBECONFIG') | default(noble_repo_root + '/talos/kubeconfig', true) }}"
environment:
KUBECONFIG: "{{ noble_kubeconfig }}"
pre_tasks:
# Helm/kubectl use $KUBECONFIG; a missing file yields "connection refused" to localhost:8080.
- name: Stat kubeconfig path from KUBECONFIG or default
ansible.builtin.stat:
path: "{{ noble_kubeconfig }}"
register: noble_kubeconfig_stat
tags: [always]
- name: Fall back to repo talos/kubeconfig when $KUBECONFIG is unset or not a file
ansible.builtin.set_fact:
noble_kubeconfig: "{{ noble_repo_root }}/talos/kubeconfig"
when: not noble_kubeconfig_stat.stat.exists | default(false)
tags: [always]
- name: Stat kubeconfig after fallback
ansible.builtin.stat:
path: "{{ noble_kubeconfig }}"
register: noble_kubeconfig_stat2
tags: [always]
- name: Require a real kubeconfig file
ansible.builtin.assert:
that:
- noble_kubeconfig_stat2.stat.exists | default(false)
- noble_kubeconfig_stat2.stat.isreg | default(false)
fail_msg: >-
No kubeconfig file at {{ noble_kubeconfig }}.
Fix: export KUBECONFIG=/actual/path/from/talosctl-kubeconfig (see talos/README.md),
or copy the admin kubeconfig to {{ noble_repo_root }}/talos/kubeconfig.
Do not use documentation placeholders as the path.
tags: [always]
- name: Ensure temp dir for kubeconfig API override
ansible.builtin.file:
path: "{{ noble_repo_root }}/ansible/.ansible-tmp"
state: directory
mode: "0700"
when: noble_k8s_api_server_override | default('') | length > 0
tags: [always]
- name: Copy kubeconfig for API server override (original file unchanged)
ansible.builtin.copy:
src: "{{ noble_kubeconfig }}"
dest: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.patched"
mode: "0600"
when: noble_k8s_api_server_override | default('') | length > 0
tags: [always]
- name: Resolve current cluster name (for set-cluster)
ansible.builtin.command:
argv:
- kubectl
- config
- view
- --minify
- -o
- jsonpath={.clusters[0].name}
environment:
KUBECONFIG: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.patched"
register: noble_k8s_cluster_name
changed_when: false
when: noble_k8s_api_server_override | default('') | length > 0
tags: [always]
- name: Point patched kubeconfig at reachable apiserver
ansible.builtin.command:
argv:
- kubectl
- config
- set-cluster
- "{{ noble_k8s_cluster_name.stdout }}"
- --server={{ noble_k8s_api_server_override }}
- --kubeconfig={{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.patched
when: noble_k8s_api_server_override | default('') | length > 0
changed_when: true
tags: [always]
- name: Use patched kubeconfig for this play
ansible.builtin.set_fact:
noble_kubeconfig: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.patched"
when: noble_k8s_api_server_override | default('') | length > 0
tags: [always]
- name: Verify Kubernetes API is reachable from this host
ansible.builtin.command:
argv:
- kubectl
- get
- --raw
- /healthz
- --request-timeout=15s
environment:
KUBECONFIG: "{{ noble_kubeconfig }}"
register: noble_k8s_health
failed_when: false
changed_when: false
tags: [always]
# talosctl kubeconfig often sets server to the VIP; off-LAN you can reach a control-plane IP but not 192.168.50.230.
- name: Auto-fallback API server when VIP is unreachable (temp kubeconfig)
tags: [always]
when:
- noble_k8s_api_server_auto_fallback | default(true) | bool
- noble_k8s_api_server_override | default('') | length == 0
- not (noble_skip_k8s_health_check | default(false) | bool)
- noble_k8s_health.rc != 0 or (noble_k8s_health.stdout | default('') | trim) != 'ok'
- ('network is unreachable' in (noble_k8s_health.stderr | default('') | lower)) or
('no route to host' in (noble_k8s_health.stderr | default('') | lower))
block:
- name: Ensure temp dir for kubeconfig auto-fallback
ansible.builtin.file:
path: "{{ noble_repo_root }}/ansible/.ansible-tmp"
state: directory
mode: "0700"
- name: Copy kubeconfig for API auto-fallback
ansible.builtin.copy:
src: "{{ noble_kubeconfig }}"
dest: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.auto-fallback"
mode: "0600"
- name: Resolve cluster name for kubectl set-cluster
ansible.builtin.command:
argv:
- kubectl
- config
- view
- --minify
- -o
- jsonpath={.clusters[0].name}
environment:
KUBECONFIG: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.auto-fallback"
register: noble_k8s_cluster_fb
changed_when: false
- name: Point temp kubeconfig at fallback apiserver
ansible.builtin.command:
argv:
- kubectl
- config
- set-cluster
- "{{ noble_k8s_cluster_fb.stdout }}"
- --server={{ noble_k8s_api_server_fallback | default('https://192.168.50.20:6443', true) }}
- --kubeconfig={{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.auto-fallback
changed_when: true
- name: Use kubeconfig with fallback API server for this play
ansible.builtin.set_fact:
noble_kubeconfig: "{{ noble_repo_root }}/ansible/.ansible-tmp/kubeconfig.auto-fallback"
- name: Re-verify Kubernetes API after auto-fallback
ansible.builtin.command:
argv:
- kubectl
- get
- --raw
- /healthz
- --request-timeout=15s
environment:
KUBECONFIG: "{{ noble_kubeconfig }}"
register: noble_k8s_health
failed_when: false
changed_when: false
- name: Fail when API check did not return ok
ansible.builtin.fail:
msg: "{{ lookup('template', 'templates/api_health_hint.j2') }}"
when:
- not (noble_skip_k8s_health_check | default(false) | bool)
- noble_k8s_health.rc != 0 or (noble_k8s_health.stdout | default('') | trim) != 'ok'
tags: [always]
roles:
- role: helm_repos
tags: [repos, helm]
- role: noble_cilium
tags: [cilium, cni]
- role: noble_metrics_server
tags: [metrics, metrics_server]
- role: noble_longhorn
tags: [longhorn, storage]
- role: noble_metallb
tags: [metallb, lb]
- role: noble_kube_vip
tags: [kube_vip, vip]
- role: noble_traefik
tags: [traefik, ingress]
- role: noble_cert_manager
tags: [cert_manager, certs]
- role: noble_newt
tags: [newt]
- role: noble_argocd
tags: [argocd, gitops]
- role: noble_kyverno
tags: [kyverno, policy]
- role: noble_kyverno_policies
tags: [kyverno_policies, policy]
- role: noble_platform
tags: [platform, observability, apps]

View File

@@ -0,0 +1,12 @@
---
# Manual follow-ups after **noble.yml**: Vault init/unseal, Kubernetes auth for Vault, ESO ClusterSecretStore.
# Run: ansible-playbook playbooks/post_deploy.yml
- name: Noble cluster — post-install reminders
hosts: localhost
connection: local
gather_facts: false
vars:
noble_repo_root: "{{ playbook_dir | dirname | dirname }}"
noble_kubeconfig: "{{ lookup('env', 'KUBECONFIG') | default(noble_repo_root + '/talos/kubeconfig', true) }}"
roles:
- role: noble_post_deploy

View File

@@ -0,0 +1,11 @@
---
# Genconfig only — for full Talos Phase A (apply, bootstrap, kubeconfig) use **playbooks/talos_phase_a.yml**
# or **playbooks/deploy.yml**. Run: ansible-playbook playbooks/talos_bootstrap.yml -e noble_talos_genconfig=true
- name: Talos — optional genconfig helper
hosts: localhost
connection: local
gather_facts: false
vars:
noble_repo_root: "{{ playbook_dir | dirname | dirname }}"
roles:
- role: talos_bootstrap

View File

@@ -0,0 +1,15 @@
---
# Talos Phase A — **talhelper genconfig** → **apply-config** (all nodes) → **bootstrap** → **kubeconfig**.
# Requires: **talosctl**, **talhelper**, reachable node IPs (same LAN as nodes for Talos API :50000).
# See **talos/README.md** §1§3. Then run **playbooks/noble.yml** or **deploy.yml**.
- name: Talos — genconfig, apply, bootstrap, kubeconfig
hosts: localhost
connection: local
gather_facts: false
vars:
noble_repo_root: "{{ playbook_dir | dirname | dirname }}"
noble_talos_dir: "{{ noble_repo_root }}/talos"
noble_talos_kubeconfig_out: "{{ noble_repo_root }}/talos/kubeconfig"
roles:
- role: talos_phase_a
tags: [talos, phase_a]

View File

@@ -0,0 +1,22 @@
{# Error output for noble.yml API preflight when kubectl /healthz fails #}
Cannot use the Kubernetes API from this host (kubectl get --raw /healthz).
rc={{ noble_k8s_health.rc }}
stderr: {{ noble_k8s_health.stderr | default('') | trim }}
{% set err = (noble_k8s_health.stderr | default('')) | lower %}
{% if 'connection refused' in err %}
Connection refused: the TCP path to that host works, but nothing is accepting HTTPS on port 6443 there.
• **Not bootstrapped yet?** Finish Talos first: `talosctl bootstrap` (once on a control plane), then `talosctl kubeconfig`, then confirm `kubectl get nodes`. See talos/README.md §2§3 and CLUSTER-BUILD.md Phase A. **Do not run this playbook before the Kubernetes API exists.**
• If bootstrap is done: try another control-plane IP (CLUSTER-BUILD inventory: neon 192.168.50.20, argon .30, krypton .40), or the VIP if kube-vip is up and you are on the LAN:
-e 'noble_k8s_api_server_override=https://192.168.50.230:6443'
• Do not point the API URL at a worker-only node.
• `talosctl health` / `kubectl get nodes` from a working client.
{% elif 'network is unreachable' in err or 'no route to host' in err %}
Network unreachable / no route: this machine cannot route to the API IP. Join the lab LAN or VPN, or set a reachable API server URL (talos/README.md §3).
{% else %}
If kubeconfig used the VIP from off-LAN, try a reachable control-plane IP, e.g.:
-e 'noble_k8s_api_server_override=https://192.168.50.20:6443'
See talos/README.md §3.
{% endif %}
To skip this check (not recommended): -e noble_skip_k8s_health_check=true