Update .gitignore to include .env file and enhance README.md with instructions for deploying secrets. Refactor noble.yml to improve Kubernetes health check handling and update templates for error reporting. Modify cert-manager and metallb tasks to apply secrets from .env and adjust timeout settings. Clarify Newt installation requirements in tasks. These changes aim to streamline deployment processes and improve documentation clarity.
This commit is contained in:
@@ -17,6 +17,10 @@ cd ansible
|
||||
ansible-playbook playbooks/deploy.yml
|
||||
```
|
||||
|
||||
## Deploy secrets (`.env`)
|
||||
|
||||
Copy **`.env.sample`** to **`.env`** at the repository root (`.env` is gitignored). At minimum set **`CLOUDFLARE_DNS_API_TOKEN`** for cert-manager DNS-01. The **cert-manager** role applies it automatically during **`noble.yml`**. See **`.env.sample`** for optional placeholders (e.g. Newt/Pangolin).
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- `talosctl` (matches node Talos version), `talhelper`, `helm`, `kubectl`.
|
||||
@@ -73,7 +77,7 @@ ansible-playbook playbooks/noble.yml --skip-tags newt
|
||||
|------|----------|
|
||||
| `talos_phase_a` | Talos genconfig, apply-config, bootstrap, kubeconfig |
|
||||
| `helm_repos` | `helm repo add` / `update` |
|
||||
| `noble_*` | Cilium, metrics-server, Longhorn, MetalLB, kube-vip, Traefik, cert-manager, Newt, Argo CD, Kyverno, platform stack |
|
||||
| `noble_*` | Cilium, metrics-server, Longhorn, MetalLB (20m Helm wait), kube-vip, Traefik, cert-manager, Newt, Argo CD, Kyverno, platform stack |
|
||||
| `noble_post_deploy` | Post-install reminders |
|
||||
| `talos_bootstrap` | Genconfig-only (used by older playbook) |
|
||||
|
||||
|
||||
@@ -107,7 +107,7 @@
|
||||
- --request-timeout=15s
|
||||
environment:
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
register: noble_k8s_health
|
||||
register: noble_k8s_health_first
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
tags: [always]
|
||||
@@ -119,9 +119,9 @@
|
||||
- noble_k8s_api_server_auto_fallback | default(true) | bool
|
||||
- noble_k8s_api_server_override | default('') | length == 0
|
||||
- not (noble_skip_k8s_health_check | default(false) | bool)
|
||||
- noble_k8s_health.rc != 0 or (noble_k8s_health.stdout | default('') | trim) != 'ok'
|
||||
- ('network is unreachable' in (noble_k8s_health.stderr | default('') | lower)) or
|
||||
('no route to host' in (noble_k8s_health.stderr | default('') | lower))
|
||||
- (noble_k8s_health_first.rc | default(1)) != 0 or (noble_k8s_health_first.stdout | default('') | trim) != 'ok'
|
||||
- ('network is unreachable' in (noble_k8s_health_first.stderr | default('') | lower)) or
|
||||
('no route to host' in (noble_k8s_health_first.stderr | default('') | lower))
|
||||
block:
|
||||
- name: Ensure temp dir for kubeconfig auto-fallback
|
||||
ansible.builtin.file:
|
||||
@@ -174,16 +174,27 @@
|
||||
- --request-timeout=15s
|
||||
environment:
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
register: noble_k8s_health
|
||||
register: noble_k8s_health_after_fallback
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
|
||||
- name: Mark that API was re-checked after kubeconfig fallback
|
||||
ansible.builtin.set_fact:
|
||||
noble_k8s_api_fallback_used: true
|
||||
|
||||
- name: Normalize API health result for preflight (scalars; avoids dict merge / set_fact stringification)
|
||||
ansible.builtin.set_fact:
|
||||
noble_k8s_health_rc: "{{ noble_k8s_health_after_fallback.rc | default(1) if (noble_k8s_api_fallback_used | default(false) | bool) else (noble_k8s_health_first.rc | default(1)) }}"
|
||||
noble_k8s_health_stdout: "{{ noble_k8s_health_after_fallback.stdout | default('') if (noble_k8s_api_fallback_used | default(false) | bool) else (noble_k8s_health_first.stdout | default('')) }}"
|
||||
noble_k8s_health_stderr: "{{ noble_k8s_health_after_fallback.stderr | default('') if (noble_k8s_api_fallback_used | default(false) | bool) else (noble_k8s_health_first.stderr | default('')) }}"
|
||||
tags: [always]
|
||||
|
||||
- name: Fail when API check did not return ok
|
||||
ansible.builtin.fail:
|
||||
msg: "{{ lookup('template', 'templates/api_health_hint.j2') }}"
|
||||
when:
|
||||
- not (noble_skip_k8s_health_check | default(false) | bool)
|
||||
- noble_k8s_health.rc != 0 or (noble_k8s_health.stdout | default('') | trim) != 'ok'
|
||||
- (noble_k8s_health_rc | int) != 0 or (noble_k8s_health_stdout | default('') | trim) != 'ok'
|
||||
tags: [always]
|
||||
|
||||
roles:
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
{# Error output for noble.yml API preflight when kubectl /healthz fails #}
|
||||
Cannot use the Kubernetes API from this host (kubectl get --raw /healthz).
|
||||
rc={{ noble_k8s_health.rc }}
|
||||
stderr: {{ noble_k8s_health.stderr | default('') | trim }}
|
||||
rc={{ noble_k8s_health_rc | default('n/a') }}
|
||||
stderr: {{ noble_k8s_health_stderr | default('') | trim }}
|
||||
|
||||
{% set err = (noble_k8s_health.stderr | default('')) | lower %}
|
||||
{% set err = (noble_k8s_health_stderr | default('')) | lower %}
|
||||
{% if 'connection refused' in err %}
|
||||
Connection refused: the TCP path to that host works, but nothing is accepting HTTPS on port 6443 there.
|
||||
• **Not bootstrapped yet?** Finish Talos first: `talosctl bootstrap` (once on a control plane), then `talosctl kubeconfig`, then confirm `kubectl get nodes`. See talos/README.md §2–§3 and CLUSTER-BUILD.md Phase A. **Do not run this playbook before the Kubernetes API exists.**
|
||||
|
||||
3
ansible/roles/noble_cert_manager/defaults/main.yml
Normal file
3
ansible/roles/noble_cert_manager/defaults/main.yml
Normal file
@@ -0,0 +1,3 @@
|
||||
---
|
||||
# Warn when **cloudflare-dns-api-token** is missing after apply (also set in **group_vars/all.yml** when loaded).
|
||||
noble_cert_manager_require_cloudflare_secret: true
|
||||
28
ansible/roles/noble_cert_manager/tasks/from_env.yml
Normal file
28
ansible/roles/noble_cert_manager/tasks/from_env.yml
Normal file
@@ -0,0 +1,28 @@
|
||||
---
# Optional step: create/refresh the cert-manager Cloudflare DNS-01 token Secret
# from the repository .env file.
# See repository **.env.sample** — copy to **.env** (gitignored).
#
# Inputs (defined outside this file): noble_repo_root, noble_kubeconfig.
# Registers: noble_deploy_env_file (stat result), noble_cf_secret_from_env
# (stdout is NO_TOKEN when the variable is unset/empty, APPLIED after kubectl apply).

- name: Stat repository .env for deploy secrets
  ansible.builtin.stat:
    path: "{{ noble_repo_root }}/.env"
  register: noble_deploy_env_file
  changed_when: false

- name: Create cert-manager Cloudflare DNS secret from .env
  ansible.builtin.shell: |
    set -euo pipefail
    # Export everything the .env file defines so kubectl sees the token.
    set -a
    . "{{ noble_repo_root }}/.env"
    set +a
    # A missing token is not an error here; report it and leave the cluster untouched.
    if [ -z "${CLOUDFLARE_DNS_API_TOKEN:-}" ]; then
      echo NO_TOKEN
      exit 0
    fi
    # create --dry-run | apply makes the Secret creation repeatable (create alone fails if it exists).
    kubectl -n cert-manager create secret generic cloudflare-dns-api-token \
      --from-literal=api-token="${CLOUDFLARE_DNS_API_TOKEN}" \
      --dry-run=client -o yaml | kubectl apply -f -
    echo APPLIED
  args:
    # The shell module runs /bin/sh by default; `set -o pipefail` requires bash
    # (dash rejects it and the task would fail before doing any work).
    executable: /bin/bash
  environment:
    KUBECONFIG: "{{ noble_kubeconfig }}"
  when: noble_deploy_env_file.stat.exists | default(false)
  # Keep the token out of Ansible output and logs.
  no_log: true
  register: noble_cf_secret_from_env
  changed_when: "'APPLIED' in (noble_cf_secret_from_env.stdout | default(''))"
|
||||
@@ -29,6 +29,9 @@
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
changed_when: true
|
||||
|
||||
- name: Apply secrets from repository .env (optional)
|
||||
ansible.builtin.include_tasks: from_env.yml
|
||||
|
||||
- name: Check Cloudflare DNS API token Secret (required for ClusterIssuers)
|
||||
ansible.builtin.command:
|
||||
argv:
|
||||
@@ -50,7 +53,7 @@
|
||||
Secret cert-manager/cloudflare-dns-api-token not found.
|
||||
Create it per clusters/noble/apps/cert-manager/README.md before ClusterIssuers can succeed.
|
||||
when:
|
||||
- noble_cert_manager_require_cloudflare_secret | bool
|
||||
- noble_cert_manager_require_cloudflare_secret | default(true) | bool
|
||||
- noble_cf_secret.rc != 0
|
||||
|
||||
- name: Apply ClusterIssuers (staging + prod)
|
||||
|
||||
3
ansible/roles/noble_metallb/defaults/main.yml
Normal file
3
ansible/roles/noble_metallb/defaults/main.yml
Normal file
@@ -0,0 +1,3 @@
|
||||
---
|
||||
# Helm's default **--timeout** (5m) for **--wait** is often too short when images pull slowly or nodes are busy.
|
||||
noble_helm_metallb_wait_timeout: 20m
|
||||
@@ -21,6 +21,8 @@
|
||||
- --namespace
|
||||
- metallb-system
|
||||
- --wait
|
||||
- --timeout
|
||||
- "{{ noble_helm_metallb_wait_timeout }}"
|
||||
environment:
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
changed_when: true
|
||||
|
||||
3
ansible/roles/noble_newt/defaults/main.yml
Normal file
3
ansible/roles/noble_newt/defaults/main.yml
Normal file
@@ -0,0 +1,3 @@
|
||||
---
|
||||
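# Newt is installed by default; set false to skip it. The newt-pangolin-auth Secret is applied from the repo .env when PANGOLIN_ENDPOINT, NEWT_ID and NEWT_SECRET are set (otherwise create it manually; see role / cluster docs).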
|
||||
noble_newt_install: true
|
||||
30
ansible/roles/noble_newt/tasks/from_env.yml
Normal file
30
ansible/roles/noble_newt/tasks/from_env.yml
Normal file
@@ -0,0 +1,30 @@
|
||||
---
# Optional step: create/refresh the newt-pangolin-auth Secret from the repository .env file.
# See repository **.env.sample** — copy to **.env** (gitignored).
#
# Inputs (defined outside this file): noble_repo_root, noble_kubeconfig.
# Registers: noble_deploy_env_file (stat result), noble_newt_secret_from_env
# (stdout is NO_VARS when any required variable is unset/empty, APPLIED after kubectl apply).

- name: Stat repository .env for deploy secrets
  ansible.builtin.stat:
    path: "{{ noble_repo_root }}/.env"
  register: noble_deploy_env_file
  changed_when: false

- name: Create newt-pangolin-auth Secret from .env
  ansible.builtin.shell: |
    set -euo pipefail
    # Export everything the .env file defines so kubectl sees the values.
    set -a
    . "{{ noble_repo_root }}/.env"
    set +a
    # All three values are required; if any is missing, report it and leave the cluster untouched.
    if [ -z "${PANGOLIN_ENDPOINT:-}" ] || [ -z "${NEWT_ID:-}" ] || [ -z "${NEWT_SECRET:-}" ]; then
      echo NO_VARS
      exit 0
    fi
    # create --dry-run | apply makes the Secret creation repeatable (create alone fails if it exists).
    kubectl -n newt create secret generic newt-pangolin-auth \
      --from-literal=PANGOLIN_ENDPOINT="${PANGOLIN_ENDPOINT}" \
      --from-literal=NEWT_ID="${NEWT_ID}" \
      --from-literal=NEWT_SECRET="${NEWT_SECRET}" \
      --dry-run=client -o yaml | kubectl apply -f -
    echo APPLIED
  args:
    # The shell module runs /bin/sh by default; `set -o pipefail` requires bash
    # (dash rejects it and the task would fail before doing any work).
    executable: /bin/bash
  environment:
    KUBECONFIG: "{{ noble_kubeconfig }}"
  when: noble_deploy_env_file.stat.exists | default(false)
  # Keep the credentials out of Ansible output and logs.
  no_log: true
  register: noble_newt_secret_from_env
  changed_when: "'APPLIED' in (noble_newt_secret_from_env.stdout | default(''))"
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
- name: Skip Newt when not enabled
|
||||
ansible.builtin.debug:
|
||||
msg: "noble_newt_install is false — create newt-pangolin-auth Secret and set noble_newt_install=true to deploy Newt."
|
||||
msg: "noble_newt_install is false — set PANGOLIN_ENDPOINT, NEWT_ID, NEWT_SECRET in repo .env (or create the Secret manually) and set noble_newt_install=true to deploy Newt."
|
||||
when: not (noble_newt_install | bool)
|
||||
|
||||
- name: Create Newt namespace
|
||||
@@ -16,6 +16,10 @@
|
||||
when: noble_newt_install | bool
|
||||
changed_when: true
|
||||
|
||||
- name: Apply Newt Pangolin auth Secret from repository .env (optional)
|
||||
ansible.builtin.include_tasks: from_env.yml
|
||||
when: noble_newt_install | bool
|
||||
|
||||
- name: Install Newt chart
|
||||
ansible.builtin.command:
|
||||
argv:
|
||||
|
||||
Reference in New Issue
Block a user