diff --git a/.env.sample b/.env.sample index 3f77ae8..ae677ea 100644 --- a/.env.sample +++ b/.env.sample @@ -2,12 +2,12 @@ # Ansible **noble_cert_manager** role sources `.env` after cert-manager Helm install and creates # **cert-manager/cloudflare-dns-api-token** when **CLOUDFLARE_DNS_API_TOKEN** is set. # -# Cloudflare: Zone → DNS → Edit + Zone → Read for **pcenicni.dev** (see clusters/noble/apps/cert-manager/README.md). +# Cloudflare: Zone → DNS → Edit + Zone → Read for **pcenicni.dev** (see clusters/noble/bootstrap/cert-manager/README.md). CLOUDFLARE_DNS_API_TOKEN= # --- Optional: other deploy-time values (documented for manual use or future automation) --- -# Pangolin / Newt — with **noble_newt_install=true**, Ansible creates **newt/newt-pangolin-auth** when all are set (see clusters/noble/apps/newt/README.md). +# Pangolin / Newt — with **noble_newt_install=true**, Ansible creates **newt/newt-pangolin-auth** when all are set (see clusters/noble/bootstrap/newt/README.md). PANGOLIN_ENDPOINT= NEWT_ID= NEWT_SECRET= diff --git a/ansible/README.md b/ansible/README.md index 4378260..6118762 100644 --- a/ansible/README.md +++ b/ansible/README.md @@ -1,6 +1,6 @@ # Ansible — noble cluster -Automates [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md): optional **Talos Phase A** (genconfig → apply → bootstrap → kubeconfig), then **Phase B+** (CNI → add-ons → ingress → Argo CD → Kyverno → observability, etc.). **Argo CD** does not reconcile core charts — optional GitOps starts from an empty [`clusters/noble/bootstrap/argocd/apps/kustomization.yaml`](../clusters/noble/bootstrap/argocd/apps/kustomization.yaml). +Automates [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md): optional **Talos Phase A** (genconfig → apply → bootstrap → kubeconfig), then **Phase B+** (CNI → add-ons → ingress → Argo CD → Kyverno → observability, etc.). 
**Argo CD** does not reconcile core charts — optional GitOps starts from an empty [`clusters/noble/apps/kustomization.yaml`](../clusters/noble/apps/kustomization.yaml). ## Order of operations diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml index 5388b40..87271b3 100644 --- a/ansible/group_vars/all.yml +++ b/ansible/group_vars/all.yml @@ -13,7 +13,7 @@ noble_k8s_api_server_fallback: "https://192.168.50.20:6443" # Only if you must skip the kubectl /healthz preflight (not recommended). noble_skip_k8s_health_check: false -# Pangolin / Newt — set true only after creating newt-pangolin-auth Secret (see clusters/noble/apps/newt/README.md) +# Pangolin / Newt — set true only after creating newt-pangolin-auth Secret (see clusters/noble/bootstrap/newt/README.md) noble_newt_install: false # cert-manager needs Secret cloudflare-dns-api-token in cert-manager namespace before ClusterIssuers work diff --git a/ansible/roles/noble_cert_manager/tasks/main.yml b/ansible/roles/noble_cert_manager/tasks/main.yml index be5bd02..810142f 100644 --- a/ansible/roles/noble_cert_manager/tasks/main.yml +++ b/ansible/roles/noble_cert_manager/tasks/main.yml @@ -5,7 +5,7 @@ - kubectl - apply - -f - - "{{ noble_repo_root }}/clusters/noble/apps/cert-manager/namespace.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/cert-manager/namespace.yaml" environment: KUBECONFIG: "{{ noble_kubeconfig }}" changed_when: true @@ -23,7 +23,7 @@ - --version - v1.20.0 - -f - - "{{ noble_repo_root }}/clusters/noble/apps/cert-manager/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/cert-manager/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" @@ -51,7 +51,7 @@ ansible.builtin.debug: msg: >- Secret cert-manager/cloudflare-dns-api-token not found. - Create it per clusters/noble/apps/cert-manager/README.md before ClusterIssuers can succeed. + Create it per clusters/noble/bootstrap/cert-manager/README.md before ClusterIssuers can succeed. 
when: - noble_cert_manager_require_cloudflare_secret | default(true) | bool - noble_cf_secret.rc != 0 @@ -62,7 +62,7 @@ - kubectl - apply - -k - - "{{ noble_repo_root }}/clusters/noble/apps/cert-manager" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/cert-manager" environment: KUBECONFIG: "{{ noble_kubeconfig }}" changed_when: true diff --git a/ansible/roles/noble_cilium/tasks/main.yml b/ansible/roles/noble_cilium/tasks/main.yml index 25fb9e0..fec9bcb 100644 --- a/ansible/roles/noble_cilium/tasks/main.yml +++ b/ansible/roles/noble_cilium/tasks/main.yml @@ -12,7 +12,7 @@ - --version - "1.16.6" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/cilium/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/cilium/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" diff --git a/ansible/roles/noble_kube_vip/tasks/main.yml b/ansible/roles/noble_kube_vip/tasks/main.yml index 058ef2f..f64e654 100644 --- a/ansible/roles/noble_kube_vip/tasks/main.yml +++ b/ansible/roles/noble_kube_vip/tasks/main.yml @@ -5,7 +5,7 @@ - kubectl - apply - -k - - "{{ noble_repo_root }}/clusters/noble/apps/kube-vip" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-vip" environment: KUBECONFIG: "{{ noble_kubeconfig }}" changed_when: true diff --git a/ansible/roles/noble_kyverno/tasks/main.yml b/ansible/roles/noble_kyverno/tasks/main.yml index 62cc4b9..53799dc 100644 --- a/ansible/roles/noble_kyverno/tasks/main.yml +++ b/ansible/roles/noble_kyverno/tasks/main.yml @@ -5,7 +5,7 @@ - kubectl - apply - -f - - "{{ noble_repo_root }}/clusters/noble/apps/kyverno/namespace.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/namespace.yaml" environment: KUBECONFIG: "{{ noble_kubeconfig }}" changed_when: true @@ -23,7 +23,7 @@ - --version - "3.7.1" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/kyverno/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/values.yaml" - --wait - --timeout - 15m diff --git 
a/ansible/roles/noble_kyverno_policies/tasks/main.yml b/ansible/roles/noble_kyverno_policies/tasks/main.yml index 831fabe..5acdc7f 100644 --- a/ansible/roles/noble_kyverno_policies/tasks/main.yml +++ b/ansible/roles/noble_kyverno_policies/tasks/main.yml @@ -12,7 +12,7 @@ - --version - "3.7.1" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/kyverno/policies-values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/policies-values.yaml" - --wait - --timeout - 10m diff --git a/ansible/roles/noble_landing_urls/templates/noble-lab-ui-urls.md.j2 b/ansible/roles/noble_landing_urls/templates/noble-lab-ui-urls.md.j2 index ca22969..35ebdb4 100644 --- a/ansible/roles/noble_landing_urls/templates/noble-lab-ui-urls.md.j2 +++ b/ansible/roles/noble_landing_urls/templates/noble-lab-ui-urls.md.j2 @@ -2,7 +2,7 @@ > **Sensitive:** This file may include **passwords read from Kubernetes Secrets** when credential fetch ran. It is **gitignored** — do not commit or share. -**DNS:** point **`*.apps.noble.lab.pcenicni.dev`** at the Traefik **LoadBalancer** (MetalLB **`192.168.50.211`** by default — see `clusters/noble/apps/traefik/values.yaml`). +**DNS:** point **`*.apps.noble.lab.pcenicni.dev`** at the Traefik **LoadBalancer** (MetalLB **`192.168.50.211`** by default — see `clusters/noble/bootstrap/traefik/values.yaml`). **TLS:** **cert-manager** + **`letsencrypt-prod`** on each Ingress (public **DNS-01** for **`pcenicni.dev`**). @@ -24,7 +24,7 @@ This file is **generated** by Ansible (`noble_landing_urls` role). Use it as a t | **Prometheus** | — | No auth in default install (lab). | | **Alertmanager** | — | No auth in default install (lab). | | **Longhorn** | — | No default login unless you enable access control in the UI settings. | -| **Vault** | Token | Root token is only from **`vault operator init`** (not stored in git). See `clusters/noble/apps/vault/README.md`. | +| **Vault** | Token | Root token is only from **`vault operator init`** (not stored in git). 
See `clusters/noble/bootstrap/vault/README.md`. | ### Commands to retrieve passwords (if not filled above) diff --git a/ansible/roles/noble_longhorn/tasks/main.yml b/ansible/roles/noble_longhorn/tasks/main.yml index 3c84148..c3d47a3 100644 --- a/ansible/roles/noble_longhorn/tasks/main.yml +++ b/ansible/roles/noble_longhorn/tasks/main.yml @@ -5,7 +5,7 @@ - kubectl - apply - -k - - "{{ noble_repo_root }}/clusters/noble/apps/longhorn" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/longhorn" environment: KUBECONFIG: "{{ noble_kubeconfig }}" changed_when: true @@ -22,7 +22,7 @@ - longhorn-system - --create-namespace - -f - - "{{ noble_repo_root }}/clusters/noble/apps/longhorn/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/longhorn/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" diff --git a/ansible/roles/noble_metallb/tasks/main.yml b/ansible/roles/noble_metallb/tasks/main.yml index 24fad06..4cd3e76 100644 --- a/ansible/roles/noble_metallb/tasks/main.yml +++ b/ansible/roles/noble_metallb/tasks/main.yml @@ -5,7 +5,7 @@ - kubectl - apply - -f - - "{{ noble_repo_root }}/clusters/noble/apps/metallb/namespace.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/metallb/namespace.yaml" environment: KUBECONFIG: "{{ noble_kubeconfig }}" changed_when: true @@ -33,7 +33,7 @@ - kubectl - apply - -k - - "{{ noble_repo_root }}/clusters/noble/apps/metallb" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/metallb" environment: KUBECONFIG: "{{ noble_kubeconfig }}" changed_when: true diff --git a/ansible/roles/noble_metrics_server/tasks/main.yml b/ansible/roles/noble_metrics_server/tasks/main.yml index b226a41..6ed761f 100644 --- a/ansible/roles/noble_metrics_server/tasks/main.yml +++ b/ansible/roles/noble_metrics_server/tasks/main.yml @@ -12,7 +12,7 @@ - --version - "3.13.0" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/metrics-server/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/metrics-server/values.yaml" 
- --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" diff --git a/ansible/roles/noble_newt/tasks/main.yml b/ansible/roles/noble_newt/tasks/main.yml index 3bde6a3..f8bc7c6 100644 --- a/ansible/roles/noble_newt/tasks/main.yml +++ b/ansible/roles/noble_newt/tasks/main.yml @@ -10,7 +10,7 @@ - kubectl - apply - -f - - "{{ noble_repo_root }}/clusters/noble/apps/newt/namespace.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/newt/namespace.yaml" environment: KUBECONFIG: "{{ noble_kubeconfig }}" when: noble_newt_install | bool @@ -33,7 +33,7 @@ - --version - "1.2.0" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/newt/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/newt/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" diff --git a/ansible/roles/noble_platform/tasks/main.yml b/ansible/roles/noble_platform/tasks/main.yml index 802344e..fb856cb 100644 --- a/ansible/roles/noble_platform/tasks/main.yml +++ b/ansible/roles/noble_platform/tasks/main.yml @@ -1,13 +1,13 @@ --- -# Mirrors former **noble-platform** Argo Application: Helm releases + plain manifests under clusters/noble/apps. -- name: Apply clusters/noble/apps kustomize (namespaces, Grafana Loki datasource, Vault extras) +# Mirrors former **noble-platform** Argo Application: Helm releases + plain manifests under clusters/noble/bootstrap. 
+- name: Apply clusters/noble/bootstrap kustomize (namespaces, Grafana Loki datasource, Vault extras) ansible.builtin.command: argv: - kubectl - apply - "--request-timeout={{ noble_platform_kubectl_request_timeout }}" - -k - - "{{ noble_repo_root }}/clusters/noble/apps" + - "{{ noble_repo_root }}/clusters/noble/bootstrap" environment: KUBECONFIG: "{{ noble_kubeconfig }}" register: noble_platform_kustomize @@ -29,7 +29,7 @@ - --version - "2.18.4" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/sealed-secrets/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/sealed-secrets/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" @@ -48,7 +48,7 @@ - --version - "2.2.0" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/external-secrets/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/external-secrets/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" @@ -82,7 +82,7 @@ - --version - "0.32.0" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/vault/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/vault/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" @@ -102,7 +102,7 @@ - --version - "82.15.1" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/kube-prometheus-stack/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml" - --wait - --timeout - 30m @@ -123,7 +123,7 @@ - --version - "6.55.0" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/loki/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/loki/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" @@ -142,7 +142,7 @@ - --version - "0.56.0" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/fluent-bit/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/fluent-bit/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" @@ -161,7 +161,7 @@ - -n - headlamp - -f - - "{{ noble_repo_root 
}}/clusters/noble/apps/headlamp/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/headlamp/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" diff --git a/ansible/roles/noble_post_deploy/tasks/main.yml b/ansible/roles/noble_post_deploy/tasks/main.yml index 0aebc40..ff08dba 100644 --- a/ansible/roles/noble_post_deploy/tasks/main.yml +++ b/ansible/roles/noble_post_deploy/tasks/main.yml @@ -4,9 +4,9 @@ msg: | 1. kubectl -n vault get pods (wait for Running) 2. kubectl -n vault exec -it vault-0 -- vault operator init (once; save keys) - 3. Unseal per clusters/noble/apps/vault/README.md - 4. ./clusters/noble/apps/vault/configure-kubernetes-auth.sh - 5. kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml + 3. Unseal per clusters/noble/bootstrap/vault/README.md + 4. ./clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh + 5. kubectl apply -f clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml - name: Optional — apply Vault ClusterSecretStore for External Secrets ansible.builtin.command: @@ -14,7 +14,7 @@ - kubectl - apply - -f - - "{{ noble_repo_root }}/clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml" environment: KUBECONFIG: "{{ noble_kubeconfig }}" when: noble_apply_vault_cluster_secret_store | default(false) | bool @@ -24,4 +24,4 @@ ansible.builtin.debug: msg: >- Optional: kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml - after editing repoURL. Core workloads are not synced by Argo — see bootstrap/argocd/apps/README.md + after editing repoURL. 
Core workloads are not synced by Argo — see clusters/noble/apps/README.md diff --git a/ansible/roles/noble_traefik/tasks/main.yml b/ansible/roles/noble_traefik/tasks/main.yml index 915e892..8cec5fa 100644 --- a/ansible/roles/noble_traefik/tasks/main.yml +++ b/ansible/roles/noble_traefik/tasks/main.yml @@ -5,7 +5,7 @@ - kubectl - apply - -f - - "{{ noble_repo_root }}/clusters/noble/apps/traefik/namespace.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/traefik/namespace.yaml" environment: KUBECONFIG: "{{ noble_kubeconfig }}" changed_when: true @@ -23,7 +23,7 @@ - --version - "39.0.6" - -f - - "{{ noble_repo_root }}/clusters/noble/apps/traefik/values.yaml" + - "{{ noble_repo_root }}/clusters/noble/bootstrap/traefik/values.yaml" - --wait environment: KUBECONFIG: "{{ noble_kubeconfig }}" diff --git a/clusters/noble/bootstrap/argocd/README.md b/clusters/noble/bootstrap/argocd/README.md index 8b7dec4..f8c9759 100644 --- a/clusters/noble/bootstrap/argocd/README.md +++ b/clusters/noble/bootstrap/argocd/README.md @@ -53,10 +53,10 @@ Use **Settings → Repositories** in the UI, or `argocd repo add` / a `Secret` o ## 4. App-of-apps (optional GitOps only) Bootstrap **platform** workloads (CNI, ingress, cert-manager, Kyverno, observability, Vault, etc.) are installed by -**`ansible/playbooks/noble.yml`** — not by Argo. **`apps/kustomization.yaml`** is empty by default. +**`ansible/playbooks/noble.yml`** from **`clusters/noble/bootstrap/`** — not by Argo. **`clusters/noble/apps/kustomization.yaml`** is empty by default. 1. Edit **`root-application.yaml`**: set **`repoURL`** and **`targetRevision`** to this repository. The **`resources-finalizer.argocd.argoproj.io/background`** finalizer uses Argo’s path-qualified form so **`kubectl apply`** does not warn about finalizer names. -2. When you want Argo to manage specific apps, add **`Application`** manifests under **`apps/`** (see **`apps/README.md`**). +2. 
When you want Argo to manage specific apps, add **`Application`** manifests under **`clusters/noble/apps/`** (see **`clusters/noble/apps/README.md`**). 3. Apply the root: ```bash @@ -64,7 +64,7 @@ Bootstrap **platform** workloads (CNI, ingress, cert-manager, Kyverno, observabi ``` If you migrated from GitOps-managed **`noble-platform`** / **`noble-kyverno`**, delete stale **`Application`** objects on -the cluster (see **`apps/README.md`**) then re-apply the root. +the cluster (see **`clusters/noble/apps/README.md`**) then re-apply the root. ## Versions diff --git a/clusters/noble/bootstrap/argocd/root-application.yaml b/clusters/noble/bootstrap/argocd/root-application.yaml index 1df2225..7fd72e4 100644 --- a/clusters/noble/bootstrap/argocd/root-application.yaml +++ b/clusters/noble/bootstrap/argocd/root-application.yaml @@ -3,8 +3,8 @@ # 1. Set spec.source.repoURL (and targetRevision — **HEAD** tracks the remote default branch) to this repo. # 2. kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml # -# **apps/kustomization.yaml** is intentionally empty: core platform is installed by **ansible/playbooks/noble.yml**, -# not Argo. Add **Application** manifests under **apps/** only for optional GitOps-managed workloads. +# **clusters/noble/apps** holds optional **Application** manifests. Core platform is installed by +# **ansible/playbooks/noble.yml** from **clusters/noble/bootstrap/**. 
# apiVersion: argoproj.io/v1alpha1 kind: Application @@ -21,7 +21,7 @@ spec: source: repoURL: https://gitea.pcenicni.ca/gsdavidp/home-server.git targetRevision: HEAD - path: clusters/noble/bootstrap/argocd/apps + path: clusters/noble/apps destination: server: https://kubernetes.default.svc namespace: argocd diff --git a/clusters/noble/bootstrap/cert-manager/README.md b/clusters/noble/bootstrap/cert-manager/README.md index 1085df7..d88112f 100644 --- a/clusters/noble/bootstrap/cert-manager/README.md +++ b/clusters/noble/bootstrap/cert-manager/README.md @@ -19,7 +19,7 @@ Without this Secret, **`ClusterIssuer`** will not complete certificate orders. 1. Create the namespace: ```bash - kubectl apply -f clusters/noble/apps/cert-manager/namespace.yaml + kubectl apply -f clusters/noble/bootstrap/cert-manager/namespace.yaml ``` 2. Install the chart (CRDs included via `values.yaml`): @@ -30,7 +30,7 @@ Without this Secret, **`ClusterIssuer`** will not complete certificate orders. helm upgrade --install cert-manager jetstack/cert-manager \ --namespace cert-manager \ --version v1.20.0 \ - -f clusters/noble/apps/cert-manager/values.yaml \ + -f clusters/noble/bootstrap/cert-manager/values.yaml \ --wait ``` @@ -39,7 +39,7 @@ Without this Secret, **`ClusterIssuer`** will not complete certificate orders. 4. Apply ClusterIssuers (staging then prod, or both): ```bash - kubectl apply -k clusters/noble/apps/cert-manager + kubectl apply -k clusters/noble/bootstrap/cert-manager ``` 5. Confirm: diff --git a/clusters/noble/bootstrap/cert-manager/values.yaml b/clusters/noble/bootstrap/cert-manager/values.yaml index ea2a2a7..bf53025 100644 --- a/clusters/noble/bootstrap/cert-manager/values.yaml +++ b/clusters/noble/bootstrap/cert-manager/values.yaml @@ -2,13 +2,13 @@ # # Chart: jetstack/cert-manager — pin version on the helm command (e.g. v1.20.0). 
# -# kubectl apply -f clusters/noble/apps/cert-manager/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/cert-manager/namespace.yaml # helm repo add jetstack https://charts.jetstack.io # helm repo update # helm upgrade --install cert-manager jetstack/cert-manager -n cert-manager \ -# --version v1.20.0 -f clusters/noble/apps/cert-manager/values.yaml --wait +# --version v1.20.0 -f clusters/noble/bootstrap/cert-manager/values.yaml --wait # -# kubectl apply -k clusters/noble/apps/cert-manager +# kubectl apply -k clusters/noble/bootstrap/cert-manager crds: enabled: true diff --git a/clusters/noble/bootstrap/cilium/README.md b/clusters/noble/bootstrap/cilium/README.md index 0a02b92..7d6157b 100644 --- a/clusters/noble/bootstrap/cilium/README.md +++ b/clusters/noble/bootstrap/cilium/README.md @@ -14,7 +14,7 @@ helm repo update helm upgrade --install cilium cilium/cilium \ --namespace kube-system \ --version 1.16.6 \ - -f clusters/noble/apps/cilium/values.yaml \ + -f clusters/noble/bootstrap/cilium/values.yaml \ --wait ``` @@ -25,7 +25,7 @@ kubectl -n kube-system rollout status ds/cilium kubectl get nodes ``` -When nodes are **Ready**, continue with **MetalLB** (`clusters/noble/apps/metallb/README.md`) and other Phase B items. **kube-vip** for the Kubernetes API VIP is separate (L2 ARP); it can run after the API is reachable. +When nodes are **Ready**, continue with **MetalLB** (`clusters/noble/bootstrap/metallb/README.md`) and other Phase B items. **kube-vip** for the Kubernetes API VIP is separate (L2 ARP); it can run after the API is reachable. ## 2. 
Optional: kube-proxy replacement (phase 2) diff --git a/clusters/noble/bootstrap/external-secrets/README.md b/clusters/noble/bootstrap/external-secrets/README.md index 04374c4..8a4848b 100644 --- a/clusters/noble/bootstrap/external-secrets/README.md +++ b/clusters/noble/bootstrap/external-secrets/README.md @@ -11,9 +11,9 @@ Syncs secrets from external systems into Kubernetes **Secret** objects via **Ext ```bash helm repo add external-secrets https://charts.external-secrets.io helm repo update -kubectl apply -f clusters/noble/apps/external-secrets/namespace.yaml +kubectl apply -f clusters/noble/bootstrap/external-secrets/namespace.yaml helm upgrade --install external-secrets external-secrets/external-secrets -n external-secrets \ - --version 2.2.0 -f clusters/noble/apps/external-secrets/values.yaml --wait + --version 2.2.0 -f clusters/noble/bootstrap/external-secrets/values.yaml --wait ``` Verify: diff --git a/clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml b/clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml index bdbb1fc..159bea0 100644 --- a/clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml +++ b/clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml @@ -10,7 +10,7 @@ # Adjust server, mountPath, role, and path to match your Vault deployment. If Vault uses TLS # with a private CA, set provider.vault.caProvider or caBundle (see README). 
# -# kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml +# kubectl apply -f clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml --- apiVersion: external-secrets.io/v1 kind: ClusterSecretStore diff --git a/clusters/noble/bootstrap/external-secrets/values.yaml b/clusters/noble/bootstrap/external-secrets/values.yaml index 871f674..a630c8b 100644 --- a/clusters/noble/bootstrap/external-secrets/values.yaml +++ b/clusters/noble/bootstrap/external-secrets/values.yaml @@ -2,9 +2,9 @@ # # helm repo add external-secrets https://charts.external-secrets.io # helm repo update -# kubectl apply -f clusters/noble/apps/external-secrets/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/external-secrets/namespace.yaml # helm upgrade --install external-secrets external-secrets/external-secrets -n external-secrets \ -# --version 2.2.0 -f clusters/noble/apps/external-secrets/values.yaml --wait +# --version 2.2.0 -f clusters/noble/bootstrap/external-secrets/values.yaml --wait # # CRDs are installed by the chart (installCRDs: true). Vault ClusterSecretStore: see README + examples/. commonLabels: {} diff --git a/clusters/noble/bootstrap/fluent-bit/values.yaml b/clusters/noble/bootstrap/fluent-bit/values.yaml index f87ae33..0e2c2f5 100644 --- a/clusters/noble/bootstrap/fluent-bit/values.yaml +++ b/clusters/noble/bootstrap/fluent-bit/values.yaml @@ -5,11 +5,11 @@ # # Talos: only **tail** `/var/log/containers` (no host **systemd** input — journal layout differs from typical Linux). 
# -# kubectl apply -f clusters/noble/apps/fluent-bit/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/fluent-bit/namespace.yaml # helm repo add fluent https://fluent.github.io/helm-charts # helm repo update # helm upgrade --install fluent-bit fluent/fluent-bit -n logging \ -# --version 0.56.0 -f clusters/noble/apps/fluent-bit/values.yaml --wait --timeout 15m +# --version 0.56.0 -f clusters/noble/bootstrap/fluent-bit/values.yaml --wait --timeout 15m config: inputs: | diff --git a/clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml b/clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml index 738dc76..5b53145 100644 --- a/clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml +++ b/clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml @@ -2,9 +2,9 @@ # The Grafana sidecar watches ConfigMaps labeled **grafana_datasource: "1"** and loads YAML keys as files. # Does not require editing the kube-prometheus-stack Helm release. 
# -# kubectl apply -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml +# kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml # -# Remove with: kubectl delete -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml +# Remove with: kubectl delete -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml apiVersion: v1 kind: ConfigMap metadata: diff --git a/clusters/noble/bootstrap/headlamp/README.md b/clusters/noble/bootstrap/headlamp/README.md index 14d42dc..ada6228 100644 --- a/clusters/noble/bootstrap/headlamp/README.md +++ b/clusters/noble/bootstrap/headlamp/README.md @@ -10,9 +10,9 @@ ```bash helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/ helm repo update -kubectl apply -f clusters/noble/apps/headlamp/namespace.yaml +kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml helm upgrade --install headlamp headlamp/headlamp -n headlamp \ - --version 0.40.1 -f clusters/noble/apps/headlamp/values.yaml --wait --timeout 10m + --version 0.40.1 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m ``` Sign-in uses a **ServiceAccount token** (Headlamp docs: create a limited SA for day-to-day use). This repo binds the Headlamp workload SA to the built-in **`edit`** ClusterRole (**`clusterRoleBinding.clusterRoleName: edit`** in **`values.yaml`**) — not **`cluster-admin`**. For cluster-scoped admin work, use **`kubectl`** with your admin kubeconfig. Optional **OIDC** in **`config.oidc`** replaces token login for SSO. 
diff --git a/clusters/noble/bootstrap/headlamp/values.yaml b/clusters/noble/bootstrap/headlamp/values.yaml index 916b58a..b9cc69f 100644 --- a/clusters/noble/bootstrap/headlamp/values.yaml +++ b/clusters/noble/bootstrap/headlamp/values.yaml @@ -2,9 +2,9 @@ # # helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/ # helm repo update -# kubectl apply -f clusters/noble/apps/headlamp/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml # helm upgrade --install headlamp headlamp/headlamp -n headlamp \ -# --version 0.40.1 -f clusters/noble/apps/headlamp/values.yaml --wait --timeout 10m +# --version 0.40.1 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m # # DNS: headlamp.apps.noble.lab.pcenicni.dev → Traefik LB (see talos/CLUSTER-BUILD.md). # Default chart RBAC is broad — restrict for production (Phase G). diff --git a/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml b/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml index 9dc9077..d8e7df3 100644 --- a/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml +++ b/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml @@ -4,10 +4,10 @@ # # Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`): # -# kubectl apply -f clusters/noble/apps/kube-prometheus-stack/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml # helm repo add prometheus-community https://prometheus-community.github.io/helm-charts # helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \ -# --version 82.15.1 -f clusters/noble/apps/kube-prometheus-stack/values.yaml --wait --timeout 30m +# --version 82.15.1 -f clusters/noble/bootstrap/kube-prometheus-stack/values.yaml --wait --timeout 30m # # Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes). 
# Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 15–30m. To watch progress, @@ -87,7 +87,7 @@ grafana: size: 10Gi # HTTPS via Traefik + cert-manager (ClusterIssuer letsencrypt-prod; same pattern as other *.apps.noble.lab.pcenicni.dev hosts). - # DNS: grafana.apps.noble.lab.pcenicni.dev → Traefik LoadBalancer (192.168.50.211) — see clusters/noble/apps/traefik/values.yaml + # DNS: grafana.apps.noble.lab.pcenicni.dev → Traefik LoadBalancer (192.168.50.211) — see clusters/noble/bootstrap/traefik/values.yaml ingress: enabled: true ingressClassName: traefik @@ -109,4 +109,4 @@ grafana: # Traefik sets X-Forwarded-*; required for correct redirects and cookies behind the ingress. use_proxy_headers: true - # Loki datasource: apply `clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml` (sidecar ConfigMap) instead of additionalDataSources here. + # Loki datasource: apply `clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml` (sidecar ConfigMap) instead of additionalDataSources here. diff --git a/clusters/noble/bootstrap/kustomization.yaml b/clusters/noble/bootstrap/kustomization.yaml index 7ed8a4f..c18be26 100644 --- a/clusters/noble/bootstrap/kustomization.yaml +++ b/clusters/noble/bootstrap/kustomization.yaml @@ -1,5 +1,6 @@ -# Plain Kustomize only (namespaces + extra YAML). Helm installs are driven by **ansible/playbooks/noble.yml** -# (role **noble_platform**) — avoids **kustomize --enable-helm** in-repo. +# Ansible bootstrap: plain Kustomize (namespaces + extra YAML). Helm installs are driven by +# **ansible/playbooks/noble.yml** (role **noble_platform**) — avoids **kustomize --enable-helm** in-repo. +# Optional GitOps workloads live under **../apps/** (Argo **noble-root**). 
apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/clusters/noble/bootstrap/kyverno/README.md b/clusters/noble/bootstrap/kyverno/README.md index b615ead..81ea6d5 100644 --- a/clusters/noble/bootstrap/kyverno/README.md +++ b/clusters/noble/bootstrap/kyverno/README.md @@ -10,11 +10,11 @@ Admission policies using [Kyverno](https://kyverno.io/). The main chart installs ```bash helm repo add kyverno https://kyverno.github.io/kyverno/ helm repo update -kubectl apply -f clusters/noble/apps/kyverno/namespace.yaml +kubectl apply -f clusters/noble/bootstrap/kyverno/namespace.yaml helm upgrade --install kyverno kyverno/kyverno -n kyverno \ - --version 3.7.1 -f clusters/noble/apps/kyverno/values.yaml --wait --timeout 15m + --version 3.7.1 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \ - --version 3.7.1 -f clusters/noble/apps/kyverno/policies-values.yaml --wait --timeout 10m + --version 3.7.1 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m ``` Verify: diff --git a/clusters/noble/bootstrap/kyverno/policies-values.yaml b/clusters/noble/bootstrap/kyverno/policies-values.yaml index e41bbf5..e148211 100644 --- a/clusters/noble/bootstrap/kyverno/policies-values.yaml +++ b/clusters/noble/bootstrap/kyverno/policies-values.yaml @@ -1,12 +1,12 @@ # kyverno/kyverno-policies — Pod Security Standards as Kyverno ClusterPolicies # # helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \ -# --version 3.7.1 -f clusters/noble/apps/kyverno/policies-values.yaml --wait --timeout 10m +# --version 3.7.1 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m # # Default profile is baseline; validationFailureAction is Audit so existing privileged # workloads are not blocked. Kyverno still emits PolicyReports for matches — Headlamp # surfaces those as “policy violations”. 
Exclude namespaces that intentionally run -# outside baseline (see namespace PSA labels under clusters/noble/apps/*/namespace.yaml) +# outside baseline (see namespace PSA labels under clusters/noble/bootstrap/*/namespace.yaml) # plus core Kubernetes namespaces and every Ansible-managed app namespace on noble. # # After widening excludes, Kyverno does not always prune old PolicyReport rows; refresh: @@ -25,7 +25,7 @@ validationFailureAction: Audit failurePolicy: Fail validationAllowExistingViolations: true -# All platform namespaces on noble (ansible/playbooks/noble.yml + clusters/noble/apps). +# All platform namespaces on noble (ansible/playbooks/noble.yml + clusters/noble/bootstrap). x-kyverno-exclude-infra: &kyverno_exclude_infra any: - resources: diff --git a/clusters/noble/bootstrap/kyverno/values.yaml b/clusters/noble/bootstrap/kyverno/values.yaml index 8020f5d..acc8787 100644 --- a/clusters/noble/bootstrap/kyverno/values.yaml +++ b/clusters/noble/bootstrap/kyverno/values.yaml @@ -2,9 +2,9 @@ # # helm repo add kyverno https://kyverno.github.io/kyverno/ # helm repo update -# kubectl apply -f clusters/noble/apps/kyverno/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/kyverno/namespace.yaml # helm upgrade --install kyverno kyverno/kyverno -n kyverno \ -# --version 3.7.1 -f clusters/noble/apps/kyverno/values.yaml --wait --timeout 15m +# --version 3.7.1 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m # # Baseline Pod Security policies (separate chart): see policies-values.yaml + README.md # diff --git a/clusters/noble/bootstrap/loki/values.yaml b/clusters/noble/bootstrap/loki/values.yaml index 1533467..c1fe007 100644 --- a/clusters/noble/bootstrap/loki/values.yaml +++ b/clusters/noble/bootstrap/loki/values.yaml @@ -2,11 +2,11 @@ # # Chart: grafana/loki — pin version on install (e.g. 6.55.0). 
# -# kubectl apply -f clusters/noble/apps/loki/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml # helm repo add grafana https://grafana.github.io/helm-charts # helm repo update # helm upgrade --install loki grafana/loki -n loki \ -# --version 6.55.0 -f clusters/noble/apps/loki/values.yaml --wait --timeout 30m +# --version 6.55.0 -f clusters/noble/bootstrap/loki/values.yaml --wait --timeout 30m # # Query/push URL for Grafana + Fluent Bit: http://loki-gateway.loki.svc.cluster.local:80 diff --git a/clusters/noble/bootstrap/longhorn/values.yaml b/clusters/noble/bootstrap/longhorn/values.yaml index 2af82f8..7501ad4 100644 --- a/clusters/noble/bootstrap/longhorn/values.yaml +++ b/clusters/noble/bootstrap/longhorn/values.yaml @@ -1,11 +1,11 @@ # Longhorn Helm values — use with Talos user volume + kubelet mounts (see talos/talconfig.yaml). -# 1) PSA: `kubectl apply -k clusters/noble/apps/longhorn` (privileged namespace) before or after Helm. +# 1) PSA: `kubectl apply -k clusters/noble/bootstrap/longhorn` (privileged namespace) before or after Helm. # 2) Talos: bind `/var/lib/longhorn` → `/var/mnt/longhorn` in kubelet extraMounts — chart hostPath is fixed to /var/lib/longhorn. # Example (run from home-server repo root so -f path resolves): -# kubectl apply -k clusters/noble/apps/longhorn +# kubectl apply -k clusters/noble/bootstrap/longhorn # helm repo add longhorn https://charts.longhorn.io && helm repo update # helm upgrade --install longhorn longhorn/longhorn -n longhorn-system --create-namespace \ -# -f clusters/noble/apps/longhorn/values.yaml +# -f clusters/noble/bootstrap/longhorn/values.yaml # "helm upgrade --install" needs two arguments: RELEASE_NAME and CHART (e.g. longhorn longhorn/longhorn). 
# # If you already installed Longhorn without this file: fix Default Settings in the UI or edit each diff --git a/clusters/noble/bootstrap/metallb/README.md b/clusters/noble/bootstrap/metallb/README.md index 3af5a74..0ef60b5 100644 --- a/clusters/noble/bootstrap/metallb/README.md +++ b/clusters/noble/bootstrap/metallb/README.md @@ -11,7 +11,7 @@ If `kubectl apply -k` fails with **`no matches for kind "IPAddressPool"`** / **` **Pod Security warnings** (`would violate PodSecurity "restricted"`): MetalLB’s speaker/FRR use `hostNetwork`, `NET_ADMIN`, etc. That is expected unless `metallb-system` is labeled **privileged**. Apply `namespace.yaml` **before** Helm so the namespace is created with the right labels (omit `--create-namespace` on Helm), or patch an existing namespace: ```bash -kubectl apply -f clusters/noble/apps/metallb/namespace.yaml +kubectl apply -f clusters/noble/bootstrap/metallb/namespace.yaml ``` If you already ran Helm with `--create-namespace`, either `kubectl apply -f namespace.yaml` (merges labels) or: @@ -38,15 +38,15 @@ Then restart MetalLB pods if they were failing (`kubectl get pods -n metallb-sys 2. Apply this folder’s pool and L2 advertisement: ```bash - kubectl apply -k clusters/noble/apps/metallb + kubectl apply -k clusters/noble/bootstrap/metallb ``` -3. Confirm a `Service` `type: LoadBalancer` receives an address in `192.168.50.210`–`192.168.50.229` (e.g. **`kubectl get svc -n traefik traefik`** after installing **Traefik** in `clusters/noble/apps/traefik/`). +3. Confirm a `Service` `type: LoadBalancer` receives an address in `192.168.50.210`–`192.168.50.229` (e.g. **`kubectl get svc -n traefik traefik`** after installing **Traefik** in `clusters/noble/bootstrap/traefik/`). -Reserve **one** IP in that range for Argo CD (e.g. `192.168.50.210`) via `spec.loadBalancerIP` or chart values when you expose the server. Traefik pins **`192.168.50.211`** in **`clusters/noble/apps/traefik/values.yaml`**. 
+Reserve **one** IP in that range for Argo CD (e.g. `192.168.50.210`) via `spec.loadBalancerIP` or chart values when you expose the server. Traefik pins **`192.168.50.211`** in **`clusters/noble/bootstrap/traefik/values.yaml`**. ## `Pending` MetalLB pods 1. `kubectl get nodes` — every node **`Ready`**? If **`NotReady`** or **`NetworkUnavailable`**, finish **CNI** install first. 2. `kubectl describe pod -n metallb-system ` — read **Events** at the bottom (`0/N nodes are available: …`). -3. L2 speaker uses the node’s uplink; kube-vip in this repo expects **`ens18`** on control planes (`clusters/noble/apps/kube-vip/vip-daemonset.yaml`). If your NIC name differs, change `vip_interface` there. +3. L2 speaker uses the node’s uplink; kube-vip in this repo expects **`ens18`** on control planes (`clusters/noble/bootstrap/kube-vip/vip-daemonset.yaml`). If your NIC name differs, change `vip_interface` there. diff --git a/clusters/noble/bootstrap/metallb/namespace.yaml b/clusters/noble/bootstrap/metallb/namespace.yaml index 5ef548b..ac545db 100644 --- a/clusters/noble/bootstrap/metallb/namespace.yaml +++ b/clusters/noble/bootstrap/metallb/namespace.yaml @@ -1,5 +1,5 @@ # Apply before Helm if you do not use --create-namespace, or use this to fix PSA after the fact: -# kubectl apply -f clusters/noble/apps/metallb/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/metallb/namespace.yaml # MetalLB speaker needs hostNetwork + NET_ADMIN; incompatible with Pod Security "restricted". 
apiVersion: v1 kind: Namespace diff --git a/clusters/noble/bootstrap/metrics-server/values.yaml b/clusters/noble/bootstrap/metrics-server/values.yaml index 1756bdb..06641e8 100644 --- a/clusters/noble/bootstrap/metrics-server/values.yaml +++ b/clusters/noble/bootstrap/metrics-server/values.yaml @@ -4,7 +4,7 @@ # # helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ # helm upgrade --install metrics-server metrics-server/metrics-server -n kube-system \ -# --version 3.13.0 -f clusters/noble/apps/metrics-server/values.yaml --wait +# --version 3.13.0 -f clusters/noble/bootstrap/metrics-server/values.yaml --wait args: - --kubelet-insecure-tls diff --git a/clusters/noble/bootstrap/newt/README.md b/clusters/noble/bootstrap/newt/README.md index 314cba5..0fce92d 100644 --- a/clusters/noble/bootstrap/newt/README.md +++ b/clusters/noble/bootstrap/newt/README.md @@ -10,14 +10,14 @@ Keys must match `values.yaml` (`PANGOLIN_ENDPOINT`, `NEWT_ID`, `NEWT_SECRET`). ### Option A — Sealed Secret (safe for GitOps) -With the [Sealed Secrets](https://github.com/bitnami-labs/sealed-secrets) controller installed (`clusters/noble/apps/sealed-secrets/`), generate a `SealedSecret` from your workstation (rotate credentials in Pangolin first if they were exposed): +With the [Sealed Secrets](https://github.com/bitnami-labs/sealed-secrets) controller installed (`clusters/noble/bootstrap/sealed-secrets/`), generate a `SealedSecret` from your workstation (rotate credentials in Pangolin first if they were exposed): ```bash -chmod +x clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh +chmod +x clusters/noble/bootstrap/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh export PANGOLIN_ENDPOINT='https://pangolin.pcenicni.dev' export NEWT_ID='YOUR_NEWT_ID' export NEWT_SECRET='YOUR_NEWT_SECRET' -./clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh > newt-pangolin-auth.sealedsecret.yaml 
+./clusters/noble/bootstrap/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh > newt-pangolin-auth.sealedsecret.yaml kubectl apply -f newt-pangolin-auth.sealedsecret.yaml ``` @@ -26,7 +26,7 @@ Commit only the `.sealedsecret.yaml` file, not plain `Secret` YAML. ### Option B — Imperative Secret (not in git) ```bash -kubectl apply -f clusters/noble/apps/newt/namespace.yaml +kubectl apply -f clusters/noble/bootstrap/newt/namespace.yaml kubectl -n newt create secret generic newt-pangolin-auth \ --from-literal=PANGOLIN_ENDPOINT='https://pangolin.pcenicni.dev' \ @@ -44,7 +44,7 @@ helm repo update helm upgrade --install newt fossorial/newt \ --namespace newt \ --version 1.2.0 \ - -f clusters/noble/apps/newt/values.yaml \ + -f clusters/noble/bootstrap/newt/values.yaml \ --wait ``` diff --git a/clusters/noble/bootstrap/newt/values.yaml b/clusters/noble/bootstrap/newt/values.yaml index e238912..4570608 100644 --- a/clusters/noble/bootstrap/newt/values.yaml +++ b/clusters/noble/bootstrap/newt/values.yaml @@ -2,7 +2,7 @@ # # Credentials MUST come from a Secret — do not put endpoint/id/secret in git. # -# kubectl apply -f clusters/noble/apps/newt/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/newt/namespace.yaml # kubectl -n newt create secret generic newt-pangolin-auth \ # --from-literal=PANGOLIN_ENDPOINT='https://pangolin.example.com' \ # --from-literal=NEWT_ID='...' \ @@ -10,7 +10,7 @@ # # helm repo add fossorial https://charts.fossorial.io # helm upgrade --install newt fossorial/newt -n newt \ -# --version 1.2.0 -f clusters/noble/apps/newt/values.yaml --wait +# --version 1.2.0 -f clusters/noble/bootstrap/newt/values.yaml --wait # # See README.md for Pangolin Integration API (domains + HTTP resources + CNAME). 
diff --git a/clusters/noble/bootstrap/sealed-secrets/README.md b/clusters/noble/bootstrap/sealed-secrets/README.md index a927423..9e7cbdb 100644 --- a/clusters/noble/bootstrap/sealed-secrets/README.md +++ b/clusters/noble/bootstrap/sealed-secrets/README.md @@ -10,9 +10,9 @@ Encrypts `Secret` manifests so they can live in git; the controller decrypts **S ```bash helm repo add sealed-secrets https://bitnami-labs.github.io/sealed-secrets helm repo update -kubectl apply -f clusters/noble/apps/sealed-secrets/namespace.yaml +kubectl apply -f clusters/noble/bootstrap/sealed-secrets/namespace.yaml helm upgrade --install sealed-secrets sealed-secrets/sealed-secrets -n sealed-secrets \ - --version 2.18.4 -f clusters/noble/apps/sealed-secrets/values.yaml --wait + --version 2.18.4 -f clusters/noble/bootstrap/sealed-secrets/values.yaml --wait ``` ## Workstation: `kubeseal` diff --git a/clusters/noble/bootstrap/sealed-secrets/values.yaml b/clusters/noble/bootstrap/sealed-secrets/values.yaml index 497d925..0f84be9 100644 --- a/clusters/noble/bootstrap/sealed-secrets/values.yaml +++ b/clusters/noble/bootstrap/sealed-secrets/values.yaml @@ -2,15 +2,15 @@ # # helm repo add sealed-secrets https://bitnami-labs.github.io/sealed-secrets # helm repo update -# kubectl apply -f clusters/noble/apps/sealed-secrets/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/sealed-secrets/namespace.yaml # helm upgrade --install sealed-secrets sealed-secrets/sealed-secrets -n sealed-secrets \ -# --version 2.18.4 -f clusters/noble/apps/sealed-secrets/values.yaml --wait +# --version 2.18.4 -f clusters/noble/bootstrap/sealed-secrets/values.yaml --wait # # Client: install kubeseal (same minor as controller — see README). # Defaults are sufficient for the lab; override here if you need key renewal, resources, etc. # # GitOps pattern: create Secrets only via SealedSecret (or External Secrets + Vault). 
-# Example (Newt): clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh +# Example (Newt): clusters/noble/bootstrap/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh # Backup the controller's sealing key: kubectl -n sealed-secrets get secret sealed-secrets-key -o yaml # # Talos cluster secrets (bootstrap token, cluster secret, certs) belong in talhelper talsecret / diff --git a/clusters/noble/bootstrap/traefik/README.md b/clusters/noble/bootstrap/traefik/README.md index 51598b0..64905cc 100644 --- a/clusters/noble/bootstrap/traefik/README.md +++ b/clusters/noble/bootstrap/traefik/README.md @@ -5,7 +5,7 @@ 1. Create the namespace (Pod Security **baseline** — Traefik needs more than **restricted**): ```bash - kubectl apply -f clusters/noble/apps/traefik/namespace.yaml + kubectl apply -f clusters/noble/bootstrap/traefik/namespace.yaml ``` 2. Install the chart (**do not** use `--create-namespace` if the namespace already exists): @@ -16,11 +16,11 @@ helm upgrade --install traefik traefik/traefik \ --namespace traefik \ --version 39.0.6 \ - -f clusters/noble/apps/traefik/values.yaml \ + -f clusters/noble/bootstrap/traefik/values.yaml \ --wait ``` -3. Confirm the Service has a pool address. On the **LAN**, **`*.apps.noble.lab.pcenicni.dev`** can resolve to this IP (split horizon / local DNS). **Public** names go through **Pangolin + Newt** (CNAME + API), not ExternalDNS — see **`clusters/noble/apps/newt/README.md`**. +3. Confirm the Service has a pool address. On the **LAN**, **`*.apps.noble.lab.pcenicni.dev`** can resolve to this IP (split horizon / local DNS). **Public** names go through **Pangolin + Newt** (CNAME + API), not ExternalDNS — see **`clusters/noble/bootstrap/newt/README.md`**. ```bash kubectl get svc -n traefik traefik @@ -28,6 +28,6 @@ Values pin **`192.168.50.211`** via **`metallb.io/loadBalancerIPs`**. **`192.168.50.210`** stays free for Argo CD. -4. 
Create **Ingress** resources with **`ingressClassName: traefik`** (or rely on the default class). **TLS:** add **`cert-manager.io/cluster-issuer: letsencrypt-staging`** (or **`letsencrypt-prod`**) and **`tls`** hosts — see **`clusters/noble/apps/cert-manager/README.md`**. +4. Create **Ingress** resources with **`ingressClassName: traefik`** (or rely on the default class). **TLS:** add **`cert-manager.io/cluster-issuer: letsencrypt-staging`** (or **`letsencrypt-prod`**) and **`tls`** hosts — see **`clusters/noble/bootstrap/cert-manager/README.md`**. -5. **Public DNS:** use **Newt** + Pangolin (**CNAME** at your DNS host + **Integration API** for resources/targets) — **`clusters/noble/apps/newt/README.md`**. +5. **Public DNS:** use **Newt** + Pangolin (**CNAME** at your DNS host + **Integration API** for resources/targets) — **`clusters/noble/bootstrap/newt/README.md`**. diff --git a/clusters/noble/bootstrap/traefik/values.yaml b/clusters/noble/bootstrap/traefik/values.yaml index e74b28c..05f55fd 100644 --- a/clusters/noble/bootstrap/traefik/values.yaml +++ b/clusters/noble/bootstrap/traefik/values.yaml @@ -3,10 +3,10 @@ # Chart: traefik/traefik — pin version on the helm command (e.g. 39.0.6). # DNS: point *.apps.noble.lab.pcenicni.dev to the LoadBalancer IP below. 
# -# kubectl apply -f clusters/noble/apps/traefik/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/traefik/namespace.yaml # helm repo add traefik https://traefik.github.io/charts # helm upgrade --install traefik traefik/traefik -n traefik \ -# --version 39.0.6 -f clusters/noble/apps/traefik/values.yaml --wait +# --version 39.0.6 -f clusters/noble/bootstrap/traefik/values.yaml --wait service: type: LoadBalancer diff --git a/clusters/noble/bootstrap/vault/README.md b/clusters/noble/bootstrap/vault/README.md index 04d90e5..c05250a 100644 --- a/clusters/noble/bootstrap/vault/README.md +++ b/clusters/noble/bootstrap/vault/README.md @@ -10,9 +10,9 @@ Standalone Vault with **file** storage on a **Longhorn** PVC (`server.dataStorag ```bash helm repo add hashicorp https://helm.releases.hashicorp.com helm repo update -kubectl apply -f clusters/noble/apps/vault/namespace.yaml +kubectl apply -f clusters/noble/bootstrap/vault/namespace.yaml helm upgrade --install vault hashicorp/vault -n vault \ - --version 0.32.0 -f clusters/noble/apps/vault/values.yaml --wait --timeout 15m + --version 0.32.0 -f clusters/noble/bootstrap/vault/values.yaml --wait --timeout 15m ``` Verify: @@ -27,7 +27,7 @@ kubectl -n vault exec -i sts/vault -- vault status After **Cilium** is up, optionally restrict HTTP access to the Vault server pods (**TCP 8200**) to **`external-secrets`** and same-namespace clients: ```bash -kubectl apply -f clusters/noble/apps/vault/cilium-network-policy.yaml +kubectl apply -f clusters/noble/bootstrap/vault/cilium-network-policy.yaml ``` If you add workloads in other namespaces that call Vault, extend **`ingress`** in that manifest. 
@@ -53,7 +53,7 @@ Or create the Secret used by the optional CronJob and apply it: ```bash kubectl -n vault create secret generic vault-unseal-key --from-literal=key='YOUR_UNSEAL_KEY' -kubectl apply -f clusters/noble/apps/vault/unseal-cronjob.yaml +kubectl apply -f clusters/noble/bootstrap/vault/unseal-cronjob.yaml ``` The CronJob runs every minute and unseals if Vault is sealed and the Secret is present. @@ -64,7 +64,7 @@ Vault **OSS** auto-unseal uses cloud KMS (AWS, GCP, Azure, OCI), **Transit** (an ## Kubernetes auth (External Secrets / ClusterSecretStore) -**One-shot:** from the repo root, `export KUBECONFIG=talos/kubeconfig` and `export VAULT_TOKEN=…`, then run **`./clusters/noble/apps/vault/configure-kubernetes-auth.sh`** (idempotent). Then **`kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml`** on its own line (shell comments **`# …`** on the same line are parsed as extra `kubectl` args and break `apply`). **`kubectl get clustersecretstore vault`** should show **READY=True** after a few seconds. +**One-shot:** from the repo root, `export KUBECONFIG=talos/kubeconfig` and `export VAULT_TOKEN=…`, then run **`./clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh`** (idempotent). Then **`kubectl apply -f clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml`** on its own line (shell comments **`# …`** on the same line are parsed as extra `kubectl` args and break `apply`). **`kubectl get clustersecretstore vault`** should show **READY=True** after a few seconds. Run these **from your workstation** (needs `kubectl`; no local `vault` binary required). Use a **short-lived admin token** or the root token **only in your shell** — do not paste tokens into logs or chat. @@ -139,7 +139,7 @@ EOF ' ``` -**5. Apply** **`clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml`** if you have not already, then verify: +**5. 
Apply** **`clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml`** if you have not already, then verify: ```bash kubectl describe clustersecretstore vault diff --git a/clusters/noble/bootstrap/vault/cilium-network-policy.yaml b/clusters/noble/bootstrap/vault/cilium-network-policy.yaml index c381899..4086934 100644 --- a/clusters/noble/bootstrap/vault/cilium-network-policy.yaml +++ b/clusters/noble/bootstrap/vault/cilium-network-policy.yaml @@ -1,5 +1,5 @@ # CiliumNetworkPolicy — restrict who may reach Vault HTTP listener (8200). -# Apply after Cilium is healthy: kubectl apply -f clusters/noble/apps/vault/cilium-network-policy.yaml +# Apply after Cilium is healthy: kubectl apply -f clusters/noble/bootstrap/vault/cilium-network-policy.yaml # # Ingress-only policy: egress from Vault is unchanged (Kubernetes auth needs API + DNS). # Extend ingress rules if other namespaces must call Vault (e.g. app workloads). diff --git a/clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh b/clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh index 08708c3..6c013a1 100755 --- a/clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh +++ b/clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh @@ -5,9 +5,9 @@ # Usage (from repo root): # export KUBECONFIG=talos/kubeconfig # or your path # export VAULT_TOKEN='…' # root or admin token — never commit -# ./clusters/noble/apps/vault/configure-kubernetes-auth.sh +# ./clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh # -# Then: kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml +# Then: kubectl apply -f clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml # Verify: kubectl describe clustersecretstore vault set -euo pipefail @@ -73,5 +73,5 @@ EOF echo "Done. 
Issuer used: $ISSUER" echo "" echo "Next (each command on its own line — do not paste # comments after kubectl):" -echo " kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml" +echo " kubectl apply -f clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml" echo " kubectl get clustersecretstore vault" diff --git a/clusters/noble/bootstrap/vault/unseal-cronjob.yaml b/clusters/noble/bootstrap/vault/unseal-cronjob.yaml index 1a36bf2..c529aa0 100644 --- a/clusters/noble/bootstrap/vault/unseal-cronjob.yaml +++ b/clusters/noble/bootstrap/vault/unseal-cronjob.yaml @@ -2,7 +2,7 @@ # # 1) vault operator init -key-shares=1 -key-threshold=1 (lab only — single key) # 2) kubectl -n vault create secret generic vault-unseal-key --from-literal=key='YOUR_UNSEAL_KEY' -# 3) kubectl apply -f clusters/noble/apps/vault/unseal-cronjob.yaml +# 3) kubectl apply -f clusters/noble/bootstrap/vault/unseal-cronjob.yaml # # OSS Vault has no Kubernetes/KMS seal; this CronJob runs vault operator unseal when the server is sealed. # Protect the Secret with RBAC; prefer cloud KMS auto-unseal for real environments. 
diff --git a/clusters/noble/bootstrap/vault/values.yaml b/clusters/noble/bootstrap/vault/values.yaml index 9bf9945..e20b9c4 100644 --- a/clusters/noble/bootstrap/vault/values.yaml +++ b/clusters/noble/bootstrap/vault/values.yaml @@ -2,9 +2,9 @@ # # helm repo add hashicorp https://helm.releases.hashicorp.com # helm repo update -# kubectl apply -f clusters/noble/apps/vault/namespace.yaml +# kubectl apply -f clusters/noble/bootstrap/vault/namespace.yaml # helm upgrade --install vault hashicorp/vault -n vault \ -# --version 0.32.0 -f clusters/noble/apps/vault/values.yaml --wait --timeout 15m +# --version 0.32.0 -f clusters/noble/bootstrap/vault/values.yaml --wait --timeout 15m # # Post-install: initialize, store unseal key in Secret, apply optional unseal CronJob — see README.md # diff --git a/docs/architecture.md b/docs/architecture.md index 3871677..fcac12c 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -41,7 +41,7 @@ flowchart TB ## Network and ingress -**North–south (apps on LAN):** DNS for **`*.apps.noble.lab.pcenicni.dev`** → **Traefik** **`LoadBalancer` `192.168.50.211`**. **MetalLB** L2 pool **`192.168.50.210`–`192.168.50.229`**; **Argo CD** uses **`192.168.50.210`**. **Public** access is not in-cluster ExternalDNS: **Newt** (Pangolin tunnel) plus **CNAME** and **Integration API** per [`clusters/noble/apps/newt/README.md`](../clusters/noble/apps/newt/README.md). +**North–south (apps on LAN):** DNS for **`*.apps.noble.lab.pcenicni.dev`** → **Traefik** **`LoadBalancer` `192.168.50.211`**. **MetalLB** L2 pool **`192.168.50.210`–`192.168.50.229`**; **Argo CD** uses **`192.168.50.210`**. **Public** access is not in-cluster ExternalDNS: **Newt** (Pangolin tunnel) plus **CNAME** and **Integration API** per [`clusters/noble/bootstrap/newt/README.md`](../clusters/noble/bootstrap/newt/README.md). 
```mermaid flowchart TB @@ -114,7 +114,7 @@ flowchart TB ## Observability path -**kube-prometheus-stack** in **`monitoring`**: Prometheus, Grafana, Alertmanager, node-exporter, etc. **Loki** (SingleBinary) in **`loki`** with **Fluent Bit** in **`logging`** shipping to **`loki-gateway`**. Grafana Loki datasource is applied via **ConfigMap** [`clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`](../clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml). Prometheus, Grafana, Alertmanager, and Loki use **Longhorn** PVCs where configured. +**kube-prometheus-stack** in **`monitoring`**: Prometheus, Grafana, Alertmanager, node-exporter, etc. **Loki** (SingleBinary) in **`loki`** with **Fluent Bit** in **`logging`** shipping to **`loki-gateway`**. Grafana Loki datasource is applied via **ConfigMap** [`clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`](../clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml). Prometheus, Grafana, Alertmanager, and Loki use **Longhorn** PVCs where configured. ```mermaid flowchart LR @@ -149,7 +149,7 @@ flowchart LR ## Secrets and policy -**Sealed Secrets** decrypts `SealedSecret` objects in-cluster. **External Secrets Operator** syncs from **Vault** using **`ClusterSecretStore`** (see [`examples/vault-cluster-secret-store.yaml`](../clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml)). Trust is **cluster → Vault** (ESO calls Vault; Vault does not initiate cluster trust). **Kyverno** with **kyverno-policies** enforces **PSS baseline** in **Audit**. +**Sealed Secrets** decrypts `SealedSecret` objects in-cluster. **External Secrets Operator** syncs from **Vault** using **`ClusterSecretStore`** (see [`examples/vault-cluster-secret-store.yaml`](../clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml)). Trust is **cluster → Vault** (ESO calls Vault; Vault does not initiate cluster trust). 
**Kyverno** with **kyverno-policies** enforces **PSS baseline** in **Audit**. ```mermaid flowchart LR @@ -218,7 +218,7 @@ See [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md) for the authoritative ## Narrative -The **noble** environment is a **Talos** lab cluster on **`192.168.50.0/24`** with **three control plane nodes and one worker**, schedulable workloads on control planes enabled, and the Kubernetes API exposed through **kube-vip** at **`192.168.50.230`**. **Cilium** provides the CNI after Talos bootstrap with **`cni: none`**; **MetalLB** advertises **`192.168.50.210`–`192.168.50.229`**, pinning **Argo CD** to **`192.168.50.210`** and **Traefik** to **`192.168.50.211`** for **`*.apps.noble.lab.pcenicni.dev`**. **cert-manager** issues certificates for Traefik Ingresses; **GitOps** is **Helm plus Argo CD** with manifests under **`clusters/noble/`** and bootstrap under **`clusters/noble/bootstrap/argocd/`**. **Observability** uses **kube-prometheus-stack** in **`monitoring`**, **Loki** and **Fluent Bit** with Grafana wired via a **ConfigMap** datasource, with **Longhorn** PVCs for Prometheus, Grafana, Alertmanager, Loki, and **Vault**. **Secrets** combine **Sealed Secrets** for git-encrypted material, **Vault** with **External Secrets** for dynamic sync, and **Kyverno** enforces **Pod Security Standards baseline** in **Audit**. **Public** access uses **Newt** to **Pangolin** with **CNAME** and Integration API steps as documented—not generic in-cluster public DNS. +The **noble** environment is a **Talos** lab cluster on **`192.168.50.0/24`** with **three control plane nodes and one worker**, schedulable workloads on control planes enabled, and the Kubernetes API exposed through **kube-vip** at **`192.168.50.230`**. 
**Cilium** provides the CNI after Talos bootstrap with **`cni: none`**; **MetalLB** advertises **`192.168.50.210`–`192.168.50.229`**, pinning **Argo CD** to **`192.168.50.210`** and **Traefik** to **`192.168.50.211`** for **`*.apps.noble.lab.pcenicni.dev`**. **cert-manager** issues certificates for Traefik Ingresses; **GitOps** is **Ansible-driven Helm** for the platform (**`clusters/noble/bootstrap/`**) plus optional **Argo CD** app-of-apps (**`clusters/noble/apps/`**, **`clusters/noble/bootstrap/argocd/`**). **Observability** uses **kube-prometheus-stack** in **`monitoring`**, **Loki** and **Fluent Bit** with Grafana wired via a **ConfigMap** datasource, with **Longhorn** PVCs for Prometheus, Grafana, Alertmanager, Loki, and **Vault**. **Secrets** combine **Sealed Secrets** for git-encrypted material, **Vault** with **External Secrets** for dynamic sync, and **Kyverno** enforces **Pod Security Standards baseline** in **Audit**. **Public** access uses **Newt** to **Pangolin** with **CNAME** and Integration API steps as documented—not generic in-cluster public DNS. --- diff --git a/talos/CLUSTER-BUILD.md b/talos/CLUSTER-BUILD.md index 7ea2920..35bd584 100644 --- a/talos/CLUSTER-BUILD.md +++ b/talos/CLUSTER-BUILD.md @@ -7,20 +7,20 @@ This document is the **exported TODO** for the **noble** Talos cluster (4 nodes) Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vault **CiliumNetworkPolicy**, **`talos/runbooks/`**). **Next focus:** optional **Alertmanager** receivers (Slack/PagerDuty); tighten **RBAC** (Headlamp / cluster-admin); **Cilium** policies for other namespaces as needed; enable **Mend Renovate** for PRs; Pangolin/sample Ingress; **Velero** when S3 exists. - **Talos** v1.12.6 (target) / **Kubernetes** as bundled — four nodes **Ready** unless upgrading; **`talosctl health`**; **`talos/kubeconfig`** is **local only** (gitignored — never commit; regenerate with `talosctl kubeconfig` per `talos/README.md`). 
**Image Factory (nocloud installer):** `factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6` -- **Cilium** Helm **1.16.6** / app **1.16.6** (`clusters/noble/apps/cilium/`, phase 1 values). +- **Cilium** Helm **1.16.6** / app **1.16.6** (`clusters/noble/bootstrap/cilium/`, phase 1 values). - **MetalLB** Helm **0.15.3** / app **v0.15.3**; **IPAddressPool** `noble-l2` + **L2Advertisement** — pool **`192.168.50.210`–`192.168.50.229`**. - **kube-vip** DaemonSet **3/3** on control planes; VIP **`192.168.50.230`** on **`ens18`** (`vip_subnet` **`/32`** required — bare **`32`** breaks parsing). **Verified from workstation:** `kubectl config set-cluster noble --server=https://192.168.50.230:6443` then **`kubectl get --raw /healthz`** → **`ok`** (`talos/kubeconfig`; see `talos/README.md`). -- **metrics-server** Helm **3.13.0** / app **v0.8.0** — `clusters/noble/apps/metrics-server/values.yaml` (`--kubelet-insecure-tls` for Talos); **`kubectl top nodes`** works. -- **Longhorn** Helm **1.11.1** / app **v1.11.1** — `clusters/noble/apps/longhorn/` (PSA **privileged** namespace, `defaultDataPath` `/var/mnt/longhorn`, `preUpgradeChecker` enabled); **StorageClass** `longhorn` (default); **`nodes.longhorn.io`** all **Ready**; test **PVC** `Bound` on `longhorn`. -- **Traefik** Helm **39.0.6** / app **v3.6.11** — `clusters/noble/apps/traefik/`; **`Service`** **`LoadBalancer`** **`EXTERNAL-IP` `192.168.50.211`**; **`IngressClass`** **`traefik`** (default). Point **`*.apps.noble.lab.pcenicni.dev`** at **`192.168.50.211`**. MetalLB pool verification was done before replacing the temporary nginx test with Traefik. 
-- **cert-manager** Helm **v1.20.0** / app **v1.20.0** — `clusters/noble/apps/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (**DNS-01** via **Cloudflare** for **`pcenicni.dev`**, Secret **`cloudflare-dns-api-token`** in **`cert-manager`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox). -- **Newt** Helm **1.2.0** / app **1.10.1** — `clusters/noble/apps/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). Prefer a **SealedSecret** in git (`kubeseal` — see `clusters/noble/apps/sealed-secrets/examples/`) after rotating credentials if they were exposed. **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolin’s domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/apps/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver). -- **Argo CD** Helm **9.4.17** / app **v3.3.6** — `clusters/noble/bootstrap/argocd/`; **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`**; app-of-apps scaffold under **`bootstrap/argocd/apps/`** (edit **`root-application.yaml`** `repoURL` before applying). -- **kube-prometheus-stack** — Helm chart **82.15.1** — `clusters/noble/apps/kube-prometheus-stack/` (**namespace** `monitoring`, PSA **privileged** — **node-exporter** needs host mounts); **Longhorn** PVCs for Prometheus, Grafana, Alertmanager; **node-exporter** DaemonSet **4/4**. **Grafana Ingress:** **`https://grafana.apps.noble.lab.pcenicni.dev`** (Traefik **`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). 
**Loki** datasource in Grafana: ConfigMap **`clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`** (sidecar label **`grafana_datasource: "1"`**) — not via **`grafana.additionalDataSources`** in the chart. **`helm upgrade --install` with `--wait` is silent until done** — use **`--timeout 30m`**; Grafana admin: Secret **`kube-prometheus-grafana`**, keys **`admin-user`** / **`admin-password`**. -- **Loki** + **Fluent Bit** — **`grafana/loki` 6.55.0** SingleBinary + **filesystem** on **Longhorn** (`clusters/noble/apps/loki/`); **`loki.auth_enabled: false`**; **`chunksCache.enabled: false`** (no memcached chunk cache). **`fluent/fluent-bit` 0.56.0** → **`loki-gateway.loki.svc:80`** (`clusters/noble/apps/fluent-bit/`); **`logging`** PSA **privileged**. **Grafana Explore:** **`kubectl apply -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`** then **Explore → Loki** (e.g. `{job="fluent-bit"}`). -- **Sealed Secrets** Helm **2.18.4** / app **0.36.1** — `clusters/noble/apps/sealed-secrets/` (namespace **`sealed-secrets`**); **`kubeseal`** on client should match controller minor (**README**); back up **`sealed-secrets-key`** (see README). -- **External Secrets Operator** Helm **2.2.0** / app **v2.2.0** — `clusters/noble/apps/external-secrets/`; Vault **`ClusterSecretStore`** in **`examples/vault-cluster-secret-store.yaml`** (**`http://`** to match Vault listener — apply after Vault **Kubernetes auth**). -- **Vault** Helm **0.32.0** / app **1.21.2** — `clusters/noble/apps/vault/` — standalone **file** storage, **Longhorn** PVC; **HTTP** listener (`global.tlsDisable`); optional **CronJob** lab unseal **`unseal-cronjob.yaml`**; **not** initialized in git — run **`vault operator init`** per **`README.md`**. +- **metrics-server** Helm **3.13.0** / app **v0.8.0** — `clusters/noble/bootstrap/metrics-server/values.yaml` (`--kubelet-insecure-tls` for Talos); **`kubectl top nodes`** works. 
+- **Longhorn** Helm **1.11.1** / app **v1.11.1** — `clusters/noble/bootstrap/longhorn/` (PSA **privileged** namespace, `defaultDataPath` `/var/mnt/longhorn`, `preUpgradeChecker` enabled); **StorageClass** `longhorn` (default); **`nodes.longhorn.io`** all **Ready**; test **PVC** `Bound` on `longhorn`. +- **Traefik** Helm **39.0.6** / app **v3.6.11** — `clusters/noble/bootstrap/traefik/`; **`Service`** **`LoadBalancer`** **`EXTERNAL-IP` `192.168.50.211`**; **`IngressClass`** **`traefik`** (default). Point **`*.apps.noble.lab.pcenicni.dev`** at **`192.168.50.211`**. MetalLB pool verification was done before replacing the temporary nginx test with Traefik. +- **cert-manager** Helm **v1.20.0** / app **v1.20.0** — `clusters/noble/bootstrap/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (**DNS-01** via **Cloudflare** for **`pcenicni.dev`**, Secret **`cloudflare-dns-api-token`** in **`cert-manager`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox). +- **Newt** Helm **1.2.0** / app **1.10.1** — `clusters/noble/bootstrap/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). Prefer a **SealedSecret** in git (`kubeseal` — see `clusters/noble/bootstrap/sealed-secrets/examples/`) after rotating credentials if they were exposed. **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolin’s domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/bootstrap/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver). 
+- **Argo CD** Helm **9.4.17** / app **v3.3.6** — `clusters/noble/bootstrap/argocd/`; **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`**; app-of-apps root syncs **`clusters/noble/apps/`** (edit **`root-application.yaml`** `repoURL` before applying). +- **kube-prometheus-stack** — Helm chart **82.15.1** — `clusters/noble/bootstrap/kube-prometheus-stack/` (**namespace** `monitoring`, PSA **privileged** — **node-exporter** needs host mounts); **Longhorn** PVCs for Prometheus, Grafana, Alertmanager; **node-exporter** DaemonSet **4/4**. **Grafana Ingress:** **`https://grafana.apps.noble.lab.pcenicni.dev`** (Traefik **`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **Loki** datasource in Grafana: ConfigMap **`clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** (sidecar label **`grafana_datasource: "1"`**) — not via **`grafana.additionalDataSources`** in the chart. **`helm upgrade --install` with `--wait` is silent until done** — use **`--timeout 30m`**; Grafana admin: Secret **`kube-prometheus-grafana`**, keys **`admin-user`** / **`admin-password`**. +- **Loki** + **Fluent Bit** — **`grafana/loki` 6.55.0** SingleBinary + **filesystem** on **Longhorn** (`clusters/noble/bootstrap/loki/`); **`loki.auth_enabled: false`**; **`chunksCache.enabled: false`** (no memcached chunk cache). **`fluent/fluent-bit` 0.56.0** → **`loki-gateway.loki.svc:80`** (`clusters/noble/bootstrap/fluent-bit/`); **`logging`** PSA **privileged**. **Grafana Explore:** **`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** then **Explore → Loki** (e.g. `{job="fluent-bit"}`). +- **Sealed Secrets** Helm **2.18.4** / app **0.36.1** — `clusters/noble/bootstrap/sealed-secrets/` (namespace **`sealed-secrets`**); **`kubeseal`** on client should match controller minor (**README**); back up **`sealed-secrets-key`** (see README). 
+- **External Secrets Operator** Helm **2.2.0** / app **v2.2.0** — `clusters/noble/bootstrap/external-secrets/`; Vault **`ClusterSecretStore`** in **`examples/vault-cluster-secret-store.yaml`** (**`http://`** to match Vault listener — apply after Vault **Kubernetes auth**). +- **Vault** Helm **0.32.0** / app **1.21.2** — `clusters/noble/bootstrap/vault/` — standalone **file** storage, **Longhorn** PVC; **HTTP** listener (`global.tlsDisable`); optional **CronJob** lab unseal **`unseal-cronjob.yaml`**; **not** initialized in git — run **`vault operator init`** per **`README.md`**. - **Still open:** **Renovate** — install **[Mend Renovate](https://github.com/apps/renovate)** (or self-host) so PRs run; optional **Alertmanager** notification channels; optional **sample Ingress + cert + Pangolin** end-to-end; **Velero** when S3 is ready; **Argo CD SSO**. ## Inventory @@ -39,11 +39,11 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vaul | Kubernetes API VIP (kube-vip) | `192.168.50.230` (see `talos/README.md`; align with `talos/talconfig.yaml` `additionalApiServerCertSans`) | | MetalLB L2 pool | `192.168.50.210`–`192.168.50.229` | | Argo CD `LoadBalancer` | **Pick one IP** in the MetalLB pool (e.g. 
`192.168.50.210`) | -| Traefik (apps ingress) | `192.168.50.211` — **`metallb.io/loadBalancerIPs`** in `clusters/noble/apps/traefik/values.yaml` | +| Traefik (apps ingress) | `192.168.50.211` — **`metallb.io/loadBalancerIPs`** in `clusters/noble/bootstrap/traefik/values.yaml` | | Apps ingress (LAN / split horizon) | `*.apps.noble.lab.pcenicni.dev` → Traefik LB | -| Grafana (Ingress + TLS) | **`grafana.apps.noble.lab.pcenicni.dev`** — `grafana.ingress` in `clusters/noble/apps/kube-prometheus-stack/values.yaml` (**`letsencrypt-prod`**) | -| Headlamp (Ingress + TLS) | **`headlamp.apps.noble.lab.pcenicni.dev`** — chart `ingress` in `clusters/noble/apps/headlamp/` (**`letsencrypt-prod`**, **`ingressClassName: traefik`**) | -| Public DNS (Pangolin) | **Newt** tunnel + **CNAME** at registrar + **Integration API** — `clusters/noble/apps/newt/` | +| Grafana (Ingress + TLS) | **`grafana.apps.noble.lab.pcenicni.dev`** — `grafana.ingress` in `clusters/noble/bootstrap/kube-prometheus-stack/values.yaml` (**`letsencrypt-prod`**) | +| Headlamp (Ingress + TLS) | **`headlamp.apps.noble.lab.pcenicni.dev`** — chart `ingress` in `clusters/noble/bootstrap/headlamp/` (**`letsencrypt-prod`**, **`ingressClassName: traefik`**) | +| Public DNS (Pangolin) | **Newt** tunnel + **CNAME** at registrar + **Integration API** — `clusters/noble/bootstrap/newt/` | | Velero | S3-compatible URL — configure later | ## Versions @@ -51,7 +51,7 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vaul - Talos: **v1.12.6** — align `talosctl` client with node image - Talos **Image Factory** (iscsi-tools + util-linux-tools): **`factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6`** — same schematic must appear in **`machine.install.image`** after `talhelper genconfig` (bare metal may use `metal-installer/` instead of `nocloud-installer/`) - Kubernetes: **1.35.2** on current nodes (bundled with Talos; not pinned in repo) -- Cilium: 
**1.16.6** (Helm chart; see `clusters/noble/apps/cilium/README.md`) +- Cilium: **1.16.6** (Helm chart; see `clusters/noble/bootstrap/cilium/README.md`) - MetalLB: **0.15.3** (Helm chart; app **v0.15.3**) - metrics-server: **3.13.0** (Helm chart; app **v0.8.0**) - Longhorn: **1.11.1** (Helm chart; app **v1.11.1**) @@ -65,7 +65,7 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vaul - Sealed Secrets: **2.18.4** (Helm chart `sealed-secrets/sealed-secrets`; app **0.36.1**) - External Secrets Operator: **2.2.0** (Helm chart `external-secrets/external-secrets`; app **v2.2.0**) - Vault: **0.32.0** (Helm chart `hashicorp/vault`; app **1.21.2**) -- Kyverno: **3.7.1** (Helm chart `kyverno/kyverno`; app **v1.17.1**); **kyverno-policies** **3.7.1** — **baseline** PSS, **Audit** (`clusters/noble/apps/kyverno/`) +- Kyverno: **3.7.1** (Helm chart `kyverno/kyverno`; app **v1.17.1**); **kyverno-policies** **3.7.1** — **baseline** PSS, **Audit** (`clusters/noble/bootstrap/kyverno/`) - Headlamp: **0.40.1** (Helm chart `headlamp/headlamp`; app matches chart — see [Artifact Hub](https://artifacthub.io/packages/helm/headlamp/headlamp)) - Renovate: **hosted** (Mend **Renovate** GitHub/GitLab app — no cluster chart) **or** **self-hosted** — pin chart when added ([Helm charts](https://docs.renovatebot.com/helm-charts/), OCI `ghcr.io/renovatebot/charts/renovate`); pair **`renovate.json`** with this repo’s Helm paths under **`clusters/noble/`** @@ -79,25 +79,25 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vaul | talhelper source (active) | `talos/talconfig.yaml` — may be **wipe-phase** (no Longhorn volume) during disk recovery | | Longhorn volume restore | `talos/talconfig.with-longhorn.yaml` — copy to `talconfig.yaml` after GPT wipe (see `talos/README.md` §5) | | Longhorn GPT wipe automation | `talos/scripts/longhorn-gpt-recovery.sh` | -| kube-vip (kustomize) | `clusters/noble/apps/kube-vip/` (`vip_interface` e.g. 
`ens18`) | -| Cilium (Helm values) | `clusters/noble/apps/cilium/` — `values.yaml` (phase 1), optional `values-kpr.yaml`, `README.md` | -| MetalLB | `clusters/noble/apps/metallb/` — `namespace.yaml` (PSA **privileged**), `ip-address-pool.yaml`, `kustomization.yaml`, `README.md` | -| Longhorn | `clusters/noble/apps/longhorn/` — `values.yaml`, `namespace.yaml` (PSA **privileged**), `kustomization.yaml` | -| metrics-server (Helm values) | `clusters/noble/apps/metrics-server/values.yaml` | -| Traefik (Helm values) | `clusters/noble/apps/traefik/` — `values.yaml`, `namespace.yaml`, `README.md` | -| cert-manager (Helm + ClusterIssuers) | `clusters/noble/apps/cert-manager/` — `values.yaml`, `namespace.yaml`, `kustomization.yaml`, `README.md` | -| Newt / Pangolin tunnel (Helm) | `clusters/noble/apps/newt/` — `values.yaml`, `namespace.yaml`, `README.md` | -| Argo CD (bootstrap + app-of-apps) | `clusters/noble/bootstrap/argocd/` — `values.yaml`, `root-application.yaml`, `apps/`, `README.md` | -| kube-prometheus-stack (Helm values) | `clusters/noble/apps/kube-prometheus-stack/` — `values.yaml`, `namespace.yaml` | -| Grafana Loki datasource (ConfigMap; no chart change) | `clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml` | -| Loki (Helm values) | `clusters/noble/apps/loki/` — `values.yaml`, `namespace.yaml` | -| Fluent Bit → Loki (Helm values) | `clusters/noble/apps/fluent-bit/` — `values.yaml`, `namespace.yaml` | -| Sealed Secrets (Helm) | `clusters/noble/apps/sealed-secrets/` — `values.yaml`, `namespace.yaml`, `README.md` | -| External Secrets Operator (Helm + Vault store example) | `clusters/noble/apps/external-secrets/` — `values.yaml`, `namespace.yaml`, `README.md`, `examples/vault-cluster-secret-store.yaml` | -| Vault (Helm + optional unseal CronJob) | `clusters/noble/apps/vault/` — `values.yaml`, `namespace.yaml`, `unseal-cronjob.yaml`, `cilium-network-policy.yaml`, `configure-kubernetes-auth.sh`, `README.md` | -| Kyverno + PSS baseline policies | 
`clusters/noble/apps/kyverno/` — `values.yaml`, `policies-values.yaml`, `namespace.yaml`, `README.md` | -| Headlamp (Helm + Ingress) | `clusters/noble/apps/headlamp/` — `values.yaml`, `namespace.yaml`, `README.md` | -| Renovate (repo config + optional self-hosted Helm) | **`renovate.json`** at repo root; optional `clusters/noble/apps/renovate/` for self-hosted chart + token Secret (**Sealed Secrets** / **ESO** after **Phase E**) | +| kube-vip (kustomize) | `clusters/noble/bootstrap/kube-vip/` (`vip_interface` e.g. `ens18`) | +| Cilium (Helm values) | `clusters/noble/bootstrap/cilium/` — `values.yaml` (phase 1), optional `values-kpr.yaml`, `README.md` | +| MetalLB | `clusters/noble/bootstrap/metallb/` — `namespace.yaml` (PSA **privileged**), `ip-address-pool.yaml`, `kustomization.yaml`, `README.md` | +| Longhorn | `clusters/noble/bootstrap/longhorn/` — `values.yaml`, `namespace.yaml` (PSA **privileged**), `kustomization.yaml` | +| metrics-server (Helm values) | `clusters/noble/bootstrap/metrics-server/values.yaml` | +| Traefik (Helm values) | `clusters/noble/bootstrap/traefik/` — `values.yaml`, `namespace.yaml`, `README.md` | +| cert-manager (Helm + ClusterIssuers) | `clusters/noble/bootstrap/cert-manager/` — `values.yaml`, `namespace.yaml`, `kustomization.yaml`, `README.md` | +| Newt / Pangolin tunnel (Helm) | `clusters/noble/bootstrap/newt/` — `values.yaml`, `namespace.yaml`, `README.md` | +| Argo CD (Helm) + optional app-of-apps | `clusters/noble/bootstrap/argocd/` — `values.yaml`, `root-application.yaml`, `README.md`; optional **`Application`** tree in **`clusters/noble/apps/`** | +| kube-prometheus-stack (Helm values) | `clusters/noble/bootstrap/kube-prometheus-stack/` — `values.yaml`, `namespace.yaml` | +| Grafana Loki datasource (ConfigMap; no chart change) | `clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml` | +| Loki (Helm values) | `clusters/noble/bootstrap/loki/` — `values.yaml`, `namespace.yaml` | +| Fluent Bit → Loki (Helm values) | 
`clusters/noble/bootstrap/fluent-bit/` — `values.yaml`, `namespace.yaml` | +| Sealed Secrets (Helm) | `clusters/noble/bootstrap/sealed-secrets/` — `values.yaml`, `namespace.yaml`, `README.md` | +| External Secrets Operator (Helm + Vault store example) | `clusters/noble/bootstrap/external-secrets/` — `values.yaml`, `namespace.yaml`, `README.md`, `examples/vault-cluster-secret-store.yaml` | +| Vault (Helm + optional unseal CronJob) | `clusters/noble/bootstrap/vault/` — `values.yaml`, `namespace.yaml`, `unseal-cronjob.yaml`, `cilium-network-policy.yaml`, `configure-kubernetes-auth.sh`, `README.md` | +| Kyverno + PSS baseline policies | `clusters/noble/bootstrap/kyverno/` — `values.yaml`, `policies-values.yaml`, `namespace.yaml`, `README.md` | +| Headlamp (Helm + Ingress) | `clusters/noble/bootstrap/headlamp/` — `values.yaml`, `namespace.yaml`, `README.md` | +| Renovate (repo config + optional self-hosted Helm) | **`renovate.json`** at repo root; optional self-hosted chart under **`clusters/noble/apps/`** (Argo) + token Secret (**Sealed Secrets** / **ESO** after **Phase E**) | **Git vs cluster:** manifests and `talconfig` live in git; **`talhelper genconfig -o out`**, bootstrap, Helm, and `kubectl` run on your LAN. See **`talos/README.md`** for workstation reachability (lab LAN/VPN), **`talosctl kubeconfig`** vs Kubernetes `server:` (VIP vs node IP), and **`--insecure`** only in maintenance. @@ -105,10 +105,10 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vaul 1. **Talos** installed; **Cilium** (or chosen CNI) **before** most workloads — with `cni: none`, nodes stay **NotReady** / **network-unavailable** taint until CNI is up. 2. **MetalLB Helm chart** (CRDs + controller) **before** `kubectl apply -k` on the pool manifests. -3. **`clusters/noble/apps/metallb/namespace.yaml`** before or merged onto `metallb-system` so Pod Security does not block speaker (see `apps/metallb/README.md`). -4. 
**Longhorn:** Talos user volume + extensions in `talconfig.with-longhorn.yaml` (when restored); Helm **`defaultDataPath`** in `clusters/noble/apps/longhorn/values.yaml`. -5. **Loki → Fluent Bit → Grafana datasource:** deploy **Loki** (`loki-gateway` Service) before **Fluent Bit**; apply **`clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`** after **Loki** (sidecar picks up the ConfigMap — no kube-prometheus values change for Loki). -6. **Vault:** **Longhorn** default **StorageClass** before **`clusters/noble/apps/vault/`** Helm (PVC **`data-vault-0`**); **External Secrets** **`ClusterSecretStore`** after Vault is initialized, unsealed, and **Kubernetes auth** is configured. +3. **`clusters/noble/bootstrap/metallb/namespace.yaml`** before or merged onto `metallb-system` so Pod Security does not block speaker (see `bootstrap/metallb/README.md`). +4. **Longhorn:** Talos user volume + extensions in `talconfig.with-longhorn.yaml` (when restored); Helm **`defaultDataPath`** in `clusters/noble/bootstrap/longhorn/values.yaml`. +5. **Loki → Fluent Bit → Grafana datasource:** deploy **Loki** (`loki-gateway` Service) before **Fluent Bit**; apply **`clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** after **Loki** (sidecar picks up the ConfigMap — no kube-prometheus values change for Loki). +6. **Vault:** **Longhorn** default **StorageClass** before **`clusters/noble/bootstrap/vault/`** Helm (PVC **`data-vault-0`**); **External Secrets** **`ClusterSecretStore`** after Vault is initialized, unsealed, and **Kubernetes auth** is configured. 7. **Headlamp:** **Traefik** + **cert-manager** (**`letsencrypt-prod`**) before exposing **`headlamp.apps.noble.lab.pcenicni.dev`**; treat as **cluster-admin** UI — protect with network policy / SSO when hardening (**Phase G**). 8. **Renovate:** **Git remote** + platform access (**hosted app** needs org/repo install; **self-hosted** needs **`RENOVATE_TOKEN`** and chart **`renovate.config`**). 
If the bot runs **in-cluster**, add the token **after** **Sealed Secrets** / **Vault** (**Phase E**) — no ingress required for the bot itself. @@ -130,7 +130,7 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vaul - [x] `apply-config` all nodes (`talos/README.md` §2 — **no** `--insecure` after nodes join; use `TALOSCONFIG`) - [x] `talosctl bootstrap` once; other control planes and worker join - [x] `talosctl kubeconfig` → working `kubectl` (`talos/README.md` §3 — override `server:` if VIP not reachable from workstation) -- [x] **kube-vip manifests** in `clusters/noble/apps/kube-vip` +- [x] **kube-vip manifests** in `clusters/noble/bootstrap/kube-vip` - [x] kube-vip healthy; `vip_interface` matches uplink (`talosctl get links`); VIP reachable where needed - [x] `talosctl health` (e.g. `talosctl health -n 192.168.50.20` with `TALOSCONFIG` set) @@ -138,45 +138,45 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vaul **Install order:** **Cilium** → **metrics-server** → **Longhorn** (Talos disk + Helm) → **MetalLB** (Helm → pool manifests) → ingress / certs / DNS as planned. 
-- [x] **Cilium** (Helm **1.16.6**) — **required** before MetalLB if `cni: none` (`clusters/noble/apps/cilium/`) -- [x] **metrics-server** — Helm **3.13.0**; values in `clusters/noble/apps/metrics-server/values.yaml`; verify `kubectl top nodes` -- [x] **Longhorn** — Talos: user volume + kubelet mounts + extensions (`talos/README.md` §5); Helm **1.11.1**; `kubectl apply -k clusters/noble/apps/longhorn`; verify **`nodes.longhorn.io`** and test PVC **`Bound`** -- [x] **MetalLB** — chart installed; **pool + L2** from `clusters/noble/apps/metallb/` applied (`192.168.50.210`–`229`) +- [x] **Cilium** (Helm **1.16.6**) — **required** before MetalLB if `cni: none` (`clusters/noble/bootstrap/cilium/`) +- [x] **metrics-server** — Helm **3.13.0**; values in `clusters/noble/bootstrap/metrics-server/values.yaml`; verify `kubectl top nodes` +- [x] **Longhorn** — Talos: user volume + kubelet mounts + extensions (`talos/README.md` §5); Helm **1.11.1**; `kubectl apply -k clusters/noble/bootstrap/longhorn`; verify **`nodes.longhorn.io`** and test PVC **`Bound`** +- [x] **MetalLB** — chart installed; **pool + L2** from `clusters/noble/bootstrap/metallb/` applied (`192.168.50.210`–`229`) - [x] **`Service` `LoadBalancer`** / pool check — MetalLB assigns from `210`–`229` (validated before Traefik; temporary nginx test removed in favor of Traefik) -- [x] **Traefik** `LoadBalancer` for `*.apps.noble.lab.pcenicni.dev` — `clusters/noble/apps/traefik/`; **`192.168.50.211`** -- [x] **cert-manager** + ClusterIssuer (**`letsencrypt-staging`** / **`letsencrypt-prod`**) — `clusters/noble/apps/cert-manager/` -- [x] **Newt** (Pangolin tunnel; replaces ExternalDNS for public DNS) — `clusters/noble/apps/newt/` — **`newt-pangolin-auth`**; CNAME + **Integration API** per **`newt/README.md`** +- [x] **Traefik** `LoadBalancer` for `*.apps.noble.lab.pcenicni.dev` — `clusters/noble/bootstrap/traefik/`; **`192.168.50.211`** +- [x] **cert-manager** + ClusterIssuer (**`letsencrypt-staging`** / 
**`letsencrypt-prod`**) — `clusters/noble/bootstrap/cert-manager/`
+- [x] **Newt** (Pangolin tunnel; replaces ExternalDNS for public DNS) — `clusters/noble/bootstrap/newt/` — **`newt-pangolin-auth`**; CNAME + **Integration API** per **`clusters/noble/bootstrap/newt/README.md`**

## Phase C — GitOps

- [x] **Argo CD** bootstrap — `clusters/noble/bootstrap/argocd/` (`helm upgrade --install argocd …`) — also covered by **`ansible/playbooks/noble.yml`** (role **`noble_argocd`**)
- [x] Argo CD server **LoadBalancer** — **`192.168.50.210`** (see `values.yaml`)
-- [x] **App-of-apps** — optional; **`apps/kustomization.yaml`** is **empty** (core stack is **Ansible**-managed, not Argo). Set **`repoURL`** in **`root-application.yaml`** and add **`Application`** manifests only for optional GitOps workloads — see **`bootstrap/argocd/apps/README.md`**
+- [x] **App-of-apps** — optional; **`clusters/noble/apps/kustomization.yaml`** is **empty** (core stack is **Ansible**-managed from **`clusters/noble/bootstrap/`**, not Argo). Set **`repoURL`** in **`root-application.yaml`** and add **`Application`** manifests only for optional GitOps workloads — see **`clusters/noble/apps/README.md`**
- [x] **Renovate** — **`renovate.json`** at repo root ([Renovate](https://docs.renovatebot.com/) — **Kubernetes** manager for **`clusters/noble/**/*.yaml`** image pins; grouped minor/patch PRs). **Activate PRs:** install **[Mend Renovate](https://github.com/apps/renovate)** on the Git repo (**Option A**), or **Option B:** self-hosted chart per [Helm charts](https://docs.renovatebot.com/helm-charts/) + token from **Sealed Secrets** / **ESO**. Helm **chart** versions pinned only in comments still need manual bumps or extra **regex** `customManagers` — extend **`renovate.json`** as needed.
- [ ] SSO — later ## Phase D — Observability -- [x] **kube-prometheus-stack** — `kubectl apply -f clusters/noble/apps/kube-prometheus-stack/namespace.yaml` then **`helm upgrade --install`** as in `clusters/noble/apps/kube-prometheus-stack/values.yaml` (chart **82.15.1**); PVCs **`longhorn`**; **`--wait --timeout 30m`** recommended; verify **`kubectl -n monitoring get pods,pvc`** -- [x] **Loki** + **Fluent Bit** + **Grafana Loki datasource** — **order:** **`kubectl apply -f clusters/noble/apps/loki/namespace.yaml`** → **`helm upgrade --install loki`** `grafana/loki` **6.55.0** `-f clusters/noble/apps/loki/values.yaml` → **`kubectl apply -f clusters/noble/apps/fluent-bit/namespace.yaml`** → **`helm upgrade --install fluent-bit`** `fluent/fluent-bit` **0.56.0** `-f clusters/noble/apps/fluent-bit/values.yaml` → **`kubectl apply -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`**. Verify **Explore → Loki** in Grafana; **`kubectl -n loki get pods,pvc`**, **`kubectl -n logging get pods`** -- [x] **Headlamp** — Kubernetes web UI ([Headlamp](https://headlamp.dev/)); **`helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/`**; **`kubectl apply -f clusters/noble/apps/headlamp/namespace.yaml`** → **`helm upgrade --install headlamp headlamp/headlamp --version 0.40.1 -n headlamp -f clusters/noble/apps/headlamp/values.yaml`**; **Ingress** **`https://headlamp.apps.noble.lab.pcenicni.dev`** (**`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **`values.yaml`:** **`config.sessionTTL: null`** works around chart **0.40.1** / binary mismatch ([headlamp#4883](https://github.com/kubernetes-sigs/headlamp/issues/4883)). **RBAC:** chart defaults are permissive — tighten before LAN-wide exposure; align with **Phase G** hardening. 
+- [x] **kube-prometheus-stack** — `kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml` then **`helm upgrade --install`** as in `clusters/noble/bootstrap/kube-prometheus-stack/values.yaml` (chart **82.15.1**); PVCs **`longhorn`**; **`--wait --timeout 30m`** recommended; verify **`kubectl -n monitoring get pods,pvc`** +- [x] **Loki** + **Fluent Bit** + **Grafana Loki datasource** — **order:** **`kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml`** → **`helm upgrade --install loki`** `grafana/loki` **6.55.0** `-f clusters/noble/bootstrap/loki/values.yaml` → **`kubectl apply -f clusters/noble/bootstrap/fluent-bit/namespace.yaml`** → **`helm upgrade --install fluent-bit`** `fluent/fluent-bit` **0.56.0** `-f clusters/noble/bootstrap/fluent-bit/values.yaml` → **`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`**. Verify **Explore → Loki** in Grafana; **`kubectl -n loki get pods,pvc`**, **`kubectl -n logging get pods`** +- [x] **Headlamp** — Kubernetes web UI ([Headlamp](https://headlamp.dev/)); **`helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/`**; **`kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml`** → **`helm upgrade --install headlamp headlamp/headlamp --version 0.40.1 -n headlamp -f clusters/noble/bootstrap/headlamp/values.yaml`**; **Ingress** **`https://headlamp.apps.noble.lab.pcenicni.dev`** (**`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **`values.yaml`:** **`config.sessionTTL: null`** works around chart **0.40.1** / binary mismatch ([headlamp#4883](https://github.com/kubernetes-sigs/headlamp/issues/4883)). **RBAC:** chart defaults are permissive — tighten before LAN-wide exposure; align with **Phase G** hardening. 
## Phase E — Secrets -- [x] **Sealed Secrets** (optional Git workflow) — `clusters/noble/apps/sealed-secrets/` (Helm **2.18.4**); **`kubeseal`** + key backup per **`README.md`** -- [x] **Vault** in-cluster on Longhorn + **auto-unseal** — `clusters/noble/apps/vault/` (Helm **0.32.0**); **Longhorn** PVC; **OSS** “auto-unseal” = optional **`unseal-cronjob.yaml`** + Secret (**README**); **`configure-kubernetes-auth.sh`** for ESO (**Kubernetes auth** + KV + role) -- [x] **External Secrets Operator** + Vault `ClusterSecretStore` — operator **`clusters/noble/apps/external-secrets/`** (Helm **2.2.0**); apply **`examples/vault-cluster-secret-store.yaml`** after Vault (**`README.md`**) +- [x] **Sealed Secrets** (optional Git workflow) — `clusters/noble/bootstrap/sealed-secrets/` (Helm **2.18.4**); **`kubeseal`** + key backup per **`README.md`** +- [x] **Vault** in-cluster on Longhorn + **auto-unseal** — `clusters/noble/bootstrap/vault/` (Helm **0.32.0**); **Longhorn** PVC; **OSS** “auto-unseal” = optional **`unseal-cronjob.yaml`** + Secret (**README**); **`configure-kubernetes-auth.sh`** for ESO (**Kubernetes auth** + KV + role) +- [x] **External Secrets Operator** + Vault `ClusterSecretStore` — operator **`clusters/noble/bootstrap/external-secrets/`** (Helm **2.2.0**); apply **`examples/vault-cluster-secret-store.yaml`** after Vault (**`README.md`**) ## Phase F — Policy + backups -- [x] **Kyverno** baseline policies — `clusters/noble/apps/kyverno/` (Helm **kyverno** **3.7.1** + **kyverno-policies** **3.7.1**, **baseline** / **Audit** — see **`README.md`**) +- [x] **Kyverno** baseline policies — `clusters/noble/bootstrap/kyverno/` (Helm **kyverno** **3.7.1** + **kyverno-policies** **3.7.1**, **baseline** / **Audit** — see **`README.md`**) - [ ] **Velero** when S3 is ready; backup/restore drill ## Phase G — Hardening -- [x] **Cilium** — Vault **`CiliumNetworkPolicy`** (`clusters/noble/apps/vault/cilium-network-policy.yaml`) — HTTP **8200** from **`external-secrets`** + 
**`vault`**; extend for other clients as needed +- [x] **Cilium** — Vault **`CiliumNetworkPolicy`** (`clusters/noble/bootstrap/vault/cilium-network-policy.yaml`) — HTTP **8200** from **`external-secrets`** + **`vault`**; extend for other clients as needed - [x] **Runbooks** — **`talos/runbooks/`** (API VIP / kube-vip, etcd–Talos, Longhorn, Vault) -- [x] **RBAC** — **Headlamp** **`ClusterRoleBinding`** uses built-in **`edit`** (not **`cluster-admin`**); **Argo CD** **`policy.default: role:readonly`** with **`g, admin, role:admin`** — see **`clusters/noble/apps/headlamp/values.yaml`**, **`clusters/noble/bootstrap/argocd/values.yaml`**, **`talos/runbooks/rbac.md`** +- [x] **RBAC** — **Headlamp** **`ClusterRoleBinding`** uses built-in **`edit`** (not **`cluster-admin`**); **Argo CD** **`policy.default: role:readonly`** with **`g, admin, role:admin`** — see **`clusters/noble/bootstrap/headlamp/values.yaml`**, **`clusters/noble/bootstrap/argocd/values.yaml`**, **`talos/runbooks/rbac.md`** - [ ] **Alertmanager** — add **`slack_configs`**, **`pagerduty_configs`**, or other receivers under **`kube-prometheus-stack`** `alertmanager.config` (chart defaults use **`null`** receiver) ## Quick validation diff --git a/talos/README.md b/talos/README.md index e46c723..efc33e4 100644 --- a/talos/README.md +++ b/talos/README.md @@ -106,7 +106,7 @@ sed -i '' 's|https://192.168.50.230:6443|https://192.168.50.20:6443|g' kubeconfi Quick check from your Mac: `nc -vz 192.168.50.20 50000` (Talos) and `nc -vz 192.168.50.20 6443` (Kubernetes). -**`dial tcp 192.168.50.230:6443` on nodes:** Host-network components (including **Cilium**) cannot use the in-cluster `kubernetes` Service; they otherwise follow **`cluster.controlPlane.endpoint`** (the VIP). Talos **KubePrism** on **`127.0.0.1:7445`** (default) load-balances to healthy apiservers. 
Ensure the CNI Helm values set **`k8sServiceHost: "127.0.0.1"`** and **`k8sServicePort: "7445"`** — see [`clusters/noble/apps/cilium/values.yaml`](../clusters/noble/apps/cilium/values.yaml). Also confirm **kube-vip**’s **`vip_interface`** matches the uplink (`talosctl -n get links` — e.g. **`ens18`** on these nodes). A bare **`curl -k https://192.168.50.230:6443/healthz`** often returns **`401 Unauthorized`** because no client cert was sent — that still means TLS to the VIP worked. +**`dial tcp 192.168.50.230:6443` on nodes:** Host-network components (including **Cilium**) cannot use the in-cluster `kubernetes` Service; they otherwise follow **`cluster.controlPlane.endpoint`** (the VIP). Talos **KubePrism** on **`127.0.0.1:7445`** (default) load-balances to healthy apiservers. Ensure the CNI Helm values set **`k8sServiceHost: "127.0.0.1"`** and **`k8sServicePort: "7445"`** — see [`clusters/noble/bootstrap/cilium/values.yaml`](../clusters/noble/bootstrap/cilium/values.yaml). Also confirm **kube-vip**’s **`vip_interface`** matches the uplink (`talosctl -n get links` — e.g. **`ens18`** on these nodes). A bare **`curl -k https://192.168.50.230:6443/healthz`** often returns **`401 Unauthorized`** because no client cert was sent — that still means TLS to the VIP worked. **Verify the VIP with `kubectl` (copy as-is):** use a real kubeconfig path (not ` /path/to/…`). From the **repository root**: @@ -124,23 +124,23 @@ Expect a single line: **`ok`**. 
If you see **`The connection to the server local | Component | Apply | |-----------|--------| -| Cilium | **Before** kube-vip/MetalLB scheduling: Helm from [`clusters/noble/apps/cilium/README.md`](../clusters/noble/apps/cilium/README.md) (`values.yaml`) | -| kube-vip | `kubectl apply -k ../clusters/noble/apps/kube-vip` | -| MetalLB pool | After MetalLB controller install: `kubectl apply -k ../clusters/noble/apps/metallb` | -| Longhorn PSA + Helm | `kubectl apply -k ../clusters/noble/apps/longhorn` then Helm from §5 below | +| Cilium | **Before** kube-vip/MetalLB scheduling: Helm from [`clusters/noble/bootstrap/cilium/README.md`](../clusters/noble/bootstrap/cilium/README.md) (`values.yaml`) | +| kube-vip | `kubectl apply -k ../clusters/noble/bootstrap/kube-vip` | +| MetalLB pool | After MetalLB controller install: `kubectl apply -k ../clusters/noble/bootstrap/metallb` | +| Longhorn PSA + Helm | `kubectl apply -k ../clusters/noble/bootstrap/longhorn` then Helm from §5 below | -Set `vip_interface` in `clusters/noble/apps/kube-vip/vip-daemonset.yaml` if it does not match the control-plane uplink (`talosctl -n get links`). +Set `vip_interface` in `clusters/noble/bootstrap/kube-vip/vip-daemonset.yaml` if it does not match the control-plane uplink (`talosctl -n get links`). ## 5. Longhorn (Talos) 1. **Machine image:** `talconfig.yaml` includes `iscsi-tools` and `util-linux-tools` extensions. After `talhelper genconfig`, **upgrade each node** so the running installer image matches (extensions are in the image, not applied live by config alone). If `longhorn-manager` logs **`iscsiadm` / `open-iscsi`**, the node image does not include the extension yet. -2. **Pod Security + path:** Apply `kubectl apply -k ../clusters/noble/apps/longhorn` (privileged `longhorn-system`). The Helm chart host-mounts **`/var/lib/longhorn`**; `talconfig` adds a kubelet **bind** from `/var/mnt/longhorn` → `/var/lib/longhorn` so that path matches the dedicated XFS volume. +2. 
**Pod Security + path:** Apply `kubectl apply -k ../clusters/noble/bootstrap/longhorn` (privileged `longhorn-system`). The Helm chart host-mounts **`/var/lib/longhorn`**; `talconfig` adds a kubelet **bind** from `/var/mnt/longhorn` → `/var/lib/longhorn` so that path matches the dedicated XFS volume. 3. **Data path:** From the **repository root** (not `talos/`), run Helm with a real release and chart name — not literal `...`: ```bash helm repo add longhorn https://charts.longhorn.io && helm repo update helm upgrade --install longhorn longhorn/longhorn -n longhorn-system --create-namespace \ - -f clusters/noble/apps/longhorn/values.yaml + -f clusters/noble/bootstrap/longhorn/values.yaml ``` If Longhorn defaults to `/var/lib/longhorn`, you get **wrong format** / **no space** on the Talos root filesystem. diff --git a/talos/runbooks/api-vip-kube-vip.md b/talos/runbooks/api-vip-kube-vip.md index 57b72c4..b68e4a8 100644 --- a/talos/runbooks/api-vip-kube-vip.md +++ b/talos/runbooks/api-vip-kube-vip.md @@ -4,7 +4,7 @@ **Checks** -1. VIP and interface align with [`talos/talconfig.yaml`](../talconfig.yaml) (`cluster.network`, `additionalApiServerCertSans`) and [`clusters/noble/apps/kube-vip/`](../../clusters/noble/apps/kube-vip/). +1. VIP and interface align with [`talos/talconfig.yaml`](../talconfig.yaml) (`cluster.network`, `additionalApiServerCertSans`) and [`clusters/noble/bootstrap/kube-vip/`](../../clusters/noble/bootstrap/kube-vip/). 2. `kubectl -n kube-system get pods -l app.kubernetes.io/name=kube-vip -o wide` — DaemonSet should be **Running** on control-plane nodes. 3. From a workstation: `ping 192.168.50.230` (if ICMP allowed) and `curl -k https://192.168.50.230:6443/healthz` or `kubectl get --raw /healthz` with kubeconfig `server:` set to the VIP. 4. `talosctl health` with `TALOSCONFIG` (see [`talos/README.md`](../README.md) §3). 
diff --git a/talos/runbooks/longhorn.md b/talos/runbooks/longhorn.md index 3f16feb..9c7793e 100644 --- a/talos/runbooks/longhorn.md +++ b/talos/runbooks/longhorn.md @@ -13,4 +13,4 @@ - Node disk pressure / mount missing: fix Talos machine config, reboot node per Talos docs. - Recovery / GPT wipe scripts: [`talos/scripts/longhorn-gpt-recovery.sh`](../scripts/longhorn-gpt-recovery.sh) and CLUSTER-BUILD notes. -**References:** [`clusters/noble/apps/longhorn/`](../../clusters/noble/apps/longhorn/), [Longhorn docs](https://longhorn.io/docs/). +**References:** [`clusters/noble/bootstrap/longhorn/`](../../clusters/noble/bootstrap/longhorn/), [Longhorn docs](https://longhorn.io/docs/). diff --git a/talos/runbooks/rbac.md b/talos/runbooks/rbac.md index 7115961..9f7bd18 100644 --- a/talos/runbooks/rbac.md +++ b/talos/runbooks/rbac.md @@ -1,6 +1,6 @@ # Runbook: Kubernetes RBAC (noble) -**Headlamp** (`clusters/noble/apps/headlamp/values.yaml`): the chart’s **ClusterRoleBinding** uses the built-in **`edit`** ClusterRole — not **`cluster-admin`**. Break-glass changes use **`kubectl`** with an admin kubeconfig. +**Headlamp** (`clusters/noble/bootstrap/headlamp/values.yaml`): the chart’s **ClusterRoleBinding** uses the built-in **`edit`** ClusterRole — not **`cluster-admin`**. Break-glass changes use **`kubectl`** with an admin kubeconfig. **Argo CD** (`clusters/noble/bootstrap/argocd/values.yaml`): **`policy.default: role:readonly`** — new OIDC/Git users get read-only unless you add **`g, , role:admin`** (or another role) in **`configs.rbac.policy.csv`**. Local user **`admin`** stays **`role:admin`** via **`g, admin, role:admin`**. diff --git a/talos/runbooks/vault.md b/talos/runbooks/vault.md index 983b734..4786df9 100644 --- a/talos/runbooks/vault.md +++ b/talos/runbooks/vault.md @@ -5,9 +5,9 @@ **Checks** 1. `kubectl -n vault exec -i sts/vault -- vault status` — **Sealed** / **Initialized**. -2. 
Unseal key Secret + optional CronJob: [`clusters/noble/apps/vault/README.md`](../../clusters/noble/apps/vault/README.md), `unseal-cronjob.yaml`. -3. Kubernetes auth for ESO: [`clusters/noble/apps/vault/configure-kubernetes-auth.sh`](../../clusters/noble/apps/vault/configure-kubernetes-auth.sh) and `kubectl describe clustersecretstore vault`. -4. **Cilium** policy: if Vault is unreachable from `external-secrets`, check [`clusters/noble/apps/vault/cilium-network-policy.yaml`](../../clusters/noble/apps/vault/cilium-network-policy.yaml) and extend `ingress` for new client namespaces. +2. Unseal key Secret + optional CronJob: [`clusters/noble/bootstrap/vault/README.md`](../../clusters/noble/bootstrap/vault/README.md), `unseal-cronjob.yaml`. +3. Kubernetes auth for ESO: [`clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh`](../../clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh) and `kubectl describe clustersecretstore vault`. +4. **Cilium** policy: if Vault is unreachable from `external-secrets`, check [`clusters/noble/bootstrap/vault/cilium-network-policy.yaml`](../../clusters/noble/bootstrap/vault/cilium-network-policy.yaml) and extend `ingress` for new client namespaces. **Common fixes** diff --git a/talos/talconfig.with-longhorn.yaml b/talos/talconfig.with-longhorn.yaml index 2069860..e14d616 100644 --- a/talos/talconfig.with-longhorn.yaml +++ b/talos/talconfig.with-longhorn.yaml @@ -8,7 +8,7 @@ # installDisk: confirm with `talosctl disks -n --insecure` (Proxmox virtio is often /dev/sda). # Longhorn data disk: second disk (often /dev/sdb SCSI or /dev/vdb virtio) → XFS at /var/mnt/longhorn. # After changing schematic/extensions: regenerate configs, upgrade nodes with new installer image, then reboot if needed. -# Helm must set defaultDataPath to /var/mnt/longhorn (see clusters/noble/apps/longhorn/values.yaml). +# Helm must set defaultDataPath to /var/mnt/longhorn (see clusters/noble/bootstrap/longhorn/values.yaml). 
# # Image Factory schematic (iscsi-tools + util-linux-tools), nocloud installer — pinned per-node via `talosImageURL` # (base URL only, no `:tag` — talhelper validates and appends `talosVersion`). diff --git a/talos/talconfig.yaml b/talos/talconfig.yaml index 2069860..e14d616 100644 --- a/talos/talconfig.yaml +++ b/talos/talconfig.yaml @@ -8,7 +8,7 @@ # installDisk: confirm with `talosctl disks -n --insecure` (Proxmox virtio is often /dev/sda). # Longhorn data disk: second disk (often /dev/sdb SCSI or /dev/vdb virtio) → XFS at /var/mnt/longhorn. # After changing schematic/extensions: regenerate configs, upgrade nodes with new installer image, then reboot if needed. -# Helm must set defaultDataPath to /var/mnt/longhorn (see clusters/noble/apps/longhorn/values.yaml). +# Helm must set defaultDataPath to /var/mnt/longhorn (see clusters/noble/bootstrap/longhorn/values.yaml). # # Image Factory schematic (iscsi-tools + util-linux-tools), nocloud installer — pinned per-node via `talosImageURL` # (base URL only, no `:tag` — talhelper validates and appends `talosVersion`).