From d5f38bd766b42e1003db58aa50b82d934e9d6ca2 Mon Sep 17 00:00:00 2001
From: Nikholas Pcenicni <82239765+nikpcenicni@users.noreply.github.com>
Date: Sat, 28 Mar 2026 01:41:52 -0400
Subject: [PATCH] Update README.md and CLUSTER-BUILD.md to enhance
documentation for Vault Kubernetes auth and ClusterSecretStore integration.
Add one-shot configuration instructions for Kubernetes auth in README.md, and
update CLUSTER-BUILD.md to reflect the current state of the Talos cluster,
including new components like Headlamp and Renovate, along with their
deployment details and next steps.
---
clusters/noble/apps/headlamp/README.md | 18 ++
clusters/noble/apps/headlamp/namespace.yaml | 10 +
clusters/noble/apps/headlamp/values.yaml | 25 ++
clusters/noble/apps/kyverno/README.md | 31 +++
clusters/noble/apps/kyverno/namespace.yaml | 5 +
.../noble/apps/kyverno/policies-values.yaml | 16 ++
clusters/noble/apps/kyverno/values.yaml | 10 +
clusters/noble/apps/vault/README.md | 2 +
.../apps/vault/configure-kubernetes-auth.sh | 77 ++++++
docs/architecture.md | 241 ++++++++++++++++++
talos/CLUSTER-BUILD.md | 24 +-
11 files changed, 454 insertions(+), 5 deletions(-)
create mode 100644 clusters/noble/apps/headlamp/README.md
create mode 100644 clusters/noble/apps/headlamp/namespace.yaml
create mode 100644 clusters/noble/apps/headlamp/values.yaml
create mode 100644 clusters/noble/apps/kyverno/README.md
create mode 100644 clusters/noble/apps/kyverno/namespace.yaml
create mode 100644 clusters/noble/apps/kyverno/policies-values.yaml
create mode 100644 clusters/noble/apps/kyverno/values.yaml
create mode 100755 clusters/noble/apps/vault/configure-kubernetes-auth.sh
create mode 100644 docs/architecture.md
diff --git a/clusters/noble/apps/headlamp/README.md b/clusters/noble/apps/headlamp/README.md
new file mode 100644
index 0000000..76840ef
--- /dev/null
+++ b/clusters/noble/apps/headlamp/README.md
@@ -0,0 +1,18 @@
+# Headlamp (noble)
+
+[Headlamp](https://headlamp.dev/) web UI for the cluster. Exposed on **`https://headlamp.apps.noble.lab.pcenicni.dev`** via **Traefik** + **cert-manager** (`letsencrypt-prod`), same pattern as Grafana.
+
+- **Chart:** `headlamp/headlamp` **0.40.1**
+- **Namespace:** `headlamp`
+
+## Install
+
+```bash
+helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/
+helm repo update
+kubectl apply -f clusters/noble/apps/headlamp/namespace.yaml
+helm upgrade --install headlamp headlamp/headlamp -n headlamp \
+ --version 0.40.1 -f clusters/noble/apps/headlamp/values.yaml --wait --timeout 10m
+```
+
+Sign-in uses a **ServiceAccount token** (Headlamp docs: create a limited SA for day-to-day use). The chart’s default **ClusterRole** is powerful — tighten RBAC and/or add **OIDC** in **`values.yaml`** under **`config.oidc`** when hardening (**Phase G**).
diff --git a/clusters/noble/apps/headlamp/namespace.yaml b/clusters/noble/apps/headlamp/namespace.yaml
new file mode 100644
index 0000000..131711a
--- /dev/null
+++ b/clusters/noble/apps/headlamp/namespace.yaml
@@ -0,0 +1,10 @@
+# Headlamp — apply before Helm.
+# Chart pods do not satisfy PSA "restricted" (see install warnings); "privileged" matches other UI namespaces here — NOTE(review): consider "baseline" if the chart tolerates it.
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: headlamp
+ labels:
+ pod-security.kubernetes.io/enforce: privileged
+ pod-security.kubernetes.io/audit: privileged
+ pod-security.kubernetes.io/warn: privileged
diff --git a/clusters/noble/apps/headlamp/values.yaml b/clusters/noble/apps/headlamp/values.yaml
new file mode 100644
index 0000000..695dcf3
--- /dev/null
+++ b/clusters/noble/apps/headlamp/values.yaml
@@ -0,0 +1,25 @@
+# Headlamp — noble (Kubernetes web UI)
+#
+# helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/
+# helm repo update
+# kubectl apply -f clusters/noble/apps/headlamp/namespace.yaml
+# helm upgrade --install headlamp headlamp/headlamp -n headlamp \
+# --version 0.40.1 -f clusters/noble/apps/headlamp/values.yaml --wait --timeout 10m
+#
+# DNS: headlamp.apps.noble.lab.pcenicni.dev → Traefik LB (see talos/CLUSTER-BUILD.md).
+# Default chart RBAC is broad — restrict for production (Phase G).
+
+ingress:
+ enabled: true
+ ingressClassName: traefik
+ annotations:
+ cert-manager.io/cluster-issuer: letsencrypt-prod
+ hosts:
+ - host: headlamp.apps.noble.lab.pcenicni.dev
+ paths:
+ - path: /
+ type: Prefix
+ tls:
+ - secretName: headlamp-apps-noble-tls
+ hosts:
+ - headlamp.apps.noble.lab.pcenicni.dev
diff --git a/clusters/noble/apps/kyverno/README.md b/clusters/noble/apps/kyverno/README.md
new file mode 100644
index 0000000..b615ead
--- /dev/null
+++ b/clusters/noble/apps/kyverno/README.md
@@ -0,0 +1,31 @@
+# Kyverno (noble)
+
+Admission policies using [Kyverno](https://kyverno.io/). The main chart installs controllers and CRDs; **`kyverno-policies`** installs **Pod Security Standard** rules matching the **`baseline`** profile in **`Audit`** mode (violations are visible in policy reports; workloads are not denied).
+
+- **Charts:** `kyverno/kyverno` **3.7.1** (app **v1.17.1**), `kyverno/kyverno-policies` **3.7.1**
+- **Namespace:** `kyverno`
+
+## Install
+
+```bash
+helm repo add kyverno https://kyverno.github.io/kyverno/
+helm repo update
+kubectl apply -f clusters/noble/apps/kyverno/namespace.yaml
+helm upgrade --install kyverno kyverno/kyverno -n kyverno \
+ --version 3.7.1 -f clusters/noble/apps/kyverno/values.yaml --wait --timeout 15m
+helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \
+ --version 3.7.1 -f clusters/noble/apps/kyverno/policies-values.yaml --wait --timeout 10m
+```
+
+Verify:
+
+```bash
+kubectl -n kyverno get pods
+kubectl get clusterpolicy | head
+```
+
+## Notes
+
+- **`validationFailureAction: Audit`** in `policies-values.yaml` avoids breaking namespaces that need **privileged** behavior (Longhorn, monitoring node-exporter, etc.). Switch specific policies or namespaces to **`Enforce`** when you are ready.
+- To use **`restricted`** instead of **`baseline`**, change **`podSecurityStandard`** in `policies-values.yaml` and reconcile expectations for host mounts and capabilities.
+- Upgrade: bump **`--version`** on both charts together; read [Kyverno release notes](https://github.com/kyverno/kyverno/releases) for breaking changes.
diff --git a/clusters/noble/apps/kyverno/namespace.yaml b/clusters/noble/apps/kyverno/namespace.yaml
new file mode 100644
index 0000000..13e8f48
--- /dev/null
+++ b/clusters/noble/apps/kyverno/namespace.yaml
@@ -0,0 +1,5 @@
+# Kyverno — apply before Helm.
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: kyverno
diff --git a/clusters/noble/apps/kyverno/policies-values.yaml b/clusters/noble/apps/kyverno/policies-values.yaml
new file mode 100644
index 0000000..c2ed8a4
--- /dev/null
+++ b/clusters/noble/apps/kyverno/policies-values.yaml
@@ -0,0 +1,16 @@
+# kyverno/kyverno-policies — Pod Security Standards as Kyverno ClusterPolicies
+#
+# helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \
+# --version 3.7.1 -f clusters/noble/apps/kyverno/policies-values.yaml --wait --timeout 10m
+#
+# Default profile is baseline; validationFailureAction is Audit so existing privileged
+# workloads (monitoring, longhorn, etc.) are reported, not blocked. Tighten per policy or
+# namespace when ready (see README).
+#
+policyKind: ClusterPolicy
+policyType: ClusterPolicy  # NOTE(review): likely redundant — the kyverno-policies chart documents policyKind (above); confirm this key exists in the chart values before relying on it
+podSecurityStandard: baseline
+podSecuritySeverity: medium
+validationFailureAction: Audit
+failurePolicy: Fail
+validationAllowExistingViolations: true
diff --git a/clusters/noble/apps/kyverno/values.yaml b/clusters/noble/apps/kyverno/values.yaml
new file mode 100644
index 0000000..819f2cb
--- /dev/null
+++ b/clusters/noble/apps/kyverno/values.yaml
@@ -0,0 +1,10 @@
+# Kyverno — noble (policy engine)
+#
+# helm repo add kyverno https://kyverno.github.io/kyverno/
+# helm repo update
+# kubectl apply -f clusters/noble/apps/kyverno/namespace.yaml
+# helm upgrade --install kyverno kyverno/kyverno -n kyverno \
+# --version 3.7.1 -f clusters/noble/apps/kyverno/values.yaml --wait --timeout 15m
+#
+# Baseline Pod Security policies (separate chart): see policies-values.yaml + README.md
+#
diff --git a/clusters/noble/apps/vault/README.md b/clusters/noble/apps/vault/README.md
index 13048c3..2f94e6c 100644
--- a/clusters/noble/apps/vault/README.md
+++ b/clusters/noble/apps/vault/README.md
@@ -54,6 +54,8 @@ Vault **OSS** auto-unseal uses cloud KMS (AWS, GCP, Azure, OCI), **Transit** (an
## Kubernetes auth (External Secrets / ClusterSecretStore)
+**One-shot:** from the repo root, `export KUBECONFIG=talos/kubeconfig` and `export VAULT_TOKEN=…`, then run **`./clusters/noble/apps/vault/configure-kubernetes-auth.sh`** (idempotent). Next, run **`kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml`** on a line of its own — in shells where `#` does not start an interactive comment (e.g. default zsh), a trailing `# …` is passed to `kubectl` as extra arguments and breaks `apply`. After a few seconds, **`kubectl get clustersecretstore vault`** should show **READY=True**.
+
Run these **from your workstation** (needs `kubectl`; no local `vault` binary required). Use a **short-lived admin token** or the root token **only in your shell** — do not paste tokens into logs or chat.
**1. Enable the auth method** (skip if already done):
diff --git a/clusters/noble/apps/vault/configure-kubernetes-auth.sh b/clusters/noble/apps/vault/configure-kubernetes-auth.sh
new file mode 100755
index 0000000..08708c3
--- /dev/null
+++ b/clusters/noble/apps/vault/configure-kubernetes-auth.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+# Configure Vault Kubernetes auth + KV v2 + policy/role for External Secrets Operator.
+# Requires: kubectl (cluster access) and jq (used below to read the OIDC issuer); Vault reachable via sts/vault.
+#
+# Usage (from repo root):
+# export KUBECONFIG=talos/kubeconfig # or your path
+# export VAULT_TOKEN='…' # root or admin token — never commit
+# ./clusters/noble/apps/vault/configure-kubernetes-auth.sh
+#
+# Then: kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml
+# Verify: kubectl describe clustersecretstore vault
+
+set -euo pipefail
+
+: "${VAULT_TOKEN:?Set VAULT_TOKEN to your Vault root or admin token}"
+
+ISSUER=$(kubectl get --raw /.well-known/openid-configuration | jq -r .issuer)
+REVIEWER=$(kubectl -n vault create token vault --duration=8760h)
+CA_B64=$(kubectl config view --raw --minify -o jsonpath='{.clusters[0].cluster.certificate-authority-data}')
+
+kubectl -n vault exec -i sts/vault -- env \
+ VAULT_ADDR=http://127.0.0.1:8200 \
+ VAULT_TOKEN="$VAULT_TOKEN" \
+ sh -ec '
+ set -e
+ vault auth list >/tmp/vauth.txt
+ grep -q "^kubernetes/" /tmp/vauth.txt || vault auth enable kubernetes
+ '
+
+kubectl -n vault exec -i sts/vault -- env \
+ VAULT_ADDR=http://127.0.0.1:8200 \
+ VAULT_TOKEN="$VAULT_TOKEN" \
+ CA_B64="$CA_B64" \
+ REVIEWER="$REVIEWER" \
+ ISSUER="$ISSUER" \
+ sh -ec '
+ echo "$CA_B64" | base64 -d > /tmp/k8s-ca.crt
+ vault write auth/kubernetes/config \
+ kubernetes_host="https://kubernetes.default.svc:443" \
+ kubernetes_ca_cert=@/tmp/k8s-ca.crt \
+ token_reviewer_jwt="$REVIEWER" \
+      issuer="$ISSUER" # NOTE(review): issuer validation is deprecated in Vault Kubernetes auth (disable_iss_validation defaults true since 1.9) — confirm this field is still accepted on app v1.21.x
+ '
+
+kubectl -n vault exec -i sts/vault -- env \
+ VAULT_ADDR=http://127.0.0.1:8200 \
+ VAULT_TOKEN="$VAULT_TOKEN" \
+ sh -ec '
+ set -e
+ vault secrets list >/tmp/vsec.txt
+ grep -q "^secret/" /tmp/vsec.txt || vault secrets enable -path=secret kv-v2
+ '
+
+kubectl -n vault exec -i sts/vault -- env \
+ VAULT_ADDR=http://127.0.0.1:8200 \
+ VAULT_TOKEN="$VAULT_TOKEN" \
+ sh -ec '
+ vault policy write external-secrets - <192.168.50.20
control-plane + schedulable"]
+ argon["argon
192.168.50.30
control-plane + schedulable"]
+ krypton["krypton
192.168.50.40
control-plane + schedulable"]
+ end
+ subgraph W["Worker"]
+ helium["helium
192.168.50.10
worker only"]
+ end
+ VIP["API VIP 192.168.50.230
kube-vip on ens18
→ apiserver :6443"]
+ end
+ neon --- VIP
+ argon --- VIP
+ krypton --- VIP
+ kubectl["kubectl / talosctl clients
(workstation on LAN/VPN)"] -->|"HTTPS :6443"| VIP
+```
+
+---
+
+## Network and ingress
+
+**North–south (apps on LAN):** DNS for **`*.apps.noble.lab.pcenicni.dev`** → **Traefik** **`LoadBalancer` `192.168.50.211`**. **MetalLB** L2 pool **`192.168.50.210`–`192.168.50.229`**; **Argo CD** uses **`192.168.50.210`**. **Public** access does not use in-cluster ExternalDNS; it goes through **Newt** (Pangolin tunnel) plus a **CNAME** and the **Integration API** per [`clusters/noble/apps/newt/README.md`](../clusters/noble/apps/newt/README.md).
+
+```mermaid
+flowchart TB
+ user["User"]
+ subgraph DNS["DNS"]
+ pub["Public: CNAME → Pangolin
(per Newt README; not ExternalDNS)"]
+ split["LAN / split horizon:
*.apps.noble.lab.pcenicni.dev
→ 192.168.50.211"]
+ end
+ subgraph LAN["LAN"]
+ ML["MetalLB L2
pool 192.168.50.210–229
IPAddressPool noble-l2"]
+ T["Traefik Service LoadBalancer
192.168.50.211
IngressClass: traefik"]
+ Argo["Argo CD server LoadBalancer
192.168.50.210"]
+ Newt["Newt (Pangolin tunnel)
outbound to Pangolin"]
+ end
+ subgraph Cluster["Cluster workloads"]
+ Ing["Ingress resources
cert-manager HTTP-01"]
+ App["Apps / Grafana Ingress
e.g. grafana.apps.noble.lab.pcenicni.dev"]
+ end
+ user --> pub
+ user --> split
+ split --> T
+ pub -.->|"tunnel path"| Newt
+ T --> Ing --> App
+ ML --- T
+ ML --- Argo
+ user -->|"optional direct to LB IP"| Argo
+```
+
+---
+
+## Platform stack (bootstrap → workloads)
+
+Order: **Talos** → **Cilium** (cluster uses `cni: none` until CNI is installed) → **metrics-server**, **Longhorn**, **MetalLB** + pool manifests, **kube-vip** → **Traefik**, **cert-manager** → **Argo CD** (Helm + app-of-apps under `clusters/noble/bootstrap/argocd/`). Platform namespaces include `cert-manager`, `traefik`, `metallb-system`, `longhorn-system`, `monitoring`, `loki`, `logging`, `argocd`, `vault`, `external-secrets`, `sealed-secrets`, `kyverno`, `newt`, and others as deployed.
+
+```mermaid
+flowchart TB
+ subgraph L0["OS / bootstrap"]
+ Talos["Talos v1.12.6
Image Factory schematic"]
+ end
+ subgraph L1["CNI"]
+ Cilium["Cilium
(cni: none until installed)"]
+ end
+ subgraph L2["Core add-ons"]
+ MS["metrics-server"]
+ LH["Longhorn + default StorageClass"]
+ MB["MetalLB + pool manifests"]
+ KV["kube-vip (API VIP)"]
+ end
+ subgraph L3["Ingress and TLS"]
+ Traefik["Traefik"]
+ CM["cert-manager + ClusterIssuers"]
+ end
+ subgraph L4["GitOps"]
+ Argo["Argo CD
app-of-apps under bootstrap/argocd/"]
+ end
+ subgraph L5["Platform namespaces (examples)"]
+ NS["cert-manager, traefik, metallb-system,
longhorn-system, monitoring, loki, logging,
argocd, vault, external-secrets, sealed-secrets,
kyverno, newt, …"]
+ end
+ Talos --> Cilium --> MS
+ Cilium --> LH
+ Cilium --> MB
+ Cilium --> KV
+ MB --> Traefik
+ Traefik --> CM
+ CM --> Argo
+ Argo --> NS
+```
+
+---
+
+## Observability path
+
+**kube-prometheus-stack** in **`monitoring`**: Prometheus, Grafana, Alertmanager, node-exporter, etc. **Loki** (SingleBinary) in **`loki`** with **Fluent Bit** in **`logging`** shipping to **`loki-gateway`**. Grafana Loki datasource is applied via **ConfigMap** [`clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`](../clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml). Prometheus, Grafana, Alertmanager, and Loki use **Longhorn** PVCs where configured.
+
+```mermaid
+flowchart LR
+ subgraph Nodes["All nodes"]
+ NE["node-exporter DaemonSet"]
+ FB["Fluent Bit DaemonSet
namespace: logging"]
+ end
+ subgraph mon["monitoring"]
+ PROM["Prometheus"]
+ AM["Alertmanager"]
+ GF["Grafana"]
+ SC["ServiceMonitors / kube-state-metrics / operator"]
+ end
+ subgraph lok["loki"]
+ LG["loki-gateway Service"]
+ LO["Loki SingleBinary"]
+ end
+ NE --> PROM
+ PROM --> GF
+ AM --> GF
+ FB -->|"to loki-gateway:80"| LG --> LO
+ GF -->|"Explore / datasource ConfigMap
grafana-loki-datasource"| LO
+ subgraph PVC["Longhorn PVCs"]
+ P1["Prometheus / Grafana /
Alertmanager PVCs"]
+ P2["Loki PVC"]
+ end
+ PROM --- P1
+ LO --- P2
+```
+
+---
+
+## Secrets and policy
+
+**Sealed Secrets** decrypts `SealedSecret` objects in-cluster. **External Secrets Operator** syncs from **Vault** using **`ClusterSecretStore`** (see [`examples/vault-cluster-secret-store.yaml`](../clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml)). Trust is **cluster → Vault** (ESO calls Vault; Vault does not initiate cluster trust). **Kyverno** with **kyverno-policies** enforces **PSS baseline** in **Audit**.
+
+```mermaid
+flowchart LR
+ subgraph Git["Git repo"]
+ SSman["SealedSecret manifests
(optional)"]
+ end
+ subgraph cluster["Cluster"]
+ SSC["Sealed Secrets controller
sealed-secrets"]
+ ESO["External Secrets Operator
external-secrets"]
+ V["Vault
vault namespace
HTTP listener"]
+ K["Kyverno + kyverno-policies
PSS baseline Audit"]
+ end
+ SSman -->|"encrypted"| SSC -->|"decrypt to Secret"| workloads["Workload Secrets"]
+ ESO -->|"ClusterSecretStore →"| V
+ ESO -->|"sync ExternalSecret"| workloads
+ K -.->|"admission / audit
(PSS baseline)"| workloads
+```
+
+---
+
+## Data and storage
+
+**StorageClass:** **`longhorn`** (default). Talos mounts **user volume** data at **`/var/mnt/longhorn`** (bind paths for Longhorn). Stateful consumers include **Vault**, **kube-prometheus-stack** PVCs, and **Loki**.
+
+```mermaid
+flowchart TB
+ subgraph disks["Per-node Longhorn data path"]
+ UD["Talos user volume →
/var/mnt/longhorn (bind to Longhorn paths)"]
+ end
+ subgraph LH["Longhorn"]
+ SC["StorageClass: longhorn (default)"]
+ end
+ subgraph consumers["Stateful / durable consumers"]
+ V["Vault PVC data-vault-0"]
+ PGL["kube-prometheus-stack PVCs"]
+ L["Loki PVC"]
+ end
+ UD --> SC
+ SC --> V
+ SC --> PGL
+ SC --> L
+```
+
+---
+
+## Component versions
+
+See [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md) for the authoritative checklist. Summary:
+
+| Component | Chart / app (from CLUSTER-BUILD.md) |
+|-----------|-------------------------------------|
+| Talos / Kubernetes | v1.12.6 / 1.35.2 bundled |
+| Cilium | Helm 1.16.6 |
+| MetalLB | 0.15.3 |
+| Longhorn | 1.11.1 |
+| Traefik | 39.0.6 / app v3.6.11 |
+| cert-manager | v1.20.0 |
+| Argo CD | 9.4.17 / app v3.3.6 |
+| kube-prometheus-stack | 82.15.1 |
+| Loki / Fluent Bit | 6.55.0 / 0.56.0 |
+| Sealed Secrets / ESO / Vault | 2.18.4 / 2.2.0 / 0.32.0 |
+| Kyverno | 3.7.1 / policies 3.7.1 |
+| Newt | 1.2.0 / app 1.10.1 |
+
+---
+
+## Narrative
+
+The **noble** environment is a **Talos** lab cluster on **`192.168.50.0/24`** with **three control plane nodes and one worker**, schedulable workloads on control planes enabled, and the Kubernetes API exposed through **kube-vip** at **`192.168.50.230`**. **Cilium** provides the CNI after Talos bootstrap with **`cni: none`**; **MetalLB** advertises **`192.168.50.210`–`192.168.50.229`**, pinning **Argo CD** to **`192.168.50.210`** and **Traefik** to **`192.168.50.211`** for **`*.apps.noble.lab.pcenicni.dev`**. **cert-manager** issues certificates for Traefik Ingresses; **GitOps** is **Helm plus Argo CD** with manifests under **`clusters/noble/`** and bootstrap under **`clusters/noble/bootstrap/argocd/`**. **Observability** uses **kube-prometheus-stack** in **`monitoring`**, **Loki** and **Fluent Bit** with Grafana wired via a **ConfigMap** datasource, with **Longhorn** PVCs for Prometheus, Grafana, Alertmanager, Loki, and **Vault**. **Secrets** combine **Sealed Secrets** for git-encrypted material, **Vault** with **External Secrets** for dynamic sync, and **Kyverno** enforces **Pod Security Standards baseline** in **Audit**. **Public** access uses **Newt** to **Pangolin** with **CNAME** and Integration API steps as documented—not generic in-cluster public DNS.
+
+---
+
+## Assumptions and open questions
+
+**Assumptions**
+
+- **Hypervisor vs bare metal:** Not fixed in inventory tables; `talconfig.yaml` comments mention Proxmox virtio disk paths as examples—treat actual host platform as **TBD** unless confirmed.
+- **Workstation path:** Operators reach the VIP and node IPs from the **LAN or VPN** per [`talos/README.md`](../talos/README.md).
+- **Optional components** (Headlamp, Renovate, Velero, Phase G hardening) are described in CLUSTER-BUILD.md; they are not required for the diagrams above until deployed.
+
+**Open questions**
+
+- **Split horizon:** Confirm whether only LAN DNS resolves `*.apps.noble.lab.pcenicni.dev` to **`192.168.50.211`** or whether public resolvers also point at that address.
+- **Velero / S3:** **TBD** until an S3-compatible backend is configured.
+- **Argo CD:** Confirm **`repoURL`** in `root-application.yaml` and what is actually applied on-cluster.
+
+---
+
+*Keep in sync with [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md) and manifests under [`clusters/noble/`](../clusters/noble/).*
diff --git a/talos/CLUSTER-BUILD.md b/talos/CLUSTER-BUILD.md
index 49250ea..a2282e6 100644
--- a/talos/CLUSTER-BUILD.md
+++ b/talos/CLUSTER-BUILD.md
@@ -4,7 +4,7 @@ This document is the **exported TODO** for the **noble** Talos cluster (4 nodes)
## Current state (2026-03-28)
-Lab stack is **up** on-cluster for bootstrap through **Phase D** (observability) and **Phase E** (Sealed Secrets, External Secrets, **Vault** Helm install), with manifests matching this repo. **Next focus:** **Vault** `operator init` / unseal, optional **`unseal-cronjob.yaml`**, Kubernetes auth + **`ClusterSecretStore`**, optional Pangolin/sample Ingress validation, Velero when S3 exists.
+Lab stack is **up** on-cluster through **Phase D** (observability), **Phase E** (Sealed Secrets, External Secrets, **Vault** + **`ClusterSecretStore`**), and **Phase F** (**Kyverno** **baseline** PSS **Audit**), with manifests matching this repo. **Next focus:** optional **Headlamp** (Ingress + TLS), **Renovate** (dependency PRs for Helm/manifests), Pangolin/sample Ingress validation, **Phase G**, **Velero** when S3 exists.
- **Talos** v1.12.6 (target) / **Kubernetes** as bundled — four nodes **Ready** unless upgrading; **`talosctl health`**; **`talos/kubeconfig`** is **local only** (gitignored — never commit; regenerate with `talosctl kubeconfig` per `talos/README.md`). **Image Factory (nocloud installer):** `factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6`
- **Cilium** Helm **1.16.6** / app **1.16.6** (`clusters/noble/apps/cilium/`, phase 1 values).
@@ -21,7 +21,7 @@ Lab stack is **up** on-cluster for bootstrap through **Phase D** (observability)
- **Sealed Secrets** Helm **2.18.4** / app **0.36.1** — `clusters/noble/apps/sealed-secrets/` (namespace **`sealed-secrets`**); **`kubeseal`** on client should match controller minor (**README**); back up **`sealed-secrets-key`** (see README).
- **External Secrets Operator** Helm **2.2.0** / app **v2.2.0** — `clusters/noble/apps/external-secrets/`; Vault **`ClusterSecretStore`** in **`examples/vault-cluster-secret-store.yaml`** (**`http://`** to match Vault listener — apply after Vault **Kubernetes auth**).
- **Vault** Helm **0.32.0** / app **1.21.2** — `clusters/noble/apps/vault/` — standalone **file** storage, **Longhorn** PVC; **HTTP** listener (`global.tlsDisable`); optional **CronJob** lab unseal **`unseal-cronjob.yaml`**; **not** initialized in git — run **`vault operator init`** per **`README.md`**.
-- **Still open:** Vault **Kubernetes auth** + **`ClusterSecretStore`** apply + KV for ESO; **Phase F–G**; optional **sample Ingress + cert + Pangolin** end-to-end; **Velero** when S3 is ready; **Argo CD SSO**.
+- **Still open:** **Headlamp** (Helm + Traefik Ingress + **`letsencrypt-prod`**); **Renovate** ([Renovate](https://docs.renovatebot.com/) — dependency bot; hosted app **or** self-hosted on-cluster); **Phase G**; optional **sample Ingress + cert + Pangolin** end-to-end; **Velero** when S3 is ready; **Argo CD SSO**.
## Inventory
@@ -42,6 +42,7 @@ Lab stack is **up** on-cluster for bootstrap through **Phase D** (observability)
| Traefik (apps ingress) | `192.168.50.211` — **`metallb.io/loadBalancerIPs`** in `clusters/noble/apps/traefik/values.yaml` |
| Apps ingress (LAN / split horizon) | `*.apps.noble.lab.pcenicni.dev` → Traefik LB |
| Grafana (Ingress + TLS) | **`grafana.apps.noble.lab.pcenicni.dev`** — `grafana.ingress` in `clusters/noble/apps/kube-prometheus-stack/values.yaml` (**`letsencrypt-prod`**) |
+| Headlamp (Ingress + TLS) | **`headlamp.apps.noble.lab.pcenicni.dev`** — chart `ingress` in `clusters/noble/apps/headlamp/` (**`letsencrypt-prod`**, **`ingressClassName: traefik`**) |
| Public DNS (Pangolin) | **Newt** tunnel + **CNAME** at registrar + **Integration API** — `clusters/noble/apps/newt/` |
| Velero | S3-compatible URL — configure later |
@@ -64,6 +65,9 @@ Lab stack is **up** on-cluster for bootstrap through **Phase D** (observability)
- Sealed Secrets: **2.18.4** (Helm chart `sealed-secrets/sealed-secrets`; app **0.36.1**)
- External Secrets Operator: **2.2.0** (Helm chart `external-secrets/external-secrets`; app **v2.2.0**)
- Vault: **0.32.0** (Helm chart `hashicorp/vault`; app **1.21.2**)
+- Kyverno: **3.7.1** (Helm chart `kyverno/kyverno`; app **v1.17.1**); **kyverno-policies** **3.7.1** — **baseline** PSS, **Audit** (`clusters/noble/apps/kyverno/`)
+- Headlamp: **0.40.1** (Helm chart `headlamp/headlamp`; app matches chart — see [Artifact Hub](https://artifacthub.io/packages/helm/headlamp/headlamp))
+- Renovate: **hosted** (Mend **Renovate** GitHub/GitLab app — no cluster chart) **or** **self-hosted** — pin chart when added ([Helm charts](https://docs.renovatebot.com/helm-charts/), OCI `ghcr.io/renovatebot/charts/renovate`); pair **`renovate.json`** with this repo’s Helm paths under **`clusters/noble/`**
## Repo paths (this workspace)
@@ -89,7 +93,10 @@ Lab stack is **up** on-cluster for bootstrap through **Phase D** (observability)
| Fluent Bit → Loki (Helm values) | `clusters/noble/apps/fluent-bit/` — `values.yaml`, `namespace.yaml` |
| Sealed Secrets (Helm) | `clusters/noble/apps/sealed-secrets/` — `values.yaml`, `namespace.yaml`, `README.md` |
| External Secrets Operator (Helm + Vault store example) | `clusters/noble/apps/external-secrets/` — `values.yaml`, `namespace.yaml`, `README.md`, `examples/vault-cluster-secret-store.yaml` |
-| Vault (Helm + optional unseal CronJob) | `clusters/noble/apps/vault/` — `values.yaml`, `namespace.yaml`, `unseal-cronjob.yaml`, `README.md` |
+| Vault (Helm + optional unseal CronJob) | `clusters/noble/apps/vault/` — `values.yaml`, `namespace.yaml`, `unseal-cronjob.yaml`, `configure-kubernetes-auth.sh`, `README.md` |
+| Kyverno + PSS baseline policies | `clusters/noble/apps/kyverno/` — `values.yaml`, `policies-values.yaml`, `namespace.yaml`, `README.md` |
+| Headlamp (Helm + Ingress) | `clusters/noble/apps/headlamp/` — `values.yaml`, `namespace.yaml` (planned — `helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/`) |
+| Renovate (repo config + optional self-hosted Helm) | `renovate.json` or `renovate.json5` at repo root (see [Renovate docs](https://docs.renovatebot.com/)); optional `clusters/noble/apps/renovate/` for self-hosted chart + token Secret (**Sealed Secrets** / **ESO** after **Phase E**) |
**Git vs cluster:** manifests and `talconfig` live in git; **`talhelper genconfig -o out`**, bootstrap, Helm, and `kubectl` run on your LAN. See **`talos/README.md`** for workstation reachability (lab LAN/VPN), **`talosctl kubeconfig`** vs Kubernetes `server:` (VIP vs node IP), and **`--insecure`** only in maintenance.
@@ -101,6 +108,8 @@ Lab stack is **up** on-cluster for bootstrap through **Phase D** (observability)
4. **Longhorn:** Talos user volume + extensions in `talconfig.with-longhorn.yaml` (when restored); Helm **`defaultDataPath`** in `clusters/noble/apps/longhorn/values.yaml`.
5. **Loki → Fluent Bit → Grafana datasource:** deploy **Loki** (`loki-gateway` Service) before **Fluent Bit**; apply **`clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`** after **Loki** (sidecar picks up the ConfigMap — no kube-prometheus values change for Loki).
6. **Vault:** **Longhorn** default **StorageClass** before **`clusters/noble/apps/vault/`** Helm (PVC **`data-vault-0`**); **External Secrets** **`ClusterSecretStore`** after Vault is initialized, unsealed, and **Kubernetes auth** is configured.
+7. **Headlamp:** **Traefik** + **cert-manager** (**`letsencrypt-prod`**) before exposing **`headlamp.apps.noble.lab.pcenicni.dev`**; treat as **cluster-admin** UI — protect with network policy / SSO when hardening (**Phase G**).
+8. **Renovate:** **Git remote** + platform access (**hosted app** needs org/repo install; **self-hosted** needs **`RENOVATE_TOKEN`** and chart **`renovate.config`**). If the bot runs **in-cluster**, add the token **after** **Sealed Secrets** / **Vault** (**Phase E**) — no ingress required for the bot itself.
## Prerequisites (before phases)
@@ -141,22 +150,24 @@ Lab stack is **up** on-cluster for bootstrap through **Phase D** (observability)
- [x] **Argo CD** bootstrap — `clusters/noble/bootstrap/argocd/` (`helm upgrade --install argocd …`)
- [x] Argo CD server **LoadBalancer** — **`192.168.50.210`** (see `values.yaml`)
- [X] **App-of-apps** — set **`repoURL`** in **`root-application.yaml`**, add **`Application`** manifests under **`bootstrap/argocd/apps/`**, apply **`root-application.yaml`**
+- [ ] **Renovate** — [Renovate](https://docs.renovatebot.com/) opens PRs for Helm charts, Docker tags, and related bumps. **Option A:** install the **Mend Renovate** app on **GitHub** / **GitLab** for this repo (no cluster). **Option B:** self-hosted — **`helm repo add renovate https://docs.renovatebot.com/helm-charts`** or OCI per [Helm charts](https://docs.renovatebot.com/helm-charts/); **`renovate.config`** with token from **Sealed Secrets** / **ESO** (**`clusters/noble/apps/renovate/`** when added). Add **`renovate.json`** (or **`renovate.json5`**) at repo root with **`packageRules`**, **`kubernetes`** / **`helm-values`** file patterns covering **`clusters/noble/`** (Helm **`values.yaml`**, manifests). Verify a dry run or first dependency PR.
- [ ] SSO — later
## Phase D — Observability
- [x] **kube-prometheus-stack** — `kubectl apply -f clusters/noble/apps/kube-prometheus-stack/namespace.yaml` then **`helm upgrade --install`** as in `clusters/noble/apps/kube-prometheus-stack/values.yaml` (chart **82.15.1**); PVCs **`longhorn`**; **`--wait --timeout 30m`** recommended; verify **`kubectl -n monitoring get pods,pvc`**
- [x] **Loki** + **Fluent Bit** + **Grafana Loki datasource** — **order:** **`kubectl apply -f clusters/noble/apps/loki/namespace.yaml`** → **`helm upgrade --install loki`** `grafana/loki` **6.55.0** `-f clusters/noble/apps/loki/values.yaml` → **`kubectl apply -f clusters/noble/apps/fluent-bit/namespace.yaml`** → **`helm upgrade --install fluent-bit`** `fluent/fluent-bit` **0.56.0** `-f clusters/noble/apps/fluent-bit/values.yaml` → **`kubectl apply -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`**. Verify **Explore → Loki** in Grafana; **`kubectl -n loki get pods,pvc`**, **`kubectl -n logging get pods`**
+- [ ] **Headlamp** — Kubernetes web UI ([Headlamp](https://headlamp.dev/)); **`helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/`**; **`kubectl apply -f clusters/noble/apps/headlamp/namespace.yaml`** → **`helm upgrade --install headlamp headlamp/headlamp --version 0.40.1 -n headlamp -f clusters/noble/apps/headlamp/values.yaml`**; **Ingress** **`https://headlamp.apps.noble.lab.pcenicni.dev`** (**`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **RBAC:** chart defaults are permissive — tighten before LAN-wide exposure; align with **Phase G** hardening.
## Phase E — Secrets
- [x] **Sealed Secrets** (optional Git workflow) — `clusters/noble/apps/sealed-secrets/` (Helm **2.18.4**); **`kubeseal`** + key backup per **`README.md`**
-- [x] **Vault** in-cluster on Longhorn + **auto-unseal** — `clusters/noble/apps/vault/` (Helm **0.32.0**); **Longhorn** PVC; **OSS** “auto-unseal” = optional **`unseal-cronjob.yaml`** + Secret (**README**); init/unseal/Kubernetes auth for ESO still **to do** on cluster
+- [x] **Vault** in-cluster on Longhorn + **auto-unseal** — `clusters/noble/apps/vault/` (Helm **0.32.0**); **Longhorn** PVC; **OSS** “auto-unseal” = optional **`unseal-cronjob.yaml`** + Secret (**README**); **`configure-kubernetes-auth.sh`** for ESO (**Kubernetes auth** + KV + role)
- [x] **External Secrets Operator** + Vault `ClusterSecretStore` — operator **`clusters/noble/apps/external-secrets/`** (Helm **2.2.0**); apply **`examples/vault-cluster-secret-store.yaml`** after Vault (**`README.md`**)
## Phase F — Policy + backups
-- [ ] **Kyverno** baseline policies
+- [x] **Kyverno** baseline policies — `clusters/noble/apps/kyverno/` (Helm **kyverno** **3.7.1** + **kyverno-policies** **3.7.1**, **baseline** / **Audit** — see **`README.md`**)
- [ ] **Velero** when S3 is ready; backup/restore drill
## Phase G — Hardening
@@ -170,15 +181,18 @@ Lab stack is **up** on-cluster for bootstrap through **Phase D** (observability)
- [x] API via VIP `:6443` — **`kubectl get --raw /healthz`** → **`ok`** with kubeconfig **`server:`** `https://192.168.50.230:6443`
- [x] Ingress **`LoadBalancer`** in pool `210`–`229` (**Traefik** → **`192.168.50.211`**)
- [x] **Argo CD** UI — **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`** (initial **`admin`** password from **`argocd-initial-admin-secret`**)
+- [ ] **Renovate** — hosted app enabled for this repo **or** self-hosted workload **Running** + PRs updating **`clusters/noble/`** manifests as configured
- [ ] Sample Ingress + cert (cert-manager ready) + Pangolin resource + CNAME
- [x] PVC **`Bound`** on **Longhorn** (`storageClassName: longhorn`); Prometheus/Loki durable when configured
- [x] **`monitoring`** — **kube-prometheus-stack** core workloads **Running** (Prometheus, Grafana, Alertmanager, operator, kube-state-metrics, node-exporter); PVCs **Bound** on **longhorn**
- [x] **`loki`** — **Loki** SingleBinary + **gateway** **Running**; **`loki`** PVC **Bound** on **longhorn** (no chunks-cache by design)
- [x] **`logging`** — **Fluent Bit** DaemonSet **Running** on all nodes (logs → **Loki**)
- [x] **Grafana** — **Loki** datasource from **`grafana-loki-datasource`** ConfigMap (**Explore** works after apply + sidecar sync)
+- [ ] **Headlamp** — Deployment **Running** in **`headlamp`**; UI at **`https://headlamp.apps.noble.lab.pcenicni.dev`** (TLS via **`letsencrypt-prod`**)
- [x] **`sealed-secrets`** — controller **Deployment** **Running** in **`sealed-secrets`** (install + **`kubeseal`** per **`apps/sealed-secrets/README.md`**)
- [x] **`external-secrets`** — controller + webhook + cert-controller **Running** in **`external-secrets`**; apply **`ClusterSecretStore`** after Vault **Kubernetes auth**
- [x] **`vault`** — **StatefulSet** **Running**, **`data-vault-0`** PVC **Bound** on **longhorn**; **`vault operator init`** + unseal per **`apps/vault/README.md`**
+- [x] **`kyverno`** — admission / background / cleanup / reports controllers **Running** in **`kyverno`**; **ClusterPolicies** for **PSS baseline** **Ready** (**Audit**)
---