diff --git a/.gitignore b/.gitignore index aeb687a..eda42ff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ ansible/inventory/hosts.ini # Talos generated talos/out/ +talos/kubeconfig # Local secrets age-key.txt \ No newline at end of file diff --git a/clusters/noble/apps/newt/README.md b/clusters/noble/apps/newt/README.md index 1bb62d8..314cba5 100644 --- a/clusters/noble/apps/newt/README.md +++ b/clusters/noble/apps/newt/README.md @@ -6,7 +6,24 @@ This is the **primary** automation path for **public** hostnames to workloads in ## 1. Create the Secret -Keys must match `values.yaml` (`PANGOLIN_ENDPOINT`, `NEWT_ID`, `NEWT_SECRET`): +Keys must match `values.yaml` (`PANGOLIN_ENDPOINT`, `NEWT_ID`, `NEWT_SECRET`). + +### Option A — Sealed Secret (safe for GitOps) + +With the [Sealed Secrets](https://github.com/bitnami-labs/sealed-secrets) controller installed (`clusters/noble/apps/sealed-secrets/`), generate a `SealedSecret` from your workstation (rotate credentials in Pangolin first if they were exposed): + +```bash +chmod +x clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh +export PANGOLIN_ENDPOINT='https://pangolin.pcenicni.dev' +export NEWT_ID='YOUR_NEWT_ID' +export NEWT_SECRET='YOUR_NEWT_SECRET' +./clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh > newt-pangolin-auth.sealedsecret.yaml +kubectl apply -f newt-pangolin-auth.sealedsecret.yaml +``` + +Commit only the `.sealedsecret.yaml` file, not plain `Secret` YAML. 
+ +### Option B — Imperative Secret (not in git) ```bash kubectl apply -f clusters/noble/apps/newt/namespace.yaml diff --git a/clusters/noble/apps/sealed-secrets/README.md b/clusters/noble/apps/sealed-secrets/README.md index 2cd1aa1..a927423 100644 --- a/clusters/noble/apps/sealed-secrets/README.md +++ b/clusters/noble/apps/sealed-secrets/README.md @@ -37,6 +37,8 @@ kubectl create secret generic example --from-literal=foo=bar --dry-run=client -o Commit `example-sealedsecret.yaml`; apply it with `kubectl apply -f`. The controller creates the **Secret** in the same namespace as the **SealedSecret**. +**Noble example:** `examples/kubeseal-newt-pangolin-auth.sh` (Newt / Pangolin tunnel credentials). + ## Backup the sealing key If the controller’s private key is lost, existing sealed files cannot be decrypted on a new cluster. Back up the key secret after install: diff --git a/clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh b/clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh new file mode 100755 index 0000000..c647ac8 --- /dev/null +++ b/clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Emit a SealedSecret for newt-pangolin-auth (namespace newt). +# Prerequisites: sealed-secrets controller running; kubeseal client (same minor as controller). +# Rotate Pangolin/Newt credentials in the UI first if they were exposed, then set env vars and run: +# +# export PANGOLIN_ENDPOINT='https://pangolin.example.com' +# export NEWT_ID='...' +# export NEWT_SECRET='...' 
+# ./kubeseal-newt-pangolin-auth.sh > newt-pangolin-auth.sealedsecret.yaml +# kubectl apply -f newt-pangolin-auth.sealedsecret.yaml +# +set -euo pipefail +kubectl apply -f "$(dirname "$0")/../../newt/namespace.yaml" >/dev/null 2>&1 || true +kubectl -n newt create secret generic newt-pangolin-auth \ + --dry-run=client \ + --from-literal=PANGOLIN_ENDPOINT="${PANGOLIN_ENDPOINT:?}" \ + --from-literal=NEWT_ID="${NEWT_ID:?}" \ + --from-literal=NEWT_SECRET="${NEWT_SECRET:?}" \ + -o yaml | kubeseal -o yaml diff --git a/clusters/noble/apps/sealed-secrets/values.yaml b/clusters/noble/apps/sealed-secrets/values.yaml index cf9abfe..497d925 100644 --- a/clusters/noble/apps/sealed-secrets/values.yaml +++ b/clusters/noble/apps/sealed-secrets/values.yaml @@ -8,4 +8,11 @@ # # Client: install kubeseal (same minor as controller — see README). # Defaults are sufficient for the lab; override here if you need key renewal, resources, etc. +# +# GitOps pattern: create Secrets only via SealedSecret (or External Secrets + Vault). +# Example (Newt): clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh +# Backup the controller's sealing key(s): kubectl -n sealed-secrets get secret -l sealedsecrets.bitnami.com/sealed-secrets-key -o yaml +# +# Talos cluster secrets (bootstrap token, cluster secret, certs) belong in talhelper talsecret / +# SOPS — not Sealed Secrets. See talos/README.md. 
commonLabels: {} diff --git a/komodo/auth/Authentik/.env.sample b/komodo/auth/Authentik/.env.sample index afc6a40..a81d91b 100644 --- a/komodo/auth/Authentik/.env.sample +++ b/komodo/auth/Authentik/.env.sample @@ -14,4 +14,6 @@ COMPOSE_PORT_HTTP=10000 COMPOSE_PORT_HTTPS=10443 AUTHENTIK_ERROR_REPORTING__ENABLED=true AUTHENTIK_TAG=2025.10 +# LDAP outpost (komodo/auth/Authentik/compose.yaml authentik_ldap) — create token in Authentik UI +AUTHENTIK_LDAP_OUTPOST_TOKEN= CONFIG_PATH=/srv/dev-disk-by-uuid-7acaa21a-aa26-4605-bb36-8f4c9c1a7695/configs/authentik \ No newline at end of file diff --git a/komodo/auth/Authentik/compose.yaml b/komodo/auth/Authentik/compose.yaml index 5a8219c..0be4402 100644 --- a/komodo/auth/Authentik/compose.yaml +++ b/komodo/auth/Authentik/compose.yaml @@ -103,7 +103,7 @@ services: environment: AUTHENTIK_HOST: https://auth.pcenicni.ca AUTHENTIK_INSECURE: "false" - AUTHENTIK_TOKEN: 2OutrpIACRD41JdhjiZE6zSL8I48RpwkvnDRVbEPnllDnzdcxO9UJ26iS08Q + AUTHENTIK_TOKEN: ${AUTHENTIK_LDAP_OUTPOST_TOKEN:?set AUTHENTIK_LDAP_OUTPOST_TOKEN in .env} depends_on: postgresql: condition: service_healthy diff --git a/talos/CLUSTER-BUILD.md b/talos/CLUSTER-BUILD.md index b3eaf30..49250ea 100644 --- a/talos/CLUSTER-BUILD.md +++ b/talos/CLUSTER-BUILD.md @@ -6,7 +6,7 @@ This document is the **exported TODO** for the **noble** Talos cluster (4 nodes) Lab stack is **up** on-cluster for bootstrap through **Phase D** (observability) and **Phase E** (Sealed Secrets, External Secrets, **Vault** Helm install), with manifests matching this repo. **Next focus:** **Vault** `operator init` / unseal, optional **`unseal-cronjob.yaml`**, Kubernetes auth + **`ClusterSecretStore`**, optional Pangolin/sample Ingress validation, Velero when S3 exists. -- **Talos** v1.12.6 (target) / **Kubernetes** as bundled — four nodes **Ready** unless upgrading; **`talosctl health`**; **`talos/kubeconfig`** for `kubectl` (root `kubeconfig` may still be a stub). 
**Image Factory (nocloud installer):** `factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6` +- **Talos** v1.12.6 (target) / **Kubernetes** as bundled — four nodes **Ready** unless upgrading; **`talosctl health`**; **`talos/kubeconfig`** is **local only** (gitignored — never commit; regenerate with `talosctl kubeconfig` per `talos/README.md`). **Image Factory (nocloud installer):** `factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6` - **Cilium** Helm **1.16.6** / app **1.16.6** (`clusters/noble/apps/cilium/`, phase 1 values). - **MetalLB** Helm **0.15.3** / app **v0.15.3**; **IPAddressPool** `noble-l2` + **L2Advertisement** — pool **`192.168.50.210`–`192.168.50.229`**. - **kube-vip** DaemonSet **3/3** on control planes; VIP **`192.168.50.230`** on **`ens18`** (`vip_subnet` **`/32`** required — bare **`32`** breaks parsing). **Verified from workstation:** `kubectl config set-cluster noble --server=https://192.168.50.230:6443` then **`kubectl get --raw /healthz`** → **`ok`** (`talos/kubeconfig`; see `talos/README.md`). @@ -14,7 +14,7 @@ Lab stack is **up** on-cluster for bootstrap through **Phase D** (observability) - **Longhorn** Helm **1.11.1** / app **v1.11.1** — `clusters/noble/apps/longhorn/` (PSA **privileged** namespace, `defaultDataPath` `/var/mnt/longhorn`, `preUpgradeChecker` enabled); **StorageClass** `longhorn` (default); **`nodes.longhorn.io`** all **Ready**; test **PVC** `Bound` on `longhorn`. - **Traefik** Helm **39.0.6** / app **v3.6.11** — `clusters/noble/apps/traefik/`; **`Service`** **`LoadBalancer`** **`EXTERNAL-IP` `192.168.50.211`**; **`IngressClass`** **`traefik`** (default). Point **`*.apps.noble.lab.pcenicni.dev`** at **`192.168.50.211`**. MetalLB pool verification was done before replacing the temporary nginx test with Traefik. 
- **cert-manager** Helm **v1.20.0** / app **v1.20.0** — `clusters/noble/apps/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (HTTP-01, ingress class **`traefik`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox). -- **Newt** Helm **1.2.0** / app **1.10.1** — `clusters/noble/apps/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolin’s domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/apps/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver). +- **Newt** Helm **1.2.0** / app **1.10.1** — `clusters/noble/apps/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). Prefer a **SealedSecret** in git (`kubeseal` — see `clusters/noble/apps/sealed-secrets/examples/`) after rotating credentials if they were exposed. **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolin’s domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/apps/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver). - **Argo CD** Helm **9.4.17** / app **v3.3.6** — `clusters/noble/bootstrap/argocd/`; **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`**; app-of-apps scaffold under **`bootstrap/argocd/apps/`** (edit **`root-application.yaml`** `repoURL` before applying). 
- **kube-prometheus-stack** — Helm chart **82.15.1** — `clusters/noble/apps/kube-prometheus-stack/` (**namespace** `monitoring`, PSA **privileged** — **node-exporter** needs host mounts); **Longhorn** PVCs for Prometheus, Grafana, Alertmanager; **node-exporter** DaemonSet **4/4**. **Grafana Ingress:** **`https://grafana.apps.noble.lab.pcenicni.dev`** (Traefik **`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **Loki** datasource in Grafana: ConfigMap **`clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`** (sidecar label **`grafana_datasource: "1"`**) — not via **`grafana.additionalDataSources`** in the chart. **`helm upgrade --install` with `--wait` is silent until done** — use **`--timeout 30m`**; Grafana admin: Secret **`kube-prometheus-grafana`**, keys **`admin-user`** / **`admin-password`**. - **Loki** + **Fluent Bit** — **`grafana/loki` 6.55.0** SingleBinary + **filesystem** on **Longhorn** (`clusters/noble/apps/loki/`); **`loki.auth_enabled: false`**; **`chunksCache.enabled: false`** (no memcached chunk cache). **`fluent/fluent-bit` 0.56.0** → **`loki-gateway.loki.svc:80`** (`clusters/noble/apps/fluent-bit/`); **`logging`** PSA **privileged**. **Grafana Explore:** **`kubectl apply -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`** then **Explore → Loki** (e.g. `{job="fluent-bit"}`). diff --git a/talos/README.md b/talos/README.md index d7c9dcb..2f62d14 100644 --- a/talos/README.md +++ b/talos/README.md @@ -26,6 +26,8 @@ talhelper genconfig -o out `out/` is ignored via repo root `.gitignore` (`talos/out/`). Do not commit `talsecret.yaml` or generated machine configs. +**Never commit `talos/kubeconfig`** (also gitignored). It contains cluster admin credentials; generate locally with `talosctl kubeconfig` (§3). If it was ever pushed, remove it from git tracking, regenerate kubeconfig, and treat the old credentials as compromised (purge from history with `git filter-repo` or BFG if needed). Regenerating alone does not revoke the leaked client certificate, which stays valid until it expires; rotate the Kubernetes CA (`talosctl rotate-ca`) to invalidate it. 
+ **After any `talconfig.yaml` edit, run `genconfig` again** before `apply-config`. Stale `out/*.yaml` is easy to apply by mistake. Quick check: `grep -A8 kind: UserVolumeConfig out/noble-neon.yaml` should match what you expect (e.g. Longhorn `volumeType: disk`, not `grow`/`maxSize` on a partition). ## 2. Apply machine config diff --git a/talos/kubeconfig b/talos/kubeconfig deleted file mode 100644 index 56b8c96..0000000 --- a/talos/kubeconfig +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: v1 -clusters: -- cluster: - certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJpakNDQVRDZ0F3SUJBZ0lSQUthRzU4bCtjeURSQlIrMFlXSWltajR3Q2dZSUtvWkl6ajBFQXdJd0ZURVQKTUJFR0ExVUVDaE1LYTNWaVpYSnVaWFJsY3pBZUZ3MHlOakF6TWpnd01UTTVNelJhRncwek5qQXpNalV3TVRNNQpNelJhTUJVeEV6QVJCZ05WQkFvVENtdDFZbVZ5Ym1WMFpYTXdXVEFUQmdjcWhrak9QUUlCQmdncWhrak9QUU1CCkJ3TkNBQVNQeUpCMExLVFV2Tm0wRzB4ZHNnQ2FoRDN6Ung2UFR0Vkdxdmd4MmphZ3pLcmU1N2NRajNBRzdsRmoKeTdkMGZNSDBiK3Fwd281aG1VbWtpWmVVcHRscm8yRXdYekFPQmdOVkhROEJBZjhFQkFNQ0FvUXdIUVlEVlIwbApCQll3RkFZSUt3WUJCUVVIQXdFR0NDc0dBUVVGQndNQ01BOEdBMVVkRXdFQi93UUZNQU1CQWY4d0hRWURWUjBPCkJCWUVGSWdITVgwTTZDN1ZzSEVUVjVndjYwdWJMQ0h0TUFvR0NDcUdTTTQ5QkFNQ0EwZ0FNRVVDSUc2ZmNUT1cKL2FkTmVoTTdISVFBZGsxcGVLTU5RMFFWRjJGMVBRUzluMGZZQWlFQTNTbWRFUWNVS0p2VGZPQUUzQkJobHBIZwpNNFFTVU1rQWFaQmt4c3BTNy9BPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== - server: https://192.168.50.230:6443 - name: noble -contexts: -- context: - cluster: noble - namespace: default - user: admin@noble - name: admin@noble -current-context: admin@noble -kind: Config -preferences: {} -users: -- name: admin@noble - user: - client-certificate-data: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJoRENDQVN1Z0F3SUJBZ0lSQUw0alhIdStZSUxUQnFiU3ExbFpiT3N3Q2dZSUtvWkl6ajBFQXdJd0ZURVQKTUJFR0ExVUVDaE1LYTNWaVpYSnVaWFJsY3pBZUZ3MHlOakF6TWpnd01qQXpNVEZhRncweU56QXpNamd3TWpBegpNakZhTUNreEZ6QVZCZ05WQkFvVERuTjVjM1JsYlRwdFlYTjBaWEp6TVE0d0RBWURWUVFERXdWaFpHMXBiakJaCk1CTUdCeXFHU000OUFnRUdDQ3FHU000OUF3RUhBMElBQkJ0Y0dib3c4UFk4UnlFdFNUdEFVRkZPVjRXbndidnMKVGdaZFoyQ3NPVjB6dFZnWmxMZENlaHI3YTRxUHFFMTJPa09ObXYxTnI1eXVHN281cEdiZjc5T2pTREJHTUE0RwpBMVVkRHdFQi93UUVBd0lGb0RBVEJnTlZIU1VFRERBS0JnZ3JCZ0VGQlFjREFqQWZCZ05WSFNNRUdEQVdnQlNJCkJ6RjlET2d1MWJCeEUxZVlMK3RMbXl3aDdUQUtCZ2dxaGtqT1BRUURBZ05IQURCRUFpQkVTbE5aQktkc05OQ2sKYnVhejB2TFZrYmNXK1Q0UnYxNGNFS1huYWV5UXNBSWdWRk9qaXBSNjQzc3ZEN1NaSXRMU1FKcEQxcWhCdmd1MApxZXkxSUhKMTdGRT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= - client-key-data: LS0tLS1CRUdJTiBFQyBQUklWQVRFIEtFWS0tLS0tCk1IY0NBUUVFSUh2b0lwTW5ubW5aalgreXRQejM3Y3RKdGFVRzNvamtlRENGamUwaWZkcW9vQW9HQ0NxR1NNNDkKQXdFSG9VUURRZ0FFRzF3WnVqRHc5anhISVMxSk8wQlFVVTVYaGFmQnUreE9CbDFuWUt3NVhUTzFXQm1VdDBKNgpHdnRyaW8rb1RYWTZRNDJhL1Uydm5LNGJ1am1rWnQvdjB3PT0KLS0tLS1FTkQgRUMgUFJJVkFURSBLRVktLS0tLQo=