Compare commits

...

2 Commits

86 changed files with 230 additions and 239 deletions

View File

@@ -2,12 +2,12 @@
# Ansible **noble_cert_manager** role sources `.env` after cert-manager Helm install and creates # Ansible **noble_cert_manager** role sources `.env` after cert-manager Helm install and creates
# **cert-manager/cloudflare-dns-api-token** when **CLOUDFLARE_DNS_API_TOKEN** is set. # **cert-manager/cloudflare-dns-api-token** when **CLOUDFLARE_DNS_API_TOKEN** is set.
# #
# Cloudflare: Zone → DNS → Edit + Zone → Read for **pcenicni.dev** (see clusters/noble/apps/cert-manager/README.md). # Cloudflare: Zone → DNS → Edit + Zone → Read for **pcenicni.dev** (see clusters/noble/bootstrap/cert-manager/README.md).
CLOUDFLARE_DNS_API_TOKEN= CLOUDFLARE_DNS_API_TOKEN=
# --- Optional: other deploy-time values (documented for manual use or future automation) --- # --- Optional: other deploy-time values (documented for manual use or future automation) ---
# Pangolin / Newt — with **noble_newt_install=true**, Ansible creates **newt/newt-pangolin-auth** when all are set (see clusters/noble/apps/newt/README.md). # Pangolin / Newt — with **noble_newt_install=true**, Ansible creates **newt/newt-pangolin-auth** when all are set (see clusters/noble/bootstrap/newt/README.md).
PANGOLIN_ENDPOINT= PANGOLIN_ENDPOINT=
NEWT_ID= NEWT_ID=
NEWT_SECRET= NEWT_SECRET=

View File

@@ -1,6 +1,6 @@
# Ansible — noble cluster # Ansible — noble cluster
Automates [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md): optional **Talos Phase A** (genconfig → apply → bootstrap → kubeconfig), then **Phase B+** (CNI → add-ons → ingress → Argo CD → Kyverno → observability, etc.). **Argo CD** does not reconcile core charts — optional GitOps starts from an empty [`clusters/noble/bootstrap/argocd/apps/kustomization.yaml`](../clusters/noble/bootstrap/argocd/apps/kustomization.yaml). Automates [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md): optional **Talos Phase A** (genconfig → apply → bootstrap → kubeconfig), then **Phase B+** (CNI → add-ons → ingress → Argo CD → Kyverno → observability, etc.). **Argo CD** does not reconcile core charts — optional GitOps starts from an empty [`clusters/noble/apps/kustomization.yaml`](../clusters/noble/apps/kustomization.yaml).
## Order of operations ## Order of operations

View File

@@ -13,7 +13,7 @@ noble_k8s_api_server_fallback: "https://192.168.50.20:6443"
# Only if you must skip the kubectl /healthz preflight (not recommended). # Only if you must skip the kubectl /healthz preflight (not recommended).
noble_skip_k8s_health_check: false noble_skip_k8s_health_check: false
# Pangolin / Newt — set true only after creating newt-pangolin-auth Secret (see clusters/noble/apps/newt/README.md) # Pangolin / Newt — set true only after creating newt-pangolin-auth Secret (see clusters/noble/bootstrap/newt/README.md)
noble_newt_install: false noble_newt_install: false
# cert-manager needs Secret cloudflare-dns-api-token in cert-manager namespace before ClusterIssuers work # cert-manager needs Secret cloudflare-dns-api-token in cert-manager namespace before ClusterIssuers work

View File

@@ -5,7 +5,7 @@
- kubectl - kubectl
- apply - apply
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/cert-manager/namespace.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/cert-manager/namespace.yaml"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true
@@ -23,7 +23,7 @@
- --version - --version
- v1.20.0 - v1.20.0
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/cert-manager/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/cert-manager/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
@@ -51,7 +51,7 @@
ansible.builtin.debug: ansible.builtin.debug:
msg: >- msg: >-
Secret cert-manager/cloudflare-dns-api-token not found. Secret cert-manager/cloudflare-dns-api-token not found.
Create it per clusters/noble/apps/cert-manager/README.md before ClusterIssuers can succeed. Create it per clusters/noble/bootstrap/cert-manager/README.md before ClusterIssuers can succeed.
when: when:
- noble_cert_manager_require_cloudflare_secret | default(true) | bool - noble_cert_manager_require_cloudflare_secret | default(true) | bool
- noble_cf_secret.rc != 0 - noble_cf_secret.rc != 0
@@ -62,7 +62,7 @@
- kubectl - kubectl
- apply - apply
- -k - -k
- "{{ noble_repo_root }}/clusters/noble/apps/cert-manager" - "{{ noble_repo_root }}/clusters/noble/bootstrap/cert-manager"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true

View File

@@ -12,7 +12,7 @@
- --version - --version
- "1.16.6" - "1.16.6"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/cilium/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/cilium/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"

View File

@@ -5,7 +5,7 @@
- kubectl - kubectl
- apply - apply
- -k - -k
- "{{ noble_repo_root }}/clusters/noble/apps/kube-vip" - "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-vip"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true

View File

@@ -5,7 +5,7 @@
- kubectl - kubectl
- apply - apply
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/kyverno/namespace.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/namespace.yaml"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true
@@ -23,7 +23,7 @@
- --version - --version
- "3.7.1" - "3.7.1"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/kyverno/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/values.yaml"
- --wait - --wait
- --timeout - --timeout
- 15m - 15m

View File

@@ -12,7 +12,7 @@
- --version - --version
- "3.7.1" - "3.7.1"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/kyverno/policies-values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/policies-values.yaml"
- --wait - --wait
- --timeout - --timeout
- 10m - 10m

View File

@@ -2,7 +2,7 @@
> **Sensitive:** This file may include **passwords read from Kubernetes Secrets** when credential fetch ran. It is **gitignored** — do not commit or share. > **Sensitive:** This file may include **passwords read from Kubernetes Secrets** when credential fetch ran. It is **gitignored** — do not commit or share.
**DNS:** point **`*.apps.noble.lab.pcenicni.dev`** at the Traefik **LoadBalancer** (MetalLB **`192.168.50.211`** by default — see `clusters/noble/apps/traefik/values.yaml`). **DNS:** point **`*.apps.noble.lab.pcenicni.dev`** at the Traefik **LoadBalancer** (MetalLB **`192.168.50.211`** by default — see `clusters/noble/bootstrap/traefik/values.yaml`).
**TLS:** **cert-manager** + **`letsencrypt-prod`** on each Ingress (public **DNS-01** for **`pcenicni.dev`**). **TLS:** **cert-manager** + **`letsencrypt-prod`** on each Ingress (public **DNS-01** for **`pcenicni.dev`**).
@@ -24,7 +24,7 @@ This file is **generated** by Ansible (`noble_landing_urls` role). Use it as a t
| **Prometheus** | — | No auth in default install (lab). | | **Prometheus** | — | No auth in default install (lab). |
| **Alertmanager** | — | No auth in default install (lab). | | **Alertmanager** | — | No auth in default install (lab). |
| **Longhorn** | — | No default login unless you enable access control in the UI settings. | | **Longhorn** | — | No default login unless you enable access control in the UI settings. |
| **Vault** | Token | Root token is only from **`vault operator init`** (not stored in git). See `clusters/noble/apps/vault/README.md`. | | **Vault** | Token | Root token is only from **`vault operator init`** (not stored in git). See `clusters/noble/bootstrap/vault/README.md`. |
### Commands to retrieve passwords (if not filled above) ### Commands to retrieve passwords (if not filled above)

View File

@@ -5,7 +5,7 @@
- kubectl - kubectl
- apply - apply
- -k - -k
- "{{ noble_repo_root }}/clusters/noble/apps/longhorn" - "{{ noble_repo_root }}/clusters/noble/bootstrap/longhorn"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true
@@ -22,7 +22,7 @@
- longhorn-system - longhorn-system
- --create-namespace - --create-namespace
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/longhorn/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/longhorn/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"

View File

@@ -5,7 +5,7 @@
- kubectl - kubectl
- apply - apply
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/metallb/namespace.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/metallb/namespace.yaml"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true
@@ -33,7 +33,7 @@
- kubectl - kubectl
- apply - apply
- -k - -k
- "{{ noble_repo_root }}/clusters/noble/apps/metallb" - "{{ noble_repo_root }}/clusters/noble/bootstrap/metallb"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true

View File

@@ -12,7 +12,7 @@
- --version - --version
- "3.13.0" - "3.13.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/metrics-server/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/metrics-server/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"

View File

@@ -10,7 +10,7 @@
- kubectl - kubectl
- apply - apply
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/newt/namespace.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/newt/namespace.yaml"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
when: noble_newt_install | bool when: noble_newt_install | bool
@@ -33,7 +33,7 @@
- --version - --version
- "1.2.0" - "1.2.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/newt/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/newt/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"

View File

@@ -1,13 +1,13 @@
--- ---
# Mirrors former **noble-platform** Argo Application: Helm releases + plain manifests under clusters/noble/apps. # Mirrors former **noble-platform** Argo Application: Helm releases + plain manifests under clusters/noble/bootstrap.
- name: Apply clusters/noble/apps kustomize (namespaces, Grafana Loki datasource, Vault extras) - name: Apply clusters/noble/bootstrap kustomize (namespaces, Grafana Loki datasource, Vault extras)
ansible.builtin.command: ansible.builtin.command:
argv: argv:
- kubectl - kubectl
- apply - apply
- "--request-timeout={{ noble_platform_kubectl_request_timeout }}" - "--request-timeout={{ noble_platform_kubectl_request_timeout }}"
- -k - -k
- "{{ noble_repo_root }}/clusters/noble/apps" - "{{ noble_repo_root }}/clusters/noble/bootstrap"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
register: noble_platform_kustomize register: noble_platform_kustomize
@@ -29,7 +29,7 @@
- --version - --version
- "2.18.4" - "2.18.4"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/sealed-secrets/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/sealed-secrets/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
@@ -48,7 +48,7 @@
- --version - --version
- "2.2.0" - "2.2.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/external-secrets/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/external-secrets/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
@@ -82,7 +82,7 @@
- --version - --version
- "0.32.0" - "0.32.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/vault/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/vault/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
@@ -102,7 +102,7 @@
- --version - --version
- "82.15.1" - "82.15.1"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/kube-prometheus-stack/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml"
- --wait - --wait
- --timeout - --timeout
- 30m - 30m
@@ -123,7 +123,7 @@
- --version - --version
- "6.55.0" - "6.55.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/loki/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/loki/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
@@ -142,7 +142,7 @@
- --version - --version
- "0.56.0" - "0.56.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/fluent-bit/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/fluent-bit/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
@@ -161,7 +161,7 @@
- -n - -n
- headlamp - headlamp
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/headlamp/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/headlamp/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"

View File

@@ -4,9 +4,9 @@
msg: | msg: |
1. kubectl -n vault get pods (wait for Running) 1. kubectl -n vault get pods (wait for Running)
2. kubectl -n vault exec -it vault-0 -- vault operator init (once; save keys) 2. kubectl -n vault exec -it vault-0 -- vault operator init (once; save keys)
3. Unseal per clusters/noble/apps/vault/README.md 3. Unseal per clusters/noble/bootstrap/vault/README.md
4. ./clusters/noble/apps/vault/configure-kubernetes-auth.sh 4. ./clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh
5. kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml 5. kubectl apply -f clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml
- name: Optional — apply Vault ClusterSecretStore for External Secrets - name: Optional — apply Vault ClusterSecretStore for External Secrets
ansible.builtin.command: ansible.builtin.command:
@@ -14,7 +14,7 @@
- kubectl - kubectl
- apply - apply
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
when: noble_apply_vault_cluster_secret_store | default(false) | bool when: noble_apply_vault_cluster_secret_store | default(false) | bool
@@ -24,4 +24,4 @@
ansible.builtin.debug: ansible.builtin.debug:
msg: >- msg: >-
Optional: kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml Optional: kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml
after editing repoURL. Core workloads are not synced by Argo — see bootstrap/argocd/apps/README.md after editing repoURL. Core workloads are not synced by Argo — see clusters/noble/apps/README.md

View File

@@ -5,7 +5,7 @@
- kubectl - kubectl
- apply - apply
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/traefik/namespace.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/traefik/namespace.yaml"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true
@@ -23,7 +23,7 @@
- --version - --version
- "39.0.6" - "39.0.6"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/apps/traefik/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/traefik/values.yaml"
- --wait - --wait
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"

View File

@@ -0,0 +1,7 @@
# Argo CD — optional applications (non-bootstrap)
**Base cluster configuration** (CNI, MetalLB, ingress, cert-manager, storage, observability stack, policy, Vault, etc.) is installed by **`ansible/playbooks/noble.yml`** from **`clusters/noble/bootstrap/`** — not from here.
**`noble-root`** (`clusters/noble/bootstrap/argocd/root-application.yaml`) points at **`clusters/noble/apps`**. Add **`Application`** manifests (and optional **`AppProject`** definitions) under this directory only for workloads that are additive and do not subsume the Ansible-managed platform.
For an app-of-apps pattern, use a second-level **`Application`** that syncs a subdirectory (for example **`optional/`**) containing leaf **`Application`** resources.

View File

@@ -1,17 +1,6 @@
# Plain Kustomize only (namespaces + extra YAML). Helm installs are driven by **ansible/playbooks/noble.yml** # Argo CD **noble-root** syncs this directory. Add **Application** / **AppProject** manifests only for
# (role **noble_platform**) — avoids **kustomize --enable-helm** in-repo. # optional workloads that do not replace Ansible bootstrap (CNI, ingress, storage, core observability, etc.).
# Helm value files for those apps can live in subdirectories here (for example **./homepage/values.yaml**).
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: []
resources:
- kube-prometheus-stack/namespace.yaml
- loki/namespace.yaml
- fluent-bit/namespace.yaml
- sealed-secrets/namespace.yaml
- external-secrets/namespace.yaml
- vault/namespace.yaml
- kyverno/namespace.yaml
- headlamp/namespace.yaml
- grafana-loki-datasource/loki-datasource.yaml
- vault/unseal-cronjob.yaml
- vault/cilium-network-policy.yaml

View File

@@ -53,10 +53,10 @@ Use **Settings → Repositories** in the UI, or `argocd repo add` / a `Secret` o
## 4. App-of-apps (optional GitOps only) ## 4. App-of-apps (optional GitOps only)
Bootstrap **platform** workloads (CNI, ingress, cert-manager, Kyverno, observability, Vault, etc.) are installed by Bootstrap **platform** workloads (CNI, ingress, cert-manager, Kyverno, observability, Vault, etc.) are installed by
**`ansible/playbooks/noble.yml`** — not by Argo. **`apps/kustomization.yaml`** is empty by default. **`ansible/playbooks/noble.yml`** from **`clusters/noble/bootstrap/`** — not by Argo. **`clusters/noble/apps/kustomization.yaml`** is empty by default.
1. Edit **`root-application.yaml`**: set **`repoURL`** and **`targetRevision`** to this repository. The **`resources-finalizer.argocd.argoproj.io/background`** finalizer uses Argos path-qualified form so **`kubectl apply`** does not warn about finalizer names. 1. Edit **`root-application.yaml`**: set **`repoURL`** and **`targetRevision`** to this repository. The **`resources-finalizer.argocd.argoproj.io/background`** finalizer uses Argos path-qualified form so **`kubectl apply`** does not warn about finalizer names.
2. When you want Argo to manage specific apps, add **`Application`** manifests under **`apps/`** (see **`apps/README.md`**). 2. When you want Argo to manage specific apps, add **`Application`** manifests under **`clusters/noble/apps/`** (see **`clusters/noble/apps/README.md`**).
3. Apply the root: 3. Apply the root:
```bash ```bash
@@ -64,7 +64,7 @@ Bootstrap **platform** workloads (CNI, ingress, cert-manager, Kyverno, observabi
``` ```
If you migrated from GitOps-managed **`noble-platform`** / **`noble-kyverno`**, delete stale **`Application`** objects on If you migrated from GitOps-managed **`noble-platform`** / **`noble-kyverno`**, delete stale **`Application`** objects on
the cluster (see **`apps/README.md`**) then re-apply the root. the cluster (see **`clusters/noble/apps/README.md`**) then re-apply the root.
## Versions ## Versions

View File

@@ -1,17 +0,0 @@
# Argo CD — app-of-apps children (optional GitOps only)
**Core platform is Ansible-managed** — see repository **`ansible/README.md`** and **`ansible/playbooks/noble.yml`**.
This directorys **`kustomization.yaml`** has **`resources: []`** so **`noble-root`** (if applied) does not reconcile Helm charts or cluster add-ons. **Add `Application` manifests here only** for apps you want Argo to manage (for example, sample workloads or third-party charts not covered by the bootstrap playbook).
| Previous (removed) | Now |
|--------------------|-----|
| **`noble-kyverno`**, **`noble-kyverno-policies`**, **`noble-platform`** | Installed by Ansible roles **`noble_kyverno`**, **`noble_kyverno_policies`**, **`noble_platform`** |
If you previously synced **`noble-root`** with the old child manifests, delete stale Applications on the cluster:
```bash
kubectl delete application -n argocd noble-platform noble-kyverno noble-kyverno-policies --ignore-not-found
```
Then re-apply **`root-application.yaml`** so Argo matches this repo.

View File

@@ -1,6 +0,0 @@
# Intentionally empty: core platform (CNI, ingress, storage, observability, policy, etc.) is
# installed by **ansible/playbooks/noble.yml** — not by Argo CD. Add optional Application
# manifests here only for workloads you want GitOps-managed.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources: []

View File

@@ -3,8 +3,8 @@
# 1. Set spec.source.repoURL (and targetRevision — **HEAD** tracks the remote default branch) to this repo. # 1. Set spec.source.repoURL (and targetRevision — **HEAD** tracks the remote default branch) to this repo.
# 2. kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml # 2. kubectl apply -f clusters/noble/bootstrap/argocd/root-application.yaml
# #
# **apps/kustomization.yaml** is intentionally empty: core platform is installed by **ansible/playbooks/noble.yml**, # **clusters/noble/apps** holds optional **Application** manifests. Core platform is installed by
# not Argo. Add **Application** manifests under **apps/** only for optional GitOps-managed workloads. # **ansible/playbooks/noble.yml** from **clusters/noble/bootstrap/**.
# #
apiVersion: argoproj.io/v1alpha1 apiVersion: argoproj.io/v1alpha1
kind: Application kind: Application
@@ -21,7 +21,7 @@ spec:
source: source:
repoURL: https://gitea.pcenicni.ca/gsdavidp/home-server.git repoURL: https://gitea.pcenicni.ca/gsdavidp/home-server.git
targetRevision: HEAD targetRevision: HEAD
path: clusters/noble/bootstrap/argocd/apps path: clusters/noble/apps
destination: destination:
server: https://kubernetes.default.svc server: https://kubernetes.default.svc
namespace: argocd namespace: argocd

View File

@@ -19,7 +19,7 @@ Without this Secret, **`ClusterIssuer`** will not complete certificate orders.
1. Create the namespace: 1. Create the namespace:
```bash ```bash
kubectl apply -f clusters/noble/apps/cert-manager/namespace.yaml kubectl apply -f clusters/noble/bootstrap/cert-manager/namespace.yaml
``` ```
2. Install the chart (CRDs included via `values.yaml`): 2. Install the chart (CRDs included via `values.yaml`):
@@ -30,7 +30,7 @@ Without this Secret, **`ClusterIssuer`** will not complete certificate orders.
helm upgrade --install cert-manager jetstack/cert-manager \ helm upgrade --install cert-manager jetstack/cert-manager \
--namespace cert-manager \ --namespace cert-manager \
--version v1.20.0 \ --version v1.20.0 \
-f clusters/noble/apps/cert-manager/values.yaml \ -f clusters/noble/bootstrap/cert-manager/values.yaml \
--wait --wait
``` ```
@@ -39,7 +39,7 @@ Without this Secret, **`ClusterIssuer`** will not complete certificate orders.
4. Apply ClusterIssuers (staging then prod, or both): 4. Apply ClusterIssuers (staging then prod, or both):
```bash ```bash
kubectl apply -k clusters/noble/apps/cert-manager kubectl apply -k clusters/noble/bootstrap/cert-manager
``` ```
5. Confirm: 5. Confirm:

View File

@@ -2,13 +2,13 @@
# #
# Chart: jetstack/cert-manager — pin version on the helm command (e.g. v1.20.0). # Chart: jetstack/cert-manager — pin version on the helm command (e.g. v1.20.0).
# #
# kubectl apply -f clusters/noble/apps/cert-manager/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/cert-manager/namespace.yaml
# helm repo add jetstack https://charts.jetstack.io # helm repo add jetstack https://charts.jetstack.io
# helm repo update # helm repo update
# helm upgrade --install cert-manager jetstack/cert-manager -n cert-manager \ # helm upgrade --install cert-manager jetstack/cert-manager -n cert-manager \
# --version v1.20.0 -f clusters/noble/apps/cert-manager/values.yaml --wait # --version v1.20.0 -f clusters/noble/bootstrap/cert-manager/values.yaml --wait
# #
# kubectl apply -k clusters/noble/apps/cert-manager # kubectl apply -k clusters/noble/bootstrap/cert-manager
crds: crds:
enabled: true enabled: true

View File

@@ -14,7 +14,7 @@ helm repo update
helm upgrade --install cilium cilium/cilium \ helm upgrade --install cilium cilium/cilium \
--namespace kube-system \ --namespace kube-system \
--version 1.16.6 \ --version 1.16.6 \
-f clusters/noble/apps/cilium/values.yaml \ -f clusters/noble/bootstrap/cilium/values.yaml \
--wait --wait
``` ```
@@ -25,7 +25,7 @@ kubectl -n kube-system rollout status ds/cilium
kubectl get nodes kubectl get nodes
``` ```
When nodes are **Ready**, continue with **MetalLB** (`clusters/noble/apps/metallb/README.md`) and other Phase B items. **kube-vip** for the Kubernetes API VIP is separate (L2 ARP); it can run after the API is reachable. When nodes are **Ready**, continue with **MetalLB** (`clusters/noble/bootstrap/metallb/README.md`) and other Phase B items. **kube-vip** for the Kubernetes API VIP is separate (L2 ARP); it can run after the API is reachable.
## 2. Optional: kube-proxy replacement (phase 2) ## 2. Optional: kube-proxy replacement (phase 2)

View File

@@ -11,9 +11,9 @@ Syncs secrets from external systems into Kubernetes **Secret** objects via **Ext
```bash ```bash
helm repo add external-secrets https://charts.external-secrets.io helm repo add external-secrets https://charts.external-secrets.io
helm repo update helm repo update
kubectl apply -f clusters/noble/apps/external-secrets/namespace.yaml kubectl apply -f clusters/noble/bootstrap/external-secrets/namespace.yaml
helm upgrade --install external-secrets external-secrets/external-secrets -n external-secrets \ helm upgrade --install external-secrets external-secrets/external-secrets -n external-secrets \
--version 2.2.0 -f clusters/noble/apps/external-secrets/values.yaml --wait --version 2.2.0 -f clusters/noble/bootstrap/external-secrets/values.yaml --wait
``` ```
Verify: Verify:

View File

@@ -10,7 +10,7 @@
# Adjust server, mountPath, role, and path to match your Vault deployment. If Vault uses TLS # Adjust server, mountPath, role, and path to match your Vault deployment. If Vault uses TLS
# with a private CA, set provider.vault.caProvider or caBundle (see README). # with a private CA, set provider.vault.caProvider or caBundle (see README).
# #
# kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml # kubectl apply -f clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml
--- ---
apiVersion: external-secrets.io/v1 apiVersion: external-secrets.io/v1
kind: ClusterSecretStore kind: ClusterSecretStore

View File

@@ -2,9 +2,9 @@
# #
# helm repo add external-secrets https://charts.external-secrets.io # helm repo add external-secrets https://charts.external-secrets.io
# helm repo update # helm repo update
# kubectl apply -f clusters/noble/apps/external-secrets/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/external-secrets/namespace.yaml
# helm upgrade --install external-secrets external-secrets/external-secrets -n external-secrets \ # helm upgrade --install external-secrets external-secrets/external-secrets -n external-secrets \
# --version 2.2.0 -f clusters/noble/apps/external-secrets/values.yaml --wait # --version 2.2.0 -f clusters/noble/bootstrap/external-secrets/values.yaml --wait
# #
# CRDs are installed by the chart (installCRDs: true). Vault ClusterSecretStore: see README + examples/. # CRDs are installed by the chart (installCRDs: true). Vault ClusterSecretStore: see README + examples/.
commonLabels: {} commonLabels: {}

View File

@@ -5,11 +5,11 @@
# #
# Talos: only **tail** `/var/log/containers` (no host **systemd** input — journal layout differs from typical Linux). # Talos: only **tail** `/var/log/containers` (no host **systemd** input — journal layout differs from typical Linux).
# #
# kubectl apply -f clusters/noble/apps/fluent-bit/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/fluent-bit/namespace.yaml
# helm repo add fluent https://fluent.github.io/helm-charts # helm repo add fluent https://fluent.github.io/helm-charts
# helm repo update # helm repo update
# helm upgrade --install fluent-bit fluent/fluent-bit -n logging \ # helm upgrade --install fluent-bit fluent/fluent-bit -n logging \
# --version 0.56.0 -f clusters/noble/apps/fluent-bit/values.yaml --wait --timeout 15m # --version 0.56.0 -f clusters/noble/bootstrap/fluent-bit/values.yaml --wait --timeout 15m
config: config:
inputs: | inputs: |

View File

@@ -2,9 +2,9 @@
# The Grafana sidecar watches ConfigMaps labeled **grafana_datasource: "1"** and loads YAML keys as files. # The Grafana sidecar watches ConfigMaps labeled **grafana_datasource: "1"** and loads YAML keys as files.
# Does not require editing the kube-prometheus-stack Helm release. # Does not require editing the kube-prometheus-stack Helm release.
# #
# kubectl apply -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml # kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml
# #
# Remove with: kubectl delete -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml # Remove with: kubectl delete -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:

View File

@@ -10,9 +10,9 @@
```bash ```bash
helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/ helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/
helm repo update helm repo update
kubectl apply -f clusters/noble/apps/headlamp/namespace.yaml kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml
helm upgrade --install headlamp headlamp/headlamp -n headlamp \ helm upgrade --install headlamp headlamp/headlamp -n headlamp \
--version 0.40.1 -f clusters/noble/apps/headlamp/values.yaml --wait --timeout 10m --version 0.40.1 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m
``` ```
Sign-in uses a **ServiceAccount token** (Headlamp docs: create a limited SA for day-to-day use). This repo binds the Headlamp workload SA to the built-in **`edit`** ClusterRole (**`clusterRoleBinding.clusterRoleName: edit`** in **`values.yaml`**) — not **`cluster-admin`**. For cluster-scoped admin work, use **`kubectl`** with your admin kubeconfig. Optional **OIDC** in **`config.oidc`** replaces token login for SSO. Sign-in uses a **ServiceAccount token** (Headlamp docs: create a limited SA for day-to-day use). This repo binds the Headlamp workload SA to the built-in **`edit`** ClusterRole (**`clusterRoleBinding.clusterRoleName: edit`** in **`values.yaml`**) — not **`cluster-admin`**. For cluster-scoped admin work, use **`kubectl`** with your admin kubeconfig. Optional **OIDC** in **`config.oidc`** replaces token login for SSO.

View File

@@ -2,9 +2,9 @@
# #
# helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/ # helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/
# helm repo update # helm repo update
# kubectl apply -f clusters/noble/apps/headlamp/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml
# helm upgrade --install headlamp headlamp/headlamp -n headlamp \ # helm upgrade --install headlamp headlamp/headlamp -n headlamp \
# --version 0.40.1 -f clusters/noble/apps/headlamp/values.yaml --wait --timeout 10m # --version 0.40.1 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m
# #
# DNS: headlamp.apps.noble.lab.pcenicni.dev → Traefik LB (see talos/CLUSTER-BUILD.md). # DNS: headlamp.apps.noble.lab.pcenicni.dev → Traefik LB (see talos/CLUSTER-BUILD.md).
# Default chart RBAC is broad — restrict for production (Phase G). # Default chart RBAC is broad — restrict for production (Phase G).

View File

@@ -4,10 +4,10 @@
# #
# Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`): # Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`):
# #
# kubectl apply -f clusters/noble/apps/kube-prometheus-stack/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml
# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts # helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
# helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \ # helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \
# --version 82.15.1 -f clusters/noble/apps/kube-prometheus-stack/values.yaml --wait --timeout 30m # --version 82.15.1 -f clusters/noble/bootstrap/kube-prometheus-stack/values.yaml --wait --timeout 30m
# #
# Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes). # Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes).
# Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 15–30m. To watch progress, # Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 15–30m. To watch progress,
@@ -87,7 +87,7 @@ grafana:
size: 10Gi size: 10Gi
# HTTPS via Traefik + cert-manager (ClusterIssuer letsencrypt-prod; same pattern as other *.apps.noble.lab.pcenicni.dev hosts). # HTTPS via Traefik + cert-manager (ClusterIssuer letsencrypt-prod; same pattern as other *.apps.noble.lab.pcenicni.dev hosts).
# DNS: grafana.apps.noble.lab.pcenicni.dev → Traefik LoadBalancer (192.168.50.211) — see clusters/noble/apps/traefik/values.yaml # DNS: grafana.apps.noble.lab.pcenicni.dev → Traefik LoadBalancer (192.168.50.211) — see clusters/noble/bootstrap/traefik/values.yaml
ingress: ingress:
enabled: true enabled: true
ingressClassName: traefik ingressClassName: traefik
@@ -109,4 +109,4 @@ grafana:
# Traefik sets X-Forwarded-*; required for correct redirects and cookies behind the ingress. # Traefik sets X-Forwarded-*; required for correct redirects and cookies behind the ingress.
use_proxy_headers: true use_proxy_headers: true
# Loki datasource: apply `clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml` (sidecar ConfigMap) instead of additionalDataSources here. # Loki datasource: apply `clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml` (sidecar ConfigMap) instead of additionalDataSources here.

View File

@@ -0,0 +1,18 @@
# Ansible bootstrap: plain Kustomize (namespaces + extra YAML). Helm installs are driven by
# **ansible/playbooks/noble.yml** (role **noble_platform**) — avoids **kustomize --enable-helm** in-repo.
# Optional GitOps workloads live under **../apps/** (Argo **noble-root**).
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
# Namespaces first — each carries the PSA labels its chart's workloads need,
# so they must exist before Ansible runs the corresponding Helm install.
- kube-prometheus-stack/namespace.yaml
- loki/namespace.yaml
- fluent-bit/namespace.yaml
- sealed-secrets/namespace.yaml
- external-secrets/namespace.yaml
- vault/namespace.yaml
- kyverno/namespace.yaml
- headlamp/namespace.yaml
# Extra plain manifests applied alongside the Helm releases.
- grafana-loki-datasource/loki-datasource.yaml  # Grafana sidecar ConfigMap providing the Loki datasource
- vault/unseal-cronjob.yaml  # optional unseal CronJob; requires the vault-unseal-key Secret (see vault/README.md)
- vault/cilium-network-policy.yaml  # restricts ingress to Vault TCP 8200; apply after Cilium is healthy

View File

@@ -10,11 +10,11 @@ Admission policies using [Kyverno](https://kyverno.io/). The main chart installs
```bash ```bash
helm repo add kyverno https://kyverno.github.io/kyverno/ helm repo add kyverno https://kyverno.github.io/kyverno/
helm repo update helm repo update
kubectl apply -f clusters/noble/apps/kyverno/namespace.yaml kubectl apply -f clusters/noble/bootstrap/kyverno/namespace.yaml
helm upgrade --install kyverno kyverno/kyverno -n kyverno \ helm upgrade --install kyverno kyverno/kyverno -n kyverno \
--version 3.7.1 -f clusters/noble/apps/kyverno/values.yaml --wait --timeout 15m --version 3.7.1 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m
helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \ helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \
--version 3.7.1 -f clusters/noble/apps/kyverno/policies-values.yaml --wait --timeout 10m --version 3.7.1 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m
``` ```
Verify: Verify:

View File

@@ -1,12 +1,12 @@
# kyverno/kyverno-policies — Pod Security Standards as Kyverno ClusterPolicies # kyverno/kyverno-policies — Pod Security Standards as Kyverno ClusterPolicies
# #
# helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \ # helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \
# --version 3.7.1 -f clusters/noble/apps/kyverno/policies-values.yaml --wait --timeout 10m # --version 3.7.1 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m
# #
# Default profile is baseline; validationFailureAction is Audit so existing privileged # Default profile is baseline; validationFailureAction is Audit so existing privileged
# workloads are not blocked. Kyverno still emits PolicyReports for matches — Headlamp # workloads are not blocked. Kyverno still emits PolicyReports for matches — Headlamp
# surfaces those as “policy violations”. Exclude namespaces that intentionally run # surfaces those as “policy violations”. Exclude namespaces that intentionally run
# outside baseline (see namespace PSA labels under clusters/noble/apps/*/namespace.yaml) # outside baseline (see namespace PSA labels under clusters/noble/bootstrap/*/namespace.yaml)
# plus core Kubernetes namespaces and every Ansible-managed app namespace on noble. # plus core Kubernetes namespaces and every Ansible-managed app namespace on noble.
# #
# After widening excludes, Kyverno does not always prune old PolicyReport rows; refresh: # After widening excludes, Kyverno does not always prune old PolicyReport rows; refresh:
@@ -25,7 +25,7 @@ validationFailureAction: Audit
failurePolicy: Fail failurePolicy: Fail
validationAllowExistingViolations: true validationAllowExistingViolations: true
# All platform namespaces on noble (ansible/playbooks/noble.yml + clusters/noble/apps). # All platform namespaces on noble (ansible/playbooks/noble.yml + clusters/noble/bootstrap).
x-kyverno-exclude-infra: &kyverno_exclude_infra x-kyverno-exclude-infra: &kyverno_exclude_infra
any: any:
- resources: - resources:

View File

@@ -2,9 +2,9 @@
# #
# helm repo add kyverno https://kyverno.github.io/kyverno/ # helm repo add kyverno https://kyverno.github.io/kyverno/
# helm repo update # helm repo update
# kubectl apply -f clusters/noble/apps/kyverno/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/kyverno/namespace.yaml
# helm upgrade --install kyverno kyverno/kyverno -n kyverno \ # helm upgrade --install kyverno kyverno/kyverno -n kyverno \
# --version 3.7.1 -f clusters/noble/apps/kyverno/values.yaml --wait --timeout 15m # --version 3.7.1 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m
# #
# Baseline Pod Security policies (separate chart): see policies-values.yaml + README.md # Baseline Pod Security policies (separate chart): see policies-values.yaml + README.md
# #

View File

@@ -2,11 +2,11 @@
# #
# Chart: grafana/loki — pin version on install (e.g. 6.55.0). # Chart: grafana/loki — pin version on install (e.g. 6.55.0).
# #
# kubectl apply -f clusters/noble/apps/loki/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml
# helm repo add grafana https://grafana.github.io/helm-charts # helm repo add grafana https://grafana.github.io/helm-charts
# helm repo update # helm repo update
# helm upgrade --install loki grafana/loki -n loki \ # helm upgrade --install loki grafana/loki -n loki \
# --version 6.55.0 -f clusters/noble/apps/loki/values.yaml --wait --timeout 30m # --version 6.55.0 -f clusters/noble/bootstrap/loki/values.yaml --wait --timeout 30m
# #
# Query/push URL for Grafana + Fluent Bit: http://loki-gateway.loki.svc.cluster.local:80 # Query/push URL for Grafana + Fluent Bit: http://loki-gateway.loki.svc.cluster.local:80

View File

@@ -1,11 +1,11 @@
# Longhorn Helm values — use with Talos user volume + kubelet mounts (see talos/talconfig.yaml). # Longhorn Helm values — use with Talos user volume + kubelet mounts (see talos/talconfig.yaml).
# 1) PSA: `kubectl apply -k clusters/noble/apps/longhorn` (privileged namespace) before or after Helm. # 1) PSA: `kubectl apply -k clusters/noble/bootstrap/longhorn` (privileged namespace) before or after Helm.
# 2) Talos: bind `/var/lib/longhorn` → `/var/mnt/longhorn` in kubelet extraMounts — chart hostPath is fixed to /var/lib/longhorn. # 2) Talos: bind `/var/lib/longhorn` → `/var/mnt/longhorn` in kubelet extraMounts — chart hostPath is fixed to /var/lib/longhorn.
# Example (run from home-server repo root so -f path resolves): # Example (run from home-server repo root so -f path resolves):
# kubectl apply -k clusters/noble/apps/longhorn # kubectl apply -k clusters/noble/bootstrap/longhorn
# helm repo add longhorn https://charts.longhorn.io && helm repo update # helm repo add longhorn https://charts.longhorn.io && helm repo update
# helm upgrade --install longhorn longhorn/longhorn -n longhorn-system --create-namespace \ # helm upgrade --install longhorn longhorn/longhorn -n longhorn-system --create-namespace \
# -f clusters/noble/apps/longhorn/values.yaml # -f clusters/noble/bootstrap/longhorn/values.yaml
# "helm upgrade --install" needs two arguments: RELEASE_NAME and CHART (e.g. longhorn longhorn/longhorn). # "helm upgrade --install" needs two arguments: RELEASE_NAME and CHART (e.g. longhorn longhorn/longhorn).
# #
# If you already installed Longhorn without this file: fix Default Settings in the UI or edit each # If you already installed Longhorn without this file: fix Default Settings in the UI or edit each

View File

@@ -11,7 +11,7 @@ If `kubectl apply -k` fails with **`no matches for kind "IPAddressPool"`** / **`
**Pod Security warnings** (`would violate PodSecurity "restricted"`): MetalLB's speaker/FRR use `hostNetwork`, `NET_ADMIN`, etc. That is expected unless `metallb-system` is labeled **privileged**. Apply `namespace.yaml` **before** Helm so the namespace is created with the right labels (omit `--create-namespace` on Helm), or patch an existing namespace: **Pod Security warnings** (`would violate PodSecurity "restricted"`): MetalLB's speaker/FRR use `hostNetwork`, `NET_ADMIN`, etc. That is expected unless `metallb-system` is labeled **privileged**. Apply `namespace.yaml` **before** Helm so the namespace is created with the right labels (omit `--create-namespace` on Helm), or patch an existing namespace:
```bash ```bash
kubectl apply -f clusters/noble/apps/metallb/namespace.yaml kubectl apply -f clusters/noble/bootstrap/metallb/namespace.yaml
``` ```
If you already ran Helm with `--create-namespace`, either `kubectl apply -f namespace.yaml` (merges labels) or: If you already ran Helm with `--create-namespace`, either `kubectl apply -f namespace.yaml` (merges labels) or:
@@ -38,15 +38,15 @@ Then restart MetalLB pods if they were failing (`kubectl get pods -n metallb-sys
2. Apply this folders pool and L2 advertisement: 2. Apply this folders pool and L2 advertisement:
```bash ```bash
kubectl apply -k clusters/noble/apps/metallb kubectl apply -k clusters/noble/bootstrap/metallb
``` ```
3. Confirm a `Service` `type: LoadBalancer` receives an address in `192.168.50.210`–`192.168.50.229` (e.g. **`kubectl get svc -n traefik traefik`** after installing **Traefik** in `clusters/noble/apps/traefik/`). 3. Confirm a `Service` `type: LoadBalancer` receives an address in `192.168.50.210`–`192.168.50.229` (e.g. **`kubectl get svc -n traefik traefik`** after installing **Traefik** in `clusters/noble/bootstrap/traefik/`).
Reserve **one** IP in that range for Argo CD (e.g. `192.168.50.210`) via `spec.loadBalancerIP` or chart values when you expose the server. Traefik pins **`192.168.50.211`** in **`clusters/noble/apps/traefik/values.yaml`**. Reserve **one** IP in that range for Argo CD (e.g. `192.168.50.210`) via `spec.loadBalancerIP` or chart values when you expose the server. Traefik pins **`192.168.50.211`** in **`clusters/noble/bootstrap/traefik/values.yaml`**.
## `Pending` MetalLB pods ## `Pending` MetalLB pods
1. `kubectl get nodes` — every node **`Ready`**? If **`NotReady`** or **`NetworkUnavailable`**, finish **CNI** install first. 1. `kubectl get nodes` — every node **`Ready`**? If **`NotReady`** or **`NetworkUnavailable`**, finish **CNI** install first.
2. `kubectl describe pod -n metallb-system <pod-name>` — read **Events** at the bottom (`0/N nodes are available: …`). 2. `kubectl describe pod -n metallb-system <pod-name>` — read **Events** at the bottom (`0/N nodes are available: …`).
3. L2 speaker uses the node's uplink; kube-vip in this repo expects **`ens18`** on control planes (`clusters/noble/apps/kube-vip/vip-daemonset.yaml`). If your NIC name differs, change `vip_interface` there. 3. L2 speaker uses the node's uplink; kube-vip in this repo expects **`ens18`** on control planes (`clusters/noble/bootstrap/kube-vip/vip-daemonset.yaml`). If your NIC name differs, change `vip_interface` there.

View File

@@ -1,5 +1,5 @@
# Apply before Helm if you do not use --create-namespace, or use this to fix PSA after the fact: # Apply before Helm if you do not use --create-namespace, or use this to fix PSA after the fact:
# kubectl apply -f clusters/noble/apps/metallb/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/metallb/namespace.yaml
# MetalLB speaker needs hostNetwork + NET_ADMIN; incompatible with Pod Security "restricted". # MetalLB speaker needs hostNetwork + NET_ADMIN; incompatible with Pod Security "restricted".
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace

View File

@@ -4,7 +4,7 @@
# #
# helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ # helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/
# helm upgrade --install metrics-server metrics-server/metrics-server -n kube-system \ # helm upgrade --install metrics-server metrics-server/metrics-server -n kube-system \
# --version 3.13.0 -f clusters/noble/apps/metrics-server/values.yaml --wait # --version 3.13.0 -f clusters/noble/bootstrap/metrics-server/values.yaml --wait
args: args:
- --kubelet-insecure-tls - --kubelet-insecure-tls

View File

@@ -10,14 +10,14 @@ Keys must match `values.yaml` (`PANGOLIN_ENDPOINT`, `NEWT_ID`, `NEWT_SECRET`).
### Option A — Sealed Secret (safe for GitOps) ### Option A — Sealed Secret (safe for GitOps)
With the [Sealed Secrets](https://github.com/bitnami-labs/sealed-secrets) controller installed (`clusters/noble/apps/sealed-secrets/`), generate a `SealedSecret` from your workstation (rotate credentials in Pangolin first if they were exposed): With the [Sealed Secrets](https://github.com/bitnami-labs/sealed-secrets) controller installed (`clusters/noble/bootstrap/sealed-secrets/`), generate a `SealedSecret` from your workstation (rotate credentials in Pangolin first if they were exposed):
```bash ```bash
chmod +x clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh chmod +x clusters/noble/bootstrap/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh
export PANGOLIN_ENDPOINT='https://pangolin.pcenicni.dev' export PANGOLIN_ENDPOINT='https://pangolin.pcenicni.dev'
export NEWT_ID='YOUR_NEWT_ID' export NEWT_ID='YOUR_NEWT_ID'
export NEWT_SECRET='YOUR_NEWT_SECRET' export NEWT_SECRET='YOUR_NEWT_SECRET'
./clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh > newt-pangolin-auth.sealedsecret.yaml ./clusters/noble/bootstrap/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh > newt-pangolin-auth.sealedsecret.yaml
kubectl apply -f newt-pangolin-auth.sealedsecret.yaml kubectl apply -f newt-pangolin-auth.sealedsecret.yaml
``` ```
@@ -26,7 +26,7 @@ Commit only the `.sealedsecret.yaml` file, not plain `Secret` YAML.
### Option B — Imperative Secret (not in git) ### Option B — Imperative Secret (not in git)
```bash ```bash
kubectl apply -f clusters/noble/apps/newt/namespace.yaml kubectl apply -f clusters/noble/bootstrap/newt/namespace.yaml
kubectl -n newt create secret generic newt-pangolin-auth \ kubectl -n newt create secret generic newt-pangolin-auth \
--from-literal=PANGOLIN_ENDPOINT='https://pangolin.pcenicni.dev' \ --from-literal=PANGOLIN_ENDPOINT='https://pangolin.pcenicni.dev' \
@@ -44,7 +44,7 @@ helm repo update
helm upgrade --install newt fossorial/newt \ helm upgrade --install newt fossorial/newt \
--namespace newt \ --namespace newt \
--version 1.2.0 \ --version 1.2.0 \
-f clusters/noble/apps/newt/values.yaml \ -f clusters/noble/bootstrap/newt/values.yaml \
--wait --wait
``` ```

View File

@@ -2,7 +2,7 @@
# #
# Credentials MUST come from a Secret — do not put endpoint/id/secret in git. # Credentials MUST come from a Secret — do not put endpoint/id/secret in git.
# #
# kubectl apply -f clusters/noble/apps/newt/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/newt/namespace.yaml
# kubectl -n newt create secret generic newt-pangolin-auth \ # kubectl -n newt create secret generic newt-pangolin-auth \
# --from-literal=PANGOLIN_ENDPOINT='https://pangolin.example.com' \ # --from-literal=PANGOLIN_ENDPOINT='https://pangolin.example.com' \
# --from-literal=NEWT_ID='...' \ # --from-literal=NEWT_ID='...' \
@@ -10,7 +10,7 @@
# #
# helm repo add fossorial https://charts.fossorial.io # helm repo add fossorial https://charts.fossorial.io
# helm upgrade --install newt fossorial/newt -n newt \ # helm upgrade --install newt fossorial/newt -n newt \
# --version 1.2.0 -f clusters/noble/apps/newt/values.yaml --wait # --version 1.2.0 -f clusters/noble/bootstrap/newt/values.yaml --wait
# #
# See README.md for Pangolin Integration API (domains + HTTP resources + CNAME). # See README.md for Pangolin Integration API (domains + HTTP resources + CNAME).

View File

@@ -10,9 +10,9 @@ Encrypts `Secret` manifests so they can live in git; the controller decrypts **S
```bash ```bash
helm repo add sealed-secrets https://bitnami-labs.github.io/sealed-secrets helm repo add sealed-secrets https://bitnami-labs.github.io/sealed-secrets
helm repo update helm repo update
kubectl apply -f clusters/noble/apps/sealed-secrets/namespace.yaml kubectl apply -f clusters/noble/bootstrap/sealed-secrets/namespace.yaml
helm upgrade --install sealed-secrets sealed-secrets/sealed-secrets -n sealed-secrets \ helm upgrade --install sealed-secrets sealed-secrets/sealed-secrets -n sealed-secrets \
--version 2.18.4 -f clusters/noble/apps/sealed-secrets/values.yaml --wait --version 2.18.4 -f clusters/noble/bootstrap/sealed-secrets/values.yaml --wait
``` ```
## Workstation: `kubeseal` ## Workstation: `kubeseal`

View File

@@ -2,15 +2,15 @@
# #
# helm repo add sealed-secrets https://bitnami-labs.github.io/sealed-secrets # helm repo add sealed-secrets https://bitnami-labs.github.io/sealed-secrets
# helm repo update # helm repo update
# kubectl apply -f clusters/noble/apps/sealed-secrets/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/sealed-secrets/namespace.yaml
# helm upgrade --install sealed-secrets sealed-secrets/sealed-secrets -n sealed-secrets \ # helm upgrade --install sealed-secrets sealed-secrets/sealed-secrets -n sealed-secrets \
# --version 2.18.4 -f clusters/noble/apps/sealed-secrets/values.yaml --wait # --version 2.18.4 -f clusters/noble/bootstrap/sealed-secrets/values.yaml --wait
# #
# Client: install kubeseal (same minor as controller — see README). # Client: install kubeseal (same minor as controller — see README).
# Defaults are sufficient for the lab; override here if you need key renewal, resources, etc. # Defaults are sufficient for the lab; override here if you need key renewal, resources, etc.
# #
# GitOps pattern: create Secrets only via SealedSecret (or External Secrets + Vault). # GitOps pattern: create Secrets only via SealedSecret (or External Secrets + Vault).
# Example (Newt): clusters/noble/apps/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh # Example (Newt): clusters/noble/bootstrap/sealed-secrets/examples/kubeseal-newt-pangolin-auth.sh
# Backup the controller's sealing key: kubectl -n sealed-secrets get secret sealed-secrets-key -o yaml # Backup the controller's sealing key: kubectl -n sealed-secrets get secret sealed-secrets-key -o yaml
# #
# Talos cluster secrets (bootstrap token, cluster secret, certs) belong in talhelper talsecret / # Talos cluster secrets (bootstrap token, cluster secret, certs) belong in talhelper talsecret /

View File

@@ -5,7 +5,7 @@
1. Create the namespace (Pod Security **baseline** — Traefik needs more than **restricted**): 1. Create the namespace (Pod Security **baseline** — Traefik needs more than **restricted**):
```bash ```bash
kubectl apply -f clusters/noble/apps/traefik/namespace.yaml kubectl apply -f clusters/noble/bootstrap/traefik/namespace.yaml
``` ```
2. Install the chart (**do not** use `--create-namespace` if the namespace already exists): 2. Install the chart (**do not** use `--create-namespace` if the namespace already exists):
@@ -16,11 +16,11 @@
helm upgrade --install traefik traefik/traefik \ helm upgrade --install traefik traefik/traefik \
--namespace traefik \ --namespace traefik \
--version 39.0.6 \ --version 39.0.6 \
-f clusters/noble/apps/traefik/values.yaml \ -f clusters/noble/bootstrap/traefik/values.yaml \
--wait --wait
``` ```
3. Confirm the Service has a pool address. On the **LAN**, **`*.apps.noble.lab.pcenicni.dev`** can resolve to this IP (split horizon / local DNS). **Public** names go through **Pangolin + Newt** (CNAME + API), not ExternalDNS — see **`clusters/noble/apps/newt/README.md`**. 3. Confirm the Service has a pool address. On the **LAN**, **`*.apps.noble.lab.pcenicni.dev`** can resolve to this IP (split horizon / local DNS). **Public** names go through **Pangolin + Newt** (CNAME + API), not ExternalDNS — see **`clusters/noble/bootstrap/newt/README.md`**.
```bash ```bash
kubectl get svc -n traefik traefik kubectl get svc -n traefik traefik
@@ -28,6 +28,6 @@
Values pin **`192.168.50.211`** via **`metallb.io/loadBalancerIPs`**. **`192.168.50.210`** stays free for Argo CD. Values pin **`192.168.50.211`** via **`metallb.io/loadBalancerIPs`**. **`192.168.50.210`** stays free for Argo CD.
4. Create **Ingress** resources with **`ingressClassName: traefik`** (or rely on the default class). **TLS:** add **`cert-manager.io/cluster-issuer: letsencrypt-staging`** (or **`letsencrypt-prod`**) and **`tls`** hosts — see **`clusters/noble/apps/cert-manager/README.md`**. 4. Create **Ingress** resources with **`ingressClassName: traefik`** (or rely on the default class). **TLS:** add **`cert-manager.io/cluster-issuer: letsencrypt-staging`** (or **`letsencrypt-prod`**) and **`tls`** hosts — see **`clusters/noble/bootstrap/cert-manager/README.md`**.
5. **Public DNS:** use **Newt** + Pangolin (**CNAME** at your DNS host + **Integration API** for resources/targets) — **`clusters/noble/apps/newt/README.md`**. 5. **Public DNS:** use **Newt** + Pangolin (**CNAME** at your DNS host + **Integration API** for resources/targets) — **`clusters/noble/bootstrap/newt/README.md`**.

View File

@@ -3,10 +3,10 @@
# Chart: traefik/traefik — pin version on the helm command (e.g. 39.0.6). # Chart: traefik/traefik — pin version on the helm command (e.g. 39.0.6).
# DNS: point *.apps.noble.lab.pcenicni.dev to the LoadBalancer IP below. # DNS: point *.apps.noble.lab.pcenicni.dev to the LoadBalancer IP below.
# #
# kubectl apply -f clusters/noble/apps/traefik/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/traefik/namespace.yaml
# helm repo add traefik https://traefik.github.io/charts # helm repo add traefik https://traefik.github.io/charts
# helm upgrade --install traefik traefik/traefik -n traefik \ # helm upgrade --install traefik traefik/traefik -n traefik \
# --version 39.0.6 -f clusters/noble/apps/traefik/values.yaml --wait # --version 39.0.6 -f clusters/noble/bootstrap/traefik/values.yaml --wait
service: service:
type: LoadBalancer type: LoadBalancer

View File

@@ -10,9 +10,9 @@ Standalone Vault with **file** storage on a **Longhorn** PVC (`server.dataStorag
```bash ```bash
helm repo add hashicorp https://helm.releases.hashicorp.com helm repo add hashicorp https://helm.releases.hashicorp.com
helm repo update helm repo update
kubectl apply -f clusters/noble/apps/vault/namespace.yaml kubectl apply -f clusters/noble/bootstrap/vault/namespace.yaml
helm upgrade --install vault hashicorp/vault -n vault \ helm upgrade --install vault hashicorp/vault -n vault \
--version 0.32.0 -f clusters/noble/apps/vault/values.yaml --wait --timeout 15m --version 0.32.0 -f clusters/noble/bootstrap/vault/values.yaml --wait --timeout 15m
``` ```
Verify: Verify:
@@ -27,7 +27,7 @@ kubectl -n vault exec -i sts/vault -- vault status
After **Cilium** is up, optionally restrict HTTP access to the Vault server pods (**TCP 8200**) to **`external-secrets`** and same-namespace clients: After **Cilium** is up, optionally restrict HTTP access to the Vault server pods (**TCP 8200**) to **`external-secrets`** and same-namespace clients:
```bash ```bash
kubectl apply -f clusters/noble/apps/vault/cilium-network-policy.yaml kubectl apply -f clusters/noble/bootstrap/vault/cilium-network-policy.yaml
``` ```
If you add workloads in other namespaces that call Vault, extend **`ingress`** in that manifest. If you add workloads in other namespaces that call Vault, extend **`ingress`** in that manifest.
@@ -53,7 +53,7 @@ Or create the Secret used by the optional CronJob and apply it:
```bash ```bash
kubectl -n vault create secret generic vault-unseal-key --from-literal=key='YOUR_UNSEAL_KEY' kubectl -n vault create secret generic vault-unseal-key --from-literal=key='YOUR_UNSEAL_KEY'
kubectl apply -f clusters/noble/apps/vault/unseal-cronjob.yaml kubectl apply -f clusters/noble/bootstrap/vault/unseal-cronjob.yaml
``` ```
The CronJob runs every minute and unseals if Vault is sealed and the Secret is present. The CronJob runs every minute and unseals if Vault is sealed and the Secret is present.
@@ -64,7 +64,7 @@ Vault **OSS** auto-unseal uses cloud KMS (AWS, GCP, Azure, OCI), **Transit** (an
## Kubernetes auth (External Secrets / ClusterSecretStore) ## Kubernetes auth (External Secrets / ClusterSecretStore)
**One-shot:** from the repo root, `export KUBECONFIG=talos/kubeconfig` and `export VAULT_TOKEN=…`, then run **`./clusters/noble/apps/vault/configure-kubernetes-auth.sh`** (idempotent). Then **`kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml`** on its own line (shell comments **`# …`** on the same line are parsed as extra `kubectl` args and break `apply`). **`kubectl get clustersecretstore vault`** should show **READY=True** after a few seconds. **One-shot:** from the repo root, `export KUBECONFIG=talos/kubeconfig` and `export VAULT_TOKEN=…`, then run **`./clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh`** (idempotent). Then **`kubectl apply -f clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml`** on its own line (shell comments **`# …`** on the same line are parsed as extra `kubectl` args and break `apply`). **`kubectl get clustersecretstore vault`** should show **READY=True** after a few seconds.
Run these **from your workstation** (needs `kubectl`; no local `vault` binary required). Use a **short-lived admin token** or the root token **only in your shell** — do not paste tokens into logs or chat. Run these **from your workstation** (needs `kubectl`; no local `vault` binary required). Use a **short-lived admin token** or the root token **only in your shell** — do not paste tokens into logs or chat.
@@ -139,7 +139,7 @@ EOF
' '
``` ```
**5. Apply** **`clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml`** if you have not already, then verify: **5. Apply** **`clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml`** if you have not already, then verify:
```bash ```bash
kubectl describe clustersecretstore vault kubectl describe clustersecretstore vault

View File

@@ -1,5 +1,5 @@
# CiliumNetworkPolicy — restrict who may reach Vault HTTP listener (8200). # CiliumNetworkPolicy — restrict who may reach Vault HTTP listener (8200).
# Apply after Cilium is healthy: kubectl apply -f clusters/noble/apps/vault/cilium-network-policy.yaml # Apply after Cilium is healthy: kubectl apply -f clusters/noble/bootstrap/vault/cilium-network-policy.yaml
# #
# Ingress-only policy: egress from Vault is unchanged (Kubernetes auth needs API + DNS). # Ingress-only policy: egress from Vault is unchanged (Kubernetes auth needs API + DNS).
# Extend ingress rules if other namespaces must call Vault (e.g. app workloads). # Extend ingress rules if other namespaces must call Vault (e.g. app workloads).

View File

@@ -5,9 +5,9 @@
# Usage (from repo root): # Usage (from repo root):
# export KUBECONFIG=talos/kubeconfig # or your path # export KUBECONFIG=talos/kubeconfig # or your path
# export VAULT_TOKEN='…' # root or admin token — never commit # export VAULT_TOKEN='…' # root or admin token — never commit
# ./clusters/noble/apps/vault/configure-kubernetes-auth.sh # ./clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh
# #
# Then: kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml # Then: kubectl apply -f clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml
# Verify: kubectl describe clustersecretstore vault # Verify: kubectl describe clustersecretstore vault
set -euo pipefail set -euo pipefail
@@ -73,5 +73,5 @@ EOF
echo "Done. Issuer used: $ISSUER" echo "Done. Issuer used: $ISSUER"
echo "" echo ""
echo "Next (each command on its own line — do not paste # comments after kubectl):" echo "Next (each command on its own line — do not paste # comments after kubectl):"
echo " kubectl apply -f clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml" echo " kubectl apply -f clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml"
echo " kubectl get clustersecretstore vault" echo " kubectl get clustersecretstore vault"

View File

@@ -2,7 +2,7 @@
# #
# 1) vault operator init -key-shares=1 -key-threshold=1 (lab only — single key) # 1) vault operator init -key-shares=1 -key-threshold=1 (lab only — single key)
# 2) kubectl -n vault create secret generic vault-unseal-key --from-literal=key='YOUR_UNSEAL_KEY' # 2) kubectl -n vault create secret generic vault-unseal-key --from-literal=key='YOUR_UNSEAL_KEY'
# 3) kubectl apply -f clusters/noble/apps/vault/unseal-cronjob.yaml # 3) kubectl apply -f clusters/noble/bootstrap/vault/unseal-cronjob.yaml
# #
# OSS Vault has no Kubernetes/KMS seal; this CronJob runs vault operator unseal when the server is sealed. # OSS Vault has no Kubernetes/KMS seal; this CronJob runs vault operator unseal when the server is sealed.
# Protect the Secret with RBAC; prefer cloud KMS auto-unseal for real environments. # Protect the Secret with RBAC; prefer cloud KMS auto-unseal for real environments.

View File

@@ -2,9 +2,9 @@
# #
# helm repo add hashicorp https://helm.releases.hashicorp.com # helm repo add hashicorp https://helm.releases.hashicorp.com
# helm repo update # helm repo update
# kubectl apply -f clusters/noble/apps/vault/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/vault/namespace.yaml
# helm upgrade --install vault hashicorp/vault -n vault \ # helm upgrade --install vault hashicorp/vault -n vault \
# --version 0.32.0 -f clusters/noble/apps/vault/values.yaml --wait --timeout 15m # --version 0.32.0 -f clusters/noble/bootstrap/vault/values.yaml --wait --timeout 15m
# #
# Post-install: initialize, store unseal key in Secret, apply optional unseal CronJob — see README.md # Post-install: initialize, store unseal key in Secret, apply optional unseal CronJob — see README.md
# #

View File

@@ -41,7 +41,7 @@ flowchart TB
## Network and ingress ## Network and ingress
**Northsouth (apps on LAN):** DNS for **`*.apps.noble.lab.pcenicni.dev`** → **Traefik** **`LoadBalancer` `192.168.50.211`**. **MetalLB** L2 pool **`192.168.50.210``192.168.50.229`**; **Argo CD** uses **`192.168.50.210`**. **Public** access is not in-cluster ExternalDNS: **Newt** (Pangolin tunnel) plus **CNAME** and **Integration API** per [`clusters/noble/apps/newt/README.md`](../clusters/noble/apps/newt/README.md). **Northsouth (apps on LAN):** DNS for **`*.apps.noble.lab.pcenicni.dev`** → **Traefik** **`LoadBalancer` `192.168.50.211`**. **MetalLB** L2 pool **`192.168.50.210``192.168.50.229`**; **Argo CD** uses **`192.168.50.210`**. **Public** access is not in-cluster ExternalDNS: **Newt** (Pangolin tunnel) plus **CNAME** and **Integration API** per [`clusters/noble/bootstrap/newt/README.md`](../clusters/noble/bootstrap/newt/README.md).
```mermaid ```mermaid
flowchart TB flowchart TB
@@ -114,7 +114,7 @@ flowchart TB
## Observability path ## Observability path
**kube-prometheus-stack** in **`monitoring`**: Prometheus, Grafana, Alertmanager, node-exporter, etc. **Loki** (SingleBinary) in **`loki`** with **Fluent Bit** in **`logging`** shipping to **`loki-gateway`**. Grafana Loki datasource is applied via **ConfigMap** [`clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`](../clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml). Prometheus, Grafana, Alertmanager, and Loki use **Longhorn** PVCs where configured. **kube-prometheus-stack** in **`monitoring`**: Prometheus, Grafana, Alertmanager, node-exporter, etc. **Loki** (SingleBinary) in **`loki`** with **Fluent Bit** in **`logging`** shipping to **`loki-gateway`**. Grafana Loki datasource is applied via **ConfigMap** [`clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`](../clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml). Prometheus, Grafana, Alertmanager, and Loki use **Longhorn** PVCs where configured.
```mermaid ```mermaid
flowchart LR flowchart LR
@@ -149,7 +149,7 @@ flowchart LR
## Secrets and policy ## Secrets and policy
**Sealed Secrets** decrypts `SealedSecret` objects in-cluster. **External Secrets Operator** syncs from **Vault** using **`ClusterSecretStore`** (see [`examples/vault-cluster-secret-store.yaml`](../clusters/noble/apps/external-secrets/examples/vault-cluster-secret-store.yaml)). Trust is **cluster → Vault** (ESO calls Vault; Vault does not initiate cluster trust). **Kyverno** with **kyverno-policies** enforces **PSS baseline** in **Audit**. **Sealed Secrets** decrypts `SealedSecret` objects in-cluster. **External Secrets Operator** syncs from **Vault** using **`ClusterSecretStore`** (see [`examples/vault-cluster-secret-store.yaml`](../clusters/noble/bootstrap/external-secrets/examples/vault-cluster-secret-store.yaml)). Trust is **cluster → Vault** (ESO calls Vault; Vault does not initiate cluster trust). **Kyverno** with **kyverno-policies** enforces **PSS baseline** in **Audit**.
```mermaid ```mermaid
flowchart LR flowchart LR
@@ -218,7 +218,7 @@ See [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md) for the authoritative
## Narrative ## Narrative
The **noble** environment is a **Talos** lab cluster on **`192.168.50.0/24`** with **three control plane nodes and one worker**, schedulable workloads on control planes enabled, and the Kubernetes API exposed through **kube-vip** at **`192.168.50.230`**. **Cilium** provides the CNI after Talos bootstrap with **`cni: none`**; **MetalLB** advertises **`192.168.50.210``192.168.50.229`**, pinning **Argo CD** to **`192.168.50.210`** and **Traefik** to **`192.168.50.211`** for **`*.apps.noble.lab.pcenicni.dev`**. **cert-manager** issues certificates for Traefik Ingresses; **GitOps** is **Helm plus Argo CD** with manifests under **`clusters/noble/`** and bootstrap under **`clusters/noble/bootstrap/argocd/`**. **Observability** uses **kube-prometheus-stack** in **`monitoring`**, **Loki** and **Fluent Bit** with Grafana wired via a **ConfigMap** datasource, with **Longhorn** PVCs for Prometheus, Grafana, Alertmanager, Loki, and **Vault**. **Secrets** combine **Sealed Secrets** for git-encrypted material, **Vault** with **External Secrets** for dynamic sync, and **Kyverno** enforces **Pod Security Standards baseline** in **Audit**. **Public** access uses **Newt** to **Pangolin** with **CNAME** and Integration API steps as documented—not generic in-cluster public DNS. The **noble** environment is a **Talos** lab cluster on **`192.168.50.0/24`** with **three control plane nodes and one worker**, schedulable workloads on control planes enabled, and the Kubernetes API exposed through **kube-vip** at **`192.168.50.230`**. **Cilium** provides the CNI after Talos bootstrap with **`cni: none`**; **MetalLB** advertises **`192.168.50.210``192.168.50.229`**, pinning **Argo CD** to **`192.168.50.210`** and **Traefik** to **`192.168.50.211`** for **`*.apps.noble.lab.pcenicni.dev`**. 
**cert-manager** issues certificates for Traefik Ingresses; **GitOps** is **Ansible-driven Helm** for the platform (**`clusters/noble/bootstrap/`**) plus optional **Argo CD** app-of-apps (**`clusters/noble/apps/`**, **`clusters/noble/bootstrap/argocd/`**). **Observability** uses **kube-prometheus-stack** in **`monitoring`**, **Loki** and **Fluent Bit** with Grafana wired via a **ConfigMap** datasource, with **Longhorn** PVCs for Prometheus, Grafana, Alertmanager, Loki, and **Vault**. **Secrets** combine **Sealed Secrets** for git-encrypted material, **Vault** with **External Secrets** for dynamic sync, and **Kyverno** enforces **Pod Security Standards baseline** in **Audit**. **Public** access uses **Newt** to **Pangolin** with **CNAME** and Integration API steps as documented—not generic in-cluster public DNS.
--- ---

View File

@@ -7,20 +7,20 @@ This document is the **exported TODO** for the **noble** Talos cluster (4 nodes)
Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (Vault **CiliumNetworkPolicy**, **`talos/runbooks/`**). **Next focus:** optional **Alertmanager** receivers (Slack/PagerDuty); tighten **RBAC** (Headlamp / cluster-admin); **Cilium** policies for other namespaces as needed; enable **Mend Renovate** for PRs; Pangolin/sample Ingress; **Velero** when S3 exists. Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (Vault **CiliumNetworkPolicy**, **`talos/runbooks/`**). **Next focus:** optional **Alertmanager** receivers (Slack/PagerDuty); tighten **RBAC** (Headlamp / cluster-admin); **Cilium** policies for other namespaces as needed; enable **Mend Renovate** for PRs; Pangolin/sample Ingress; **Velero** when S3 exists.
- **Talos** v1.12.6 (target) / **Kubernetes** as bundled — four nodes **Ready** unless upgrading; **`talosctl health`**; **`talos/kubeconfig`** is **local only** (gitignored — never commit; regenerate with `talosctl kubeconfig` per `talos/README.md`). **Image Factory (nocloud installer):** `factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6` - **Talos** v1.12.6 (target) / **Kubernetes** as bundled — four nodes **Ready** unless upgrading; **`talosctl health`**; **`talos/kubeconfig`** is **local only** (gitignored — never commit; regenerate with `talosctl kubeconfig` per `talos/README.md`). **Image Factory (nocloud installer):** `factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6`
- **Cilium** Helm **1.16.6** / app **1.16.6** (`clusters/noble/apps/cilium/`, phase 1 values). - **Cilium** Helm **1.16.6** / app **1.16.6** (`clusters/noble/bootstrap/cilium/`, phase 1 values).
- **MetalLB** Helm **0.15.3** / app **v0.15.3**; **IPAddressPool** `noble-l2` + **L2Advertisement** — pool **`192.168.50.210``192.168.50.229`**. - **MetalLB** Helm **0.15.3** / app **v0.15.3**; **IPAddressPool** `noble-l2` + **L2Advertisement** — pool **`192.168.50.210``192.168.50.229`**.
- **kube-vip** DaemonSet **3/3** on control planes; VIP **`192.168.50.230`** on **`ens18`** (`vip_subnet` **`/32`** required — bare **`32`** breaks parsing). **Verified from workstation:** `kubectl config set-cluster noble --server=https://192.168.50.230:6443` then **`kubectl get --raw /healthz`** → **`ok`** (`talos/kubeconfig`; see `talos/README.md`). - **kube-vip** DaemonSet **3/3** on control planes; VIP **`192.168.50.230`** on **`ens18`** (`vip_subnet` **`/32`** required — bare **`32`** breaks parsing). **Verified from workstation:** `kubectl config set-cluster noble --server=https://192.168.50.230:6443` then **`kubectl get --raw /healthz`** → **`ok`** (`talos/kubeconfig`; see `talos/README.md`).
- **metrics-server** Helm **3.13.0** / app **v0.8.0**`clusters/noble/apps/metrics-server/values.yaml` (`--kubelet-insecure-tls` for Talos); **`kubectl top nodes`** works. - **metrics-server** Helm **3.13.0** / app **v0.8.0**`clusters/noble/bootstrap/metrics-server/values.yaml` (`--kubelet-insecure-tls` for Talos); **`kubectl top nodes`** works.
- **Longhorn** Helm **1.11.1** / app **v1.11.1**`clusters/noble/apps/longhorn/` (PSA **privileged** namespace, `defaultDataPath` `/var/mnt/longhorn`, `preUpgradeChecker` enabled); **StorageClass** `longhorn` (default); **`nodes.longhorn.io`** all **Ready**; test **PVC** `Bound` on `longhorn`. - **Longhorn** Helm **1.11.1** / app **v1.11.1**`clusters/noble/bootstrap/longhorn/` (PSA **privileged** namespace, `defaultDataPath` `/var/mnt/longhorn`, `preUpgradeChecker` enabled); **StorageClass** `longhorn` (default); **`nodes.longhorn.io`** all **Ready**; test **PVC** `Bound` on `longhorn`.
- **Traefik** Helm **39.0.6** / app **v3.6.11**`clusters/noble/apps/traefik/`; **`Service`** **`LoadBalancer`** **`EXTERNAL-IP` `192.168.50.211`**; **`IngressClass`** **`traefik`** (default). Point **`*.apps.noble.lab.pcenicni.dev`** at **`192.168.50.211`**. MetalLB pool verification was done before replacing the temporary nginx test with Traefik. - **Traefik** Helm **39.0.6** / app **v3.6.11**`clusters/noble/bootstrap/traefik/`; **`Service`** **`LoadBalancer`** **`EXTERNAL-IP` `192.168.50.211`**; **`IngressClass`** **`traefik`** (default). Point **`*.apps.noble.lab.pcenicni.dev`** at **`192.168.50.211`**. MetalLB pool verification was done before replacing the temporary nginx test with Traefik.
- **cert-manager** Helm **v1.20.0** / app **v1.20.0**`clusters/noble/apps/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (**DNS-01** via **Cloudflare** for **`pcenicni.dev`**, Secret **`cloudflare-dns-api-token`** in **`cert-manager`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox). - **cert-manager** Helm **v1.20.0** / app **v1.20.0**`clusters/noble/bootstrap/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (**DNS-01** via **Cloudflare** for **`pcenicni.dev`**, Secret **`cloudflare-dns-api-token`** in **`cert-manager`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox).
- **Newt** Helm **1.2.0** / app **1.10.1**`clusters/noble/apps/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). Prefer a **SealedSecret** in git (`kubeseal` — see `clusters/noble/apps/sealed-secrets/examples/`) after rotating credentials if they were exposed. **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolins domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/apps/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver). - **Newt** Helm **1.2.0** / app **1.10.1**`clusters/noble/bootstrap/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). Prefer a **SealedSecret** in git (`kubeseal` — see `clusters/noble/bootstrap/sealed-secrets/examples/`) after rotating credentials if they were exposed. **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolins domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/bootstrap/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver).
- **Argo CD** Helm **9.4.17** / app **v3.3.6**`clusters/noble/bootstrap/argocd/`; **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`**; app-of-apps scaffold under **`bootstrap/argocd/apps/`** (edit **`root-application.yaml`** `repoURL` before applying). - **Argo CD** Helm **9.4.17** / app **v3.3.6**`clusters/noble/bootstrap/argocd/`; **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`**; app-of-apps root syncs **`clusters/noble/apps/`** (edit **`root-application.yaml`** `repoURL` before applying).
- **kube-prometheus-stack** — Helm chart **82.15.1**`clusters/noble/apps/kube-prometheus-stack/` (**namespace** `monitoring`, PSA **privileged****node-exporter** needs host mounts); **Longhorn** PVCs for Prometheus, Grafana, Alertmanager; **node-exporter** DaemonSet **4/4**. **Grafana Ingress:** **`https://grafana.apps.noble.lab.pcenicni.dev`** (Traefik **`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **Loki** datasource in Grafana: ConfigMap **`clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`** (sidecar label **`grafana_datasource: "1"`**) — not via **`grafana.additionalDataSources`** in the chart. **`helm upgrade --install` with `--wait` is silent until done** — use **`--timeout 30m`**; Grafana admin: Secret **`kube-prometheus-grafana`**, keys **`admin-user`** / **`admin-password`**. - **kube-prometheus-stack** — Helm chart **82.15.1**`clusters/noble/bootstrap/kube-prometheus-stack/` (**namespace** `monitoring`, PSA **privileged****node-exporter** needs host mounts); **Longhorn** PVCs for Prometheus, Grafana, Alertmanager; **node-exporter** DaemonSet **4/4**. **Grafana Ingress:** **`https://grafana.apps.noble.lab.pcenicni.dev`** (Traefik **`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **Loki** datasource in Grafana: ConfigMap **`clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** (sidecar label **`grafana_datasource: "1"`**) — not via **`grafana.additionalDataSources`** in the chart. **`helm upgrade --install` with `--wait` is silent until done** — use **`--timeout 30m`**; Grafana admin: Secret **`kube-prometheus-grafana`**, keys **`admin-user`** / **`admin-password`**.
- **Loki** + **Fluent Bit****`grafana/loki` 6.55.0** SingleBinary + **filesystem** on **Longhorn** (`clusters/noble/apps/loki/`); **`loki.auth_enabled: false`**; **`chunksCache.enabled: false`** (no memcached chunk cache). **`fluent/fluent-bit` 0.56.0** → **`loki-gateway.loki.svc:80`** (`clusters/noble/apps/fluent-bit/`); **`logging`** PSA **privileged**. **Grafana Explore:** **`kubectl apply -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`** then **Explore → Loki** (e.g. `{job="fluent-bit"}`). - **Loki** + **Fluent Bit****`grafana/loki` 6.55.0** SingleBinary + **filesystem** on **Longhorn** (`clusters/noble/bootstrap/loki/`); **`loki.auth_enabled: false`**; **`chunksCache.enabled: false`** (no memcached chunk cache). **`fluent/fluent-bit` 0.56.0** → **`loki-gateway.loki.svc:80`** (`clusters/noble/bootstrap/fluent-bit/`); **`logging`** PSA **privileged**. **Grafana Explore:** **`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** then **Explore → Loki** (e.g. `{job="fluent-bit"}`).
- **Sealed Secrets** Helm **2.18.4** / app **0.36.1**`clusters/noble/apps/sealed-secrets/` (namespace **`sealed-secrets`**); **`kubeseal`** on client should match controller minor (**README**); back up **`sealed-secrets-key`** (see README). - **Sealed Secrets** Helm **2.18.4** / app **0.36.1**`clusters/noble/bootstrap/sealed-secrets/` (namespace **`sealed-secrets`**); **`kubeseal`** on client should match controller minor (**README**); back up **`sealed-secrets-key`** (see README).
- **External Secrets Operator** Helm **2.2.0** / app **v2.2.0**`clusters/noble/apps/external-secrets/`; Vault **`ClusterSecretStore`** in **`examples/vault-cluster-secret-store.yaml`** (**`http://`** to match Vault listener — apply after Vault **Kubernetes auth**). - **External Secrets Operator** Helm **2.2.0** / app **v2.2.0**`clusters/noble/bootstrap/external-secrets/`; Vault **`ClusterSecretStore`** in **`examples/vault-cluster-secret-store.yaml`** (**`http://`** to match Vault listener — apply after Vault **Kubernetes auth**).
- **Vault** Helm **0.32.0** / app **1.21.2**`clusters/noble/apps/vault/` — standalone **file** storage, **Longhorn** PVC; **HTTP** listener (`global.tlsDisable`); optional **CronJob** lab unseal **`unseal-cronjob.yaml`**; **not** initialized in git — run **`vault operator init`** per **`README.md`**. - **Vault** Helm **0.32.0** / app **1.21.2**`clusters/noble/bootstrap/vault/` — standalone **file** storage, **Longhorn** PVC; **HTTP** listener (`global.tlsDisable`); optional **CronJob** lab unseal **`unseal-cronjob.yaml`**; **not** initialized in git — run **`vault operator init`** per **`README.md`**.
- **Still open:** **Renovate** — install **[Mend Renovate](https://github.com/apps/renovate)** (or self-host) so PRs run; optional **Alertmanager** notification channels; optional **sample Ingress + cert + Pangolin** end-to-end; **Velero** when S3 is ready; **Argo CD SSO**. - **Still open:** **Renovate** — install **[Mend Renovate](https://github.com/apps/renovate)** (or self-host) so PRs run; optional **Alertmanager** notification channels; optional **sample Ingress + cert + Pangolin** end-to-end; **Velero** when S3 is ready; **Argo CD SSO**.
## Inventory ## Inventory
@@ -39,11 +39,11 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (Vaul
| Kubernetes API VIP (kube-vip) | `192.168.50.230` (see `talos/README.md`; align with `talos/talconfig.yaml` `additionalApiServerCertSans`) | | Kubernetes API VIP (kube-vip) | `192.168.50.230` (see `talos/README.md`; align with `talos/talconfig.yaml` `additionalApiServerCertSans`) |
| MetalLB L2 pool | `192.168.50.210``192.168.50.229` | | MetalLB L2 pool | `192.168.50.210``192.168.50.229` |
| Argo CD `LoadBalancer` | **Pick one IP** in the MetalLB pool (e.g. `192.168.50.210`) | | Argo CD `LoadBalancer` | **Pick one IP** in the MetalLB pool (e.g. `192.168.50.210`) |
| Traefik (apps ingress) | `192.168.50.211`**`metallb.io/loadBalancerIPs`** in `clusters/noble/apps/traefik/values.yaml` | | Traefik (apps ingress) | `192.168.50.211`**`metallb.io/loadBalancerIPs`** in `clusters/noble/bootstrap/traefik/values.yaml` |
| Apps ingress (LAN / split horizon) | `*.apps.noble.lab.pcenicni.dev` → Traefik LB | | Apps ingress (LAN / split horizon) | `*.apps.noble.lab.pcenicni.dev` → Traefik LB |
| Grafana (Ingress + TLS) | **`grafana.apps.noble.lab.pcenicni.dev`** — `grafana.ingress` in `clusters/noble/apps/kube-prometheus-stack/values.yaml` (**`letsencrypt-prod`**) | | Grafana (Ingress + TLS) | **`grafana.apps.noble.lab.pcenicni.dev`** — `grafana.ingress` in `clusters/noble/bootstrap/kube-prometheus-stack/values.yaml` (**`letsencrypt-prod`**) |
| Headlamp (Ingress + TLS) | **`headlamp.apps.noble.lab.pcenicni.dev`** — chart `ingress` in `clusters/noble/apps/headlamp/` (**`letsencrypt-prod`**, **`ingressClassName: traefik`**) | | Headlamp (Ingress + TLS) | **`headlamp.apps.noble.lab.pcenicni.dev`** — chart `ingress` in `clusters/noble/bootstrap/headlamp/` (**`letsencrypt-prod`**, **`ingressClassName: traefik`**) |
| Public DNS (Pangolin) | **Newt** tunnel + **CNAME** at registrar + **Integration API**`clusters/noble/apps/newt/` | | Public DNS (Pangolin) | **Newt** tunnel + **CNAME** at registrar + **Integration API**`clusters/noble/bootstrap/newt/` |
| Velero | S3-compatible URL — configure later | | Velero | S3-compatible URL — configure later |
## Versions ## Versions
@@ -51,7 +51,7 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (Vaul
- Talos: **v1.12.6** — align `talosctl` client with node image - Talos: **v1.12.6** — align `talosctl` client with node image
- Talos **Image Factory** (iscsi-tools + util-linux-tools): **`factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6`** — same schematic must appear in **`machine.install.image`** after `talhelper genconfig` (bare metal may use `metal-installer/` instead of `nocloud-installer/`) - Talos **Image Factory** (iscsi-tools + util-linux-tools): **`factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6`** — same schematic must appear in **`machine.install.image`** after `talhelper genconfig` (bare metal may use `metal-installer/` instead of `nocloud-installer/`)
- Kubernetes: **1.35.2** on current nodes (bundled with Talos; not pinned in repo) - Kubernetes: **1.35.2** on current nodes (bundled with Talos; not pinned in repo)
- Cilium: **1.16.6** (Helm chart; see `clusters/noble/apps/cilium/README.md`) - Cilium: **1.16.6** (Helm chart; see `clusters/noble/bootstrap/cilium/README.md`)
- MetalLB: **0.15.3** (Helm chart; app **v0.15.3**) - MetalLB: **0.15.3** (Helm chart; app **v0.15.3**)
- metrics-server: **3.13.0** (Helm chart; app **v0.8.0**) - metrics-server: **3.13.0** (Helm chart; app **v0.8.0**)
- Longhorn: **1.11.1** (Helm chart; app **v1.11.1**) - Longhorn: **1.11.1** (Helm chart; app **v1.11.1**)
@@ -65,7 +65,7 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (Vaul
- Sealed Secrets: **2.18.4** (Helm chart `sealed-secrets/sealed-secrets`; app **0.36.1**) - Sealed Secrets: **2.18.4** (Helm chart `sealed-secrets/sealed-secrets`; app **0.36.1**)
- External Secrets Operator: **2.2.0** (Helm chart `external-secrets/external-secrets`; app **v2.2.0**) - External Secrets Operator: **2.2.0** (Helm chart `external-secrets/external-secrets`; app **v2.2.0**)
- Vault: **0.32.0** (Helm chart `hashicorp/vault`; app **1.21.2**) - Vault: **0.32.0** (Helm chart `hashicorp/vault`; app **1.21.2**)
- Kyverno: **3.7.1** (Helm chart `kyverno/kyverno`; app **v1.17.1**); **kyverno-policies** **3.7.1****baseline** PSS, **Audit** (`clusters/noble/apps/kyverno/`) - Kyverno: **3.7.1** (Helm chart `kyverno/kyverno`; app **v1.17.1**); **kyverno-policies** **3.7.1****baseline** PSS, **Audit** (`clusters/noble/bootstrap/kyverno/`)
- Headlamp: **0.40.1** (Helm chart `headlamp/headlamp`; app matches chart — see [Artifact Hub](https://artifacthub.io/packages/helm/headlamp/headlamp)) - Headlamp: **0.40.1** (Helm chart `headlamp/headlamp`; app matches chart — see [Artifact Hub](https://artifacthub.io/packages/helm/headlamp/headlamp))
- Renovate: **hosted** (Mend **Renovate** GitHub/GitLab app — no cluster chart) **or** **self-hosted** — pin chart when added ([Helm charts](https://docs.renovatebot.com/helm-charts/), OCI `ghcr.io/renovatebot/charts/renovate`); pair **`renovate.json`** with this repos Helm paths under **`clusters/noble/`** - Renovate: **hosted** (Mend **Renovate** GitHub/GitLab app — no cluster chart) **or** **self-hosted** — pin chart when added ([Helm charts](https://docs.renovatebot.com/helm-charts/), OCI `ghcr.io/renovatebot/charts/renovate`); pair **`renovate.json`** with this repos Helm paths under **`clusters/noble/`**
@@ -79,25 +79,25 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (Vaul
| talhelper source (active) | `talos/talconfig.yaml` — may be **wipe-phase** (no Longhorn volume) during disk recovery | | talhelper source (active) | `talos/talconfig.yaml` — may be **wipe-phase** (no Longhorn volume) during disk recovery |
| Longhorn volume restore | `talos/talconfig.with-longhorn.yaml` — copy to `talconfig.yaml` after GPT wipe (see `talos/README.md` §5) | | Longhorn volume restore | `talos/talconfig.with-longhorn.yaml` — copy to `talconfig.yaml` after GPT wipe (see `talos/README.md` §5) |
| Longhorn GPT wipe automation | `talos/scripts/longhorn-gpt-recovery.sh` | | Longhorn GPT wipe automation | `talos/scripts/longhorn-gpt-recovery.sh` |
| kube-vip (kustomize) | `clusters/noble/apps/kube-vip/` (`vip_interface` e.g. `ens18`) | | kube-vip (kustomize) | `clusters/noble/bootstrap/kube-vip/` (`vip_interface` e.g. `ens18`) |
| Cilium (Helm values) | `clusters/noble/apps/cilium/``values.yaml` (phase 1), optional `values-kpr.yaml`, `README.md` | | Cilium (Helm values) | `clusters/noble/bootstrap/cilium/``values.yaml` (phase 1), optional `values-kpr.yaml`, `README.md` |
| MetalLB | `clusters/noble/apps/metallb/``namespace.yaml` (PSA **privileged**), `ip-address-pool.yaml`, `kustomization.yaml`, `README.md` | | MetalLB | `clusters/noble/bootstrap/metallb/``namespace.yaml` (PSA **privileged**), `ip-address-pool.yaml`, `kustomization.yaml`, `README.md` |
| Longhorn | `clusters/noble/apps/longhorn/``values.yaml`, `namespace.yaml` (PSA **privileged**), `kustomization.yaml` | | Longhorn | `clusters/noble/bootstrap/longhorn/``values.yaml`, `namespace.yaml` (PSA **privileged**), `kustomization.yaml` |
| metrics-server (Helm values) | `clusters/noble/apps/metrics-server/values.yaml` | | metrics-server (Helm values) | `clusters/noble/bootstrap/metrics-server/values.yaml` |
| Traefik (Helm values) | `clusters/noble/apps/traefik/``values.yaml`, `namespace.yaml`, `README.md` | | Traefik (Helm values) | `clusters/noble/bootstrap/traefik/``values.yaml`, `namespace.yaml`, `README.md` |
| cert-manager (Helm + ClusterIssuers) | `clusters/noble/apps/cert-manager/``values.yaml`, `namespace.yaml`, `kustomization.yaml`, `README.md` | | cert-manager (Helm + ClusterIssuers) | `clusters/noble/bootstrap/cert-manager/``values.yaml`, `namespace.yaml`, `kustomization.yaml`, `README.md` |
| Newt / Pangolin tunnel (Helm) | `clusters/noble/apps/newt/``values.yaml`, `namespace.yaml`, `README.md` | | Newt / Pangolin tunnel (Helm) | `clusters/noble/bootstrap/newt/``values.yaml`, `namespace.yaml`, `README.md` |
| Argo CD (bootstrap + app-of-apps) | `clusters/noble/bootstrap/argocd/``values.yaml`, `root-application.yaml`, `apps/`, `README.md` | | Argo CD (Helm) + optional app-of-apps | `clusters/noble/bootstrap/argocd/``values.yaml`, `root-application.yaml`, `README.md`; optional **`Application`** tree in **`clusters/noble/apps/`** |
| kube-prometheus-stack (Helm values) | `clusters/noble/apps/kube-prometheus-stack/``values.yaml`, `namespace.yaml` | | kube-prometheus-stack (Helm values) | `clusters/noble/bootstrap/kube-prometheus-stack/``values.yaml`, `namespace.yaml` |
| Grafana Loki datasource (ConfigMap; no chart change) | `clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml` | | Grafana Loki datasource (ConfigMap; no chart change) | `clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml` |
| Loki (Helm values) | `clusters/noble/apps/loki/``values.yaml`, `namespace.yaml` | | Loki (Helm values) | `clusters/noble/bootstrap/loki/``values.yaml`, `namespace.yaml` |
| Fluent Bit → Loki (Helm values) | `clusters/noble/apps/fluent-bit/``values.yaml`, `namespace.yaml` | | Fluent Bit → Loki (Helm values) | `clusters/noble/bootstrap/fluent-bit/``values.yaml`, `namespace.yaml` |
| Sealed Secrets (Helm) | `clusters/noble/apps/sealed-secrets/``values.yaml`, `namespace.yaml`, `README.md` | | Sealed Secrets (Helm) | `clusters/noble/bootstrap/sealed-secrets/``values.yaml`, `namespace.yaml`, `README.md` |
| External Secrets Operator (Helm + Vault store example) | `clusters/noble/apps/external-secrets/``values.yaml`, `namespace.yaml`, `README.md`, `examples/vault-cluster-secret-store.yaml` | | External Secrets Operator (Helm + Vault store example) | `clusters/noble/bootstrap/external-secrets/``values.yaml`, `namespace.yaml`, `README.md`, `examples/vault-cluster-secret-store.yaml` |
| Vault (Helm + optional unseal CronJob) | `clusters/noble/apps/vault/``values.yaml`, `namespace.yaml`, `unseal-cronjob.yaml`, `cilium-network-policy.yaml`, `configure-kubernetes-auth.sh`, `README.md` | | Vault (Helm + optional unseal CronJob) | `clusters/noble/bootstrap/vault/``values.yaml`, `namespace.yaml`, `unseal-cronjob.yaml`, `cilium-network-policy.yaml`, `configure-kubernetes-auth.sh`, `README.md` |
| Kyverno + PSS baseline policies | `clusters/noble/apps/kyverno/``values.yaml`, `policies-values.yaml`, `namespace.yaml`, `README.md` | | Kyverno + PSS baseline policies | `clusters/noble/bootstrap/kyverno/``values.yaml`, `policies-values.yaml`, `namespace.yaml`, `README.md` |
| Headlamp (Helm + Ingress) | `clusters/noble/apps/headlamp/``values.yaml`, `namespace.yaml`, `README.md` | | Headlamp (Helm + Ingress) | `clusters/noble/bootstrap/headlamp/``values.yaml`, `namespace.yaml`, `README.md` |
| Renovate (repo config + optional self-hosted Helm) | **`renovate.json`** at repo root; optional `clusters/noble/apps/renovate/` for self-hosted chart + token Secret (**Sealed Secrets** / **ESO** after **Phase E**) | | Renovate (repo config + optional self-hosted Helm) | **`renovate.json`** at repo root; optional self-hosted chart under **`clusters/noble/apps/`** (Argo) + token Secret (**Sealed Secrets** / **ESO** after **Phase E**) |
**Git vs cluster:** manifests and `talconfig` live in git; **`talhelper genconfig -o out`**, bootstrap, Helm, and `kubectl` run on your LAN. See **`talos/README.md`** for workstation reachability (lab LAN/VPN), **`talosctl kubeconfig`** vs Kubernetes `server:` (VIP vs node IP), and **`--insecure`** only in maintenance. **Git vs cluster:** manifests and `talconfig` live in git; **`talhelper genconfig -o out`**, bootstrap, Helm, and `kubectl` run on your LAN. See **`talos/README.md`** for workstation reachability (lab LAN/VPN), **`talosctl kubeconfig`** vs Kubernetes `server:` (VIP vs node IP), and **`--insecure`** only in maintenance.
@@ -105,10 +105,10 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (Vaul
1. **Talos** installed; **Cilium** (or chosen CNI) **before** most workloads — with `cni: none`, nodes stay **NotReady** / **network-unavailable** taint until CNI is up. 1. **Talos** installed; **Cilium** (or chosen CNI) **before** most workloads — with `cni: none`, nodes stay **NotReady** / **network-unavailable** taint until CNI is up.
2. **MetalLB Helm chart** (CRDs + controller) **before** `kubectl apply -k` on the pool manifests. 2. **MetalLB Helm chart** (CRDs + controller) **before** `kubectl apply -k` on the pool manifests.
3. **`clusters/noble/apps/metallb/namespace.yaml`** before or merged onto `metallb-system` so Pod Security does not block speaker (see `apps/metallb/README.md`). 3. **`clusters/noble/bootstrap/metallb/namespace.yaml`** before or merged onto `metallb-system` so Pod Security does not block speaker (see `bootstrap/metallb/README.md`).
4. **Longhorn:** Talos user volume + extensions in `talconfig.with-longhorn.yaml` (when restored); Helm **`defaultDataPath`** in `clusters/noble/apps/longhorn/values.yaml`. 4. **Longhorn:** Talos user volume + extensions in `talconfig.with-longhorn.yaml` (when restored); Helm **`defaultDataPath`** in `clusters/noble/bootstrap/longhorn/values.yaml`.
5. **Loki → Fluent Bit → Grafana datasource:** deploy **Loki** (`loki-gateway` Service) before **Fluent Bit**; apply **`clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`** after **Loki** (sidecar picks up the ConfigMap — no kube-prometheus values change for Loki). 5. **Loki → Fluent Bit → Grafana datasource:** deploy **Loki** (`loki-gateway` Service) before **Fluent Bit**; apply **`clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** after **Loki** (sidecar picks up the ConfigMap — no kube-prometheus values change for Loki).
6. **Vault:** **Longhorn** default **StorageClass** before **`clusters/noble/apps/vault/`** Helm (PVC **`data-vault-0`**); **External Secrets** **`ClusterSecretStore`** after Vault is initialized, unsealed, and **Kubernetes auth** is configured. 6. **Vault:** **Longhorn** default **StorageClass** before **`clusters/noble/bootstrap/vault/`** Helm (PVC **`data-vault-0`**); **External Secrets** **`ClusterSecretStore`** after Vault is initialized, unsealed, and **Kubernetes auth** is configured.
7. **Headlamp:** **Traefik** + **cert-manager** (**`letsencrypt-prod`**) before exposing **`headlamp.apps.noble.lab.pcenicni.dev`**; treat as **cluster-admin** UI — protect with network policy / SSO when hardening (**Phase G**). 7. **Headlamp:** **Traefik** + **cert-manager** (**`letsencrypt-prod`**) before exposing **`headlamp.apps.noble.lab.pcenicni.dev`**; treat as **cluster-admin** UI — protect with network policy / SSO when hardening (**Phase G**).
8. **Renovate:** **Git remote** + platform access (**hosted app** needs org/repo install; **self-hosted** needs **`RENOVATE_TOKEN`** and chart **`renovate.config`**). If the bot runs **in-cluster**, add the token **after** **Sealed Secrets** / **Vault** (**Phase E**) — no ingress required for the bot itself. 8. **Renovate:** **Git remote** + platform access (**hosted app** needs org/repo install; **self-hosted** needs **`RENOVATE_TOKEN`** and chart **`renovate.config`**). If the bot runs **in-cluster**, add the token **after** **Sealed Secrets** / **Vault** (**Phase E**) — no ingress required for the bot itself.
@@ -130,7 +130,7 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (Vaul
- [x] `apply-config` all nodes (`talos/README.md` §2 — **no** `--insecure` after nodes join; use `TALOSCONFIG`)
- [x] `talosctl bootstrap` once; other control planes and worker join
- [x] `talosctl kubeconfig` → working `kubectl` (`talos/README.md` §3 — override `server:` if VIP not reachable from workstation)
- [x] **kube-vip manifests** in `clusters/noble/bootstrap/kube-vip`
- [x] kube-vip healthy; `vip_interface` matches uplink (`talosctl get links`); VIP reachable where needed
- [x] `talosctl health` (e.g. `talosctl health -n 192.168.50.20` with `TALOSCONFIG` set)
@@ -138,45 +138,45 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (Vaul
**Install order:** **Cilium** → **metrics-server** → **Longhorn** (Talos disk + Helm) → **MetalLB** (Helm → pool manifests) → ingress / certs / DNS as planned.
- [x] **Cilium** (Helm **1.16.6**) — **required** before MetalLB if `cni: none` (`clusters/noble/bootstrap/cilium/`)
- [x] **metrics-server** — Helm **3.13.0**; values in `clusters/noble/bootstrap/metrics-server/values.yaml`; verify `kubectl top nodes`
- [x] **Longhorn** — Talos: user volume + kubelet mounts + extensions (`talos/README.md` §5); Helm **1.11.1**; `kubectl apply -k clusters/noble/bootstrap/longhorn`; verify **`nodes.longhorn.io`** and test PVC **`Bound`**
- [x] **MetalLB** — chart installed; **pool + L2** from `clusters/noble/bootstrap/metallb/` applied (`192.168.50.210`–`229`)
- [x] **`Service` `LoadBalancer`** / pool check — MetalLB assigns from `210`–`229` (validated before Traefik; temporary nginx test removed in favor of Traefik)
- [x] **Traefik** `LoadBalancer` for `*.apps.noble.lab.pcenicni.dev` — `clusters/noble/bootstrap/traefik/`; **`192.168.50.211`**
- [x] **cert-manager** + ClusterIssuer (**`letsencrypt-staging`** / **`letsencrypt-prod`**) — `clusters/noble/bootstrap/cert-manager/`
- [x] **Newt** (Pangolin tunnel; replaces ExternalDNS for public DNS) — `clusters/noble/bootstrap/newt/` — **`newt-pangolin-auth`**; CNAME + **Integration API** per **`newt/README.md`**
## Phase C — GitOps
- [x] **Argo CD** bootstrap — `clusters/noble/bootstrap/argocd/` (`helm upgrade --install argocd …`) — also covered by **`ansible/playbooks/noble.yml`** (role **`noble_argocd`**)
- [x] Argo CD server **LoadBalancer** — **`192.168.50.210`** (see `values.yaml`)
- [x] **App-of-apps** — optional; **`clusters/noble/apps/kustomization.yaml`** is **empty** (core stack is **Ansible**-managed from **`clusters/noble/bootstrap/`**, not Argo). Set **`repoURL`** in **`root-application.yaml`** and add **`Application`** manifests only for optional GitOps workloads — see **`clusters/noble/apps/README.md`**
- [x] **Renovate** — **`renovate.json`** at repo root ([Renovate](https://docs.renovatebot.com/) — **Kubernetes** manager for **`clusters/noble/**/*.yaml`** image pins; grouped minor/patch PRs). **Activate PRs:** install **[Mend Renovate](https://github.com/apps/renovate)** on the Git repo (**Option A**), or **Option B:** self-hosted chart per [Helm charts](https://docs.renovatebot.com/helm-charts/) + token from **Sealed Secrets** / **ESO**. Helm **chart** versions pinned only in comments still need manual bumps or extra **regex** `customManagers` — extend **`renovate.json`** as needed.
- [ ] SSO — later
## Phase D — Observability
- [x] **kube-prometheus-stack** — `kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml` then **`helm upgrade --install`** as in `clusters/noble/bootstrap/kube-prometheus-stack/values.yaml` (chart **82.15.1**); PVCs **`longhorn`**; **`--wait --timeout 30m`** recommended; verify **`kubectl -n monitoring get pods,pvc`**
- [x] **Loki** + **Fluent Bit** + **Grafana Loki datasource** — **order:** **`kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml`** → **`helm upgrade --install loki`** `grafana/loki` **6.55.0** `-f clusters/noble/bootstrap/loki/values.yaml` → **`kubectl apply -f clusters/noble/bootstrap/fluent-bit/namespace.yaml`** → **`helm upgrade --install fluent-bit`** `fluent/fluent-bit` **0.56.0** `-f clusters/noble/bootstrap/fluent-bit/values.yaml` → **`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`**. Verify **Explore → Loki** in Grafana; **`kubectl -n loki get pods,pvc`**, **`kubectl -n logging get pods`**
- [x] **Headlamp** — Kubernetes web UI ([Headlamp](https://headlamp.dev/)); **`helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/`**; **`kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml`** → **`helm upgrade --install headlamp headlamp/headlamp --version 0.40.1 -n headlamp -f clusters/noble/bootstrap/headlamp/values.yaml`**; **Ingress** **`https://headlamp.apps.noble.lab.pcenicni.dev`** (**`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **`values.yaml`:** **`config.sessionTTL: null`** works around chart **0.40.1** / binary mismatch ([headlamp#4883](https://github.com/kubernetes-sigs/headlamp/issues/4883)). **RBAC:** chart defaults are permissive — tighten before LAN-wide exposure; align with **Phase G** hardening.
## Phase E — Secrets
- [x] **Sealed Secrets** (optional Git workflow) — `clusters/noble/bootstrap/sealed-secrets/` (Helm **2.18.4**); **`kubeseal`** + key backup per **`README.md`**
- [x] **Vault** in-cluster on Longhorn + **auto-unseal** — `clusters/noble/bootstrap/vault/` (Helm **0.32.0**); **Longhorn** PVC; **OSS** “auto-unseal” = optional **`unseal-cronjob.yaml`** + Secret (**README**); **`configure-kubernetes-auth.sh`** for ESO (**Kubernetes auth** + KV + role)
- [x] **External Secrets Operator** + Vault `ClusterSecretStore` — operator **`clusters/noble/bootstrap/external-secrets/`** (Helm **2.2.0**); apply **`examples/vault-cluster-secret-store.yaml`** after Vault (**`README.md`**)
## Phase F — Policy + backups
- [x] **Kyverno** baseline policies — `clusters/noble/bootstrap/kyverno/` (Helm **kyverno** **3.7.1** + **kyverno-policies** **3.7.1**, **baseline** / **Audit** — see **`README.md`**)
- [ ] **Velero** when S3 is ready; backup/restore drill
## Phase G — Hardening
- [x] **Cilium** — Vault **`CiliumNetworkPolicy`** (`clusters/noble/bootstrap/vault/cilium-network-policy.yaml`) — HTTP **8200** from **`external-secrets`** + **`vault`**; extend for other clients as needed
- [x] **Runbooks** — **`talos/runbooks/`** (API VIP / kube-vip, etcd–Talos, Longhorn, Vault)
- [x] **RBAC** — **Headlamp** **`ClusterRoleBinding`** uses built-in **`edit`** (not **`cluster-admin`**); **Argo CD** **`policy.default: role:readonly`** with **`g, admin, role:admin`** — see **`clusters/noble/bootstrap/headlamp/values.yaml`**, **`clusters/noble/bootstrap/argocd/values.yaml`**, **`talos/runbooks/rbac.md`**
- [ ] **Alertmanager** — add **`slack_configs`**, **`pagerduty_configs`**, or other receivers under **`kube-prometheus-stack`** `alertmanager.config` (chart defaults use **`null`** receiver)
## Quick validation

View File

@@ -106,7 +106,7 @@ sed -i '' 's|https://192.168.50.230:6443|https://192.168.50.20:6443|g' kubeconfi
Quick check from your Mac: `nc -vz 192.168.50.20 50000` (Talos) and `nc -vz 192.168.50.20 6443` (Kubernetes).
**`dial tcp 192.168.50.230:6443` on nodes:** Host-network components (including **Cilium**) cannot use the in-cluster `kubernetes` Service; they otherwise follow **`cluster.controlPlane.endpoint`** (the VIP). Talos **KubePrism** on **`127.0.0.1:7445`** (default) load-balances to healthy apiservers. Ensure the CNI Helm values set **`k8sServiceHost: "127.0.0.1"`** and **`k8sServicePort: "7445"`** — see [`clusters/noble/bootstrap/cilium/values.yaml`](../clusters/noble/bootstrap/cilium/values.yaml). Also confirm **kube-vip**'s **`vip_interface`** matches the uplink (`talosctl -n <ip> get links` — e.g. **`ens18`** on these nodes). A bare **`curl -k https://192.168.50.230:6443/healthz`** often returns **`401 Unauthorized`** because no client cert was sent — that still means TLS to the VIP worked.
**Verify the VIP with `kubectl` (copy as-is):** use a real kubeconfig path (not `/path/to/…`). From the **repository root**:
@@ -124,23 +124,23 @@ Expect a single line: **`ok`**. If you see **`The connection to the server local
| Component | Apply |
|-----------|--------|
| Cilium | **Before** kube-vip/MetalLB scheduling: Helm from [`clusters/noble/bootstrap/cilium/README.md`](../clusters/noble/bootstrap/cilium/README.md) (`values.yaml`) |
| kube-vip | `kubectl apply -k ../clusters/noble/bootstrap/kube-vip` |
| MetalLB pool | After MetalLB controller install: `kubectl apply -k ../clusters/noble/bootstrap/metallb` |
| Longhorn PSA + Helm | `kubectl apply -k ../clusters/noble/bootstrap/longhorn` then Helm from §5 below |
Set `vip_interface` in `clusters/noble/bootstrap/kube-vip/vip-daemonset.yaml` if it does not match the control-plane uplink (`talosctl -n <cp-ip> get links`).
## 5. Longhorn (Talos)
1. **Machine image:** `talconfig.yaml` includes `iscsi-tools` and `util-linux-tools` extensions. After `talhelper genconfig`, **upgrade each node** so the running installer image matches (extensions are in the image, not applied live by config alone). If `longhorn-manager` logs **`iscsiadm` / `open-iscsi`**, the node image does not include the extension yet.
2. **Pod Security + path:** Apply `kubectl apply -k ../clusters/noble/bootstrap/longhorn` (privileged `longhorn-system`). The Helm chart host-mounts **`/var/lib/longhorn`**; `talconfig` adds a kubelet **bind** from `/var/mnt/longhorn` → `/var/lib/longhorn` so that path matches the dedicated XFS volume.
3. **Data path:** From the **repository root** (not `talos/`), run Helm with a real release and chart name — not literal `...`:
```bash
helm repo add longhorn https://charts.longhorn.io && helm repo update
helm upgrade --install longhorn longhorn/longhorn -n longhorn-system --create-namespace \
  -f clusters/noble/bootstrap/longhorn/values.yaml
```
If Longhorn defaults to `/var/lib/longhorn`, you get **wrong format** / **no space** on the Talos root filesystem.

View File

@@ -4,7 +4,7 @@
**Checks**
1. VIP and interface align with [`talos/talconfig.yaml`](../talconfig.yaml) (`cluster.network`, `additionalApiServerCertSans`) and [`clusters/noble/bootstrap/kube-vip/`](../../clusters/noble/bootstrap/kube-vip/).
2. `kubectl -n kube-system get pods -l app.kubernetes.io/name=kube-vip -o wide` — DaemonSet should be **Running** on control-plane nodes.
3. From a workstation: `ping 192.168.50.230` (if ICMP allowed) and `curl -k https://192.168.50.230:6443/healthz` or `kubectl get --raw /healthz` with kubeconfig `server:` set to the VIP.
4. `talosctl health` with `TALOSCONFIG` (see [`talos/README.md`](../README.md) §3).

View File

@@ -13,4 +13,4 @@
- Node disk pressure / mount missing: fix Talos machine config, reboot node per Talos docs.
- Recovery / GPT wipe scripts: [`talos/scripts/longhorn-gpt-recovery.sh`](../scripts/longhorn-gpt-recovery.sh) and CLUSTER-BUILD notes.
**References:** [`clusters/noble/bootstrap/longhorn/`](../../clusters/noble/bootstrap/longhorn/), [Longhorn docs](https://longhorn.io/docs/).

View File

@@ -1,6 +1,6 @@
# Runbook: Kubernetes RBAC (noble)
**Headlamp** (`clusters/noble/bootstrap/headlamp/values.yaml`): the chart's **ClusterRoleBinding** uses the built-in **`edit`** ClusterRole — not **`cluster-admin`**. Break-glass changes use **`kubectl`** with an admin kubeconfig.
**Argo CD** (`clusters/noble/bootstrap/argocd/values.yaml`): **`policy.default: role:readonly`** — new OIDC/Git users get read-only unless you add **`g, <user-or-group>, role:admin`** (or another role) in **`configs.rbac.policy.csv`**. Local user **`admin`** stays **`role:admin`** via **`g, admin, role:admin`**.

View File

@@ -5,9 +5,9 @@
**Checks**
1. `kubectl -n vault exec -i sts/vault -- vault status` — **Sealed** / **Initialized**.
2. Unseal key Secret + optional CronJob: [`clusters/noble/bootstrap/vault/README.md`](../../clusters/noble/bootstrap/vault/README.md), `unseal-cronjob.yaml`.
3. Kubernetes auth for ESO: [`clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh`](../../clusters/noble/bootstrap/vault/configure-kubernetes-auth.sh) and `kubectl describe clustersecretstore vault`.
4. **Cilium** policy: if Vault is unreachable from `external-secrets`, check [`clusters/noble/bootstrap/vault/cilium-network-policy.yaml`](../../clusters/noble/bootstrap/vault/cilium-network-policy.yaml) and extend `ingress` for new client namespaces.
**Common fixes**

View File

@@ -8,7 +8,7 @@
# installDisk: confirm with `talosctl disks -n <ip> --insecure` (Proxmox virtio is often /dev/sda).
# Longhorn data disk: second disk (often /dev/sdb SCSI or /dev/vdb virtio) → XFS at /var/mnt/longhorn.
# After changing schematic/extensions: regenerate configs, upgrade nodes with new installer image, then reboot if needed.
# Helm must set defaultDataPath to /var/mnt/longhorn (see clusters/noble/bootstrap/longhorn/values.yaml).
#
# Image Factory schematic (iscsi-tools + util-linux-tools), nocloud installer — pinned per-node via `talosImageURL`
# (base URL only, no `:tag` — talhelper validates and appends `talosVersion`).

View File

@@ -8,7 +8,7 @@
# installDisk: confirm with `talosctl disks -n <ip> --insecure` (Proxmox virtio is often /dev/sda).
# Longhorn data disk: second disk (often /dev/sdb SCSI or /dev/vdb virtio) → XFS at /var/mnt/longhorn.
# After changing schematic/extensions: regenerate configs, upgrade nodes with new installer image, then reboot if needed.
# Helm must set defaultDataPath to /var/mnt/longhorn (see clusters/noble/bootstrap/longhorn/values.yaml).
#
# Image Factory schematic (iscsi-tools + util-linux-tools), nocloud installer — pinned per-node via `talosImageURL`
# (base URL only, no `:tag` — talhelper validates and appends `talosVersion`).