Update Helm chart versions across multiple components to the latest stable releases, including Argo CD (9.5.14), cert-manager (v1.20.2), Cilium (1.19.4), kube-prometheus-stack (85.0.3), Loki (7.0.0), Fluent Bit (0.57.5), Headlamp (0.42.0), Traefik (40.2.0), and Kyverno (3.8.0). Also bump Newt (1.5.0), Velero (12.0.1), Eclipse Che (7.117.0), and the kube-vip image (v0.8.10). Adjust related documentation and values files to reflect these changes for improved deployment consistency and compatibility.

This commit is contained in:
Nikholas Pcenicni
2026-05-14 18:55:18 -04:00
parent 95b1866144
commit 2321209626
43 changed files with 97 additions and 98 deletions

View File

@@ -11,7 +11,7 @@
- argocd - argocd
- --create-namespace - --create-namespace
- --version - --version
- "9.4.17" - "9.5.14"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/argocd/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/argocd/values.yaml"
- --force-conflicts - --force-conflicts

View File

@@ -75,8 +75,8 @@ noble_authentik_bootstrap_api_wait_retries: 36
noble_authentik_bootstrap_api_wait_delay: 5 noble_authentik_bootstrap_api_wait_delay: 5
# Re-apply the same chart versions as the rest of noble.yml when flipping SSO on. # Re-apply the same chart versions as the rest of noble.yml when flipping SSO on.
noble_authentik_argocd_chart_version: "9.4.17" noble_authentik_argocd_chart_version: "9.5.14"
noble_authentik_kube_prometheus_chart_version: "82.15.1" noble_authentik_kube_prometheus_chart_version: "85.0.3"
noble_authentik_headlamp_chart_version: "0.40.1" noble_authentik_headlamp_chart_version: "0.42.0"
noble_authentik_longhorn_chart_version: "1.11.2" noble_authentik_longhorn_chart_version: "1.11.2"
noble_authentik_kube_prometheus_helm_wait_timeout: 60m noble_authentik_kube_prometheus_helm_wait_timeout: 60m

View File

@@ -21,7 +21,7 @@
- --namespace - --namespace
- cert-manager - cert-manager
- --version - --version
- v1.20.0 - v1.20.2
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/cert-manager/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/cert-manager/values.yaml"
- --force-conflicts - --force-conflicts

View File

@@ -51,7 +51,7 @@
- --namespace - --namespace
- kube-system - kube-system
- --version - --version
- "1.16.6" - "1.19.4"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/cilium/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/cilium/values.yaml"
- --force-conflicts - --force-conflicts

View File

@@ -56,7 +56,7 @@
- -n - -n
- kyverno - kyverno
- --version - --version
- "3.7.1" - "3.8.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/values.yaml"
- --force-conflicts - --force-conflicts

View File

@@ -44,7 +44,7 @@
- -n - -n
- kyverno - kyverno
- --version - --version
- "3.7.1" - "3.8.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/policies-values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/policies-values.yaml"
- --force-conflicts - --force-conflicts

View File

@@ -31,7 +31,7 @@
- --namespace - --namespace
- newt - newt
- --version - --version
- "1.2.0" - "1.5.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/newt/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/newt/values.yaml"
- --force-conflicts - --force-conflicts

View File

@@ -50,7 +50,7 @@
- -n - -n
- monitoring - monitoring
- --version - --version
- "82.15.1" - "85.0.3"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml"
- --force-conflicts - --force-conflicts
@@ -120,7 +120,7 @@
- -n - -n
- monitoring - monitoring
- --version - --version
- "82.15.1" - "85.0.3"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml"
- --force-conflicts - --force-conflicts
@@ -157,7 +157,7 @@
- -n - -n
- loki - loki
- --version - --version
- "6.55.0" - "7.0.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/loki/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/loki/values.yaml"
- --force-conflicts - --force-conflicts
@@ -179,7 +179,7 @@
- -n - -n
- logging - logging
- --version - --version
- "0.56.0" - "0.57.5"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/fluent-bit/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/fluent-bit/values.yaml"
- --force-conflicts - --force-conflicts
@@ -197,7 +197,7 @@
- headlamp - headlamp
- headlamp/headlamp - headlamp/headlamp
- --version - --version
- "0.40.1" - "0.42.0"
- -n - -n
- headlamp - headlamp
- -f - -f

View File

@@ -21,7 +21,7 @@
- --namespace - --namespace
- traefik - traefik
- --version - --version
- "39.0.6" - "40.2.0"
- -f - -f
- "{{ noble_repo_root }}/clusters/noble/bootstrap/traefik/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/traefik/values.yaml"
- --force-conflicts - --force-conflicts

View File

@@ -1,6 +1,6 @@
--- ---
# **noble_velero_install** is in **ansible/inventory/group_vars/all.yml**. Override S3 fields via extra-vars or group_vars. # **noble_velero_install** is in **ansible/inventory/group_vars/all.yml**. Override S3 fields via extra-vars or group_vars.
noble_velero_chart_version: "12.0.0" noble_velero_chart_version: "12.0.1"
noble_velero_s3_bucket: "" noble_velero_s3_bucket: ""
noble_velero_s3_url: "" noble_velero_s3_url: ""

View File

@@ -10,7 +10,7 @@ helm repo update
helm upgrade --install argocd argo/argo-cd \ helm upgrade --install argocd argo/argo-cd \
--namespace argocd \ --namespace argocd \
--create-namespace \ --create-namespace \
--version 9.4.17 \ --version 9.5.14 \
-f clusters/noble/bootstrap/argocd/values.yaml \ -f clusters/noble/bootstrap/argocd/values.yaml \
--wait --wait
``` ```
@@ -43,7 +43,7 @@ If **`helm upgrade --wait`** fails with *Secret was previously issued by `letsen
kubectl -n argocd delete certificate argocd-server --ignore-not-found kubectl -n argocd delete certificate argocd-server --ignore-not-found
kubectl -n argocd delete secret argocd-server-tls --ignore-not-found kubectl -n argocd delete secret argocd-server-tls --ignore-not-found
helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace \ helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace \
--version 9.4.17 -f clusters/noble/bootstrap/argocd/values.yaml --wait --version 9.5.14 -f clusters/noble/bootstrap/argocd/values.yaml --wait
``` ```
## 3. Register this repo (if private) ## 3. Register this repo (if private)
@@ -112,4 +112,4 @@ After **`noble-bootstrap-root`** is automated and leaf apps are synced, **git**
## Versions ## Versions
Pinned in **`values.yaml`** comments (chart **9.4.17** / Argo CD **v3.3.6** at time of writing). Bump **`--version`** when upgrading. Pinned in **`values.yaml`** comments (chart **9.5.14** / Argo CD **v3.4.2** at time of writing). Bump **`--version`** when upgrading.

View File

@@ -14,7 +14,7 @@ spec:
path: clusters/noble/bootstrap/cert-manager path: clusters/noble/bootstrap/cert-manager
- repoURL: https://charts.jetstack.io - repoURL: https://charts.jetstack.io
chart: cert-manager chart: cert-manager
targetRevision: v1.20.0 targetRevision: v1.20.2
helm: helm:
releaseName: cert-manager releaseName: cert-manager
valueFiles: valueFiles:

View File

@@ -11,7 +11,7 @@ spec:
sources: sources:
- repoURL: https://helm.cilium.io/ - repoURL: https://helm.cilium.io/
chart: cilium chart: cilium
targetRevision: 1.16.6 targetRevision: 1.19.4
helm: helm:
releaseName: cilium releaseName: cilium
valueFiles: valueFiles:

View File

@@ -11,7 +11,7 @@ spec:
sources: sources:
- repoURL: https://fluent.github.io/helm-charts - repoURL: https://fluent.github.io/helm-charts
chart: fluent-bit chart: fluent-bit
targetRevision: 0.56.0 targetRevision: 0.57.5
helm: helm:
releaseName: fluent-bit releaseName: fluent-bit
valueFiles: valueFiles:

View File

@@ -11,7 +11,7 @@ spec:
sources: sources:
- repoURL: https://kubernetes-sigs.github.io/headlamp/ - repoURL: https://kubernetes-sigs.github.io/headlamp/
chart: headlamp chart: headlamp
targetRevision: 0.40.1 targetRevision: 0.42.0
helm: helm:
releaseName: headlamp releaseName: headlamp
valueFiles: valueFiles:

View File

@@ -11,7 +11,7 @@ spec:
sources: sources:
- repoURL: https://prometheus-community.github.io/helm-charts - repoURL: https://prometheus-community.github.io/helm-charts
chart: kube-prometheus-stack chart: kube-prometheus-stack
targetRevision: 82.15.1 targetRevision: 85.0.3
helm: helm:
skipCrds: true skipCrds: true
releaseName: kube-prometheus releaseName: kube-prometheus

View File

@@ -11,7 +11,7 @@ spec:
sources: sources:
- repoURL: https://kyverno.github.io/kyverno/ - repoURL: https://kyverno.github.io/kyverno/
chart: kyverno chart: kyverno
targetRevision: 3.7.1 targetRevision: 3.8.0
helm: helm:
releaseName: kyverno releaseName: kyverno
valueFiles: valueFiles:

View File

@@ -11,7 +11,7 @@ spec:
sources: sources:
- repoURL: https://kyverno.github.io/kyverno/ - repoURL: https://kyverno.github.io/kyverno/
chart: kyverno-policies chart: kyverno-policies
targetRevision: 3.7.1 targetRevision: 3.8.0
helm: helm:
releaseName: kyverno-policies releaseName: kyverno-policies
valueFiles: valueFiles:

View File

@@ -11,7 +11,7 @@ spec:
sources: sources:
- repoURL: https://grafana.github.io/helm-charts - repoURL: https://grafana.github.io/helm-charts
chart: loki chart: loki
targetRevision: 6.55.0 targetRevision: 7.0.0
helm: helm:
releaseName: loki releaseName: loki
valueFiles: valueFiles:

View File

@@ -11,7 +11,7 @@ spec:
sources: sources:
- repoURL: https://traefik.github.io/charts - repoURL: https://traefik.github.io/charts
chart: traefik chart: traefik
targetRevision: 39.0.6 targetRevision: 40.2.0
helm: helm:
releaseName: traefik releaseName: traefik
valueFiles: valueFiles:

View File

@@ -1,13 +1,13 @@
# Argo CD — noble lab (GitOps) # Argo CD — noble lab (GitOps)
# #
# Chart: argo/argo-cd — pin version on the helm command (e.g. 9.4.17). # Chart: argo/argo-cd — pin version on the helm command (e.g. 9.5.14).
# UI/API: **Ingress** via **Traefik** at **argo.apps.noble.lab.pcenicni.dev** (TLS: cert-manager # UI/API: **Ingress** via **Traefik** at **argo.apps.noble.lab.pcenicni.dev** (TLS: cert-manager
# ClusterIssuer + **`server.insecure`** so TLS terminates at Traefik). # ClusterIssuer + **`server.insecure`** so TLS terminates at Traefik).
# DNS: **`argo.apps.noble.lab.pcenicni.dev`** → Traefik LB **192.168.50.211** (same wildcard as apps). # DNS: **`argo.apps.noble.lab.pcenicni.dev`** → Traefik LB **192.168.50.211** (same wildcard as apps).
# #
# helm repo add argo https://argoproj.github.io/argo-helm # helm repo add argo https://argoproj.github.io/argo-helm
# helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace \ # helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace \
# --version 9.4.17 -f clusters/noble/bootstrap/argocd/values.yaml --wait # --version 9.5.14 -f clusters/noble/bootstrap/argocd/values.yaml --wait
# #
# Initial admin password: kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath='{.data.password}' | base64 -d # Initial admin password: kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath='{.data.password}' | base64 -d
# #

View File

@@ -29,7 +29,7 @@ Without this Secret, **`ClusterIssuer`** will not complete certificate orders.
helm repo update helm repo update
helm upgrade --install cert-manager jetstack/cert-manager \ helm upgrade --install cert-manager jetstack/cert-manager \
--namespace cert-manager \ --namespace cert-manager \
--version v1.20.0 \ --version v1.20.2 \
-f clusters/noble/bootstrap/cert-manager/values.yaml \ -f clusters/noble/bootstrap/cert-manager/values.yaml \
--wait --wait
``` ```

View File

@@ -1,12 +1,12 @@
# cert-manager — noble lab # cert-manager — noble lab
# #
# Chart: jetstack/cert-manager — pin version on the helm command (e.g. v1.20.0). # Chart: jetstack/cert-manager — pin version on the helm command (e.g. v1.20.2).
# #
# kubectl apply -f clusters/noble/bootstrap/cert-manager/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/cert-manager/namespace.yaml
# helm repo add jetstack https://charts.jetstack.io # helm repo add jetstack https://charts.jetstack.io
# helm repo update # helm repo update
# helm upgrade --install cert-manager jetstack/cert-manager -n cert-manager \ # helm upgrade --install cert-manager jetstack/cert-manager -n cert-manager \
# --version v1.20.0 -f clusters/noble/bootstrap/cert-manager/values.yaml --wait # --version v1.20.2 -f clusters/noble/bootstrap/cert-manager/values.yaml --wait
# #
# kubectl apply -k clusters/noble/bootstrap/cert-manager # kubectl apply -k clusters/noble/bootstrap/cert-manager

View File

@@ -13,7 +13,7 @@ helm repo add cilium https://helm.cilium.io/
helm repo update helm repo update
helm upgrade --install cilium cilium/cilium \ helm upgrade --install cilium cilium/cilium \
--namespace kube-system \ --namespace kube-system \
--version 1.16.6 \ --version 1.19.4 \
-f clusters/noble/bootstrap/cilium/values.yaml \ -f clusters/noble/bootstrap/cilium/values.yaml \
--wait --wait
``` ```

View File

@@ -1,7 +1,7 @@
# Cilium on Talos — phase 1: bring up CNI while kube-proxy still runs. # Cilium on Talos — phase 1: bring up CNI while kube-proxy still runs.
# See README.md for install order (before MetalLB scheduling) and optional kube-proxy replacement. # See README.md for install order (before MetalLB scheduling) and optional kube-proxy replacement.
# #
# Chart: cilium/cilium — pin version in helm command (e.g. 1.16.6). # Chart: cilium/cilium — pin version in helm command (e.g. 1.19.4).
# Ref: https://www.talos.dev/latest/kubernetes-guides/network/deploying-cilium/ # Ref: https://www.talos.dev/latest/kubernetes-guides/network/deploying-cilium/
ipam: ipam:

View File

@@ -1,6 +1,6 @@
# Fluent Bit — noble lab (DaemonSet; ship Kubernetes container logs to Loki gateway). # Fluent Bit — noble lab (DaemonSet; ship Kubernetes container logs to Loki gateway).
# #
# Chart: fluent/fluent-bit — pin version on install (e.g. 0.56.0). # Chart: fluent/fluent-bit — pin version on install (e.g. 0.57.5).
# Install **after** Loki so `loki-gateway.loki.svc` exists. # Install **after** Loki so `loki-gateway.loki.svc` exists.
# #
# Talos: only **tail** `/var/log/containers` (no host **systemd** input — journal layout differs from typical Linux). # Talos: only **tail** `/var/log/containers` (no host **systemd** input — journal layout differs from typical Linux).
@@ -9,7 +9,7 @@
# helm repo add fluent https://fluent.github.io/helm-charts # helm repo add fluent https://fluent.github.io/helm-charts
# helm repo update # helm repo update
# helm upgrade --install fluent-bit fluent/fluent-bit -n logging \ # helm upgrade --install fluent-bit fluent/fluent-bit -n logging \
# --version 0.56.0 -f clusters/noble/bootstrap/fluent-bit/values.yaml --wait --timeout 15m # --version 0.57.5 -f clusters/noble/bootstrap/fluent-bit/values.yaml --wait --timeout 15m
config: config:
inputs: | inputs: |

View File

@@ -2,7 +2,7 @@
[Headlamp](https://headlamp.dev/) web UI for the cluster. Exposed on **`https://headlamp.apps.noble.lab.pcenicni.dev`** via **Traefik** + **cert-manager** (`letsencrypt-prod`), same pattern as Grafana. [Headlamp](https://headlamp.dev/) web UI for the cluster. Exposed on **`https://headlamp.apps.noble.lab.pcenicni.dev`** via **Traefik** + **cert-manager** (`letsencrypt-prod`), same pattern as Grafana.
- **Chart:** `headlamp/headlamp` **0.40.1** (`config.sessionTTL: null` avoids chart/binary mismatch — [issue #4883](https://github.com/kubernetes-sigs/headlamp/issues/4883)) - **Chart:** `headlamp/headlamp` **0.42.0** (`config.sessionTTL: null` still omits **`-session-ttl`** if needed — [issue #4883](https://github.com/kubernetes-sigs/headlamp/issues/4883))
- **Namespace:** `headlamp` - **Namespace:** `headlamp`
## Install ## Install
@@ -12,7 +12,7 @@ helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/
helm repo update helm repo update
kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml
helm upgrade --install headlamp headlamp/headlamp -n headlamp \ helm upgrade --install headlamp headlamp/headlamp -n headlamp \
--version 0.40.1 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m --version 0.42.0 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m
``` ```
Sign-in uses a **ServiceAccount token** (Headlamp docs: create a limited SA for day-to-day use). This repo binds the Headlamp workload SA to the built-in **`edit`** ClusterRole (**`clusterRoleBinding.clusterRoleName: edit`** in **`values.yaml`**) — not **`cluster-admin`**. For cluster-scoped admin work, use **`kubectl`** with your admin kubeconfig. Optional **OIDC** in **`config.oidc`** replaces token login for SSO. **In-cluster OIDC requires kube-apiserver OIDC** (same Authentik app issuer + **`oidc-client-id: headlamp`**) or proxied K8s calls return **401** while **`/me`** still returns 200 — see **`talos/talconfig.yaml`**, **`oidc-noble-admins-clusterrolebinding.yaml`**, and **`ansible/roles/noble_authentik/README.md`** troubleshooting. Sign-in uses a **ServiceAccount token** (Headlamp docs: create a limited SA for day-to-day use). This repo binds the Headlamp workload SA to the built-in **`edit`** ClusterRole (**`clusterRoleBinding.clusterRoleName: edit`** in **`values.yaml`**) — not **`cluster-admin`**. For cluster-scoped admin work, use **`kubectl`** with your admin kubeconfig. Optional **OIDC** in **`config.oidc`** replaces token login for SSO. **In-cluster OIDC requires kube-apiserver OIDC** (same Authentik app issuer + **`oidc-client-id: headlamp`**) or proxied K8s calls return **401** while **`/me`** still returns 200 — see **`talos/talconfig.yaml`**, **`oidc-noble-admins-clusterrolebinding.yaml`**, and **`ansible/roles/noble_authentik/README.md`** troubleshooting.

View File

@@ -4,7 +4,7 @@
# helm repo update # helm repo update
# kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml
# helm upgrade --install headlamp headlamp/headlamp -n headlamp \ # helm upgrade --install headlamp headlamp/headlamp -n headlamp \
# --version 0.40.1 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m # --version 0.42.0 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m
# #
# DNS: headlamp.apps.noble.lab.pcenicni.dev → Traefik LB (see talos/CLUSTER-BUILD.md). # DNS: headlamp.apps.noble.lab.pcenicni.dev → Traefik LB (see talos/CLUSTER-BUILD.md).
# Default chart RBAC is broad — restrict for production (Phase G). # Default chart RBAC is broad — restrict for production (Phase G).
@@ -16,8 +16,7 @@
clusterRoleBinding: clusterRoleBinding:
clusterRoleName: edit clusterRoleName: edit
# #
# Chart 0.40.1 passes -session-ttl but the v0.40.1 binary does not define it — omit the flag: # Optional: set **config.sessionTTL** (seconds) or **null** to omit **-session-ttl** (see headlamp#4883 for older chart/binary mismatches).
# https://github.com/kubernetes-sigs/headlamp/issues/4883
config: config:
sessionTTL: null sessionTTL: null

View File

@@ -1,13 +1,13 @@
# kube-prometheus-stack — noble lab (Prometheus Operator + Grafana + Alertmanager + exporters) # kube-prometheus-stack — noble lab (Prometheus Operator + Grafana + Alertmanager + exporters)
# #
# Chart: prometheus-community/kube-prometheus-stack — pin version on install (e.g. 82.15.1). # Chart: prometheus-community/kube-prometheus-stack — pin version on install (e.g. 85.0.3).
# #
# Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`): # Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`):
# #
# kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml
# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts # helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
# helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \ # helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \
# --version 82.15.1 -f clusters/noble/bootstrap/kube-prometheus-stack/values.yaml --wait --timeout 60m # --version 85.0.3 -f clusters/noble/bootstrap/kube-prometheus-stack/values.yaml --wait --timeout 60m
# #
# Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes). # Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes).
# Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 30–60m. To watch progress, # Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 30–60m. To watch progress,

View File

@@ -35,7 +35,7 @@ spec:
effect: NoExecute effect: NoExecute
containers: containers:
- name: kube-vip - name: kube-vip
image: ghcr.io/kube-vip/kube-vip:v0.8.3 image: ghcr.io/kube-vip/kube-vip:v0.8.10
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
args: args:
- manager - manager

View File

@@ -2,7 +2,7 @@
Admission policies using [Kyverno](https://kyverno.io/). The main chart installs controllers and CRDs; **`kyverno-policies`** installs **Pod Security Standard** rules matching the **`baseline`** profile in **`Audit`** mode (violations are visible in policy reports; workloads are not denied). Admission policies using [Kyverno](https://kyverno.io/). The main chart installs controllers and CRDs; **`kyverno-policies`** installs **Pod Security Standard** rules matching the **`baseline`** profile in **`Audit`** mode (violations are visible in policy reports; workloads are not denied).
- **Charts:** `kyverno/kyverno` **3.7.1** (app **v1.17.1**), `kyverno/kyverno-policies` **3.7.1** - **Charts:** `kyverno/kyverno` **3.8.0** (app **v1.18.0**), `kyverno/kyverno-policies` **3.8.0**
- **Namespace:** `kyverno` - **Namespace:** `kyverno`
## Install ## Install
@@ -12,9 +12,9 @@ helm repo add kyverno https://kyverno.github.io/kyverno/
helm repo update helm repo update
kubectl apply -f clusters/noble/bootstrap/kyverno/namespace.yaml kubectl apply -f clusters/noble/bootstrap/kyverno/namespace.yaml
helm upgrade --install kyverno kyverno/kyverno -n kyverno \ helm upgrade --install kyverno kyverno/kyverno -n kyverno \
--version 3.7.1 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m --version 3.8.0 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m
helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \ helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \
--version 3.7.1 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m --version 3.8.0 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m
``` ```
Verify: Verify:

View File

@@ -1,7 +1,7 @@
# kyverno/kyverno-policies — Pod Security Standards as Kyverno ClusterPolicies # kyverno/kyverno-policies — Pod Security Standards as Kyverno ClusterPolicies
# #
# helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \ # helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \
# --version 3.7.1 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m # --version 3.8.0 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m
# #
# Default profile is baseline; validationFailureAction is Audit so existing privileged # Default profile is baseline; validationFailureAction is Audit so existing privileged
# workloads are not blocked. Kyverno still emits PolicyReports for matches — Headlamp # workloads are not blocked. Kyverno still emits PolicyReports for matches — Headlamp

View File

@@ -4,7 +4,7 @@
# helm repo update # helm repo update
# kubectl apply -f clusters/noble/bootstrap/kyverno/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/kyverno/namespace.yaml
# helm upgrade --install kyverno kyverno/kyverno -n kyverno \ # helm upgrade --install kyverno kyverno/kyverno -n kyverno \
# --version 3.7.1 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m # --version 3.8.0 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m
# #
# Baseline Pod Security policies (separate chart): see policies-values.yaml + README.md # Baseline Pod Security policies (separate chart): see policies-values.yaml + README.md
# #

View File

@@ -1,12 +1,12 @@
# Grafana Loki — noble lab (SingleBinary, filesystem on Longhorn; no MinIO/S3). # Grafana Loki — noble lab (SingleBinary, filesystem on Longhorn; no MinIO/S3).
# #
# Chart: grafana/loki — pin version on install (e.g. 6.55.0). # Chart: grafana/loki — pin version on install (e.g. 7.0.0).
# #
# kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml
# helm repo add grafana https://grafana.github.io/helm-charts # helm repo add grafana https://grafana.github.io/helm-charts
# helm repo update # helm repo update
# helm upgrade --install loki grafana/loki -n loki \ # helm upgrade --install loki grafana/loki -n loki \
# --version 6.55.0 -f clusters/noble/bootstrap/loki/values.yaml --wait --timeout 30m # --version 7.0.0 -f clusters/noble/bootstrap/loki/values.yaml --wait --timeout 30m
# #
# Query/push URL for Grafana + Fluent Bit: http://loki-gateway.loki.svc.cluster.local:80 # Query/push URL for Grafana + Fluent Bit: http://loki-gateway.loki.svc.cluster.local:80
# #

View File

@@ -41,7 +41,7 @@ helm repo add fossorial https://charts.fossorial.io
helm repo update helm repo update
helm upgrade --install newt fossorial/newt \ helm upgrade --install newt fossorial/newt \
--namespace newt \ --namespace newt \
--version 1.2.0 \ --version 1.5.0 \
-f clusters/noble/bootstrap/newt/values.yaml \ -f clusters/noble/bootstrap/newt/values.yaml \
--wait --wait
``` ```

View File

@@ -10,7 +10,7 @@
# #
# helm repo add fossorial https://charts.fossorial.io # helm repo add fossorial https://charts.fossorial.io
# helm upgrade --install newt fossorial/newt -n newt \ # helm upgrade --install newt fossorial/newt -n newt \
# --version 1.2.0 -f clusters/noble/bootstrap/newt/values.yaml --wait # --version 1.5.0 -f clusters/noble/bootstrap/newt/values.yaml --wait
# #
# See README.md for Pangolin Integration API (domains + HTTP resources + CNAME). # See README.md for Pangolin Integration API (domains + HTTP resources + CNAME).

View File

@@ -15,7 +15,7 @@
helm repo update helm repo update
helm upgrade --install traefik traefik/traefik \ helm upgrade --install traefik traefik/traefik \
--namespace traefik \ --namespace traefik \
--version 39.0.6 \ --version 40.2.0 \
-f clusters/noble/bootstrap/traefik/values.yaml \ -f clusters/noble/bootstrap/traefik/values.yaml \
--wait --wait
``` ```

View File

@@ -1,12 +1,12 @@
# Traefik ingress controller — noble lab # Traefik ingress controller — noble lab
# #
# Chart: traefik/traefik — pin version on the helm command (e.g. 39.0.6). # Chart: traefik/traefik — pin version on the helm command (e.g. 40.2.0).
# DNS: point *.apps.noble.lab.pcenicni.dev to the LoadBalancer IP below. # DNS: point *.apps.noble.lab.pcenicni.dev to the LoadBalancer IP below.
# #
# kubectl apply -f clusters/noble/bootstrap/traefik/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/traefik/namespace.yaml
# helm repo add traefik https://traefik.github.io/charts # helm repo add traefik https://traefik.github.io/charts
# helm upgrade --install traefik traefik/traefik -n traefik \ # helm upgrade --install traefik traefik/traefik -n traefik \
# --version 39.0.6 -f clusters/noble/bootstrap/traefik/values.yaml --wait # --version 40.2.0 -f clusters/noble/bootstrap/traefik/values.yaml --wait
service: service:
type: LoadBalancer type: LoadBalancer

View File

@@ -5,7 +5,7 @@ Ansible-managed core stack — **not** reconciled by Argo CD (`clusters/noble/ap
## What you get ## What you get
- **No web UI** — Velero is operated with the **`velero`** CLI and **`kubectl`** (Backup, Schedule, Restore CRDs). Metrics are exposed for Prometheus; there is no first-party dashboard in this chart. - **No web UI** — Velero is operated with the **`velero`** CLI and **`kubectl`** (Backup, Schedule, Restore CRDs). Metrics are exposed for Prometheus; there is no first-party dashboard in this chart.
- **vmware-tanzu/velero** Helm chart (**12.0.0** → Velero **1.18.0**) in namespace **`velero`** - **vmware-tanzu/velero** Helm chart (**12.0.1** → Velero **1.18.0**) in namespace **`velero`**
- **AWS plugin** init container for **S3-compatible** object storage (`velero/velero-plugin-for-aws:v1.14.0`) - **AWS plugin** init container for **S3-compatible** object storage (`velero/velero-plugin-for-aws:v1.14.0`)
- **CSI snapshots** via Velero's built-in CSI support (`EnableCSI`) and **VolumeSnapshotLocation** `velero.io/csi` (no separate CSI plugin image for Velero ≥ 1.14) - **CSI snapshots** via Velero's built-in CSI support (`EnableCSI`) and **VolumeSnapshotLocation** `velero.io/csi` (no separate CSI plugin image for Velero ≥ 1.14)
- **Prometheus** scraping: **ServiceMonitor** labeled for **kube-prometheus** (`release: kube-prometheus`) - **Prometheus** scraping: **ServiceMonitor** labeled for **kube-prometheus** (`release: kube-prometheus`)
@@ -99,7 +99,7 @@ From repo root:
kubectl apply -f clusters/noble/bootstrap/velero/namespace.yaml kubectl apply -f clusters/noble/bootstrap/velero/namespace.yaml
# Create velero-cloud-credentials (see above), then: # Create velero-cloud-credentials (see above), then:
helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts && helm repo update helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts && helm repo update
helm upgrade --install velero vmware-tanzu/velero -n velero --version 12.0.0 \ helm upgrade --install velero vmware-tanzu/velero -n velero --version 12.0.1 \
-f clusters/noble/bootstrap/velero/values.yaml \ -f clusters/noble/bootstrap/velero/values.yaml \
--set-string configuration.backupStorageLocation[0].bucket=YOUR_BUCKET \ --set-string configuration.backupStorageLocation[0].bucket=YOUR_BUCKET \
--set-string configuration.backupStorageLocation[0].config.s3Url=https://YOUR-S3-ENDPOINT \ --set-string configuration.backupStorageLocation[0].config.s3Url=https://YOUR-S3-ENDPOINT \

View File

@@ -2,10 +2,10 @@
# Install: **ansible/playbooks/noble.yml** role **noble_velero** (override S3 settings via **noble_velero_*** vars). # Install: **ansible/playbooks/noble.yml** role **noble_velero** (override S3 settings via **noble_velero_*** vars).
# Requires Secret **velero/velero-cloud-credentials** key **cloud** (INI for AWS plugin — see README). # Requires Secret **velero/velero-cloud-credentials** key **cloud** (INI for AWS plugin — see README).
# #
# Chart: vmware-tanzu/velero — pin version on install (e.g. 12.0.0 / Velero 1.18.0). # Chart: vmware-tanzu/velero — pin version on install (e.g. 12.0.1 / Velero 1.18.0).
# helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts && helm repo update # helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts && helm repo update
# kubectl apply -f clusters/noble/bootstrap/velero/namespace.yaml # kubectl apply -f clusters/noble/bootstrap/velero/namespace.yaml
# helm upgrade --install velero vmware-tanzu/velero -n velero --version 12.0.0 -f clusters/noble/bootstrap/velero/values.yaml # helm upgrade --install velero vmware-tanzu/velero -n velero --version 12.0.1 -f clusters/noble/bootstrap/velero/values.yaml
initContainers: initContainers:
- name: velero-plugin-for-aws - name: velero-plugin-for-aws

View File

@@ -13,7 +13,7 @@ spec:
source: source:
repoURL: https://eclipse-che.github.io/che-operator/charts repoURL: https://eclipse-che.github.io/che-operator/charts
chart: eclipse-che chart: eclipse-che
targetRevision: 7.116.0 targetRevision: 7.117.0
helm: helm:
releaseName: eclipse-che releaseName: eclipse-che
destination: destination:

View File

@@ -198,17 +198,17 @@ See [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md) for the authoritative
| Component | Chart / app (from CLUSTER-BUILD.md) | | Component | Chart / app (from CLUSTER-BUILD.md) |
|-----------|-------------------------------------| |-----------|-------------------------------------|
| Talos / Kubernetes | v1.12.6 / 1.35.2 bundled | | Talos / Kubernetes | v1.12.6 / 1.35.2 bundled |
| Cilium | Helm 1.16.6 | | Cilium | Helm 1.19.4 |
| MetalLB | 0.15.3 | | MetalLB | 0.15.3 |
| Longhorn | 1.11.1 | | Longhorn | 1.11.2 |
| Traefik | 39.0.6 / app v3.6.11 | | Traefik | 40.2.0 / app v3.7.1 |
| cert-manager | v1.20.0 | | cert-manager | v1.20.2 |
| Argo CD | 9.4.17 / app v3.3.6 | | Argo CD | 9.5.14 / app v3.4.2 |
| kube-prometheus-stack | 82.15.1 | | kube-prometheus-stack | 85.0.3 |
| Loki / Fluent Bit | 6.55.0 / 0.56.0 | | Loki / Fluent Bit | 7.0.0 / 0.57.5 |
| SOPS (client tooling) | see `clusters/noble/secrets/README.md` | | SOPS (client tooling) | see `clusters/noble/secrets/README.md` |
| Kyverno | 3.7.1 / policies 3.7.1 | | Kyverno | 3.8.0 / policies 3.8.0 |
| Newt | 1.2.0 / app 1.10.1 | | Newt | 1.5.0 / app 1.12.5 |
--- ---

View File

@@ -7,20 +7,20 @@ This document is the **exported TODO** for the **noble** Talos cluster (4 nodes)
Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (**`talos/runbooks/`**, **SOPS**-encrypted secrets in **`clusters/noble/secrets/`**). **Next focus:** optional **Alertmanager** receivers (Slack/PagerDuty); tighten **RBAC** (Headlamp / cluster-admin); **Cilium** policies for other namespaces as needed; enable **Mend Renovate** for PRs; Pangolin/sample Ingress; **Velero** backup/restore drill after S3 credentials are set (**`noble_velero_install`**). Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (**`talos/runbooks/`**, **SOPS**-encrypted secrets in **`clusters/noble/secrets/`**). **Next focus:** optional **Alertmanager** receivers (Slack/PagerDuty); tighten **RBAC** (Headlamp / cluster-admin); **Cilium** policies for other namespaces as needed; enable **Mend Renovate** for PRs; Pangolin/sample Ingress; **Velero** backup/restore drill after S3 credentials are set (**`noble_velero_install`**).
- **Talos** v1.12.6 (target) / **Kubernetes** as bundled — four nodes **Ready** unless upgrading; **`talosctl health`**; **`talos/kubeconfig`** is **local only** (gitignored — never commit; regenerate with `talosctl kubeconfig` per `talos/README.md`). **Image Factory (nocloud installer):** `factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6` - **Talos** v1.12.6 (target) / **Kubernetes** as bundled — four nodes **Ready** unless upgrading; **`talosctl health`**; **`talos/kubeconfig`** is **local only** (gitignored — never commit; regenerate with `talosctl kubeconfig` per `talos/README.md`). **Image Factory (nocloud installer):** `factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6`
- **Cilium** Helm **1.16.6** / app **1.16.6** (`clusters/noble/bootstrap/cilium/`, phase 1 values). - **Cilium** Helm **1.19.4** / app **1.19.4** (`clusters/noble/bootstrap/cilium/`, phase 1 values).
- **CSI Volume Snapshot** — **external-snapshotter** **v8.5.0** CRDs + **`registry.k8s.io/sig-storage/snapshot-controller`** (`clusters/noble/bootstrap/csi-snapshot-controller/`, Ansible **`noble_csi_snapshot_controller`**). - **CSI Volume Snapshot** — **external-snapshotter** **v8.5.0** CRDs + **`registry.k8s.io/sig-storage/snapshot-controller`** (`clusters/noble/bootstrap/csi-snapshot-controller/`, Ansible **`noble_csi_snapshot_controller`**).
- **MetalLB** Helm **0.15.3** / app **v0.15.3**; **IPAddressPool** `noble-l2` + **L2Advertisement** — pool **`192.168.50.210`–`192.168.50.229`**. - **MetalLB** Helm **0.15.3** / app **v0.15.3**; **IPAddressPool** `noble-l2` + **L2Advertisement** — pool **`192.168.50.210`–`192.168.50.229`**.
- **kube-vip** DaemonSet **3/3** on control planes; VIP **`192.168.50.230`** on **`ens18`** (`vip_subnet` **`/32`** required — bare **`32`** breaks parsing). **Verified from workstation:** `kubectl config set-cluster noble --server=https://192.168.50.230:6443` then **`kubectl get --raw /healthz`** → **`ok`** (`talos/kubeconfig`; see `talos/README.md`). - **kube-vip** DaemonSet **3/3** on control planes; VIP **`192.168.50.230`** on **`ens18`** (`vip_subnet` **`/32`** required — bare **`32`** breaks parsing). **Verified from workstation:** `kubectl config set-cluster noble --server=https://192.168.50.230:6443` then **`kubectl get --raw /healthz`** → **`ok`** (`talos/kubeconfig`; see `talos/README.md`).
- **metrics-server** Helm **3.13.0** / app **v0.8.0** — `clusters/noble/bootstrap/metrics-server/values.yaml` (`--kubelet-insecure-tls` for Talos); **`kubectl top nodes`** works. - **metrics-server** Helm **3.13.0** / app **v0.8.0** — `clusters/noble/bootstrap/metrics-server/values.yaml` (`--kubelet-insecure-tls` for Talos); **`kubectl top nodes`** works.
- **Longhorn** Helm **1.11.1** / app **v1.11.1** — `clusters/noble/bootstrap/longhorn/` (PSA **privileged** namespace, `defaultDataPath` `/var/mnt/longhorn`, `preUpgradeChecker` enabled); **StorageClass** `longhorn` (default); **`nodes.longhorn.io`** all **Ready**; test **PVC** `Bound` on `longhorn`. - **Longhorn** Helm **1.11.2** / app **v1.11.2** — `clusters/noble/bootstrap/longhorn/` (PSA **privileged** namespace, `defaultDataPath` `/var/mnt/longhorn`, `preUpgradeChecker` enabled); **StorageClass** `longhorn` (default); **`nodes.longhorn.io`** all **Ready**; test **PVC** `Bound` on `longhorn`.
- **Traefik** Helm **39.0.6** / app **v3.6.11**`clusters/noble/bootstrap/traefik/`; **`Service`** **`LoadBalancer`** **`EXTERNAL-IP` `192.168.50.211`**; **`IngressClass`** **`traefik`** (default). Point **`*.apps.noble.lab.pcenicni.dev`** at **`192.168.50.211`**. MetalLB pool verification was done before replacing the temporary nginx test with Traefik. - **Traefik** Helm **40.2.0** / app **v3.7.1**`clusters/noble/bootstrap/traefik/`; **`Service`** **`LoadBalancer`** **`EXTERNAL-IP` `192.168.50.211`**; **`IngressClass`** **`traefik`** (default). Point **`*.apps.noble.lab.pcenicni.dev`** at **`192.168.50.211`**. MetalLB pool verification was done before replacing the temporary nginx test with Traefik.
- **cert-manager** Helm **v1.20.0** / app **v1.20.0**`clusters/noble/bootstrap/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (**DNS-01** via **Cloudflare** for **`pcenicni.dev`**, Secret **`cloudflare-dns-api-token`** in **`cert-manager`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox). - **cert-manager** Helm **v1.20.2** / app **v1.20.2**`clusters/noble/bootstrap/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (**DNS-01** via **Cloudflare** for **`pcenicni.dev`**, Secret **`cloudflare-dns-api-token`** in **`cert-manager`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox).
- **Newt** Helm **1.2.0** / app **1.10.1** — `clusters/noble/bootstrap/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). Store credentials in git with **SOPS** (`clusters/noble/secrets/newt-pangolin-auth.secret.yaml`, **`age-key.txt`**, **`.sops.yaml`**) — see **`clusters/noble/secrets/README.md`**. **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolin's domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/bootstrap/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver). - **Newt** Helm **1.5.0** / app **1.12.5** — `clusters/noble/bootstrap/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). Store credentials in git with **SOPS** (`clusters/noble/secrets/newt-pangolin-auth.secret.yaml`, **`age-key.txt`**, **`.sops.yaml`**) — see **`clusters/noble/secrets/README.md`**. **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolin's domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/bootstrap/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver).
- **Argo CD** Helm **9.4.17** / app **v3.3.6**`clusters/noble/bootstrap/argocd/`; **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`**; **`noble-bootstrap-root`** → **`clusters/noble/bootstrap`** (kustomize includes **`clusters/noble/apps/`** for optional leaf **`Application`** manifests; manual sync until **`argocd/README.md`** §5 after **`noble.yml`**). Edit **`repoURL`** in **`bootstrap-root-application.yaml`** before applying. - **Argo CD** Helm **9.5.14** / app **v3.4.2**`clusters/noble/bootstrap/argocd/`; **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`**; **`noble-bootstrap-root`** → **`clusters/noble/bootstrap`** (kustomize includes **`clusters/noble/apps/`** for optional leaf **`Application`** manifests; manual sync until **`argocd/README.md`** §5 after **`noble.yml`**). Edit **`repoURL`** in **`bootstrap-root-application.yaml`** before applying.
- **kube-prometheus-stack** — Helm chart **82.15.1**`clusters/noble/bootstrap/kube-prometheus-stack/` (**namespace** `monitoring`, PSA **privileged****node-exporter** needs host mounts); **Longhorn** PVCs for Prometheus, Grafana, Alertmanager; **node-exporter** DaemonSet **4/4**. **Grafana Ingress:** **`https://grafana.apps.noble.lab.pcenicni.dev`** (Traefik **`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **Loki** datasource in Grafana: ConfigMap **`clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** (sidecar label **`grafana_datasource: "1"`**) — not via **`grafana.additionalDataSources`** in the chart. **`helm upgrade --install` with `--wait` is silent until done** — use **`--timeout 30m`**; Grafana admin: Secret **`kube-prometheus-grafana`**, keys **`admin-user`** / **`admin-password`**. - **kube-prometheus-stack** — Helm chart **85.0.3**`clusters/noble/bootstrap/kube-prometheus-stack/` (**namespace** `monitoring`, PSA **privileged****node-exporter** needs host mounts); **Longhorn** PVCs for Prometheus, Grafana, Alertmanager; **node-exporter** DaemonSet **4/4**. **Grafana Ingress:** **`https://grafana.apps.noble.lab.pcenicni.dev`** (Traefik **`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **Loki** datasource in Grafana: ConfigMap **`clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** (sidecar label **`grafana_datasource: "1"`**) — not via **`grafana.additionalDataSources`** in the chart. **`helm upgrade --install` with `--wait` is silent until done** — use **`--timeout 30m`**; Grafana admin: Secret **`kube-prometheus-grafana`**, keys **`admin-user`** / **`admin-password`**.
- **Loki** + **Fluent Bit****`grafana/loki` 6.55.0** SingleBinary + **filesystem** on **Longhorn** (`clusters/noble/bootstrap/loki/`); **`loki.auth_enabled: false`**; **`chunksCache.enabled: false`** (no memcached chunk cache). **`fluent/fluent-bit` 0.56.0** → **`loki-gateway.loki.svc:80`** (`clusters/noble/bootstrap/fluent-bit/`); **`logging`** PSA **privileged**. **Grafana Explore:** **`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** then **Explore → Loki** (e.g. `{job="fluent-bit"}`). - **Loki** + **Fluent Bit****`grafana/loki` 7.0.0** SingleBinary + **filesystem** on **Longhorn** (`clusters/noble/bootstrap/loki/`); **`loki.auth_enabled: false`**; **`chunksCache.enabled: false`** (no memcached chunk cache). **`fluent/fluent-bit` 0.57.5** → **`loki-gateway.loki.svc:80`** (`clusters/noble/bootstrap/fluent-bit/`); **`logging`** PSA **privileged**. **Grafana Explore:** **`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** then **Explore → Loki** (e.g. `{job="fluent-bit"}`).
- **SOPS** — cluster **`Secret`** manifests under **`clusters/noble/secrets/`** encrypted with **age** (see **`.sops.yaml`**, **`age-key.txt`** gitignored); **`noble.yml`** decrypt-applies when the private key is present. - **SOPS** — cluster **`Secret`** manifests under **`clusters/noble/secrets/`** encrypted with **age** (see **`.sops.yaml`**, **`age-key.txt`** gitignored); **`noble.yml`** decrypt-applies when the private key is present.
- **Velero** Helm **12.0.0** / app **v1.18.0** — `clusters/noble/bootstrap/velero/` (**Ansible** **`noble_velero`**, not Argo); **S3-compatible** backup location + **CSI** snapshots (**`EnableCSI`**); enable with **`noble_velero_install`** per **`velero/README.md`**. - **Velero** Helm **12.0.1** / app **v1.18.0** — `clusters/noble/bootstrap/velero/` (**Ansible** **`noble_velero`**, not Argo); **S3-compatible** backup location + **CSI** snapshots (**`EnableCSI`**); enable with **`noble_velero_install`** per **`velero/README.md`**.
- **Still open:** **Renovate** — install **[Mend Renovate](https://github.com/apps/renovate)** (or self-host) so PRs run; optional **Alertmanager** notification channels; optional **sample Ingress + cert + Pangolin** end-to-end; **Argo CD SSO**. - **Still open:** **Renovate** — install **[Mend Renovate](https://github.com/apps/renovate)** (or self-host) so PRs run; optional **Alertmanager** notification channels; optional **sample Ingress + cert + Pangolin** end-to-end; **Argo CD SSO**.
## Inventory ## Inventory
@@ -51,20 +51,20 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (**`t
- Talos: **v1.12.6** — align `talosctl` client with node image - Talos: **v1.12.6** — align `talosctl` client with node image
- Talos **Image Factory** (iscsi-tools + util-linux-tools): **`factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6`** — same schematic must appear in **`machine.install.image`** after `talhelper genconfig` (bare metal may use `metal-installer/` instead of `nocloud-installer/`) - Talos **Image Factory** (iscsi-tools + util-linux-tools): **`factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6`** — same schematic must appear in **`machine.install.image`** after `talhelper genconfig` (bare metal may use `metal-installer/` instead of `nocloud-installer/`)
- Kubernetes: **1.35.2** on current nodes (bundled with Talos; not pinned in repo) - Kubernetes: **1.35.2** on current nodes (bundled with Talos; not pinned in repo)
- Cilium: **1.16.6** (Helm chart; see `clusters/noble/bootstrap/cilium/README.md`) - Cilium: **1.19.4** (Helm chart; see `clusters/noble/bootstrap/cilium/README.md`)
- MetalLB: **0.15.3** (Helm chart; app **v0.15.3**) - MetalLB: **0.15.3** (Helm chart; app **v0.15.3**)
- metrics-server: **3.13.0** (Helm chart; app **v0.8.0**) - metrics-server: **3.13.0** (Helm chart; app **v0.8.0**)
- Longhorn: **1.11.1** (Helm chart; app **v1.11.1**) - Longhorn: **1.11.2** (Helm chart; app **v1.11.2**)
- Traefik: **39.0.6** (Helm chart; app **v3.6.11**) - Traefik: **40.2.0** (Helm chart; app **v3.7.1**)
- cert-manager: **v1.20.0** (Helm chart; app **v1.20.0**) - cert-manager: **v1.20.2** (Helm chart; app **v1.20.2**)
- Newt (Fossorial): **1.2.0** (Helm chart; app **1.10.1**) - Newt (Fossorial): **1.5.0** (Helm chart; app **1.12.5**)
- Argo CD: **9.4.17** (Helm chart `argo/argo-cd`; app **v3.3.6**) - Argo CD: **9.5.14** (Helm chart `argo/argo-cd`; app **v3.4.2**)
- kube-prometheus-stack: **82.15.1** (Helm chart `prometheus-community/kube-prometheus-stack`; app **v0.89.x** bundle) - kube-prometheus-stack: **85.0.3** (Helm chart `prometheus-community/kube-prometheus-stack`; app **v0.90.1** bundle)
- Loki: **6.55.0** (Helm chart `grafana/loki`; app **3.6.7**) - Loki: **7.0.0** (Helm chart `grafana/loki`; app **3.6.7**)
- Fluent Bit: **0.56.0** (Helm chart `fluent/fluent-bit`; app **4.2.3**) - Fluent Bit: **0.57.5** (Helm chart `fluent/fluent-bit`; app **5.0.5**)
- Kyverno: **3.7.1** (Helm chart `kyverno/kyverno`; app **v1.17.1**); **kyverno-policies** **3.7.1** — **baseline** PSS, **Audit** (`clusters/noble/bootstrap/kyverno/`) - Kyverno: **3.8.0** (Helm chart `kyverno/kyverno`; app **v1.18.0**); **kyverno-policies** **3.8.0** — **baseline** PSS, **Audit** (`clusters/noble/bootstrap/kyverno/`)
- Headlamp: **0.40.1** (Helm chart `headlamp/headlamp`; app matches chart — see [Artifact Hub](https://artifacthub.io/packages/helm/headlamp/headlamp)) - Headlamp: **0.42.0** (Helm chart `headlamp/headlamp`; app matches chart — see [Artifact Hub](https://artifacthub.io/packages/helm/headlamp/headlamp))
- Velero: **12.0.0** (Helm chart `vmware-tanzu/velero`; app **v1.18.0**) — **`clusters/noble/bootstrap/velero/`**; AWS plugin **v1.14.0**; Ansible **`noble_velero`** - Velero: **12.0.1** (Helm chart `vmware-tanzu/velero`; app **v1.18.0**) — **`clusters/noble/bootstrap/velero/`**; AWS plugin **v1.14.0**; Ansible **`noble_velero`**
- Renovate: **hosted** (Mend **Renovate** GitHub/GitLab app — no cluster chart) **or** **self-hosted** — pin chart when added ([Helm charts](https://docs.renovatebot.com/helm-charts/), OCI `ghcr.io/renovatebot/charts/renovate`); pair **`renovate.json`** with this repo's Helm paths under **`clusters/noble/`** - Renovate: **hosted** (Mend **Renovate** GitHub/GitLab app — no cluster chart) **or** **self-hosted** — pin chart when added ([Helm charts](https://docs.renovatebot.com/helm-charts/), OCI `ghcr.io/renovatebot/charts/renovate`); pair **`renovate.json`** with this repo's Helm paths under **`clusters/noble/`**
## Repo paths (this workspace) ## Repo paths (this workspace)
@@ -137,10 +137,10 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (**`t
**Install order:** **Cilium** → **Volume Snapshot CRDs + snapshot-controller** (`clusters/noble/bootstrap/csi-snapshot-controller/`, Ansible **`noble_csi_snapshot_controller`**) → **metrics-server** → **Longhorn** (Talos disk + Helm) → **MetalLB** (Helm → pool manifests) → ingress / certs / DNS as planned. **Install order:** **Cilium** → **Volume Snapshot CRDs + snapshot-controller** (`clusters/noble/bootstrap/csi-snapshot-controller/`, Ansible **`noble_csi_snapshot_controller`**) → **metrics-server** → **Longhorn** (Talos disk + Helm) → **MetalLB** (Helm → pool manifests) → ingress / certs / DNS as planned.
- [x] **Cilium** (Helm **1.16.6**) — **required** before MetalLB if `cni: none` (`clusters/noble/bootstrap/cilium/`) - [x] **Cilium** (Helm **1.19.4**) — **required** before MetalLB if `cni: none` (`clusters/noble/bootstrap/cilium/`)
- [x] **CSI Volume Snapshot** — CRDs + **`snapshot-controller`** in **`kube-system`** (`clusters/noble/bootstrap/csi-snapshot-controller/`); Ansible **`noble_csi_snapshot_controller`**; verify `kubectl api-resources | grep VolumeSnapshot` - [x] **CSI Volume Snapshot** — CRDs + **`snapshot-controller`** in **`kube-system`** (`clusters/noble/bootstrap/csi-snapshot-controller/`); Ansible **`noble_csi_snapshot_controller`**; verify `kubectl api-resources | grep VolumeSnapshot`
- [x] **metrics-server** — Helm **3.13.0**; values in `clusters/noble/bootstrap/metrics-server/values.yaml`; verify `kubectl top nodes` - [x] **metrics-server** — Helm **3.13.0**; values in `clusters/noble/bootstrap/metrics-server/values.yaml`; verify `kubectl top nodes`
- [x] **Longhorn** — Talos: user volume + kubelet mounts + extensions (`talos/README.md` §5); Helm **1.11.1**; `kubectl apply -k clusters/noble/bootstrap/longhorn`; verify **`nodes.longhorn.io`** and test PVC **`Bound`** - [x] **Longhorn** — Talos: user volume + kubelet mounts + extensions (`talos/README.md` §5); Helm **1.11.2**; `kubectl apply -k clusters/noble/bootstrap/longhorn`; verify **`nodes.longhorn.io`** and test PVC **`Bound`**
- [x] **MetalLB** — chart installed; **pool + L2** from `clusters/noble/bootstrap/metallb/` applied (`192.168.50.210`–`229`) - [x] **MetalLB** — chart installed; **pool + L2** from `clusters/noble/bootstrap/metallb/` applied (`192.168.50.210`–`229`)
- [x] **`Service` `LoadBalancer`** / pool check — MetalLB assigns from `210`–`229` (validated before Traefik; temporary nginx test removed in favor of Traefik) - [x] **`Service` `LoadBalancer`** / pool check — MetalLB assigns from `210`–`229` (validated before Traefik; temporary nginx test removed in favor of Traefik)
- [x] **Traefik** `LoadBalancer` for `*.apps.noble.lab.pcenicni.dev` — `clusters/noble/bootstrap/traefik/`; **`192.168.50.211`** - [x] **Traefik** `LoadBalancer` for `*.apps.noble.lab.pcenicni.dev` — `clusters/noble/bootstrap/traefik/`; **`192.168.50.211`**
@@ -157,9 +157,9 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (**`t
## Phase D — Observability ## Phase D — Observability
- [x] **kube-prometheus-stack**`kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml` then **`helm upgrade --install`** as in `clusters/noble/bootstrap/kube-prometheus-stack/values.yaml` (chart **82.15.1**); PVCs **`longhorn`**; **`--wait --timeout 30m`** recommended; verify **`kubectl -n monitoring get pods,pvc`** - [x] **kube-prometheus-stack**`kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml` then **`helm upgrade --install`** as in `clusters/noble/bootstrap/kube-prometheus-stack/values.yaml` (chart **85.0.3**); PVCs **`longhorn`**; **`--wait --timeout 30m`** recommended; verify **`kubectl -n monitoring get pods,pvc`**
- [x] **Loki** + **Fluent Bit** + **Grafana Loki datasource****order:** **`kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml`** → **`helm upgrade --install loki`** `grafana/loki` **6.55.0** `-f clusters/noble/bootstrap/loki/values.yaml`**`kubectl apply -f clusters/noble/bootstrap/fluent-bit/namespace.yaml`** → **`helm upgrade --install fluent-bit`** `fluent/fluent-bit` **0.56.0** `-f clusters/noble/bootstrap/fluent-bit/values.yaml`**`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`**. Verify **Explore → Loki** in Grafana; **`kubectl -n loki get pods,pvc`**, **`kubectl -n logging get pods`** - [x] **Loki** + **Fluent Bit** + **Grafana Loki datasource****order:** **`kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml`** → **`helm upgrade --install loki`** `grafana/loki` **7.0.0** `-f clusters/noble/bootstrap/loki/values.yaml`**`kubectl apply -f clusters/noble/bootstrap/fluent-bit/namespace.yaml`** → **`helm upgrade --install fluent-bit`** `fluent/fluent-bit` **0.57.5** `-f clusters/noble/bootstrap/fluent-bit/values.yaml`**`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`**. Verify **Explore → Loki** in Grafana; **`kubectl -n loki get pods,pvc`**, **`kubectl -n logging get pods`**
- [x] **Headlamp** — Kubernetes web UI ([Headlamp](https://headlamp.dev/)); **`helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/`**; **`kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml`** → **`helm upgrade --install headlamp headlamp/headlamp --version 0.40.1 -n headlamp -f clusters/noble/bootstrap/headlamp/values.yaml`**; **Ingress** **`https://headlamp.apps.noble.lab.pcenicni.dev`** (**`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **`values.yaml`:** **`config.sessionTTL: null`** works around chart **0.40.1** / binary mismatch ([headlamp#4883](https://github.com/kubernetes-sigs/headlamp/issues/4883)). **RBAC:** chart defaults are permissive — tighten before LAN-wide exposure; align with **Phase G** hardening. - [x] **Headlamp** — Kubernetes web UI ([Headlamp](https://headlamp.dev/)); **`helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/`**; **`kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml`** → **`helm upgrade --install headlamp headlamp/headlamp --version 0.42.0 -n headlamp -f clusters/noble/bootstrap/headlamp/values.yaml`** (add **`-f values-authentik-oidc.yaml`** when using OIDC); **Ingress** **`https://headlamp.apps.noble.lab.pcenicni.dev`** (**`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **`values.yaml`:** **`config.sessionTTL: null`** omits **`-session-ttl`** if you hit older chart/binary mismatches ([headlamp#4883](https://github.com/kubernetes-sigs/headlamp/issues/4883)); chart **0.42.x** defaults are otherwise fine. **RBAC:** chart defaults are permissive — tighten before LAN-wide exposure; align with **Phase G** hardening.
## Phase E — Secrets ## Phase E — Secrets
@@ -167,7 +167,7 @@ Lab stack is **up** on-cluster through **Phase D****F** and **Phase G** (**`t
## Phase F — Policy + backups ## Phase F — Policy + backups
- [x] **Kyverno** baseline policies — `clusters/noble/bootstrap/kyverno/` (Helm **kyverno** **3.7.1** + **kyverno-policies** **3.7.1**, **baseline** / **Audit** — see **`README.md`**) - [x] **Kyverno** baseline policies — `clusters/noble/bootstrap/kyverno/` (Helm **kyverno** **3.8.0** + **kyverno-policies** **3.8.0**, **baseline** / **Audit** — see **`README.md`**)
- [ ] **Velero** — manifests + Ansible **`noble_velero`** (`clusters/noble/bootstrap/velero/`); enable with **`noble_velero_install: true`** + S3 bucket/URL + **`velero/velero-cloud-credentials`** (see **`velero/README.md`**); optional backup/restore drill - [ ] **Velero** — manifests + Ansible **`noble_velero`** (`clusters/noble/bootstrap/velero/`); enable with **`noble_velero_install: true`** + S3 bucket/URL + **`velero/velero-cloud-credentials`** (see **`velero/README.md`**); optional backup/restore drill
## Phase G — Hardening ## Phase G — Hardening