diff --git a/ansible/roles/noble_argocd/tasks/main.yml b/ansible/roles/noble_argocd/tasks/main.yml index f7d534c..4900bb3 100644 --- a/ansible/roles/noble_argocd/tasks/main.yml +++ b/ansible/roles/noble_argocd/tasks/main.yml @@ -11,7 +11,7 @@ - argocd - --create-namespace - --version - - "9.4.17" + - "9.5.14" - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/argocd/values.yaml" - --force-conflicts diff --git a/ansible/roles/noble_authentik/defaults/main.yml b/ansible/roles/noble_authentik/defaults/main.yml index 785daa7..2762406 100644 --- a/ansible/roles/noble_authentik/defaults/main.yml +++ b/ansible/roles/noble_authentik/defaults/main.yml @@ -75,8 +75,8 @@ noble_authentik_bootstrap_api_wait_retries: 36 noble_authentik_bootstrap_api_wait_delay: 5 # Re-apply the same chart versions as the rest of noble.yml when flipping SSO on. -noble_authentik_argocd_chart_version: "9.4.17" -noble_authentik_kube_prometheus_chart_version: "82.15.1" -noble_authentik_headlamp_chart_version: "0.40.1" +noble_authentik_argocd_chart_version: "9.5.14" +noble_authentik_kube_prometheus_chart_version: "85.0.3" +noble_authentik_headlamp_chart_version: "0.42.0" noble_authentik_longhorn_chart_version: "1.11.2" noble_authentik_kube_prometheus_helm_wait_timeout: 60m diff --git a/ansible/roles/noble_cert_manager/tasks/main.yml b/ansible/roles/noble_cert_manager/tasks/main.yml index 56f413d..dcc6ad2 100644 --- a/ansible/roles/noble_cert_manager/tasks/main.yml +++ b/ansible/roles/noble_cert_manager/tasks/main.yml @@ -21,7 +21,7 @@ - --namespace - cert-manager - --version - - v1.20.0 + - v1.20.2 - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/cert-manager/values.yaml" - --force-conflicts diff --git a/ansible/roles/noble_cilium/tasks/main.yml b/ansible/roles/noble_cilium/tasks/main.yml index 3059f60..18bfc36 100644 --- a/ansible/roles/noble_cilium/tasks/main.yml +++ b/ansible/roles/noble_cilium/tasks/main.yml @@ -51,7 +51,7 @@ - --namespace - kube-system - --version - - "1.16.6" + - "1.19.4" - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/cilium/values.yaml" - --force-conflicts diff --git a/ansible/roles/noble_kyverno/tasks/main.yml b/ansible/roles/noble_kyverno/tasks/main.yml index af3b618..32a1669 100644 --- a/ansible/roles/noble_kyverno/tasks/main.yml +++ b/ansible/roles/noble_kyverno/tasks/main.yml @@ -56,7 +56,7 @@ - -n - kyverno - --version - - "3.7.1" + - "3.8.0" - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/values.yaml" - --force-conflicts diff --git a/ansible/roles/noble_kyverno_policies/tasks/main.yml b/ansible/roles/noble_kyverno_policies/tasks/main.yml index 6ae181f..904b249 100644 --- a/ansible/roles/noble_kyverno_policies/tasks/main.yml +++ b/ansible/roles/noble_kyverno_policies/tasks/main.yml @@ -44,7 +44,7 @@ - -n - kyverno - --version - - "3.7.1" + - "3.8.0" - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/kyverno/policies-values.yaml" - --force-conflicts diff --git a/ansible/roles/noble_newt/tasks/main.yml b/ansible/roles/noble_newt/tasks/main.yml index 8ccead6..8f5d529 100644 --- a/ansible/roles/noble_newt/tasks/main.yml +++ b/ansible/roles/noble_newt/tasks/main.yml @@ -31,7 +31,7 @@ - --namespace - newt - --version - - "1.2.0" + - "1.5.0" - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/newt/values.yaml" - --force-conflicts diff --git a/ansible/roles/noble_platform/tasks/main.yml b/ansible/roles/noble_platform/tasks/main.yml index 9ac55a7..4ba444d 100644 --- a/ansible/roles/noble_platform/tasks/main.yml +++ b/ansible/roles/noble_platform/tasks/main.yml @@ -50,7 +50,7 @@ - -n - monitoring - --version - - "82.15.1" + - "85.0.3" - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml" - --force-conflicts @@ -120,7 +120,7 @@ - -n - monitoring - --version - - "82.15.1" + - "85.0.3" - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml" - --force-conflicts @@ -157,7 +157,7 @@ - -n - loki - --version - - "6.55.0" + - "7.0.0" - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/loki/values.yaml" - --force-conflicts @@ -179,7 +179,7 @@ - -n - logging - --version - - "0.56.0" + - "0.57.5" - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/fluent-bit/values.yaml" - --force-conflicts @@ -197,7 +197,7 @@ - headlamp - headlamp/headlamp - --version - - "0.40.1" + - "0.42.0" - -n - headlamp - -f diff --git a/ansible/roles/noble_traefik/tasks/main.yml b/ansible/roles/noble_traefik/tasks/main.yml index 706865a..e126d09 100644 --- a/ansible/roles/noble_traefik/tasks/main.yml +++ b/ansible/roles/noble_traefik/tasks/main.yml @@ -21,7 +21,7 @@ - --namespace - traefik - --version - - "39.0.6" + - "40.2.0" - -f - "{{ noble_repo_root }}/clusters/noble/bootstrap/traefik/values.yaml" - --force-conflicts diff --git a/ansible/roles/noble_velero/defaults/main.yml b/ansible/roles/noble_velero/defaults/main.yml index 3b12d97..8aef787 100644 --- a/ansible/roles/noble_velero/defaults/main.yml +++ b/ansible/roles/noble_velero/defaults/main.yml @@ -1,6 +1,6 @@ --- # **noble_velero_install** is in **ansible/inventory/group_vars/all.yml**. Override S3 fields via extra-vars or group_vars. -noble_velero_chart_version: "12.0.0" +noble_velero_chart_version: "12.0.1" noble_velero_s3_bucket: "" noble_velero_s3_url: "" diff --git a/clusters/noble/bootstrap/argocd/README.md b/clusters/noble/bootstrap/argocd/README.md index 8315a4c..97340bf 100644 --- a/clusters/noble/bootstrap/argocd/README.md +++ b/clusters/noble/bootstrap/argocd/README.md @@ -10,7 +10,7 @@ helm repo update helm upgrade --install argocd argo/argo-cd \ --namespace argocd \ --create-namespace \ - --version 9.4.17 \ + --version 9.5.14 \ -f clusters/noble/bootstrap/argocd/values.yaml \ --wait ``` @@ -43,7 +43,7 @@ If **`helm upgrade --wait`** fails with *Secret was previously issued by `letsen kubectl -n argocd delete certificate argocd-server --ignore-not-found kubectl -n argocd delete secret argocd-server-tls --ignore-not-found helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace \ - --version 9.4.17 -f clusters/noble/bootstrap/argocd/values.yaml --wait + --version 9.5.14 -f clusters/noble/bootstrap/argocd/values.yaml --wait ``` ## 3. Register this repo (if private) @@ -112,4 +112,4 @@ After **`noble-bootstrap-root`** is automated and leaf apps are synced, **git** ## Versions -Pinned in **`values.yaml`** comments (chart **9.4.17** / Argo CD **v3.3.6** at time of writing). Bump **`--version`** when upgrading. +Pinned in **`values.yaml`** comments (chart **9.5.14** / Argo CD **v3.4.2** at time of writing). Bump **`--version`** when upgrading. diff --git a/clusters/noble/bootstrap/argocd/app-of-apps/cert-manager-application.yaml b/clusters/noble/bootstrap/argocd/app-of-apps/cert-manager-application.yaml index 7bdba2f..eeaf5f8 100644 --- a/clusters/noble/bootstrap/argocd/app-of-apps/cert-manager-application.yaml +++ b/clusters/noble/bootstrap/argocd/app-of-apps/cert-manager-application.yaml @@ -14,7 +14,7 @@ spec: path: clusters/noble/bootstrap/cert-manager - repoURL: https://charts.jetstack.io chart: cert-manager - targetRevision: v1.20.0 + targetRevision: v1.20.2 helm: releaseName: cert-manager valueFiles: diff --git a/clusters/noble/bootstrap/argocd/app-of-apps/cilium-application.yaml b/clusters/noble/bootstrap/argocd/app-of-apps/cilium-application.yaml index 5ce8b8c..3a127a9 100644 --- a/clusters/noble/bootstrap/argocd/app-of-apps/cilium-application.yaml +++ b/clusters/noble/bootstrap/argocd/app-of-apps/cilium-application.yaml @@ -11,7 +11,7 @@ spec: sources: - repoURL: https://helm.cilium.io/ chart: cilium - targetRevision: 1.16.6 + targetRevision: 1.19.4 helm: releaseName: cilium valueFiles: diff --git a/clusters/noble/bootstrap/argocd/app-of-apps/fluent-bit-application.yaml b/clusters/noble/bootstrap/argocd/app-of-apps/fluent-bit-application.yaml index ebea249..8af920c 100644 --- a/clusters/noble/bootstrap/argocd/app-of-apps/fluent-bit-application.yaml +++ b/clusters/noble/bootstrap/argocd/app-of-apps/fluent-bit-application.yaml @@ -11,7 +11,7 @@ spec: sources: - repoURL: https://fluent.github.io/helm-charts chart: fluent-bit - targetRevision: 0.56.0 + targetRevision: 0.57.5 helm: releaseName: fluent-bit valueFiles: diff --git a/clusters/noble/bootstrap/argocd/app-of-apps/headlamp-application.yaml b/clusters/noble/bootstrap/argocd/app-of-apps/headlamp-application.yaml index 056c9b2..9accf74 100644 --- a/clusters/noble/bootstrap/argocd/app-of-apps/headlamp-application.yaml +++ b/clusters/noble/bootstrap/argocd/app-of-apps/headlamp-application.yaml @@ -11,7 +11,7 @@ spec: sources: - repoURL: https://kubernetes-sigs.github.io/headlamp/ chart: headlamp - targetRevision: 0.40.1 + targetRevision: 0.42.0 helm: releaseName: headlamp valueFiles: diff --git a/clusters/noble/bootstrap/argocd/app-of-apps/kube-prometheus-application.yaml b/clusters/noble/bootstrap/argocd/app-of-apps/kube-prometheus-application.yaml index 06fac48..bbaeefc 100644 --- a/clusters/noble/bootstrap/argocd/app-of-apps/kube-prometheus-application.yaml +++ b/clusters/noble/bootstrap/argocd/app-of-apps/kube-prometheus-application.yaml @@ -11,7 +11,7 @@ spec: sources: - repoURL: https://prometheus-community.github.io/helm-charts chart: kube-prometheus-stack - targetRevision: 82.15.1 + targetRevision: 85.0.3 helm: skipCrds: true releaseName: kube-prometheus diff --git a/clusters/noble/bootstrap/argocd/app-of-apps/kyverno-application.yaml b/clusters/noble/bootstrap/argocd/app-of-apps/kyverno-application.yaml index c0f4f96..4a79de0 100644 --- a/clusters/noble/bootstrap/argocd/app-of-apps/kyverno-application.yaml +++ b/clusters/noble/bootstrap/argocd/app-of-apps/kyverno-application.yaml @@ -11,7 +11,7 @@ spec: sources: - repoURL: https://kyverno.github.io/kyverno/ chart: kyverno - targetRevision: 3.7.1 + targetRevision: 3.8.0 helm: releaseName: kyverno valueFiles: diff --git a/clusters/noble/bootstrap/argocd/app-of-apps/kyverno-policies-application.yaml b/clusters/noble/bootstrap/argocd/app-of-apps/kyverno-policies-application.yaml index f385f69..834a528 100644 --- a/clusters/noble/bootstrap/argocd/app-of-apps/kyverno-policies-application.yaml +++ b/clusters/noble/bootstrap/argocd/app-of-apps/kyverno-policies-application.yaml @@ -11,7 +11,7 @@ spec: sources: - repoURL: https://kyverno.github.io/kyverno/ chart: kyverno-policies - targetRevision: 3.7.1 + targetRevision: 3.8.0 helm: releaseName: kyverno-policies valueFiles: diff --git a/clusters/noble/bootstrap/argocd/app-of-apps/loki-application.yaml b/clusters/noble/bootstrap/argocd/app-of-apps/loki-application.yaml index 7fdcb3a..84674e8 100644 --- a/clusters/noble/bootstrap/argocd/app-of-apps/loki-application.yaml +++ b/clusters/noble/bootstrap/argocd/app-of-apps/loki-application.yaml @@ -11,7 +11,7 @@ spec: sources: - repoURL: https://grafana.github.io/helm-charts chart: loki - targetRevision: 6.55.0 + targetRevision: 7.0.0 helm: releaseName: loki valueFiles: diff --git a/clusters/noble/bootstrap/argocd/app-of-apps/traefik-application.yaml b/clusters/noble/bootstrap/argocd/app-of-apps/traefik-application.yaml index 3ce9261..7ff27f1 100644 --- a/clusters/noble/bootstrap/argocd/app-of-apps/traefik-application.yaml +++ b/clusters/noble/bootstrap/argocd/app-of-apps/traefik-application.yaml @@ -11,7 +11,7 @@ spec: sources: - repoURL: https://traefik.github.io/charts chart: traefik - targetRevision: 39.0.6 + targetRevision: 40.2.0 helm: releaseName: traefik valueFiles: diff --git a/clusters/noble/bootstrap/argocd/values.yaml b/clusters/noble/bootstrap/argocd/values.yaml index 26824ed..8fc5bf6 100644 --- a/clusters/noble/bootstrap/argocd/values.yaml +++ b/clusters/noble/bootstrap/argocd/values.yaml @@ -1,13 +1,13 @@ # Argo CD — noble lab (GitOps) # -# Chart: argo/argo-cd — pin version on the helm command (e.g. 9.4.17). +# Chart: argo/argo-cd — pin version on the helm command (e.g. 9.5.14). # UI/API: **Ingress** via **Traefik** at **argo.apps.noble.lab.pcenicni.dev** (TLS: cert-manager # ClusterIssuer + **`server.insecure`** so TLS terminates at Traefik). # DNS: **`argo.apps.noble.lab.pcenicni.dev`** → Traefik LB **192.168.50.211** (same wildcard as apps). # # helm repo add argo https://argoproj.github.io/argo-helm # helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace \ -# --version 9.4.17 -f clusters/noble/bootstrap/argocd/values.yaml --wait +# --version 9.5.14 -f clusters/noble/bootstrap/argocd/values.yaml --wait # # Initial admin password: kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath='{.data.password}' | base64 -d # diff --git a/clusters/noble/bootstrap/cert-manager/README.md b/clusters/noble/bootstrap/cert-manager/README.md index d88112f..8ea29e2 100644 --- a/clusters/noble/bootstrap/cert-manager/README.md +++ b/clusters/noble/bootstrap/cert-manager/README.md @@ -29,7 +29,7 @@ Without this Secret, **`ClusterIssuer`** will not complete certificate orders. helm repo update helm upgrade --install cert-manager jetstack/cert-manager \ --namespace cert-manager \ - --version v1.20.0 \ + --version v1.20.2 \ -f clusters/noble/bootstrap/cert-manager/values.yaml \ --wait ``` diff --git a/clusters/noble/bootstrap/cert-manager/values.yaml b/clusters/noble/bootstrap/cert-manager/values.yaml index bf53025..d868f06 100644 --- a/clusters/noble/bootstrap/cert-manager/values.yaml +++ b/clusters/noble/bootstrap/cert-manager/values.yaml @@ -1,12 +1,12 @@ # cert-manager — noble lab # -# Chart: jetstack/cert-manager — pin version on the helm command (e.g. v1.20.0). +# Chart: jetstack/cert-manager — pin version on the helm command (e.g. v1.20.2). # # kubectl apply -f clusters/noble/bootstrap/cert-manager/namespace.yaml # helm repo add jetstack https://charts.jetstack.io # helm repo update # helm upgrade --install cert-manager jetstack/cert-manager -n cert-manager \ -# --version v1.20.0 -f clusters/noble/bootstrap/cert-manager/values.yaml --wait +# --version v1.20.2 -f clusters/noble/bootstrap/cert-manager/values.yaml --wait # # kubectl apply -k clusters/noble/bootstrap/cert-manager diff --git a/clusters/noble/bootstrap/cilium/README.md b/clusters/noble/bootstrap/cilium/README.md index 7d6157b..287ef68 100644 --- a/clusters/noble/bootstrap/cilium/README.md +++ b/clusters/noble/bootstrap/cilium/README.md @@ -13,7 +13,7 @@ helm repo add cilium https://helm.cilium.io/ helm repo update helm upgrade --install cilium cilium/cilium \ --namespace kube-system \ - --version 1.16.6 \ + --version 1.19.4 \ -f clusters/noble/bootstrap/cilium/values.yaml \ --wait ``` diff --git a/clusters/noble/bootstrap/cilium/values.yaml b/clusters/noble/bootstrap/cilium/values.yaml index 8092d95..210cbe2 100644 --- a/clusters/noble/bootstrap/cilium/values.yaml +++ b/clusters/noble/bootstrap/cilium/values.yaml @@ -1,7 +1,7 @@ # Cilium on Talos — phase 1: bring up CNI while kube-proxy still runs. # See README.md for install order (before MetalLB scheduling) and optional kube-proxy replacement. # -# Chart: cilium/cilium — pin version in helm command (e.g. 1.16.6). +# Chart: cilium/cilium — pin version in helm command (e.g. 1.19.4). # Ref: https://www.talos.dev/latest/kubernetes-guides/network/deploying-cilium/ ipam: diff --git a/clusters/noble/bootstrap/fluent-bit/values.yaml b/clusters/noble/bootstrap/fluent-bit/values.yaml index 0e2c2f5..0fef20d 100644 --- a/clusters/noble/bootstrap/fluent-bit/values.yaml +++ b/clusters/noble/bootstrap/fluent-bit/values.yaml @@ -1,6 +1,6 @@ # Fluent Bit — noble lab (DaemonSet; ship Kubernetes container logs to Loki gateway). # -# Chart: fluent/fluent-bit — pin version on install (e.g. 0.56.0). +# Chart: fluent/fluent-bit — pin version on install (e.g. 0.57.5). # Install **after** Loki so `loki-gateway.loki.svc` exists. # # Talos: only **tail** `/var/log/containers` (no host **systemd** input — journal layout differs from typical Linux). @@ -9,7 +9,7 @@ # helm repo add fluent https://fluent.github.io/helm-charts # helm repo update # helm upgrade --install fluent-bit fluent/fluent-bit -n logging \ -# --version 0.56.0 -f clusters/noble/bootstrap/fluent-bit/values.yaml --wait --timeout 15m +# --version 0.57.5 -f clusters/noble/bootstrap/fluent-bit/values.yaml --wait --timeout 15m config: inputs: | diff --git a/clusters/noble/bootstrap/headlamp/README.md b/clusters/noble/bootstrap/headlamp/README.md index a0b977f..e4b8c25 100644 --- a/clusters/noble/bootstrap/headlamp/README.md +++ b/clusters/noble/bootstrap/headlamp/README.md @@ -2,7 +2,7 @@ [Headlamp](https://headlamp.dev/) web UI for the cluster. Exposed on **`https://headlamp.apps.noble.lab.pcenicni.dev`** via **Traefik** + **cert-manager** (`letsencrypt-prod`), same pattern as Grafana. -- **Chart:** `headlamp/headlamp` **0.40.1** (`config.sessionTTL: null` avoids chart/binary mismatch — [issue #4883](https://github.com/kubernetes-sigs/headlamp/issues/4883)) +- **Chart:** `headlamp/headlamp` **0.42.0** (`config.sessionTTL: null` still omits **`-session-ttl`** if needed — [issue #4883](https://github.com/kubernetes-sigs/headlamp/issues/4883)) - **Namespace:** `headlamp` ## Install @@ -12,7 +12,7 @@ helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/ helm repo update kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml helm upgrade --install headlamp headlamp/headlamp -n headlamp \ - --version 0.40.1 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m + --version 0.42.0 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m ``` Sign-in uses a **ServiceAccount token** (Headlamp docs: create a limited SA for day-to-day use). This repo binds the Headlamp workload SA to the built-in **`edit`** ClusterRole (**`clusterRoleBinding.clusterRoleName: edit`** in **`values.yaml`**) — not **`cluster-admin`**. For cluster-scoped admin work, use **`kubectl`** with your admin kubeconfig. Optional **OIDC** in **`config.oidc`** replaces token login for SSO. **In-cluster OIDC requires kube-apiserver OIDC** (same Authentik app issuer + **`oidc-client-id: headlamp`**) or proxied K8s calls return **401** while **`/me`** still returns 200 — see **`talos/talconfig.yaml`**, **`oidc-noble-admins-clusterrolebinding.yaml`**, and **`ansible/roles/noble_authentik/README.md`** troubleshooting. diff --git a/clusters/noble/bootstrap/headlamp/values.yaml b/clusters/noble/bootstrap/headlamp/values.yaml index 208f062..7263f80 100644 --- a/clusters/noble/bootstrap/headlamp/values.yaml +++ b/clusters/noble/bootstrap/headlamp/values.yaml @@ -4,7 +4,7 @@ # helm repo update # kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml # helm upgrade --install headlamp headlamp/headlamp -n headlamp \ -# --version 0.40.1 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m +# --version 0.42.0 -f clusters/noble/bootstrap/headlamp/values.yaml --wait --timeout 10m # # DNS: headlamp.apps.noble.lab.pcenicni.dev → Traefik LB (see talos/CLUSTER-BUILD.md). # Default chart RBAC is broad — restrict for production (Phase G). @@ -16,8 +16,7 @@ clusterRoleBinding: clusterRoleName: edit # -# Chart 0.40.1 passes -session-ttl but the v0.40.1 binary does not define it — omit the flag: -# https://github.com/kubernetes-sigs/headlamp/issues/4883 +# Optional: set **config.sessionTTL** (seconds) or **null** to omit **-session-ttl** (see headlamp#4883 for older chart/binary mismatches). config: sessionTTL: null diff --git a/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml b/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml index 4ce8072..ba31b28 100644 --- a/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml +++ b/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml @@ -1,13 +1,13 @@ # kube-prometheus-stack — noble lab (Prometheus Operator + Grafana + Alertmanager + exporters) # -# Chart: prometheus-community/kube-prometheus-stack — pin version on install (e.g. 82.15.1). +# Chart: prometheus-community/kube-prometheus-stack — pin version on install (e.g. 85.0.3). # # Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`): # # kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml # helm repo add prometheus-community https://prometheus-community.github.io/helm-charts # helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \ -# --version 82.15.1 -f clusters/noble/bootstrap/kube-prometheus-stack/values.yaml --wait --timeout 60m +# --version 85.0.3 -f clusters/noble/bootstrap/kube-prometheus-stack/values.yaml --wait --timeout 60m # # Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes). # Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 30–60m. To watch progress, diff --git a/clusters/noble/bootstrap/kube-vip/vip-daemonset.yaml b/clusters/noble/bootstrap/kube-vip/vip-daemonset.yaml index 49a5706..b43082f 100644 --- a/clusters/noble/bootstrap/kube-vip/vip-daemonset.yaml +++ b/clusters/noble/bootstrap/kube-vip/vip-daemonset.yaml @@ -35,7 +35,7 @@ spec: effect: NoExecute containers: - name: kube-vip - image: ghcr.io/kube-vip/kube-vip:v0.8.3 + image: ghcr.io/kube-vip/kube-vip:v0.8.10 imagePullPolicy: IfNotPresent args: - manager diff --git a/clusters/noble/bootstrap/kyverno/README.md b/clusters/noble/bootstrap/kyverno/README.md index 81ea6d5..70cee5f 100644 --- a/clusters/noble/bootstrap/kyverno/README.md +++ b/clusters/noble/bootstrap/kyverno/README.md @@ -2,7 +2,7 @@ Admission policies using [Kyverno](https://kyverno.io/). The main chart installs controllers and CRDs; **`kyverno-policies`** installs **Pod Security Standard** rules matching the **`baseline`** profile in **`Audit`** mode (violations are visible in policy reports; workloads are not denied). -- **Charts:** `kyverno/kyverno` **3.7.1** (app **v1.17.1**), `kyverno/kyverno-policies` **3.7.1** +- **Charts:** `kyverno/kyverno` **3.8.0** (app **v1.18.0**), `kyverno/kyverno-policies` **3.8.0** - **Namespace:** `kyverno` ## Install @@ -12,9 +12,9 @@ helm repo add kyverno https://kyverno.github.io/kyverno/ helm repo update kubectl apply -f clusters/noble/bootstrap/kyverno/namespace.yaml helm upgrade --install kyverno kyverno/kyverno -n kyverno \ - --version 3.7.1 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m + --version 3.8.0 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \ - --version 3.7.1 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m + --version 3.8.0 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m ``` Verify: diff --git a/clusters/noble/bootstrap/kyverno/policies-values.yaml b/clusters/noble/bootstrap/kyverno/policies-values.yaml index 52cd7fc..2eceb51 100644 --- a/clusters/noble/bootstrap/kyverno/policies-values.yaml +++ b/clusters/noble/bootstrap/kyverno/policies-values.yaml @@ -1,7 +1,7 @@ # kyverno/kyverno-policies — Pod Security Standards as Kyverno ClusterPolicies # # helm upgrade --install kyverno-policies kyverno/kyverno-policies -n kyverno \ -# --version 3.7.1 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m +# --version 3.8.0 -f clusters/noble/bootstrap/kyverno/policies-values.yaml --wait --timeout 10m # # Default profile is baseline; validationFailureAction is Audit so existing privileged # workloads are not blocked. Kyverno still emits PolicyReports for matches — Headlamp diff --git a/clusters/noble/bootstrap/kyverno/values.yaml b/clusters/noble/bootstrap/kyverno/values.yaml index acc8787..ee288bd 100644 --- a/clusters/noble/bootstrap/kyverno/values.yaml +++ b/clusters/noble/bootstrap/kyverno/values.yaml @@ -4,7 +4,7 @@ # helm repo update # kubectl apply -f clusters/noble/bootstrap/kyverno/namespace.yaml # helm upgrade --install kyverno kyverno/kyverno -n kyverno \ -# --version 3.7.1 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m +# --version 3.8.0 -f clusters/noble/bootstrap/kyverno/values.yaml --wait --timeout 15m # # Baseline Pod Security policies (separate chart): see policies-values.yaml + README.md # diff --git a/clusters/noble/bootstrap/loki/values.yaml b/clusters/noble/bootstrap/loki/values.yaml index c6355d6..b9d90ea 100644 --- a/clusters/noble/bootstrap/loki/values.yaml +++ b/clusters/noble/bootstrap/loki/values.yaml @@ -1,12 +1,12 @@ # Grafana Loki — noble lab (SingleBinary, filesystem on Longhorn; no MinIO/S3). # -# Chart: grafana/loki — pin version on install (e.g. 6.55.0). +# Chart: grafana/loki — pin version on install (e.g. 7.0.0). # # kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml # helm repo add grafana https://grafana.github.io/helm-charts # helm repo update # helm upgrade --install loki grafana/loki -n loki \ -# --version 6.55.0 -f clusters/noble/bootstrap/loki/values.yaml --wait --timeout 30m +# --version 7.0.0 -f clusters/noble/bootstrap/loki/values.yaml --wait --timeout 30m # # Query/push URL for Grafana + Fluent Bit: http://loki-gateway.loki.svc.cluster.local:80 # diff --git a/clusters/noble/bootstrap/newt/README.md b/clusters/noble/bootstrap/newt/README.md index 5d9d937..b4cc011 100644 --- a/clusters/noble/bootstrap/newt/README.md +++ b/clusters/noble/bootstrap/newt/README.md @@ -41,7 +41,7 @@ helm repo add fossorial https://charts.fossorial.io helm repo update helm upgrade --install newt fossorial/newt \ --namespace newt \ - --version 1.2.0 \ + --version 1.5.0 \ -f clusters/noble/bootstrap/newt/values.yaml \ --wait ``` diff --git a/clusters/noble/bootstrap/newt/values.yaml b/clusters/noble/bootstrap/newt/values.yaml index 4570608..fcba1a1 100644 --- a/clusters/noble/bootstrap/newt/values.yaml +++ b/clusters/noble/bootstrap/newt/values.yaml @@ -10,7 +10,7 @@ # # helm repo add fossorial https://charts.fossorial.io # helm upgrade --install newt fossorial/newt -n newt \ -# --version 1.2.0 -f clusters/noble/bootstrap/newt/values.yaml --wait +# --version 1.5.0 -f clusters/noble/bootstrap/newt/values.yaml --wait # # See README.md for Pangolin Integration API (domains + HTTP resources + CNAME). diff --git a/clusters/noble/bootstrap/traefik/README.md b/clusters/noble/bootstrap/traefik/README.md index 64905cc..f496cd3 100644 --- a/clusters/noble/bootstrap/traefik/README.md +++ b/clusters/noble/bootstrap/traefik/README.md @@ -15,7 +15,7 @@ helm repo update helm upgrade --install traefik traefik/traefik \ --namespace traefik \ - --version 39.0.6 \ + --version 40.2.0 \ -f clusters/noble/bootstrap/traefik/values.yaml \ --wait ``` diff --git a/clusters/noble/bootstrap/traefik/values.yaml b/clusters/noble/bootstrap/traefik/values.yaml index c4c2368..f07d138 100644 --- a/clusters/noble/bootstrap/traefik/values.yaml +++ b/clusters/noble/bootstrap/traefik/values.yaml @@ -1,12 +1,12 @@ # Traefik ingress controller — noble lab # -# Chart: traefik/traefik — pin version on the helm command (e.g. 39.0.6). +# Chart: traefik/traefik — pin version on the helm command (e.g. 40.2.0). # DNS: point *.apps.noble.lab.pcenicni.dev to the LoadBalancer IP below. # # kubectl apply -f clusters/noble/bootstrap/traefik/namespace.yaml # helm repo add traefik https://traefik.github.io/charts # helm upgrade --install traefik traefik/traefik -n traefik \ -# --version 39.0.6 -f clusters/noble/bootstrap/traefik/values.yaml --wait +# --version 40.2.0 -f clusters/noble/bootstrap/traefik/values.yaml --wait service: type: LoadBalancer diff --git a/clusters/noble/bootstrap/velero/README.md b/clusters/noble/bootstrap/velero/README.md index 2ec5559..834c8f2 100644 --- a/clusters/noble/bootstrap/velero/README.md +++ b/clusters/noble/bootstrap/velero/README.md @@ -5,7 +5,7 @@ Ansible-managed core stack — **not** reconciled by Argo CD (`clusters/noble/ap ## What you get - **No web UI** — Velero is operated with the **`velero`** CLI and **`kubectl`** (Backup, Schedule, Restore CRDs). Metrics are exposed for Prometheus; there is no first-party dashboard in this chart. -- **vmware-tanzu/velero** Helm chart (**12.0.0** → Velero **1.18.0**) in namespace **`velero`** +- **vmware-tanzu/velero** Helm chart (**12.0.1** → Velero **1.18.0**) in namespace **`velero`** - **AWS plugin** init container for **S3-compatible** object storage (`velero/velero-plugin-for-aws:v1.14.0`) - **CSI snapshots** via Velero’s built-in CSI support (`EnableCSI`) and **VolumeSnapshotLocation** `velero.io/csi` (no separate CSI plugin image for Velero ≥ 1.14) - **Prometheus** scraping: **ServiceMonitor** labeled for **kube-prometheus** (`release: kube-prometheus`) @@ -99,7 +99,7 @@ From repo root: kubectl apply -f clusters/noble/bootstrap/velero/namespace.yaml # Create velero-cloud-credentials (see above), then: helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts && helm repo update -helm upgrade --install velero vmware-tanzu/velero -n velero --version 12.0.0 \ +helm upgrade --install velero vmware-tanzu/velero -n velero --version 12.0.1 \ -f clusters/noble/bootstrap/velero/values.yaml \ --set-string configuration.backupStorageLocation[0].bucket=YOUR_BUCKET \ --set-string configuration.backupStorageLocation[0].config.s3Url=https://YOUR-S3-ENDPOINT \ diff --git a/clusters/noble/bootstrap/velero/values.yaml b/clusters/noble/bootstrap/velero/values.yaml index 401c2e5..2eb2c22 100644 --- a/clusters/noble/bootstrap/velero/values.yaml +++ b/clusters/noble/bootstrap/velero/values.yaml @@ -2,10 +2,10 @@ # Install: **ansible/playbooks/noble.yml** role **noble_velero** (override S3 settings via **noble_velero_*** vars). # Requires Secret **velero/velero-cloud-credentials** key **cloud** (INI for AWS plugin — see README). # -# Chart: vmware-tanzu/velero — pin version on install (e.g. 12.0.0 / Velero 1.18.0). +# Chart: vmware-tanzu/velero — pin version on install (e.g. 12.0.1 / Velero 1.18.0). # helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts && helm repo update # kubectl apply -f clusters/noble/bootstrap/velero/namespace.yaml -# helm upgrade --install velero vmware-tanzu/velero -n velero --version 12.0.0 -f clusters/noble/bootstrap/velero/values.yaml +# helm upgrade --install velero vmware-tanzu/velero -n velero --version 12.0.1 -f clusters/noble/bootstrap/velero/values.yaml initContainers: - name: velero-plugin-for-aws diff --git a/clusters/noble/wip/eclipse-che/application-operator.yaml b/clusters/noble/wip/eclipse-che/application-operator.yaml index 8da44c1..0bad4df 100644 --- a/clusters/noble/wip/eclipse-che/application-operator.yaml +++ b/clusters/noble/wip/eclipse-che/application-operator.yaml @@ -13,7 +13,7 @@ spec: source: repoURL: https://eclipse-che.github.io/che-operator/charts chart: eclipse-che - targetRevision: 7.116.0 + targetRevision: 7.117.0 helm: releaseName: eclipse-che destination: diff --git a/docs/architecture.md b/docs/architecture.md index 995c8f0..983f4df 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -198,17 +198,17 @@ See [`talos/CLUSTER-BUILD.md`](../talos/CLUSTER-BUILD.md) for the authoritative | Component | Chart / app (from CLUSTER-BUILD.md) | |-----------|-------------------------------------| | Talos / Kubernetes | v1.12.6 / 1.35.2 bundled | -| Cilium | Helm 1.16.6 | +| Cilium | Helm 1.19.4 | | MetalLB | 0.15.3 | -| Longhorn | 1.11.1 | -| Traefik | 39.0.6 / app v3.6.11 | -| cert-manager | v1.20.0 | -| Argo CD | 9.4.17 / app v3.3.6 | -| kube-prometheus-stack | 82.15.1 | -| Loki / Fluent Bit | 6.55.0 / 0.56.0 | +| Longhorn | 1.11.2 | +| Traefik | 40.2.0 / app v3.7.1 | +| cert-manager | v1.20.2 | +| Argo CD | 9.5.14 / app v3.4.2 | +| kube-prometheus-stack | 85.0.3 | +| Loki / Fluent Bit | 7.0.0 / 0.57.5 | | SOPS (client tooling) | see `clusters/noble/secrets/README.md` | -| Kyverno | 3.7.1 / policies 3.7.1 | -| Newt | 1.2.0 / app 1.10.1 | +| Kyverno | 3.8.0 / policies 3.8.0 | +| Newt | 1.5.0 / app 1.12.5 | --- diff --git a/talos/CLUSTER-BUILD.md b/talos/CLUSTER-BUILD.md index 8190f84..c7fb324 100644 --- a/talos/CLUSTER-BUILD.md +++ b/talos/CLUSTER-BUILD.md @@ -7,20 +7,20 @@ This document is the **exported TODO** for the **noble** Talos cluster (4 nodes) Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (**`talos/runbooks/`**, **SOPS**-encrypted secrets in **`clusters/noble/secrets/`**). **Next focus:** optional **Alertmanager** receivers (Slack/PagerDuty); tighten **RBAC** (Headlamp / cluster-admin); **Cilium** policies for other namespaces as needed; enable **Mend Renovate** for PRs; Pangolin/sample Ingress; **Velero** backup/restore drill after S3 credentials are set (**`noble_velero_install`**). - **Talos** v1.12.6 (target) / **Kubernetes** as bundled — four nodes **Ready** unless upgrading; **`talosctl health`**; **`talos/kubeconfig`** is **local only** (gitignored — never commit; regenerate with `talosctl kubeconfig` per `talos/README.md`). **Image Factory (nocloud installer):** `factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6` -- **Cilium** Helm **1.16.6** / app **1.16.6** (`clusters/noble/bootstrap/cilium/`, phase 1 values). +- **Cilium** Helm **1.19.4** / app **1.19.4** (`clusters/noble/bootstrap/cilium/`, phase 1 values). - **CSI Volume Snapshot** — **external-snapshotter** **v8.5.0** CRDs + **`registry.k8s.io/sig-storage/snapshot-controller`** (`clusters/noble/bootstrap/csi-snapshot-controller/`, Ansible **`noble_csi_snapshot_controller`**). - **MetalLB** Helm **0.15.3** / app **v0.15.3**; **IPAddressPool** `noble-l2` + **L2Advertisement** — pool **`192.168.50.210`–`192.168.50.229`**. - **kube-vip** DaemonSet **3/3** on control planes; VIP **`192.168.50.230`** on **`ens18`** (`vip_subnet` **`/32`** required — bare **`32`** breaks parsing). **Verified from workstation:** `kubectl config set-cluster noble --server=https://192.168.50.230:6443` then **`kubectl get --raw /healthz`** → **`ok`** (`talos/kubeconfig`; see `talos/README.md`). - **metrics-server** Helm **3.13.0** / app **v0.8.0** — `clusters/noble/bootstrap/metrics-server/values.yaml` (`--kubelet-insecure-tls` for Talos); **`kubectl top nodes`** works. -- **Longhorn** Helm **1.11.1** / app **v1.11.1** — `clusters/noble/bootstrap/longhorn/` (PSA **privileged** namespace, `defaultDataPath` `/var/mnt/longhorn`, `preUpgradeChecker` enabled); **StorageClass** `longhorn` (default); **`nodes.longhorn.io`** all **Ready**; test **PVC** `Bound` on `longhorn`. -- **Traefik** Helm **39.0.6** / app **v3.6.11** — `clusters/noble/bootstrap/traefik/`; **`Service`** **`LoadBalancer`** **`EXTERNAL-IP` `192.168.50.211`**; **`IngressClass`** **`traefik`** (default). Point **`*.apps.noble.lab.pcenicni.dev`** at **`192.168.50.211`**. MetalLB pool verification was done before replacing the temporary nginx test with Traefik. -- **cert-manager** Helm **v1.20.0** / app **v1.20.0** — `clusters/noble/bootstrap/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (**DNS-01** via **Cloudflare** for **`pcenicni.dev`**, Secret **`cloudflare-dns-api-token`** in **`cert-manager`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox). -- **Newt** Helm **1.2.0** / app **1.10.1** — `clusters/noble/bootstrap/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). Store credentials in git with **SOPS** (`clusters/noble/secrets/newt-pangolin-auth.secret.yaml`, **`age-key.txt`**, **`.sops.yaml`**) — see **`clusters/noble/secrets/README.md`**. **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolin’s domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/bootstrap/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver). -- **Argo CD** Helm **9.4.17** / app **v3.3.6** — `clusters/noble/bootstrap/argocd/`; **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`**; **`noble-bootstrap-root`** → **`clusters/noble/bootstrap`** (kustomize includes **`clusters/noble/apps/`** for optional leaf **`Application`** manifests; manual sync until **`argocd/README.md`** §5 after **`noble.yml`**). Edit **`repoURL`** in **`bootstrap-root-application.yaml`** before applying. -- **kube-prometheus-stack** — Helm chart **82.15.1** — `clusters/noble/bootstrap/kube-prometheus-stack/` (**namespace** `monitoring`, PSA **privileged** — **node-exporter** needs host mounts); **Longhorn** PVCs for Prometheus, Grafana, Alertmanager; **node-exporter** DaemonSet **4/4**. **Grafana Ingress:** **`https://grafana.apps.noble.lab.pcenicni.dev`** (Traefik **`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **Loki** datasource in Grafana: ConfigMap **`clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** (sidecar label **`grafana_datasource: "1"`**) — not via **`grafana.additionalDataSources`** in the chart. **`helm upgrade --install` with `--wait` is silent until done** — use **`--timeout 30m`**; Grafana admin: Secret **`kube-prometheus-grafana`**, keys **`admin-user`** / **`admin-password`**. -- **Loki** + **Fluent Bit** — **`grafana/loki` 6.55.0** SingleBinary + **filesystem** on **Longhorn** (`clusters/noble/bootstrap/loki/`); **`loki.auth_enabled: false`**; **`chunksCache.enabled: false`** (no memcached chunk cache). **`fluent/fluent-bit` 0.56.0** → **`loki-gateway.loki.svc:80`** (`clusters/noble/bootstrap/fluent-bit/`); **`logging`** PSA **privileged**. **Grafana Explore:** **`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** then **Explore → Loki** (e.g. `{job="fluent-bit"}`). +- **Longhorn** Helm **1.11.2** / app **v1.11.2** — `clusters/noble/bootstrap/longhorn/` (PSA **privileged** namespace, `defaultDataPath` `/var/mnt/longhorn`, `preUpgradeChecker` enabled); **StorageClass** `longhorn` (default); **`nodes.longhorn.io`** all **Ready**; test **PVC** `Bound` on `longhorn`. +- **Traefik** Helm **40.2.0** / app **v3.7.1** — `clusters/noble/bootstrap/traefik/`; **`Service`** **`LoadBalancer`** **`EXTERNAL-IP` `192.168.50.211`**; **`IngressClass`** **`traefik`** (default). Point **`*.apps.noble.lab.pcenicni.dev`** at **`192.168.50.211`**. MetalLB pool verification was done before replacing the temporary nginx test with Traefik. +- **cert-manager** Helm **v1.20.2** / app **v1.20.2** — `clusters/noble/bootstrap/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (**DNS-01** via **Cloudflare** for **`pcenicni.dev`**, Secret **`cloudflare-dns-api-token`** in **`cert-manager`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox). +- **Newt** Helm **1.5.0** / app **1.12.5** — `clusters/noble/bootstrap/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). Store credentials in git with **SOPS** (`clusters/noble/secrets/newt-pangolin-auth.secret.yaml`, **`age-key.txt`**, **`.sops.yaml`**) — see **`clusters/noble/secrets/README.md`**. **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolin’s domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/bootstrap/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver). +- **Argo CD** Helm **9.5.14** / app **v3.4.2** — `clusters/noble/bootstrap/argocd/`; **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`**; **`noble-bootstrap-root`** → **`clusters/noble/bootstrap`** (kustomize includes **`clusters/noble/apps/`** for optional leaf **`Application`** manifests; manual sync until **`argocd/README.md`** §5 after **`noble.yml`**). Edit **`repoURL`** in **`bootstrap-root-application.yaml`** before applying. +- **kube-prometheus-stack** — Helm chart **85.0.3** — `clusters/noble/bootstrap/kube-prometheus-stack/` (**namespace** `monitoring`, PSA **privileged** — **node-exporter** needs host mounts); **Longhorn** PVCs for Prometheus, Grafana, Alertmanager; **node-exporter** DaemonSet **4/4**. **Grafana Ingress:** **`https://grafana.apps.noble.lab.pcenicni.dev`** (Traefik **`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **Loki** datasource in Grafana: ConfigMap **`clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** (sidecar label **`grafana_datasource: "1"`**) — not via **`grafana.additionalDataSources`** in the chart. **`helm upgrade --install` with `--wait` is silent until done** — use **`--timeout 30m`**; Grafana admin: Secret **`kube-prometheus-grafana`**, keys **`admin-user`** / **`admin-password`**. +- **Loki** + **Fluent Bit** — **`grafana/loki` 7.0.0** SingleBinary + **filesystem** on **Longhorn** (`clusters/noble/bootstrap/loki/`); **`loki.auth_enabled: false`**; **`chunksCache.enabled: false`** (no memcached chunk cache). **`fluent/fluent-bit` 0.57.5** → **`loki-gateway.loki.svc:80`** (`clusters/noble/bootstrap/fluent-bit/`); **`logging`** PSA **privileged**. **Grafana Explore:** **`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`** then **Explore → Loki** (e.g. `{job="fluent-bit"}`). - **SOPS** — cluster **`Secret`** manifests under **`clusters/noble/secrets/`** encrypted with **age** (see **`.sops.yaml`**, **`age-key.txt`** gitignored); **`noble.yml`** decrypt-applies when the private key is present. -- **Velero** Helm **12.0.0** / app **v1.18.0** — `clusters/noble/bootstrap/velero/` (**Ansible** **`noble_velero`**, not Argo); **S3-compatible** backup location + **CSI** snapshots (**`EnableCSI`**); enable with **`noble_velero_install`** per **`velero/README.md`**. +- **Velero** Helm **12.0.1** / app **v1.18.0** — `clusters/noble/bootstrap/velero/` (**Ansible** **`noble_velero`**, not Argo); **S3-compatible** backup location + **CSI** snapshots (**`EnableCSI`**); enable with **`noble_velero_install`** per **`velero/README.md`**. - **Still open:** **Renovate** — install **[Mend Renovate](https://github.com/apps/renovate)** (or self-host) so PRs run; optional **Alertmanager** notification channels; optional **sample Ingress + cert + Pangolin** end-to-end; **Argo CD SSO**. ## Inventory @@ -51,20 +51,20 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (**`t - Talos: **v1.12.6** — align `talosctl` client with node image - Talos **Image Factory** (iscsi-tools + util-linux-tools): **`factory.talos.dev/nocloud-installer/249d9135de54962744e917cfe654117000cba369f9152fbab9d055a00aa3664f:v1.12.6`** — same schematic must appear in **`machine.install.image`** after `talhelper genconfig` (bare metal may use `metal-installer/` instead of `nocloud-installer/`) - Kubernetes: **1.35.2** on current nodes (bundled with Talos; not pinned in repo) -- Cilium: **1.16.6** (Helm chart; see `clusters/noble/bootstrap/cilium/README.md`) +- Cilium: **1.19.4** (Helm chart; see `clusters/noble/bootstrap/cilium/README.md`) - MetalLB: **0.15.3** (Helm chart; app **v0.15.3**) - metrics-server: **3.13.0** (Helm chart; app **v0.8.0**) -- Longhorn: **1.11.1** (Helm chart; app **v1.11.1**) -- Traefik: **39.0.6** (Helm chart; app **v3.6.11**) -- cert-manager: **v1.20.0** (Helm chart; app **v1.20.0**) -- Newt (Fossorial): **1.2.0** (Helm chart; app **1.10.1**) -- Argo CD: **9.4.17** (Helm chart `argo/argo-cd`; app **v3.3.6**) -- kube-prometheus-stack: **82.15.1** (Helm chart `prometheus-community/kube-prometheus-stack`; app **v0.89.x** bundle) -- Loki: **6.55.0** (Helm chart `grafana/loki`; app **3.6.7**) -- Fluent Bit: **0.56.0** (Helm chart `fluent/fluent-bit`; app **4.2.3**) -- Kyverno: **3.7.1** (Helm chart `kyverno/kyverno`; app **v1.17.1**); **kyverno-policies** **3.7.1** — **baseline** PSS, **Audit** (`clusters/noble/bootstrap/kyverno/`) -- Headlamp: **0.40.1** (Helm chart `headlamp/headlamp`; app matches chart — see [Artifact Hub](https://artifacthub.io/packages/helm/headlamp/headlamp)) -- Velero: **12.0.0** (Helm chart `vmware-tanzu/velero`; app **v1.18.0**) — **`clusters/noble/bootstrap/velero/`**; AWS plugin **v1.14.0**; Ansible **`noble_velero`** +- Longhorn: **1.11.2** (Helm chart; app **v1.11.2**) +- Traefik: **40.2.0** (Helm chart; app **v3.7.1**) +- cert-manager: **v1.20.2** (Helm chart; app **v1.20.2**) +- Newt (Fossorial): **1.5.0** (Helm chart; app **1.12.5**) +- Argo CD: **9.5.14** (Helm chart `argo/argo-cd`; app **v3.4.2**) +- kube-prometheus-stack: **85.0.3** (Helm chart `prometheus-community/kube-prometheus-stack`; app **v0.90.1** bundle) +- Loki: **7.0.0** (Helm chart `grafana/loki`; app **3.6.7**) +- Fluent Bit: **0.57.5** (Helm chart `fluent/fluent-bit`; app **5.0.5**) +- Kyverno: **3.8.0** (Helm chart `kyverno/kyverno`; app **v1.18.0**); **kyverno-policies** **3.8.0** — **baseline** PSS, **Audit** (`clusters/noble/bootstrap/kyverno/`) +- Headlamp: **0.42.0** (Helm chart `headlamp/headlamp`; app matches chart — see [Artifact Hub](https://artifacthub.io/packages/helm/headlamp/headlamp)) +- Velero: **12.0.1** (Helm chart `vmware-tanzu/velero`; app **v1.18.0**) — **`clusters/noble/bootstrap/velero/`**; AWS plugin **v1.14.0**; Ansible **`noble_velero`** - Renovate: **hosted** (Mend **Renovate** GitHub/GitLab app — no cluster chart) **or** **self-hosted** — pin chart when added ([Helm charts](https://docs.renovatebot.com/helm-charts/), OCI `ghcr.io/renovatebot/charts/renovate`); pair **`renovate.json`** with this repo’s Helm paths under **`clusters/noble/`** ## Repo paths (this workspace) @@ -137,10 +137,10 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (**`t **Install order:** **Cilium** → **Volume Snapshot CRDs + snapshot-controller** (`clusters/noble/bootstrap/csi-snapshot-controller/`, Ansible **`noble_csi_snapshot_controller`**) → **metrics-server** → **Longhorn** (Talos disk + Helm) → **MetalLB** (Helm → pool manifests) → ingress / certs / DNS as planned. -- [x] **Cilium** (Helm **1.16.6**) — **required** before MetalLB if `cni: none` (`clusters/noble/bootstrap/cilium/`) +- [x] **Cilium** (Helm **1.19.4**) — **required** before MetalLB if `cni: none` (`clusters/noble/bootstrap/cilium/`) - [x] **CSI Volume Snapshot** — CRDs + **`snapshot-controller`** in **`kube-system`** (`clusters/noble/bootstrap/csi-snapshot-controller/`); Ansible **`noble_csi_snapshot_controller`**; verify `kubectl api-resources | grep VolumeSnapshot` - [x] **metrics-server** — Helm **3.13.0**; values in `clusters/noble/bootstrap/metrics-server/values.yaml`; verify `kubectl top nodes` -- [x] **Longhorn** — Talos: user volume + kubelet mounts + extensions (`talos/README.md` §5); Helm **1.11.1**; `kubectl apply -k clusters/noble/bootstrap/longhorn`; verify **`nodes.longhorn.io`** and test PVC **`Bound`** +- [x] **Longhorn** — Talos: user volume + kubelet mounts + extensions (`talos/README.md` §5); Helm **1.11.2**; `kubectl apply -k clusters/noble/bootstrap/longhorn`; verify **`nodes.longhorn.io`** and test PVC **`Bound`** - [x] **MetalLB** — chart installed; **pool + L2** from `clusters/noble/bootstrap/metallb/` applied (`192.168.50.210`–`229`) - [x] **`Service` `LoadBalancer`** / pool check — MetalLB assigns from `210`–`229` (validated before Traefik; temporary nginx test removed in favor of Traefik) - [x] **Traefik** `LoadBalancer` for `*.apps.noble.lab.pcenicni.dev` — `clusters/noble/bootstrap/traefik/`; **`192.168.50.211`** @@ -157,9 +157,9 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (**`t ## Phase D — Observability -- [x] **kube-prometheus-stack** — `kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml` then **`helm upgrade --install`** as in `clusters/noble/bootstrap/kube-prometheus-stack/values.yaml` (chart **82.15.1**); PVCs **`longhorn`**; **`--wait --timeout 30m`** recommended; verify **`kubectl -n monitoring get pods,pvc`** -- [x] **Loki** + **Fluent Bit** + **Grafana Loki datasource** — **order:** **`kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml`** → **`helm upgrade --install loki`** `grafana/loki` **6.55.0** `-f clusters/noble/bootstrap/loki/values.yaml` → **`kubectl apply -f clusters/noble/bootstrap/fluent-bit/namespace.yaml`** → **`helm upgrade --install fluent-bit`** `fluent/fluent-bit` **0.56.0** `-f clusters/noble/bootstrap/fluent-bit/values.yaml` → **`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`**. Verify **Explore → Loki** in Grafana; **`kubectl -n loki get pods,pvc`**, **`kubectl -n logging get pods`** -- [x] **Headlamp** — Kubernetes web UI ([Headlamp](https://headlamp.dev/)); **`helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/`**; **`kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml`** → **`helm upgrade --install headlamp headlamp/headlamp --version 0.40.1 -n headlamp -f clusters/noble/bootstrap/headlamp/values.yaml`**; **Ingress** **`https://headlamp.apps.noble.lab.pcenicni.dev`** (**`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **`values.yaml`:** **`config.sessionTTL: null`** works around chart **0.40.1** / binary mismatch ([headlamp#4883](https://github.com/kubernetes-sigs/headlamp/issues/4883)). **RBAC:** chart defaults are permissive — tighten before LAN-wide exposure; align with **Phase G** hardening. +- [x] **kube-prometheus-stack** — `kubectl apply -f clusters/noble/bootstrap/kube-prometheus-stack/namespace.yaml` then **`helm upgrade --install`** as in `clusters/noble/bootstrap/kube-prometheus-stack/values.yaml` (chart **85.0.3**); PVCs **`longhorn`**; **`--wait --timeout 30m`** recommended; verify **`kubectl -n monitoring get pods,pvc`** +- [x] **Loki** + **Fluent Bit** + **Grafana Loki datasource** — **order:** **`kubectl apply -f clusters/noble/bootstrap/loki/namespace.yaml`** → **`helm upgrade --install loki`** `grafana/loki` **7.0.0** `-f clusters/noble/bootstrap/loki/values.yaml` → **`kubectl apply -f clusters/noble/bootstrap/fluent-bit/namespace.yaml`** → **`helm upgrade --install fluent-bit`** `fluent/fluent-bit` **0.57.5** `-f clusters/noble/bootstrap/fluent-bit/values.yaml` → **`kubectl apply -f clusters/noble/bootstrap/grafana-loki-datasource/loki-datasource.yaml`**. Verify **Explore → Loki** in Grafana; **`kubectl -n loki get pods,pvc`**, **`kubectl -n logging get pods`** +- [x] **Headlamp** — Kubernetes web UI ([Headlamp](https://headlamp.dev/)); **`helm repo add headlamp https://kubernetes-sigs.github.io/headlamp/`**; **`kubectl apply -f clusters/noble/bootstrap/headlamp/namespace.yaml`** → **`helm upgrade --install headlamp headlamp/headlamp --version 0.42.0 -n headlamp -f clusters/noble/bootstrap/headlamp/values.yaml`** (add **`-f values-authentik-oidc.yaml`** when using OIDC); **Ingress** **`https://headlamp.apps.noble.lab.pcenicni.dev`** (**`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **`values.yaml`:** **`config.sessionTTL: null`** omits **`-session-ttl`** if you hit older chart/binary mismatches ([headlamp#4883](https://github.com/kubernetes-sigs/headlamp/issues/4883)); chart **0.42.x** defaults are otherwise fine. **RBAC:** chart defaults are permissive — tighten before LAN-wide exposure; align with **Phase G** hardening. ## Phase E — Secrets @@ -167,7 +167,7 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (**`t ## Phase F — Policy + backups -- [x] **Kyverno** baseline policies — `clusters/noble/bootstrap/kyverno/` (Helm **kyverno** **3.7.1** + **kyverno-policies** **3.7.1**, **baseline** / **Audit** — see **`README.md`**) +- [x] **Kyverno** baseline policies — `clusters/noble/bootstrap/kyverno/` (Helm **kyverno** **3.8.0** + **kyverno-policies** **3.8.0**, **baseline** / **Audit** — see **`README.md`**) - [ ] **Velero** — manifests + Ansible **`noble_velero`** (`clusters/noble/bootstrap/velero/`); enable with **`noble_velero_install: true`** + S3 bucket/URL + **`velero/velero-cloud-credentials`** (see **`velero/README.md`**); optional backup/restore drill ## Phase G — Hardening