From 7c9fd1fde606f07082fe9006dddb526506e0be21 Mon Sep 17 00:00:00 2001 From: Nikholas Pcenicni <82239765+nikpcenicni@users.noreply.github.com> Date: Thu, 14 May 2026 19:24:44 -0400 Subject: [PATCH] Enhance Headlamp's metrics access by updating the ClusterRoleBinding to include permissions for metrics.k8s.io, nodes, and CustomResourceDefinitions. Update README and RBAC documentation to clarify OIDC user permissions and troubleshooting steps for metrics visibility issues. --- clusters/noble/bootstrap/headlamp/README.md | 4 ++++ .../headlamp/metrics-clusterrolebinding.yaml | 20 +++++++++++++++---- talos/runbooks/rbac.md | 3 +++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/clusters/noble/bootstrap/headlamp/README.md b/clusters/noble/bootstrap/headlamp/README.md index 916688c..ac30d23 100644 --- a/clusters/noble/bootstrap/headlamp/README.md +++ b/clusters/noble/bootstrap/headlamp/README.md @@ -38,6 +38,10 @@ Headlamp logs like **“Request completed successfully”** for **`/plugins`** o 3. **API server logs** often spell out the failure (**invalid bearer token**, wrong **audience**, unknown **issuer**). Check **`kube-apiserver`** logs on a control-plane node if steps 1–2 look correct. 4. **`oidc: email not verified`**: with **`oidc-username-claim: email`**, the API server rejects **`email_verified: false`**. Either set **`oidc-username-claim`** to a non-email claim (this repo uses **`preferred_username`** in **`talos/talconfig.yaml`**) or make Authentik issue **`email_verified: true`** for that user. +## OIDC: no nodes, no CPU/memory, plugins misbehave + +In-cluster Headlamp calls the API **as your OIDC user**, not as the **headlamp** ServiceAccount. The built-in **`edit`** role does not cover **`metrics.k8s.io`** or cluster **nodes**. 
Re-apply **`kubectl apply -k clusters/noble/bootstrap/headlamp`** so **`metrics-clusterrolebinding.yaml`** stays current: it binds **`noble-admins`** to **`headlamp-metrics-reader`**, which adds metrics, **nodes**, and read-only **CustomResourceDefinitions** (helps many plugins). Ensure **metrics-server** (or equivalent) is installed. If the plugin marketplace never loads, check the browser network tab for blocked HTTPS requests to external hosts. + To use another duration (cluster `spec.serviceAccount` / admission limits may cap it): ```bash diff --git a/clusters/noble/bootstrap/headlamp/metrics-clusterrolebinding.yaml b/clusters/noble/bootstrap/headlamp/metrics-clusterrolebinding.yaml index 4df6185..49d1c34 100644 --- a/clusters/noble/bootstrap/headlamp/metrics-clusterrolebinding.yaml +++ b/clusters/noble/bootstrap/headlamp/metrics-clusterrolebinding.yaml @@ -1,7 +1,10 @@ -# Grant Headlamp's ServiceAccount read access to the Kubernetes Metrics API. -# The chart binds headlamp SA to 'edit' (safe default) but 'edit' does not include -# metrics.k8s.io — without this, Headlamp shows no CPU/memory/node data on the dashboard. -# This binding is additive: it does not escalate headlamp beyond 'edit' elsewhere. +# Additive dashboard permissions on top of the built-in **edit** ClusterRole (Helm **clusterRoleBinding.clusterRoleName**). +# The chart binds the Headlamp **ServiceAccount** to **edit**, but **edit** does not cover: +# - **metrics.k8s.io** (no CPU/memory from metrics-server without this) +# - **nodes** / **nodes/status** at cluster scope (cluster overview / node pages stay empty) +# **OIDC** users authenticate as themselves, not the pod SA — the same ClusterRole must be bound to IdP groups +# (e.g. **noble-admins**) or they see 403 on metrics and node list while namespaced resources still work. +# **customresourcedefinitions** (read-only): many Headlamp plugins list CRDs to register views; **edit** alone often omits this. 
--- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -14,6 +17,12 @@ rules: - apiGroups: ["metrics.k8s.io"] resources: ["nodes", "pods"] verbs: ["get", "list"] + - apiGroups: [""] + resources: ["nodes", "nodes/status"] + verbs: ["get", "list", "watch"] + - apiGroups: ["apiextensions.k8s.io"] + resources: ["customresourcedefinitions"] + verbs: ["get", "list", "watch"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding @@ -30,3 +39,6 @@ subjects: - kind: ServiceAccount name: headlamp namespace: headlamp + - apiGroup: rbac.authorization.k8s.io + kind: Group + name: noble-admins diff --git a/talos/runbooks/rbac.md b/talos/runbooks/rbac.md index 7122c5e..e38a6b8 100644 --- a/talos/runbooks/rbac.md +++ b/talos/runbooks/rbac.md @@ -9,6 +9,9 @@ Headlamp sends your **IdP JWT** to the Kubernetes API. **`/me`** is answered by 2. **Ensure control planes can reach** `https://auth.apps.noble.lab.pcenicni.dev/...` (JWKS / discovery). If that URL is unreachable from nodes, OIDC validation fails. 3. **Apply cluster RBAC for OIDC groups**: **`kubectl apply -k clusters/noble/bootstrap/headlamp`** (includes **`oidc-noble-admins-clusterrolebinding.yaml`**). Your user must be in Authentik group **`noble-admins`** and the id_token should carry a **`groups`** claim if you rely on that binding. +**Headlamp OIDC: nodes / CPU-memory metrics / plugins look broken (403 or empty)** +The Helm chart binds the **pod ServiceAccount** only to the built-in **`edit`** ClusterRole, which covers neither **`metrics.k8s.io`** nor cluster-scoped **nodes**; OIDC users call the API as themselves, so a binding on the ServiceAccount alone does not help them. **`metrics-clusterrolebinding.yaml`** binds both the **headlamp** ServiceAccount and the **`noble-admins`** IdP group to the additive **`headlamp-metrics-reader`** ClusterRole (metrics API, **nodes**, read-only **CRDs**). Without **`metrics-server`** (or another **metrics.k8s.io** provider), CPU and memory stay empty even with RBAC. Plugin catalogs that load from the public internet can still fail from the browser (network, ad blockers) unrelated to RBAC. + Quick discovery check (any machine with DNS to Authentik): ```bash