# kube-prometheus-stack — noble lab (Prometheus Operator + Grafana + Alertmanager + exporters)
#
# Chart: prometheus-community/kube-prometheus-stack — pin version on install (e.g. 82.15.1).
#
# Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`):
#
#   kubectl apply -f clusters/noble/apps/kube-prometheus-stack/namespace.yaml
#   helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
#   helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \
#     --version 82.15.1 -f clusters/noble/apps/kube-prometheus-stack/values.yaml --wait --timeout 30m
#
# Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes).
# Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 15–30m. To watch progress,
# open a second terminal: kubectl -n monitoring get pods,sts,ds -w
# To apply manifest changes without blocking: omit --wait, then kubectl -n monitoring get pods -w
#
# Grafana admin password: Secret `kube-prometheus-grafana` keys `admin-user` / `admin-password` unless you set grafana.adminPassword.

# --- Longhorn-backed persistence (default chart storage is emptyDir) ---

# Alertmanager: 5Gi ReadWriteOnce volume from the `longhorn` StorageClass.
alertmanager:
  alertmanagerSpec:
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 5Gi

# Prometheus: 30Gi ReadWriteOnce volume from the `longhorn` StorageClass.
prometheus:
  prometheusSpec:
    # Time-based and size-based retention limits for stored metrics.
    retention: 15d
    # Set below the 30Gi volume request further down; keep the two in step when resizing.
    retentionSize: 25GB
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 30Gi

grafana:
  # Grafana: 10Gi ReadWriteOnce volume from the `longhorn` StorageClass.
  persistence:
    enabled: true
    # NOTE(review): `sts` presumably selects the Grafana subchart's StatefulSet mode — confirm against the pinned chart version.
    type: sts
    storageClassName: longhorn
    accessModes:
      - ReadWriteOnce
    size: 10Gi

  # HTTPS via Traefik + cert-manager (ClusterIssuer letsencrypt-prod; same pattern as other *.apps.noble.lab.pcenicni.dev hosts).
  # DNS: grafana.apps.noble.lab.pcenicni.dev → Traefik LoadBalancer (192.168.50.211) — see clusters/noble/apps/traefik/values.yaml
  ingress:
    enabled: true
    ingressClassName: traefik
    path: /
    pathType: Prefix
    annotations:
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - grafana.apps.noble.lab.pcenicni.dev
    tls:
      - secretName: grafana-apps-noble-tls
        hosts:
          - grafana.apps.noble.lab.pcenicni.dev

  # Keep `domain` and `root_url` in sync with the ingress host above.
  grafana.ini:
    server:
      domain: grafana.apps.noble.lab.pcenicni.dev
      root_url: https://grafana.apps.noble.lab.pcenicni.dev/

  # Loki datasource: apply `clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml` (sidecar ConfigMap) instead of additionalDataSources here.