Files
home-server/clusters/noble/bootstrap/kube-prometheus-stack/values.yaml

113 lines
3.9 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# kube-prometheus-stack — noble lab (Prometheus Operator + Grafana + Alertmanager + exporters)
#
# Chart: prometheus-community/kube-prometheus-stack — pin version on install (e.g. 82.15.1).
#
# Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`):
#
# kubectl apply -f clusters/noble/apps/kube-prometheus-stack/namespace.yaml
# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
# helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \
# --version 82.15.1 -f clusters/noble/apps/kube-prometheus-stack/values.yaml --wait --timeout 30m
#
# Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes).
# Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 15–30m. To watch progress,
# open a second terminal: kubectl -n monitoring get pods,sts,ds -w
# To apply manifest changes without blocking: omit --wait, then kubectl -n monitoring get pods -w
#
# Grafana admin password: Secret `kube-prometheus-grafana` keys `admin-user` / `admin-password` unless you set grafana.adminPassword.
# Use cert-manager for admission webhook TLS instead of Helm pre-hook Jobs (patch/create Secret).
# Those Jobs are validated by Kyverno before `kyverno-svc` exists during a single Argo sync, which fails.
# Requires cert-manager CRDs (bootstrap before this chart).
prometheusOperator:
  admissionWebhooks:
    # Issue the admission-webhook TLS cert via cert-manager (see header comment:
    # avoids the Helm pre-hook patch/create Jobs that Kyverno blocks mid-sync).
    certManager:
      enabled: true
# --- Longhorn-backed persistence (default chart storage is emptyDir) ---
alertmanager:
  alertmanagerSpec:
    # PVC template for the Alertmanager StatefulSet — persists silences/notification state.
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 5Gi
  ingress:
    enabled: true
    ingressClassName: traefik
    annotations:
      # cert-manager issues the TLS cert referenced below via this ClusterIssuer.
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - alertmanager.apps.noble.lab.pcenicni.dev
    paths:
      - /
    pathType: Prefix
    tls:
      - secretName: alertmanager-apps-noble-tls
        hosts:
          - alertmanager.apps.noble.lab.pcenicni.dev
prometheus:
  prometheusSpec:
    # Keep 15 days of metrics, capped below the 30Gi PVC so compaction has headroom.
    retention: 15d
    retentionSize: 25GB
    # PVC template for the Prometheus StatefulSet (default chart storage is emptyDir).
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 30Gi
  ingress:
    enabled: true
    ingressClassName: traefik
    annotations:
      # cert-manager issues the TLS cert referenced below via this ClusterIssuer.
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - prometheus.apps.noble.lab.pcenicni.dev
    paths:
      - /
    pathType: Prefix
    tls:
      - secretName: prometheus-apps-noble-tls
        hosts:
          - prometheus.apps.noble.lab.pcenicni.dev
grafana:
  persistence:
    enabled: true
    # Run Grafana as a StatefulSet so the PVC follows the pod.
    type: sts
    storageClassName: longhorn
    accessModes:
      - ReadWriteOnce
    size: 10Gi
  # HTTPS via Traefik + cert-manager (ClusterIssuer letsencrypt-prod; same pattern as other *.apps.noble.lab.pcenicni.dev hosts).
  # DNS: grafana.apps.noble.lab.pcenicni.dev → Traefik LoadBalancer (192.168.50.211) — see clusters/noble/apps/traefik/values.yaml
  ingress:
    enabled: true
    ingressClassName: traefik
    # Grafana subchart takes a singular `path`, unlike the alertmanager/prometheus `paths` list.
    path: /
    pathType: Prefix
    annotations:
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - grafana.apps.noble.lab.pcenicni.dev
    tls:
      - secretName: grafana-apps-noble-tls
        hosts:
          - grafana.apps.noble.lab.pcenicni.dev
  grafana.ini:
    server:
      domain: grafana.apps.noble.lab.pcenicni.dev
      root_url: https://grafana.apps.noble.lab.pcenicni.dev/
      # Traefik sets X-Forwarded-*; required for correct redirects and cookies behind the ingress.
      # NOTE(review): confirm `use_proxy_headers` is a valid [server] option for the deployed Grafana version.
      use_proxy_headers: true
  # Loki datasource: apply `clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml` (sidecar ConfigMap) instead of additionalDataSources here.