---
# kube-prometheus-stack — noble lab (Prometheus Operator + Grafana + Alertmanager + exporters)
#
# Chart: prometheus-community/kube-prometheus-stack — pin version on install (e.g. 82.15.1).
#
# Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`):
#
#   kubectl apply -f clusters/noble/apps/kube-prometheus-stack/namespace.yaml
#   helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
#   helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \
#     --version 82.15.1 -f clusters/noble/apps/kube-prometheus-stack/values.yaml --wait --timeout 30m
#
# Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes).
# Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 15–30m. To watch progress,
# open a second terminal: kubectl -n monitoring get pods,sts,ds -w
# To apply manifest changes without blocking: omit --wait, then kubectl -n monitoring get pods -w
#
# Grafana admin password: Secret `kube-prometheus-grafana` keys `admin-user` / `admin-password` unless you set grafana.adminPassword.

# Use cert-manager for admission webhook TLS instead of Helm pre-hook Jobs (patch/create Secret).
# Those Jobs are validated by Kyverno before `kyverno-svc` exists during a single Argo sync, which fails.
# Requires cert-manager CRDs (bootstrap before this chart).
prometheusOperator:
  admissionWebhooks:
    certManager:
      enabled: true

# --- Longhorn-backed persistence (default chart storage is emptyDir) ---
alertmanager:
  alertmanagerSpec:
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 5Gi
  ingress:
    enabled: true
    ingressClassName: traefik
    annotations:
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - alertmanager.apps.noble.lab.pcenicni.dev
    paths:
      - /
    pathType: Prefix
    tls:
      - secretName: alertmanager-apps-noble-tls
        hosts:
          - alertmanager.apps.noble.lab.pcenicni.dev

prometheus:
  prometheusSpec:
    retention: 15d
    retentionSize: 25GB
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 30Gi
  ingress:
    enabled: true
    ingressClassName: traefik
    annotations:
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - prometheus.apps.noble.lab.pcenicni.dev
    paths:
      - /
    pathType: Prefix
    tls:
      - secretName: prometheus-apps-noble-tls
        hosts:
          - prometheus.apps.noble.lab.pcenicni.dev

grafana:
  persistence:
    enabled: true
    type: sts
    storageClassName: longhorn
    accessModes:
      - ReadWriteOnce
    size: 10Gi

  # HTTPS via Traefik + cert-manager (ClusterIssuer letsencrypt-prod; same pattern as other *.apps.noble.lab.pcenicni.dev hosts).
  # DNS: grafana.apps.noble.lab.pcenicni.dev → Traefik LoadBalancer (192.168.50.211) — see clusters/noble/apps/traefik/values.yaml
  ingress:
    enabled: true
    ingressClassName: traefik
    path: /
    pathType: Prefix
    annotations:
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - grafana.apps.noble.lab.pcenicni.dev
    tls:
      - secretName: grafana-apps-noble-tls
        hosts:
          - grafana.apps.noble.lab.pcenicni.dev

  grafana.ini:
    server:
      domain: grafana.apps.noble.lab.pcenicni.dev
      root_url: https://grafana.apps.noble.lab.pcenicni.dev/
      # Traefik sets X-Forwarded-*; required for correct redirects and cookies behind the ingress.
      # NOTE(review): `use_proxy_headers` under [server] requires a Grafana version that supports it — confirm against the chart's Grafana appVersion.
      use_proxy_headers: true

# Loki datasource: apply `clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml` (sidecar ConfigMap) instead of additionalDataSources here.