Update CLUSTER-BUILD.md to include kube-prometheus-stack Helm chart details, enhance observability phase with Grafana ingress configuration, and clarify deployment instructions for monitoring components. Mark tasks as completed for kube-prometheus-stack installation and PVC binding on Longhorn.
This commit is contained in:
72
clusters/noble/apps/kube-prometheus-stack/values.yaml
Normal file
72
clusters/noble/apps/kube-prometheus-stack/values.yaml
Normal file
@@ -0,0 +1,72 @@
|
||||
# kube-prometheus-stack — noble lab (Prometheus Operator + Grafana + Alertmanager + exporters)
|
||||
#
|
||||
# Chart: prometheus-community/kube-prometheus-stack — pin version on install (e.g. 82.15.1).
|
||||
#
|
||||
# Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`):
|
||||
#
|
||||
# kubectl apply -f clusters/noble/apps/kube-prometheus-stack/namespace.yaml
|
||||
# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
|
||||
# helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \
|
||||
# --version 82.15.1 -f clusters/noble/apps/kube-prometheus-stack/values.yaml --wait --timeout 30m
|
||||
#
|
||||
# Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes).
|
||||
# Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 15–30m. To watch progress,
|
||||
# open a second terminal: kubectl -n monitoring get pods,sts,ds -w
|
||||
# To apply manifest changes without blocking: omit --wait, then kubectl -n monitoring get pods -w
|
||||
#
|
||||
# Grafana admin login: Secret `kube-prometheus-grafana`, keys `admin-user` / `admin-password` (holds the value of grafana.adminPassword when you set one).
|
||||
|
||||
# --- Longhorn-backed persistence (default chart storage is emptyDir) ---
|
||||
# Alertmanager storage: Longhorn-backed PVC (5Gi, ReadWriteOnce).
alertmanager:
  alertmanagerSpec:
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 5Gi
|
||||
|
||||
# Prometheus storage: Longhorn-backed PVC (30Gi, ReadWriteOnce).
prometheus:
  prometheusSpec:
    # Time-based retention limit.
    retention: 15d
    # Size-based retention limit; set below the 30Gi volume requested in storageSpec.
    retentionSize: 25GB
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 30Gi
|
||||
|
||||
# Grafana: Longhorn-backed persistence, Traefik ingress with a cert-manager certificate,
# and server URL settings matching the ingress host.
grafana:
  persistence:
    enabled: true
    # NOTE(review): `sts` should select the Grafana chart's StatefulSet mode — confirm against the pinned chart version.
    type: sts
    storageClassName: longhorn
    accessModes:
      - ReadWriteOnce
    size: 10Gi

  # HTTPS via Traefik + cert-manager (ClusterIssuer letsencrypt-prod; same pattern as other *.apps.noble.lab.pcenicni.dev hosts).
  # DNS: grafana.apps.noble.lab.pcenicni.dev → Traefik LoadBalancer (192.168.50.211) — see clusters/noble/apps/traefik/values.yaml
  ingress:
    enabled: true
    ingressClassName: traefik
    path: /
    pathType: Prefix
    annotations:
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - grafana.apps.noble.lab.pcenicni.dev
    tls:
      # Certificate Secret name for the host listed under it.
      - secretName: grafana-apps-noble-tls
        hosts:
          - grafana.apps.noble.lab.pcenicni.dev

  # Keep domain / root_url in sync with the ingress host above.
  grafana.ini:
    server:
      domain: grafana.apps.noble.lab.pcenicni.dev
      root_url: https://grafana.apps.noble.lab.pcenicni.dev/
|
||||
Reference in New Issue
Block a user