Refactor noble cluster configurations by removing deprecated Argo CD application management files and transitioning to a streamlined Ansible-driven installation approach. Update kustomization.yaml files to reflect the new structure, ensuring clarity on resource management. Introduce new namespaces and configurations for cert-manager, external-secrets, and logging components, enhancing the overall deployment process. Add detailed README.md documentation for each component to guide users through the setup and management of the noble lab environment.
This commit is contained in:
112
clusters/noble/bootstrap/kube-prometheus-stack/values.yaml
Normal file
112
clusters/noble/bootstrap/kube-prometheus-stack/values.yaml
Normal file
@@ -0,0 +1,112 @@
# kube-prometheus-stack — noble lab (Prometheus Operator + Grafana + Alertmanager + exporters)
#
# Chart: prometheus-community/kube-prometheus-stack — pin version on install (e.g. 82.15.1).
#
# Install (use one terminal; chain with && so `helm upgrade` always runs after `helm repo update`):
#
#   kubectl apply -f clusters/noble/apps/kube-prometheus-stack/namespace.yaml
#   helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
#   helm repo update && helm upgrade --install kube-prometheus prometheus-community/kube-prometheus-stack -n monitoring \
#     --version 82.15.1 -f clusters/noble/apps/kube-prometheus-stack/values.yaml --wait --timeout 30m
#
# Why it looks "stalled": with --wait, Helm prints almost nothing until the release finishes (can be many minutes).
# Do not use --timeout 5m for first install — Longhorn PVCs + StatefulSets often need 15–30m. To watch progress,
# open a second terminal: kubectl -n monitoring get pods,sts,ds -w
# To apply manifest changes without blocking: omit --wait, then kubectl -n monitoring get pods -w
#
# Grafana admin password: Secret `kube-prometheus-grafana` keys `admin-user` / `admin-password` unless you set grafana.adminPassword.

# Use cert-manager for admission webhook TLS instead of Helm pre-hook Jobs (patch/create Secret).
# Those Jobs are validated by Kyverno before `kyverno-svc` exists during a single Argo sync, which fails.
# Requires cert-manager CRDs (bootstrap before this chart).
prometheusOperator:
  admissionWebhooks:
    certManager:
      enabled: true

# --- Longhorn-backed persistence (default chart storage is emptyDir) ---
alertmanager:
  alertmanagerSpec:
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 5Gi
  ingress:
    enabled: true
    ingressClassName: traefik
    annotations:
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - alertmanager.apps.noble.lab.pcenicni.dev
    paths:
      - /
    pathType: Prefix
    tls:
      - secretName: alertmanager-apps-noble-tls
        hosts:
          - alertmanager.apps.noble.lab.pcenicni.dev

prometheus:
  prometheusSpec:
    retention: 15d
    retentionSize: 25GB
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 30Gi
  ingress:
    enabled: true
    ingressClassName: traefik
    annotations:
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - prometheus.apps.noble.lab.pcenicni.dev
    paths:
      - /
    pathType: Prefix
    tls:
      - secretName: prometheus-apps-noble-tls
        hosts:
          - prometheus.apps.noble.lab.pcenicni.dev

grafana:
  persistence:
    enabled: true
    type: sts
    storageClassName: longhorn
    accessModes:
      - ReadWriteOnce
    size: 10Gi

  # HTTPS via Traefik + cert-manager (ClusterIssuer letsencrypt-prod; same pattern as other *.apps.noble.lab.pcenicni.dev hosts).
  # DNS: grafana.apps.noble.lab.pcenicni.dev → Traefik LoadBalancer (192.168.50.211) — see clusters/noble/apps/traefik/values.yaml
  ingress:
    enabled: true
    ingressClassName: traefik
    path: /
    pathType: Prefix
    annotations:
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - grafana.apps.noble.lab.pcenicni.dev
    tls:
      - secretName: grafana-apps-noble-tls
        hosts:
          - grafana.apps.noble.lab.pcenicni.dev

  grafana.ini:
    server:
      domain: grafana.apps.noble.lab.pcenicni.dev
      root_url: https://grafana.apps.noble.lab.pcenicni.dev/
      # Traefik sets X-Forwarded-*; required for correct redirects and cookies behind the ingress.
      use_proxy_headers: true

# Loki datasource: apply `clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml` (sidecar ConfigMap) instead of additionalDataSources here.
Reference in New Issue
Block a user