Update kube-prometheus-stack values.yaml to clarify Loki datasource configuration and enhance observability documentation in CLUSTER-BUILD.md. Include deployment instructions for Loki and Fluent Bit, and mark tasks related to Grafana integration as completed.

This commit is contained in:
Nikholas Pcenicni
2026-03-28 00:56:49 -04:00
parent 7caba0d90c
commit 2b4f568632
7 changed files with 176 additions and 3 deletions

View File

@@ -0,0 +1,10 @@
# Fluent Bit (tail container logs → Loki) — apply before Helm.
# HostPath mounts under /var/log require PSA privileged (same idea as monitoring/node-exporter).
# Namespace that hosts the Fluent Bit DaemonSet.
# All three Pod Security Admission modes are pinned to "privileged" so the
# hostPath log mounts are neither rejected (enforce) nor reported (audit/warn).
apiVersion: v1
kind: Namespace
metadata:
  name: logging
  labels:
    pod-security.kubernetes.io/enforce: privileged
    pod-security.kubernetes.io/audit: privileged
    pod-security.kubernetes.io/warn: privileged

View File

@@ -0,0 +1,40 @@
# Fluent Bit — noble lab (DaemonSet; ship Kubernetes container logs to Loki gateway).
#
# Chart: fluent/fluent-bit — pin version on install (e.g. 0.56.0).
# Install **after** Loki so `loki-gateway.loki.svc` exists.
#
# Talos: only **tail** `/var/log/containers` (no host **systemd** input — journal layout differs from typical Linux).
#
# kubectl apply -f clusters/noble/apps/fluent-bit/namespace.yaml
# helm repo add fluent https://fluent.github.io/helm-charts
# helm repo update
# helm upgrade --install fluent-bit fluent/fluent-bit -n logging \
# --version 0.56.0 -f clusters/noble/apps/fluent-bit/values.yaml --wait --timeout 15m
# Fluent Bit pipeline (classic-mode config embedded as block scalars).
# Each key below replaces the chart's corresponding default section, so only
# what is listed here is active for that stage.
config:
  # Input: tail every container log file and tag records kube.* so the filter
  # and output stages below can match them. Lines longer than the buffer are
  # skipped instead of stalling the file (Skip_Long_Lines On).
  inputs: |
    [INPUT]
        Name tail
        Path /var/log/containers/*.log
        multiline.parser docker, cri
        Tag kube.*
        Mem_Buf_Limit 5MB
        Skip_Long_Lines On
  # Filter: enrich kube.* records with Kubernetes metadata. Merge_Log On with
  # Keep_Log Off lifts a structured `log` payload into the record and drops the
  # original field; the K8S-Logging.* switches honour per-pod annotations.
  filters: |
    [FILTER]
        Name kubernetes
        Match kube.*
        Merge_Log On
        Keep_Log Off
        K8S-Logging.Parser On
        K8S-Logging.Exclude On
  # Output: push kube.* records to the in-cluster Loki gateway over plain HTTP.
  # NOTE(review): the only stream label is job=fluent-bit, so every pod shares
  # one Loki stream — confirm this is intended before relying on label queries.
  outputs: |
    [OUTPUT]
        Name loki
        Match kube.*
        Host loki-gateway.loki.svc.cluster.local
        Port 80
        tls Off
        labels job=fluent-bit

View File

@@ -0,0 +1,27 @@
# Extra Grafana datasource — apply to **monitoring** (same namespace as kube-prometheus Grafana).
# The Grafana sidecar watches ConfigMaps labeled **grafana_datasource: "1"** and loads YAML keys as files.
# Does not require editing the kube-prometheus-stack Helm release.
#
# kubectl apply -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml
#
# Remove with: kubectl delete -f clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml
# ConfigMap carrying one Grafana datasource provisioning file.
# The grafana_datasource label is what the sidecar selects on; the value is
# quoted so it stays a string rather than an integer.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-datasource-loki
  namespace: monitoring
  labels:
    grafana_datasource: "1"
data:
  # Everything under this key is the provisioning file content (its own
  # `apiVersion: 1` is the provisioning schema version, not a Kubernetes one).
  # The datasource is proxied through Grafana, is not the default, and is
  # read-only in the UI.
  loki.yaml: |
    apiVersion: 1
    datasources:
      - name: Loki
        type: loki
        uid: loki
        access: proxy
        url: http://loki-gateway.loki.svc.cluster.local:80
        isDefault: false
        editable: false
        jsonData:
          maxLines: 1000

View File

@@ -70,3 +70,5 @@ grafana:
server:
domain: grafana.apps.noble.lab.pcenicni.dev
root_url: https://grafana.apps.noble.lab.pcenicni.dev/
# Loki datasource: apply `clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml` (sidecar ConfigMap) instead of additionalDataSources here.

View File

@@ -0,0 +1,9 @@
# Loki (SingleBinary + filesystem on Longhorn) — apply before Helm.
# Namespace that hosts the Loki release.
# All three Pod Security Admission modes (enforce, audit, warn) are set to the
# "baseline" level.
apiVersion: v1
kind: Namespace
metadata:
  name: loki
  labels:
    pod-security.kubernetes.io/enforce: baseline
    pod-security.kubernetes.io/audit: baseline
    pod-security.kubernetes.io/warn: baseline

View File

@@ -0,0 +1,78 @@
# Grafana Loki — noble lab (SingleBinary, filesystem on Longhorn; no MinIO/S3).
#
# Chart: grafana/loki — pin version on install (e.g. 6.55.0).
#
# kubectl apply -f clusters/noble/apps/loki/namespace.yaml
# helm repo add grafana https://grafana.github.io/helm-charts
# helm repo update
# helm upgrade --install loki grafana/loki -n loki \
# --version 6.55.0 -f clusters/noble/apps/loki/values.yaml --wait --timeout 30m
#
# Query/push URL for Grafana + Fluent Bit: http://loki-gateway.loki.svc.cluster.local:80
# Run every Loki component inside a single workload.
deploymentMode: SingleBinary

loki:
  # Single-tenant lab: chart default auth_enabled: true requires X-Scope-OrgID on every query/push (Grafana + Fluent Bit break).
  auth_enabled: false
  # One replica only, so each write is stored exactly once.
  commonConfig:
    replication_factor: 1
  # Chunks and index live on the local filesystem (the persistent volume
  # declared under singleBinary.persistence), not in object storage.
  storage:
    type: filesystem
  schemaConfig:
    configs:
      # The date is quoted so it stays a string instead of a YAML timestamp.
      - from: "2024-04-01"
        store: tsdb
        object_store: filesystem
        schema: v13
        index:
          prefix: loki_index_
          period: 24h
  pattern_ingester:
    enabled: false
  limits_config:
    allow_structured_metadata: true
    volume_enabled: true

# The one workload that actually runs in SingleBinary mode.
# NOTE(review): no retention settings appear in this file — confirm the 30Gi
# volume is sized for the log history you expect to keep.
singleBinary:
  replicas: 1
  persistence:
    enabled: true
    storageClass: longhorn
    size: 30Gi

# Zero out the workloads that belong to the other deployment modes.
backend:
  replicas: 0
read:
  replicas: 0
write:
  replicas: 0
ingester:
  replicas: 0
querier:
  replicas: 0
queryFrontend:
  replicas: 0
queryScheduler:
  replicas: 0
distributor:
  replicas: 0
compactor:
  replicas: 0
indexGateway:
  replicas: 0
bloomCompactor:
  replicas: 0
bloomGateway:
  replicas: 0

# No bundled MinIO: storage is the filesystem configured above.
minio:
  enabled: false

# Gateway stays on: it provides the loki-gateway service that Grafana and
# Fluent Bit address.
gateway:
  enabled: true

# Memcached chunk cache: chart default is ~8Gi RAM requests; even 512Mi can stay Pending on small clusters (affinity).
# Homelab: disable — Loki works without it; queries may be slightly slower under load.
chunksCache:
  enabled: false