Enhance monitoring configurations by enabling persistence for Loki and updating storage settings for Prometheus and Alertmanager to use Longhorn. Add Longhorn application to kustomization.yaml for improved storage management.

This commit is contained in:
Nikholas Pcenicni
2026-03-27 16:27:58 -04:00
parent 036f8ef37e
commit 8cacf5f5de
7 changed files with 299 additions and 6 deletions

View File

@@ -0,0 +1,125 @@
#!/usr/bin/env bash
set -euo pipefail
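# Safe Talos rolling upgrade script.
# Documented upgrade path: 1.8.4 -> 1.9.5 -> 1.10.7 -> 1.11.6 -> 1.12.5
# (UPGRADE_VERSIONS below currently lists only the final hop, v1.12.5.)
# Node order within each hop: cp-1, cp-2, cp-3, worker-1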
# Resolve paths relative to this script's location so the script can be
# launched from any working directory. REPO_ROOT is the parent of SCRIPT_DIR.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Both values may be overridden from the environment; the defaults below are
# used only when the variable is unset or empty.
TALOSCONFIG_PATH="${TALOSCONFIG_PATH:-${REPO_ROOT}/talos/clusterconfig/talosconfig}"
ENDPOINT="${ENDPOINT:-192.168.50.230}"

# Node IPs. Within each hop the control planes are upgraded first, then the
# workers, each list in array order.
CONTROL_PLANES=("192.168.50.20" "192.168.50.30" "192.168.50.40")
WORKERS=("192.168.50.10")

# Target Talos versions, applied one hop at a time in the order listed.
UPGRADE_VERSIONS=("v1.12.5")

# Fail fast before touching the cluster if the talosconfig is missing.
# Diagnostics go to stderr so they are not mixed into captured stdout.
if [[ ! -f "${TALOSCONFIG_PATH}" ]]; then
  echo "Talos config not found: ${TALOSCONFIG_PATH}" >&2
  echo "Set TALOSCONFIG_PATH=/absolute/path/to/talosconfig and retry." >&2
  exit 1
fi
#######################################
# Run talosctl pinned to the configured talosconfig file.
# Globals:   TALOSCONFIG_PATH (read)
# Arguments: forwarded verbatim to talosctl after --talosconfig <path>
# Returns:   the exit status of talosctl
#######################################
run_talosctl() {
  # Build the fixed prefix as an array so the config path stays one word.
  local -a talos_cmd=(talosctl --talosconfig "${TALOSCONFIG_PATH}")
  "${talos_cmd[@]}" "$@"
}
#######################################
# Strip one leading "v" from a version tag, if present.
# Arguments: $1 - version string, e.g. "v1.12.5" or "1.12.5"
# Outputs:   the version without the leading "v" on stdout
#######################################
normalize_version() {
  local tag="$1"
  case "${tag}" in
    # Drop only the first character; a second "v" (if any) is kept.
    v*) tag="${tag:1}" ;;
  esac
  echo "${tag}"
}
#######################################
# Test whether one version is greater than or equal to another.
# Both arguments are passed through normalize_version first, so a leading
# "v" is optional on either side.
# Arguments: $1 - left-hand version
#            $2 - right-hand version
# Returns:   0 if $1 >= $2 under `sort -V` ordering, 1 otherwise
#######################################
version_ge() {
  local lhs rhs
  lhs="$(normalize_version "$1")"
  rhs="$(normalize_version "$2")"

  # Version-sort the pair; the last line is the larger one. If that line is
  # the left-hand side (including the equal case), left >= right.
  if [[ "${lhs}" == "$(printf "%s\n%s\n" "${lhs}" "${rhs}" | sort -V | tail -n1)" ]]; then
    return 0
  fi
  return 1
}
#######################################
# Print the Talos version tag reported for a specific node.
# Parses `talosctl version` output: finds the "NODE:" line whose value is the
# requested IP and prints the value of the first "Tag:" line after it.
# Arguments: $1 - node IP address
# Outputs:   the tag (e.g. "v1.11.6") on stdout when found
# Returns:   0 when a tag was found, 1 otherwise
#######################################
get_node_talos_version() {
  local node_ip="$1"
  local raw_version
  local server_tag

  # Best effort: a failing or unreachable talosctl yields empty output here
  # instead of aborting the script; the caller handles the non-zero return.
  raw_version="$(run_talosctl -n "${node_ip}" version 2>/dev/null || true)"

  # Only a Tag that follows the matching NODE line counts, so any Tag line
  # printed before that NODE line is ignored.
  server_tag="$(
    awk -v node="${node_ip}" '
      $1 == "NODE:" && $2 == node { in_node = 1; next }
      in_node && $1 == "Tag:"     { print $2; exit }
    ' <<<"${raw_version}"
  )"

  [[ -n "${server_tag}" ]] || return 1
  echo "${server_tag}"
  return 0
}
#######################################
# Run a cluster health check and list the Kubernetes nodes.
# Globals:   ENDPOINT (read), CONTROL_PLANES (read; first entry is the
#            node the health check is addressed to)
# Outputs:   progress line plus talosctl/kubectl output on stdout
# Returns:   exit status of the final kubectl call
#######################################
check_cluster_ready() {
  local first_cp="${CONTROL_PLANES[0]}"

  printf '%s\n' "Checking cluster health via endpoint ${ENDPOINT}..."
  run_talosctl -e "${ENDPOINT}" -n "${first_cp}" health
  kubectl get nodes -o wide
}
#######################################
# Upgrade a single node to the given Talos version, then re-check health.
# The node is skipped when its reported version is already at or above the
# target. If the current version cannot be determined, the upgrade proceeds.
# Globals:   ENDPOINT (read), CONTROL_PLANES (read; first entry is the
#            node the post-upgrade health check is addressed to)
# Arguments: $1 - node IP address
#            $2 - target version tag, used verbatim as the installer image tag
# Outputs:   progress messages and talosctl/kubectl output on stdout
# Returns:   0 when skipped; otherwise the status of the final kubectl call
#######################################
upgrade_node_to_version() {
  local node_ip="$1"
  local version="$2"
  local installer_image="ghcr.io/siderolabs/installer:${version}"
  local running_version=""

  echo
  echo "=== Upgrading node ${node_ip} to ${version} ==="

  if ! running_version="$(get_node_talos_version "${node_ip}")"; then
    # Unknown version is not treated as fatal: fall through and upgrade.
    echo "Could not determine current server version for ${node_ip}; continuing with upgrade."
  else
    echo "Current Talos version on ${node_ip}: ${running_version}"
    if version_ge "${running_version}" "${version}"; then
      echo "Node ${node_ip} already at or above ${version}; skipping upgrade/reboot."
      return 0
    fi
  fi

  run_talosctl -n "${node_ip}" upgrade --image "${installer_image}"
  # NOTE(review): `talosctl upgrade` is documented to reboot the node as part
  # of the upgrade; confirm whether this explicit extra reboot is still needed.
  run_talosctl -n "${node_ip}" reboot

  echo "Waiting for cluster and node health after ${node_ip} reboot..."
  run_talosctl -e "${ENDPOINT}" -n "${CONTROL_PLANES[0]}" health
  # Show the node's version after the upgrade (informational only; the
  # output is not compared against the target).
  run_talosctl -n "${node_ip}" version
  kubectl get nodes -o wide
}
# ---- Driver ---------------------------------------------------------------
# Print the effective configuration, followed by a blank separator line.
printf '%s\n' \
  "Using TALOSCONFIG: ${TALOSCONFIG_PATH}" \
  "Control planes: ${CONTROL_PLANES[*]}" \
  "Workers: ${WORKERS[*]}" \
  "Upgrade hops: ${UPGRADE_VERSIONS[*]}" \
  ""

# Check cluster health before any node is touched.
check_cluster_ready

# One pass per target version. Within a pass, nodes are handled strictly one
# at a time: all control planes in array order, then all workers.
for version in "${UPGRADE_VERSIONS[@]}"; do
  printf '\n%s\n' "##### Starting upgrade hop ${version} #####"

  for node in "${CONTROL_PLANES[@]}" "${WORKERS[@]}"; do
    upgrade_node_to_version "${node}" "${version}"
  done

  echo "Completed hop ${version}. Verifying cluster state..."
  check_cluster_ready
done

printf '\n%s\n' "All upgrade hops complete."
# Final summary: Talos versions and the Kubernetes node list.
run_talosctl version
kubectl get nodes -o wide