Files
home-server/talos/upgrade-talos-1.8.4-to-1.12.5.sh

126 lines
3.2 KiB
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail

# Safe Talos rolling upgrade script.
#
# Documented upgrade path (one minor release per hop):
#   1.8.4 -> 1.9.5 -> 1.10.7 -> 1.11.6 -> 1.12.5
# Node order within each hop: cp-1, cp-2, cp-3, worker-1
#
# Environment overrides:
#   TALOSCONFIG_PATH  talosconfig file handed to talosctl
#                     (default: <parent of this script's dir>/talos/clusterconfig/talosconfig)
#   ENDPOINT          Talos API endpoint used for the cluster health checks
#                     (default: 192.168.50.230)

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
TALOSCONFIG_PATH="${TALOSCONFIG_PATH:-${REPO_ROOT}/talos/clusterconfig/talosconfig}"
ENDPOINT="${ENDPOINT:-192.168.50.230}"
CONTROL_PLANES=("192.168.50.20" "192.168.50.30" "192.168.50.40")
WORKERS=("192.168.50.10")

# Versions to apply, in order; every node is brought to one entry before the
# next entry is started.
# NOTE(review): only the final hop is listed, although the path documented
# above has four hops. Presumably the earlier hops were already applied;
# confirm before running this against a node that is still on an older minor.
UPGRADE_VERSIONS=("v1.12.5")

# Nothing below is meant to reassign the configuration.
readonly SCRIPT_DIR REPO_ROOT TALOSCONFIG_PATH ENDPOINT
readonly -a CONTROL_PLANES WORKERS UPGRADE_VERSIONS

# Fail early, and on stderr, when the talosconfig file is missing.
if [[ ! -f "${TALOSCONFIG_PATH}" ]]; then
  echo "Talos config not found: ${TALOSCONFIG_PATH}" >&2
  echo "Set TALOSCONFIG_PATH=/absolute/path/to/talosconfig and retry." >&2
  exit 1
fi
#######################################
# Run talosctl with this script's talosconfig.
# Globals:   TALOSCONFIG_PATH (read)
# Arguments: forwarded unchanged to talosctl after --talosconfig <path>
# Returns:   the exit status of talosctl
#######################################
run_talosctl() {
  local -a talos_cmd=(talosctl --talosconfig "${TALOSCONFIG_PATH}")
  "${talos_cmd[@]}" "$@"
}
#######################################
# Print a version string without a single leading "v", if present.
# Arguments: $1 - version string, e.g. "v1.12.5" or "1.12.5"
# Outputs:   the version without the leading "v", on stdout
#######################################
normalize_version() {
  local raw="$1"
  case "${raw}" in
    v*) echo "${raw:1}" ;;
    *) echo "${raw}" ;;
  esac
}
#######################################
# Test whether the first version is greater than or equal to the second.
# Both arguments are passed through normalize_version, then ordered with
# `sort -V`; the test succeeds when the first version sorts last.
# Arguments: $1 - version to test
#            $2 - version to compare against
# Returns:   0 if $1 >= $2, non-zero otherwise
#######################################
version_ge() {
  local candidate baseline
  candidate="$(normalize_version "$1")"
  baseline="$(normalize_version "$2")"
  [[ "${candidate}" == "$(
    printf '%s\n' "${candidate}" "${baseline}" \
      | sort -V \
      | tail -n1
  )" ]]
}
#######################################
# Print the server-side Talos tag reported for one node.
# Runs `talosctl version` against the node and scans the output for the
# "NODE:" line matching the node, then prints the value of the first "Tag:"
# line that follows it.
# Arguments: $1 - node IP address
# Outputs:   the tag (e.g. "v1.8.4") on stdout when found
# Returns:   0 if a tag was found, 1 otherwise
#######################################
get_node_talos_version() {
  local node_ip="$1"
  local version_report
  local server_tag

  # Best effort on purpose: a failing talosctl call yields empty or partial
  # output here, and the function then reports "not found" via its status.
  version_report="$(run_talosctl -n "${node_ip}" version 2>/dev/null || true)"

  # Only the Tag that follows the requested node's NODE line is accepted.
  server_tag="$(
    awk -v node="${node_ip}" '
      $1 == "NODE:" && $2 == node { in_node = 1; next }
      in_node && $1 == "Tag:" { print $2; exit }
    ' <<<"${version_report}"
  )"

  if [[ -z "${server_tag}" ]]; then
    return 1
  fi
  echo "${server_tag}"
  return 0
}
#######################################
# Run the Talos cluster health check and list the Kubernetes nodes.
# Globals:   ENDPOINT (read), CONTROL_PLANES (read; first entry is the
#            node the health command is addressed to)
# Outputs:   a progress line, then the talosctl and kubectl output
# Returns:   status of the last command (kubectl); under `set -e` a
#            failing health check aborts the script first
#######################################
check_cluster_ready() {
  printf '%s\n' "Checking cluster health via endpoint ${ENDPOINT}..."
  local -a health_args=(-e "${ENDPOINT}" -n "${CONTROL_PLANES[0]}" health)
  run_talosctl "${health_args[@]}"
  kubectl get nodes -o wide
}
#######################################
# Upgrade a single node to the given Talos version, then verify the cluster.
# The node is skipped when its reported version is already at or above the
# target. When the current version cannot be read, the upgrade proceeds.
# Globals:   ENDPOINT (read), CONTROL_PLANES (read; first entry is the
#            node the health command is addressed to)
# Arguments: $1 - node IP address
#            $2 - target version tag, e.g. "v1.12.5"
# Outputs:   progress messages plus talosctl/kubectl output on stdout
# Returns:   0 on success or skip; under `set -e` any failing talosctl or
#            kubectl call aborts the script
#######################################
upgrade_node_to_version() {
  local node_ip="$1"
  local version="$2"
  local image="ghcr.io/siderolabs/installer:${version}"
  local current_version=""

  echo
  echo "=== Upgrading node ${node_ip} to ${version} ==="

  if current_version="$(get_node_talos_version "${node_ip}")"; then
    echo "Current Talos version on ${node_ip}: ${current_version}"
    if version_ge "${current_version}" "${version}"; then
      echo "Node ${node_ip} already at or above ${version}; skipping upgrade/reboot."
      return 0
    fi
  else
    echo "Could not determine current server version for ${node_ip}; continuing with upgrade."
  fi

  # `talosctl upgrade` (without --stage) installs the image and reboots the
  # node into it as part of the upgrade. No explicit `talosctl reboot` is
  # issued afterwards: it would only cycle the freshly upgraded node a second
  # time.
  run_talosctl -n "${node_ip}" upgrade --image "${image}"

  echo "Waiting for cluster and node health after ${node_ip} reboot..."
  run_talosctl -e "${ENDPOINT}" -n "${CONTROL_PLANES[0]}" health
  run_talosctl -n "${node_ip}" version
  kubectl get nodes -o wide
}
#######################################
# Entry point: print the plan, check the cluster, then for every version in
# UPGRADE_VERSIONS upgrade the control planes followed by the workers, and
# re-check the cluster after each hop.
# Globals:   TALOSCONFIG_PATH, CONTROL_PLANES, WORKERS, UPGRADE_VERSIONS (read)
#######################################
main() {
  local hop
  local target_node

  echo "Using TALOSCONFIG: ${TALOSCONFIG_PATH}"
  echo "Control planes: ${CONTROL_PLANES[*]}"
  echo "Workers: ${WORKERS[*]}"
  echo "Upgrade hops: ${UPGRADE_VERSIONS[*]}"
  echo

  check_cluster_ready

  for hop in "${UPGRADE_VERSIONS[@]}"; do
    echo
    echo "##### Starting upgrade hop ${hop} #####"

    # Control planes first, workers afterwards, each in array order.
    for target_node in "${CONTROL_PLANES[@]}" "${WORKERS[@]}"; do
      upgrade_node_to_version "${target_node}" "${hop}"
    done

    echo "Completed hop ${hop}. Verifying cluster state..."
    check_cluster_ready
  done

  echo
  echo "All upgrade hops complete."
  run_talosctl version
  kubectl get nodes -o wide
}

main "$@"