126 lines
3.2 KiB
Bash
Executable File
126 lines
3.2 KiB
Bash
Executable File
#!/usr/bin/env bash

# Strict mode: stop on unhandled command failures, treat unset variables as
# errors, and make a pipeline fail when any of its stages fails.
set -o errexit -o nounset -o pipefail
|
|
|
|
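# Safe Talos rolling upgrade script.
# Documented upgrade path: 1.8.4 -> 1.9.5 -> 1.10.7 -> 1.11.6 -> 1.12.5
# (only the versions listed in UPGRADE_VERSIONS are applied by a given run).
# Node order within each hop: cp-1, cp-2, cp-3, worker-1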
|
|
|
|
# Locate this script on disk; the repository root is taken to be its parent
# directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Both settings can be overridden from the environment; the defaults below are
# used when the variable is unset or empty.
: "${TALOSCONFIG_PATH:=${REPO_ROOT}/talos/clusterconfig/talosconfig}"
: "${ENDPOINT:=192.168.50.230}"

# Node IPs of the cluster members handled by this script.
CONTROL_PLANES=(
  "192.168.50.20"
  "192.168.50.30"
  "192.168.50.40"
)
WORKERS=(
  "192.168.50.10"
)

# Talos versions to roll out, one hop per entry, in the order listed.
UPGRADE_VERSIONS=(
  "v1.12.5"
)
|
|
|
|
# Pre-flight checks: abort before touching the cluster when a prerequisite is
# missing. Diagnostics go to stderr so they never mix with regular output that
# a caller may be capturing or piping.
for required_cmd in talosctl kubectl; do
  if ! command -v "${required_cmd}" >/dev/null 2>&1; then
    echo "Required command not found in PATH: ${required_cmd}" >&2
    exit 1
  fi
done

if [[ ! -f "${TALOSCONFIG_PATH}" ]]; then
  echo "Talos config not found: ${TALOSCONFIG_PATH}" >&2
  echo "Set TALOSCONFIG_PATH=/absolute/path/to/talosconfig and retry." >&2
  exit 1
fi
|
|
|
|
# Invoke talosctl with the configured talosconfig file.
# Globals:   TALOSCONFIG_PATH (read)
# Arguments: forwarded verbatim to talosctl after the --talosconfig option
# Returns:   the exit status of talosctl
run_talosctl() {
  local -a config_args=(--talosconfig "${TALOSCONFIG_PATH}")
  talosctl "${config_args[@]}" "$@"
}
|
|
|
|
# Strip a single leading "v" from a version string.
# Arguments: $1 - version string, with or without a leading "v"
# Outputs:   the version without the leading "v", followed by a newline
normalize_version() {
  local version="$1"
  # printf rather than echo: echo would treat a value such as "-n" or "-e" as
  # one of its own options and print nothing useful.
  printf '%s\n' "${version#v}"
}
|
|
|
|
# Compare two version strings; a leading "v" on either one is ignored.
# Arguments: $1 - version under test, $2 - version to compare against
# Returns:   0 when $1 sorts at or after $2 under `sort -V`, 1 otherwise
version_ge() {
  local candidate baseline
  candidate="$(normalize_version "$1")"
  baseline="$(normalize_version "$2")"

  # The greater of the two ends up on the last line of the version-sorted list.
  if [[ "$(printf '%s\n%s\n' "${candidate}" "${baseline}" | sort -V | tail -n1)" == "${candidate}" ]]; then
    return 0
  fi
  return 1
}
|
|
|
|
# Print the Talos server tag reported by `talosctl version` for one node.
# Arguments: $1 - node IP, passed to talosctl via -n
# Outputs:   the tag on stdout when one is found
# Returns:   0 when a tag was found, 1 otherwise
get_node_talos_version() {
  local node_ip="$1"
  local version_report tag

  # Best effort: talosctl errors are silenced and simply result in no tag.
  version_report="$(run_talosctl -n "${node_ip}" version 2>/dev/null)" || true

  # Take the first "Tag:" line that follows the "NODE: <ip>" line of this node.
  tag="$(
    awk -v node="${node_ip}" '
      $1 == "NODE:" && $2 == node { in_node_block = 1; next }
      in_node_block && $1 == "Tag:" { print $2; exit }
    ' <<<"${version_report}"
  )"

  [[ -n "${tag}" ]] || return 1
  echo "${tag}"
}
|
|
|
|
# Run a Talos health check through the cluster endpoint, then list the
# Kubernetes nodes.
# Globals: ENDPOINT (read), CONTROL_PLANES (read; its first entry is the node
#          passed to the health check)
check_cluster_ready() {
  printf 'Checking cluster health via endpoint %s...\n' "${ENDPOINT}"

  local -a health_args=(-e "${ENDPOINT}" -n "${CONTROL_PLANES[0]}" health)
  run_talosctl "${health_args[@]}"

  kubectl get nodes -o wide
}
|
|
|
|
# Upgrade a single node to the given Talos version and verify the result.
# Globals:   ENDPOINT (read), CONTROL_PLANES (read; first entry is used for the
#            health check)
# Arguments: $1 - node IP
#            $2 - target Talos version tag, used as the installer image tag
# Outputs:   progress messages on stdout, warnings and errors on stderr
# Returns:   0 when the node was already at or above the target, or was upgraded;
#            non-zero when a talosctl/kubectl step fails or the node still
#            reports a version below the target after the upgrade
upgrade_node_to_version() {
  local node_ip="$1"
  local version="$2"
  local image="ghcr.io/siderolabs/installer:${version}"
  local current_version=""
  local upgraded_version=""

  echo
  echo "=== Upgrading node ${node_ip} to ${version} ==="
  if current_version="$(get_node_talos_version "${node_ip}")"; then
    echo "Current Talos version on ${node_ip}: ${current_version}"
    if version_ge "${current_version}" "${version}"; then
      echo "Node ${node_ip} already at or above ${version}; skipping upgrade/reboot."
      return 0
    fi
  else
    echo "Could not determine current server version for ${node_ip}; continuing with upgrade."
  fi

  # `talosctl upgrade` reboots the node into the new image as part of the
  # upgrade itself. Issuing a separate `talosctl reboot` afterwards would take
  # the node down a second time for no benefit, so none is sent.
  run_talosctl -n "${node_ip}" upgrade --image "${image}"

  echo "Waiting for cluster and node health after ${node_ip} reboot..."
  run_talosctl -e "${ENDPOINT}" -n "${CONTROL_PLANES[0]}" health
  run_talosctl -n "${node_ip}" version
  kubectl get nodes -o wide

  # Do not let the rollout move on to the next node if this one did not
  # actually reach the target version.
  if upgraded_version="$(get_node_talos_version "${node_ip}")"; then
    if ! version_ge "${upgraded_version}" "${version}"; then
      echo "Node ${node_ip} reports ${upgraded_version} after upgrade; expected ${version}." >&2
      return 1
    fi
  else
    echo "Could not verify server version for ${node_ip} after upgrade." >&2
  fi
}
|
|
|
|
# Show the effective configuration before doing anything.
printf 'Using TALOSCONFIG: %s\n' "${TALOSCONFIG_PATH}"
printf 'Control planes: %s\n' "${CONTROL_PLANES[*]}"
printf 'Workers: %s\n' "${WORKERS[*]}"
printf 'Upgrade hops: %s\n\n' "${UPGRADE_VERSIONS[*]}"

# Health gate before the first hop.
check_cluster_ready

# One pass per target version: all control planes first, then the workers,
# one node at a time, followed by a cluster check.
for version in "${UPGRADE_VERSIONS[@]}"; do
  printf '\n##### Starting upgrade hop %s #####\n' "${version}"

  for node in "${CONTROL_PLANES[@]}"; do
    upgrade_node_to_version "${node}" "${version}"
  done

  for node in "${WORKERS[@]}"; do
    upgrade_node_to_version "${node}" "${version}"
  done

  printf 'Completed hop %s. Verifying cluster state...\n' "${version}"
  check_cluster_ready
done

# Final summary of versions and node status.
printf '\nAll upgrade hops complete.\n'
run_talosctl version
kubectl get nodes -o wide
|