#!/usr/bin/env bash
#
# Recover from GPT on Longhorn data disk: apply wipe-phase config → wipe → restore Longhorn talconfig.
# Prereq: talos/talconfig.yaml is the WIPE phase (no userVolumes longhorn); talhelper genconfig -o out already run.
#
# Subcommands:
#   phase1 | apply   apply the rendered WIPE-phase machine config to every node
#   phase2 | wipe    wipe $DISK on every node, restore the Longhorn talconfig,
#                    re-run talhelper genconfig and apply the result to every node
#
# Environment:
#   TALOSCONFIG   talosctl client config (default: $TALOS_ROOT/out/talosconfig)
#   DISK          disk name handed to `talosctl wipe disk` (default: sdb)
set -euo pipefail

# Directory one level above the directory that contains this script.
TALOS_ROOT="$(cd -- "$(dirname -- "$0")/.." && pwd)"
readonly TALOS_ROOT

# Exported so every talosctl invocation below picks it up from the environment.
export TALOSCONFIG="${TALOSCONFIG:-$TALOS_ROOT/out/talosconfig}"

DISK="${DISK:-sdb}"

# One entry per node: "<node IP>:<machine-config file name under $TALOS_ROOT/out>".
NODES=(
  "192.168.50.10:noble-helium.yaml"
  "192.168.50.20:noble-neon.yaml"
  "192.168.50.30:noble-argon.yaml"
  "192.168.50.40:noble-krypton.yaml"
)
readonly NODES

#######################################
# Report a fatal error and terminate the script.
# Arguments: $* - message words (joined by the first character of IFS)
# Outputs:   "error: <message>" on stderr
# Returns:   never returns; exits with status 1
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Abort before any subcommand runs when the talosctl client config file is absent;
# the message names the talhelper command that writes into $TALOS_ROOT/out.
if [[ ! -f "$TALOSCONFIG" ]]; then
  die "missing $TALOSCONFIG — run: cd $TALOS_ROOT && talhelper genconfig -o out"
fi

#######################################
# Phase 1: apply the rendered WIPE-phase machine config to every node.
# Globals:   NODES (read), TALOS_ROOT (read)
# Arguments: none
# Outputs:   progress lines and next-step hints on stdout
# Returns:   0 on success; exits via die if a rendered config file is missing
#######################################
phase_apply_wipe() {
  local entry ip file

  # Check every rendered config up front so a missing file cannot leave
  # some nodes reconfigured and others untouched.
  for entry in "${NODES[@]}"; do
    file="${entry##*:}"
    [[ -f "$TALOS_ROOT/out/$file" ]] \
      || die "missing $TALOS_ROOT/out/$file — run: cd $TALOS_ROOT && talhelper genconfig -o out"
  done

  echo "=== Phase 1: apply WIPE-phase machine config to every node (releases u-longhorn) ==="
  for entry in "${NODES[@]}"; do
    ip="${entry%%:*}"   # text before the first ':'
    file="${entry##*:}" # text after the last ':'
    echo "Applying $file to $ip ..."
    talosctl apply-config -n "$ip" --file "$TALOS_ROOT/out/$file"
  done

  echo "Reboot all Talos nodes now (or wait for volume controller), then confirm u-longhorn is gone:"
  echo " talosctl get volumestatus -n 192.168.50.20"
  echo "When wipe would succeed, run: $0 phase2"
}

#######################################
# Phase 2 + 3: wipe the data disk on every node, then restore the Longhorn
# talconfig, regenerate machine configs and apply them to every node.
# Globals:   NODES (read), TALOS_ROOT (read), DISK (read)
# Arguments: none
# Outputs:   progress lines on stdout
# Returns:   0 on success; exits via die if the Longhorn talconfig or a
#            regenerated machine config is missing
#######################################
phase_wipe_disks() {
  local entry ip file
  local -r longhorn_talconfig="$TALOS_ROOT/talconfig.with-longhorn.yaml"

  # Confirm the restore source exists BEFORE wiping anything; otherwise the
  # script would stop after the destructive step with nothing to restore from.
  [[ -f "$longhorn_talconfig" ]] \
    || die "missing $longhorn_talconfig — refusing to wipe without a Longhorn talconfig to restore"

  echo "=== Phase 2: wipe data disk $DISK on each node (must NOT be 'in use by volume u-longhorn') ==="
  for entry in "${NODES[@]}"; do
    ip="${entry%%:*}"
    echo "Wiping $DISK on $ip ..."
    talosctl wipe disk "$DISK" -n "$ip"
  done

  echo "=== Phase 3: restore Longhorn talconfig, genconfig, apply to all nodes ==="
  cp -f -- "$longhorn_talconfig" "$TALOS_ROOT/talconfig.yaml"
  # Subshell keeps the caller's working directory unchanged.
  (cd "$TALOS_ROOT" && talhelper genconfig -o out)

  # Make sure genconfig produced a file for every node before applying any.
  for entry in "${NODES[@]}"; do
    file="${entry##*:}"
    [[ -f "$TALOS_ROOT/out/$file" ]] \
      || die "missing $TALOS_ROOT/out/$file after talhelper genconfig"
  done

  for entry in "${NODES[@]}"; do
    ip="${entry%%:*}"
    file="${entry##*:}"
    echo "Applying restored $file to $ip ..."
    talosctl apply-config -n "$ip" --file "$TALOS_ROOT/out/$file"
  done

  echo "Done. Reboot nodes if Longhorn volume does not come up clean."
}

#######################################
# Print the usage summary to stdout.
#######################################
usage() {
  echo "Usage: cd talos && talhelper genconfig -o out && export TALOSCONFIG=\"\$(pwd)/out/talosconfig\""
  echo " $0 phase1 # apply WIPE machine config to all nodes — reboot if u-longhorn lingers"
  echo " DISK=vdb $0 phase2 # wipe disk, restore Longhorn talconfig, genconfig, apply all"
  echo "Env DISK defaults to sdb."
}

#######################################
# Dispatch on the first command-line argument.
# Arguments: $1 - phase1|apply, phase2|wipe, or empty/-h/--help/help for usage
# Returns:   status of the selected phase; exits via die on an unknown argument
#######################################
main() {
  case "${1:-}" in
    phase1 | apply) phase_apply_wipe ;;
    phase2 | wipe) phase_wipe_disks ;;
    "" | -h | --help | help) usage ;;
    *) die "unknown arg: $1" ;;
  esac
}

main "$@"