Enhance Authentik role by adding Helm wait timeout for oauth2-proxy and improving task conditions for admin access and OAuth2 provider upserts. Update README with new variable descriptions and adjust Longhorn deployment tasks to ensure proper rollout before Loki installation, enhancing overall deployment reliability.

This commit is contained in:
Nikholas Pcenicni
2026-05-14 15:52:42 -04:00
parent 5e5c6ef671
commit 4bc8da0225
9 changed files with 57 additions and 11 deletions

View File

@@ -82,7 +82,7 @@ ansible-playbook playbooks/noble.yml --tags authentik -e noble_authentik_install
### Variables — `inventory/group_vars/` and role defaults ### Variables — `inventory/group_vars/` and role defaults
- **`inventory/group_vars/all.yml`:** **`noble_newt_install`**, **`noble_velero_install`**, **`noble_authentik_install`**, **`noble_cert_manager_require_cloudflare_secret`**, **`noble_argocd_apply_root_application`**, **`noble_argocd_apply_bootstrap_root_application`**, **`noble_k8s_api_server_override`**, **`noble_k8s_api_server_auto_fallback`**, **`noble_k8s_api_server_fallback`**, **`noble_skip_k8s_health_check`** - **`inventory/group_vars/all.yml`:** **`noble_newt_install`**, **`noble_velero_install`**, **`noble_authentik_install`**, **`noble_cert_manager_require_cloudflare_secret`**, **`noble_argocd_apply_root_application`**, **`noble_argocd_apply_bootstrap_root_application`**, **`noble_k8s_api_server_override`**, **`noble_k8s_api_server_auto_fallback`**, **`noble_k8s_api_server_fallback`**, **`noble_skip_k8s_health_check`**
- **`roles/noble_platform/defaults/main.yml`:** **`noble_apply_sops_secrets`**, **`noble_sops_age_key_file`** (SOPS secrets under **`clusters/noble/secrets/`**) - **`roles/noble_platform/defaults/main.yml`:** **`noble_apply_sops_secrets`**, **`noble_sops_age_key_file`**, **`noble_platform_loki_helm_wait_timeout`**, **`noble_platform_wait_longhorn_csi_before_loki`**, **`noble_platform_longhorn_csi_rollout_timeout`**
## Roles ## Roles

View File

@@ -14,6 +14,8 @@ noble_authentik_namespace: authentik
# Helm release name (deployments: **{release}-server**, **{release}-worker**). # Helm release name (deployments: **{release}-server**, **{release}-worker**).
noble_authentik_release_name: authentik noble_authentik_release_name: authentik
noble_authentik_oauth2_proxy_chart_version: "10.4.3" noble_authentik_oauth2_proxy_chart_version: "10.4.3"
# Helm **--wait** timeout for **oauth2-proxy**; raise above the **10m** default if the first image pull / API checks take longer.
noble_authentik_oauth2_proxy_helm_wait_timeout: 10m
noble_authentik_host: auth.apps.noble.lab.pcenicni.dev noble_authentik_host: auth.apps.noble.lab.pcenicni.dev
noble_authentik_public_url: "https://{{ noble_authentik_host }}" noble_authentik_public_url: "https://{{ noble_authentik_host }}"

View File

@@ -51,5 +51,4 @@ def main() -> None:
print("worker: bootstrap user group membership updated", flush=True) print("worker: bootstrap user group membership updated", flush=True)
if __name__ == "__main__": main()
main()

View File

@@ -69,5 +69,4 @@ def main() -> None:
) )
if __name__ == "__main__": main()
main()

View File

@@ -106,5 +106,4 @@ def main() -> None:
print("worker: OAuth2 providers + applications upserted", flush=True) print("worker: OAuth2 providers + applications upserted", flush=True)
if __name__ == "__main__": main()
main()

View File

@@ -276,7 +276,15 @@
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
register: noble_authentik_worker_admin_access register: noble_authentik_worker_admin_access
changed_when: true changed_when: >-
"worker:" in (noble_authentik_worker_admin_access.stdout | default(""))
and "authentik Admins" in (noble_authentik_worker_admin_access.stdout | default(""))
failed_when: >-
(noble_authentik_worker_admin_access.rc | default(-1)) != 0
or (
"worker:" not in (noble_authentik_worker_admin_access.stdout | default(""))
or "authentik Admins" not in (noble_authentik_worker_admin_access.stdout | default(""))
)
when: when:
- noble_authentik_configure_idp | default(true) | bool - noble_authentik_configure_idp | default(true) | bool
- noble_authentik_ensure_admin_ui_access | default(true) | bool - noble_authentik_ensure_admin_ui_access | default(true) | bool
@@ -321,7 +329,15 @@
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
register: noble_authentik_worker_oidc_upsert register: noble_authentik_worker_oidc_upsert
changed_when: true changed_when: >-
"worker: OAuth2 providers + applications upserted"
in (noble_authentik_worker_oidc_upsert.stdout | default(""))
failed_when: >-
(noble_authentik_worker_oidc_upsert.rc | default(-1)) != 0
or (
"worker: OAuth2 providers + applications upserted"
not in (noble_authentik_worker_oidc_upsert.stdout | default(""))
)
when: when:
- noble_authentik_configure_idp | default(true) | bool - noble_authentik_configure_idp | default(true) | bool
- (noble_authentik_oidc_provision_via | default('worker') | lower) == 'worker' - (noble_authentik_oidc_provision_via | default('worker') | lower) == 'worker'
@@ -366,7 +382,10 @@
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
register: noble_authentik_worker_user_groups register: noble_authentik_worker_user_groups
changed_when: true changed_when: >-
"worker: bootstrap user group membership updated"
in (noble_authentik_worker_user_groups.stdout | default(""))
failed_when: (noble_authentik_worker_user_groups.rc | default(-1)) != 0
when: when:
- noble_authentik_configure_idp | default(true) | bool - noble_authentik_configure_idp | default(true) | bool
- (noble_authentik_oidc_provision_via | default('worker') | lower) == 'worker' - (noble_authentik_oidc_provision_via | default('worker') | lower) == 'worker'
@@ -467,7 +486,7 @@
- --force-conflicts - --force-conflicts
- --wait - --wait
- --timeout - --timeout
- 10m - "{{ noble_authentik_oauth2_proxy_helm_wait_timeout }}"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true

View File

@@ -11,6 +11,11 @@ noble_platform_kube_prometheus_operator_wait_retries: 60
noble_platform_kube_prometheus_operator_wait_delay: 5 noble_platform_kube_prometheus_operator_wait_delay: 5
# Longhorn PVCs + full stack often need 45-60m; node-exporter DaemonSet can be last at 3/4 until one node catches up. # Longhorn PVCs + full stack often need 45-60m; node-exporter DaemonSet can be last at 3/4 until one node catches up.
noble_platform_kube_prometheus_helm_wait_timeout: 60m noble_platform_kube_prometheus_helm_wait_timeout: 60m
# Loki SingleBinary + Longhorn PVC: Helm **--wait** can exceed Helm's **5m** default timeout; raise this value if Longhorn attach is slow.
noble_platform_loki_helm_wait_timeout: 30m
# Before Loki (first Longhorn PVC workload), ensure CSI plugin DaemonSet is fully rolled out (avoids **FailedMount** / backend timeouts).
noble_platform_wait_longhorn_csi_before_loki: true
noble_platform_longhorn_csi_rollout_timeout: 15m
# Decrypt **clusters/noble/secrets/*.yaml** with SOPS and kubectl apply (requires **sops**, **age**, and **age-key.txt**). # Decrypt **clusters/noble/secrets/*.yaml** with SOPS and kubectl apply (requires **sops**, **age**, and **age-key.txt**).
noble_apply_sops_secrets: true noble_apply_sops_secrets: true

View File

@@ -131,6 +131,21 @@
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true
- name: Wait for Longhorn CSI plugin before Loki (PVC attach)
ansible.builtin.command:
argv:
- kubectl
- rollout
- status
- daemonset/longhorn-csi-plugin
- -n
- longhorn-system
- --timeout={{ noble_platform_longhorn_csi_rollout_timeout }}
environment:
KUBECONFIG: "{{ noble_kubeconfig }}"
when: noble_platform_wait_longhorn_csi_before_loki | default(true) | bool
changed_when: false
- name: Install Loki - name: Install Loki
ansible.builtin.command: ansible.builtin.command:
argv: argv:
@@ -147,6 +162,8 @@
- "{{ noble_repo_root }}/clusters/noble/bootstrap/loki/values.yaml" - "{{ noble_repo_root }}/clusters/noble/bootstrap/loki/values.yaml"
- --force-conflicts - --force-conflicts
- --wait - --wait
- --timeout
- "{{ noble_platform_loki_helm_wait_timeout }}"
environment: environment:
KUBECONFIG: "{{ noble_kubeconfig }}" KUBECONFIG: "{{ noble_kubeconfig }}"
changed_when: true changed_when: true

View File

@@ -9,6 +9,12 @@
# --version 6.55.0 -f clusters/noble/bootstrap/loki/values.yaml --wait --timeout 30m # --version 6.55.0 -f clusters/noble/bootstrap/loki/values.yaml --wait --timeout 30m
# #
# Query/push URL for Grafana + Fluent Bit: http://loki-gateway.loki.svc.cluster.local:80 # Query/push URL for Grafana + Fluent Bit: http://loki-gateway.loki.svc.cluster.local:80
#
# Troubleshooting: if **helm --wait** times out with **StatefulSet/loki/loki not ready**, run
# **kubectl -n loki describe pod loki-0**. **FailedMount** + **longhorn-backend** / **hasn't been attached yet**
# is a **Longhorn CSI** issue (not Loki config): confirm **kubectl -n longhorn-system rollout status
# daemonset/longhorn-csi-plugin** succeeds, check Longhorn UI → Volume, and consider **kubectl delete pod -n loki loki-0**
# to recreate the pod after storage is healthy.
deploymentMode: SingleBinary deploymentMode: SingleBinary