Update .gitignore to ignore the generated noble-lab-ui-urls.md and enhance README.md with new role documentation. Refactor noble.yml to incorporate the noble_landing_urls role for improved URL management. Add ingress configurations for alertmanager, prometheus, longhorn, and vault to support TLS termination via Traefik. Update network policies and values.yaml for vault to allow traffic from Traefik. These changes aim to streamline deployment and enhance service accessibility.
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -6,3 +6,6 @@ talos/kubeconfig
|
||||
# Local secrets
|
||||
age-key.txt
|
||||
.env
|
||||
|
||||
# Generated by ansible noble_landing_urls
|
||||
ansible/output/noble-lab-ui-urls.md
|
||||
@@ -78,6 +78,7 @@ ansible-playbook playbooks/noble.yml --skip-tags newt
|
||||
| `talos_phase_a` | Talos genconfig, apply-config, bootstrap, kubeconfig |
|
||||
| `helm_repos` | `helm repo add` / `update` |
|
||||
| `noble_*` | Cilium, metrics-server, Longhorn, MetalLB (20m Helm wait), kube-vip, Traefik, cert-manager, Newt, Argo CD, Kyverno, platform stack |
|
||||
| `noble_landing_urls` | Writes **`ansible/output/noble-lab-ui-urls.md`** — URLs, service names, and (optional) Argo/Grafana passwords from Secrets |
|
||||
| `noble_post_deploy` | Post-install reminders |
|
||||
| `talos_bootstrap` | Genconfig-only (used by older playbook) |
|
||||
|
||||
|
||||
@@ -224,3 +224,5 @@
|
||||
tags: [kyverno_policies, policy]
|
||||
- role: noble_platform
|
||||
tags: [platform, observability, apps]
|
||||
- role: noble_landing_urls
|
||||
tags: [landing, platform, observability, apps]
|
||||
|
||||
43
ansible/roles/noble_landing_urls/defaults/main.yml
Normal file
43
ansible/roles/noble_landing_urls/defaults/main.yml
Normal file
@@ -0,0 +1,43 @@
|
||||
---
|
||||
# Regenerated when **noble_landing_urls** runs (after platform stack). Paths match Traefik + cert-manager Ingresses.
|
||||
noble_landing_urls_dest: "{{ noble_repo_root }}/ansible/output/noble-lab-ui-urls.md"
|
||||
|
||||
# When true, run kubectl against the cluster to fill Argo CD / Grafana passwords in the markdown (requires working kubeconfig).
|
||||
noble_landing_urls_fetch_credentials: true
|
||||
|
||||
noble_lab_ui_entries:
|
||||
- name: Argo CD
|
||||
description: GitOps UI (sync, apps, repos)
|
||||
namespace: argocd
|
||||
service: argocd-server
|
||||
url: https://argo.apps.noble.lab.pcenicni.dev
|
||||
- name: Grafana
|
||||
description: Dashboards, Loki explore (logs)
|
||||
namespace: monitoring
|
||||
service: kube-prometheus-grafana
|
||||
url: https://grafana.apps.noble.lab.pcenicni.dev
|
||||
- name: Prometheus
|
||||
description: Prometheus UI (queries, targets) — lab; protect in production
|
||||
namespace: monitoring
|
||||
service: kube-prometheus-kube-prome-prometheus
|
||||
url: https://prometheus.apps.noble.lab.pcenicni.dev
|
||||
- name: Alertmanager
|
||||
description: Alertmanager UI (silences, status)
|
||||
namespace: monitoring
|
||||
service: kube-prometheus-kube-prome-alertmanager
|
||||
url: https://alertmanager.apps.noble.lab.pcenicni.dev
|
||||
- name: Headlamp
|
||||
description: Kubernetes UI (cluster resources)
|
||||
namespace: headlamp
|
||||
service: headlamp
|
||||
url: https://headlamp.apps.noble.lab.pcenicni.dev
|
||||
- name: Longhorn
|
||||
description: Storage volumes, nodes, backups
|
||||
namespace: longhorn-system
|
||||
service: longhorn-frontend
|
||||
url: https://longhorn.apps.noble.lab.pcenicni.dev
|
||||
- name: Vault
|
||||
description: Secrets engine UI (after init/unseal)
|
||||
namespace: vault
|
||||
service: vault
|
||||
url: https://vault.apps.noble.lab.pcenicni.dev
|
||||
55
ansible/roles/noble_landing_urls/tasks/fetch_credentials.yml
Normal file
55
ansible/roles/noble_landing_urls/tasks/fetch_credentials.yml
Normal file
@@ -0,0 +1,55 @@
|
||||
---
|
||||
# Populates template variables from Secrets (no_log on kubectl to avoid leaking into Ansible stdout).
|
||||
- name: Fetch Argo CD initial admin password (base64)
|
||||
ansible.builtin.command:
|
||||
argv:
|
||||
- kubectl
|
||||
- -n
|
||||
- argocd
|
||||
- get
|
||||
- secret
|
||||
- argocd-initial-admin-secret
|
||||
- -o
|
||||
- jsonpath={.data.password}
|
||||
environment:
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
register: noble_fetch_argocd_pw_b64
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
no_log: true
|
||||
|
||||
- name: Fetch Grafana admin user (base64)
|
||||
ansible.builtin.command:
|
||||
argv:
|
||||
- kubectl
|
||||
- -n
|
||||
- monitoring
|
||||
- get
|
||||
- secret
|
||||
- kube-prometheus-grafana
|
||||
- -o
|
||||
- jsonpath={.data.admin-user}
|
||||
environment:
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
register: noble_fetch_grafana_user_b64
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
no_log: true
|
||||
|
||||
- name: Fetch Grafana admin password (base64)
|
||||
ansible.builtin.command:
|
||||
argv:
|
||||
- kubectl
|
||||
- -n
|
||||
- monitoring
|
||||
- get
|
||||
- secret
|
||||
- kube-prometheus-grafana
|
||||
- -o
|
||||
- jsonpath={.data.admin-password}
|
||||
environment:
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
register: noble_fetch_grafana_pw_b64
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
no_log: true
|
||||
20
ansible/roles/noble_landing_urls/tasks/main.yml
Normal file
20
ansible/roles/noble_landing_urls/tasks/main.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
---
|
||||
- name: Ensure output directory for generated landing page
|
||||
ansible.builtin.file:
|
||||
path: "{{ noble_repo_root }}/ansible/output"
|
||||
state: directory
|
||||
mode: "0755"
|
||||
|
||||
- name: Fetch initial credentials from cluster Secrets (optional)
|
||||
ansible.builtin.include_tasks: fetch_credentials.yml
|
||||
when: noble_landing_urls_fetch_credentials | default(true) | bool
|
||||
|
||||
- name: Write noble lab UI URLs (markdown landing page)
|
||||
ansible.builtin.template:
|
||||
src: noble-lab-ui-urls.md.j2
|
||||
dest: "{{ noble_landing_urls_dest }}"
|
||||
mode: "0644"
|
||||
|
||||
- name: Show landing page path
|
||||
ansible.builtin.debug:
|
||||
msg: "Noble lab UI list written to {{ noble_landing_urls_dest }}"
|
||||
@@ -0,0 +1,50 @@
|
||||
# Noble lab — web UIs (LAN)
|
||||
|
||||
> **Sensitive:** This file may include **passwords read from Kubernetes Secrets** when credential fetch ran. It is **gitignored** — do not commit or share.
|
||||
|
||||
**DNS:** point **`*.apps.noble.lab.pcenicni.dev`** at the Traefik **LoadBalancer** (MetalLB **`192.168.50.211`** by default — see `clusters/noble/apps/traefik/values.yaml`).
|
||||
|
||||
**TLS:** **cert-manager** + **`letsencrypt-prod`** on each Ingress (public **DNS-01** for **`pcenicni.dev`**).
|
||||
|
||||
This file is **generated** by Ansible (`noble_landing_urls` role). Use it as a temporary landing page to find services after deploy.
|
||||
|
||||
| UI | What | Kubernetes service | Namespace | URL |
|
||||
|----|------|----------------------|-----------|-----|
|
||||
{% for e in noble_lab_ui_entries %}
|
||||
| {{ e.name }} | {{ e.description }} | `{{ e.service }}` | `{{ e.namespace }}` | [{{ e.url }}]({{ e.url }}) |
|
||||
{% endfor %}
|
||||
|
||||
## Initial access (logins)
|
||||
|
||||
| App | Username / identity | Password / secret |
|
||||
|-----|---------------------|-------------------|
|
||||
| **Argo CD** | `admin` | {% if (noble_fetch_argocd_pw_b64 is defined) and (noble_fetch_argocd_pw_b64.rc | default(1) == 0) and (noble_fetch_argocd_pw_b64.stdout | default('') | length > 0) %}`{{ noble_fetch_argocd_pw_b64.stdout | b64decode }}`{% else %}*(not fetched — use commands below)*{% endif %} |
|
||||
| **Grafana** | {% if (noble_fetch_grafana_user_b64 is defined) and (noble_fetch_grafana_user_b64.rc | default(1) == 0) and (noble_fetch_grafana_user_b64.stdout | default('') | length > 0) %}`{{ noble_fetch_grafana_user_b64.stdout | b64decode }}`{% else %}*(from Secret — use commands below)*{% endif %} | {% if (noble_fetch_grafana_pw_b64 is defined) and (noble_fetch_grafana_pw_b64.rc | default(1) == 0) and (noble_fetch_grafana_pw_b64.stdout | default('') | length > 0) %}`{{ noble_fetch_grafana_pw_b64.stdout | b64decode }}`{% else %}*(not fetched — use commands below)*{% endif %} |
|
||||
| **Headlamp** | ServiceAccount token | No fixed password. Sign in with a SA token, or configure OIDC — `clusters/noble/apps/headlamp/README.md`. |
|
||||
| **Prometheus** | — | No auth in default install (lab). |
|
||||
| **Alertmanager** | — | No auth in default install (lab). |
|
||||
| **Longhorn** | — | No default login unless you enable access control in the UI settings. |
|
||||
| **Vault** | Token | Root token is only from **`vault operator init`** (not stored in git). See `clusters/noble/apps/vault/README.md`. |
|
||||
|
||||
### Commands to retrieve passwords (if not filled above)
|
||||
|
||||
```bash
|
||||
# Argo CD initial admin (Secret removed after you change password)
|
||||
kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath='{.data.password}' | base64 -d
|
||||
echo
|
||||
|
||||
# Grafana admin user / password
|
||||
kubectl -n monitoring get secret kube-prometheus-grafana -o jsonpath='{.data.admin-user}' | base64 -d
|
||||
echo
|
||||
kubectl -n monitoring get secret kube-prometheus-grafana -o jsonpath='{.data.admin-password}' | base64 -d
|
||||
echo
|
||||
```
|
||||
|
||||
To generate this file **without** calling kubectl, run Ansible with **`-e noble_landing_urls_fetch_credentials=false`**.
|
||||
|
||||
## Notes
|
||||
|
||||
- **Argo CD** `argocd-initial-admin-secret` is normally gone once the admin password has been changed (Argo CD recommends deleting it), so the Argo CD row above may show *not fetched*.
|
||||
- **Grafana** password is random unless you set `grafana.adminPassword` in chart values.
|
||||
- **Vault** UI needs **unsealed** Vault; tokens come from your chosen auth method.
|
||||
- **Prometheus / Alertmanager** UIs are unauthenticated by default — restrict when hardening (`talos/CLUSTER-BUILD.md` Phase G).
|
||||
8
ansible/roles/noble_platform/defaults/main.yml
Normal file
8
ansible/roles/noble_platform/defaults/main.yml
Normal file
@@ -0,0 +1,8 @@
|
||||
---
|
||||
# kubectl apply -k can hit transient etcd timeouts under load; retries + longer API deadline help.
|
||||
noble_platform_kubectl_request_timeout: 120s
|
||||
noble_platform_kustomize_retries: 5
|
||||
noble_platform_kustomize_delay: 20
|
||||
|
||||
# Vault: injector (vault-k8s) owns MutatingWebhookConfiguration.caBundle; Helm upgrade can SSA-conflict. Delete webhook so Helm can recreate it.
|
||||
noble_vault_delete_injector_webhook_before_helm: true
|
||||
@@ -5,10 +5,15 @@
|
||||
argv:
|
||||
- kubectl
|
||||
- apply
|
||||
- "--request-timeout={{ noble_platform_kubectl_request_timeout }}"
|
||||
- -k
|
||||
- "{{ noble_repo_root }}/clusters/noble/apps"
|
||||
environment:
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
register: noble_platform_kustomize
|
||||
retries: "{{ noble_platform_kustomize_retries | int }}"
|
||||
delay: "{{ noble_platform_kustomize_delay | int }}"
|
||||
until: noble_platform_kustomize.rc == 0
|
||||
changed_when: true
|
||||
|
||||
- name: Install Sealed Secrets
|
||||
@@ -49,6 +54,21 @@
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
changed_when: true
|
||||
|
||||
# vault-k8s patches webhook CA after install; Helm 3/4 SSA then conflicts on upgrade. Removing the MWC lets Helm re-apply cleanly; injector repopulates caBundle.
|
||||
- name: Delete Vault agent injector MutatingWebhookConfiguration before Helm (avoids caBundle field conflict)
|
||||
ansible.builtin.command:
|
||||
argv:
|
||||
- kubectl
|
||||
- delete
|
||||
- mutatingwebhookconfiguration
|
||||
- vault-agent-injector-cfg
|
||||
- --ignore-not-found
|
||||
environment:
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
register: noble_vault_mwc_delete
|
||||
when: noble_vault_delete_injector_webhook_before_helm | default(true) | bool
|
||||
changed_when: "'deleted' in (noble_vault_mwc_delete.stdout | default(''))"
|
||||
|
||||
- name: Install Vault
|
||||
ansible.builtin.command:
|
||||
argv:
|
||||
@@ -66,6 +86,7 @@
|
||||
- --wait
|
||||
environment:
|
||||
KUBECONFIG: "{{ noble_kubeconfig }}"
|
||||
HELM_SERVER_SIDE_APPLY: "false"
|
||||
changed_when: true
|
||||
|
||||
- name: Install kube-prometheus-stack
|
||||
|
||||
@@ -35,6 +35,20 @@ alertmanager:
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
hosts:
|
||||
- alertmanager.apps.noble.lab.pcenicni.dev
|
||||
paths:
|
||||
- /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- secretName: alertmanager-apps-noble-tls
|
||||
hosts:
|
||||
- alertmanager.apps.noble.lab.pcenicni.dev
|
||||
|
||||
prometheus:
|
||||
prometheusSpec:
|
||||
@@ -48,6 +62,20 @@ prometheus:
|
||||
resources:
|
||||
requests:
|
||||
storage: 30Gi
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
hosts:
|
||||
- prometheus.apps.noble.lab.pcenicni.dev
|
||||
paths:
|
||||
- /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- secretName: prometheus-apps-noble-tls
|
||||
hosts:
|
||||
- prometheus.apps.noble.lab.pcenicni.dev
|
||||
|
||||
grafana:
|
||||
persistence:
|
||||
@@ -78,5 +106,7 @@ grafana:
|
||||
server:
|
||||
domain: grafana.apps.noble.lab.pcenicni.dev
|
||||
root_url: https://grafana.apps.noble.lab.pcenicni.dev/
|
||||
# Traefik sets X-Forwarded-*; required for correct redirects and cookies behind the ingress.
|
||||
use_proxy_headers: true
|
||||
|
||||
# Loki datasource: apply `clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml` (sidecar ConfigMap) instead of additionalDataSources here.
|
||||
|
||||
@@ -16,6 +16,19 @@ defaultSettings:
|
||||
# Default 30% reserved often makes small data disks look "full" to the scheduler.
|
||||
storageReservedPercentageForDefaultDisk: "10"
|
||||
|
||||
# Longhorn UI — same *.apps.noble.lab.pcenicni.dev pattern as Grafana / Headlamp (Traefik LB → cert-manager TLS).
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
host: longhorn.apps.noble.lab.pcenicni.dev
|
||||
path: /
|
||||
pathType: Prefix
|
||||
tls: true
|
||||
tlsSecret: longhorn-apps-noble-tls
|
||||
secureBackends: false
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
|
||||
# Pre-upgrade Job: keep enabled for normal Helm upgrades (disable only if GitOps sync fights the Job).
|
||||
preUpgradeChecker:
|
||||
jobEnabled: true
|
||||
|
||||
@@ -24,6 +24,13 @@ spec:
|
||||
- ports:
|
||||
- port: "8200"
|
||||
protocol: TCP
|
||||
- fromEndpoints:
|
||||
- matchLabels:
|
||||
"k8s:io.kubernetes.pod.namespace": traefik
|
||||
toPorts:
|
||||
- ports:
|
||||
- port: "8200"
|
||||
protocol: TCP
|
||||
- fromEndpoints:
|
||||
- matchLabels:
|
||||
"k8s:io.kubernetes.pod.namespace": vault
|
||||
|
||||
@@ -44,5 +44,19 @@ server:
|
||||
path: "/v1/sys/health?uninitcode=204&sealedcode=204&standbyok=true"
|
||||
port: 8200
|
||||
|
||||
# LAN: TLS terminates at Traefik + cert-manager; listener stays HTTP (global.tlsDisable).
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
hosts:
|
||||
- host: vault.apps.noble.lab.pcenicni.dev
|
||||
paths: []
|
||||
tls:
|
||||
- secretName: vault-apps-noble-tls
|
||||
hosts:
|
||||
- vault.apps.noble.lab.pcenicni.dev
|
||||
|
||||
ui:
|
||||
enabled: true
|
||||
|
||||
@@ -35,6 +35,17 @@ echo
|
||||
|
||||
Change the password in the UI or via `argocd account update-password`.
|
||||
|
||||
### TLS: changing ClusterIssuer (e.g. staging → prod)
|
||||
|
||||
If **`helm upgrade --wait`** fails with *Secret was previously issued by `letsencrypt-staging`* (or another issuer), cert-manager will not replace the TLS Secret in place. Remove the old cert material once, then upgrade again:
|
||||
|
||||
```bash
|
||||
kubectl -n argocd delete certificate argocd-server --ignore-not-found
|
||||
kubectl -n argocd delete secret argocd-server-tls --ignore-not-found
|
||||
helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace \
|
||||
--version 9.4.17 -f clusters/noble/bootstrap/argocd/values.yaml --wait
|
||||
```
|
||||
|
||||
## 3. Register this repo (if private)
|
||||
|
||||
Use **Settings → Repositories** in the UI, or `argocd repo add` / a `Secret` of type `repository`.
|
||||
|
||||
@@ -32,17 +32,20 @@ server:
|
||||
certificate:
|
||||
enabled: true
|
||||
domain: argo.apps.noble.lab.pcenicni.dev
|
||||
# If you change issuer.name, delete Certificate/Secret once so cert-manager can re-issue (see README.md).
|
||||
issuer:
|
||||
group: cert-manager.io
|
||||
kind: ClusterIssuer
|
||||
name: letsencrypt-staging
|
||||
name: letsencrypt-prod
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
hostname: argo.apps.noble.lab.pcenicni.dev
|
||||
tls: true
|
||||
annotations: {}
|
||||
# Traefik terminates TLS; Argo serves HTTP/2 cleartext (insecure). Without h2c, UI/API can 404 or fail gRPC.
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/service.serversscheme: h2c
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
|
||||
Reference in New Issue
Block a user