From fc985932fe850c5843519876648432dbc5fa9af9 Mon Sep 17 00:00:00 2001 From: Nikholas Pcenicni <82239765+nikpcenicni@users.noreply.github.com> Date: Sat, 28 Mar 2026 02:41:51 -0400 Subject: [PATCH] Update cert-manager configurations to use DNS-01 challenge with Cloudflare for both production and staging ClusterIssuers. Modify README.md to reflect the new DNS-01 setup and provide instructions for creating the necessary Cloudflare API token secret. This change enhances certificate issuance reliability when using Cloudflare's proxy services. --- clusters/noble/apps/cert-manager/README.md | 14 +++++++++++++- .../clusterissuer-letsencrypt-prod.yaml | 13 ++++++++++--- .../clusterissuer-letsencrypt-staging.yaml | 11 ++++++++--- talos/CLUSTER-BUILD.md | 2 +- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/clusters/noble/apps/cert-manager/README.md b/clusters/noble/apps/cert-manager/README.md index 7a31ae5..bbe68a9 100644 --- a/clusters/noble/apps/cert-manager/README.md +++ b/clusters/noble/apps/cert-manager/README.md @@ -1,6 +1,16 @@ # cert-manager — noble -**Prerequisites:** **Traefik** (ingress class **`traefik`**), DNS for **`*.apps.noble.lab.pcenicni.dev`** → Traefik LB. +**Prerequisites:** **Traefik** (ingress class **`traefik`**), DNS for **`*.apps.noble.lab.pcenicni.dev`** → Traefik LB for app traffic. + +**ACME (Let’s Encrypt)** uses **DNS-01** via **Cloudflare** for zone **`pcenicni.dev`**. Create an API token with **Zone → DNS → Edit** and **Zone → Zone → Read** (or use the “Edit zone DNS” template), then, once the **`cert-manager`** namespace exists (step 1 below), create the Secret: + +```bash +kubectl -n cert-manager create secret generic cloudflare-dns-api-token \ + --from-literal=api-token='YOUR_CLOUDFLARE_API_TOKEN' \ + --dry-run=client -o yaml | kubectl apply -f - +``` + +Without this Secret, neither **`ClusterIssuer`** can complete certificate orders. 1. 
Create the namespace: @@ -35,3 +45,5 @@ ``` Use **`cert-manager.io/cluster-issuer: letsencrypt-staging`** on Ingresses while testing; switch to **`letsencrypt-prod`** when ready. + +**HTTP-01** is not configured: if the hostname is **proxied** (orange cloud) in Cloudflare, Let’s Encrypt may hit Cloudflare’s edge and get **404** for `/.well-known/acme-challenge/`. DNS-01 avoids that because validation uses a DNS TXT record instead of an HTTP request. diff --git a/clusters/noble/apps/cert-manager/clusterissuer-letsencrypt-prod.yaml b/clusters/noble/apps/cert-manager/clusterissuer-letsencrypt-prod.yaml index 677928b..65fcb9e 100644 --- a/clusters/noble/apps/cert-manager/clusterissuer-letsencrypt-prod.yaml +++ b/clusters/noble/apps/cert-manager/clusterissuer-letsencrypt-prod.yaml @@ -11,6 +11,13 @@ spec: privateKeySecretRef: name: letsencrypt-prod-account-key solvers: - - http01: - ingress: - class: traefik + # DNS-01 — works even when Let's Encrypt cannot reach Traefik over public HTTP (e.g. a hostname proxied through Cloudflare + # returns 404 for /.well-known/acme-challenge). Requires Secret cloudflare-dns-api-token in the cert-manager namespace. 
+ - dns01: + cloudflare: + apiTokenSecretRef: + name: cloudflare-dns-api-token + key: api-token + selector: + dnsZones: + - pcenicni.dev diff --git a/clusters/noble/apps/cert-manager/clusterissuer-letsencrypt-staging.yaml b/clusters/noble/apps/cert-manager/clusterissuer-letsencrypt-staging.yaml index 560d839..5c0c53f 100644 --- a/clusters/noble/apps/cert-manager/clusterissuer-letsencrypt-staging.yaml +++ b/clusters/noble/apps/cert-manager/clusterissuer-letsencrypt-staging.yaml @@ -11,6 +11,11 @@ spec: privateKeySecretRef: name: letsencrypt-staging-account-key solvers: - - http01: - ingress: - class: traefik + - dns01: + cloudflare: + apiTokenSecretRef: + name: cloudflare-dns-api-token + key: api-token + selector: + dnsZones: + - pcenicni.dev diff --git a/talos/CLUSTER-BUILD.md b/talos/CLUSTER-BUILD.md index 102281b..fb00e94 100644 --- a/talos/CLUSTER-BUILD.md +++ b/talos/CLUSTER-BUILD.md @@ -13,7 +13,7 @@ Lab stack is **up** on-cluster through **Phase D**–**F** and **Phase G** (Vaul - **metrics-server** Helm **3.13.0** / app **v0.8.0** — `clusters/noble/apps/metrics-server/values.yaml` (`--kubelet-insecure-tls` for Talos); **`kubectl top nodes`** works. - **Longhorn** Helm **1.11.1** / app **v1.11.1** — `clusters/noble/apps/longhorn/` (PSA **privileged** namespace, `defaultDataPath` `/var/mnt/longhorn`, `preUpgradeChecker` enabled); **StorageClass** `longhorn` (default); **`nodes.longhorn.io`** all **Ready**; test **PVC** `Bound` on `longhorn`. - **Traefik** Helm **39.0.6** / app **v3.6.11** — `clusters/noble/apps/traefik/`; **`Service`** **`LoadBalancer`** **`EXTERNAL-IP` `192.168.50.211`**; **`IngressClass`** **`traefik`** (default). Point **`*.apps.noble.lab.pcenicni.dev`** at **`192.168.50.211`**. MetalLB pool verification was done before replacing the temporary nginx test with Traefik. 
-- **cert-manager** Helm **v1.20.0** / app **v1.20.0** — `clusters/noble/apps/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (HTTP-01, ingress class **`traefik`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox). +- **cert-manager** Helm **v1.20.0** / app **v1.20.0** — `clusters/noble/apps/cert-manager/`; **`ClusterIssuer`** **`letsencrypt-staging`** and **`letsencrypt-prod`** (**DNS-01** via **Cloudflare** for **`pcenicni.dev`**, Secret **`cloudflare-dns-api-token`** in **`cert-manager`**); ACME email **`certificates@noble.lab.pcenicni.dev`** (edit in manifests if you want a different mailbox). - **Newt** Helm **1.2.0** / app **1.10.1** — `clusters/noble/apps/newt/` (**fossorial/newt**); Pangolin site tunnel — **`newt-pangolin-auth`** Secret (**`PANGOLIN_ENDPOINT`**, **`NEWT_ID`**, **`NEWT_SECRET`**). Prefer a **SealedSecret** in git (`kubeseal` — see `clusters/noble/apps/sealed-secrets/examples/`) after rotating credentials if they were exposed. **Public DNS** is **not** automated with ExternalDNS: **CNAME** records at your DNS host per Pangolin’s domain instructions, plus **Integration API** for HTTP resources/targets — see **`clusters/noble/apps/newt/README.md`**. LAN access to Traefik can still use **`*.apps.noble.lab.pcenicni.dev`** → **`192.168.50.211`** (split horizon / local resolver). - **Argo CD** Helm **9.4.17** / app **v3.3.6** — `clusters/noble/bootstrap/argocd/`; **`argocd-server`** **`LoadBalancer`** **`192.168.50.210`**; app-of-apps scaffold under **`bootstrap/argocd/apps/`** (edit **`root-application.yaml`** `repoURL` before applying). - **kube-prometheus-stack** — Helm chart **82.15.1** — `clusters/noble/apps/kube-prometheus-stack/` (**namespace** `monitoring`, PSA **privileged** — **node-exporter** needs host mounts); **Longhorn** PVCs for Prometheus, Grafana, Alertmanager; **node-exporter** DaemonSet **4/4**. 
**Grafana Ingress:** **`https://grafana.apps.noble.lab.pcenicni.dev`** (Traefik **`ingressClassName: traefik`**, **`cert-manager.io/cluster-issuer: letsencrypt-prod`**). **Loki** datasource in Grafana: ConfigMap **`clusters/noble/apps/grafana-loki-datasource/loki-datasource.yaml`** (sidecar label **`grafana_datasource: "1"`**) — not via **`grafana.additionalDataSources`** in the chart. **`helm upgrade --install` with `--wait` is silent until done** — use **`--timeout 30m`**; Grafana admin: Secret **`kube-prometheus-grafana`**, keys **`admin-user`** / **`admin-password`**.