diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a8027dc..02bad04 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,15 +2,17 @@ name: CI on: push: - branches: [main] + branches: [main, dev] pull_request: - branches: [main] + branches: [main, dev] jobs: lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-go@v5 with: go-version: "1.26.3" @@ -22,10 +24,12 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-go@v5 with: go-version: "1.26.3" - - run: go test ./api/... ./pkg/... ./internal/... -race -coverprofile=coverage.out + - run: go test ./api/... ./cmd/... ./pkg/... ./internal/... -race -coverprofile=coverage.out - uses: actions/upload-artifact@v4 with: name: coverage @@ -35,6 +39,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-go@v5 with: go-version: "1.26.3" @@ -47,6 +53,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-go@v5 with: go-version: "1.26.3" @@ -56,8 +64,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: azure/setup-helm@v4 - - run: helm plugin install https://github.com/helm-unittest/helm-unittest + - run: helm plugin install --verify=false https://github.com/helm-unittest/helm-unittest - run: helm lint charts/kilo-clustermesh-operator --strict - run: helm unittest charts/kilo-clustermesh-operator @@ -65,8 +75,48 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-go@v5 with: go-version: "1.26.3" - run: make manifests generate - run: git diff --exit-code + + image: + if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev') + 
needs: [lint, test, build, helm, generate, integration] + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - uses: docker/setup-buildx-action@v3 + - uses: docker/metadata-action@v5 + id: meta + with: + images: ghcr.io/${{ github.repository }} + tags: | + type=ref,event=branch + type=sha,prefix=sha-,format=long + - uses: docker/build-push-action@v6 + with: + context: . + file: Containerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + platforms: linux/amd64,linux/arm64 + build-args: | + VERSION=${{ github.ref_name }} + REVISION=${{ github.sha }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.gitignore b/.gitignore index b89329c..f5e69c1 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,6 @@ Thumbs.db # Internal planning docs (not for public history) PLAN.md + +# Cluster-specific deployment manifests (kept local; not part of the project) +deploy/ diff --git a/.golangci.yml b/.golangci.yml index fb9c6bf..8d7b73b 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -140,6 +140,7 @@ linters: - revive - gochecknoglobals - noinlineerr + - goconst path: _test\.go - linters: - err113 diff --git a/Containerfile b/Containerfile index a8b5f61..98d3b73 100644 --- a/Containerfile +++ b/Containerfile @@ -1,4 +1,4 @@ -FROM docker.io/library/golang:1.26@sha256:313faae491b410a35402c05d35e7518ae99103d957308e940e1ae2cfa0aac29b AS builder +FROM --platform=$BUILDPLATFORM docker.io/library/golang:1.26@sha256:313faae491b410a35402c05d35e7518ae99103d957308e940e1ae2cfa0aac29b AS builder ARG TARGETOS TARGETARCH ARG VERSION=dev ARG REVISION=unknown @@ -16,7 +16,7 @@ RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ FROM 
gcr.io/distroless/static:nonroot@sha256:e3f945647ffb95b5839c07038d64f9811adf17308b9121d8a2b87b6a22a80a39 -LABEL org.opencontainers.image.source="https://github.com/squat/kilo-clustermesh-operator" +LABEL org.opencontainers.image.source="https://github.com/cozystack/kilo-clustermesh-operator" LABEL org.opencontainers.image.description="Kubernetes ClusterMesh operator for Kilo" LABEL org.opencontainers.image.licenses="Apache-2.0" LABEL org.opencontainers.image.title="kilo-clustermesh-operator" diff --git a/Makefile b/Makefile index 1fd8055..35deed1 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,8 @@ help: ## Display this help. .PHONY: manifests manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. - "$(CONTROLLER_GEN)" rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases + "$(CONTROLLER_GEN)" rbac:roleName=manager-role webhook paths="./..." + "$(CONTROLLER_GEN)" crd paths="./api/..." output:crd:artifacts:config=config/crd/bases .PHONY: generate generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. diff --git a/README.md b/README.md index 95a83b2..2314d6a 100644 --- a/README.md +++ b/README.md @@ -1,108 +1,78 @@ # kilo-clustermesh-operator -Kubernetes ClusterMesh operator for [Kilo](https://github.com/squat/kilo) — connects two or more clusters into a WireGuard-based mesh network. +> Kubernetes operator that connects two or more clusters into a WireGuard-based mesh network using [Kilo](https://github.com/squat/kilo). 
+ +## Table of Contents + +- [Overview](#overview) +- [Features](#features) +- [Requirements](#requirements) +- [Quick Start](#quick-start) +- [How It Works](#how-it-works) +- [Documentation](#documentation) +- [Project Status](#project-status) +- [License](#license) + +--- ## Overview -The operator watches `ClusterMesh` resources and reconciles Kilo `Peer` objects so that every node in each remote cluster becomes a peer in the local cluster's WireGuard mesh. This enables cross-cluster pod-to-pod and service connectivity without a shared control plane. +`kilo-clustermesh-operator` extends Kilo's single-cluster WireGuard mesh to span multiple Kubernetes clusters. You declare a `ClusterMesh` resource that lists all participating clusters, and the operator reconciles Kilo `Peer` objects so that every node in each remote cluster becomes a peer on the local cluster — enabling direct pod-to-pod and service connectivity across clusters without a shared control plane. + +The operator runs on a single cluster and reaches remote clusters via kubeconfigs stored in Kubernetes Secrets. No second operator instance is required on remote clusters. + +## Features -Each `ClusterMesh` resource declares two or more participating clusters, including which one is local. The operator connects to each remote cluster using a kubeconfig stored in a Kubernetes Secret, lists the remote nodes, validates their CIDRs against the declared spec, and creates or updates Kilo `Peer` objects on the local cluster accordingly. 
+- **Multi-cluster WireGuard mesh** — declarative `ClusterMesh` CRD bridges any number of clusters +- **Fork-aware Kilo support** — accepts WireGuard IP annotations in both upstream (`/32`) and Cozystack-patched (`/<prefix-length>`) form; normalises to host routes automatically +- **Endpoint resolution chain** — per-node endpoint determined by priority: `clustermesh-endpoint` annotation → `force-endpoint` annotation → Node `ExternalIP` combined with `wireguardPort`; nodes with no resolvable endpoint are skipped cleanly +- **Anchor peers** — a single per-cluster anchor `Peer` advertises `serviceCIDR` and `additionalCIDRs` so service and host-network ranges are reachable across clusters +- **Embedded CRD bootstrap** — the operator self-applies its CRD at startup; no separate CRD pre-install step required +- **Safe cluster reconfiguration** — a change-watcher triggers a controlled pod restart when cluster topology or kubeconfig Secrets change, rebuilding the client registry from scratch +- **Finalizer-based cleanup** — removing a `ClusterMesh` CR triggers deletion of all managed `Peer` objects on every cluster before the resource is released -## Prerequisites +## Requirements -- Kubernetes 1.28+ in every participating cluster -- [Kilo](https://github.com/squat/kilo) installed in each cluster with `--mesh-granularity=cross` -- Each cluster must be reachable from the controller (API server endpoint) -- Helm 3.x (for chart-based installation) +- Kubernetes 1.28+ on every participating cluster +- [Kilo](https://github.com/squat/kilo) installed and running on every cluster (both upstream and the Cozystack-patched build are supported) +- Each node that participates in the mesh must expose its WireGuard UDP port on a network address reachable from every other cluster — by default port `51820`, configurable per cluster via `wireguardPort` +- Each remote cluster's API server must be reachable from the cluster where the operator runs +- A kubeconfig Secret for each non-local cluster, granting 
the operator read access to `nodes` and read/write access to `peers` on that cluster +- Helm 3.x for chart-based installation ## Quick Start -Install the operator via Helm: +### 1. Install the operator + +Clone the repository and install with Helm: ```bash -helm install kilo-clustermesh-operator \ - oci://ghcr.io/squat/kilo-clustermesh-operator/charts/kilo-clustermesh-operator \ +git clone https://github.com/cozystack/kilo-clustermesh-operator.git +cd kilo-clustermesh-operator +helm install kilo-clustermesh-operator charts/kilo-clustermesh-operator \ --namespace kilo-system \ --create-namespace ``` -Create a `ClusterMesh` resource: +Container images are published to `ghcr.io/cozystack/kilo-clustermesh-operator` and tagged `sha-<full-commit-sha>` (e.g. `sha-43caba9978f26383593bedec79930c62e7ecead7`). Pin a specific build by overriding `image.tag` in your values file: ```yaml -apiVersion: kilo.squat.ai/v1alpha1 -kind: ClusterMesh -metadata: - name: my-mesh - namespace: kilo-system -spec: - clusters: - - name: cluster-a - local: true - podCIDRs: ["10.1.0.0/16"] - wireguardCIDR: "10.100.0.0/24" - serviceCIDR: "10.96.0.0/12" - - name: cluster-b - kubeconfigSecretRef: - name: cluster-b-kubeconfig - key: kubeconfig - podCIDRs: ["10.2.0.0/16"] - wireguardCIDR: "10.100.1.0/24" - serviceCIDR: "10.96.0.0/12" +image: + tag: sha-<full-commit-sha> ``` -## ClusterMesh CRD Reference - -**Group**: `kilo.squat.ai` | **Version**: `v1alpha1` | **Kind**: `ClusterMesh` - -Short name: `cm` | Scope: Namespaced - -### Spec - -| Field | Type | Required | Description | -| --- | --- | --- | --- | -| `clusters` | `[]ClusterEntry` | Yes | List of clusters in this mesh. Minimum 2 entries. | - -### ClusterEntry - -| Field | Type | Required | Description | -| --- | --- | --- | --- | -| `name` | `string` | Yes | Unique identifier for this cluster within the mesh. Must be a valid DNS-1123 label (max 63 chars). | -| `local` | `bool` | No | Marks this as the cluster where the controller runs. Exactly one entry must be local. 
| -| `kubeconfigSecretRef` | `SecretKeyRef` | No | Reference to a Secret containing the kubeconfig for this cluster. Required for non-local clusters. | -| `podCIDRs` | `[]string` | Yes | Pod network CIDRs for this cluster. `Node.Spec.PodCIDRs` must be subsets of these. Supports dual-stack. Minimum 1 entry. | -| `wireguardCIDR` | `string` | Yes | CIDR for Kilo's WireGuard interface (`kilo0`). Each node's `kilo.squat.ai/wireguard-ip` must fall within this CIDR. | -| `serviceCIDR` | `string` | No | Kubernetes service network CIDR. If set, advertised via an anchor Peer so services are reachable across clusters. | -| `additionalCIDRs` | `[]string` | No | Extra CIDRs to advertise into the mesh (e.g., host-network ranges, external subnets). | - -### SecretKeyRef +### 2. Prepare remote-cluster credentials -| Field | Type | Required | Description | -| --- | --- | --- | --- | -| `name` | `string` | Yes | Name of the Kubernetes Secret. | -| `key` | `string` | Yes | Key within the Secret's `data` map. | - -### Status - -| Field | Type | Description | -| --- | --- | --- | -| `clusters` | `[]ClusterStatus` | Per-cluster reconciliation state. | -| `conditions` | `[]metav1.Condition` | Standard Kubernetes conditions. The `Ready` condition reflects overall mesh health. | - -### ClusterStatus - -| Field | Type | Description | -| --- | --- | --- | -| `name` | `string` | Matches `ClusterEntry.name`. | -| `registeredPeers` | `int` | Number of Kilo `Peer` objects created for this cluster's nodes. | -| `skippedNodes` | `int` | Number of nodes that failed CIDR validation and were not peered. | - -## Remote Cluster Setup - -The operator needs read access to nodes and write access to `peers` on each remote cluster. 
- -Apply the following `ClusterRole` on each remote cluster: +On every remote cluster, create a `ServiceAccount`, `ClusterRole`, `ClusterRoleBinding`, and a long-lived token `Secret`: ```yaml +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: clustermesh-reader + namespace: kube-system +--- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -114,84 +84,127 @@ rules: - apiGroups: [kilo.squat.ai] resources: [peers] verbs: [get, list, watch, create, update, patch, delete] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: clustermesh-reader +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kilo-clustermesh-remote +subjects: + - kind: ServiceAccount + name: clustermesh-reader + namespace: kube-system +--- +apiVersion: v1 +kind: Secret +metadata: + name: clustermesh-reader-token + namespace: kube-system + annotations: + kubernetes.io/service-account.name: clustermesh-reader +type: kubernetes.io/service-account-token ``` -Create a ServiceAccount, bind the role, generate a kubeconfig, then store it as a Secret in the local cluster: +Build a kubeconfig from the token and store it as a Secret on the cluster where the operator runs: ```bash -kubectl --context remote-cluster create serviceaccount clustermesh-reader -n kube-system -kubectl --context remote-cluster create clusterrolebinding clustermesh-reader \ - --clusterrole=kilo-clustermesh-remote \ - --serviceaccount=kube-system:clustermesh-reader - -# Generate kubeconfig from the ServiceAccount token -kubectl --context remote-cluster create token clustermesh-reader -n kube-system --duration=8760h \ - | kubectl --context local-cluster create secret generic cluster-b-kubeconfig \ - --from-literal=kubeconfig="$(kubectl config view --minify --flatten)" \ - --namespace kilo-system +TOKEN=$(kubectl --kubeconfig "$REMOTE" --namespace kube-system \ + get secret clustermesh-reader-token --output jsonpath='{.data.token}' | base64 
--decode) +CA=$(kubectl --kubeconfig "$REMOTE" --namespace kube-system \ + get secret clustermesh-reader-token --output jsonpath='{.data.ca\.crt}') +SERVER=$(kubectl --kubeconfig "$REMOTE" config view --minify \ + --output jsonpath='{.clusters[0].cluster.server}') + +TMP=$(mktemp); chmod 600 "$TMP" +cat > "$TMP" < **Warning:** Pod CIDRs, WireGuard CIDRs, and service CIDRs must not overlap between any two clusters in the same namespace. Overlapping CIDRs block reconciliation for all affected meshes. +> +> **Note:** The CRD is automatically installed by the operator at startup — you do not need to apply it separately. -```bash -golangci-lint run -``` +## How It Works -### Build +On each reconcile cycle, the operator connects to every cluster in the `ClusterMesh` spec, lists all `Node` objects, validates each node's pod CIDR and WireGuard IP against the declared spec, and creates or updates Kilo `Peer` objects accordingly. Nodes that fail validation or have no resolvable endpoint are skipped. For each cluster that declares a `serviceCIDR` or `additionalCIDRs`, an anchor `Peer` carrying those CIDRs is also created on every other cluster. The operator uses a finalizer to clean up all managed peers when a `ClusterMesh` resource is deleted. -```bash -go build -o bin/manager ./cmd/main.go -``` +See [./docs/architecture.md](./docs/architecture.md) for the full reconciliation flow and component details. -### Regenerate CRDs and DeepCopy +> **Note:** The operator watches `ClusterMesh` and `Secret` objects only — it does **not** watch `Node` objects. 
After changing a node annotation (endpoint, WireGuard IP, public key), trigger a reconcile manually: +> +> ```bash +> kubectl --namespace kilo-system annotate clustermesh <name> \ +> reconcile-trigger="$(date +%s)" --overwrite +> ``` -```bash -make manifests generate -``` +## Documentation -### Helm chart tests +| Page | Description | +| --- | --- | +| [Architecture](./docs/architecture.md) | Reconciliation flow, component internals, CRD bootstrap, change-watcher | +| [Installation](./docs/installation.md) | Helm chart values, RBAC setup, image pinning, uninstall procedure | +| [Configuration](./docs/configuration.md) | Full `ClusterMesh` CRD reference, field constraints, status conditions | +| [Per-node setup](./docs/per-node-setup.md) | Endpoint resolution chain, node annotations, WireGuard IP requirements | +| [Troubleshooting](./docs/troubleshooting.md) | Common failure modes, skip reasons, CIDR overlap, stale peers | +| [Known Gaps](./docs/known-gaps.md) | Outstanding work and proposal divergences (for contributors) | -```bash -helm lint charts/kilo-clustermesh-operator --strict -helm unittest charts/kilo-clustermesh-operator -``` +## Project Status -## License +Alpha — the API is functional and in active use within Cozystack, but the CRD version is `v1alpha1` and breaking changes may occur before a stable release. See [docs/known-gaps.md](./docs/known-gaps.md) for outstanding work and divergences from the upstream proposal. -Copyright 2026 The Kilo Authors. +## License -Licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE) for the full text. +Copyright 2026 The Kilo Authors. Licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE) for the full text. 
diff --git a/api/v1alpha1/clustermesh_types.go b/api/v1alpha1/clustermesh_types.go index 42fc5a0..11b0e24 100644 --- a/api/v1alpha1/clustermesh_types.go +++ b/api/v1alpha1/clustermesh_types.go @@ -53,9 +53,22 @@ type ClusterEntry struct { PodCIDRs []string `json:"podCIDRs"` //nolint:tagliatelle // "podCIDRs" is the canonical field name; "CIDR" is a well-known acronym // WireguardCIDR is the CIDR for Kilo's WireGuard interface (kilo0) addresses. - // Each node's kilo.squat.ai/wireguard-ip must be a /32 (or /128) within this CIDR. + // Each node's kilo.squat.ai/wireguard-ip must have its host IP within this CIDR. + // The annotation may carry any prefix length (e.g. "10.4.0.1/32" upstream Kilo + // or "10.4.0.1/16" cozystack-patched Kilo); only the host portion is validated. WireguardCIDR string `json:"wireguardCIDR"` + // WireguardPort is the UDP port of Kilo's WireGuard endpoint on each node in + // this cluster. Used as a fallback when the operator synthesises the + // endpoint from Node.Status.Addresses (i.e. neither + // kilo.squat.ai/clustermesh-endpoint nor kilo.squat.ai/force-endpoint is set + // on a node). Defaults to 51820. + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=65535 + // +kubebuilder:default=51820 + // +optional + WireguardPort uint16 `json:"wireguardPort,omitempty"` + // ServiceCIDR is the Kubernetes service network CIDR for this cluster. // If set, it will be advertised via an anchor Peer so that services // in this cluster are reachable from other mesh members. 
diff --git a/api/v1alpha1/clustermesh_types_test.go b/api/v1alpha1/clustermesh_types_test.go index 2e31cb3..b141917 100644 --- a/api/v1alpha1/clustermesh_types_test.go +++ b/api/v1alpha1/clustermesh_types_test.go @@ -59,6 +59,7 @@ func TestClusterMeshJSONRoundTrip(t *testing.T) { Local: true, PodCIDRs: []string{"10.0.0.0/16", "fd00::/48"}, WireguardCIDR: "172.30.0.0/24", + WireguardPort: 51820, ServiceCIDR: "10.96.0.0/12", AdditionalCIDRs: []string{"192.168.100.0/24"}, }, @@ -67,6 +68,7 @@ func TestClusterMeshJSONRoundTrip(t *testing.T) { KubeconfigSecretRef: secretRef, PodCIDRs: []string{"10.1.0.0/16"}, WireguardCIDR: "172.30.1.0/24", + WireguardPort: 52000, }, }, }, diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 2cb8f99..dd285d8 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1,5 +1,21 @@ //go:build !ignore_autogenerated +/* +Copyright 2026 The Kilo Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + // Code generated by controller-gen. DO NOT EDIT. package v1alpha1 diff --git a/charts/kilo-clustermesh-operator/templates/deployment.yaml b/charts/kilo-clustermesh-operator/templates/deployment.yaml index 12f231e..0018313 100644 --- a/charts/kilo-clustermesh-operator/templates/deployment.yaml +++ b/charts/kilo-clustermesh-operator/templates/deployment.yaml @@ -33,15 +33,22 @@ spec: image: "{{ .Values.image.repository }}:{{ include "kilo-clustermesh-operator.imageTag" . 
}}" imagePullPolicy: {{ .Values.image.pullPolicy }} args: - - "--namespace={{ .Release.Namespace }}" - "--leader-elect={{ .Values.leaderElect }}" - "--metrics-bind-address={{ .Values.metricsBindAddress }}" + - "--metrics-secure={{ .Values.metricsSecure }}" - "--health-probe-bind-address={{ .Values.healthProbeBindAddress }}" + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace securityContext: runAsNonRoot: true runAsUser: 65532 allowPrivilegeEscalation: false readOnlyRootFilesystem: true + seccompProfile: + type: RuntimeDefault capabilities: drop: - ALL diff --git a/charts/kilo-clustermesh-operator/templates/role.yaml b/charts/kilo-clustermesh-operator/templates/role.yaml index 2004601..8515bab 100644 --- a/charts/kilo-clustermesh-operator/templates/role.yaml +++ b/charts/kilo-clustermesh-operator/templates/role.yaml @@ -45,3 +45,10 @@ rules: verbs: - create - patch + - apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create + - patch diff --git a/charts/kilo-clustermesh-operator/templates/serviceaccount.yaml b/charts/kilo-clustermesh-operator/templates/serviceaccount.yaml index cea75d0..fdc5c8c 100644 --- a/charts/kilo-clustermesh-operator/templates/serviceaccount.yaml +++ b/charts/kilo-clustermesh-operator/templates/serviceaccount.yaml @@ -10,5 +10,4 @@ metadata: annotations: {{- toYaml . 
| nindent 4 }} {{- end }} -automountServiceAccountToken: false {{- end }} diff --git a/charts/kilo-clustermesh-operator/tests/deployment_test.yaml b/charts/kilo-clustermesh-operator/tests/deployment_test.yaml index bfe4410..853ae8e 100644 --- a/charts/kilo-clustermesh-operator/tests/deployment_test.yaml +++ b/charts/kilo-clustermesh-operator/tests/deployment_test.yaml @@ -30,14 +30,6 @@ tests: path: spec.replicas value: 2 - - it: should pass --namespace arg from release namespace - release: - namespace: test-ns - asserts: - - contains: - path: spec.template.spec.containers[0].args - content: "--namespace=test-ns" - - it: should pass --leader-elect=true when leaderElect is true set: leaderElect: true @@ -62,6 +54,20 @@ tests: path: spec.template.spec.containers[0].args content: "--metrics-bind-address=:9090" + - it: should pass --metrics-secure=false by default + asserts: + - contains: + path: spec.template.spec.containers[0].args + content: "--metrics-secure=false" + + - it: should pass --metrics-secure=true when enabled + set: + metricsSecure: true + asserts: + - contains: + path: spec.template.spec.containers[0].args + content: "--metrics-secure=true" + - it: should pass --health-probe-bind-address from values set: healthProbeBindAddress: ":9091" @@ -175,3 +181,25 @@ tests: - equal: path: spec.template.spec.containers[0].imagePullPolicy value: Always + + - it: should inject POD_NAMESPACE via downward API + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + + - it: should set seccompProfile to RuntimeDefault to satisfy PodSecurity restricted + asserts: + - equal: + path: spec.template.spec.containers[0].securityContext.seccompProfile.type + value: RuntimeDefault + + - it: should default image.repository to the cozystack fork + asserts: + - matchRegex: + path: spec.template.spec.containers[0].image + pattern: "^ghcr\\.io/cozystack/kilo-clustermesh-operator:" 
diff --git a/charts/kilo-clustermesh-operator/tests/rbac_test.yaml b/charts/kilo-clustermesh-operator/tests/rbac_test.yaml index e519126..4a65325 100644 --- a/charts/kilo-clustermesh-operator/tests/rbac_test.yaml +++ b/charts/kilo-clustermesh-operator/tests/rbac_test.yaml @@ -178,6 +178,20 @@ tests: - create - patch + - it: Role should have rules for events.k8s.io events + template: role.yaml + asserts: + - contains: + path: rules + content: + apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create + - patch + - it: RoleBinding should reference correct ServiceAccount template: rolebinding.yaml release: diff --git a/charts/kilo-clustermesh-operator/tests/serviceaccount_test.yaml b/charts/kilo-clustermesh-operator/tests/serviceaccount_test.yaml index 3dd479b..94c6f87 100644 --- a/charts/kilo-clustermesh-operator/tests/serviceaccount_test.yaml +++ b/charts/kilo-clustermesh-operator/tests/serviceaccount_test.yaml @@ -16,13 +16,12 @@ tests: - hasDocuments: count: 0 - - it: should set automountServiceAccountToken to false + - it: should not disable automountServiceAccountToken (operator needs in-cluster token) set: serviceAccount.create: true asserts: - - equal: + - notExists: path: automountServiceAccountToken - value: false - it: should default name to release-name-chart-name release: diff --git a/charts/kilo-clustermesh-operator/values.yaml b/charts/kilo-clustermesh-operator/values.yaml index c3985ff..4505e5f 100644 --- a/charts/kilo-clustermesh-operator/values.yaml +++ b/charts/kilo-clustermesh-operator/values.yaml @@ -1,5 +1,5 @@ image: - repository: ghcr.io/squat/kilo-clustermesh-operator + repository: ghcr.io/cozystack/kilo-clustermesh-operator # tag defaults to appVersion when empty tag: "" pullPolicy: IfNotPresent @@ -9,6 +9,7 @@ replicaCount: 1 leaderElect: true metricsBindAddress: ":8080" +metricsSecure: false healthProbeBindAddress: ":8081" resources: diff --git a/cmd/main.go b/cmd/main.go index 192e2fc..6a48dde 100644 --- a/cmd/main.go +++ 
b/cmd/main.go @@ -17,30 +17,56 @@ limitations under the License. package main import ( + "context" "crypto/tls" "flag" + "log/slog" "os" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. _ "k8s.io/client-go/plugin/pkg/client/auth" + "github.com/cockroachdb/errors" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/metrics/filters" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" kilov1alpha1 "github.com/squat/kilo-clustermesh-operator/api/v1alpha1" "github.com/squat/kilo-clustermesh-operator/internal/controller" + "github.com/squat/kilo-clustermesh-operator/internal/crd" + "github.com/squat/kilo-clustermesh-operator/internal/multicluster" + "github.com/squat/kilo-clustermesh-operator/internal/restart" kilopeerv1alpha1 "github.com/squat/kilo-clustermesh-operator/pkg/kilo/v1alpha1" // +kubebuilder:scaffold:imports ) +const ( + podNamespaceEnv = "POD_NAMESPACE" + leaderElectionID = "f27237f1.squat.ai" + controllerEventName = "clustermesh-controller" +) + +// version and revision are set at build time via -X linker flags: +// +// -X main.version=${VERSION} -X main.revision=${REVISION} +// +// They default to the zero string when not provided (e.g. in local dev builds). 
+var ( + version string + revision string +) + var ( scheme = runtime.NewScheme() setupLog = ctrl.Log.WithName("setup") @@ -55,163 +81,282 @@ func init() { } func main() { - var metricsAddr string + if err := run(); err != nil { + setupLog.Error(err, "operator exited with error") + os.Exit(1) + } +} + +func run() error { + opts := parseFlags() + + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts.zapOpts))) + slogger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})) - var metricsCertPath, metricsCertName, metricsCertKey string + namespace, err := readNamespace() + if err != nil { + return err + } - var webhookCertPath, webhookCertName, webhookCertKey string + cfg := ctrl.GetConfigOrDie() - var enableLeaderElection bool + ctx, cancel := context.WithCancel(ctrl.SetupSignalHandler()) + defer cancel() - var probeAddr string + if err := crd.InstallOrUpdate(ctx, cfg); err != nil { + return errors.Wrap(err, "installing CRD") + } - var secureMetrics bool + registry, err := buildInitialRegistry(ctx, cfg, namespace) + if err != nil { + return errors.Wrap(err, "building cluster registry") + } - var enableHTTP2 bool + mgr, err := newManager(cfg, &opts, namespace) + if err != nil { + return err + } - var tlsOpts []func(*tls.Config) + for name, c := range registry.All() { + if err := mgr.Add(c); err != nil { + return errors.Wrapf(err, "registering cluster %q with manager", name) + } + } - flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ - "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") - flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") - flag.BoolVar(&enableLeaderElection, "leader-elect", false, - "Enable leader election for controller manager. 
"+ - "Enabling this will ensure there is only one active controller manager.") - flag.BoolVar(&secureMetrics, "metrics-secure", true, - "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.") - flag.StringVar(&webhookCertPath, "webhook-cert-path", "", "The directory that contains the webhook certificate.") - flag.StringVar(&webhookCertName, "webhook-cert-name", "tls.crt", "The name of the webhook certificate file.") - flag.StringVar(&webhookCertKey, "webhook-cert-key", "tls.key", "The name of the webhook key file.") - flag.StringVar(&metricsCertPath, "metrics-cert-path", "", - "The directory that contains the metrics server certificate.") - flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.") - flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.") - flag.BoolVar(&enableHTTP2, "enable-http2", false, - "If set, HTTP/2 will be enabled for the metrics and webhook servers") + if err := wireReconciler(mgr, registry, slogger); err != nil { + return err + } + + if err := wireChangeWatcher(ctx, mgr, namespace, slogger, cancel); err != nil { + return err + } + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + return errors.Wrap(err, "setting up health check") + } + + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + return errors.Wrap(err, "setting up ready check") + } + + setupLog.Info("Starting manager", + "namespace", namespace, + "clusters", registry.Clusters(), + "version", version, + "revision", revision, + ) - opts := zap.Options{ - Development: true, + if err := mgr.Start(ctx); err != nil { + return errors.Wrap(err, "manager exited with error") } - opts.BindFlags(flag.CommandLine) + + return nil +} + +type runtimeOpts struct { + metricsAddr string + probeAddr string + metricsCertPath string + metricsCertName string + metricsCertKey string + webhookCertPath 
string + webhookCertName string + webhookCertKey string + enableLeaderElection bool + secureMetrics bool + enableHTTP2 bool + zapOpts zap.Options +} + +func parseFlags() runtimeOpts { + opts := runtimeOpts{zapOpts: zap.Options{Development: true}} + + flag.StringVar(&opts.metricsAddr, "metrics-bind-address", "0", + "The address the metrics endpoint binds to. Use :8443 for HTTPS or :8080 for HTTP, or 0 to disable.") + flag.StringVar(&opts.probeAddr, "health-probe-bind-address", ":8081", + "The address the probe endpoint binds to.") + flag.BoolVar(&opts.enableLeaderElection, "leader-elect", false, + "Enable leader election for controller manager.") + flag.BoolVar(&opts.secureMetrics, "metrics-secure", true, + "If set, the metrics endpoint is served securely via HTTPS.") + flag.StringVar(&opts.webhookCertPath, "webhook-cert-path", "", + "The directory that contains the webhook certificate.") + flag.StringVar(&opts.webhookCertName, "webhook-cert-name", "tls.crt", + "The name of the webhook certificate file.") + flag.StringVar(&opts.webhookCertKey, "webhook-cert-key", "tls.key", + "The name of the webhook key file.") + flag.StringVar(&opts.metricsCertPath, "metrics-cert-path", "", + "The directory that contains the metrics server certificate.") + flag.StringVar(&opts.metricsCertName, "metrics-cert-name", "tls.crt", + "The name of the metrics server certificate file.") + flag.StringVar(&opts.metricsCertKey, "metrics-cert-key", "tls.key", + "The name of the metrics server key file.") + flag.BoolVar(&opts.enableHTTP2, "enable-http2", false, + "If set, HTTP/2 will be enabled for the metrics and webhook servers.") + opts.zapOpts.BindFlags(flag.CommandLine) flag.Parse() - ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + return opts +} + +// readNamespace reads the operator's own namespace, set via the POD_NAMESPACE +// env var (downward API) at deploy time. 
+func readNamespace() (string, error) { + ns := os.Getenv(podNamespaceEnv) + if ns == "" { + return "", errors.Newf("%s environment variable is required", podNamespaceEnv) + } + + return ns, nil +} + +// buildInitialRegistry lists all ClusterMesh resources in the operator's +// namespace and constructs a registry that holds clients for every declared +// cluster. If no ClusterMesh resources exist yet, an empty registry is +// returned and the change-watcher will trigger a restart once one is created. +func buildInitialRegistry(ctx context.Context, cfg *rest.Config, namespace string) (*multicluster.ClusterRegistry, error) { + preClient, err := client.New(cfg, client.Options{Scheme: scheme}) + if err != nil { + return nil, errors.Wrap(err, "building pre-manager client") + } + + var meshes kilov1alpha1.ClusterMeshList + if err := preClient.List(ctx, &meshes, client.InNamespace(namespace)); err != nil { + return nil, errors.Wrap(err, "listing ClusterMesh resources") + } + + merged := mergeClusterSpecs(meshes.Items) + + registry, err := multicluster.Build(ctx, merged, cfg, namespace, preClient, scheme) + + return registry, errors.Wrap(err, "constructing registry") +} + +// mergeClusterSpecs collapses every cluster entry across every ClusterMesh +// into a single spec, deduplicating by cluster name (first occurrence wins). 
+func mergeClusterSpecs(meshes []kilov1alpha1.ClusterMesh) kilov1alpha1.ClusterMeshSpec { + seen := make(map[string]struct{}) + + var merged kilov1alpha1.ClusterMeshSpec + + for i := range meshes { + for j := range meshes[i].Spec.Clusters { + entry := meshes[i].Spec.Clusters[j] + if _, dup := seen[entry.Name]; dup { + continue + } + + seen[entry.Name] = struct{}{} + merged.Clusters = append(merged.Clusters, entry) + } + } + + return merged +} + +func newManager(cfg *rest.Config, opts *runtimeOpts, namespace string) (manager.Manager, error) { + var tlsOpts []func(*tls.Config) - // if the enable-http2 flag is false (the default), http/2 should be disabled - // due to its vulnerabilities. More specifically, disabling http/2 will - // prevent from being vulnerable to the HTTP/2 Stream Cancellation and - // Rapid Reset CVEs. For more information see: - // - https://github.com/advisories/GHSA-qppj-fm5r-hxr3 - // - https://github.com/advisories/GHSA-4374-p667-p6c8 disableHTTP2 := func(c *tls.Config) { setupLog.Info("Disabling HTTP/2") c.NextProtos = []string{"http/1.1"} } - if !enableHTTP2 { + if !opts.enableHTTP2 { tlsOpts = append(tlsOpts, disableHTTP2) } - // Initial webhook TLS options - webhookTLSOpts := tlsOpts - webhookServerOptions := webhook.Options{ - TLSOpts: webhookTLSOpts, - } + webhookServerOptions := webhook.Options{TLSOpts: tlsOpts} - if webhookCertPath != "" { - setupLog.Info("Initializing webhook certificate watcher using provided certificates", - "webhook-cert-path", webhookCertPath, "webhook-cert-name", webhookCertName, "webhook-cert-key", webhookCertKey) - - webhookServerOptions.CertDir = webhookCertPath - webhookServerOptions.CertName = webhookCertName - webhookServerOptions.KeyName = webhookCertKey + if opts.webhookCertPath != "" { + webhookServerOptions.CertDir = opts.webhookCertPath + webhookServerOptions.CertName = opts.webhookCertName + webhookServerOptions.KeyName = opts.webhookCertKey } - webhookServer := 
webhook.NewServer(webhookServerOptions) - - // Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server. - // More info: - // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.23.3/pkg/metrics/server - // - https://book.kubebuilder.io/reference/metrics.html metricsServerOptions := metricsserver.Options{ - BindAddress: metricsAddr, - SecureServing: secureMetrics, + BindAddress: opts.metricsAddr, + SecureServing: opts.secureMetrics, TLSOpts: tlsOpts, } - if secureMetrics { - // FilterProvider is used to protect the metrics endpoint with authn/authz. - // These configurations ensure that only authorized users and service accounts - // can access the metrics endpoint. The RBAC are configured in 'config/rbac/kustomization.yaml'. More info: - // https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.23.3/pkg/metrics/filters#WithAuthenticationAndAuthorization + if opts.secureMetrics { metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization } - // If the certificate is not specified, controller-runtime will automatically - // generate self-signed certificates for the metrics server. While convenient for development and testing, - // this setup is not recommended for production. - // - // To enable certManager, uncomment the following lines: - // - [METRICS-WITH-CERTS] at config/default/kustomization.yaml to generate and use certificates - // managed by cert-manager for the metrics server. - // - [PROMETHEUS-WITH-CERTS] at config/prometheus/kustomization.yaml for TLS certification. 
- if metricsCertPath != "" { - setupLog.Info("Initializing metrics certificate watcher using provided certificates", - "metrics-cert-path", metricsCertPath, "metrics-cert-name", metricsCertName, "metrics-cert-key", metricsCertKey) - - metricsServerOptions.CertDir = metricsCertPath - metricsServerOptions.CertName = metricsCertName - metricsServerOptions.KeyName = metricsCertKey + if opts.metricsCertPath != "" { + metricsServerOptions.CertDir = opts.metricsCertPath + metricsServerOptions.CertName = opts.metricsCertName + metricsServerOptions.KeyName = opts.metricsCertKey } - mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + mgr, err := ctrl.NewManager(cfg, ctrl.Options{ Scheme: scheme, Metrics: metricsServerOptions, - WebhookServer: webhookServer, - HealthProbeBindAddress: probeAddr, - LeaderElection: enableLeaderElection, - LeaderElectionID: "f27237f1.squat.ai", - // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily - // when the Manager ends. This requires the binary to immediately end when the - // Manager is stopped, otherwise, this setting is unsafe. Setting this significantly - // speeds up voluntary leader transitions as the new leader don't have to wait - // LeaseDuration time first. - // - // In the default scaffold provided, the program ends immediately after - // the manager stops, so would be fine to enable this option. However, - // if you are doing or is intended to do any operation such as perform cleanups - // after the manager stops then its usage might be unsafe. - // LeaderElectionReleaseOnCancel: true, + WebhookServer: webhook.NewServer(webhookServerOptions), + HealthProbeBindAddress: opts.probeAddr, + LeaderElection: opts.enableLeaderElection, + LeaderElectionID: leaderElectionID, + // The manager's cache only watches namespaced types we own + // (ClusterMesh + Secret); restrict it to the operator's own + // namespace so we don't need cluster-wide list/watch RBAC. 
+ // Cluster-scoped resources (Peers, Nodes, CRDs, Leases) are + // accessed via the multicluster registry or direct API calls, + // not the manager cache. + Cache: cache.Options{ + DefaultNamespaces: map[string]cache.Config{ + namespace: {}, + }, + }, }) + + return mgr, errors.Wrap(err, "creating manager") +} + +func wireReconciler(mgr manager.Manager, registry *multicluster.ClusterRegistry, slogger *slog.Logger) error { + r := &controller.ClusterMeshReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Registry: registry, + Log: slogger, + Recorder: mgr.GetEventRecorder(controllerEventName), + } + + return errors.Wrap(r.SetupWithManager(mgr), "registering ClusterMesh reconciler") +} + +func wireChangeWatcher( + ctx context.Context, + mgr manager.Manager, + namespace string, + slogger *slog.Logger, + cancel context.CancelFunc, +) error { + preClient, err := client.New(mgr.GetConfig(), client.Options{Scheme: scheme}) if err != nil { - setupLog.Error(err, "Failed to start manager") - os.Exit(1) + return errors.Wrap(err, "building pre-manager client for fingerprint") } - if err := (&controller.ClusterMeshReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "Failed to create controller", "controller", "clustermesh") - os.Exit(1) + watcher := &restart.ChangeWatcher{ + Client: mgr.GetClient(), + Namespace: namespace, + Log: slogger, + Cancel: cancel, } - // +kubebuilder:scaffold:builder - if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { - setupLog.Error(err, "Failed to set up health check") - os.Exit(1) + bootstrap := &restart.ChangeWatcher{ + Client: preClient, + Namespace: namespace, + Log: slogger, } - if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { - setupLog.Error(err, "Failed to set up ready check") - os.Exit(1) + fingerprint, err := bootstrap.ComputeFingerprint(ctx) + if err != nil { + return errors.Wrap(err, "computing start 
fingerprint") } - setupLog.Info("Starting manager") + watcher.StartFingerprint = fingerprint - if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { - setupLog.Error(err, "Failed to run manager") - os.Exit(1) - } + return errors.Wrap(watcher.SetupWithManager(mgr), "registering change-watcher") } diff --git a/cmd/main_test.go b/cmd/main_test.go new file mode 100644 index 0000000..a3c45c8 --- /dev/null +++ b/cmd/main_test.go @@ -0,0 +1,142 @@ +/* +Copyright 2026 The Kilo Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "testing" + + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + kilov1alpha1 "github.com/squat/kilo-clustermesh-operator/api/v1alpha1" +) + +func mesh(name string, clusters ...kilov1alpha1.ClusterEntry) kilov1alpha1.ClusterMesh { + return kilov1alpha1.ClusterMesh{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: kilov1alpha1.ClusterMeshSpec{Clusters: clusters}, + } +} + +func entry(name string, podCIDR string) kilov1alpha1.ClusterEntry { + return kilov1alpha1.ClusterEntry{ + Name: name, + PodCIDRs: []string{podCIDR}, + WireguardCIDR: "10.4.0.0/16", + } +} + +func TestMergeClusterSpecs(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + meshes []kilov1alpha1.ClusterMesh + want []kilov1alpha1.ClusterEntry + }{ + { + name: "single mesh with multiple unique clusters preserves all and order", + meshes: []kilov1alpha1.ClusterMesh{ + mesh("mesh1", + entry("alpha", "10.0.0.0/16"), + entry("beta", "10.1.0.0/16"), + entry("gamma", "10.2.0.0/16"), + ), + }, + want: []kilov1alpha1.ClusterEntry{ + entry("alpha", "10.0.0.0/16"), + entry("beta", "10.1.0.0/16"), + entry("gamma", "10.2.0.0/16"), + }, + }, + { + name: "two meshes with no duplicate names include all clusters in input order", + meshes: []kilov1alpha1.ClusterMesh{ + mesh("mesh1", + entry("alpha", "10.0.0.0/16"), + entry("beta", "10.1.0.0/16"), + ), + mesh("mesh2", + entry("gamma", "10.2.0.0/16"), + entry("delta", "10.3.0.0/16"), + ), + }, + want: []kilov1alpha1.ClusterEntry{ + entry("alpha", "10.0.0.0/16"), + entry("beta", "10.1.0.0/16"), + entry("gamma", "10.2.0.0/16"), + entry("delta", "10.3.0.0/16"), + }, + }, + { + name: "two meshes sharing a cluster name - first occurrence wins", + // Both mesh1 and mesh2 declare "shared" but with different podCIDRs. + // The entry from mesh1 (10.0.0.0/16) must survive; mesh2's version (10.9.0.0/16) must be dropped. 
+ meshes: []kilov1alpha1.ClusterMesh{ + mesh("mesh1", + entry("shared", "10.0.0.0/16"), + entry("unique1", "10.1.0.0/16"), + ), + mesh("mesh2", + entry("shared", "10.9.0.0/16"), // duplicate - must be dropped + entry("unique2", "10.2.0.0/16"), + ), + }, + want: []kilov1alpha1.ClusterEntry{ + entry("shared", "10.0.0.0/16"), // first occurrence (from mesh1) wins + entry("unique1", "10.1.0.0/16"), + entry("unique2", "10.2.0.0/16"), + }, + }, + { + name: "single mesh with internal duplicates keeps only first", + meshes: []kilov1alpha1.ClusterMesh{ + mesh("mesh1", + entry("dup", "10.0.0.0/16"), + entry("dup", "10.9.0.0/16"), // duplicate within same mesh - must be dropped + entry("unique", "10.1.0.0/16"), + ), + }, + want: []kilov1alpha1.ClusterEntry{ + entry("dup", "10.0.0.0/16"), + entry("unique", "10.1.0.0/16"), + }, + }, + { + name: "empty input returns empty output", + meshes: []kilov1alpha1.ClusterMesh{}, + want: nil, + }, + { + name: "mesh with empty clusters slice produces empty output", + meshes: []kilov1alpha1.ClusterMesh{ + mesh("mesh1"), + }, + want: nil, + }, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + got := mergeClusterSpecs(testCase.meshes) + + assert.Equal(t, testCase.want, got.Clusters) + }) + } +} diff --git a/config/crd/bases/kilo.squat.ai_clustermeshes.yaml b/config/crd/bases/kilo.squat.ai_clustermeshes.yaml index 0271637..64bb653 100644 --- a/config/crd/bases/kilo.squat.ai_clustermeshes.yaml +++ b/config/crd/bases/kilo.squat.ai_clustermeshes.yaml @@ -107,8 +107,21 @@ spec: wireguardCIDR: description: |- WireguardCIDR is the CIDR for Kilo's WireGuard interface (kilo0) addresses. - Each node's kilo.squat.ai/wireguard-ip must be a /32 (or /128) within this CIDR. + Each node's kilo.squat.ai/wireguard-ip must have its host IP within this CIDR. + The annotation may carry any prefix length (e.g. 
"10.4.0.1/32" upstream Kilo + or "10.4.0.1/16" cozystack-patched Kilo); only the host portion is validated. type: string + wireguardPort: + default: 51820 + description: |- + WireguardPort is the UDP port of Kilo's WireGuard endpoint on each node in + this cluster. Used as a fallback when the operator synthesises the + endpoint from Node.Status.Addresses (i.e. neither + kilo.squat.ai/clustermesh-endpoint nor kilo.squat.ai/force-endpoint is set + on a node). Defaults to 51820. + maximum: 65535 + minimum: 1 + type: integer required: - name - podCIDRs diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..e1891d7 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,216 @@ +# Architecture + +> Component map and reconciliation flow of the Kilo ClusterMesh Operator. + +## Table of Contents + +- [Overview](#overview) +- [Components](#components) +- [Reconciliation Flow](#reconciliation-flow) +- [Kilo Background](#kilo-background) +- [Anchor Peer](#anchor-peer) +- [Manager Cache Scoping](#manager-cache-scoping) +- [CRD Bootstrap](#crd-bootstrap) +- [Restart Watcher](#restart-watcher) + +--- + +## Overview + +The Kilo ClusterMesh Operator watches `ClusterMesh` custom resources in its own namespace and continuously reconciles WireGuard mesh connectivity across a fleet of Kubernetes clusters. For every node in every participating cluster it creates or updates a cluster-scoped `kilo.squat.ai/v1alpha1 Peer` object on each **remote** cluster, telling Kilo exactly which WireGuard public key, endpoint, and allowed IP ranges belong to that node. When a `ClusterMesh` resource is deleted the operator cleans up all managed `Peer` objects via a finalizer before releasing the resource. + +--- + +## Components + +### `controller` — reconciler entry point + +`internal/controller/clustermesh_controller.go` + +Houses `ClusterMeshReconciler`, the single controller registered with the controller-runtime manager. 
It reacts to `ClusterMesh` create and update events (delete events are filtered out and handled separately via a finalizer), drives the full reconciliation pipeline, and writes status conditions back to the `ClusterMesh` resource. + +### `multicluster` — client cache and cluster registry + +`internal/multicluster/registry.go`, `internal/multicluster/client.go` + +`ClusterRegistry` holds one controller-runtime `cluster.Cluster` per participating cluster. The local cluster uses a copy of the in-cluster REST config; remote clusters build their REST configs from kubeconfig `Secret` objects referenced in the `ClusterMesh` spec. `ClusterRegistry.Client(name)` provides a ready-to-use `client.Client` for reconciling Peer objects on any cluster in the mesh. + +### `validation` — node and mesh-level validation + +`internal/validation/node.go`, `internal/validation/mesh.go` + +Two validation layers run before any Peer objects are written: + +- **`ValidateNode`** checks that a node has the required Kilo annotations (`kilo.squat.ai/wireguard-ip`, `kilo.squat.ai/key`) and that its first pod CIDR falls within the cluster's declared `podCIDRs`. Nodes that fail return a `NodeSkipReason` — see [./troubleshooting.md](./troubleshooting.md) for the full list. +- **`ValidateMeshNetworks`** performs pairwise CIDR overlap checks across **all** `ClusterMesh` objects in the operator namespace, blocking reconciliation if any two clusters share address space. + +### `peer/builder` — synthesises Kilo Peer CRs + +`internal/peer/builder.go` + +`BuildPeer` converts a validated node into a `kilo.squat.ai/v1alpha1 Peer` spec: it resolves the node endpoint (see [Endpoint resolution chain](#reconciliation-flow)), normalises the `kilo.squat.ai/wireguard-ip` annotation to a `/32` (or `/128`) host route for `AllowedIPs`, and appends the node's first pod CIDR. 
+ +`BuildAnchorPeer` creates one additional Peer per source cluster that carries the cluster-wide `serviceCIDR` and `additionalCIDRs` — see [Anchor Peer](#anchor-peer). + +### `peer/reconciler` — applies and maintains Peer CRs in remote clusters + +`internal/peer/reconciler.go` + +`ReconcilePeers` takes a desired list of `Peer` objects and the `client.Client` for a target cluster, then performs a three-way reconcile: create missing Peers, update changed ones, and delete any Peers whose labels (`kilo-clustermesh.io/mesh`, `kilo-clustermesh.io/source-cluster`) match the source cluster but are no longer in the desired list. Passing `nil` as the desired list deletes all managed Peers — this is how the finalizer cleans up on `ClusterMesh` deletion. + +### `kilonode` — annotation constants and endpoint resolution + +`internal/kilonode/annotations.go`, `internal/kilonode/endpoint.go` + +Defines the annotation keys used to read Kilo metadata from nodes: + +- `kilo.squat.ai/wireguard-ip` — WireGuard overlay IP assigned by Kilo +- `kilo.squat.ai/key` — WireGuard public key managed by Kilo +- `kilo.squat.ai/clustermesh-endpoint` — operator-specific cross-cluster endpoint override +- `kilo.squat.ai/force-endpoint` — Kilo's own endpoint override (also consumed by the operator) + +`ResolveEndpoint` implements the three-tier endpoint resolution chain described in [./per-node-setup.md](./per-node-setup.md). + +### `crd` — embedded CRD bootstrap at startup + +`internal/crd/install.go`, `internal/crd/embed.go` + +`InstallOrUpdate` reads the `ClusterMesh` CRD YAML embedded in the binary via `//go:embed`, applies it to the local cluster as a create-or-update, and polls until the CRD reaches `Established=True` (timeout: 30 seconds). The Helm chart does **not** ship CRDs in a `crds/` directory — see [CRD Bootstrap](#crd-bootstrap). 
+ +### `restart` — restart-on-config-change watcher + +`internal/restart/watcher.go` + +`ChangeWatcher` monitors `ClusterMesh` objects and their referenced kubeconfig `Secret` objects. When the cluster configuration fingerprint changes it cancels the manager context, causing the pod to exit and Kubernetes to restart it with a freshly built `ClusterRegistry` — see [Restart Watcher](#restart-watcher). + +### `netutil` — CIDR helpers + +`internal/netutil/cidr.go` + +Utility functions for CIDR parsing and manipulation. The key function is `ParseHostInCIDR`, which accepts both `/32`-style host annotations (upstream Kilo) and network-prefix annotations such as `10.4.0.1/16` (cozystack-patched Kilo) and extracts the host IP. This is what makes the operator compatible with both Kilo variants. + +--- + +## Reconciliation Flow + +The following describes what happens end-to-end when a `ClusterMesh` resource is created or updated. + +```text +ClusterMesh create/update + │ + ▼ +ClusterMeshReconciler.Reconcile() + │ + ├─1─ validateMeshNetworks + │ List ALL ClusterMesh objects in namespace + │ Check pairwise CIDR overlaps → error blocks all affected meshes + │ + ├─2─ reconcileAllClusters [for each source cluster] + │ a. Get srcClient from ClusterRegistry + │ b. List Nodes on source cluster + │ c. For each node: + │ ValidateNode (annotations, podCIDR match) + │ FindDuplicateWGIPs (dedup, first wins) + │ ResolveEndpoint: + │ 1. clustermesh-endpoint annotation + │ 2. force-endpoint annotation + │ 3. Node.Status.Addresses ExternalIP + wireguardPort + │ BuildPeer → Peer{PublicKey, AllowedIPs[wg-ip/32, podCIDR], Endpoint} + │ d. BuildAnchorPeer (nodes[0]) → Peer{AllowedIPs[serviceCIDR, additionalCIDRs]} + │ e. For each target cluster (≠ source): + │ ReconcilePeers(targetClient, desired) + │ create missing / update changed / delete orphans + │ + └─3─ updateStatus + Set Ready=True + per-cluster peer counts +``` + +**Peer naming** follows the pattern `----`, sanitised to DNS-1123 label rules. 
Names exceeding 253 characters are truncated and suffixed with a SHA-256 hash to remain unique. Peers carry labels `kilo-clustermesh.io/mesh` and `kilo-clustermesh.io/source-cluster`; `ReconcilePeers` uses these labels to identify orphans for deletion. + +**No Node watch.** The reconciler only watches `ClusterMesh` resources. Changes to node annotations (`kilo.squat.ai/wireguard-ip`, `kilo.squat.ai/key`, `kilo.squat.ai/clustermesh-endpoint`, `kilo.squat.ai/force-endpoint`) are **not** detected automatically. After any node annotation change, write a no-op to the `ClusterMesh` resource to trigger a new reconcile cycle. + +> **Note:** Endpoint resolution is strict at each tier: a present but unparseable annotation value is a hard error that skips the node. The resolution does **not** fall through to the next source. Full details and skip reasons are in [./troubleshooting.md](./troubleshooting.md); annotation setup is in [./per-node-setup.md](./per-node-setup.md). + +--- + +## Kilo Background + +[Kilo](https://github.com/squat/kilo) is a multi-cluster network fabric that uses WireGuard to build overlay tunnels between Kubernetes nodes. It assigns each node a WireGuard IP via the `kilo.squat.ai/wireguard-ip` annotation and a WireGuard public key via `kilo.squat.ai/key`. Cross-cluster connectivity is expressed as `kilo.squat.ai/v1alpha1 Peer` objects — cluster-scoped resources describing a remote WireGuard peer (public key, endpoint, allowed CIDRs). + +**Fork awareness.** This operator is designed for the [cozystack-patched Kilo fork](https://github.com/aenix-io/kilo), which uses `cross` granularity: every node receives its own WireGuard IP (not only the location leader). The upstream `kilo.squat.ai/wireguard-ip` annotation carries a `/32` host address; the cozystack fork writes `<ip>/<prefix-length>` (e.g. `100.66.0.3/16`). The operator handles both forms transparently via `netutil.ParseHostInCIDR`, always normalising `AllowedIPs` to a `/32` host route. 
See [../README.md](../README.md) for a full compatibility note. + +--- + +## Anchor Peer + +Cross-cluster traffic needs to reach not only pod-to-pod destinations but also cluster Services (`serviceCIDR`) and any other subnets listed in `additionalCIDRs`. Regular per-node Peers only advertise the node's WireGuard IP and its pod CIDR; they carry no information about cluster-wide CIDRs. + +The **anchor peer** fills this gap. `BuildAnchorPeer` creates a single extra `Peer` per source cluster with: + +- `AllowedIPs`: `serviceCIDR` + all entries in `additionalCIDRs` +- `PublicKey` / `Endpoint`: taken from the first valid node in the node list (`nodes[0]`) +- Name: `----anchor` + +The anchor node is `nodes[0]` — the first node that passes validation. It is used only as a WireGuard public-key and endpoint carrier; traffic routed via the anchor peer's allowed CIDRs is handled by the cluster's internal routing once it enters through that node. + +**Nil-return cases.** `BuildAnchorPeer` returns `nil` (and the anchor peer is silently omitted for that reconcile cycle) when `resolvePeerEndpoint` returns ANY error for the anchor node — this covers both situations below, and a malformed endpoint annotation on `nodes[0]` will also silently suppress the anchor: + +1. The source cluster's `ClusterEntry` has no `serviceCIDR` and no `additionalCIDRs`. +2. The anchor node (`nodes[0]`) has no resolvable endpoint. + +> **Warning:** If the anchor peer is omitted because `nodes[0]` has no endpoint, `serviceCIDR` and `additionalCIDRs` will be unreachable from other clusters for the duration of that reconcile cycle. No error event is emitted. Check `nodes[0]`'s annotations and ensure `kilo.squat.ai/clustermesh-endpoint` or `kilo.squat.ai/force-endpoint` is set if the node has no `ExternalIP`. See [./per-node-setup.md](./per-node-setup.md) for annotation setup. + +For full `ClusterEntry` field reference see [./configuration.md](./configuration.md). 
+ +--- + +## Manager Cache Scoping + +The controller-runtime `Manager` is configured with namespace-scoped caches. The operator reads `POD_NAMESPACE` from the downward API and restricts the informer cache for `ClusterMesh` objects and kubeconfig `Secret` objects to **that single namespace only**. + +```go +// cmd/main.go — cache.Options.DefaultNamespaces +cache.Options{ + DefaultNamespaces: map[string]cache.Config{ + namespace: {}, + }, +} +``` + +Cluster-scoped resources — `Node` objects, `Peer` objects, `CustomResourceDefinition` objects, and leader-election `Lease` objects — are **not** handled through the manager cache. They are accessed directly via the `ClusterRegistry`'s per-cluster clients or via raw REST calls. + +This design means the operator is completely isolated to its own namespace for namespace-scoped resources and cannot accidentally observe or modify objects in other namespaces. + +> **Note:** Multiple `ClusterMesh` objects in the same namespace are supported, but cluster entries are deduplicated by name at startup: if two `ClusterMesh` objects reference a cluster with the same name, the first entry encountered wins and the second is silently dropped. See [./troubleshooting.md](./troubleshooting.md) if a cluster appears to use unexpected connection details. + +For deployment specifics (namespace, RBAC, `POD_NAMESPACE` setup) see [./installation.md](./installation.md). + +--- + +## CRD Bootstrap + +The `ClusterMesh` CRD YAML is embedded directly in the operator binary at compile time (`//go:embed`). On every startup, before the manager is initialised, `crd.InstallOrUpdate` applies this CRD to the local cluster and polls until the API server reports `Established=True` (maximum wait: 30 seconds). + +Consequences of this design: + +- **No chart-side CRDs.** The Helm chart has no `crds/` directory. Installing or upgrading the chart without running the operator will not create or update the CRD. 
+- **Automatic CRD upgrades.** Upgrading the operator binary (via a new Helm chart release or image tag) automatically upgrades the CRD schema on the next pod start. +- **Startup order matters.** If the CRD cannot reach `Established=True` within 30 seconds the operator exits. This can happen on a slow or overloaded API server. + +See [./installation.md](./installation.md) for Helm-based deployment. + +--- + +## Restart Watcher + +`ChangeWatcher` runs alongside the manager and watches for changes to the cluster configuration that cannot be handled by a normal reconcile loop. Specifically, it detects: + +- A new `ClusterMesh` object being created in the namespace +- A cluster entry being renamed or removed from an existing `ClusterMesh` +- The `ResourceVersion` of a referenced kubeconfig `Secret` changing (i.e. the kubeconfig content itself changed) + +The fingerprint is a SHA-256 hash of the sorted JSON representation of `[{name, secretName, secretResourceVersion}]` for every cluster across all `ClusterMesh` objects. When the live fingerprint diverges from the fingerprint captured at startup, `ChangeWatcher` calls `Cancel()`, which stops the manager context. Kubernetes then restarts the pod, and the new process calls `buildInitialRegistry` to rebuild `ClusterRegistry` from scratch with the updated configuration. + +> **Note:** This is intentional design — a pod restart on cluster configuration change, not a crash. The restart is fast (registry build is synchronous at startup) and ensures the informer caches for all remote clusters are correctly initialised. Peer-level changes (node annotation updates, CIDR changes within an existing cluster) do **not** trigger a restart; they are handled by the normal reconcile loop. + +See [./troubleshooting.md](./troubleshooting.md) if the operator is restarting unexpectedly. 
diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..888de78 --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,196 @@ +# Configuration + +> Complete reference for the `ClusterMesh` CRD. + +## Table of Contents + +- [Overview](#overview) +- [ClusterMesh resource](#clustermesh-resource) + - [Group / Version / Kind](#group--version--kind) + - [Spec fields](#spec-fields) + - [Status fields](#status-fields) +- [ClusterEntry fields](#clusterentry-fields) +- [SecretKeyRef fields](#secretkeyref-fields) +- [Status conditions](#status-conditions) +- [CIDR validation rules](#cidr-validation-rules) +- [Examples](#examples) + +--- + +## Overview + +`ClusterMesh` is the only custom resource defined by this operator. It declares a set of clusters to connect into a WireGuard mesh and drives the operator's reconciliation loop. Everything configuration-related lives in `spec.clusters`; the operator writes observed state back into `status`. Node-level annotations are outside this CRD — see [per-node-setup.md](./per-node-setup.md). + +--- + +## ClusterMesh resource + +### Group / Version / Kind + +| Field | Value | +|-------|-------| +| API group | `kilo.squat.ai` | +| Version | `v1alpha1` | +| Kind | `ClusterMesh` | +| Plural / short name | `clustermeshes` / `cm` | +| Scope | `Namespaced` | +| Finalizer | `kilo-clustermesh.io/cleanup` | + +**kubectl** example: + +```bash +kubectl get clustermeshes --namespace kilo-system +kubectl get cm --namespace kilo-system +``` + +### Spec fields + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `spec.clusters` | `[]ClusterEntry` | Yes | — | List of clusters in the mesh. Must contain at least 2 entries (`+kubebuilder:validation:MinItems=2`). 
| + +### Status fields + +| Field | Type | Description | +|-------|------|-------------| +| `status.conditions` | `[]metav1.Condition` | Standard Kubernetes conditions (see [Status conditions](#status-conditions)). `listType=map`, `listMapKey=type`. | +| `status.clusters` | `[]ClusterStatus` | Per-cluster observed state. | +| `status.clusters[].name` | `string` | Matches `ClusterEntry.name`. | +| `status.clusters[].registeredPeers` | `int` | Number of `Peer` objects built for this cluster's valid nodes (per-node peers + optional anchor peer). Set before peers are applied to target clusters. | +| `status.clusters[].skippedNodes` | `int` | Number of nodes that failed validation and were not peered. See [troubleshooting.md](./troubleshooting.md) for skip reasons. | + +--- + +## ClusterEntry fields + +Each element of `spec.clusters` is a `ClusterEntry`. + +| Field | Type | Required | Default | Validation | Description | +|-------|------|----------|---------|------------|-------------| +| `name` | `string` | Yes | — | `pattern: ^[a-z0-9]([a-z0-9\-]{0,61}[a-z0-9])?$`, `maxLength: 63` | Unique cluster identifier within this mesh. DNS-1123 label format. Used as label value and status key. | +| `local` | `bool` | No | `false` | — | Marks the cluster where the operator runs. Exactly one cluster must be `local: true`. No `kubeconfigSecretRef` is needed for the local cluster. | +| `kubeconfigSecretRef` | `*SecretKeyRef` | No¹ | — | — | Reference to a Secret holding the kubeconfig for this cluster. Required for non-local clusters; ignored for the local cluster. Secret must be in the same namespace as the `ClusterMesh` resource. See [installation.md](./installation.md) for Secret setup. | +| `podCIDRs` | `[]string` | Yes | — | `minItems: 1` | Pod network CIDR(s). `Node.Spec.PodCIDRs[0]` on each node must fall within one of these CIDRs. Multiple entries support dual-stack (IPv4 + IPv6). Only `PodCIDRs[0]` is validated per node; IPv6 pod CIDRs are not placed in `AllowedIPs`. 
| +| `wireguardCIDR` | `string` | Yes | — | — | CIDR for Kilo's `kilo0` WireGuard interface addresses. Each node's `kilo.squat.ai/wireguard-ip` host IP must fall within this CIDR. The annotation may carry any prefix length (`/32` on upstream Kilo, or the subnet's prefix length, e.g. `/16`, on cozystack-patched Kilo); only the host portion is validated. | +| `wireguardPort` | `uint16` | No | `51820` | `minimum: 1`, `maximum: 65535` | UDP port for this cluster's WireGuard endpoints. Used only as a fallback when the operator synthesises an endpoint from `Node.Status.Addresses` (i.e. neither `kilo.squat.ai/clustermesh-endpoint` nor `kilo.squat.ai/force-endpoint` is set on the node). See [per-node-setup.md](./per-node-setup.md). `+kubebuilder:default=51820` | +| `serviceCIDR` | `string` | No | `""` | — | Kubernetes service network CIDR. When set, included in the anchor `Peer`'s `AllowedIPs` so services in this cluster are reachable from other mesh members. When empty, excluded from `AllCIDRs` and from CIDR overlap checks. See [architecture.md](./architecture.md) for anchor peer details. | +| `additionalCIDRs` | `[]string` | No | `[]` | — | Extra CIDRs to advertise via the anchor `Peer` (host-network ranges, external subnets, etc.). All entries are included in `AllCIDRs` for overlap validation. | + +¹ `kubeconfigSecretRef` is logically required for every non-local cluster entry. The CRD marks it `+optional` at the schema level to permit the local cluster to omit it; the controller treats its absence on a non-local entry as a configuration error. + +--- + +## SecretKeyRef fields + +Embedded inside `kubeconfigSecretRef`. + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `name` | `string` | Yes | Name of the Kubernetes `Secret` object in the operator namespace. | +| `key` | `string` | Yes | Key within `Secret.data` whose value contains the kubeconfig bytes. | + +--- + +## Status conditions + +Conditions use the standard `metav1.Condition` type. 
Two condition types are written by the controller. + +### Condition types and reason values + +| Type | Status | Reason | Set when | +|------|--------|--------|----------| +| `Ready` | `True` | `Reconciled` | All clusters reconciled successfully in the current pass. | +| `Ready` | `False` | `NetworksOverlap` | A CIDR overlap was detected; reconciliation was blocked. | +| `NetworksOverlap` | `True` | `CIDROverlap` | Overlap detected between CIDRs in this mesh or across meshes in the same namespace. | +| `NetworksOverlap` | `False` | `NoOverlap` | All CIDRs are disjoint; mesh passed CIDR validation. | + +`Ready` and `NetworksOverlap` are always updated together: when overlap is detected, `NetworksOverlap=True/CIDROverlap` and `Ready=False/NetworksOverlap` are set atomically. On success, `NetworksOverlap=False/NoOverlap` is set before peer reconciliation, and `Ready=True/Reconciled` is set after. + +> CIDR overlap in any `ClusterMesh` in the namespace blocks reconciliation of all meshes in that namespace — `ValidateMeshNetworks` does pairwise cross-mesh checking on every reconcile. See [troubleshooting.md](./troubleshooting.md). + +--- + +## CIDR validation rules + +CIDR validation is enforced by `ValidateMeshNetworks` (called on every reconcile) and `ValidateClusterNetworks` (per-mesh subset). The rules are: + +1. **All CIDRs within a single `ClusterMesh` must be pairwise disjoint.** The set of CIDRs checked for each cluster entry is built by `AllCIDRs()` in the order: `podCIDRs` → `wireguardCIDR` → `serviceCIDR` (only if non-empty) → `additionalCIDRs`. + +2. **CIDRs across all `ClusterMesh` objects in the same namespace must also be pairwise disjoint.** A single overlap between any two clusters in any combination of meshes fails the check for all affected meshes. + +3. **`serviceCIDR` is excluded from the check when empty.** An empty `serviceCIDR` is not added to `AllCIDRs`, so it cannot cause an overlap error. 
+ +The overlap check uses `net.IPNet.Contains` both ways (`a.Contains(b.IP) || b.Contains(a.IP)`), so a sub-range of another cluster's CIDR is also an error. + +--- + +## Examples + +### Minimal + +Two clusters, no service CIDRs, default WireGuard port. CIDRs are non-overlapping. + +```yaml +apiVersion: kilo.squat.ai/v1alpha1 +kind: ClusterMesh +metadata: + name: prod-mesh + namespace: kilo-system +spec: + clusters: + - name: cluster-a + local: true + podCIDRs: + - 10.244.0.0/16 + wireguardCIDR: 100.64.0.0/16 + + - name: cluster-b + kubeconfigSecretRef: + name: cluster-b-kubeconfig + key: kubeconfig + podCIDRs: + - 10.245.0.0/16 + wireguardCIDR: 100.65.0.0/16 +``` + +### Full + +All fields populated: dual-stack `podCIDRs`, `serviceCIDR`, `additionalCIDRs`, and a non-default `wireguardPort`. + +```yaml +apiVersion: kilo.squat.ai/v1alpha1 +kind: ClusterMesh +metadata: + name: prod-mesh + namespace: kilo-system +spec: + clusters: + - name: cluster-a + local: true + podCIDRs: + - 10.244.0.0/16 + - fd00:10:244::/48 + wireguardCIDR: 100.64.0.0/16 + wireguardPort: 51820 # default; listed explicitly for clarity + serviceCIDR: 10.96.0.0/12 + additionalCIDRs: + - 192.168.10.0/24 + + - name: cluster-b + kubeconfigSecretRef: + name: cluster-b-kubeconfig + key: kubeconfig + podCIDRs: + - 10.245.0.0/16 + - fd00:10:245::/48 + wireguardCIDR: 100.65.0.0/16 + wireguardPort: 52000 + serviceCIDR: 172.16.0.0/12 + additionalCIDRs: + - 192.168.20.0/24 +``` + +> For kubeconfig Secret setup, see [installation.md](./installation.md). +> For node-level annotations (`wireguard-ip`, `clustermesh-endpoint`, `key`), see [per-node-setup.md](./per-node-setup.md). +> For reconciliation flow and anchor peer behaviour, see [architecture.md](./architecture.md). +> Back to [README.md](../README.md). 
diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 0000000..9f94d52 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,536 @@ +# Installation + +> Step-by-step guide to deploying the Kilo ClusterMesh Operator and connecting your first pair of clusters. + +## Table of Contents + +- [Prerequisites](#prerequisites) +- [Operator Deployment](#operator-deployment) +- [CRD Bootstrap](#crd-bootstrap) +- [Remote Cluster Kubeconfigs](#remote-cluster-kubeconfigs) +- [Example: Cozystack Deployment](#example-cozystack-deployment) +- [Verifying Installation](#verifying-installation) +- [Uninstalling](#uninstalling) + +--- + +## Prerequisites + +Before installing the operator, make sure every cluster that will join the mesh meets the following requirements. + +### Kilo on every cluster + +The operator manages [Kilo](https://kilo.squat.ai) `Peer` objects — it does not install or manage Kilo itself. Kilo must already be running on every cluster that will participate in the mesh. Specifically: + +- Each node must have Kilo's agent running so that the node receives the standard Kilo annotations (`kilo.squat.ai/wireguard-ip`, `kilo.squat.ai/key`) that the operator reads to build peers. +- Kilo must be configured with `--mesh-granularity=cross`. This is a Cozystack-patched granularity that assigns a WireGuard IP to **every** node, rather than electing one leader per location label. The operator's validation rejects nodes that lack per-node WireGuard IPs. + +See [Per-Node Setup](./per-node-setup.md) for the exact annotations the operator requires on each node. + +### Kubernetes version + +The operator targets the Kubernetes API surface used by `k8s.io/api v0.35.0` and `sigs.k8s.io/controller-runtime v0.23.3` (from `go.mod`). Kubernetes 1.29 or later is recommended. No features beyond standard CRDs, RBAC, and core resources are required. + +> **Note on token Secrets**: Kubernetes 1.24 and later no longer auto-creates token Secrets for ServiceAccounts. 
The remote-cluster RBAC step below creates an explicit `kubernetes.io/service-account-token` Secret to obtain a long-lived token. Plan for this if your clusters run 1.24+. + +### Non-overlapping CIDRs + +Every cluster in the mesh must use **distinct, non-overlapping** address ranges for all three CIDR types: + +| CIDR type | Example cluster-a | Example cluster-b | Purpose | +| --- | --- | --- | --- | +| `podCIDR` | `10.244.0.0/16` | `10.245.0.0/16` | Pod IP space; must not overlap across clusters | +| `serviceCIDR` | `10.96.0.0/16` | `10.97.0.0/16` | Service IP space; routed to the anchor peer | +| `wireguardCIDR` | `100.66.0.0/16` | `100.67.0.0/16` | WireGuard overlay; must not overlap across clusters | + +The operator's `ValidateMeshNetworks` check runs before every reconcile. If any CIDR overlaps with another cluster in the same namespace, **reconciliation stops** for all affected meshes and the `ClusterMesh` status is set to `Ready=False` with reason `NetworksOverlap`. Overlapping CIDRs is the most common reason for a stuck installation — verify them before proceeding. + +--- + +## Operator Deployment + +The operator is distributed as a Helm chart located at `charts/kilo-clustermesh-operator/` in this repository. There is no external chart repository; install directly from the source tree or from a local copy. + +### Install the chart + +```shell +$ helm install kilo-clustermesh-operator charts/kilo-clustermesh-operator \ + --namespace kilo-clustermesh \ + --create-namespace +``` + +This single command: + +1. Creates the namespace (because of `--create-namespace`). +2. Creates a ServiceAccount, ClusterRole, ClusterRoleBinding, Role, and RoleBinding for the operator. +3. Deploys the operator Deployment. +4. On first start the operator applies the `ClusterMesh` CRD itself — **no separate CRD apply is needed** (see [CRD Bootstrap](#crd-bootstrap)). + +### Image source + +The container image is published at `ghcr.io/cozystack/kilo-clustermesh-operator`. 
It is a multi-stage build from `golang:1.26`, producing a static binary that runs in a `gcr.io/distroless/static:nonroot` base image as UID 65532. The image source is at `Containerfile` in the repository root. + +By default the chart uses the `appVersion` from `Chart.yaml` as the image tag. For production deployments it is strongly recommended to pin to a specific commit SHA. + +### Default chart values + +```yaml +image: + repository: ghcr.io/cozystack/kilo-clustermesh-operator + tag: "" # uses Chart appVersion when empty + pullPolicy: IfNotPresent + +replicaCount: 1 + +leaderElect: true # enables leader election; required when replicaCount > 1 + +metricsBindAddress: ":8080" # HTTP metrics (see note below) +metricsSecure: false # chart default is HTTP; binary default is HTTPS +healthProbeBindAddress: ":8081" + +resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + +serviceAccount: + create: true + name: "" + annotations: {} +``` + +> **Metrics scheme difference**: The chart overrides the binary's built-in default for metrics. The binary default is `--metrics-bind-address=0` (disabled) with `--metrics-secure=true` (HTTPS). The chart values enable metrics on `:8080` over plain **HTTP**. If you require HTTPS metrics, set `metricsSecure: true` and configure the necessary TLS certificates. 
+ +### Override values example + +To pin the image to a specific commit SHA and increase memory limits: + +```yaml +# my-values.yaml +image: + repository: ghcr.io/cozystack/kilo-clustermesh-operator + tag: sha-43caba9978f26383593bedec79930c62e7ecead7 + pullPolicy: IfNotPresent + +resources: + limits: + cpu: 500m + memory: 256Mi + requests: + cpu: 10m + memory: 64Mi +``` + +```shell +$ helm install kilo-clustermesh-operator charts/kilo-clustermesh-operator \ + --namespace kilo-clustermesh \ + --create-namespace \ + --values my-values.yaml +``` + +--- + +## CRD Bootstrap + +The `ClusterMesh` CRD (`clustermeshes.kilo.squat.ai`) is **not** bundled in the Helm chart's `crds/` directory. Instead, the operator binary self-applies the CRD at every startup using code in `internal/crd/install.go`. + +### Why this approach + +Embedding the CRD in the operator binary (at `internal/crd/clustermeshes.yaml`) rather than in the chart keeps the CRD schema tightly coupled to the version of the operator that interprets it. There is no risk of the chart being upgraded without the CRD being updated, or vice versa. To upgrade the CRD schema, simply upgrade the operator; the new binary applies the new schema on startup. + +### What happens at startup + +On startup, before `ctrl.NewManager()` is called, the operator: + +1. Reads the embedded `clustermeshes.yaml`. +2. Calls `crd.InstallOrUpdate()` — creates the CRD if absent, patches it if present. +3. Polls for `Established=True` with a 500 ms interval, up to a 30-second timeout. +4. Only after the CRD is established does the manager start. + +If the API server is slow to process the CRD (e.g., high load during cluster startup), the operator may time out after 30 seconds and exit. Kubernetes will restart the pod automatically. 
+ +### Operator RBAC for CRD management + +The chart's ClusterRole grants the operator's ServiceAccount: + +```yaml +apiGroups: [apiextensions.k8s.io] +resources: [customresourcedefinitions] +verbs: [get, create, update] +``` + +This is required for self-installation. **Do not** apply the CRD manually from `internal/crd/clustermeshes.yaml` — the operator will overwrite it on startup anyway, and a pre-existing CRD with a different resource version can cause unnecessary churn. + +--- + +## Remote Cluster Kubeconfigs + +The operator runs on one **central cluster** and connects to one or more **remote clusters** over their Kubernetes APIs. For each remote cluster, the operator needs a kubeconfig Secret in its own namespace. The Secret is referenced from the `ClusterMesh` CR using the `kubeconfigSecretRef` field (see [Configuration](./configuration.md) for the full CR reference). + +### What permissions the remote kubeconfig must grant + +On each remote cluster, create a ServiceAccount with the following ClusterRole: + +```yaml +rules: + - apiGroups: [""] + resources: [nodes] + verbs: [get, list, watch] + - apiGroups: [kilo.squat.ai] + resources: [peers] + verbs: [get, list, watch, create, update, patch, delete] +``` + +The operator needs to **read nodes** to discover node annotations (WireGuard IPs, public keys, endpoints) and **write peers** to push the computed WireGuard peer configuration. It does not need access to ClusterMesh objects, Secrets, CRDs, or any other resources on the remote cluster. 
+ +### Creating the remote RBAC + +Apply a manifest similar to the following on each remote cluster: + +```yaml +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: clustermesh-reader + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kilo-clustermesh-remote +rules: + - apiGroups: [""] + resources: [nodes] + verbs: [get, list, watch] + - apiGroups: [kilo.squat.ai] + resources: [peers] + verbs: [get, list, watch, create, update, patch, delete] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: clustermesh-reader +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kilo-clustermesh-remote +subjects: + - kind: ServiceAccount + name: clustermesh-reader + namespace: kube-system +--- +# Kubernetes 1.24+ does not auto-create token Secrets for ServiceAccounts. +# Create one explicitly to obtain a long-lived token. +apiVersion: v1 +kind: Secret +metadata: + name: clustermesh-reader-token + namespace: kube-system + annotations: + kubernetes.io/service-account.name: clustermesh-reader +type: kubernetes.io/service-account-token +``` + +```shell +kubectl --context remote-cluster apply --filename remote-rbac.yaml +``` + +### Building the kubeconfig Secret + +Once the token Secret is ready on the remote cluster, extract the token and CA certificate, build a kubeconfig, and store it as a Secret in the operator's namespace on the central cluster: + +```shell +# Extract token and CA from the remote cluster +$ TOKEN=$(kubectl --context remote-cluster \ + --namespace kube-system \ + get secret clustermesh-reader-token \ + --output jsonpath='{.data.token}' | base64 --decode) + +$ CA=$(kubectl --context remote-cluster \ + --namespace kube-system \ + get secret clustermesh-reader-token \ + --output jsonpath='{.data.ca\.crt}') + +$ SERVER=$(kubectl --context remote-cluster \ + config view --minify --output jsonpath='{.clusters[0].cluster.server}') + +# Write 
a minimal kubeconfig to a temp file +$ cat > /tmp/remote-kubeconfig.yaml <<EOF +apiVersion: v1 +kind: Config +clusters: + - name: remote-cluster + cluster: + server: ${SERVER} + certificate-authority-data: ${CA} +contexts: + - name: remote-cluster + context: + cluster: remote-cluster + user: clustermesh-reader +current-context: remote-cluster +users: + - name: clustermesh-reader + user: + token: ${TOKEN} +EOF + +# Store the kubeconfig as a Secret in the operator's namespace on the central cluster +$ kubectl --namespace kilo-clustermesh \ + create secret generic remote-cluster-kubeconfig \ + --from-file=kubeconfig=/tmp/remote-kubeconfig.yaml + +# Delete the local temp file +$ rm /tmp/remote-kubeconfig.yaml +``` + +> **Security note**: The kubeconfig contains a long-lived bearer token. Store it only in a Kubernetes Secret (encrypted at rest if your cluster supports it) and delete the local temp file immediately after creating the Secret. + +--- + +## Example: Cozystack Deployment + +The `deploy/cozystack/` directory holds a concrete reference deployment connecting two clusters; it is cluster-specific and git-ignored, so it is not present in a fresh clone. This section walks through the same pattern using generic names (`cluster-a` for the central cluster, `cluster-b` for the remote cluster). + +### CIDR plan + +| | cluster-a (central) | cluster-b (remote) | +| --- | --- | --- | +| podCIDR | `10.244.0.0/16` | `10.245.0.0/16` | +| serviceCIDR | `10.96.0.0/16` | `10.97.0.0/16` | +| wireguardCIDR | `100.66.0.0/16` | `100.67.0.0/16` | +| Kilo granularity | `cross` | `cross` | + +### Step 1 — Install the operator on cluster-a + +```shell +$ helm install kilo-clustermesh-operator charts/kilo-clustermesh-operator \ + --kubeconfig /path/to/cluster-a/kubeconfig \ + --namespace cozy-kilo \ + --create-namespace \ + --values deploy/cozystack/values-cluster-a.yaml +``` + +Example values file for a pinned production image: + +```yaml +image: + repository: ghcr.io/cozystack/kilo-clustermesh-operator + tag: sha-43caba9978f26383593bedec79930c62e7ecead7 + pullPolicy: IfNotPresent + +replicaCount: 1 +leaderElect: true + +resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi +``` + +Verify the operator is running and the CRD exists: + +```shell +$ kubectl --kubeconfig /path/to/cluster-a/kubeconfig \ + --namespace cozy-kilo \ + get deployment,pod + +$ kubectl --kubeconfig /path/to/cluster-a/kubeconfig \ + get crd clustermeshes.kilo.squat.ai +``` + +The CRD is created by the operator on first start — if it does not appear within ~30 seconds, check the operator pod logs. 
+ +### Step 2 — Apply remote RBAC on cluster-b + +```shell +$ kubectl --kubeconfig /path/to/cluster-b/kubeconfig \ + apply --filename remote-rbac.yaml +``` + +Use the template from [Remote Cluster Kubeconfigs](#remote-cluster-kubeconfigs) above. Verify the token Secret was populated (the `kubernetes.io/service-account-token` controller fills in `token` and `ca.crt` asynchronously): + +```shell +$ kubectl --kubeconfig /path/to/cluster-b/kubeconfig \ + --namespace kube-system \ + get secret clustermesh-reader-token +``` + +### Step 3 — Build and store the kubeconfig Secret on cluster-a + +Follow the kubeconfig-building steps from [Remote Cluster Kubeconfigs](#remote-cluster-kubeconfigs), targeting cluster-b as the remote and cluster-a's `cozy-kilo` namespace as the destination: + +```shell +$ kubectl --kubeconfig /path/to/cluster-a/kubeconfig \ + --namespace cozy-kilo \ + create secret generic cluster-b-kubeconfig \ + --from-file=kubeconfig=/tmp/cluster-b-kubeconfig.yaml +``` + +### Step 4 — Apply the ClusterMesh CR on cluster-a + +```yaml +# clustermesh.yaml +apiVersion: kilo.squat.ai/v1alpha1 +kind: ClusterMesh +metadata: + name: my-mesh + namespace: cozy-kilo +spec: + clusters: + - name: cluster-a + local: true + podCIDRs: + - 10.244.0.0/16 + wireguardCIDR: 100.66.0.0/16 + serviceCIDR: 10.96.0.0/16 + - name: cluster-b + kubeconfigSecretRef: + name: cluster-b-kubeconfig + key: kubeconfig + podCIDRs: + - 10.245.0.0/16 + wireguardCIDR: 100.67.0.0/16 + serviceCIDR: 10.97.0.0/16 +``` + +```shell +$ kubectl --kubeconfig /path/to/cluster-a/kubeconfig \ + apply --filename clustermesh.yaml +``` + +The `local: true` flag on `cluster-a` tells the operator that this entry describes the cluster the operator itself is running in — no kubeconfig Secret is needed for it. Every other entry requires `kubeconfigSecretRef`. + +For the full list of fields available in the `ClusterMesh` spec, see [Configuration](./configuration.md). + +--- + +## Verifying Installation + +### 1. 
Check the operator pod + +```shell +$ kubectl --namespace kilo-clustermesh get pod \ + --selector app.kubernetes.io/name=kilo-clustermesh-operator +``` + +The pod should be in `Running` state. If it is crash-looping, inspect logs — the most common startup failures are: + +- Missing `POD_NAMESPACE` environment variable (set automatically by the chart via downward API; indicates the chart is not being used). +- CRD establishment timeout (API server too slow; the pod will be restarted and retry). + +### 2. Check the ClusterMesh status + +```shell +kubectl --namespace kilo-clustermesh get clustermesh my-mesh --output yaml +``` + +Look for the `status.conditions` section. A healthy ClusterMesh shows: + +```yaml +status: + conditions: + - type: Ready + status: "True" + reason: Reconciled +``` + +If `Ready=False`, check the `reason` and `message` fields. Common reasons: + +- `NetworksOverlap` — CIDR overlap between clusters. Verify the CIDRs in the spec. +- Any error message related to kubeconfig — check that the referenced Secret exists and contains a valid kubeconfig. + +See [Troubleshooting](./troubleshooting.md) for a full list of failure modes and remediation steps. + +### 3. Check Peer objects appear on both clusters + +On the central cluster, Peer objects for remote-cluster nodes should appear: + +```shell +kubectl --namespace kilo-clustermesh get peers.kilo.squat.ai +``` + +On the remote cluster, Peer objects for central-cluster nodes should appear: + +```shell +kubectl --context remote-cluster get peers.kilo.squat.ai +``` + +If peers are absent on the remote cluster, check operator logs for reconciliation errors. Node-level issues (missing annotations, duplicate WireGuard IPs) cause individual nodes to be skipped rather than failing the entire reconcile — see [Per-Node Setup](./per-node-setup.md) for the required node annotations. + +### 4. 
Operator logs + +```shell +$ kubectl --namespace kilo-clustermesh \ + logs deployment/kilo-clustermesh-operator \ + --follow +``` + +Successful reconciliation produces log lines indicating peer counts per cluster. Errors are logged with structured fields identifying which cluster and which node caused the problem. + +--- + +## Uninstalling + +> **Warning**: The ClusterMesh finalizer (`kilo-clustermesh.io/cleanup`) requires the operator to be **running** when the ClusterMesh CR is deleted. If you remove the Helm chart before deleting the CR, the finalizer will never be honoured and all Peer objects will be **orphaned** on every cluster. Always delete the CR first. + +### Step 1 — Delete the ClusterMesh CR + +```shell +$ kubectl --namespace kilo-clustermesh \ + delete clustermesh my-mesh +``` + +Wait for the resource to disappear (the finalizer causes deletion to block until the operator has cleaned up Peers on all clusters): + +```shell +$ kubectl --namespace kilo-clustermesh \ + get clustermesh my-mesh --watch +``` + +### Step 2 — Uninstall the Helm chart + +```shell +$ helm uninstall kilo-clustermesh-operator \ + --namespace kilo-clustermesh +``` + +### Step 3 — Remove RBAC on remote clusters + +```shell +$ kubectl --context remote-cluster \ + delete --filename remote-rbac.yaml +``` + +### Step 4 — Clean up remaining resources + +The operator namespace may be shared with Kilo itself. Remove only the resources specific to the operator: + +```shell +# Delete the kubeconfig Secret(s) +$ kubectl --namespace kilo-clustermesh \ + delete secret remote-cluster-kubeconfig + +# Delete the CRD (not removed by chart uninstall) +$ kubectl delete crd clustermeshes.kilo.squat.ai +``` + +--- + +## Next Steps + +- [Configuration](./configuration.md) — full `ClusterMesh` CRD reference (all spec fields, status conditions). +- [Per-Node Setup](./per-node-setup.md) — required node annotations and how the operator resolves WireGuard endpoints. 
+- [Troubleshooting](./troubleshooting.md) — diagnosing `Ready=False`, missing peers, and CIDR overlap errors. +- [README](../README.md) — project overview and quick-start summary. diff --git a/docs/known-gaps.md b/docs/known-gaps.md new file mode 100644 index 0000000..8996a9c --- /dev/null +++ b/docs/known-gaps.md @@ -0,0 +1,226 @@ +# Known Gaps and Outstanding Work + +> Handoff document for contributors picking up the operator after the initial POC. + +This document tracks divergences from the upstream proposal +([cozystack/community#7](https://github.com/cozystack/community/pull/7)), +operational risks identified during review, and concrete follow-up work. +The operator is functional end-to-end in its current shape but is not +yet a full implementation of the proposal as written. + +## Table of Contents + +- [Operator Status](#operator-status) +- [Gaps Relative To The Proposal](#gaps-relative-to-the-proposal) +- [Operational Risks](#operational-risks) +- [Recommended Follow-Ups](#recommended-follow-ups) +- [Settled Design Decisions](#settled-design-decisions) +- [Proposal Text Corrections](#proposal-text-corrections) +- [References](#references) + +--- + +## Operator Status + +What works today: + +- `ClusterMesh` CRD with typed CIDR fields, status conditions + (`Ready`, `NetworksOverlap`), per-cluster registered/skipped counts +- Mesh- and cluster-level CIDR overlap validation + (`internal/validation/mesh.go`) +- Per-node validation: PodCIDR containment, WireGuard IP containment, + duplicate-IP dedup, public-key presence, endpoint resolvability + (`internal/validation/node.go`) +- Three-tier endpoint resolution chain on each node: + `kilo.squat.ai/clustermesh-endpoint` → `kilo.squat.ai/force-endpoint` + → `Node.Status.Addresses` ExternalIP with `wireguardPort` fallback + (`internal/kilonode/endpoint.go`) +- Per-cluster Helm-managed Peer reconciliation in remote clusters via + kubeconfig Secrets, label-isolated, finalizer-cleaned + (`internal/peer/`, 
`internal/controller/clustermesh_controller.go`) +- Anchor Peer for cluster-wide CIDRs (`serviceCIDR`, `additionalCIDRs`) + (`internal/peer/builder.go:83-105`) +- Embedded CRD bootstrap at startup (`internal/crd/install.go`) +- Restart-on-config-change via fingerprint watcher + (`internal/restart/watcher.go`) +- Full documentation under `docs/` (architecture, installation, + configuration, per-node-setup, troubleshooting) + +What is incomplete or divergent: see the sections below. + +--- + +## Gaps Relative To The Proposal + +### Node Watches Are Missing (Blocker) + +The proposal contracts live reconciliation: any change to a Node +annotation or status in any listed cluster must trigger a reconcile of +the owning `ClusterMesh`. The operator only watches `ClusterMesh` +resources — Node-level changes are not detected automatically. + +**Workaround in use**: write a no-op annotation to the `ClusterMesh` +resource to force a reconcile cycle. Cozystack provisioning automation +can do this externally, but it breaks the self-healing guarantee the +proposal advertises for standalone use. + +**Source of truth**: `cmd/main.go` builds the controller without a Node +informer; `internal/controller/clustermesh_controller.go:107-119` +configures the watch source as `ClusterMesh` only. + +**Effort to close**: medium. The `ClusterRegistry` +(`internal/multicluster/registry.go`) already holds `cluster.Cluster` +objects with started caches. Wire a Node informer per remote cluster +into the controller's watch set with a +`handler.EnqueueRequestsFromMapFunc` that maps remote-cluster Node +events back to local `ClusterMesh` requests, scoped to clusters that +reference the affected cluster name. Care needed around scoping so a +single Node event does not fan out to every `ClusterMesh` in the +namespace. + +### CRD Schema Diverges From The Proposal Example + +The proposal example uses a single flat `spec.clusters[].allowedNetworks: [...]` +list. 
The operator uses typed fields: `podCIDRs`, `wireguardCIDR`, +`serviceCIDR`, `additionalCIDRs`. The proposal's Open Question §6 +explicitly raises this as a design choice and defers it to v1alpha2. + +This is an intentional improvement, not a defect. The typed schema +gives stronger validation, better documentation, and clearer +per-cluster semantics. But the proposal text still shows the flat +list — anyone implementing against the proposal as written will +diverge from the operator. Resolution belongs in the proposal, not +in the code (see [Proposal Text Corrections](#proposal-text-corrections)). + +### Secret-Change Handling Is Heavier Than Proposed + +The proposal asks the controller to "re-establish the watch and +reconcile" when a kubeconfig Secret changes. The operator cancels the +manager context and lets the pod restart (`internal/restart/watcher.go`), +which rebuilds the `ClusterRegistry` from fresh Secret content. + +Functionally equivalent; operationally heavier — in-flight reconciles +are dropped, and there is a brief gap in Peer maintenance during the +restart. In a hot rotation scenario, repeated pod restarts could +cause churn. + +**Effort to close**: medium. Replace the restart path with a live +client-rebuild on Secret change in `ClusterRegistry`. Requires care +to invalidate in-flight reconciles and avoid using stale clients. + +--- + +## Operational Risks + +### Anchor Peer Silently Suppressed When `nodes[0]` Has No Endpoint + +`BuildAnchorPeer` (`internal/peer/builder.go:83-105`) returns `nil` when +`resolvePeerEndpoint(anchorNode, ...)` returns any error — including a +malformed endpoint annotation on `nodes[0]`. The consequence: cluster-wide +CIDRs (`serviceCIDR`, `additionalCIDRs`) become unreachable from remote +clusters for the duration of that reconcile cycle, with no Event or +status Condition surfaced. 
+ +The `docs/architecture.md` Warning callout documents this behavior, but +operationally there is no signal: an administrator inspecting `ClusterMesh` +status sees `Ready=True` and a reasonable `registeredPeers` count, +yet inter-cluster Service traffic silently fails. + +**Effort to close**: small. In `clustermesh_controller.go` +(around the call site that invokes `BuildAnchorPeer`), distinguish +"anchor not needed" (no `serviceCIDR` and no `additionalCIDRs`) from +"anchor suppressed by endpoint failure on `nodes[0]`". For the latter, +emit a Warning Event on the `ClusterMesh` and optionally set an +`AnchorPeerSuppressed=True` status condition. + +--- + +## Recommended Follow-Ups + +Ranked by ratio of impact to effort. Each item is independently +shippable. + +1. **Anchor-peer suppression Event** — small patch, surfaces a real + silent failure mode. Add an Event emission and consider a status + Condition. ~30 minutes including a test. + +2. **Proposal text corrections** — three text edits in + cozystack/community#7. No code change. See + [Proposal Text Corrections](#proposal-text-corrections). + +3. **Node watches** — closes the only blocker gap against the proposal. + Medium effort, needs careful informer scoping. Should be tracked + as a discrete RFC if the design touches multi-cluster + controller-runtime patterns. + +4. **Live Secret-change handling** — replace pod-restart with + client-rebuild. Removes an operational footgun. Medium effort. + +5. **Anchor-node selection beyond `nodes[0]`** — current logic picks + the first validated node as the anchor; if that node loses its + endpoint the anchor is suppressed (item 1). A more robust choice + would iterate validated nodes until one resolves an endpoint. + Small effort once item 1 ships. + +--- + +## Settled Design Decisions + +Do not re-litigate the following. Each was chosen deliberately after +weighing alternatives. + +- **Lazy endpoint chain validation.** The three-tier chain stops at the + first non-empty source. 
A malformed lower-priority annotation is + silently ignored when a higher-priority source resolves successfully. + The alternative (validate all present annotations eagerly) was + considered and rejected: chain-of-responsibility semantics support + gradual migration from `force-endpoint` to `clustermesh-endpoint` + without breaking on legacy typos. Strict-invalid behavior on the + WINNING source is still in effect. + +- **Cozystack-patched Kilo with `cross` granularity.** The operator + targets the cozystack fork at `aenix-io/kilo` where every node + receives its own WireGuard IP. Do not propose switching to upstream + Kilo's `full` or `location` granularity for this codebase. + +- **Prefix-agnostic WireGuard IP validation.** The `wireguard-ip` + annotation may carry any prefix length (upstream Kilo writes `/32`; + cozystack-Kilo writes `/`, e.g. `100.66.0.3/16`). + Both are accepted; only the host IP is validated against + `wireguardCIDR`, and `AllowedIPs` is always normalised to `/32` + (or `/128` for IPv6). Do not tighten to `/32`-only — that would + break cozystack-Kilo node validation. + +- **Typed CRD fields over flat `allowedNetworks`.** See + [Gaps Relative To The Proposal](#gaps-relative-to-the-proposal). + Decision recorded; only the proposal text remains to be updated. + +- **CRD auto-bootstrap from embedded copy.** The operator applies the + CRD at startup via `internal/crd/install.go`; the Helm chart does + not bundle CRDs. This is documented in + [`./installation.md`](./installation.md) and intentional. + +--- + +## Proposal Text Corrections + +Three text-only edits needed in cozystack/community#7. No code change +in this repository. 
+ +| Item | Proposal Section | Current Text | Should Be | +|---|---|---|---| +| Public-key annotation name | §Peer construction §5 | `kilo.squat.ai/wireguard-public-key` | `kilo.squat.ai/key` (verify against `internal/kilonode/annotations.go:30`) | +| WG-IP prefix rule | §Reconciliation §3 | "has prefix length `/32` (or `/128` for IPv6)" | Allow any prefix; validate host IP against `wireguardCIDR`. Note that vanilla Kilo writes `/32` and cozystack-Kilo writes `/` | +| CRD example | §CRD: ClusterMesh | `spec.clusters[].allowedNetworks: [...]` | `spec.clusters[].podCIDRs`, `.wireguardCIDR`, `.serviceCIDR`, `.additionalCIDRs` (see [`./configuration.md`](./configuration.md) for the full schema) | + +--- + +## References + +- Proposal PR: [cozystack/community#7](https://github.com/cozystack/community/pull/7) +- Operator documentation: [`./architecture.md`](./architecture.md), + [`./configuration.md`](./configuration.md), + [`./per-node-setup.md`](./per-node-setup.md), + [`./troubleshooting.md`](./troubleshooting.md) +- Upstream Kilo: https://github.com/squat/kilo +- Cozystack-patched Kilo: https://github.com/aenix-io/kilo diff --git a/docs/per-node-setup.md b/docs/per-node-setup.md new file mode 100644 index 0000000..9cffc2c --- /dev/null +++ b/docs/per-node-setup.md @@ -0,0 +1,244 @@ +# Per-Node Setup + +> Annotations the operator reads from each Node, and how endpoints are resolved. 
+ +## Table of Contents + +- [Overview](#overview) +- [Required Annotations](#required-annotations) + - [kilo.squat.ai/wireguard-ip](#kilosquataiwireguard-ip) + - [kilo.squat.ai/key](#kilosquataikey) +- [Endpoint Resolution Chain](#endpoint-resolution-chain) + - [Source 1 — clustermesh-endpoint (highest priority)](#source-1--clustermesh-endpoint-highest-priority) + - [Source 2 — force-endpoint (Kilo legacy)](#source-2--force-endpoint-kilo-legacy) + - [Source 3 — Node ExternalIP fallback](#source-3--node-externalip-fallback) + - [Format and IPv6 bracketing](#format-and-ipv6-bracketing) +- [Strict-Invalid Behavior](#strict-invalid-behavior) +- [Examples](#examples) +- [Migrating From force-endpoint To clustermesh-endpoint](#migrating-from-force-endpoint-to-clustermesh-endpoint) + +--- + +## Overview + +The operator reads a small set of annotations from Node objects in each remote cluster to build WireGuard peers. Kilo writes most of these annotations itself as part of its normal operation, but one — `kilo.squat.ai/clustermesh-endpoint` — is operator-specific and must be set manually when you need to control the cross-cluster endpoint independently of Kilo's own routing decisions. The operator does **not** watch Node objects; after changing any node annotation you must trigger a manual reconcile (see below). + +--- + +## Required Annotations + +The two annotations below must be present and valid on every node for that node to be included in the mesh. Missing or malformed values cause the node to be skipped with a reason surfaced in the ClusterMesh status (see [Troubleshooting](./troubleshooting.md) for the full skip-reason table). + +| Annotation | Constant | Written by | +| --- | --- | --- | +| `kilo.squat.ai/wireguard-ip` | `AnnotationWireguardIP` | Kilo (automatic) | +| `kilo.squat.ai/key` | `AnnotationPublicKey` | Kilo (automatic) | + +### kilo.squat.ai/wireguard-ip + +Carries the WireGuard interface address of the node. 
The operator validates that the host IP portion of this value falls within the `wireguardCIDR` declared for that cluster in the ClusterMesh spec (see [Configuration](./configuration.md)). + +**Fork-aware parsing.** Two formats are accepted: + +| Kilo fork | Written value | Example | +| --- | --- | --- | +| Upstream Kilo | `<host-ip>/32` | `10.4.0.1/32` | +| cozystack-Kilo | `<host-ip>/<prefix-len>` | `100.66.0.3/16` | + +In both cases only the **host IP** is extracted and validated. The prefix length in the annotation does not affect the mesh — `AllowedIPs` in the generated Peer is always `/32` (or `/128` for IPv6), regardless of what prefix was written. This prevents a cozystack-style `/16` annotation from claiming the entire subnet in another cluster's routing table. + +> **Duplicate-IP gotcha.** The duplicate-IP check normalises prefix lengths before comparing. `10.4.0.1/32` and `10.4.0.1/16` resolve to the same host IP and therefore conflict. The first node in API listing order keeps its IP; later duplicates are skipped with `WGIPDuplicate`. See [Troubleshooting](./troubleshooting.md) for the full reason list. + +### kilo.squat.ai/key + +The WireGuard public key for the node. The value is an opaque base64 string written by Kilo. The operator passes it unchanged into the Peer object — no validation beyond non-empty is performed here. + +--- + +## Endpoint Resolution Chain + +The operator calls `ResolveEndpoint(node, fallbackPort)` for each node. Sources are tried in priority order; **the first non-empty source wins**. Evaluation is lazy: once a source provides a value (valid or malformed), no lower-priority source is consulted. + +```text +1. kilo.squat.ai/clustermesh-endpoint ← operator-specific, highest priority +2. kilo.squat.ai/force-endpoint ← Kilo's own annotation, legacy +3. 
Node.Status.Addresses (ExternalIP) ← last resort, uses wireguardPort +``` + +### Source 1 — clustermesh-endpoint (highest priority) + +`kilo.squat.ai/clustermesh-endpoint` (`AnnotationClustermeshEndpoint`) is set by operators and users. It takes precedence over everything else. Its purpose is to decouple cross-cluster endpoint selection from Kilo's intra-cluster topology decisions: changing this annotation has no effect on how Kilo routes traffic between nodes in the same cluster. + +This is the recommended annotation when you need a stable, manually controlled endpoint for cross-cluster WireGuard peers. + +### Source 2 — force-endpoint (Kilo legacy) + +`kilo.squat.ai/force-endpoint` (`AnnotationForceEndpoint`) is Kilo's built-in annotation for overriding endpoint detection. The operator treats it as a fallback when `clustermesh-endpoint` is absent. + +> **Side-effect warning.** Unlike `clustermesh-endpoint`, Kilo itself also reads `force-endpoint` and uses it for intra-cluster peer endpoint selection. Setting it can affect intra-cluster routing, including interactions with Kilo's `cross` granularity setting. Prefer `clustermesh-endpoint` when you only want to control cross-cluster endpoints. + +### Source 3 — Node ExternalIP fallback + +When neither annotation is set, the operator scans `Node.Status.Addresses` for entries with `Type=ExternalIP`. `InternalIP` and `Hostname` entries are ignored. + +- **IPv4 preferred over IPv6.** The first IPv4 ExternalIP is used immediately. IPv6 is only selected when no IPv4 ExternalIP exists. +- **Port.** The port is taken from `ClusterEntry.wireguardPort` (default: `51820`). See [Configuration](./configuration.md) to set a non-default port. 
+ +### Format and IPv6 bracketing + +All endpoint values — whether from an annotation or synthesised from an ExternalIP — must conform to Go's `net.SplitHostPort` format: + +```text +<host>:<port> +``` + +IPv6 addresses must be enclosed in square brackets: + +```text +[2001:db8::1]:51820 +``` + +Bare IPv6 without brackets (e.g. `2001:db8::1:51820`) will fail parsing and the node will be skipped. When the operator synthesises an endpoint from `Node.Status.Addresses` it calls `net.JoinHostPort`, which adds brackets automatically. When you set `clustermesh-endpoint` or `force-endpoint` manually for an IPv6 host, you must add the brackets yourself. + +Bracketed DNS names are also accepted: + +```text +[node.example.com]:51820 +``` + +The brackets are stripped before the DNS name is placed in the Peer object. + +--- + +## Strict-Invalid Behavior + +A **present-but-malformed** annotation value is a **hard error**. The operator does not fall through to the next source. The node is excluded from the mesh and the ClusterMesh status surfaces `NodeEndpointInvalid`. + +This applies to both `clustermesh-endpoint` and `force-endpoint`. Empty or absent annotations are treated as "not set" and cause the next source to be tried. A non-empty value that cannot be parsed as `host:port` (by `net.SplitHostPort`) is always an error. + +**Lazy-validation gotcha.** Because evaluation stops at the first non-empty source, a malformed lower-priority annotation can go undetected. Concretely: if `clustermesh-endpoint` is present and valid, `force-endpoint` is never inspected — a typo in `force-endpoint` is silently ignored. The typo only surfaces if `clustermesh-endpoint` is later removed. See [Troubleshooting](./troubleshooting.md) for a worked example and the rationale. + +--- + +## Examples + +All examples use a node in a remote cluster. Annotations are shown as they would appear in the Node manifest. The `wireguardPort` in the ClusterMesh spec is `51820` unless noted. 
+ +### Example A — Only clustermesh-endpoint set + +```yaml +metadata: + annotations: + kilo.squat.ai/wireguard-ip: "10.4.0.5/32" + kilo.squat.ai/key: "abc123...base64...==" + kilo.squat.ai/clustermesh-endpoint: "203.0.113.1:51820" +``` + +**Result:** endpoint = `203.0.113.1:51820` (Source 1 wins; Sources 2 and 3 are not consulted). + +--- + +### Example B — Only force-endpoint set (Kilo legacy) + +```yaml +metadata: + annotations: + kilo.squat.ai/wireguard-ip: "10.4.0.6/32" + kilo.squat.ai/key: "def456...base64...==" + kilo.squat.ai/force-endpoint: "198.51.100.1:51820" +``` + +**Result:** endpoint = `198.51.100.1:51820` (Source 1 absent; Source 2 wins). + +> Remember: `force-endpoint` is also read by Kilo for intra-cluster peers. Prefer `clustermesh-endpoint` for cross-cluster-only control. + +--- + +### Example C — No annotation, ExternalIP fallback with custom port + +ClusterMesh spec has `wireguardPort: 51821` for this cluster. + +```yaml +metadata: + annotations: + kilo.squat.ai/wireguard-ip: "10.4.0.7/32" + kilo.squat.ai/key: "ghi789...base64...==" + # no clustermesh-endpoint, no force-endpoint +status: + addresses: + - type: ExternalIP + address: "203.0.113.5" +``` + +**Result:** endpoint = `203.0.113.5:51821` (Sources 1 and 2 absent; Source 3 finds an IPv4 ExternalIP and uses `wireguardPort`). + +--- + +### Example D — clustermesh-endpoint wins over force-endpoint (lazy evaluation) + +```yaml +metadata: + annotations: + kilo.squat.ai/wireguard-ip: "10.4.0.8/32" + kilo.squat.ai/key: "jkl012...base64...==" + kilo.squat.ai/clustermesh-endpoint: "203.0.113.10:51820" + kilo.squat.ai/force-endpoint: "not-valid" # malformed — but never checked +``` + +**Result:** endpoint = `203.0.113.10:51820`. Because Source 1 provides a valid value, Source 2 (`force-endpoint`) is never evaluated. The malformed value is silently ignored **while `clustermesh-endpoint` is present and valid**. 
If `clustermesh-endpoint` is removed, the malformed `force-endpoint` will then surface as `NodeEndpointInvalid`. + +--- + +## Migrating From force-endpoint To clustermesh-endpoint + +Use `clustermesh-endpoint` when you want to control cross-cluster endpoints without affecting Kilo's intra-cluster routing (e.g., nodes using the `cross` granularity setting). + +**Steps:** + +1. Add `clustermesh-endpoint` with the same value currently in `force-endpoint`: + + ```sh + kubectl annotate node node-01 \ + kilo.squat.ai/clustermesh-endpoint=203.0.113.1:51820 \ + --overwrite + ``` + +2. Verify the annotation is set correctly: + + ```sh + kubectl get node node-01 \ + --output jsonpath='{.metadata.annotations.kilo\.squat\.ai/clustermesh-endpoint}' + ``` + +3. Remove the old `force-endpoint` annotation (if it was set only for cross-cluster purposes): + + ```sh + kubectl annotate node node-01 kilo.squat.ai/force-endpoint- + ``` + +4. Trigger a reconcile (the operator does not watch Node objects): + + ```sh + kubectl annotate clustermesh <mesh-name> \ + reconcile-trigger=$(date +%s) \ + --overwrite \ + --namespace <operator-namespace> + ``` + +5. Confirm the ClusterMesh status shows `Ready=True` and the node appears in the peer list: + + ```sh + kubectl get clustermesh <mesh-name> \ + --namespace <operator-namespace> \ + --output yaml + ``` + +Repeat steps 1–4 for each node in the remote cluster. Verify that intra-cluster Kilo routing is unaffected after the migration. 
+ +--- + +*See also:* +*[Configuration](./configuration.md) — `wireguardPort` and other ClusterMesh CRD fields* +*[Troubleshooting](./troubleshooting.md) — `NodeNoEndpoint`, `NodeEndpointInvalid`, `WGIPInvalid`, and the full skip-reason table* +*[Architecture](./architecture.md) — high-level reconcile flow* +*[README](../README.md) — quick start and project overview* diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 0000000..6389c24 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,190 @@ +# Troubleshooting + +> Diagnostic reference for ClusterMesh status conditions and node-skip reasons. + +## Table of Contents + +- [Overview](#overview) +- [Inspecting State](#inspecting-state) +- [Node Skip Reasons](#node-skip-reasons) +- [Mesh-Level Validation Errors](#mesh-level-validation-errors) +- [Status Conditions](#status-conditions) +- [Common Pitfalls](#common-pitfalls) +- [Re-Examining the Embedded CRD](#re-examining-the-embedded-crd) + +--- + +## Overview + +This page lists every symptom surfaced through ClusterMesh status conditions and node-skip reasons, with diagnostic steps for each. Node-level problems appear as `NodeSkipReason` values in Kubernetes events and operator logs. Mesh-level problems appear as status conditions on the ClusterMesh resource itself. Start with [Inspecting State](#inspecting-state) to collect the relevant output, then look up the specific reason or condition type in the tables below. + +--- + +## Inspecting State + +**View ClusterMesh status and conditions:** + +```bash +kubectl --context --namespace get clustermesh.kilo.squat.ai --output yaml +``` + +Look at `.status.conditions` (see [Status Conditions](#status-conditions)) and `.status.clusters[*].skippedNodes`. 
+ +**Stream operator logs (includes per-node skip reasons):** + +```bash +kubectl --context <operator-context> --namespace <operator-namespace> logs --selector app.kubernetes.io/name=kilo-clustermesh-operator --follow +``` + +**Check Kubernetes events on the ClusterMesh resource:** + +```bash +kubectl --context <operator-context> --namespace <operator-namespace> get events +``` + +The controller emits a `Warning` event for every skipped node. The event `Reason` field is the `NodeSkipReason` string; the `Action` field is `SkipNodePeering`. + +**Verify Peer CRs exist in a remote cluster:** + +```bash +kubectl --context <remote-context> get peers.kilo.squat.ai +``` + +--- + +## Node Skip Reasons + +The controller validates each node against the `ClusterEntry` for its cluster. Validation runs in this order: PodCIDR → WireGuard IP → Public Key → Endpoint. **A node that fails an earlier check is not checked further** — fix issues in order. + +Duplicate WireGuard IP detection (`FindDuplicateWGIPs`) runs before per-node validation. The first node with a given host IP keeps its entry; later nodes are flagged `WGIPDuplicate` and skipped before `ValidateNode` is called. 
+ +| Reason | Symptom | Likely Cause | Fix | +| --- | --- | --- | --- | +| `NodeNoPodCIDR` | Node has no PodCIDRs or an unparseable first PodCIDR | CNI not yet assigned a pod subnet, or node is not schedulable | Wait for CNI assignment or check `Node.Spec.PodCIDRs` | +| `NodePodCIDROutOfRange` | Node's first PodCIDR is not a subnet of any `ClusterEntry.podCIDRs` | `ClusterEntry.podCIDRs` does not cover the node's actual pod subnet | Expand or correct `podCIDRs` in the ClusterMesh spec | +| `NodeNoWireguardIP` | `kilo.squat.ai/wireguard-ip` annotation missing or empty | Kilo has not yet assigned a WireGuard interface IP to the node | Ensure Kilo is running and `granularity: cross` is set; see [per-node-setup](./per-node-setup.md) | +| `WGIPInvalid` | `kilo.squat.ai/wireguard-ip` annotation present but not a valid CIDR | Annotation was set manually with a malformed value | Correct or remove the annotation; let Kilo re-set it | +| `WGIPOutOfRange` | Node's WireGuard host IP is not within `ClusterEntry.wireguardCIDR`, OR `wireguardCIDR` itself is invalid | Wrong `wireguardCIDR` in the spec, or node annotation points to a different subnet | Correct `wireguardCIDR` in the ClusterMesh spec to match your Kilo WireGuard subnet | +| `WGIPDuplicate` | Two or more nodes have the same WireGuard host IP (prefix length ignored) | Kilo assigned the same IP to multiple nodes due to misconfiguration; `10.4.0.1/16` and `10.4.0.1/32` are treated as the same host IP | Identify the conflicting nodes via events/logs; fix Kilo's IP assignment so each node has a unique host IP | +| `NodeNoPublicKey` | `kilo.squat.ai/key` annotation missing or empty | Kilo has not yet populated the WireGuard public key | Ensure Kilo is running; check `kubectl --context get node --output yaml` for the annotation | +| `NodeNoEndpoint` | No endpoint source found: no `clustermesh-endpoint`, no `force-endpoint`, no `ExternalIP` | Node has no external IP and no endpoint annotation set | Set 
`kilo.squat.ai/clustermesh-endpoint` or ensure the node has a `Node.Status.Addresses` entry of type `ExternalIP`; see [per-node-setup](./per-node-setup.md) | +| `NodeEndpointInvalid` | An endpoint annotation is present with a non-empty value that cannot be parsed as `host:port` | Typo or malformed value in `kilo.squat.ai/clustermesh-endpoint` or `kilo.squat.ai/force-endpoint` | Fix the annotation value; format is `host:port` | + +> **Note on `SkippedNodes` count:** The `status.clusters[*].skippedNodes` integer counts all skip reasons together. It does not distinguish between `WGIPDuplicate` and other reasons. Use `kubectl get events` or operator logs to find per-node reasons. + +--- + +## Mesh-Level Validation Errors + +These errors are set as status conditions before any peer reconciliation begins. If a mesh-level error is present, no peers are created or updated for the affected ClusterMesh. + +### `ValidateClusterNetworks` + +Called for every reconcile. Checks that all CIDRs within a single ClusterMesh are pairwise disjoint. The set of checked CIDRs for each cluster entry is: + +```text +podCIDRs + wireguardCIDR + serviceCIDR (if set) + additionalCIDRs +``` + +Even `wireguardCIDR` values from different clusters within the same mesh must not overlap each other. + +**Error format:** `CIDR overlap between cluster "" () and cluster "" ()` + +### `ValidateMeshNetworks` + +Called during reconcile after `ValidateClusterNetworks`. Lists **all** ClusterMesh objects in the operator's namespace and checks for cross-mesh CIDR overlaps. A CIDR that appears in mesh-a and mesh-b (even for different CIDR types) is an overlap. + +**Error format:** `CIDR overlap between mesh "" (cluster "", ) and mesh "" (cluster "", )` + +**Effect:** Sets `NetworksOverlap=True` and `Ready=False` on the affected ClusterMesh. Reconciliation stops — no peers are created or updated. Fix: correct the overlapping CIDRs in the ClusterMesh spec. 
+ +> **Warning:** If mesh-a and mesh-b share a CIDR, the mesh that triggers the overlap check will be blocked. Both meshes may need to be corrected. + +--- + +## Status Conditions + +The controller manages two condition types on every ClusterMesh resource. + +| Condition Type | Status | Reason | Meaning | +| --- | --- | --- | --- | +| `Ready` | `True` | `Reconciled` | All clusters were reconciled successfully. Peer objects have been applied. | +| `Ready` | `False` | `NetworksOverlap` | CIDR overlap was detected across ClusterMesh objects in the namespace. Reconciliation was blocked. | +| `NetworksOverlap` | `True` | `CIDROverlap` | A CIDR overlap was found. The `Message` field contains the full overlap description. | +| `NetworksOverlap` | `False` | `NoOverlap` | All CIDRs are disjoint. Normal state. | + +The `Ready=False/NetworksOverlap` path is the only path that actively blocks reconciliation. All other failures (node skips, unreachable clusters) are recorded in `status.clusters` and events but do not prevent the rest of the mesh from being reconciled. + +To inspect conditions: + +```bash +kubectl --context --namespace get clustermesh.kilo.squat.ai --output jsonpath='{.status.conditions}' +``` + +--- + +## Common Pitfalls + +### `Ready=False, Reason=NetworksOverlap` with overlap message + +The `Message` field in the `Ready` condition reads `"CIDR overlap detected across meshes"`. The full overlap detail is in the `NetworksOverlap` condition's `Message`. Run: + +```bash +kubectl --context --namespace get clustermesh.kilo.squat.ai --output yaml +``` + +Look for `.status.conditions[?(@.type=="NetworksOverlap")].message` — it identifies the two clusters and the overlapping CIDR string. Fix the overlap in `Spec.Clusters[*].podCIDRs`, `wireguardCIDR`, `serviceCIDR`, or `additionalCIDRs` as indicated. + +### Some nodes peer, others don't + +This is nearly always a per-node annotation problem. Check: + +1. 
`kubectl --context <operator-context> get events --namespace <operator-namespace>` — look for `Warning/SkipNodePeering` events listing the affected node names and reasons. +2. `kubectl --context <remote-context> get node <node-name> --output yaml` — verify the four annotations: `kilo.squat.ai/wireguard-ip`, `kilo.squat.ai/key`, `kilo.squat.ai/clustermesh-endpoint` (or `force-endpoint`). +3. Cross-check the node's `Spec.PodCIDRs[0]` against `ClusterEntry.podCIDRs`. + +See [per-node-setup](./per-node-setup.md) for the full annotation reference. + +> **Important:** The operator does not watch Node objects. Changes to node annotations are not detected automatically. After correcting any node annotation, write a no-op to the ClusterMesh resource (e.g., add or change a label) to trigger a reconcile. + +### Operator restarts on Secret change + +The `ChangeWatcher` computes a fingerprint at startup that covers: the names of all cluster entries across all ClusterMesh objects in the operator namespace, and the `ResourceVersion` of each referenced kubeconfig Secret. When any of the following changes, the fingerprint changes and the operator process exits (allowing Kubernetes to restart the pod): + +- A new ClusterMesh is created or deleted in the namespace +- A cluster entry's `name` is changed +- The `ResourceVersion` of a referenced kubeconfig Secret changes (i.e. the Secret was updated) + +This is intentional design, not a crash. The operator must restart to rebuild the multicluster client cache (`ClusterRegistry`) with fresh kubeconfigs. After restart, full reconciliation runs automatically. Expect a brief period where no peers are being updated during the restart. + +### Endpoint chain lazy evaluation: malformed `force-endpoint` silently ignored + +Endpoint sources are evaluated in priority order: `clustermesh-endpoint` → `force-endpoint` → `ExternalIP`. Evaluation stops at the first non-empty source, whether or not its value parses successfully. 
If `clustermesh-endpoint` is valid, `force-endpoint` is never checked — a typo in `force-endpoint` goes unnoticed. The bug only surfaces if `clustermesh-endpoint` is removed. + +Conversely: if `clustermesh-endpoint` is **present but malformed**, the validator does **not** fall through to `force-endpoint`. The node is immediately skipped with `NodeEndpointInvalid`. A valid `force-endpoint` on the same node does not help. + +See [per-node-setup](./per-node-setup.md) for full endpoint annotation behavior. + +### Anchor peer absent, service CIDRs unreachable + +If `ClusterEntry.serviceCIDR` or `additionalCIDRs` are set but remote clusters cannot reach the service network, check whether an anchor peer was created: + +```bash +kubectl --context get peers.kilo.squat.ai --selector kilo.squat.ai/mesh= +``` + +The anchor peer is built from `nodes[0]` (the first valid node in the cluster). If that node has no resolvable endpoint, `BuildAnchorPeer` returns `nil` silently — no anchor peer is created, no error is surfaced. Ensure at least one node in each cluster has a valid endpoint. See [architecture](./architecture.md) for the reconcile flow. + +--- + +## Re-Examining the Embedded CRD + +The operator installs and upgrades the ClusterMesh CRD at every startup from an embedded copy at `internal/crd/clustermeshes.yaml`. The Helm chart has no `crds/` directory. If the CRD in your cluster looks stale (missing fields, outdated validation), the operator is likely running an older image version. + +To verify what CRD the running operator would apply, check the image tag and consult the corresponding release. Upgrading the operator image automatically upgrades the CRD on the next pod start. + +See [configuration](./configuration.md) for the full CRD field reference and [architecture](./architecture.md) for the CRD install flow. 
+ +--- + +*Cross-references: [per-node-setup](./per-node-setup.md) · [configuration](./configuration.md) · [architecture](./architecture.md) · [README](../README.md)* diff --git a/go.mod b/go.mod index 2b0dcbb..7cf9c3b 100644 --- a/go.mod +++ b/go.mod @@ -4,8 +4,6 @@ go 1.26.3 require ( github.com/cockroachdb/errors v1.13.0 - github.com/onsi/ginkgo/v2 v2.27.2 - github.com/onsi/gomega v1.38.2 github.com/stretchr/testify v1.11.1 k8s.io/api v0.35.0 k8s.io/apiextensions-apiserver v0.35.0 @@ -17,7 +15,6 @@ require ( require ( cel.dev/expr v0.24.0 // indirect - github.com/Masterminds/semver/v3 v3.4.0 // indirect github.com/antlr4-go/antlr/v4 v4.13.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect @@ -38,13 +35,11 @@ require ( github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-openapi/swag v0.23.0 // indirect - github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/google/btree v1.1.3 // indirect github.com/google/cel-go v0.26.0 // indirect github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect - github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect @@ -81,7 +76,6 @@ require ( go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect - golang.org/x/mod v0.29.0 // indirect golang.org/x/net v0.47.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/sync v0.18.0 // indirect @@ -89,7 +83,6 @@ require ( golang.org/x/term v0.37.0 // indirect golang.org/x/text v0.31.0 // indirect golang.org/x/time v0.9.0 // indirect - golang.org/x/tools v0.38.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // 
indirect google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a // indirect diff --git a/go.sum b/go.sum index 0f3a2be..e97427b 100644 --- a/go.sum +++ b/go.sum @@ -37,12 +37,6 @@ github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sa github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/getsentry/sentry-go v0.46.0 h1:mbdDaarbUdOt9X+dx6kDdntkShLEX3/+KyOsVDTPDj0= github.com/getsentry/sentry-go v0.46.0/go.mod h1:evVbw2qotNUdYG8KxXbAdjOQWWvWIwKxpjdZZIvcIPw= -github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs= -github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo= -github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M= -github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk= -github.com/gkampitakis/go-snaps v0.5.15 h1:amyJrvM1D33cPHwVrjo9jQxX8g/7E2wYdZ+01KS3zGE= -github.com/gkampitakis/go-snaps v0.5.15/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -62,8 +56,6 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= -github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= 
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= @@ -89,8 +81,6 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= -github.com/joshdk/go-junit v1.0.0 h1:S86cUKIdwBHWwA6xCmFlf3RTLfVXYQfvanM5Uh+K6GE= -github.com/joshdk/go-junit v1.0.0/go.mod h1:TiiV0PqkaNfFXjEiyjWM3XXrhVyCa1K4Zfga6W52ung= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -108,10 +98,6 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= -github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= -github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE= -github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd 
h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -161,14 +147,6 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= -github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= -github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= -github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= -github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= -github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= -github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/internal/citest/workflow_test.go b/internal/citest/workflow_test.go new file mode 100644 index 0000000..25a0e42 --- /dev/null +++ b/internal/citest/workflow_test.go @@ -0,0 +1,84 @@ +/* +Copyright 2026 The Kilo Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package citest validates structural properties of the CI workflow files. +package citest_test + +import ( + "bufio" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// repoRoot returns the absolute path to the repository root by walking up from +// this test file's location until a go.mod is found. +func repoRoot(t *testing.T) string { + t.Helper() + + _, callerFile, _, ok := runtime.Caller(0) + require.True(t, ok, "runtime.Caller returned no file info") + + dir := filepath.Dir(callerFile) + + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + + parent := filepath.Dir(dir) + require.NotEqual(t, parent, dir, "reached filesystem root without finding go.mod") + + dir = parent + } +} + +// TestCIWorkflowIncludesCmdPackage asserts that the unit-test job in ci.yml +// explicitly includes ./cmd/... so that tests in cmd/ (e.g. TestMergeClusterSpecs) +// are not silently skipped. 
+func TestCIWorkflowIncludesCmdPackage(t *testing.T) { + t.Parallel() + + root := repoRoot(t) + ciPath := filepath.Join(root, ".github", "workflows", "ci.yml") + + f, err := os.Open(ciPath) + require.NoError(t, err, "opening ci.yml") + + defer f.Close() + + var found bool + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + if strings.Contains(line, "go test") && strings.Contains(line, "./cmd/...") { + found = true + + break + } + } + + require.NoError(t, scanner.Err(), "scanning ci.yml") + assert.True(t, found, + "ci.yml unit-test job must include ./cmd/... in the go test invocation; "+ + "TestMergeClusterSpecs in cmd/main_test.go is otherwise never executed in CI") +} diff --git a/internal/containerfile/containerfile_test.go b/internal/containerfile/containerfile_test.go new file mode 100644 index 0000000..6f390b9 --- /dev/null +++ b/internal/containerfile/containerfile_test.go @@ -0,0 +1,89 @@ +/* +Copyright 2026 The Kilo Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package containerfile contains tests that validate the repository's +// Containerfile metadata labels. +package containerfile_test + +import ( + "bufio" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// repoRoot returns the absolute path to the repository root by walking up from +// this test file's location until a go.mod is found. 
+func repoRoot(t *testing.T) string { + t.Helper() + + _, callerFile, _, ok := runtime.Caller(0) + require.True(t, ok, "runtime.Caller returned no file info") + + dir := filepath.Dir(callerFile) + + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + + parent := filepath.Dir(dir) + require.NotEqual(t, parent, dir, "reached filesystem root without finding go.mod") + + dir = parent + } +} + +func TestContainerfileImageSourceLabel(t *testing.T) { + t.Parallel() + + root := repoRoot(t) + containerfilePath := filepath.Join(root, "Containerfile") + + f, err := os.Open(containerfilePath) + require.NoError(t, err, "opening Containerfile") + + defer f.Close() + + const labelKey = "org.opencontainers.image.source" + const wantOwner = "cozystack/kilo-clustermesh-operator" + + var found bool + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if !strings.HasPrefix(line, "LABEL") { + continue + } + + if !strings.Contains(line, labelKey) { + continue + } + + found = true + assert.Contains(t, line, wantOwner, + "org.opencontainers.image.source label must reference cozystack/kilo-clustermesh-operator, got: %q", line) + } + + require.NoError(t, scanner.Err(), "scanning Containerfile") + require.True(t, found, "org.opencontainers.image.source LABEL not found in Containerfile") +} diff --git a/internal/controller/clustermesh_controller.go b/internal/controller/clustermesh_controller.go index aeaa592..325dd9a 100644 --- a/internal/controller/clustermesh_controller.go +++ b/internal/controller/clustermesh_controller.go @@ -25,7 +25,7 @@ import ( apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/tools/record" + "k8s.io/client-go/tools/events" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -57,7 +57,7 @@ 
type ClusterMeshReconciler struct { Scheme *runtime.Scheme Registry *multicluster.ClusterRegistry Log *slog.Logger - Recorder record.EventRecorder + Recorder events.EventRecorder } // Reconcile implements the main reconciliation loop for ClusterMesh objects. @@ -233,7 +233,7 @@ func (r *ClusterMeshReconciler) filterNodes(log *slog.Logger, mesh *v1alpha1.Clu slog.String("node", node.Name), slog.String("reason", string(reason)), ) - r.Recorder.Event(mesh, corev1.EventTypeWarning, string(reason), "node "+node.Name+" has duplicate WireGuard IP") + r.Recorder.Eventf(mesh, nil, corev1.EventTypeWarning, string(reason), "SkipNodePeering", "node %s has duplicate WireGuard IP", node.Name) skipped++ @@ -247,7 +247,7 @@ func (r *ClusterMeshReconciler) filterNodes(log *slog.Logger, mesh *v1alpha1.Clu slog.String("reason", string(reason)), slog.String("msg", msg), ) - r.Recorder.Event(mesh, corev1.EventTypeWarning, string(reason), msg) + r.Recorder.Eventf(mesh, nil, corev1.EventTypeWarning, string(reason), "SkipNodePeering", "%s", msg) skipped++ @@ -303,7 +303,7 @@ func buildDesiredPeers(meshName string, entry *v1alpha1.ClusterEntry, nodes []*c peers := make([]*kilov1alpha1.Peer, 0, len(nodes)+1) for _, node := range nodes { - p, err := peer.BuildPeer(meshName, entry.Name, node) + p, err := peer.BuildPeer(meshName, entry, node) if err != nil { return nil, errors.Wrapf(err, "building peer for node %q", node.Name) } @@ -312,7 +312,7 @@ func buildDesiredPeers(meshName string, entry *v1alpha1.ClusterEntry, nodes []*c } if len(nodes) > 0 { - if anchor := peer.BuildAnchorPeer(meshName, entry.Name, entry, nodes[0]); anchor != nil { + if anchor := peer.BuildAnchorPeer(meshName, entry, nodes[0]); anchor != nil { peers = append(peers, anchor) } } diff --git a/internal/crd/clustermeshes.yaml b/internal/crd/clustermeshes.yaml index 0271637..64bb653 100644 --- a/internal/crd/clustermeshes.yaml +++ b/internal/crd/clustermeshes.yaml @@ -107,8 +107,21 @@ spec: wireguardCIDR: description: |- 
WireguardCIDR is the CIDR for Kilo's WireGuard interface (kilo0) addresses. - Each node's kilo.squat.ai/wireguard-ip must be a /32 (or /128) within this CIDR. + Each node's kilo.squat.ai/wireguard-ip must have its host IP within this CIDR. + The annotation may carry any prefix length (e.g. "10.4.0.1/32" upstream Kilo + or "10.4.0.1/16" cozystack-patched Kilo); only the host portion is validated. type: string + wireguardPort: + default: 51820 + description: |- + WireguardPort is the UDP port of Kilo's WireGuard endpoint on each node in + this cluster. Used as a fallback when the operator synthesises the + endpoint from Node.Status.Addresses (i.e. neither + kilo.squat.ai/clustermesh-endpoint nor kilo.squat.ai/force-endpoint is set + on a node). Defaults to 51820. + maximum: 65535 + minimum: 1 + type: integer required: - name - podCIDRs diff --git a/internal/kilonode/annotations.go b/internal/kilonode/annotations.go index dc654b3..cd2b525 100644 --- a/internal/kilonode/annotations.go +++ b/internal/kilonode/annotations.go @@ -19,7 +19,11 @@ package kilonode const ( // AnnotationWireguardIP is the node annotation containing the WireGuard interface IP. - // Value format: "10.4.0.1/32" (must be a host route). + // Two formats are accepted: + // - "HOST_IP/32" (upstream Kilo): host route, e.g. "10.4.0.1/32" + // - "HOST_IP/PREFIX_LEN" (cozystack-Kilo): subnet-masked address, e.g. "100.66.0.3/16" + // In both cases the host part of the address is extracted and normalised to a /32 (or /128) + // when building WireGuard AllowedIPs for the peer. AnnotationWireguardIP = "kilo.squat.ai/wireguard-ip" // AnnotationPublicKey is the node annotation containing the WireGuard public key. @@ -27,8 +31,18 @@ const ( // AnnotationForceEndpoint is the node annotation specifying the WireGuard endpoint. // Value format: "203.0.113.1:51820" or "node.example.com:51820".
+ // Kilo itself reads this annotation to override intra-cluster endpoint + // detection; the clustermesh operator uses it as a fallback when + // AnnotationClustermeshEndpoint is absent. AnnotationForceEndpoint = "kilo.squat.ai/force-endpoint" + // AnnotationClustermeshEndpoint is the operator-specific node annotation + // for cross-cluster mesh endpoints. Takes precedence over + // AnnotationForceEndpoint. Decoupled from Kilo's own force-endpoint to + // avoid side-effects on intra-cluster topology (e.g. "cross" granularity). + // Value format: "203.0.113.1:51820" or "node.example.com:51820". + AnnotationClustermeshEndpoint = "kilo.squat.ai/clustermesh-endpoint" + // AnnotationLocation is the node annotation for Kilo's location grouping. AnnotationLocation = "kilo.squat.ai/location" ) diff --git a/internal/kilonode/endpoint.go b/internal/kilonode/endpoint.go new file mode 100644 index 0000000..a0027c7 --- /dev/null +++ b/internal/kilonode/endpoint.go @@ -0,0 +1,134 @@ +/* +Copyright 2026 The Kilo Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kilonode + +import ( + "net" + "strconv" + + "github.com/cockroachdb/errors" + corev1 "k8s.io/api/core/v1" +) + +const defaultWireguardPort = 51820 + +// ResolveEndpoint determines the WireGuard endpoint string ("host:port") for a node. +// Sources are tried in priority order; the first non-empty source wins. A malformed +// annotation value is a hard error (do not fall through to the next source). 
+// fallbackPort is the UDP port used when synthesising the endpoint from Node.Status.Addresses; +// if 0, defaults to 51820. +// Returns ("", false, nil) when no source yields a value. +// Returns (endpoint, true, nil) on success. +// Returns ("", false, err) when an annotation is present but cannot be parsed as host:port. +func ResolveEndpoint(node *corev1.Node, fallbackPort uint16) (string, bool, error) { + // Source 1: operator-specific clustermesh-endpoint annotation (highest priority). + if val, ok := node.Annotations[AnnotationClustermeshEndpoint]; ok && val != "" { + err := validateHostPort(val) + if err != nil { + return "", false, errors.Wrapf(err, "annotation %q on node %q has invalid value %q", + AnnotationClustermeshEndpoint, node.Name, val) + } + + return val, true, nil + } + + // Source 2: Kilo's force-endpoint annotation. + if val, ok := node.Annotations[AnnotationForceEndpoint]; ok && val != "" { + err := validateHostPort(val) + if err != nil { + return "", false, errors.Wrapf(err, "annotation %q on node %q has invalid value %q", + AnnotationForceEndpoint, node.Name, val) + } + + return val, true, nil + } + + // Source 3: Node.Status.Addresses ExternalIP, preferring IPv4 over IPv6. + port := fallbackPort + if port == 0 { + port = defaultWireguardPort + } + + if endpoint, ok := resolveFromExternalIPs(node.Status.Addresses, port); ok { + return endpoint, true, nil + } + + return "", false, nil +} + +// validateHostPort checks that s is a well-formed "host:port" string by +// calling net.SplitHostPort and verifying the port is a valid uint16. +// It does not perform DNS resolution. 
+func validateHostPort(s string) error { + host, portStr, err := net.SplitHostPort(s) + if err != nil { + return errors.Wrapf(err, "not a valid host:port") + } + + if host == "" { + return errors.New("host part is empty") + } + + port, err := strconv.ParseUint(portStr, 10, 16) + if err != nil { + return errors.Wrapf(err, "port %q is not a valid uint16", portStr) + } + + if port == 0 { + return errors.New("port must be non-zero") + } + + return nil +} + +// resolveFromExternalIPs scans the address list for ExternalIP entries, +// preferring IPv4. Returns the first IPv4 ExternalIP if any, otherwise the +// first IPv6 ExternalIP. The endpoint is formatted as "host:port" using +// net.JoinHostPort (which handles IPv6 bracketing automatically). +func resolveFromExternalIPs(addresses []corev1.NodeAddress, port uint16) (string, bool) { + portStr := strconv.FormatUint(uint64(port), 10) + + var firstIPv6 string + + for _, addr := range addresses { + if addr.Type != corev1.NodeExternalIP { + continue + } + + parsedIP := net.ParseIP(addr.Address) + if parsedIP == nil { + // Non-parseable address — skip it silently; not our concern here. + continue + } + + if parsedIP.To4() != nil { + // IPv4 found — return immediately (highest preference). + return net.JoinHostPort(addr.Address, portStr), true + } + + // IPv6: record the first one but keep scanning for an IPv4. + if firstIPv6 == "" { + firstIPv6 = addr.Address + } + } + + if firstIPv6 != "" { + return net.JoinHostPort(firstIPv6, portStr), true + } + + return "", false +} diff --git a/internal/kilonode/endpoint_test.go b/internal/kilonode/endpoint_test.go new file mode 100644 index 0000000..37d7ae7 --- /dev/null +++ b/internal/kilonode/endpoint_test.go @@ -0,0 +1,223 @@ +/* +Copyright 2026 The Kilo Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kilonode_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/squat/kilo-clustermesh-operator/internal/kilonode" +) + +// makeEndpointNode creates a Node with the given annotations and Status.Addresses for endpoint tests. +func makeEndpointNode(annotations map[string]string, addresses []corev1.NodeAddress) *corev1.Node { + return &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node", + Annotations: annotations, + }, + Status: corev1.NodeStatus{ + Addresses: addresses, + }, + } +} + +func TestResolveEndpoint_ClustermeshAnnotationWins(t *testing.T) { + t.Parallel() + + // clustermesh-endpoint takes priority over force-endpoint when both are set. + node := makeEndpointNode(map[string]string{ + kilonode.AnnotationClustermeshEndpoint: "203.0.113.1:51820", + kilonode.AnnotationForceEndpoint: "198.51.100.1:51820", + }, nil) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 51820) + + require.NoError(t, err) + assert.True(t, found) + assert.Equal(t, "203.0.113.1:51820", endpoint) +} + +func TestResolveEndpoint_ForceEndpointFallback(t *testing.T) { + t.Parallel() + + // When clustermesh-endpoint is absent, force-endpoint is used. 
+ node := makeEndpointNode(map[string]string{ + kilonode.AnnotationForceEndpoint: "198.51.100.1:51820", + }, nil) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 51820) + + require.NoError(t, err) + assert.True(t, found) + assert.Equal(t, "198.51.100.1:51820", endpoint) +} + +func TestResolveEndpoint_ExternalIPFallback_IPv4(t *testing.T) { + t.Parallel() + + // No annotations; single ExternalIP (IPv4) → synthesise endpoint. + node := makeEndpointNode(nil, []corev1.NodeAddress{ + {Type: corev1.NodeExternalIP, Address: "203.0.113.5"}, + }) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 51820) + + require.NoError(t, err) + assert.True(t, found) + assert.Equal(t, "203.0.113.5:51820", endpoint) +} + +func TestResolveEndpoint_ExternalIPFallback_PrefersIPv4(t *testing.T) { + t.Parallel() + + // Node has both IPv4 and IPv6 ExternalIPs → IPv4 must be preferred. + node := makeEndpointNode(nil, []corev1.NodeAddress{ + {Type: corev1.NodeExternalIP, Address: "2001:db8::1"}, + {Type: corev1.NodeExternalIP, Address: "203.0.113.5"}, + }) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 51820) + + require.NoError(t, err) + assert.True(t, found) + assert.Equal(t, "203.0.113.5:51820", endpoint) +} + +func TestResolveEndpoint_ExternalIPFallback_IPv6OnlyWhenNoIPv4(t *testing.T) { + t.Parallel() + + // Only an IPv6 ExternalIP is available → use it with brackets. + node := makeEndpointNode(nil, []corev1.NodeAddress{ + {Type: corev1.NodeExternalIP, Address: "2001:db8::1"}, + }) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 51820) + + require.NoError(t, err) + assert.True(t, found) + assert.Equal(t, "[2001:db8::1]:51820", endpoint) +} + +func TestResolveEndpoint_ExternalIPFallback_DefaultPort(t *testing.T) { + t.Parallel() + + // fallbackPort = 0 → must default to 51820. 
+ node := makeEndpointNode(nil, []corev1.NodeAddress{ + {Type: corev1.NodeExternalIP, Address: "203.0.113.5"}, + }) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 0) + + require.NoError(t, err) + assert.True(t, found) + assert.Equal(t, "203.0.113.5:51820", endpoint) +} + +func TestResolveEndpoint_ExternalIPFallback_CustomPort(t *testing.T) { + t.Parallel() + + // Non-default fallback port must be used as-is. + node := makeEndpointNode(nil, []corev1.NodeAddress{ + {Type: corev1.NodeExternalIP, Address: "203.0.113.5"}, + }) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 12345) + + require.NoError(t, err) + assert.True(t, found) + assert.Equal(t, "203.0.113.5:12345", endpoint) +} + +func TestResolveEndpoint_NoSource_ReturnsFoundFalse(t *testing.T) { + t.Parallel() + + // No annotations, no ExternalIPs → not found, no error. + node := makeEndpointNode(nil, nil) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 51820) + + require.NoError(t, err) + assert.False(t, found) + assert.Empty(t, endpoint) +} + +func TestResolveEndpoint_ClustermeshAnnotationMalformed_ReturnsError(t *testing.T) { + t.Parallel() + + // Malformed clustermesh-endpoint must return an error, not fall through. + node := makeEndpointNode(map[string]string{ + kilonode.AnnotationClustermeshEndpoint: "not-a-valid-endpoint", + }, nil) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 51820) + + require.Error(t, err) + assert.False(t, found) + assert.Empty(t, endpoint) + assert.Contains(t, err.Error(), kilonode.AnnotationClustermeshEndpoint) +} + +func TestResolveEndpoint_ForceEndpointMalformed_ReturnsError(t *testing.T) { + t.Parallel() + + // Malformed force-endpoint must return an error, not fall through. 
+ node := makeEndpointNode(map[string]string{ + kilonode.AnnotationForceEndpoint: "no-port-here", + }, nil) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 51820) + + require.Error(t, err) + assert.False(t, found) + assert.Empty(t, endpoint) + assert.Contains(t, err.Error(), kilonode.AnnotationForceEndpoint) +} + +func TestResolveEndpoint_IgnoresInternalIP(t *testing.T) { + t.Parallel() + + // InternalIP addresses must NOT be used for endpoint synthesis. + node := makeEndpointNode(nil, []corev1.NodeAddress{ + {Type: corev1.NodeInternalIP, Address: "10.0.0.1"}, + }) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 51820) + + require.NoError(t, err) + assert.False(t, found) + assert.Empty(t, endpoint) +} + +func TestResolveEndpoint_IgnoresHostname(t *testing.T) { + t.Parallel() + + // Hostname type must NOT be treated as ExternalIP. + node := makeEndpointNode(nil, []corev1.NodeAddress{ + {Type: corev1.NodeHostName, Address: "worker-1"}, + }) + + endpoint, found, err := kilonode.ResolveEndpoint(node, 51820) + + require.NoError(t, err) + assert.False(t, found) + assert.Empty(t, endpoint) +} diff --git a/internal/netutil/cidr.go b/internal/netutil/cidr.go index 91b6e6d..9a4c528 100644 --- a/internal/netutil/cidr.go +++ b/internal/netutil/cidr.go @@ -58,6 +58,28 @@ func IsHostRoute(n *net.IPNet) bool { return ones == bits } +// ParseHostInCIDR parses a CIDR string and returns both the host IP and the +// masked network. Unlike ParseCIDR, this preserves the host bits — useful for +// annotations that encode a node's address as HOST_IP/PREFIX_LEN, e.g. +// cozystack-patched Kilo writes "100.66.0.3/16". +func ParseHostInCIDR(s string) (net.IP, *net.IPNet, error) { + ip, network, err := net.ParseCIDR(s) + if err != nil { + return nil, nil, errors.Wrapf(err, "invalid CIDR %q", s) + } + + return ip, network, nil +} + +// HostRoute returns the /32 (IPv4) or /128 (IPv6) host route for ip.
+func HostRoute(ip net.IP) string { + if ip.To4() != nil { + return ip.String() + "/32" + } + + return ip.String() + "/128" +} + // lastAddr returns the last (broadcast) address of a CIDR. func lastAddr(n *net.IPNet) net.IP { last := make(net.IP, len(n.IP)) diff --git a/internal/netutil/cidr_test.go b/internal/netutil/cidr_test.go index b744aaf..52f4d93 100644 --- a/internal/netutil/cidr_test.go +++ b/internal/netutil/cidr_test.go @@ -144,6 +144,70 @@ func TestIsHostRoute(t *testing.T) { } } +func TestParseHostInCIDR(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + wantHostIP string + wantNetwork string + wantErr bool + }{ + {"IPv4 /32 host route", "10.4.0.1/32", "10.4.0.1", "10.4.0.1/32", false}, + {"IPv4 host inside /16", "100.66.0.3/16", "100.66.0.3", "100.66.0.0/16", false}, + {"IPv4 host inside /24", "10.4.0.1/24", "10.4.0.1", "10.4.0.0/24", false}, + {"IPv6 /128 host route", "fd00::1/128", "fd00::1", "fd00::1/128", false}, + {"IPv6 host inside /64", "fd00::1/64", "fd00::1", "fd00::/64", false}, + {"invalid string", "not-a-cidr", "", "", true}, + {"empty string", "", "", "", true}, + {"IP without mask", "10.0.0.1", "", "", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + hostIP, network, err := ParseHostInCIDR(tt.input) + if tt.wantErr { + assert.Error(t, err) + assert.Nil(t, hostIP) + assert.Nil(t, network) + + return + } + + require.NoError(t, err) + assert.Equal(t, tt.wantHostIP, hostIP.String()) + assert.Equal(t, tt.wantNetwork, network.String()) + }) + } +} + +func TestHostRoute(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ip string + want string + }{ + {"IPv4", "100.66.0.3", "100.66.0.3/32"}, + {"IPv4 zero", "10.0.0.0", "10.0.0.0/32"}, + {"IPv6", "fd00::1", "fd00::1/128"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ip := net.ParseIP(tt.ip) + require.NotNil(t, ip, "ParseIP returned nil for %q", 
tt.ip) + assert.Equal(t, tt.want, HostRoute(ip)) + }) + } +} + func mustParse(t *testing.T, s string) *net.IPNet { t.Helper() diff --git a/internal/peer/builder.go b/internal/peer/builder.go index 9330407..bff959c 100644 --- a/internal/peer/builder.go +++ b/internal/peer/builder.go @@ -27,12 +27,13 @@ import ( v1alpha1 "github.com/squat/kilo-clustermesh-operator/api/v1alpha1" "github.com/squat/kilo-clustermesh-operator/internal/kilonode" + "github.com/squat/kilo-clustermesh-operator/internal/netutil" kilov1alpha1 "github.com/squat/kilo-clustermesh-operator/pkg/kilo/v1alpha1" ) // BuildPeer constructs a Peer object from a validated Node. -// The Peer's allowedIPs = node's PodCIDRs[0] + wireguard-ip annotation. -func BuildPeer(meshName, sourceCluster string, node *corev1.Node) (*kilov1alpha1.Peer, error) { +// The Peer's allowedIPs = node's PodCIDRs[0] + /32 (or /128) host route derived from the wireguard-ip annotation. +func BuildPeer(meshName string, entry *v1alpha1.ClusterEntry, node *corev1.Node) (*kilov1alpha1.Peer, error) { pubKey := node.Annotations[kilonode.AnnotationPublicKey] if pubKey == "" { return nil, errors.Newf("node %q has no public key annotation", node.Name) @@ -43,48 +44,64 @@ func BuildPeer(meshName, sourceCluster string, node *corev1.Node) (*kilov1alpha1 return nil, errors.Newf("node %q has no wireguard-ip annotation", node.Name) } - allowedIPs := []string{node.Spec.PodCIDRs[0], wgIP} + // The annotation may carry the wireguard subnet mask (cozystack-Kilo) or a + // /32 host route (upstream Kilo). In AllowedIPs each peer must claim only + // its own host IP, so normalise to /32 (resp. /128). 
+ hostIP, _, err := netutil.ParseHostInCIDR(wgIP) + if err != nil { + return nil, errors.Wrapf(err, "node %q has invalid wireguard-ip annotation %q", node.Name, wgIP) + } + + allowedIPs := []string{node.Spec.PodCIDRs[0], netutil.HostRoute(hostIP)} + + endpoint, err := resolvePeerEndpoint(node, entry.WireguardPort) + if err != nil { + return nil, err + } peer := &kilov1alpha1.Peer{ ObjectMeta: metav1.ObjectMeta{ - Name: Name(meshName, sourceCluster, node.Name), - Labels: Labels(meshName, sourceCluster), + Name: Name(meshName, entry.Name, node.Name), + Labels: Labels(meshName, entry.Name), }, Spec: kilov1alpha1.PeerSpec{ AllowedIPs: allowedIPs, PublicKey: pubKey, + Endpoint: endpoint, }, } - applyEndpointFromAnnotation(peer, node.Annotations[kilonode.AnnotationForceEndpoint]) - return peer, nil } // BuildAnchorPeer constructs a Peer that carries cluster-wide CIDRs not covered // by per-node Peers (e.g., serviceCIDR, additionalCIDRs). // It uses the first validated node's public key and endpoint as the anchor point. -// Returns nil when there are no cluster-wide CIDRs to advertise. -func BuildAnchorPeer(meshName, sourceCluster string, entry *v1alpha1.ClusterEntry, anchorNode *corev1.Node) *kilov1alpha1.Peer { +// Returns nil when there are no cluster-wide CIDRs to advertise, or when the +// anchor node has no resolvable endpoint (an anchor without an endpoint cannot +// terminate cross-cluster traffic for those CIDRs). 
+func BuildAnchorPeer(meshName string, entry *v1alpha1.ClusterEntry, anchorNode *corev1.Node) *kilov1alpha1.Peer { anchorCIDRs := collectAnchorCIDRs(entry) if len(anchorCIDRs) == 0 { return nil } - peer := &kilov1alpha1.Peer{ + endpoint, err := resolvePeerEndpoint(anchorNode, entry.WireguardPort) + if err != nil { + return nil + } + + return &kilov1alpha1.Peer{ ObjectMeta: metav1.ObjectMeta{ - Name: Name(meshName, sourceCluster, "anchor"), - Labels: Labels(meshName, sourceCluster), + Name: Name(meshName, entry.Name, "anchor"), + Labels: Labels(meshName, entry.Name), }, Spec: kilov1alpha1.PeerSpec{ AllowedIPs: anchorCIDRs, PublicKey: anchorNode.Annotations[kilonode.AnnotationPublicKey], + Endpoint: endpoint, }, } - - applyEndpointFromAnnotation(peer, anchorNode.Annotations[kilonode.AnnotationForceEndpoint]) - - return peer } // collectAnchorCIDRs returns the cluster-wide CIDRs for an anchor peer. @@ -100,19 +117,26 @@ func collectAnchorCIDRs(entry *v1alpha1.ClusterEntry) []string { return cidrs } -// applyEndpointFromAnnotation parses the endpoint annotation and sets it on the -// peer if parsing succeeds. A missing or unparseable annotation is silently ignored. -func applyEndpointFromAnnotation(peer *kilov1alpha1.Peer, endpointStr string) { - if endpointStr == "" { - return +// resolvePeerEndpoint resolves a node's WireGuard endpoint via the kilonode +// fallback chain (clustermesh-endpoint annotation → force-endpoint annotation +// → ExternalIP) and parses the result into a PeerEndpoint. A present-but- +// malformed annotation, or a node with no source at all, surfaces as an error. 
+func resolvePeerEndpoint(node *corev1.Node, fallbackPort uint16) (*kilov1alpha1.PeerEndpoint, error) { + endpointStr, found, err := kilonode.ResolveEndpoint(node, fallbackPort) + if err != nil { + return nil, errors.Wrapf(err, "resolving endpoint for node %q", node.Name) + } + + if !found { + return nil, errors.Newf("node %q has no resolvable endpoint", node.Name) } endpoint, err := parseEndpoint(endpointStr) if err != nil { - return + return nil, errors.Wrapf(err, "parsing resolved endpoint %q for node %q", endpointStr, node.Name) } - peer.Spec.Endpoint = endpoint + return endpoint, nil } // parseEndpoint parses "host:port" into a PeerEndpoint. @@ -148,5 +172,5 @@ func buildDNSOrIP(host string) kilov1alpha1.DNSOrIP { return kilov1alpha1.DNSOrIP{IP: cleanHost} } - return kilov1alpha1.DNSOrIP{DNS: host} + return kilov1alpha1.DNSOrIP{DNS: cleanHost} } diff --git a/internal/peer/builder_test.go b/internal/peer/builder_test.go index 3236f33..0b942ec 100644 --- a/internal/peer/builder_test.go +++ b/internal/peer/builder_test.go @@ -30,9 +30,10 @@ import ( ) const ( - testPubKey = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - testPodCIDR = "10.244.1.0/24" - testWgIP = "10.4.0.1/32" + testPubKey = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + testPodCIDR = "10.244.1.0/24" + testWgIP = "10.4.0.1/32" + testForceEndpoint = "203.0.113.1:51820" ) // testNode creates a minimal Node for use in builder tests. @@ -48,11 +49,26 @@ func testNode(name, podCIDR string, annotations map[string]string) *corev1.Node } } -// baseAnnotations returns annotations containing all required fields. +// baseAnnotations returns annotations containing all required fields, +// including a valid force-endpoint so that BuildPeer succeeds by default. +// Tests that need a different endpoint source should override or delete +// the relevant key. 
func baseAnnotations() map[string]string { return map[string]string{ - kilonode.AnnotationPublicKey: testPubKey, - kilonode.AnnotationWireguardIP: testWgIP, + kilonode.AnnotationPublicKey: testPubKey, + kilonode.AnnotationWireguardIP: testWgIP, + kilonode.AnnotationForceEndpoint: testForceEndpoint, + } +} + +// testEntry returns a minimal ClusterEntry usable in builder tests. +// The Name matches the legacy "cluster-a" string used by tests for peer name +// and label assertions; WireguardPort is set to the well-known default so +// ExternalIP-fallback paths get a deterministic port. +func testEntry() *v1alpha1.ClusterEntry { + return &v1alpha1.ClusterEntry{ + Name: "cluster-a", + WireguardPort: 51820, } } @@ -64,7 +80,7 @@ func TestBuildPeer_HappyPath(t *testing.T) { node := testNode("worker-1", testPodCIDR, annotations) - got, err := peer.BuildPeer("my-mesh", "cluster-a", node) + got, err := peer.BuildPeer("my-mesh", testEntry(), node) require.NoError(t, err) require.NotNil(t, got) @@ -78,6 +94,44 @@ func TestBuildPeer_HappyPath(t *testing.T) { assert.Equal(t, "203.0.113.1", got.Spec.Endpoint.IP) } +func TestBuildPeer_CozystackStyleWGAnnotation(t *testing.T) { + t.Parallel() + + // cozystack-patched Kilo writes the wireguard-ip annotation as + // "HOST-IP/SUBNET-PREFIX-LENGTH" (e.g. "100.66.0.3/16"), not as a + // /32 host route. BuildPeer must still emit a /32 (resp. /128) host + // route in AllowedIPs so that each peer terminates traffic for exactly + // one WireGuard IP — otherwise every peer would claim the entire + // wireguard subnet and break routing.
+ annotations := baseAnnotations() + annotations[kilonode.AnnotationWireguardIP] = "100.66.0.3/16" + + node := testNode("worker-1", testPodCIDR, annotations) + + got, err := peer.BuildPeer("my-mesh", testEntry(), node) + + require.NoError(t, err) + require.NotNil(t, got) + + assert.Equal(t, []string{testPodCIDR, "100.66.0.3/32"}, got.Spec.AllowedIPs) +} + +func TestBuildPeer_InvalidWireguardIP(t *testing.T) { + t.Parallel() + + annotations := map[string]string{ + kilonode.AnnotationPublicKey: testPubKey, + kilonode.AnnotationWireguardIP: "not-a-cidr", + } + + node := testNode("worker-1", testPodCIDR, annotations) + + got, err := peer.BuildPeer("my-mesh", testEntry(), node) + + require.Error(t, err) + assert.Nil(t, got) +} + func TestBuildPeer_MissingPublicKey(t *testing.T) { t.Parallel() @@ -87,7 +141,7 @@ func TestBuildPeer_MissingPublicKey(t *testing.T) { node := testNode("worker-1", testPodCIDR, annotations) - got, err := peer.BuildPeer("my-mesh", "cluster-a", node) + got, err := peer.BuildPeer("my-mesh", testEntry(), node) require.Error(t, err) assert.Nil(t, got) @@ -103,23 +157,32 @@ func TestBuildPeer_MissingWireguardIP(t *testing.T) { node := testNode("worker-1", testPodCIDR, annotations) - got, err := peer.BuildPeer("my-mesh", "cluster-a", node) + got, err := peer.BuildPeer("my-mesh", testEntry(), node) require.Error(t, err) assert.Nil(t, got) assert.Contains(t, err.Error(), "no wireguard-ip annotation") } -func TestBuildPeer_WithoutEndpoint(t *testing.T) { +func TestBuildPeer_NoEndpointSources_ReturnsError(t *testing.T) { t.Parallel() - node := testNode("worker-1", testPodCIDR, baseAnnotations()) + // Node has the wireguard-ip and public-key annotations but no + // endpoint source (no clustermesh-endpoint, no force-endpoint, no + // ExternalIP). The fallback chain in kilonode.ResolveEndpoint + // returns no source and BuildPeer surfaces this as a hard error so + // that misconfiguration is visible rather than producing an + // endpoint-less Peer. 
+ annotations := baseAnnotations() + delete(annotations, kilonode.AnnotationForceEndpoint) - got, err := peer.BuildPeer("my-mesh", "cluster-a", node) + node := testNode("worker-1", testPodCIDR, annotations) - require.NoError(t, err) - require.NotNil(t, got) - assert.Nil(t, got.Spec.Endpoint, "endpoint must be nil when annotation is absent") + got, err := peer.BuildPeer("my-mesh", testEntry(), node) + + require.Error(t, err) + assert.Nil(t, got) + assert.Contains(t, err.Error(), "no resolvable endpoint") } func TestBuildPeer_DNSEndpoint(t *testing.T) { @@ -130,7 +193,7 @@ func TestBuildPeer_DNSEndpoint(t *testing.T) { node := testNode("worker-1", testPodCIDR, annotations) - got, err := peer.BuildPeer("my-mesh", "cluster-a", node) + got, err := peer.BuildPeer("my-mesh", testEntry(), node) require.NoError(t, err) require.NotNil(t, got) @@ -148,7 +211,7 @@ func TestBuildPeer_IPEndpoint(t *testing.T) { node := testNode("worker-1", testPodCIDR, annotations) - got, err := peer.BuildPeer("my-mesh", "cluster-a", node) + got, err := peer.BuildPeer("my-mesh", testEntry(), node) require.NoError(t, err) require.NotNil(t, got) @@ -168,7 +231,7 @@ func TestBuildAnchorPeer_WithServiceCIDR(t *testing.T) { node := testNode("worker-1", testPodCIDR, baseAnnotations()) - got := peer.BuildAnchorPeer("my-mesh", "cluster-a", entry, node) + got := peer.BuildAnchorPeer("my-mesh", entry, node) require.NotNil(t, got) assert.Equal(t, peer.Name("my-mesh", "cluster-a", "anchor"), got.Name) @@ -188,7 +251,7 @@ func TestBuildAnchorPeer_WithAdditionalCIDRs(t *testing.T) { node := testNode("worker-1", testPodCIDR, baseAnnotations()) - got := peer.BuildAnchorPeer("my-mesh", "cluster-a", entry, node) + got := peer.BuildAnchorPeer("my-mesh", entry, node) require.NotNil(t, got) assert.Equal(t, []string{"10.96.0.0/12", "192.168.100.0/24", "172.16.0.0/16"}, got.Spec.AllowedIPs) @@ -205,25 +268,160 @@ func TestBuildAnchorPeer_NoAnchorCIDRs(t *testing.T) { node := testNode("worker-1", testPodCIDR, 
baseAnnotations()) - got := peer.BuildAnchorPeer("my-mesh", "cluster-a", entry, node) + got := peer.BuildAnchorPeer("my-mesh", entry, node) assert.Nil(t, got, "must return nil when there are no cluster-wide CIDRs") } -func TestParseEndpoint_InvalidFormat(t *testing.T) { +func TestBuildPeer_MalformedForceEndpoint_ReturnsError(t *testing.T) { t.Parallel() - // Use BuildPeer with a malformed endpoint to exercise parseEndpoint's error path. - // Since parseEndpoint is unexported, we verify via the public API: an invalid - // endpoint annotation is silently skipped and the Peer is built without endpoint. + // A present-but-malformed force-endpoint annotation is treated as a + // hard error rather than being silently skipped. This makes + // misconfiguration visible at reconcile time instead of producing a + // Peer without an endpoint. annotations := baseAnnotations() annotations[kilonode.AnnotationForceEndpoint] = "no-colon-at-all" node := testNode("worker-1", testPodCIDR, annotations) - got, err := peer.BuildPeer("my-mesh", "cluster-a", node) + got, err := peer.BuildPeer("my-mesh", testEntry(), node) + + require.Error(t, err) + assert.Nil(t, got) +} + +func TestBuildPeer_ClustermeshEndpointPreferred(t *testing.T) { + t.Parallel() + + // When both clustermesh-endpoint and force-endpoint annotations are set, + // the operator-specific clustermesh-endpoint wins. 
+ annotations := baseAnnotations() + annotations[kilonode.AnnotationForceEndpoint] = "203.0.113.1:51820" + annotations[kilonode.AnnotationClustermeshEndpoint] = "198.51.100.42:60000" - require.NoError(t, err, "invalid endpoint annotation must not cause an error") + node := testNode("worker-1", testPodCIDR, annotations) + + got, err := peer.BuildPeer("my-mesh", testEntry(), node) + + require.NoError(t, err) require.NotNil(t, got) - assert.Nil(t, got.Spec.Endpoint, "unparseable endpoint must be silently skipped") + require.NotNil(t, got.Spec.Endpoint) + assert.Equal(t, uint32(60000), got.Spec.Endpoint.Port) + assert.Equal(t, "198.51.100.42", got.Spec.Endpoint.IP) +} + +func TestBuildPeer_ExternalIPFallback(t *testing.T) { + t.Parallel() + + // With no endpoint annotations on the node, BuildPeer must synthesise + // the endpoint from Node.Status.Addresses (ExternalIP, preferring IPv4) + // combined with entry.WireguardPort. + annotations := baseAnnotations() + delete(annotations, kilonode.AnnotationForceEndpoint) + + node := testNode("worker-1", testPodCIDR, annotations) + node.Status.Addresses = []corev1.NodeAddress{ + {Type: corev1.NodeInternalIP, Address: "10.0.0.1"}, + {Type: corev1.NodeExternalIP, Address: "203.0.113.99"}, + } + + entry := &v1alpha1.ClusterEntry{Name: "cluster-a", WireguardPort: 51820} + + got, err := peer.BuildPeer("my-mesh", entry, node) + + require.NoError(t, err) + require.NotNil(t, got) + require.NotNil(t, got.Spec.Endpoint) + assert.Equal(t, uint32(51820), got.Spec.Endpoint.Port) + assert.Equal(t, "203.0.113.99", got.Spec.Endpoint.IP) +} + +func TestBuildPeer_MalformedClustermeshEndpoint_ReturnsError(t *testing.T) { + t.Parallel() + + // A present-but-malformed clustermesh-endpoint annotation is a hard + // error, even if force-endpoint is also set. Strict validation on the + // highest-priority source prevents typos from silently falling + // through to a lower-priority source. 
+ annotations := baseAnnotations() + annotations[kilonode.AnnotationClustermeshEndpoint] = "garbage" + + node := testNode("worker-1", testPodCIDR, annotations) + + got, err := peer.BuildPeer("my-mesh", testEntry(), node) + + require.Error(t, err) + assert.Nil(t, got) +} + +func TestBuildAnchorPeer_NoEndpointSource_ReturnsNil(t *testing.T) { + t.Parallel() + + // An anchor peer without an endpoint cannot terminate cross-cluster + // traffic for its CIDRs, so BuildAnchorPeer returns nil when the + // anchor node has no resolvable endpoint. + entry := &v1alpha1.ClusterEntry{ + Name: "cluster-a", + ServiceCIDR: "10.96.0.0/12", + } + + annotations := baseAnnotations() + delete(annotations, kilonode.AnnotationForceEndpoint) + + node := testNode("worker-1", testPodCIDR, annotations) + + got := peer.BuildAnchorPeer("my-mesh", entry, node) + + assert.Nil(t, got, "anchor without resolvable endpoint must be nil") +} + +func TestBuildAnchorPeer_ExternalIPFallback(t *testing.T) { + t.Parallel() + + // The anchor peer participates in the same fallback chain — when the + // anchor node has no annotations but does have an ExternalIP, the + // endpoint is synthesised from Node.Status.Addresses. + entry := &v1alpha1.ClusterEntry{ + Name: "cluster-a", + ServiceCIDR: "10.96.0.0/12", + WireguardPort: 51820, + } + + annotations := baseAnnotations() + delete(annotations, kilonode.AnnotationForceEndpoint) + + node := testNode("worker-1", testPodCIDR, annotations) + node.Status.Addresses = []corev1.NodeAddress{ + {Type: corev1.NodeExternalIP, Address: "203.0.113.99"}, + } + + got := peer.BuildAnchorPeer("my-mesh", entry, node) + + require.NotNil(t, got) + require.NotNil(t, got.Spec.Endpoint) + assert.Equal(t, "203.0.113.99", got.Spec.Endpoint.IP) + assert.Equal(t, uint32(51820), got.Spec.Endpoint.Port) +} + +func TestBuildPeer_BracketedDNSEndpoint(t *testing.T) { + t.Parallel() + + // A bracketed DNS name like [dns.example.com]:51820 is unusual but valid input + // for net.JoinHostPort. 
buildDNSOrIP must strip the brackets and return the + // clean hostname — not "[dns.example.com]" — in the DNS field. + annotations := baseAnnotations() + annotations[kilonode.AnnotationForceEndpoint] = "[dns.example.com]:51820" + + node := testNode("worker-1", testPodCIDR, annotations) + + got, err := peer.BuildPeer("my-mesh", testEntry(), node) + + require.NoError(t, err) + require.NotNil(t, got) + require.NotNil(t, got.Spec.Endpoint) + assert.Equal(t, uint32(51820), got.Spec.Endpoint.Port) + assert.Equal(t, "dns.example.com", got.Spec.Endpoint.DNS, + "brackets must be stripped from the DNS field; got %q", got.Spec.Endpoint.DNS) + assert.Empty(t, got.Spec.Endpoint.IP) } diff --git a/internal/peer/endpoint_test.go b/internal/peer/endpoint_test.go new file mode 100644 index 0000000..2cf73e8 --- /dev/null +++ b/internal/peer/endpoint_test.go @@ -0,0 +1,103 @@ +/* +Copyright 2026 The Kilo Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// This file is intentionally in package peer (not peer_test) so it can access +// the unexported parseEndpoint function. 
+package peer + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + kilov1alpha1 "github.com/squat/kilo-clustermesh-operator/pkg/kilo/v1alpha1" +) + +func TestParseEndpoint(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + raw string + want *kilov1alpha1.PeerEndpoint + wantErr bool + }{ + { + name: "IPv6 bracketed address", + raw: "[2001:db8::1]:51820", + want: &kilov1alpha1.PeerEndpoint{ + Port: 51820, + DNSOrIP: kilov1alpha1.DNSOrIP{IP: "2001:db8::1"}, + }, + }, + { + name: "IPv6 loopback", + raw: "[::1]:51820", + want: &kilov1alpha1.PeerEndpoint{ + Port: 51820, + DNSOrIP: kilov1alpha1.DNSOrIP{IP: "::1"}, + }, + }, + { + name: "IPv4 address", + raw: "203.0.113.1:51820", + want: &kilov1alpha1.PeerEndpoint{ + Port: 51820, + DNSOrIP: kilov1alpha1.DNSOrIP{IP: "203.0.113.1"}, + }, + }, + { + name: "DNS name", + raw: "node.example.com:51820", + want: &kilov1alpha1.PeerEndpoint{ + Port: 51820, + DNSOrIP: kilov1alpha1.DNSOrIP{DNS: "node.example.com"}, + }, + }, + { + name: "missing colon - invalid format", + raw: "no-colon-at-all", + wantErr: true, + }, + { + name: "non-numeric port", + raw: "1.2.3.4:notaport", + wantErr: true, + }, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + got, err := parseEndpoint(testCase.raw) + + if testCase.wantErr { + require.Error(t, err) + assert.Nil(t, got) + + return + } + + require.NoError(t, err) + require.NotNil(t, got) + assert.Equal(t, testCase.want.Port, got.Port) + assert.Equal(t, testCase.want.IP, got.IP) + assert.Equal(t, testCase.want.DNS, got.DNS) + }) + } +} diff --git a/internal/restart/watcher.go b/internal/restart/watcher.go index 5712e51..aae7d6e 100644 --- a/internal/restart/watcher.go +++ b/internal/restart/watcher.go @@ -62,7 +62,10 @@ func (w *ChangeWatcher) Reconcile(ctx context.Context, _ reconcile.Request) (rec slog.String("old", w.StartFingerprint), slog.String("new", fingerprint), ) - 
w.Cancel() + + if w.Cancel != nil { + w.Cancel() + } } return reconcile.Result{}, nil diff --git a/internal/restart/watcher_test.go b/internal/restart/watcher_test.go index 2e4f0d3..4925e76 100644 --- a/internal/restart/watcher_test.go +++ b/internal/restart/watcher_test.go @@ -233,3 +233,39 @@ func TestFingerprint_NoMeshes(t *testing.T) { require.NoError(t, err) assert.NotEmpty(t, fp) } + +func TestReconcile_NilCancel_NoPanic(t *testing.T) { + t.Parallel() + + // This test verifies that Reconcile does not panic when Cancel is nil. + // A bootstrap ChangeWatcher (used only for fingerprint computation) has no + // Cancel set; if any future code path calls Reconcile on it and the + // fingerprint differs from the start fingerprint, the nil dereference must + // be guarded. + scheme := testScheme(t) + + mesh := &v1alpha1.ClusterMesh{ + ObjectMeta: metav1.ObjectMeta{Name: "mesh1", Namespace: "default"}, + Spec: v1alpha1.ClusterMeshSpec{ + Clusters: []v1alpha1.ClusterEntry{ + {Name: "local", Local: true, PodCIDRs: []string{"10.0.0.0/16"}, WireguardCIDR: "10.4.0.0/16"}, + }, + }, + } + + fc := fake.NewClientBuilder().WithScheme(scheme).WithObjects(mesh).Build() + + // Set StartFingerprint to a value that will not match the freshly computed + // fingerprint, forcing the fingerprint-changed branch to execute. + watcher := &ChangeWatcher{ + Client: fc, + Cancel: nil, // intentionally nil + Namespace: "default", + Log: testLogger(), + StartFingerprint: "this-will-not-match", + } + + result, err := watcher.Reconcile(context.Background(), reconcile.Request{}) + require.NoError(t, err) + assert.Equal(t, reconcile.Result{}, result) +} diff --git a/internal/validation/mesh_test.go b/internal/validation/mesh_test.go new file mode 100644 index 0000000..3d6ae7e --- /dev/null +++ b/internal/validation/mesh_test.go @@ -0,0 +1,315 @@ +/* +Copyright 2026 The Kilo Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package validation_test + +import ( + "os" + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/yaml" + + v1alpha1 "github.com/squat/kilo-clustermesh-operator/api/v1alpha1" + "github.com/squat/kilo-clustermesh-operator/internal/validation" +) + +// makeCluster is a helper that constructs a ClusterEntry with the given fields. +func makeCluster(name, podCIDR, wireguardCIDR, serviceCIDR string, additionalCIDRs ...string) v1alpha1.ClusterEntry { + return v1alpha1.ClusterEntry{ + Name: name, + PodCIDRs: []string{podCIDR}, + WireguardCIDR: wireguardCIDR, + ServiceCIDR: serviceCIDR, + AdditionalCIDRs: additionalCIDRs, + } +} + +// makeMesh is a helper that constructs a ClusterMesh with the given name and clusters. 
+func makeMesh(name string, clusters ...v1alpha1.ClusterEntry) v1alpha1.ClusterMesh { + return v1alpha1.ClusterMesh{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: v1alpha1.ClusterMeshSpec{Clusters: clusters}, + } +} + +func TestValidateClusterNetworks(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + clusters []v1alpha1.ClusterEntry + wantErr bool + errContains []string + }{ + { + name: "single cluster no overlaps", + clusters: []v1alpha1.ClusterEntry{ + makeCluster("cluster-a", "10.0.0.0/16", "10.4.0.0/24", "10.96.0.0/12"), + }, + wantErr: false, + }, + { + name: "empty cluster list", + clusters: []v1alpha1.ClusterEntry{}, + wantErr: false, + }, + { + name: "two clusters disjoint CIDRs", + clusters: []v1alpha1.ClusterEntry{ + makeCluster("cluster-a", "10.0.0.0/16", "10.4.0.0/24", "10.96.0.0/12"), + makeCluster("cluster-b", "10.1.0.0/16", "10.4.1.0/24", "10.112.0.0/12"), + }, + wantErr: false, + }, + { + name: "two clusters overlapping serviceCIDR", + clusters: []v1alpha1.ClusterEntry{ + makeCluster("cluster-a", "10.0.0.0/16", "10.4.0.0/24", "10.96.0.0/12"), + makeCluster("cluster-b", "10.1.0.0/16", "10.4.1.0/24", "10.96.0.0/12"), + }, + wantErr: true, + errContains: []string{"cluster-a", "cluster-b"}, + }, + { + name: "overlap within a single cluster between serviceCIDR and additionalCIDR", + clusters: []v1alpha1.ClusterEntry{ + makeCluster("cluster-a", "10.0.0.0/16", "10.4.0.0/24", "10.96.0.0/12", "10.96.128.0/17"), + }, + wantErr: true, + errContains: []string{"cluster-a"}, + }, + { + name: "invalid CIDR string", + clusters: []v1alpha1.ClusterEntry{ + { + Name: "cluster-a", + PodCIDRs: []string{"not-a-cidr"}, + WireguardCIDR: "10.4.0.0/24", + }, + }, + wantErr: true, + errContains: []string{"cluster-a"}, + }, + { + name: "two clusters with overlapping podCIDR", + clusters: []v1alpha1.ClusterEntry{ + makeCluster("cluster-a", "10.0.0.0/16", "10.4.0.0/24", ""), + makeCluster("cluster-b", "10.0.128.0/17", "10.4.1.0/24", ""), + }, + 
wantErr: true, + errContains: []string{"cluster-a", "cluster-b"}, + }, + { + name: "single cluster with multiple distinct CIDRs no overlap", + clusters: []v1alpha1.ClusterEntry{ + { + Name: "cluster-a", + PodCIDRs: []string{"10.0.0.0/16"}, + WireguardCIDR: "10.4.0.0/24", + ServiceCIDR: "10.96.0.0/12", + AdditionalCIDRs: []string{"172.16.0.0/12"}, + }, + }, + wantErr: false, + }, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + err := validation.ValidateClusterNetworks(testCase.clusters) + + if testCase.wantErr { + require.Error(t, err) + + for _, fragment := range testCase.errContains { + assert.Contains(t, err.Error(), fragment) + } + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestValidateMeshNetworks(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + meshes []v1alpha1.ClusterMesh + wantErr bool + errContains []string + }{ + { + name: "empty mesh list", + meshes: []v1alpha1.ClusterMesh{}, + wantErr: false, + }, + { + name: "single mesh valid", + meshes: []v1alpha1.ClusterMesh{ + makeMesh("mesh-a", + makeCluster("cluster-a", "10.0.0.0/16", "10.4.0.0/24", "10.96.0.0/12"), + makeCluster("cluster-b", "10.1.0.0/16", "10.4.1.0/24", "10.112.0.0/12"), + ), + }, + wantErr: false, + }, + { + name: "two meshes disjoint network plans", + meshes: []v1alpha1.ClusterMesh{ + makeMesh("mesh-a", + makeCluster("cluster-a", "10.0.0.0/16", "10.4.0.0/24", "10.96.0.0/12"), + makeCluster("cluster-b", "10.1.0.0/16", "10.4.1.0/24", "10.112.0.0/12"), + ), + makeMesh("mesh-b", + makeCluster("cluster-c", "10.2.0.0/16", "10.4.2.0/24", "172.20.0.0/16"), + makeCluster("cluster-d", "10.3.0.0/16", "10.4.3.0/24", "172.21.0.0/16"), + ), + }, + wantErr: false, + }, + { + name: "two meshes overlapping CIDR", + meshes: []v1alpha1.ClusterMesh{ + makeMesh("mesh-a", + makeCluster("cluster-a", "10.0.0.0/16", "10.4.0.0/24", "10.96.0.0/12"), + makeCluster("cluster-b", "10.1.0.0/16", "10.4.1.0/24", ""), + ), + 
makeMesh("mesh-b", + makeCluster("cluster-c", "10.0.0.0/16", "10.4.2.0/24", ""), + makeCluster("cluster-d", "10.2.0.0/16", "10.4.3.0/24", ""), + ), + }, + wantErr: true, + errContains: []string{"mesh-a", "mesh-b"}, + }, + { + name: "intra-mesh overlap is caught", + meshes: []v1alpha1.ClusterMesh{ + makeMesh("mesh-a", + makeCluster("cluster-a", "10.0.0.0/16", "10.4.0.0/24", "10.96.0.0/12"), + makeCluster("cluster-b", "10.0.0.0/16", "10.4.1.0/24", ""), + ), + }, + wantErr: true, + errContains: []string{"cluster-a", "cluster-b"}, + }, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + err := validation.ValidateMeshNetworks(testCase.meshes) + + if testCase.wantErr { + require.Error(t, err) + + for _, fragment := range testCase.errContains { + assert.Contains(t, err.Error(), fragment) + } + } else { + assert.NoError(t, err) + } + }) + } +} + +// repoRootForValidation returns the repository root by walking up from the +// directory of the current test file until a go.mod is found. +// It mirrors the pattern used in internal/containerfile/containerfile_test.go. +func repoRootForValidation(t *testing.T) string { + t.Helper() + + _, callerFile, _, ok := runtime.Caller(0) + require.True(t, ok, "runtime.Caller returned no file info") + + dir := filepath.Dir(callerFile) + + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir + } + + parent := filepath.Dir(dir) + require.NotEqual(t, parent, dir, "reached filesystem root without finding go.mod") + + dir = parent + } +} + +// extractFirstClusterMeshYAML finds the first YAML fenced block in src that +// contains a ClusterMesh document (identified by "kind: ClusterMesh") and +// returns its content. 
+func extractFirstClusterMeshYAML(src string) (string, bool) { + const fence = "```yaml" + + rest := src + + for { + start := strings.Index(rest, fence) + if start < 0 { + return "", false + } + + rest = rest[start+len(fence):] + + end := strings.Index(rest, "```") + if end < 0 { + return "", false + } + + block := rest[:end] + rest = rest[end+3:] + + if strings.Contains(block, "kind: ClusterMesh") { + return block, true + } + } +} + +// TestREADMEQuickStartManifestIsValid is a regression guard that ensures the +// ClusterMesh manifest in the Quick Start section of README.md uses +// non-overlapping CIDRs and would pass ValidateClusterNetworks. +func TestREADMEQuickStartManifestIsValid(t *testing.T) { + t.Parallel() + + root := repoRootForValidation(t) + readmePath := filepath.Join(root, "README.md") + + src, err := os.ReadFile(readmePath) + require.NoError(t, err, "reading README.md") + + yamlBlock, found := extractFirstClusterMeshYAML(string(src)) + require.True(t, found, "no ClusterMesh YAML block found in README.md") + + var mesh v1alpha1.ClusterMesh + require.NoError(t, yaml.Unmarshal([]byte(yamlBlock), &mesh), "unmarshalling ClusterMesh YAML from README.md") + + require.NotEmpty(t, mesh.Spec.Clusters, "README ClusterMesh spec has no clusters") + + err = validation.ValidateClusterNetworks(mesh.Spec.Clusters) + assert.NoError(t, err, "README Quick Start ClusterMesh has overlapping CIDRs") +} diff --git a/internal/validation/node.go b/internal/validation/node.go index 54eaea5..b9a08a7 100644 --- a/internal/validation/node.go +++ b/internal/validation/node.go @@ -37,11 +37,15 @@ const ( ReasonWGIPOutOfRange NodeSkipReason = "WGIPOutOfRange" ReasonWGIPDuplicate NodeSkipReason = "WGIPDuplicate" ReasonNoPublicKey NodeSkipReason = "NodeNoPublicKey" + ReasonNoEndpoint NodeSkipReason = "NodeNoEndpoint" + ReasonEndpointInvalid NodeSkipReason = "NodeEndpointInvalid" ) // ValidateNode checks whether a node is eligible to be peered. 
// It validates the node's PodCIDR against the cluster's PodCIDRs, -// and the node's WireGuard IP against the cluster's WireguardCIDR. +// the node's WireGuard IP against the cluster's WireguardCIDR, and +// that the node exposes a resolvable WireGuard endpoint via the +// kilonode fallback chain. // Returns (true, reason, message) if the node should be skipped, (false, "", "") if valid. func ValidateNode(node *corev1.Node, entry *v1alpha1.ClusterEntry) (bool, NodeSkipReason, string) { if skip, reason, msg := validatePodCIDR(node, entry); skip { @@ -56,6 +60,10 @@ func ValidateNode(node *corev1.Node, entry *v1alpha1.ClusterEntry) (bool, NodeSk return true, reason, msg } + if skip, reason, msg := validateEndpoint(node, entry); skip { + return true, reason, msg + } + return false, "", "" } @@ -94,7 +102,11 @@ func validateWireguardIP(node *corev1.Node, entry *v1alpha1.ClusterEntry) (bool, ) } - wgNet, err := netutil.ParseCIDR(wgIP) + // The annotation may carry any prefix length. Upstream Kilo writes a /32 + // host route ("10.4.0.1/32"); the cozystack-patched Kilo writes the host + // IP with the wireguard subnet mask ("100.66.0.3/16"). Both are accepted; + // only the host IP is checked against the cluster's wireguardCIDR. 
+ hostIP, _, err := netutil.ParseHostInCIDR(wgIP) if err != nil { return true, ReasonWGIPInvalid, fmt.Sprintf( "node %q annotation %q value %q is not a valid CIDR: %v", @@ -102,13 +114,6 @@ func validateWireguardIP(node *corev1.Node, entry *v1alpha1.ClusterEntry) (bool, ) } - if !netutil.IsHostRoute(wgNet) { - return true, ReasonWGIPInvalid, fmt.Sprintf( - "node %q annotation %q value %q is not a host route (/32 or /128)", - node.Name, kilonode.AnnotationWireguardIP, wgIP, - ) - } - wgCIDR, err := netutil.ParseCIDR(entry.WireguardCIDR) if err != nil { return true, ReasonWGIPOutOfRange, fmt.Sprintf( @@ -116,7 +121,7 @@ func validateWireguardIP(node *corev1.Node, entry *v1alpha1.ClusterEntry) (bool, ) } - if !netutil.CIDRContains(wgCIDR, wgNet) { + if !wgCIDR.Contains(hostIP) { return true, ReasonWGIPOutOfRange, fmt.Sprintf( "node %q WireGuard IP %q is not within cluster WireguardCIDR %q", node.Name, wgIP, entry.WireguardCIDR, @@ -137,12 +142,40 @@ func validatePublicKey(node *corev1.Node) (bool, NodeSkipReason, string) { return false, "", "" } +// validateEndpoint checks that the node has a resolvable WireGuard endpoint +// via the kilonode fallback chain. A present-but-malformed annotation is a +// distinct failure mode (ReasonEndpointInvalid) from a node with no source +// at all (ReasonNoEndpoint). +func validateEndpoint(node *corev1.Node, entry *v1alpha1.ClusterEntry) (bool, NodeSkipReason, string) { + _, found, err := kilonode.ResolveEndpoint(node, entry.WireguardPort) + if err != nil { + return true, ReasonEndpointInvalid, fmt.Sprintf( + "node %q has an invalid endpoint annotation: %v", node.Name, err, + ) + } + + if !found { + return true, ReasonNoEndpoint, fmt.Sprintf( + "node %q has no resolvable endpoint (no clustermesh-endpoint, force-endpoint, or ExternalIP)", + node.Name, + ) + } + + return false, "", "" +} + // FindDuplicateWGIPs returns the names of nodes that have duplicate -// kilo.squat.ai/wireguard-ip annotation values. 
The first node with -// a given IP is kept; subsequent duplicates are returned as a map of -// node name to NodeSkipReason. +// kilo.squat.ai/wireguard-ip annotation values. Two annotations are +// considered duplicates when they resolve to the same host IP, regardless +// of the prefix length used (e.g. "10.4.0.1/16" and "10.4.0.1/32" are the +// same host IP and therefore conflict). The first node with a given host IP +// is kept; subsequent nodes with the same host IP are returned as duplicates. +// +// If an annotation cannot be parsed as a CIDR, the raw string is used as the +// dedup key so that identical-invalid copies are still caught, but an invalid +// annotation never collides with a valid one. func FindDuplicateWGIPs(nodes []*corev1.Node) map[string]NodeSkipReason { - seen := make(map[string]string) // ip → first node name + seen := make(map[string]string) // normalized host IP (or raw string) → first node name duplicates := make(map[string]NodeSkipReason) for _, node := range nodes { @@ -151,12 +184,31 @@ func FindDuplicateWGIPs(nodes []*corev1.Node) map[string]NodeSkipReason { continue } - if _, exists := seen[wgIP]; exists { + // Normalize: extract the host IP so that "10.4.0.1/16" and "10.4.0.1/32" + // map to the same key ("10.4.0.1"). Fall back to the raw string when + // parsing fails so that identical-invalid annotations still deduplicate. + key := normalizeWGIPKey(wgIP) + + if _, exists := seen[key]; exists { duplicates[node.Name] = ReasonWGIPDuplicate } else { - seen[wgIP] = node.Name + seen[key] = node.Name } } return duplicates } + +// normalizeWGIPKey returns a canonical string for use as a dedup key. +// It parses the annotation as a CIDR and returns the host IP string so that +// "10.4.0.1/16" and "10.4.0.1/32" both map to "10.4.0.1". When parsing +// fails the raw annotation value is returned unchanged, ensuring identical +// invalid annotations still deduplicate without colliding with valid IPs. 
+func normalizeWGIPKey(wgIP string) string { + hostIP, _, err := netutil.ParseHostInCIDR(wgIP) + if err != nil { + return wgIP + } + + return hostIP.String() +} diff --git a/internal/validation/node_test.go b/internal/validation/node_test.go index f4fc2d4..9545516 100644 --- a/internal/validation/node_test.go +++ b/internal/validation/node_test.go @@ -48,8 +48,9 @@ var baseEntry = &v1alpha1.ClusterEntry{ func baseAnnotations() map[string]string { return map[string]string{ - kilonode.AnnotationWireguardIP: "10.4.0.1/32", - kilonode.AnnotationPublicKey: "dGVzdGtleQo=", + kilonode.AnnotationWireguardIP: "10.4.0.1/32", + kilonode.AnnotationPublicKey: "dGVzdGtleQo=", + kilonode.AnnotationForceEndpoint: "203.0.113.1:51820", } } @@ -93,15 +94,77 @@ func TestValidateNode(t *testing.T) { wantReason: validation.ReasonNoWireguardIP, }, { - name: "wireguard IP not /32", + name: "wireguard IP unparseable", node: makeNode("node-1", []string{"10.0.1.0/24"}, map[string]string{ - kilonode.AnnotationWireguardIP: "10.4.0.0/24", + kilonode.AnnotationWireguardIP: "not-a-cidr", kilonode.AnnotationPublicKey: "dGVzdGtleQo=", }), entry: baseEntry, wantSkipped: true, wantReason: validation.ReasonWGIPInvalid, }, + { + // Regression guard: the operator previously rejected annotations with a + // prefix length other than /32 (or /128) via an IsHostRoute check. That + // check was intentionally dropped to support cozystack-patched Kilo, which + // writes the full subnet mask (e.g. "10.4.0.1/24") into the annotation. + // Only the host portion of the address is now validated against WireguardCIDR. 
+ name: "wireguard IP with subnet mask (cozystack-Kilo style)", + node: makeNode("node-1", []string{"10.0.1.0/24"}, map[string]string{ + kilonode.AnnotationWireguardIP: "10.4.0.1/24", + kilonode.AnnotationPublicKey: "dGVzdGtleQo=", + kilonode.AnnotationForceEndpoint: "203.0.113.1:51820", + }), + entry: baseEntry, + wantSkipped: false, + }, + { + name: "no endpoint source skips node", + node: makeNode("node-1", []string{"10.0.1.0/24"}, map[string]string{ + kilonode.AnnotationWireguardIP: "10.4.0.1/32", + kilonode.AnnotationPublicKey: "dGVzdGtleQo=", + }), + entry: baseEntry, + wantSkipped: true, + wantReason: validation.ReasonNoEndpoint, + }, + { + name: "malformed clustermesh-endpoint skips node", + node: makeNode("node-1", []string{"10.0.1.0/24"}, map[string]string{ + kilonode.AnnotationWireguardIP: "10.4.0.1/32", + kilonode.AnnotationPublicKey: "dGVzdGtleQo=", + kilonode.AnnotationClustermeshEndpoint: "garbage", + }), + entry: baseEntry, + wantSkipped: true, + wantReason: validation.ReasonEndpointInvalid, + }, + { + name: "malformed force-endpoint skips node", + node: makeNode("node-1", []string{"10.0.1.0/24"}, map[string]string{ + kilonode.AnnotationWireguardIP: "10.4.0.1/32", + kilonode.AnnotationPublicKey: "dGVzdGtleQo=", + kilonode.AnnotationForceEndpoint: "no-colon-at-all", + }), + entry: baseEntry, + wantSkipped: true, + wantReason: validation.ReasonEndpointInvalid, + }, + { + name: "ExternalIP-only is accepted", + node: func() *corev1.Node { + n := makeNode("node-1", []string{"10.0.1.0/24"}, map[string]string{ + kilonode.AnnotationWireguardIP: "10.4.0.1/32", + kilonode.AnnotationPublicKey: "dGVzdGtleQo=", + }) + n.Status.Addresses = []corev1.NodeAddress{ + {Type: corev1.NodeExternalIP, Address: "203.0.113.42"}, + } + return n + }(), + entry: baseEntry, + wantSkipped: false, + }, { name: "wireguard IP outside wireguardCIDR", node: makeNode("node-1", []string{"10.0.1.0/24"}, map[string]string{ @@ -112,6 +175,17 @@ func TestValidateNode(t *testing.T) { 
wantSkipped: true, wantReason: validation.ReasonWGIPOutOfRange, }, + { + name: "wireguard IP host outside but network overlaps", + node: makeNode("node-1", []string{"10.0.1.0/24"}, map[string]string{ + // 10.5.0.1/16 → host 10.5.0.1 is outside 10.4.0.0/24 + kilonode.AnnotationWireguardIP: "10.5.0.1/16", + kilonode.AnnotationPublicKey: "dGVzdGtleQo=", + }), + entry: baseEntry, + wantSkipped: true, + wantReason: validation.ReasonWGIPOutOfRange, + }, { name: "no public key annotation", node: makeNode("node-1", []string{"10.0.1.0/24"}, map[string]string{ @@ -188,6 +262,39 @@ func TestFindDuplicateWGIPs(t *testing.T) { }, wantDuplicates: map[string]validation.NodeSkipReason{}, }, + { + // Same host IP with different prefix lengths must be detected as duplicate. + // cozystack-Kilo writes "10.4.0.1/16"; upstream Kilo writes "10.4.0.1/32". + // Both result in AllowedIPs = 10.4.0.1/32, so they conflict. + name: "same host IP different prefix lengths is a duplicate", + nodes: []*corev1.Node{ + makeNode("node-1", nil, map[string]string{kilonode.AnnotationWireguardIP: "10.4.0.1/16"}), + makeNode("node-2", nil, map[string]string{kilonode.AnnotationWireguardIP: "10.4.0.1/32"}), + }, + wantDuplicates: map[string]validation.NodeSkipReason{ + "node-2": validation.ReasonWGIPDuplicate, + }, + }, + { + // Sanity check: different host IPs with same prefix length must not collide. + name: "different host IPs are not duplicates", + nodes: []*corev1.Node{ + makeNode("node-1", nil, map[string]string{kilonode.AnnotationWireguardIP: "10.4.0.1/32"}), + makeNode("node-2", nil, map[string]string{kilonode.AnnotationWireguardIP: "10.4.0.2/32"}), + }, + wantDuplicates: map[string]validation.NodeSkipReason{}, + }, + { + // Invalid annotation must not collide with a valid annotation that happens + // to share no parseable IP. Invalid values fall back to raw-string keying + // so they can still detect identical-invalid copies but never match a valid IP. 
+ name: "invalid annotation does not match valid annotation", + nodes: []*corev1.Node{ + makeNode("node-1", nil, map[string]string{kilonode.AnnotationWireguardIP: "not-an-ip"}), + makeNode("node-2", nil, map[string]string{kilonode.AnnotationWireguardIP: "10.4.0.1/32"}), + }, + wantDuplicates: map[string]validation.NodeSkipReason{}, + }, } for _, testCase := range tests { diff --git a/pkg/kilo/v1alpha1/register.go b/pkg/kilo/v1alpha1/register.go index a059635..a52e5e7 100644 --- a/pkg/kilo/v1alpha1/register.go +++ b/pkg/kilo/v1alpha1/register.go @@ -22,8 +22,14 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" ) +// GroupName is the API group used by Kilo's Peer type. +const GroupName = "kilo.squat.ai" + +// GroupVersion is the API group version used by Kilo's Peer type. +const GroupVersion = "v1alpha1" + // SchemeGroupVersion is group version used to register these objects. -var SchemeGroupVersion = schema.GroupVersion{Group: "kilo.squat.ai", Version: "v1alpha1"} //nolint:gochecknoglobals // kubebuilder-required package-level scheme registration variable. +var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: GroupVersion} //nolint:gochecknoglobals // kubebuilder-required package-level scheme registration variable. // SchemeBuilder is used to add go types to the GroupVersionKind scheme. var SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) //nolint:gochecknoglobals // kubebuilder-required package-level scheme registration variable. 
diff --git a/pkg/kilo/v1alpha1/types_test.go b/pkg/kilo/v1alpha1/types_test.go index 33d0ba0..52aa595 100644 --- a/pkg/kilo/v1alpha1/types_test.go +++ b/pkg/kilo/v1alpha1/types_test.go @@ -30,12 +30,12 @@ func TestAddToScheme(t *testing.T) { s := runtime.NewScheme() require.NoError(t, AddToScheme(s)) - gvk := schema.GroupVersionKind{Group: "kilo.squat.ai", Version: "v1alpha1", Kind: "Peer"} + gvk := schema.GroupVersionKind{Group: GroupName, Version: GroupVersion, Kind: "Peer"} obj, err := s.New(gvk) require.NoError(t, err) assert.IsType(t, &Peer{}, obj) - listGVK := schema.GroupVersionKind{Group: "kilo.squat.ai", Version: "v1alpha1", Kind: "PeerList"} + listGVK := schema.GroupVersionKind{Group: GroupName, Version: GroupVersion, Kind: "PeerList"} listObj, err := s.New(listGVK) require.NoError(t, err) assert.IsType(t, &PeerList{}, listObj) diff --git a/pkg/kilo/v1alpha1/zz_generated.deepcopy.go b/pkg/kilo/v1alpha1/zz_generated.deepcopy.go index 64cafd0..3975336 100644 --- a/pkg/kilo/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/kilo/v1alpha1/zz_generated.deepcopy.go @@ -1,5 +1,21 @@ //go:build !ignore_autogenerated +/* +Copyright 2026 The Kilo Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + // Code generated by controller-gen. DO NOT EDIT. 
package v1alpha1 diff --git a/test/integration/helpers_test.go b/test/integration/helpers_test.go index 35f49de..b828514 100644 --- a/test/integration/helpers_test.go +++ b/test/integration/helpers_test.go @@ -39,8 +39,17 @@ const ( eventuallyInterval = 100 * time.Millisecond ) +// fallbackExternalIP is the default ExternalIP used by makeNode so that +// kilonode.ResolveEndpoint always finds a source. Tests that need to exercise +// the "no resolvable endpoint" path must clear node.Status.Addresses explicitly. +// The address is from the RFC 5737 documentation range. +const fallbackExternalIP = "203.0.113.10" + // makeNode builds a corev1.Node with the Kilo annotations required for peering. -// endpoint may be empty (no kilo.squat.ai/force-endpoint annotation is set). +// endpoint, when non-empty, is set as the kilo.squat.ai/force-endpoint annotation +// and takes precedence over the ExternalIP fallback in ResolveEndpoint. +// A NodeExternalIP is always attached so endpoint resolution succeeds even when +// no force-endpoint is configured. func makeNode(name, podCIDR, wgIP, pubKey, endpoint string) *corev1.Node { annotations := map[string]string{ kilonode.AnnotationWireguardIP: wgIP, @@ -59,7 +68,31 @@ func makeNode(name, podCIDR, wgIP, pubKey, endpoint string) *corev1.Node { PodCIDR: podCIDR, PodCIDRs: []string{podCIDR}, }, + Status: corev1.NodeStatus{ + Addresses: []corev1.NodeAddress{ + {Type: corev1.NodeExternalIP, Address: fallbackExternalIP}, + }, + }, + } +} + +// createNode persists a Node and its Status.Addresses on the given cluster. +// kube-apiserver treats Status as a subresource, so Create persists Spec and +// Metadata only — Status must be set via a separate UpdateStatus call. 
+func createNode(t *testing.T, cl client.Client, node *corev1.Node) { + t.Helper() + ctx := context.Background() + require.NoError(t, cl.Create(ctx, node)) + + if len(node.Status.Addresses) == 0 { + return } + + // Re-fetch to get the resourceVersion assigned by Create, then patch status. + current := &corev1.Node{} + require.NoError(t, cl.Get(ctx, client.ObjectKeyFromObject(node), current)) + current.Status.Addresses = node.Status.Addresses + require.NoError(t, cl.Status().Update(ctx, current)) } // reconcileOnce calls Reconcile for the given ClusterMesh and returns the error. diff --git a/test/integration/labels_test.go b/test/integration/labels_test.go index 2a1ceac..ae6738a 100644 --- a/test/integration/labels_test.go +++ b/test/integration/labels_test.go @@ -46,8 +46,8 @@ func TestLabelIsolation_TwoMeshes(t *testing.T) { alphaLocalNode := makeNode("alpha-local-node", "10.0.0.0/24", "10.100.10.1/32", "pubkey-alpha-local", "") alphaRemoteNode := makeNode("alpha-remote-node", "10.10.0.0/24", "10.100.11.1/32", "pubkey-alpha-remote", "192.0.2.10:51820") - require.NoError(t, globalEnv.localClient.Create(ctx, alphaLocalNode)) - require.NoError(t, globalEnv.remoteClient.Create(ctx, alphaRemoteNode)) + createNode(t, globalEnv.localClient, alphaLocalNode) + createNode(t, globalEnv.remoteClient, alphaRemoteNode) t.Cleanup(func() { _ = globalEnv.localClient.Delete(ctx, alphaLocalNode) @@ -58,8 +58,8 @@ func TestLabelIsolation_TwoMeshes(t *testing.T) { betaLocalNode := makeNode("beta-local-node", "10.20.0.0/24", "10.100.20.1/32", "pubkey-beta-local", "") betaRemoteNode := makeNode("beta-remote-node", "10.30.0.0/24", "10.100.21.1/32", "pubkey-beta-remote", "") - require.NoError(t, globalEnv.localClient.Create(ctx, betaLocalNode)) - require.NoError(t, globalEnv.remoteClient.Create(ctx, betaRemoteNode)) + createNode(t, globalEnv.localClient, betaLocalNode) + createNode(t, globalEnv.remoteClient, betaRemoteNode) t.Cleanup(func() { _ = globalEnv.localClient.Delete(ctx, 
betaLocalNode) diff --git a/test/integration/reconcile_test.go b/test/integration/reconcile_test.go index f8a2e4b..6e3febc 100644 --- a/test/integration/reconcile_test.go +++ b/test/integration/reconcile_test.go @@ -47,14 +47,14 @@ func TestHappyPath_TwoClusters(t *testing.T) { // Create nodes in the local envtest. localNode1 := makeNode("local-node-1", "10.1.0.0/24", "10.100.0.1/32", "pubkey-local-1", "") localNode2 := makeNode("local-node-2", "10.1.1.0/24", "10.100.0.2/32", "pubkey-local-2", "") - require.NoError(t, globalEnv.localClient.Create(ctx, localNode1)) - require.NoError(t, globalEnv.localClient.Create(ctx, localNode2)) + createNode(t, globalEnv.localClient, localNode1) + createNode(t, globalEnv.localClient, localNode2) // Create nodes in the remote envtest. remoteNode1 := makeNode("remote-node-1", "10.2.0.0/24", "10.100.1.1/32", "pubkey-remote-1", "192.0.2.1:51820") remoteNode2 := makeNode("remote-node-2", "10.2.1.0/24", "10.100.1.2/32", "pubkey-remote-2", "") - require.NoError(t, globalEnv.remoteClient.Create(ctx, remoteNode1)) - require.NoError(t, globalEnv.remoteClient.Create(ctx, remoteNode2)) + createNode(t, globalEnv.remoteClient, remoteNode1) + createNode(t, globalEnv.remoteClient, remoteNode2) t.Cleanup(func() { _ = globalEnv.localClient.Delete(ctx, localNode1) @@ -110,16 +110,23 @@ func TestHappyPath_TwoClusters(t *testing.T) { assert.NotEmpty(t, p.Spec.AllowedIPs) } - // Verify that the peer with an explicit endpoint carries it through. - var peerWithEndpoint *kilov1alpha1.Peer + // Verify that the force-endpoint annotation on remote-node-1 is carried + // through to the resulting Peer (rather than the ExternalIP fallback). 
+ expectedName := peer.Name(mesh.Name, "remote", "remote-node-1") + + var peerWithForceEndpoint *kilov1alpha1.Peer + for i := range localPeers.Items { - if localPeers.Items[i].Spec.Endpoint != nil { - peerWithEndpoint = &localPeers.Items[i] + if localPeers.Items[i].Name == expectedName { + peerWithForceEndpoint = &localPeers.Items[i] break } } - require.NotNil(t, peerWithEndpoint, "expected one peer to carry the force-endpoint") - assert.Equal(t, uint32(51820), peerWithEndpoint.Spec.Endpoint.Port) + + require.NotNil(t, peerWithForceEndpoint, "expected peer %q for remote-node-1", expectedName) + require.NotNil(t, peerWithForceEndpoint.Spec.Endpoint) + assert.Equal(t, uint32(51820), peerWithForceEndpoint.Spec.Endpoint.Port) + assert.Equal(t, "192.0.2.1", peerWithForceEndpoint.Spec.Endpoint.DNSOrIP.IP) // --- assert ClusterMesh status --- waitForCondition(t, globalEnv.localClient, mesh, "Ready", metav1.ConditionTrue, eventuallyTimeout) diff --git a/test/integration/suite_test.go b/test/integration/suite_test.go index 0726c51..825e744 100644 --- a/test/integration/suite_test.go +++ b/test/integration/suite_test.go @@ -25,7 +25,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/tools/record" + "k8s.io/client-go/tools/events" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" @@ -107,7 +107,7 @@ func TestMain(m *testing.M) { Scheme: scheme, Registry: registry, Log: slog.Default(), - Recorder: record.NewFakeRecorder(100), + Recorder: events.NewFakeRecorder(100), } globalEnv = testEnv{ diff --git a/test/integration/validation_test.go b/test/integration/validation_test.go index bd7c856..0eb3ed6 100644 --- a/test/integration/validation_test.go +++ b/test/integration/validation_test.go @@ -128,8 +128,8 @@ func TestInvalidNodeWGIP_NodeSkipped(t *testing.T) { // invalid-node-wgip: WireGuard IP NOT within 10.100.0.0/24 (it's in 10.200.0.0/24). 
invalidNode := makeNode("wgip-invalid-node", "10.1.1.0/24", "10.200.0.10/32", "pubkey-wgip-invalid", "") - require.NoError(t, globalEnv.localClient.Create(ctx, validNode)) - require.NoError(t, globalEnv.localClient.Create(ctx, invalidNode)) + createNode(t, globalEnv.localClient, validNode) + createNode(t, globalEnv.localClient, invalidNode) t.Cleanup(func() { _ = globalEnv.localClient.Delete(ctx, validNode) @@ -185,7 +185,7 @@ func TestMissingPublicKey_NodeSkipped(t *testing.T) { // entirely so validatePublicKey skips it. delete(node.Annotations, "kilo.squat.ai/key") - require.NoError(t, globalEnv.localClient.Create(ctx, node)) + createNode(t, globalEnv.localClient, node) t.Cleanup(func() { _ = globalEnv.localClient.Delete(ctx, node) }) mesh := simpleMeshSpec("no-pubkey-mesh", "default") @@ -232,8 +232,8 @@ func TestDeletion_PeersCleanedUp(t *testing.T) { localNode := makeNode("del-local-node", "10.1.0.0/24", "10.100.0.30/32", "pubkey-del-local", "") remoteNode := makeNode("del-remote-node", "10.2.0.0/24", "10.100.1.30/32", "pubkey-del-remote", "192.0.2.30:51820") - require.NoError(t, globalEnv.localClient.Create(ctx, localNode)) - require.NoError(t, globalEnv.remoteClient.Create(ctx, remoteNode)) + createNode(t, globalEnv.localClient, localNode) + createNode(t, globalEnv.remoteClient, remoteNode) t.Cleanup(func() { _ = globalEnv.localClient.Delete(ctx, localNode)