Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions charts/kamaji/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ Here the values you can override:
| telemetry | object | `{"disabled":false}` | Disable the analytics traces collection |
| temporaryDirectoryPath | string | `"/tmp/kamaji"` | Directory which will be used to work with temporary files. (default "/tmp/kamaji") |
| tolerations | list | `[]` | Kubernetes node taints that the Kamaji controller pods would tolerate |
| watchNamespaces | list | `[]` | Optional list of namespaces the operator restricts its watch to. When empty (default) Kamaji watches every namespace. Cluster-scoped resources (DataStore, ClusterRole, ...) are never affected, and the release namespace is always watched implicitly to keep the migration Job lifecycle working. |

## Installing and managing etcd as DataStore

Expand Down
3 changes: 3 additions & 0 deletions charts/kamaji/templates/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ spec:
{{- if not (eq .Values.defaultDatastoreName "") }}
- --datastore={{ .Values.defaultDatastoreName }}
{{- end }}
{{- with .Values.watchNamespaces }}
- --watch-namespaces={{ join "," . }}
{{- end }}
{{- if .Values.telemetry.disabled }}
- --disable-telemetry
{{- end }}
Expand Down
8 changes: 8 additions & 0 deletions charts/kamaji/templates/kubeconfiggenerator-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,18 @@ spec:
- kubeconfig-generator
- --health-probe-bind-address={{ .Values.kubeconfigGenerator.healthProbeBindAddress }}
- --leader-elect={{ .Values.kubeconfigGenerator.enableLeaderElect }}
{{- with .Values.watchNamespaces }}
- --watch-namespaces={{ join "," . }}
{{- end }}
{{- if .Values.kubeconfigGenerator.loggingDevel.enable }}- --zap-devel{{- end }}
{{- with .Values.kubeconfigGenerator.extraArgs }}
{{- toYaml . | nindent 10 }}
{{- end }}
env:
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
name: controller
Expand Down
7 changes: 7 additions & 0 deletions charts/kamaji/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ loggingDevel:
# -- If specified, all the Kamaji instances with an unassigned DataStore will inherit this default value.
defaultDatastoreName: default

# -- Optional list of namespaces the operator restricts its watch to.
# When empty (default) Kamaji watches every namespace.
# Cluster-scoped resources (DataStore, ClusterRole, ...) are never affected,
# and the release namespace is always watched implicitly to keep the migration
# Job lifecycle working.
watchNamespaces: []

# -- Subchart: See https://github.com/clastix/kamaji-etcd/blob/master/charts/kamaji-etcd/values.yaml
kamaji-etcd:
deploy: true
Expand Down
42 changes: 34 additions & 8 deletions cmd/kubeconfig-generator/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/viper"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

cmdutils "github.com/clastix/kamaji/cmd/utils"
"github.com/clastix/kamaji/controllers"
"github.com/clastix/kamaji/internal"
kamajimanager "github.com/clastix/kamaji/internal/manager"
"github.com/clastix/kamaji/internal/metrics"
)

Expand All @@ -38,22 +38,39 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command {
cacheResyncPeriod time.Duration
managerNamespace string
certificateExpirationDeadline time.Duration
watchNamespaces []string
)

cmd := &cobra.Command{
Use: "kubeconfig-generator",
Short: "Start the Kubeconfig Generator manager",
SilenceErrors: false,
SilenceUsage: true,
PreRunE: func(*cobra.Command, []string) error {
PreRunE: func(cmd *cobra.Command, _ []string) error {
// Avoid polluting stdout with useless details by the underlying klog implementations
klog.SetOutput(io.Discard)
klog.LogToStderr(false)

// pod-namespace is required: the operator merges it into the
// cache watch set when --watch-namespaces is non-empty (so
// leader-election Lease access stays in scope), and the
// migration Job watch needs it as well. The chart projects
// it from metadata.namespace; binary-direct callers must
// pass --pod-namespace=$NS or set POD_NAMESPACE.
err := cmdutils.CheckFlags(cmd.Flags(), "pod-namespace")
if err != nil {
return err
}

if certificateExpirationDeadline < 24*time.Hour {
return fmt.Errorf("certificate expiration deadline must be at least 24 hours")
}

err = kamajimanager.ValidateNamespaces(watchNamespaces)
if err != nil {
return err
}

return nil
},
RunE: func(*cobra.Command, []string) error {
Expand All @@ -67,6 +84,18 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command {
setupLog.Info(fmt.Sprintf("Go Version: %s", goRuntime.Version()))
setupLog.Info(fmt.Sprintf("Go OS/Arch: %s/%s", goRuntime.GOOS, goRuntime.GOARCH))

// kubeconfig-generator watches the cluster-scoped KubeconfigGenerator
// CRD plus TenantControlPlane resources and labelled kubeconfig
// Secrets in tenant namespaces. The install namespace is included
// for symmetry with the main controller and to keep
// leader-election working defensively if controller-runtime ever
// routes the Lease informer through the manager cache.
cachedNamespaces := kamajimanager.MergeWatchedNamespaces(watchNamespaces, managerNamespace)

if len(cachedNamespaces) > 0 {
setupLog.Info("restricting cache to namespaces", "namespaces", cachedNamespaces)
}

ctrlOpts := ctrl.Options{
Scheme: scheme,
Metrics: metricsserver.Options{
Expand All @@ -76,11 +105,7 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command {
LeaderElection: leaderElect,
LeaderElectionNamespace: managerNamespace,
LeaderElectionID: "kubeconfiggenerator.kamaji.clastix.io",
NewCache: func(config *rest.Config, opts cache.Options) (cache.Cache, error) {
opts.SyncPeriod = &cacheResyncPeriod

return cache.New(config, opts)
},
NewCache: kamajimanager.NewCacheFunc(cacheResyncPeriod, cachedNamespaces),
}

triggerChan := make(chan event.GenericEvent)
Expand Down Expand Up @@ -160,6 +185,7 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command {
cmd.Flags().DurationVar(&cacheResyncPeriod, "cache-resync-period", 10*time.Hour, "The controller-runtime.Manager cache resync period.")
cmd.Flags().StringVar(&managerNamespace, "pod-namespace", os.Getenv("POD_NAMESPACE"), "The Kubernetes Namespace on which the Operator is running in, required for the TenantControlPlane migration jobs.")
cmd.Flags().DurationVar(&certificateExpirationDeadline, "certificate-expiration-deadline", 24*time.Hour, "Define the deadline upon certificate expiration to start the renewal process, cannot be less than a 24 hours.")
cmd.Flags().StringSliceVar(&watchNamespaces, "watch-namespaces", nil, "Optional, comma-separated list of namespaces the controller should watch for TenantControlPlane (and dependent) resources. When empty every namespace is watched. Cluster-scoped resources are never affected by this flag, and the install namespace is always watched implicitly.")

cobra.OnInitialize(func() {
viper.AutomaticEnv()
Expand Down
26 changes: 19 additions & 7 deletions cmd/manager/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@ import (
"github.com/spf13/viper"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/discovery"
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
Expand All @@ -33,6 +31,7 @@ import (
"github.com/clastix/kamaji/controllers/soot"
"github.com/clastix/kamaji/internal"
"github.com/clastix/kamaji/internal/builders/controlplane"
kamajimanager "github.com/clastix/kamaji/internal/manager"
"github.com/clastix/kamaji/internal/metrics"
"github.com/clastix/kamaji/internal/utilities"
"github.com/clastix/kamaji/internal/webhook"
Expand Down Expand Up @@ -60,6 +59,7 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command {
maxConcurrentReconciles int
disableTelemetry bool
certificateExpirationDeadline time.Duration
watchNamespaces []string

webhookCAPath string
)
Expand Down Expand Up @@ -90,6 +90,11 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command {
return fmt.Errorf("the controller reconcile timeout must be greater than zero")
}

err = kamajimanager.ValidateNamespaces(watchNamespaces)
if err != nil {
return err
}

return nil
},
RunE: func(*cobra.Command, []string) error {
Expand All @@ -109,6 +114,16 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command {
telemetryClient = telemetryclient.NewNewOp()
}

// When namespace scoping is requested, the operator's install namespace
// must remain in the watch set: TenantControlPlane migration Jobs are
// created and watched there (see TenantControlPlaneReconciler.Watches
// on batchv1.Job, predicated by KamajiNamespace).
cachedNamespaces := kamajimanager.MergeWatchedNamespaces(watchNamespaces, managerNamespace)

if len(cachedNamespaces) > 0 {
setupLog.Info("restricting cache to namespaces", "namespaces", cachedNamespaces)
}

ctrlOpts := ctrl.Options{
Scheme: scheme,
Metrics: metricsserver.Options{
Expand All @@ -121,11 +136,7 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command {
LeaderElection: leaderElect,
LeaderElectionNamespace: managerNamespace,
LeaderElectionID: "kamaji.clastix.io",
NewCache: func(config *rest.Config, opts cache.Options) (cache.Cache, error) {
opts.SyncPeriod = &cacheResyncPeriod

return cache.New(config, opts)
},
NewCache: kamajimanager.NewCacheFunc(cacheResyncPeriod, cachedNamespaces),
}

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrlOpts)
Expand Down Expand Up @@ -336,6 +347,7 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command {
cmd.Flags().DurationVar(&cacheResyncPeriod, "cache-resync-period", 10*time.Hour, "The controller-runtime.Manager cache resync period.")
cmd.Flags().BoolVar(&disableTelemetry, "disable-telemetry", false, "Disable the analytics traces collection.")
cmd.Flags().DurationVar(&certificateExpirationDeadline, "certificate-expiration-deadline", 24*time.Hour, "Define the deadline upon certificate expiration to start the renewal process, cannot be less than a 24 hours.")
cmd.Flags().StringSliceVar(&watchNamespaces, "watch-namespaces", nil, "Optional, comma-separated list of namespaces the operator should watch for TenantControlPlane (and dependent) resources. When empty Kamaji watches every namespace. Cluster-scoped resources are never affected by this flag, and the operator's own install namespace is always watched implicitly.")

cobra.OnInitialize(func() {
viper.AutomaticEnv()
Expand Down
84 changes: 84 additions & 0 deletions docs/content/guides/namespace-scoping.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Restricting the namespaces watched by Kamaji

By default Kamaji watches every namespace of the management cluster for
`TenantControlPlane` resources and the dependent objects it owns
(`Secret`, `ConfigMap`, `Deployment`, `Service`, `Ingress`, ...).

In multi-team or multi-tenant management clusters this is sometimes too broad.
You can opt in to a smaller watch surface with the `--watch-namespaces` flag,
which leverages controller-runtime's per-namespace cache.

## The `--watch-namespaces` flag

```text
--watch-namespaces=team-a,team-b
```

- Accepts a comma-separated list (or repeated `--watch-namespaces=...` flags).
- Each entry must be a valid Kubernetes namespace identifier (DNS-1123
label); invalid values cause the operator to fail fast at startup rather
than surfacing opaque watch errors at runtime.
- When **omitted or empty**, Kamaji keeps the default cluster-wide behaviour.
- When set, namespaced informers only watch the listed namespaces.
- When set, the operator's own install namespace is always added to the watch
set implicitly: `TenantControlPlane` migration `Job`s live there and would
not be reconciled otherwise.
- The flag is honoured by both the main controller and the optional
**kubeconfig-generator** deployment; the Helm chart threads the same
`watchNamespaces` value into both.

## What is and is not affected

Namespace scoping only constrains **namespaced** resources. Cluster-scoped
resources continue to be cached cluster-wide and keep working unchanged:

| Resource | Scope | Honours `--watch-namespaces`? |
| ----------------------------------- | -------------- | ----------------------------- |
| `TenantControlPlane` | Namespaced | Yes |
| `Secret`, `ConfigMap`, `Deployment`, `Service`, `Ingress` (TCP children) | Namespaced | Yes |
| Migration `Job` in the install namespace | Namespaced | Implicitly always included |
| `DataStore` | Cluster | No (always cluster-wide) |
| `ValidatingWebhookConfiguration` | Cluster | No |
| `ClusterRole`, `ClusterRoleBinding` | Cluster | No |
| Soot controllers (kubeadm phases, kube-proxy, CoreDNS, ...) | N/A | These run against the **tenant** cluster's API server with their own cache, so the management-cluster scoping does not apply to them. |

## Caveats

- **Gateway API**: when a `TenantControlPlane` references a `Gateway` that
lives in a namespace **outside** the watch set, the operator will not be able
to read it from the cache. Add every namespace hosting referenced
`Gateway` resources to `--watch-namespaces`.
- **RBAC**: scoping only affects what the cache subscribes to, not what the
Kubernetes API authorises. The default Helm chart still installs a
`ClusterRole` to keep upgrades and cluster-scoped reconciliations safe.
- **Scaling**: controller-runtime allocates one informer per `(namespace,
kind)` pair when `DefaultNamespaces` is set. Kamaji watches roughly seven
namespaced kinds in the management cluster (`TenantControlPlane` plus its
owned `Secret`, `ConfigMap`, `Deployment`, `Service`, `Ingress`, `Job`).
Listing a few dozen namespaces is comfortable; listing several hundred
multiplies the number of `LIST/WATCH` connections and goroutines and may
exceed client-go QPS defaults — at that point the cluster-wide single
informer is cheaper. Prefer per-cluster Kamaji instances over very long
watch lists.

## Helm

The chart exposes a top-level `watchNamespaces` value that maps to the flag:

```yaml
# values.yaml
watchNamespaces:
- team-a
- team-b
```

Or via `--set` on the command line:

```bash
helm upgrade --install kamaji clastix/kamaji \
--namespace kamaji-system --create-namespace \
--set 'watchNamespaces={team-a,team-b}'
```

Leaving the list empty (the default) renders no `--watch-namespaces` argument
and preserves the cluster-wide behaviour.
1 change: 1 addition & 0 deletions docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ nav:
- guides/backup-and-restore.md
- guides/certs-lifecycle.md
- guides/pausing.md
- guides/namespace-scoping.md
- guides/write-permissions.md
- guides/datastore-migration.md
- guides/datastore-overrides.md
Expand Down
20 changes: 11 additions & 9 deletions e2e/worker_kubeadm_join_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@ var _ = Describe("starting a kind worker with kubeadm", func() {

By("enabling br_netfilter", func() {
exitCode, stdout, err := workerContainer.Exec(ctx, []string{"modprobe", "br_netfilter"})
if err != nil {
_, _ = fmt.Fprintln(GinkgoWriter, "modprobe error: "+err.Error())

return
}

out, _ := io.ReadAll(stdout)
if len(out) > 0 {
Expand All @@ -149,14 +154,15 @@ var _ = Describe("starting a kind worker with kubeadm", func() {
if exitCode != 0 {
_, _ = fmt.Fprintln(GinkgoWriter, "modprobe exit code: "+strconv.FormatUint(uint64(exitCode), 10))
}

if err != nil {
_, _ = fmt.Fprintln(GinkgoWriter, "modprobe error: "+err.Error())
}
})

By("disabling swap", func() {
exitCode, stdout, err := workerContainer.Exec(ctx, []string{"swapoff", "-a"})
if err != nil {
_, _ = fmt.Fprintln(GinkgoWriter, "swapoff error: "+err.Error())

return
}

out, _ := io.ReadAll(stdout)
if len(out) > 0 {
Expand All @@ -166,24 +172,20 @@ var _ = Describe("starting a kind worker with kubeadm", func() {
if exitCode != 0 {
_, _ = fmt.Fprintln(GinkgoWriter, "swapoff exit code: "+strconv.FormatUint(uint64(exitCode), 10))
}

if err != nil {
_, _ = fmt.Fprintln(GinkgoWriter, "swapoff error: "+err.Error())
}
})

By("executing the command in the worker node", func() {
cmds := append(strings.Split(strings.TrimSpace(joinCommandBuffer.String()), " "), "--ignore-preflight-errors=SystemVerification,FileExisting")

exitCode, stdout, err := workerContainer.Exec(ctx, cmds)
Expect(err).ToNot(HaveOccurred())

out, _ := io.ReadAll(stdout)
if len(out) > 0 {
_, _ = fmt.Fprintln(GinkgoWriter, "executing failed: "+string(out))
}

Expect(exitCode).To(Equal(0))
Expect(err).ToNot(HaveOccurred())
})

By("waiting for nodes", func() {
Expand Down
Loading