From cea9d4e4d5f9c00c8d3af0f0fb2935228c6a775b Mon Sep 17 00:00:00 2001 From: Harshavardhan K Date: Thu, 4 Jun 2026 15:57:21 -0700 Subject: [PATCH 1/3] fix(soot): add kubeadmRbac to soot triggers slice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PhaseClusterAdminRBAC was the only soot controller whose TriggerChannel was not included in the sootItem triggers slice. Every other controller (writePermissions, migrate, konnectivityAgent, kubeProxy, coreDNS, uploadKubeadmConfig, uploadKubeletConfig, bootstrapToken) was present. The consequence: when the TCP reconciler notifies soot controllers of a TCP change, PhaseClusterAdminRBAC is never triggered via the channel. It can only reconcile when a ClusterRoleBinding with a "kubeadm:" prefix already exists in the tenant cluster (its For() watch predicate), which means on a freshly created cluster — where no such CRB exists yet — the phase is never triggered and the kubeadm:cluster-admins ClusterRoleBinding is never created. Since kubeadm v1.29 (KEP-2305), admin.conf carries O=kubeadm:cluster-admins instead of O=system:masters. Any operator using the admin-kubeconfig secret in the window before PhaseClusterAdminRBAC has created the CRB gets HTTP 403, because the credential authenticates but lacks a binding. Adding kubeadmRbac.TriggerChannel to the triggers slice ensures PhaseClusterAdminRBAC is triggered on every TCP reconcile, closing the window for new clusters. Fixes: #1167 Co-Authored-By: Claude Sonnet 4.6 (1M context) --- controllers/soot/manager.go | 1 + 1 file changed, 1 insertion(+) diff --git a/controllers/soot/manager.go b/controllers/soot/manager.go index 11b9d55a..a30e513f 100644 --- a/controllers/soot/manager.go +++ b/controllers/soot/manager.go @@ -403,6 +403,7 @@ func (m *Manager) Reconcile(ctx context.Context, request reconcile.Request) (res uploadKubeadmConfig.TriggerChannel, uploadKubeletConfig.TriggerChannel, bootstrapToken.TriggerChannel, + kubeadmRbac.TriggerChannel, }, cancelFn: tcpCancelFn, completedCh: completedCh, From b163f2a790e831b9fa2c2ef8d331dee945d940e4 Mon Sep 17 00:00:00 2001 From: Harshavardhan K Date: Thu, 4 Jun 2026 16:50:20 -0700 Subject: [PATCH 2/3] fix(soot): buffer kubeadmRbac channel and pre-seed on startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix added kubeadmRbac to the triggers slice but that path only fires when the soot manager is already in sootMap. On initial cluster creation the first reconcile goes through the startup path and returns RequeueAfter:time.Second — meaning the CRB is not created until the next TCP reconcile, a 3-10 second window that exposes admin.conf callers to 403. Two changes close the initial-creation window: 1. Make TriggerChannel buffered (cap 1) so a pre-startup send succeeds immediately without blocking on a live consumer. 2. Pre-seed TriggerChannel with one event immediately after starting the manager goroutine. When the soot manager cache warms up, source.Channel drains the buffer and enqueues a reconcile for kubeadmRbac, which calls EnsureAdminClusterRoleBinding and creates the CRB without waiting for an external TCP object event. The trigger in the sootItem triggers slice is retained for drift correction. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- controllers/soot/manager.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/controllers/soot/manager.go b/controllers/soot/manager.go index a30e513f..d8d9a285 100644 --- a/controllers/soot/manager.go +++ b/controllers/soot/manager.go @@ -355,13 +355,15 @@ func (m *Manager) Reconcile(ctx context.Context, request reconcile.Request) (res return reconcile.Result{}, err } + // Buffered (cap 1) so we can pre-seed an initial event before the manager's + // source goroutine starts consuming, without blocking the Reconcile call. kubeadmRbac := &controllers.KubeadmPhase{ GetTenantControlPlaneFunc: m.retrieveTenantControlPlane(tcpCtx, request), Phase: &resources.KubeadmPhase{ Client: m.AdminClient, Phase: resources.PhaseClusterAdminRBAC, }, - TriggerChannel: make(chan event.GenericEvent), + TriggerChannel: make(chan event.GenericEvent, 1), ControllerName: fmt.Sprintf("%s-kubeadmrbac", controllerNamePrefix), } if err = kubeadmRbac.SetupWithManager(mgr); err != nil { @@ -393,6 +395,19 @@ func (m *Manager) Reconcile(ctx context.Context, request reconcile.Request) (res close(completedCh) }() + // Pre-seed kubeadmRbac so PhaseClusterAdminRBAC runs as soon as the soot + // manager's caches warm up, rather than waiting for the next TCP reconcile + // cycle (RequeueAfter: time.Second) or a kubeadm: ClusterRoleBinding event. + // Without this, a freshly created cluster has no kubeadm:* CRBs to trigger + // the watch, so the phase is never scheduled and admin.conf callers that + // rely on kubeadm:cluster-admins being bound receive 403 until the next + // TCP reconcile fires. The buffered channel absorbs this send before the + // manager's source goroutine starts consuming. + var initTCP kamajiv1alpha1.TenantControlPlane + initTCP.Name = tcp.Name + initTCP.Namespace = tcp.Namespace + kubeadmRbac.TriggerChannel <- event.GenericEvent{Object: &initTCP} + m.sootMap[request.NamespacedName.String()] = sootItem{ triggers: []chan event.GenericEvent{ writePermissions.TriggerChannel, From 86872f1a88dfe1392b62e22f1bd6904c4add2d00 Mon Sep 17 00:00:00 2001 From: Harshavardhan K Date: Thu, 4 Jun 2026 17:03:18 -0700 Subject: [PATCH 3/3] style: trim verbose comments in soot manager fix Co-Authored-By: Claude Sonnet 4.6 (1M context) --- controllers/soot/manager.go | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/controllers/soot/manager.go b/controllers/soot/manager.go index d8d9a285..e437f393 100644 --- a/controllers/soot/manager.go +++ b/controllers/soot/manager.go @@ -355,15 +355,13 @@ func (m *Manager) Reconcile(ctx context.Context, request reconcile.Request) (res return reconcile.Result{}, err } - // Buffered (cap 1) so we can pre-seed an initial event before the manager's - // source goroutine starts consuming, without blocking the Reconcile call. kubeadmRbac := &controllers.KubeadmPhase{ GetTenantControlPlaneFunc: m.retrieveTenantControlPlane(tcpCtx, request), Phase: &resources.KubeadmPhase{ Client: m.AdminClient, Phase: resources.PhaseClusterAdminRBAC, }, - TriggerChannel: make(chan event.GenericEvent, 1), + TriggerChannel: make(chan event.GenericEvent, 1), // buffered: pre-seeded below to run on startup ControllerName: fmt.Sprintf("%s-kubeadmrbac", controllerNamePrefix), } if err = kubeadmRbac.SetupWithManager(mgr); err != nil { @@ -395,14 +393,7 @@ func (m *Manager) Reconcile(ctx context.Context, request reconcile.Request) (res close(completedCh) }() - // Pre-seed kubeadmRbac so PhaseClusterAdminRBAC runs as soon as the soot - // manager's caches warm up, rather than waiting for the next TCP reconcile - // cycle (RequeueAfter: time.Second) or a kubeadm: ClusterRoleBinding event. - // Without this, a freshly created cluster has no kubeadm:* CRBs to trigger - // the watch, so the phase is never scheduled and admin.conf callers that - // rely on kubeadm:cluster-admins being bound receive 403 until the next - // TCP reconcile fires. The buffered channel absorbs this send before the - // manager's source goroutine starts consuming. + // Pre-seed so PhaseClusterAdminRBAC runs on startup rather than waiting for a kubeadm: CRB watch event. var initTCP kamajiv1alpha1.TenantControlPlane initTCP.Name = tcp.Name initTCP.Namespace = tcp.Namespace