yunionio · zexi · May 27, 2026
diff --git a/cmd/climc/shell/llm/llm_sku.go b/cmd/climc/shell/llm/llm_sku.go
@@ -17,4 +17,5 @@ func init() {
 	cmd.Delete(new(options.LLMSkuDeleteOptions))
 	cmd.Perform("public", &base_options.BasePublicOptions{})
 	cmd.Perform("private", &base_options.BaseIdOptions{})
+	cmd.Get("schedulable-check", &options.LLMSkuSchedulableCheckOptions{})
 }
diff --git a/pkg/apis/llm/sku.go b/pkg/apis/llm/sku.go
@@ -289,3 +289,33 @@ type LLMSkuUpdateInput struct {
 // 	DifySSRFImageId     string `json:"dify_ssrf_image_id"`
 // 	DifyWeaviateImageId string `json:"dify_weaviate_image_id"`
 // }
+
+// LLMSchedulableCheckInput describes the query parameters accepted by
+// GET /llm_skus/{id}/schedulable-check.
+type LLMSchedulableCheckInput struct {
+	GpuCount int `json:"gpu_count,omitempty"` // number of GPUs to check for; the handler uses 1 when this is not positive
+}
+
+// LLMSchedulableHostInfo describes GPU availability on one candidate host.
+type LLMSchedulableHostInfo struct {
+	HostId        string `json:"host_id"`                  // taken from the forecast candidate's "host_id"
+	HostName      string `json:"host_name"`                // taken from the forecast candidate's "name"
+	GpuAvailable  int    `json:"gpu_available"`            // NOTE(review): not filled by the schedulable-check handler — confirm producer
+	BestGpuVramMb int    `json:"best_gpu_vram_mb"`         // NOTE(review): not filled by the schedulable-check handler; presumably MB
+	BestGpuModel  string `json:"best_gpu_model,omitempty"` // NOTE(review): not filled by the schedulable-check handler — confirm producer
+}
+
+// LLMSchedulableCheckOutput mirrors GPUStack's ModelEvaluationResult:
+// a yes/no verdict plus per-host detail so the caller can surface a
+// meaningful message ("not enough VRAM on any host", "host X qualifies", …).
+type LLMSchedulableCheckOutput struct {
+	Schedulable        bool                     `json:"schedulable"`                   // verdict returned by the scheduler forecast
+	VramClaimMb        int                      `json:"vram_claim_mb"`                 // SKU claim, or an estimate from the largest mounted model when the SKU value is not positive
+	PerDevMinMb        int                      `json:"per_dev_min_mb"`                // VramClaimMb divided by GpuCount, rounded up
+	GpuCount           int                      `json:"gpu_count"`                     // GPU count the check was run with (defaults to 1)
+	Reason             string                   `json:"reason,omitempty"`              // human-readable explanation of the outcome
+	FilteredCandidates jsonutils.JSONObject     `json:"filtered_candidates,omitempty"` // raw "filtered_candidates" of the forecast; set only when not schedulable
+	Hosts              []LLMSchedulableHostInfo `json:"hosts,omitempty"`               // one entry per forecast candidate
+	TotalGpuHosts      int                      `json:"total_gpu_hosts"`               // number of candidates in the forecast result
+	QualifiedHosts     int                      `json:"qualified_hosts"`               // candidates that carry a non-empty host_id
+}
diff --git a/pkg/llm/models/sku.go b/pkg/llm/models/sku.go
@@ -2,6 +2,7 @@ package models
 
 import (
 	"context"
+	"fmt"
 
 	"yunion.io/x/jsonutils"
 	"yunion.io/x/pkg/errors"
@@ -10,9 +11,13 @@ import (
 	"yunion.io/x/onecloud/pkg/apis"
 	computeapi "yunion.io/x/onecloud/pkg/apis/compute"
 	api "yunion.io/x/onecloud/pkg/apis/llm"
+	schedulerapi "yunion.io/x/onecloud/pkg/apis/scheduler"
 	"yunion.io/x/onecloud/pkg/cloudcommon/db"
 	"yunion.io/x/onecloud/pkg/httperrors"
+	"yunion.io/x/onecloud/pkg/llm/utils/vram"
 	"yunion.io/x/onecloud/pkg/mcclient"
+	"yunion.io/x/onecloud/pkg/mcclient/auth"
+	schedulermodules "yunion.io/x/onecloud/pkg/mcclient/modules/scheduler"
 )
 
 func NewSLLMSkuBaseManager(dt interface{}, tableName string, keyword string, keywordPlural string) SLLMSkuBaseManager {
@@ -96,6 +101,146 @@ func (man *SLLMSkuBaseManager) ValidateCreateData(ctx context.Context, userCred
 	return input, nil
 }
 
+// GetDetailsSchedulableCheck is the per-row endpoint
+// `GET /llm_skus/{id}/schedulable-check?gpu_count=1`.
+// It delegates to the scheduler's forecast API so every predicate runs
+// (IsolatedDevicePredicate with VRAM, CPU, memory, network, ...) —
+// not just a bare VRAM scan. Mirrors GPUStack's `evaluate_models`.
+//
+// query may carry an optional integer "gpu_count". A missing or non-positive
+// value keeps the default of 1; a value that cannot be parsed as an integer,
+// or one above the sanity bound of 128, is rejected as an input parameter
+// error.
+//
+// The result is returned with a nil error and Schedulable == false (Reason
+// explains why) when the SKU has no devices or its VRAM claim cannot be
+// derived. A non-nil error is returned only for an invalid query or a failed
+// forecast call.
+func (sku *SLLMSku) GetDetailsSchedulableCheck(
+	ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject,
+) (*api.LLMSchedulableCheckOutput, error) {
+	// Sanity bound on the caller-supplied GPU count: it sizes an allocation
+	// and a loop below, so it must not be unbounded.
+	const maxGpuCount = 128
+
+	skuBase := &sku.SLLMSkuBase
+	out := &api.LLMSchedulableCheckOutput{
+		VramClaimMb: skuBase.VramClaimMb,
+		GpuCount:    1,
+	}
+
+	if query != nil && query.Contains("gpu_count") {
+		gc, err := query.Int("gpu_count")
+		if err != nil {
+			return nil, httperrors.NewInputParameterError("invalid gpu_count: %v", err)
+		}
+		if gc > maxGpuCount {
+			return nil, httperrors.NewInputParameterError("gpu_count %d exceeds maximum %d", gc, maxGpuCount)
+		}
+		// Non-positive values keep the default so clients that always send
+		// the field (zero value included) continue to work.
+		if gc > 0 {
+			out.GpuCount = int(gc)
+		}
+	}
+
+	devCount := 0
+	if skuBase.Devices != nil {
+		devCount = len(*skuBase.Devices)
+	}
+	if devCount == 0 {
+		out.Reason = "SKU has no devices configured"
+		return out, nil
+	}
+	if out.VramClaimMb <= 0 {
+		// Auto-compute from mounted InstantModels. Same logic as
+		// llm_deployment_create_task.createSkuAndReconcile.
+		var maxWeight int64
+		for _, id := range sku.GetMountedModels() {
+			obj, err := GetInstantModelManager().FetchById(id)
+			if err != nil {
+				// Best effort: a model that cannot be fetched is skipped
+				// instead of failing the whole check.
+				continue
+			}
+			// Checked assertion so an unexpected object type cannot panic
+			// the request handler.
+			model, ok := obj.(*SInstantModel)
+			if !ok || model == nil {
+				continue
+			}
+			if model.WeightSizeBytes > maxWeight {
+				maxWeight = model.WeightSizeBytes
+			}
+		}
+		if maxWeight <= 0 {
+			out.Reason = "Auto VRAM calculation failed — mounted instant models have unknown weight (not yet backfilled)"
+			return out, nil
+		}
+		out.VramClaimMb = vram.EstimateClaimMb(maxWeight, sku.LLMType)
+	}
+	// Ceiling division: each of the GpuCount devices must hold its share.
+	out.PerDevMinMb = (out.VramClaimMb + out.GpuCount - 1) / out.GpuCount
+
+	// --- build a minimal ScheduleInput so the scheduler runs predicates
+	cpu := skuBase.Cpu
+	if cpu <= 0 {
+		cpu = 4
+	}
+	mem := skuBase.Memory
+	if mem <= 0 {
+		mem = 4096
+	}
+	isoDevs := make([]*computeapi.IsolatedDeviceConfig, 0, out.GpuCount)
+	for i := 0; i < out.GpuCount; i++ {
+		// If the SKU pins specific device details, forward them; otherwise
+		// GPU type-only (NVIDIA_GPU default) so the VRAM filter drives
+		// placement.
+		devSpec := &computeapi.IsolatedDeviceConfig{
+			DevType:  computeapi.CONTAINER_DEV_NVIDIA_GPU,
+			MemoryMb: out.PerDevMinMb,
+		}
+		if i < devCount {
+			src := (*skuBase.Devices)[i]
+			if src.DevType != "" {
+				devSpec.DevType = src.DevType
+			}
+			devSpec.Model = src.Model
+			devSpec.DevicePath = src.DevicePath
+		}
+		isoDevs = append(isoDevs, devSpec)
+	}
+
+	input := &schedulerapi.ScheduleInput{
+		ServerConfig: schedulerapi.ServerConfig{
+			ServerConfigs: &computeapi.ServerConfigs{
+				Hypervisor:      computeapi.HOST_TYPE_CONTAINER,
+				Count:           1,
+				IsolatedDevices: isoDevs,
+				Disks: []*computeapi.DiskConfig{
+					{SizeMb: 10240, DiskType: "data"},
+				},
+			},
+			Ncpu:   cpu,
+			Memory: mem,
+		},
+	}
+
+	// NOTE(review): the forecast runs with an admin session rather than
+	// userCred, so candidate host names are reported regardless of the
+	// caller's scope — confirm this is intended.
+	session := auth.GetAdminSession(ctx, "")
+	canCreate, raw, err := schedulermodules.SchedManager.DoScheduleForecast(session, input, 1)
+	if err != nil {
+		return nil, errors.Wrap(err, "scheduler forecast")
+	}
+
+	// --- translate forecast result → LLM output
+	out.Schedulable = canCreate
+	if canCreate {
+		out.Reason = "Scheduler forecast completed — see hosts for qualifying candidates"
+	} else {
+		out.Reason = "not schedulable — no host satisfies all predicates"
+	}
+	if raw == nil {
+		// No detail payload to translate; the verdict alone is returned.
+		return out, nil
+	}
+
+	// A missing "candidates" key yields an empty list, i.e. zero hosts.
+	candidates, _ := raw.GetArray("candidates")
+	out.TotalGpuHosts = len(candidates)
+	for _, c := range candidates {
+		hostID, _ := c.GetString("host_id")
+		hostName, _ := c.GetString("name")
+		out.Hosts = append(out.Hosts, api.LLMSchedulableHostInfo{
+			HostId:   hostID,
+			HostName: hostName,
+		})
+		if hostID != "" {
+			out.QualifiedHosts++
+		}
+	}
+
+	if !canCreate {
+		// Surface the first non-empty rejection reason, if the scheduler
+		// provided one; otherwise the generic message set above stands.
+		notAllow, _ := raw.GetArray("not_allow_reasons")
+		for _, r := range notAllow {
+			if reason, _ := r.GetString(); reason != "" {
+				out.Reason = fmt.Sprintf("not schedulable: %s", reason)
+				break
+			}
+		}
+		// The key is optional; a lookup failure leaves the field unset.
+		out.FilteredCandidates, _ = raw.Get("filtered_candidates")
+	}
+
+	return out, nil
+}
+
 func (skuBase *SLLMSkuBase) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject, input api.LLMSkuBaseUpdateInput) (api.LLMSkuBaseUpdateInput, error) {
 	var err error
 	input.SharableVirtualResourceBaseUpdateInput, err = skuBase.SSharableVirtualResourceBase.ValidateUpdateData(ctx, userCred, query, input.SharableVirtualResourceBaseUpdateInput)

diff --git a/pkg/mcclient/options/llm/llm_sku.go b/pkg/mcclient/options/llm/llm_sku.go
@@ -192,3 +192,8 @@ func (o *LLMSkuUpdateOptions) Params() (jsonutils.JSONObject, error) {
 	}
 	return dict, nil
 }
+
+// LLMSkuSchedulableCheckOptions holds the arguments of the climc
+// "schedulable-check" sub-command of an LLM SKU.
+type LLMSkuSchedulableCheckOptions struct {
+	options.BaseIdOptions
+	GpuCount int `help:"Number of GPUs to validate (default 1)" json:"gpu_count"`
+}
+
+// Params builds the query sent with the request. gpu_count is included only
+// when positive, so an unset flag leaves the server-side default in effect.
+//
+// NOTE(review): this override assumes the Params method promoted from the
+// embedded BaseIdOptions does not serialize GpuCount, in which case the flag
+// would be silently dropped — confirm against BaseIdOptions.
+func (o *LLMSkuSchedulableCheckOptions) Params() (jsonutils.JSONObject, error) {
+	params := jsonutils.NewDict()
+	if o.GpuCount > 0 {
+		params.Set("gpu_count", jsonutils.NewInt(int64(o.GpuCount)))
+	}
+	return params, nil
+}