Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/climc/shell/llm/llm_sku.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ func init() {
cmd.Delete(new(options.LLMSkuDeleteOptions))
cmd.Perform("public", &base_options.BasePublicOptions{})
cmd.Perform("private", &base_options.BaseIdOptions{})
cmd.Get("schedulable-check", &options.LLMSkuSchedulableCheckOptions{})
}
30 changes: 30 additions & 0 deletions pkg/apis/llm/sku.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,33 @@ type LLMSkuUpdateInput struct {
// DifySSRFImageId string `json:"dify_ssrf_image_id"`
// DifyWeaviateImageId string `json:"dify_weaviate_image_id"`
// }

// LLMSchedulableCheckInput describes the query parameters accepted by
// GET /llm_skus/{id}/schedulable-check.
type LLMSchedulableCheckInput struct {
// GpuCount is the number of GPUs the check should request. The field is
// tagged omitempty, so a zero value is left out of the serialized form.
GpuCount int `json:"gpu_count,omitempty"`
}

// LLMSchedulableHostInfo describes GPU availability on one candidate host.
//
// NOTE(review): SLLMSku.GetDetailsSchedulableCheck, the only producer visible
// in this change, fills in HostId and HostName only; the GPU fields below are
// left at their zero values there.
type LLMSchedulableHostInfo struct {
// HostId is the candidate host's identifier.
HostId string `json:"host_id"`
// HostName is the candidate host's display name.
HostName string `json:"host_name"`
// GpuAvailable is presumably the number of free GPUs on the host — TODO
// confirm once a producer populates it.
GpuAvailable int `json:"gpu_available"`
// BestGpuVramMb is presumably the VRAM, in MB, of the host's largest
// available GPU — TODO confirm once a producer populates it.
BestGpuVramMb int `json:"best_gpu_vram_mb"`
// BestGpuModel is presumably the model name of that GPU; it is tagged
// omitempty, so an empty value is left out of the serialized form.
BestGpuModel string `json:"best_gpu_model,omitempty"`
}

// LLMSchedulableCheckOutput mirrors GPUStack's ModelEvaluationResult:
// a yes/no verdict plus per-host detail so the caller can surface a
// meaningful message ("not enough VRAM on any host", "host X qualifies", ...).
//
// The field descriptions below reflect how SLLMSku.GetDetailsSchedulableCheck
// populates the struct.
type LLMSchedulableCheckOutput struct {
// Schedulable is the verdict returned by the scheduler forecast.
Schedulable bool `json:"schedulable"`
// VramClaimMb is the total VRAM claim in MB: the SKU's configured value,
// or an estimate derived from the largest mounted model when unset.
VramClaimMb int `json:"vram_claim_mb"`
// PerDevMinMb is VramClaimMb divided by GpuCount, rounded up.
PerDevMinMb int `json:"per_dev_min_mb"`
// GpuCount is the number of GPUs the check was run for (1 by default).
GpuCount int `json:"gpu_count"`
// Reason is a human-readable explanation of the verdict or of why the
// check stopped early.
Reason string `json:"reason,omitempty"`
// FilteredCandidates carries the forecast's raw "filtered_candidates"
// value; it is only set when the forecast reports not schedulable.
FilteredCandidates jsonutils.JSONObject `json:"filtered_candidates,omitempty"`
// Hosts lists one entry per candidate returned by the forecast.
Hosts []LLMSchedulableHostInfo `json:"hosts,omitempty"`
// TotalGpuHosts is the number of candidates returned by the forecast.
TotalGpuHosts int `json:"total_gpu_hosts"`
// QualifiedHosts is the number of those candidates that carry a
// non-empty host id.
QualifiedHosts int `json:"qualified_hosts"`
}
145 changes: 145 additions & 0 deletions pkg/llm/models/sku.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package models

import (
"context"
"fmt"

"yunion.io/x/jsonutils"
"yunion.io/x/pkg/errors"
Expand All @@ -10,9 +11,13 @@ import (
"yunion.io/x/onecloud/pkg/apis"
computeapi "yunion.io/x/onecloud/pkg/apis/compute"
api "yunion.io/x/onecloud/pkg/apis/llm"
schedulerapi "yunion.io/x/onecloud/pkg/apis/scheduler"
"yunion.io/x/onecloud/pkg/cloudcommon/db"
"yunion.io/x/onecloud/pkg/httperrors"
"yunion.io/x/onecloud/pkg/llm/utils/vram"
"yunion.io/x/onecloud/pkg/mcclient"
"yunion.io/x/onecloud/pkg/mcclient/auth"
schedulermodules "yunion.io/x/onecloud/pkg/mcclient/modules/scheduler"
)

func NewSLLMSkuBaseManager(dt interface{}, tableName string, keyword string, keywordPlural string) SLLMSkuBaseManager {
Expand Down Expand Up @@ -96,6 +101,146 @@ func (man *SLLMSkuBaseManager) ValidateCreateData(ctx context.Context, userCred
return input, nil
}

// GetDetailsSchedulableCheck is the per-row endpoint
// `GET /llm_skus/{id}/schedulable-check?gpu_count=1`.
//
// It builds a minimal container ScheduleInput from the SKU (CPU, memory, one
// 10 GiB data disk and one isolated-device request per GPU) and delegates to
// the scheduler's forecast API so every predicate runs
// (IsolatedDevicePredicate with VRAM, CPU, memory, network, ...) — not just a
// bare VRAM scan. Mirrors GPUStack's `evaluate_models`.
//
// Parameters:
//   - ctx: request context, used to open the admin session for the forecast.
//   - userCred: caller credential; not consulted here, the forecast runs with
//     the admin session.
//   - query: optional query parameters; a positive `gpu_count` overrides the
//     default of 1.
//
// Returns a populated output with a nil error both for a completed forecast
// and for the early exits where the SKU cannot be evaluated (no devices, VRAM
// claim unknown); in the latter case Schedulable stays false and Reason
// explains why. An error is returned when gpu_count exceeds the sanity bound
// or when the scheduler forecast call fails.
func (sku *SLLMSku) GetDetailsSchedulableCheck(
	ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject,
) (*api.LLMSchedulableCheckOutput, error) {
	// gpu_count comes straight from the request and sizes both a slice
	// allocation and a loop below, so it must be bounded. The limit is a
	// generous sanity cap, not a hardware constant.
	const maxGpuCount = 128

	skuBase := &sku.SLLMSkuBase
	out := &api.LLMSchedulableCheckOutput{
		VramClaimMb: skuBase.VramClaimMb,
		GpuCount:    1,
	}

	if query != nil {
		// A missing or non-numeric gpu_count deliberately falls back to 1.
		if gc, _ := query.Int("gpu_count"); gc > 0 {
			if gc > maxGpuCount {
				return nil, httperrors.NewInputParameterError(
					"gpu_count %d exceeds the maximum of %d", gc, maxGpuCount)
			}
			out.GpuCount = int(gc)
		}
	}

	devCount := 0
	if skuBase.Devices != nil {
		devCount = len(*skuBase.Devices)
	}
	if devCount == 0 {
		out.Reason = "SKU has no devices configured"
		return out, nil
	}

	if out.VramClaimMb <= 0 {
		// Auto-compute from mounted InstantModels. Same logic as
		// llm_deployment_create_task.createSkuAndReconcile.
		var maxWeight int64
		for _, id := range sku.GetMountedModels() {
			obj, err := GetInstantModelManager().FetchById(id)
			if err != nil {
				// Best effort: a model that cannot be fetched simply does
				// not contribute to the estimate.
				continue
			}
			// Checked assertion so an unexpected model type is skipped
			// instead of panicking the request handler.
			model, ok := obj.(*SInstantModel)
			if !ok {
				continue
			}
			if model.WeightSizeBytes > maxWeight {
				maxWeight = model.WeightSizeBytes
			}
		}
		if maxWeight <= 0 {
			out.Reason = "Auto VRAM calculation failed — mounted instant models have unknown weight (not yet backfilled)"
			return out, nil
		}
		out.VramClaimMb = vram.EstimateClaimMb(maxWeight, sku.LLMType)
	}
	// Ceiling division: every GPU must hold at least its share of the claim.
	out.PerDevMinMb = (out.VramClaimMb + out.GpuCount - 1) / out.GpuCount

	// --- build a minimal ScheduleInput so the scheduler runs predicates
	cpu := skuBase.Cpu
	if cpu <= 0 {
		cpu = 4
	}
	mem := skuBase.Memory
	if mem <= 0 {
		mem = 4096
	}
	isoDevs := make([]*computeapi.IsolatedDeviceConfig, 0, out.GpuCount)
	for i := 0; i < out.GpuCount; i++ {
		// If the SKU pins specific device details, forward them; otherwise
		// GPU type-only (NVIDIA_GPU default) so the VRAM filter drives
		// placement.
		devSpec := &computeapi.IsolatedDeviceConfig{
			DevType:  computeapi.CONTAINER_DEV_NVIDIA_GPU,
			MemoryMb: out.PerDevMinMb,
		}
		if i < devCount {
			src := (*skuBase.Devices)[i]
			if src.DevType != "" {
				devSpec.DevType = src.DevType
			}
			devSpec.Model = src.Model
			devSpec.DevicePath = src.DevicePath
		}
		isoDevs = append(isoDevs, devSpec)
	}

	input := &schedulerapi.ScheduleInput{
		ServerConfig: schedulerapi.ServerConfig{
			ServerConfigs: &computeapi.ServerConfigs{
				Hypervisor:      computeapi.HOST_TYPE_CONTAINER,
				Count:           1,
				IsolatedDevices: isoDevs,
				Disks: []*computeapi.DiskConfig{
					{SizeMb: 10240, DiskType: "data"},
				},
			},
			Ncpu:   cpu,
			Memory: mem,
		},
	}

	session := auth.GetAdminSession(ctx, "")
	canCreate, raw, err := schedulermodules.SchedManager.DoScheduleForecast(session, input, 1)
	if err != nil {
		return nil, errors.Wrap(err, "scheduler forecast")
	}

	// --- translate forecast result → LLM output
	out.Schedulable = canCreate
	out.Reason = "Scheduler forecast completed — see hosts for qualifying candidates"

	// Defensive: a forecast that succeeds without a body would otherwise
	// cause a nil-interface method call below.
	if raw == nil {
		if !canCreate {
			out.Reason = "not schedulable — no host satisfies all predicates"
		}
		return out, nil
	}

	// A missing "candidates" key yields an empty list, i.e. zero hosts.
	candidates, _ := raw.GetArray("candidates")
	out.TotalGpuHosts = len(candidates)
	if len(candidates) > 0 {
		out.Hosts = make([]api.LLMSchedulableHostInfo, 0, len(candidates))
	}
	for _, c := range candidates {
		hostID, _ := c.GetString("host_id")
		hostName, _ := c.GetString("name")
		out.Hosts = append(out.Hosts, api.LLMSchedulableHostInfo{
			HostId:   hostID,
			HostName: hostName,
		})
		if hostID != "" {
			out.QualifiedHosts++
		}
	}

	if !canCreate {
		// Surface the first non-empty reason reported by the scheduler.
		firstReason := ""
		notAllow, _ := raw.GetArray("not_allow_reasons")
		for _, r := range notAllow {
			if msg, _ := r.GetString(); msg != "" {
				firstReason = msg
				break
			}
		}
		if firstReason != "" {
			out.Reason = fmt.Sprintf("not schedulable: %s", firstReason)
		} else {
			out.Reason = "not schedulable — no host satisfies all predicates"
		}
		fc, _ := raw.Get("filtered_candidates")
		out.FilteredCandidates = fc
	}

	return out, nil
}

func (skuBase *SLLMSkuBase) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject, input api.LLMSkuBaseUpdateInput) (api.LLMSkuBaseUpdateInput, error) {
var err error
input.SharableVirtualResourceBaseUpdateInput, err = skuBase.SSharableVirtualResourceBase.ValidateUpdateData(ctx, userCred, query, input.SharableVirtualResourceBaseUpdateInput)
Expand Down
5 changes: 5 additions & 0 deletions pkg/mcclient/options/llm/llm_sku.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,8 @@ func (o *LLMSkuUpdateOptions) Params() (jsonutils.JSONObject, error) {
}
return dict, nil
}

// LLMSkuSchedulableCheckOptions holds the command-line options for the
// `schedulable-check` SKU sub-command.
type LLMSkuSchedulableCheckOptions struct {
// BaseIdOptions is embedded to supply the shared id-based options.
// NOTE(review): presumably this carries the target SKU id — confirm
// against the options package.
options.BaseIdOptions
// GpuCount is the number of GPUs to validate. The server-side handler
// treats a missing or non-positive value as 1.
GpuCount int `help:"Number of GPUs to validate (default 1)" json:"gpu_count"`
}
Loading