diff --git a/pkg/scheduler/actions/preempt/preempt.go b/pkg/scheduler/actions/preempt/preempt.go index f237e3a162..0b303c24d5 100644 --- a/pkg/scheduler/actions/preempt/preempt.go +++ b/pkg/scheduler/actions/preempt/preempt.go @@ -29,6 +29,7 @@ import ( "math" "math/rand" "sort" + "strings" "sync" "sync/atomic" "time" @@ -56,6 +57,15 @@ const ( MinCandidateNodesPercentageKey = "minCandidateNodesPercentage" MinCandidateNodesAbsoluteKey = "minCandidateNodesAbsolute" MaxCandidateNodesAbsoluteKey = "maxCandidateNodesAbsolute" + GangPreemptionModeKey = "gangPreemptionMode" +) + +type GangPreemptionMode uint8 + +const ( + GPModeOff GangPreemptionMode = iota + GPModeMinimal + GPModeAtomic ) type Action struct { @@ -69,6 +79,8 @@ type Action struct { minCandidateNodesPercentage int minCandidateNodesAbsolute int maxCandidateNodesAbsolute int + + gpMode GangPreemptionMode } func New() *Action { @@ -79,6 +91,7 @@ func New() *Action { minCandidateNodesPercentage: 10, minCandidateNodesAbsolute: 1, maxCandidateNodesAbsolute: 100, + gpMode: GPModeOff, } } @@ -96,9 +109,26 @@ func (pmpt *Action) parseArguments(ssn *framework.Session) { arguments.GetInt(&pmpt.minCandidateNodesPercentage, MinCandidateNodesPercentageKey) arguments.GetInt(&pmpt.minCandidateNodesAbsolute, MinCandidateNodesAbsoluteKey) arguments.GetInt(&pmpt.maxCandidateNodesAbsolute, MaxCandidateNodesAbsoluteKey) + var gpModeStr string + arguments.GetString(&gpModeStr, GangPreemptionModeKey) + pmpt.gpMode = parseGPMode(gpModeStr) pmpt.ssn = ssn } +func parseGPMode(s string) GangPreemptionMode { + switch strings.ToLower(strings.TrimSpace(s)) { + case "", "off", "disabled": + return GPModeOff + case "minimal": + return GPModeMinimal + case "atomic": + return GPModeAtomic + default: + klog.V(3).Infof("Unrecognized Gang Preemption Mode, defaulting to `disabled`") + return GPModeOff + } +} + func (pmpt *Action) Execute(ssn *framework.Session) { klog.V(5).Infof("Enter Preempt ...") defer klog.V(5).Infof("Leaving Preempt 
...") @@ -294,7 +324,9 @@ func (pmpt *Action) preempt( if pmpt.enableTopologyAwarePreemption { return pmpt.topologyAwarePreempt(ssn, stmt, preemptor, filter, predicateNodes) } - + if pmpt.gpMode != GPModeOff { + return pmpt.gangPreempt(ssn, stmt, preemptor, filter, predicateNodes) + } return pmpt.normalPreempt(ssn, stmt, preemptor, filter, predicateNodes) } @@ -397,6 +429,216 @@ func (pmpt *Action) normalPreempt( return assigned, nil } +func (pmpt *Action) gangPreempt( + ssn *framework.Session, + stmt *framework.Statement, + preemptor *api.TaskInfo, + filter func(*api.TaskInfo) bool, + predicateNodes []*api.NodeInfo, +) (bool, error) { + klog.V(3).Infof("Running Gang Preemption with mode: %v", pmpt.gpMode) + preemptorJob, found := ssn.Jobs[preemptor.Job] + if !found { + return false, fmt.Errorf("not found Job %s in Session", preemptor.Job) + } + currentQueue := ssn.Queues[preemptorJob.Queue] + var preemptees []*api.TaskInfo + for _, node := range predicateNodes { + for _, task := range node.Tasks { + if filter == nil { + preemptees = append(preemptees, task.Clone()) + } else if filter(task) { + preemptees = append(preemptees, task.Clone()) + } + } + } + preemptees = ssn.Preemptable(preemptor, preemptees) + nodeJobPreempteesMap := make(map[string]map[api.JobID][]*api.TaskInfo) + for _, p := range preemptees { + nodeName := p.NodeName + if _, ok := nodeJobPreempteesMap[nodeName]; !ok { + nodeJobPreempteesMap[nodeName] = make(map[api.JobID][]*api.TaskInfo) + } + jobID := p.Job + if preemptorJob.UID == jobID { + continue + } + nodeJobPreempteesMap[nodeName][jobID] = append(nodeJobPreempteesMap[nodeName][jobID], p) + } + // Node order comes into play to keep results deterministic when there is a tie for best fitting gang + nodeScores := util.PrioritizeNodes(preemptor, predicateNodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn) + selectedNodes := util.SortNodes(nodeScores) + preempteeJobIds, targetNode := pmpt.findBestPreemptionTarget(selectedNodes, 
nodeJobPreempteesMap, preemptor) + if targetNode == nil { + klog.V(3).Infof("No suitable target nodes for preemptor: <%s/%s>", preemptor.Namespace, preemptor.Name) + return false, nil + } + preempted := api.EmptyResource() + var victims []*api.TaskInfo + for _, jid := range preempteeJobIds { + var vics []*api.TaskInfo + if pmpt.gpMode == GPModeMinimal { + vics = nodeJobPreempteesMap[targetNode.Name][jid] + } else { + for _, t := range ssn.Jobs[jid].Tasks { + vics = append(vics, t) + } + } + victims = append(victims, vics...) + } + + if pmpt.gpMode == GPModeMinimal { + // If we are evicting minimal tasks, preempt the lower priority ones first. + vq := ssn.BuildVictimsPriorityQueue(victims, preemptor) + victims = victims[:0] + for vq.Len() > 0 { + victims = append(victims, vq.Pop().(*api.TaskInfo)) + } + } + + for _, preemptee := range victims { + metrics.RegisterPreemptionAttempts() + klog.V(3).Infof("Try to preempt Task <%s/%s> for Task <%s/%s>", + preemptee.Namespace, preemptee.Name, preemptor.Namespace, preemptor.Name) + if err := stmt.Evict(preemptee, "preempt"); err != nil { + klog.Errorf("Failed to preempt Task <%s/%s> for Task <%s/%s>: %v", + preemptee.Namespace, preemptee.Name, preemptor.Namespace, preemptor.Name, err) + continue + } + preempted.Add(preemptee.Resreq) + if pmpt.gpMode == GPModeMinimal && preemptor.InitResreq.LessEqual(targetNode.FutureIdle(), api.Zero) { + break + } + } + assigned := false + // If this check fails, it implies some Evictions failed. 
+ // Since we are optimizing for gangs per node we should try again in next session + if ssn.Allocatable(currentQueue, preemptor) && preemptor.InitResreq.LessEqual(targetNode.FutureIdle(), api.Zero) { + if err := stmt.Pipeline(preemptor, targetNode.Name, !preempted.IsEmpty()); err != nil { + klog.Errorf("Failed to pipeline Task <%s/%s> on Node <%s>", + preemptor.Namespace, preemptor.Name, targetNode.Name) + if rollbackErr := stmt.UnPipeline(preemptor); rollbackErr != nil { + klog.Errorf("Failed to unpipeline Task %v on %v in Session %v for %v.", + preemptor.UID, targetNode.Name, ssn.UID, rollbackErr) + } + } else { + assigned = true + } + } + return assigned, nil +} + +// Returns the node and the minimal-count set of victim jobs on that node. +// If no preemption is needed: jobs == nil, node != nil. +// If impossible on all nodes: jobs == nil, node == nil. +func (pmpt *Action) findBestPreemptionTarget( + selectedNodes []*api.NodeInfo, + nodeJobPreempteesMap map[string]map[api.JobID][]*api.TaskInfo, + preemptor *api.TaskInfo, +) (jobs []api.JobID, node *api.NodeInfo) { + // ---- Phase 1: try best single-job victim (minimal overage) ---- + var ( + bestSingleOver *api.Resource + bestSingleJob api.JobID + bestSingleNode *api.NodeInfo + ) + for _, n := range selectedNodes { + idle := n.FutureIdle() + idle.SetScalar("pods", 0) + // Fits without preemption + if preemptor.InitResreq.LessEqual(idle, api.Zero) { + return nil, n + } + need := preemptor.InitResreq.Clone() + // idle could be 0 + need.Sub(idle) + for j, jtasks := range nodeJobPreempteesMap[n.Name] { + sum := api.EmptyResource() + for _, t := range jtasks { + sum.Add(t.Resreq) + } + if !need.LessEqual(sum, api.Zero) { + continue + } + diff := sum.Sub(need) // overage = sum - need + if bestSingleOver == nil || !bestSingleOver.LessEqual(diff, api.Zero) { + bestSingleOver = diff + bestSingleJob = j + bestSingleNode = n + } + } + } + if bestSingleOver != nil { + return []api.JobID{bestSingleJob}, bestSingleNode + } + 
+ // ---- Phase 2: greedy, largest first fit Jobs per node, minimal Job count combo per node ---- + type combo struct { + node *api.NodeInfo + jobs []api.JobID + overage *api.Resource + } + var best *combo + + for _, n := range selectedNodes { + idle := n.FutureIdle() + idle.SetScalar("pods", 0) + need := preemptor.InitResreq.Clone() + need.Sub(idle) + + // Build candidates: per-job sum on this node + type jr struct { + id api.JobID + res *api.Resource + } + var cand []jr + for j, jtasks := range nodeJobPreempteesMap[n.Name] { + sum := api.EmptyResource() + for _, t := range jtasks { + sum.Add(t.Resreq) + } + cand = append(cand, jr{j, sum}) + } + // Sort descending by resource (largest first). + // We need not-less-equal sort because number of pods in the ScalarResources can be same (like 0). + // In this case strictly less check fails hence we use strictly greater (not-less-equal) + sort.Slice(cand, func(i, j int) bool { + return !cand[i].res.LessEqual(cand[j].res, api.Zero) + }) + + acc := api.EmptyResource() + var picked []api.JobID + for _, c := range cand { + if need.LessEqual(acc, api.Zero) { + break + } + picked = append(picked, c.id) + acc.Add(c.res) + } + + if !need.LessEqual(acc, api.Zero) { + continue + } + + over := acc.Clone() + over.Sub(need) + // If we don't have the best candidate jobs OR + // If number of selected Jobs is less than the best candidates (disrupt min gangs) OR + // If the disruption cost is same, pick the ones with the least wastage + if best == nil || + len(picked) < len(best.jobs) || + (len(picked) == len(best.jobs) && over.Less(best.overage, api.Zero)) { + cp := append([]api.JobID(nil), picked...) 
+ best = &combo{node: n, jobs: cp, overage: over} + } + } + + if best != nil { + return best.jobs, best.node + } + return nil, nil +} + func (pmpt *Action) taskEligibleToPreempt(preemptor *api.TaskInfo) error { if preemptor.Pod.Spec.PreemptionPolicy != nil && *preemptor.Pod.Spec.PreemptionPolicy == v1.PreemptNever { return fmt.Errorf("not eligible to preempt other tasks due to preemptionPolicy is Never") diff --git a/pkg/scheduler/actions/preempt/preempt_test.go b/pkg/scheduler/actions/preempt/preempt_test.go index bc332f8838..2c90cef68c 100644 --- a/pkg/scheduler/actions/preempt/preempt_test.go +++ b/pkg/scheduler/actions/preempt/preempt_test.go @@ -23,6 +23,7 @@ package preempt import ( "flag" + "strconv" "testing" v1 "k8s.io/api/core/v1" @@ -611,6 +612,398 @@ func TestTopologyAwarePreempt(t *testing.T) { } } +func TestGangPreempt(t *testing.T) { + plugins := map[string]framework.PluginBuilder{ + conformance.PluginName: conformance.New, + gang.PluginName: gang.New, + priority.PluginName: priority.New, + proportion.PluginName: proportion.New, + } + highPrio := util.BuildPriorityClass("high-priority", 100000) + lowPrio := util.BuildPriorityClass("low-priority", 10) + priority3, priority2, priority1 := int32(3), int32(2), int32(1) + testsMinimal := []uthelper.TestCommonStruct{ + { + Name: "minimal mode: pick the gang with the least overage and exact fit", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg3", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), 
+ util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee4", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee5", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("2", "2G"), "pg3", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("3", "3G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n2", api.BuildResourceList("2", "2G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectEvicted: []string{"c1/preemptee4", "c1/preemptee5"}, + ExpectEvictNum: 2, + }, + { + Name: "minimal mode: pick the gang with the least overage and deterministic nodes for tied overage", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg3", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", 
map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "false"}, make(map[string]string)), + util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee4", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee5", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("2", "2G"), "pg3", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("3", "3G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n2", api.BuildResourceList("2", "2G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectEvicted: []string{"c1/preemptee1", "c1/preemptee3"}, + ExpectEvictNum: 2, + }, + { + Name: "minimal mode: only one gang should be suitable after preemptable filter", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg3", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", 
"preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "false"}, make(map[string]string)), + util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "false"}, make(map[string]string)), + util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee4", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee5", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("2", "2G"), "pg3", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("3", "3G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n2", api.BuildResourceList("2", "2G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectEvicted: []string{"c1/preemptee4", "c1/preemptee5"}, + ExpectEvictNum: 2, + }, + { + Name: "minimal mode: pick best gang when cluster has just one node", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg3", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, 
"low-priority"), + util.BuildPodGroupWithPrio("pg4", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee4", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee5", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee6", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg3", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("1", "1G"), "pg4", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("6", "6G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectEvicted: []string{"c1/preemptee6"}, + ExpectEvictNum: 1, + }, + { + Name: "minimal mode: multiple gangs need to be preempted on the same node for a large preemptor", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, 
"low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg3", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg4", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg5", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg6", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg7", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg8", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg9", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee4", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee5", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg3", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee6", "n1", 
v1.PodRunning, api.BuildResourceList("1", "1G"), "pg3", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee7", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg4", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee8", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg4", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee9", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg5", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee10", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg5", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee11", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg5", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee12", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg6", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee13", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg6", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee14", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg6", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee15", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg7", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee16", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg8", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), 
+ util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("6", "6G"), "pg9", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("8", "8G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n2", api.BuildResourceList("8", "8G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectEvicted: []string{"c1/preemptee9", "c1/preemptee10", "c1/preemptee11", "c1/preemptee12", "c1/preemptee13", "c1/preemptee14"}, + ExpectEvictNum: 6, + }, + { + Name: "minimal mode: single gang can be preempted on a node (with idle resources) for a large preemptor", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg3", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg4", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee5", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee6", "n2", v1.PodRunning, api.BuildResourceList("1", 
"1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee7", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee8", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg3", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee9", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg3", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee10", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg3", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("6", "6G"), "pg4", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("6", "6G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n2", api.BuildResourceList("6", "6G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectEvicted: []string{"c1/preemptee1", "c1/preemptee2"}, + ExpectEvictNum: 2, + }, + { + Name: "minimal mode: after selecting the gang, preempt the lowest priority task for minimal mode", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPodWithPriority("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", 
map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string), &priority1), + util.BuildPodWithPriority("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string), &priority2), + util.BuildPodWithPriority("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string), &priority3), + util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("1", "1G"), "pg2", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("3", "3G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectEvicted: []string{"c1/preemptee3"}, + ExpectEvictNum: 1, + }, + } + testsAtomic := []uthelper.TestCommonStruct{ + { + Name: "atomic mode: pick best-fit gang on one node and evict all its tasks across nodes", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg3", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, 
api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee4", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee5", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee6", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("2", "2G"), "pg3", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("4", "4G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n2", api.BuildResourceList("2", "2G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectEvicted: []string{"c1/preemptee4", "c1/preemptee5", "c1/preemptee6"}, + ExpectEvictNum: 3, + }, + { + Name: "atomic mode: deterministic node selection on tied overage, with cluster-wide gang eviction", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg3", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", 
map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "false"}, make(map[string]string)), + util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee4", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee5", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee6", "n3", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee7", "n4", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("2", "2G"), "pg3", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("3", "3G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n2", api.BuildResourceList("2", "2G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n3", api.BuildResourceList("1", "1G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n4", api.BuildResourceList("1", "1G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, 
+ // overage is tied on n1 and n2 as one pod is not preemptable on n1 + ExpectEvicted: []string{"c1/preemptee1", "c1/preemptee2", "c1/preemptee3", "c1/preemptee6", "c1/preemptee7"}, + ExpectEvictNum: 5, + }, + { + Name: "Multiple gangs need to be preempted on the same node for a large preemptor plus other gang members on different nodes", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg3", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg4", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg5", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg6", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg7", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg8", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg9", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", 
map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee4", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee5", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg3", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee6", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg3", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee7", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg4", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee8", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg4", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee9", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg5", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee10", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg5", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee11", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg5", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee12", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg6", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee13", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg6", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee14", "n2", v1.PodRunning, 
api.BuildResourceList("1", "1G"), "pg6", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee15", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg7", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee16", "n2", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg8", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee17", "n3", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg6", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee18", "n4", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg6", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptee19", "n5", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg6", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)), + util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("6", "6G"), "pg9", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("8", "8G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n2", api.BuildResourceList("8", "8G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n3", api.BuildResourceList("1", "1G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n4", api.BuildResourceList("1", "1G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + util.BuildNode("n5", api.BuildResourceList("1", "1G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + 
ExpectEvicted: []string{"c1/preemptee9", "c1/preemptee10", "c1/preemptee11", "c1/preemptee12", "c1/preemptee13", "c1/preemptee14", "c1/preemptee17", "c1/preemptee18", "c1/preemptee19"}, + ExpectEvictNum: 9, + }, + { + Name: "After selecting the gang, preempt all tasks irrespective of priority", + PodGroups: []*schedulingv1beta1.PodGroup{ + util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"), + util.BuildPodGroupWithPrio("pg2", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "high-priority"), + }, + Pods: []*v1.Pod{ + util.BuildPodWithPriority("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string), &priority1), + util.BuildPodWithPriority("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string), &priority2), + util.BuildPodWithPriority("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string), &priority3), + util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("1", "1G"), "pg2", make(map[string]string), make(map[string]string)), + }, + Nodes: []*v1.Node{ + util.BuildNode("n1", api.BuildResourceList("3", "3G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)), + }, + Queues: []*schedulingv1beta1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectEvicted: []string{"c1/preemptee1", "c1/preemptee2", "c1/preemptee3"}, + ExpectEvictNum: 3, + }, + } + trueValue := true + tiers := []conf.Tier{ + { + Plugins: []conf.PluginOption{ + { + Name: conformance.PluginName, + EnabledPreemptable: &trueValue, + }, + { + Name: gang.PluginName, + EnabledPreemptable: &trueValue, + EnabledJobPipelined: &trueValue, + 
EnabledJobStarving: &trueValue, + }, + { + Name: priority.PluginName, + EnabledTaskOrder: &trueValue, + EnabledJobOrder: &trueValue, + EnabledPreemptable: &trueValue, + EnabledJobPipelined: &trueValue, + EnabledJobStarving: &trueValue, + EnabledNodeOrder: &trueValue, + }, + { + Name: proportion.PluginName, + EnabledOverused: &trueValue, + EnabledAllocatable: &trueValue, + EnabledQueueOrder: &trueValue, + }, + }, + }} + + actions := []framework.Action{New()} + testCases := map[string][]uthelper.TestCommonStruct{ + "minimal": testsMinimal, + "atomic": testsAtomic, + } + for key, tests := range testCases { + for i, test := range tests { + test.Plugins = plugins + test.PriClass = []*schedulingv1.PriorityClass{highPrio, lowPrio} + t.Run(test.Name, func(t *testing.T) { + testSsn := test.RegisterSession(tiers, []conf.Configuration{{Name: actions[0].Name(), + Arguments: map[string]interface{}{GangPreemptionModeKey: key}}}) + // node score = {n1=-1, n2=-2, n3=-3...} + testSsn.AddNodeOrderFn("priority", func(_ *api.TaskInfo, n *api.NodeInfo) (float64, error) { + i, _ := strconv.Atoi(n.Name[1:]) + return -float64(i), nil + }) + defer test.Close() + test.Run(actions) + if err := test.CheckAll(i); err != nil { + t.Fatal(err) + } + }) + } + } + +} + func buildPodWithPodAntiAffinity(name, namespace, node string, phase v1.PodPhase, req v1.ResourceList, groupName string, labels map[string]string, selector map[string]string, topologyKey string) *v1.Pod { pod := util.BuildPod(name, namespace, node, phase, req, groupName, labels, selector)