diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d9b868a3..8c4f97cfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Fixed missing `podGrouper` configuration in Helm template that prevented podgrouper values from being applied [#860](https://github.com/NVIDIA/KAI-Scheduler/pull/860) - Fixed rollback for failed bind attempts [#847](https://github.com/NVIDIA/KAI-Scheduler/pull/847) [itsomri](https://github.com/itsomri) - Fixed missing `namespace`, `serviceAccountName`, and `appLabel` fields in `resourceReservation` section of kai-config Helm template [#860](https://github.com/NVIDIA/KAI-Scheduler/pull/891) [dttung2905](https://github.com/dttung2905) +- If a preferred topology constraint is set, do not try to find a lowest common subtree (as a part of the calculations optimizations) which is lower then the preferred level - Added dedicated `usage-prometheus` service for scheduler Prometheus access with configurable instance name [#896](https://github.com/NVIDIA/KAI-Scheduler/pull/896) [itsomri](https://github.com/itsomri) - ClusterPolicy CDI parsing for gpu-operator > v25.10.0 - Fixed missing `repository`, `tag`, and `pullPolicy` fields in `resourceReservationImage` section of kai-config Helm template [#895](https://github.com/NVIDIA/KAI-Scheduler/pull/895) [dttung2905](https://github.com/dttung2905) diff --git a/pkg/scheduler/plugins/topology/common.go b/pkg/scheduler/plugins/topology/common.go index b8a40ff35..1e4302c3d 100644 --- a/pkg/scheduler/plugins/topology/common.go +++ b/pkg/scheduler/plugins/topology/common.go @@ -8,12 +8,14 @@ import ( kaiv1alpha1 "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1alpha1" "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/node_info" + "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/topology_info" ) // lowestCommonDomainID returns the lowest common domain ID, level, and valid (=in domain) nodes for a given node // set and levels in descending order (like in the topology CRD). If a node is missing one of the levels, the function // will assume it's outside the topology and it will not be included in the valid nodes map. -func lowestCommonDomainID(nodeSet node_info.NodeSet, levels []kaiv1alpha1.TopologyLevel) (DomainID, DomainLevel, map[string]*node_info.NodeInfo) { +func lowestCommonDomainID(nodeSet node_info.NodeSet, levels []kaiv1alpha1.TopologyLevel, + topologyConstraint *topology_info.TopologyConstraintInfo) (DomainID, DomainLevel, map[string]*node_info.NodeInfo) { validNodes := map[string]*node_info.NodeInfo{} for _, node := range nodeSet { if !isNodePartOfTopology(node, levels) { @@ -22,6 +24,11 @@ func lowestCommonDomainID(nodeSet node_info.NodeSet, levels []kaiv1alpha1.Topolo validNodes[node.Name] = node } + var leastCommonLevel string + if len(topologyConstraint.PreferredLevel) > 0 { + leastCommonLevel = topologyConstraint.PreferredLevel + } + var domainParts []string for _, level := range levels { allMatch := true @@ -44,6 +51,12 @@ func lowestCommonDomainID(nodeSet node_info.NodeSet, levels []kaiv1alpha1.Topolo } domainParts = append(domainParts, value) + + if leastCommonLevel == level.NodeLabel { + // There is no reason to try and find a lower level then the preferred level because + // it's the lowest level that we will look at for the topology allocation + break + } } if len(domainParts) == 0 { diff --git a/pkg/scheduler/plugins/topology/common_test.go b/pkg/scheduler/plugins/topology/common_test.go index 0e3315564..5c5300e2f 100644 --- a/pkg/scheduler/plugins/topology/common_test.go +++ b/pkg/scheduler/plugins/topology/common_test.go @@ -12,6 +12,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/node_info" + "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/topology_info" ) func newNodeInfo(name string, labels map[string]string) *node_info.NodeInfo { @@ -69,11 +70,12 @@ func TestLowestCommonDomainID(t *testing.T) { } tests := []struct { - name string - nodes node_info.NodeSet - expectedID DomainID - expectedLevel DomainLevel - expectedValid []string + name string + nodes node_info.NodeSet + topologyConstraint *topology_info.TopologyConstraintInfo + expectedID DomainID + expectedLevel DomainLevel + expectedValid []string }{ { name: "all nodes share full topology", @@ -81,29 +83,45 @@ func TestLowestCommonDomainID(t *testing.T) { newNodeInfo("node-1", map[string]string{"zone": "z1", "rack": "r1"}), newNodeInfo("node-2", map[string]string{"zone": "z1", "rack": "r1"}), }, - expectedID: DomainID("z1.r1"), - expectedLevel: DomainLevel("rack"), - expectedValid: []string{"node-1", "node-2"}, + topologyConstraint: &topology_info.TopologyConstraintInfo{}, + expectedID: DomainID("z1.r1"), + expectedLevel: DomainLevel("rack"), + expectedValid: []string{"node-1", "node-2"}, }, { - name: "mismatch at deeper level returns common prefix", + name: "all nodes share full topology - but the preferred level is zone", nodes: node_info.NodeSet{ newNodeInfo("node-1", map[string]string{"zone": "z1", "rack": "r1"}), - newNodeInfo("node-2", map[string]string{"zone": "z1", "rack": "r2"}), + newNodeInfo("node-2", map[string]string{"zone": "z1", "rack": "r1"}), + }, + topologyConstraint: &topology_info.TopologyConstraintInfo{ + PreferredLevel: "zone", }, expectedID: DomainID("z1"), expectedLevel: DomainLevel("zone"), expectedValid: []string{"node-1", "node-2"}, }, + { + name: "mismatch at deeper level returns common prefix", + nodes: node_info.NodeSet{ + newNodeInfo("node-1", map[string]string{"zone": "z1", "rack": "r1"}), + newNodeInfo("node-2", map[string]string{"zone": "z1", "rack": "r2"}), + }, + topologyConstraint: &topology_info.TopologyConstraintInfo{}, + expectedID: DomainID("z1"), + expectedLevel: DomainLevel("zone"), + expectedValid: []string{"node-1", "node-2"}, + }, { name: "mismatch at first level returns root", nodes: node_info.NodeSet{ newNodeInfo("node-1", map[string]string{"zone": "z1", "rack": "r1"}), newNodeInfo("node-2", map[string]string{"zone": "z2", "rack": "r1"}), }, - expectedID: DomainID(rootDomainId), - expectedLevel: DomainLevel(rootLevel), - expectedValid: []string{"node-1", "node-2"}, + topologyConstraint: &topology_info.TopologyConstraintInfo{}, + expectedID: DomainID(rootDomainId), + expectedLevel: DomainLevel(rootLevel), + expectedValid: []string{"node-1", "node-2"}, }, { name: "invalid nodes are filtered out", @@ -111,24 +129,26 @@ func TestLowestCommonDomainID(t *testing.T) { newNodeInfo("node-1", map[string]string{"zone": "z1", "rack": "r1"}), newNodeInfo("node-2", map[string]string{"zone": "z1"}), // missing rack }, - expectedID: DomainID("z1.r1"), - expectedLevel: DomainLevel("rack"), - expectedValid: []string{"node-1"}, + topologyConstraint: &topology_info.TopologyConstraintInfo{}, + expectedID: DomainID("z1.r1"), + expectedLevel: DomainLevel("rack"), + expectedValid: []string{"node-1"}, }, { name: "no valid nodes returns root and empty map", nodes: node_info.NodeSet{ newNodeInfo("node-1", map[string]string{"zone": "z1"}), // missing rack }, - expectedID: DomainID(rootDomainId), - expectedLevel: DomainLevel(rootLevel), - expectedValid: []string{}, + topologyConstraint: &topology_info.TopologyConstraintInfo{}, + expectedID: DomainID(rootDomainId), + expectedLevel: DomainLevel(rootLevel), + expectedValid: []string{}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - domainID, domainLevel, valid := lowestCommonDomainID(tt.nodes, levels) + domainID, domainLevel, valid := lowestCommonDomainID(tt.nodes, levels, tt.topologyConstraint) assert.Equal(t, tt.expectedID, domainID) assert.Equal(t, tt.expectedLevel, domainLevel) diff --git a/pkg/scheduler/plugins/topology/job_filtering.go b/pkg/scheduler/plugins/topology/job_filtering.go index bf2f4f568..c78dd71f8 100644 --- a/pkg/scheduler/plugins/topology/job_filtering.go +++ b/pkg/scheduler/plugins/topology/job_filtering.go @@ -46,7 +46,7 @@ func (t *topologyPlugin) subSetNodesFn( return []node_info.NodeSet{nodeSet}, nil } - id, level, validNodes := lowestCommonDomainID(nodeSet, topologyTree.TopologyResource.Spec.Levels) + id, level, validNodes := lowestCommonDomainID(nodeSet, topologyTree.TopologyResource.Spec.Levels, subGroup.GetTopologyConstraint()) domain, ok := topologyTree.DomainsByLevel[level][id] if !ok { return nil, fmt.Errorf("domain not found for node set in topology %s", topologyTree.Name)