Skip to content

Commit 0604f8b

Browse files
committed
instantiate device plugin
1 parent a508336 commit 0604f8b

File tree

2 files changed

+138
-3
lines changed

2 files changed

+138
-3
lines changed

cns/service/main.go

Lines changed: 129 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"net/http"
1313
"os"
1414
"os/signal"
15+
"reflect"
1516
"runtime"
1617
"strconv"
1718
"strings"
@@ -26,6 +27,7 @@ import (
2627
"github.com/Azure/azure-container-networking/cns/cnireconciler"
2728
"github.com/Azure/azure-container-networking/cns/common"
2829
"github.com/Azure/azure-container-networking/cns/configuration"
30+
"github.com/Azure/azure-container-networking/cns/deviceplugin"
2931
"github.com/Azure/azure-container-networking/cns/fsnotify"
3032
"github.com/Azure/azure-container-networking/cns/grpc"
3133
"github.com/Azure/azure-container-networking/cns/healthserver"
@@ -102,9 +104,11 @@ const (
102104
// envVarEnableCNIConflistGeneration enables cni conflist generation if set (value doesn't matter)
103105
envVarEnableCNIConflistGeneration = "CNS_ENABLE_CNI_CONFLIST_GENERATION"
104106

105-
cnsReqTimeout = 15 * time.Second
106-
defaultLocalServerIP = "localhost"
107-
defaultLocalServerPort = "10090"
107+
cnsReqTimeout = 15 * time.Second
108+
defaultLocalServerIP = "localhost"
109+
defaultLocalServerPort = "10090"
110+
defaultDevicePluginRetryInterval = 2 * time.Second
111+
defaultNodeInfoCRDPollInterval = 5 * time.Second
108112
)
109113

110114
type cniConflistScenario string
@@ -880,6 +884,43 @@ func main() {
880884
}
881885
}
882886

887+
if cnsconfig.EnableSwiftV2 && cnsconfig.EnableK8sDevicePlugin {
888+
initialVnetNICCount := 0
889+
initialIBNICCount := 0
890+
// Create device plugin manager instance
891+
pluginManager := deviceplugin.NewPluginManager(z)
892+
pluginManager.AddPlugin(mtv1alpha1.DeviceTypeVnetNIC, initialVnetNICCount)
893+
pluginManager.AddPlugin(mtv1alpha1.DeviceTypeInfiniBandNIC, initialIBNICCount)
894+
895+
ctx, cancel := context.WithCancel(context.Background())
896+
defer cancel()
897+
898+
// Start device plugin manager in a separate goroutine
899+
go func() {
900+
for {
901+
select {
902+
case <-ctx.Done():
903+
z.Info("Context canceled, stopping plugin manager")
904+
return
905+
default:
906+
if pluginErr := pluginManager.Run(ctx); pluginErr != nil {
907+
z.Error("plugin manager exited with error", zap.Error(pluginErr))
908+
time.Sleep(defaultDevicePluginRetryInterval)
909+
} else {
910+
return
911+
}
912+
}
913+
}
914+
}()
915+
916+
// go routine to poll node info crd and update device counts
917+
go func() {
918+
if pollErr := pollNodeInfoCRDAndUpdatePlugin(ctx, z, pluginManager); pollErr != nil {
919+
z.Error("Error in pollNodeInfoCRDAndUpdatePlugin", zap.Error(pollErr))
920+
}
921+
}()
922+
}
923+
883924
// Conditionally initialize and start the gRPC server
884925
if cnsconfig.GRPCSettings.Enable {
885926
// Define gRPC server settings
@@ -1037,6 +1078,91 @@ func main() {
10371078
logger.Close()
10381079
}
10391080

1081+
// Poll CRD until it's set and update PluginManager
1082+
func pollNodeInfoCRDAndUpdatePlugin(ctx context.Context, zlog *zap.Logger, pluginManager *deviceplugin.PluginManager) error {
1083+
kubeConfig, err := ctrl.GetConfig()
1084+
if err != nil {
1085+
logger.Errorf("Failed to get kubeconfig for request controller: %v", err)
1086+
return errors.Wrap(err, "failed to get kubeconfig")
1087+
}
1088+
kubeConfig.UserAgent = "azure-cns-" + version
1089+
1090+
clientset, err := kubernetes.NewForConfig(kubeConfig)
1091+
if err != nil {
1092+
return errors.Wrap(err, "failed to build clientset")
1093+
}
1094+
1095+
nodeName, err := configuration.NodeName()
1096+
if err != nil {
1097+
return errors.Wrap(err, "failed to get NodeName")
1098+
}
1099+
1100+
node, err := clientset.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
1101+
if err != nil {
1102+
return errors.Wrapf(err, "failed to get node %s", nodeName)
1103+
}
1104+
1105+
// check the Node labels for Swift V2
1106+
if _, ok := node.Labels[configuration.LabelNodeSwiftV2]; !ok {
1107+
zlog.Info("Node is not labeled for Swift V2, skipping polling nodeinfo crd")
1108+
return nil
1109+
}
1110+
1111+
directcli, err := client.New(kubeConfig, client.Options{Scheme: multitenancy.Scheme})
1112+
if err != nil {
1113+
return errors.Wrap(err, "failed to create ctrl client")
1114+
}
1115+
1116+
nodeInfoCli := multitenancy.NodeInfoClient{
1117+
Cli: directcli,
1118+
}
1119+
1120+
for {
1121+
select {
1122+
case <-ctx.Done():
1123+
zlog.Info("Polling context canceled, exiting")
1124+
return nil
1125+
default:
1126+
// Fetch the CRD status
1127+
nodeInfo, err := nodeInfoCli.Get(ctx, node.Name)
1128+
if err != nil {
1129+
zlog.Error("Error fetching nodeinfo CRD", zap.Error(err))
1130+
return errors.Wrap(err, "failed to get nodeinfo crd")
1131+
}
1132+
1133+
// Check if the status is set
1134+
if !reflect.DeepEqual(nodeInfo.Status, mtv1alpha1.NodeInfoStatus{}) && len(nodeInfo.Status.DeviceInfos) > 0 {
1135+
// Create a map to count devices by type
1136+
deviceCounts := map[mtv1alpha1.DeviceType]int{
1137+
mtv1alpha1.DeviceTypeVnetNIC: 0,
1138+
mtv1alpha1.DeviceTypeInfiniBandNIC: 0,
1139+
}
1140+
1141+
// Aggregate device counts from the CRD
1142+
for _, deviceInfo := range nodeInfo.Status.DeviceInfos {
1143+
switch deviceInfo.DeviceType {
1144+
case mtv1alpha1.DeviceTypeVnetNIC, mtv1alpha1.DeviceTypeInfiniBandNIC:
1145+
deviceCounts[deviceInfo.DeviceType]++
1146+
default:
1147+
zlog.Error("Unknown device type", zap.String("deviceType", string(deviceInfo.DeviceType)))
1148+
}
1149+
}
1150+
1151+
// Update the plugin manager with device counts
1152+
for deviceType, count := range deviceCounts {
1153+
pluginManager.TrackDevices(deviceType, count)
1154+
}
1155+
1156+
// Exit polling loop once the CRD status is successfully processed
1157+
return nil
1158+
}
1159+
1160+
// Wait before polling again
1161+
time.Sleep(defaultNodeInfoCRDPollInterval)
1162+
}
1163+
}
1164+
}
1165+
10401166
func InitializeMultiTenantController(ctx context.Context, httpRestService cns.HTTPService, cnsconfig configuration.CNSConfig) error {
10411167
var multiTenantController multitenantcontroller.RequestController
10421168
kubeConfig, err := ctrl.GetConfig()

crd/multitenancy/client.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,12 @@ func (n *NodeInfoClient) CreateOrUpdate(ctx context.Context, nodeInfo *v1alpha1.
216216
}
217217
return nil
218218
}
219+
220+
// Get retrieves the NodeInfo CRD by name.
221+
func (n *NodeInfoClient) Get(ctx context.Context, name string) (*v1alpha1.NodeInfo, error) {
222+
var nodeInfo v1alpha1.NodeInfo
223+
if err := n.Cli.Get(ctx, client.ObjectKey{Name: name}, &nodeInfo); err != nil {
224+
return nil, errors.Wrap(err, "error getting nodeinfo crd")
225+
}
226+
return &nodeInfo, nil
227+
}

0 commit comments

Comments
 (0)