@@ -20,6 +20,8 @@ import (
2020 "context"
2121 "fmt"
2222 "net"
23+ "os"
24+ "path/filepath"
2325 "strings"
2426 "sync"
2527 "time"
@@ -116,10 +118,6 @@ func (db *DB) GetPodNamespace(pod string) string {
116118func (db * DB ) Run (ctx context.Context ) error {
117119 defer close (db .notifications )
118120
119- nlHandle , err := netlink .NewHandle ()
120- if err != nil {
121- return fmt .Errorf ("error creating netlink handle %v" , err )
122- }
123121 // Resources are published periodically or if there is a netlink notification
124122 // indicating a new interfaces was added or changed
125123 nlChannel := make (chan netlink.LinkUpdate )
@@ -131,54 +129,66 @@ func (db *DB) Run(ctx context.Context) error {
131129
132130 // Obtain data that will not change after the startup
133131 db .instance = getInstanceProperties (ctx )
134- // TODO: it is not common but may happen in edge cases that the default gateway changes
135- // revisit once we have more evidence this can be a potential problem or break some use
136- // cases.
137- gwInterfaces := getDefaultGwInterfaces ()
138132
139133 for {
134+
140135 err := db .rateLimiter .Wait (ctx )
141136 if err != nil {
142137 klog .Error (err , "unexpected rate limited error trying to get system interfaces" )
143138 }
144139
145- devices := []resourceapi.Device {}
146- ifaces , err := nlHandle .LinkList ()
140+ // Device lookup to prevent duplicated
141+ devices := make (map [string ]* resourceapi.Device )
142+
143+ // We need to not register an RDMA device twice
144+ seenRdmaDevices := sets .New [string ]()
145+
146+ // Kernel network interfaces
147+ err , netlinkDevices := db .discoverNetlinkDevices ()
147148 if err != nil {
148- klog . Error ( err , "unexpected error trying to get system interfaces" )
149+ return err
149150 }
150- for _ , iface := range ifaces {
151- klog .V (7 ).InfoS ("Checking network interface" , "name" , iface .Attrs ().Name )
152- if gwInterfaces .Has (iface .Attrs ().Name ) {
153- klog .V (4 ).Infof ("iface %s is an uplink interface" , iface .Attrs ().Name )
154- continue
155- }
151+ for pciAddr , device := range netlinkDevices {
156152
157- if ignoredInterfaceNames .Has (iface .Attrs ().Name ) {
158- klog .V (4 ).Infof ("iface %s is in the list of ignored interfaces" , iface .Attrs ().Name )
153+ // I don't think we want to add un-named network interfaces
154+ ifName , ok := device .Basic .Attributes ["dra.net/ifName" ]
155+ if ! ok {
159156 continue
160157 }
161158
162- // skip loopback interfaces
163- if iface .Attrs ().Flags & net .FlagLoopback != 0 {
164- continue
159+ // If it has RDMA, mark as seen
160+ if rdmaName , err := rdmamap .GetRdmaDeviceForNetdevice (* ifName .StringValue ); err == nil && rdmaName != "" {
161+ klog .V (4 ).Infof ("Found netdev '%s' with associated RDMA device '%s'. Merging." , ifName , rdmaName )
162+
163+ // This is the value from getPciAddress
164+ seenRdmaDevices .Insert (pciAddr )
165165 }
166+ devices [* ifName .StringValue ] = device
167+
168+ }
169+ // We only allow rdma devices that have PCI address
170+ for pciAddr , rdmaDevice := range db .discoverRawRdmaDevices () {
166171
167- // publish this network interface
168- device , err := db .netdevToDRAdev (iface )
169- if err != nil {
170- klog .V (2 ).Infof ("could not obtain attributes for iface %s : %v" , iface .Attrs ().Name , err )
172+ // Have we already seen it?
173+ _ , ok := seenRdmaDevices [pciAddr ]
174+ if ok {
171175 continue
172176 }
177+ devices [rdmaDevice .Name ] = rdmaDevice
178+ }
173179
174- devices = append (devices , * device )
175- klog .V (4 ).Infof ("Found following network interface %s" , iface .Attrs ().Name )
180+ // Create final list to publish
181+ finalDevices := make ([]resourceapi.Device , 0 , len (devices ))
182+ for _ , device := range devices {
183+ finalDevices = append (finalDevices , * device )
176184 }
177185
178- klog .V (4 ).Infof ("Found %d devices" , len (devices ))
179- if len (devices ) > 0 {
180- db .notifications <- devices
186+ klog .V (4 ).Infof ("Found %d devices" , len (finalDevices ))
187+ if len (finalDevices ) > 0 {
188+ db .notifications <- finalDevices
181189 }
190+
191+ // Wait for the next event or timeout
182192 select {
183193 // trigger a reconcile
184194 case <- nlChannel :
@@ -197,6 +207,105 @@ func (db *DB) GetResources(ctx context.Context) <-chan []resourceapi.Device {
197207 return db .notifications
198208}
199209
210+ // discoverNetlinkDevices scans for kernel network interfaces
211+ func (db * DB ) discoverNetlinkDevices () (error , map [string ]* resourceapi.Device ) {
212+ klog .V (4 ).Info ("Starting netlink device discovery..." )
213+ devices := make (map [string ]* resourceapi.Device )
214+
215+ // TODO: it is not common but may happen in edge cases that the default gateway changes
216+ // revisit once we have more evidence this can be a potential problem or break some use
217+ // cases.
218+ gwInterfaces := getDefaultGwInterfaces ()
219+ nlHandle , err := netlink .NewHandle ()
220+ if err != nil {
221+ return fmt .Errorf ("error creating netlink handle %v" , err ), devices
222+ }
223+
224+ // Don't return early - we want print to user at end of function
225+ // @vsoch This is logic from previous refactored version.
226+ ifaces , err := nlHandle .LinkList ()
227+ if err != nil {
228+ klog .Error (err , "unexpected error trying to get system interfaces" )
229+ }
230+
231+ for _ , iface := range ifaces {
232+ attrs := iface .Attrs ()
233+
234+ klog .V (7 ).InfoS ("Checking network interface" , "name" , attrs .Name )
235+ if gwInterfaces .Has (attrs .Name ) {
236+ klog .V (4 ).Infof ("iface %s is an uplink interface" , attrs .Name )
237+ continue
238+ }
239+
240+ if ignoredInterfaceNames .Has (attrs .Name ) {
241+ klog .V (4 ).Infof ("iface %s is in the list of ignored interfaces" , attrs .Name )
242+ continue
243+ }
244+
245+ // skip loopback interfaces
246+ if attrs .Flags & net .FlagLoopback != 0 {
247+ continue
248+ }
249+
250+ // publish this network interface
251+ device , err := db .netdevToDRAdev (iface )
252+ if err != nil {
253+ klog .V (2 ).Infof ("could not obtain attributes for iface %s : %v" , attrs .Name , err )
254+ continue
255+ }
256+
257+ // This could be error prone if a missing address leads to a second entry in rdma devices
258+ pciAddr , err := getPciAddress (attrs .Name , sysnetPath )
259+ if err != nil {
260+ klog .Warningf ("could not get PCI address for netdev %s, using fallback key. error: %v" , attrs .Name , err )
261+ pciAddr = "netdev-" + attrs .Name
262+ }
263+ devices [pciAddr ] = device
264+ }
265+ klog .V (4 ).Infof ("Finished netlink discovery. Found %d devices." , len (devices ))
266+ return nil , devices
267+ }
268+
269+ // discoverRawRdmaDevices scans for raw RDMA devices using rdmamap listing
270+ func (db * DB ) discoverRawRdmaDevices () map [string ]* resourceapi.Device {
271+ klog .V (4 ).Info ("Starting raw RDMA device discovery..." )
272+ devices := make (map [string ]* resourceapi.Device )
273+
274+ // This was tested to work to list an Infiniband device without an associated netlink
275+ deviceNames := rdmamap .GetRdmaDeviceList ()
276+
277+ for _ , rdmaName := range deviceNames {
278+ pciAddr , err := getPciAddress (rdmaName , rdmamap .RdmaClassDir )
279+
280+ // Assume that a missing PCI address would be missing for both netlink and rdma (not sure if this is true)
281+ // I think there are cases when we wouldn't have one, but I want to be conservative and only
282+ // allow RDMA interfaces with associated PCI addresses. This can change if needed.
283+ if err != nil {
284+ klog .Warningf ("could not get PCI address for RDMA device %s, skipping: %v" , rdmaName , err )
285+ continue
286+ }
287+ sanitizedName := names .SetDeviceName (rdmaName )
288+
289+ // Create a new resourceapi device for the RDMA raw device
290+ device := & resourceapi.Device {
291+ Name : sanitizedName ,
292+ Basic : & resourceapi.BasicDevice {
293+ Attributes : map [resourceapi.QualifiedName ]resourceapi.DeviceAttribute {
294+ "dra.net/rdma" : {BoolValue : ptr .To (true )},
295+ "dra.net/ifName" : {StringValue : & rdmaName },
296+ // https://github.com/vishvananda/netlink/blob/master/nl/nl_linux.go#L143
297+ // This could also be ib, but "infiniband" is more clear
298+ "dra.net/type" : {StringValue : ptr .To ("infiniband" )},
299+ },
300+ },
301+ }
302+ addPCIAttributes (device .Basic , rdmaName , rdmamap .RdmaClassDir )
303+ devices [pciAddr ] = device
304+ }
305+ klog .V (4 ).Infof ("Finished raw RDMA discovery. Found %d devices." , len (devices ))
306+ return devices
307+ }
308+
200309func (db * DB ) netdevToDRAdev (link netlink.Link ) (* resourceapi.Device , error ) {
201310 ifName := link .Attrs ().Name
202311 device := resourceapi.Device {
@@ -207,6 +316,7 @@ func (db *DB) netdevToDRAdev(link netlink.Link) (*resourceapi.Device, error) {
207316 }
208317 // Set the device name. It will be normalized only if necessary.
209318 device .Name = names .SetDeviceName (ifName )
319+
210320 // expose the real interface name as an attribute in case it is normalized.
211321 device .Basic .Attributes ["dra.net/ifName" ] = resourceapi.DeviceAttribute {StringValue : & ifName }
212322
@@ -291,6 +401,7 @@ func (db *DB) netdevToDRAdev(link netlink.Link) (*resourceapi.Device, error) {
291401 return & device , nil
292402}
293403
404+ // addPCIAttributes adds dra.net device.Attributes
294405func addPCIAttributes (device * resourceapi.BasicDevice , ifName string , path string ) {
295406 device .Attributes ["dra.net/virtual" ] = resourceapi.DeviceAttribute {BoolValue : ptr .To (false )}
296407
@@ -303,7 +414,7 @@ func addPCIAttributes(device *resourceapi.BasicDevice, ifName string, path strin
303414 device .Attributes ["resource.kubernetes.io/pcieRoot" ] = resourceapi.DeviceAttribute {StringValue : & pcieRoot }
304415 }
305416 } else {
306- klog .Infof ("could not get pci root : %v" , err )
417+ klog .Infof ("could not get pci root for %s : %v" , ifName , err )
307418 }
308419
309420 entry , err := ids (ifName , path )
@@ -318,11 +429,21 @@ func addPCIAttributes(device *resourceapi.BasicDevice, ifName string, path strin
318429 device .Attributes ["dra.net/pciSubsystem" ] = resourceapi.DeviceAttribute {StringValue : & entry .Subsystem }
319430 }
320431 } else {
321- klog .Infof ("could not get pci vendor information : %v" , err )
432+ klog .Infof ("could not get pci vendor information for %s : %v" , ifName , err )
322433 }
323434
324435 numa , err := numaNode (ifName , path )
325436 if err == nil {
326437 device .Attributes ["dra.net/numaNode" ] = resourceapi.DeviceAttribute {IntValue : & numa }
327438 }
328439}
440+
441+ // getPciAddress is a helper function to get PCI address
442+ func getPciAddress (ifName string , basePath string ) (string , error ) {
443+ devicePath := filepath .Join (basePath , ifName , "device" )
444+ pciLinkTarget , err := os .Readlink (devicePath )
445+ if err != nil {
446+ return "" , err
447+ }
448+ return filepath .Base (pciLinkTarget ), nil
449+ }
0 commit comments