Skip to content

Commit f87ab2b

Browse files
committed
fix network DB desync after failed connect/disconnect
Networks are stored in two ways in the DB, first a static network list which holds all the network with its option for the container. Second, the network status which hold the actual network result from netavark but only when the container is running. If the container is running they must be in sync and podman inspect has checks to ensure that as well it errors out of there is a desync between the two. As the adding to the db and doing actual networking configuration are diffeent parts it possible that one worked while the other failed which triggers the desync. To avoid this make the network connect/disconnect code more robust against partial failures. When the network calls fail we update the db again to remove/add the network back. Fixes: https://issues.redhat.com/browse/RHEL-78037 Signed-off-by: Paul Holzinger <[email protected]>
1 parent 0a0d05b commit f87ab2b

File tree

2 files changed

+28
-1
lines changed

2 files changed

+28
-1
lines changed

libpod/networking_common.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) erro
378378
return err
379379
}
380380

381-
_, nameExists := networks[netName]
381+
netOpts, nameExists := networks[netName]
382382
if !nameExists && len(networks) > 0 {
383383
return fmt.Errorf("container %s is not connected to network %s", nameOrID, netName)
384384
}
@@ -393,12 +393,20 @@ func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) erro
393393
return err
394394
}
395395

396+
// Since we removed the new network from the container db we must have to add it back during partial setup errors
397+
addContainerNetworkToDB := func() {
398+
if err := c.runtime.state.NetworkConnect(c, netName, netOpts); err != nil {
399+
logrus.Errorf("Failed to add network %s for container %s to DB after failed network disconnect", netName, nameOrID)
400+
}
401+
}
402+
396403
c.newNetworkEvent(events.NetworkDisconnect, netName)
397404
if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
398405
return nil
399406
}
400407

401408
if c.state.NetNS == "" {
409+
addContainerNetworkToDB()
402410
return fmt.Errorf("unable to disconnect %s from %s: %w", nameOrID, netName, define.ErrNoNetwork)
403411
}
404412

@@ -412,6 +420,7 @@ func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) erro
412420
}
413421

414422
if err := c.runtime.teardownNetworkBackend(c.state.NetNS, opts); err != nil {
423+
addContainerNetworkToDB()
415424
return err
416425
}
417426

@@ -524,11 +533,20 @@ func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNe
524533

525534
return err
526535
}
536+
537+
// Since we added the new network to the container db we must have to remove it from that during partial setup errors
538+
removeContainerNetworkFromDB := func() {
539+
if err := c.runtime.state.NetworkDisconnect(c, netName); err != nil {
540+
logrus.Errorf("Failed to remove network %s for container %s from DB after failed network connect", netName, nameOrID)
541+
}
542+
}
543+
527544
c.newNetworkEvent(events.NetworkConnect, netName)
528545
if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
529546
return nil
530547
}
531548
if c.state.NetNS == "" {
549+
removeContainerNetworkFromDB()
532550
return fmt.Errorf("unable to connect %s to %s: %w", nameOrID, netName, define.ErrNoNetwork)
533551
}
534552

@@ -543,6 +561,7 @@ func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNe
543561

544562
results, err := c.runtime.setUpNetwork(c.state.NetNS, opts)
545563
if err != nil {
564+
removeContainerNetworkFromDB()
546565
return err
547566
}
548567
if len(results) != 1 {

test/system/500-networking.bats

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,14 @@ load helpers.network
583583
run_podman network connect $netname $background_cid
584584
is "$output" "" "(re)connect of container with no open ports"
585585

586+
# connect a network with an intentional error (bad mac address)
587+
run_podman 125 network connect --mac-address 00:00:00:00:00:00 $netname2 $cid
588+
assert "$output" =~ "Cannot assign requested address" "mac address error"
589+
590+
# podman inspect must still work correctly and not error due network desync
591+
run_podman inspect --format '{{ range $index, $value := .NetworkSettings.Networks }}{{$index}}{{end}}' $cid
592+
assert "$output" == "$netname" "only network1 must be connected"
593+
586594
# connect a second network
587595
run_podman network connect $netname2 $cid
588596
is "$output" "" "Output should be empty (no errors)"

0 commit comments

Comments
 (0)