@@ -390,6 +390,17 @@ func (c *Client) ensureConnected() error {
390390 return c .connect ()
391391}
392392
393+ // requireDriver checks if the driver is initialized and returns an error if not.
394+ // This helper centralizes nil driver checks and provides consistent error messages.
395+ //
396+ // PRECONDITION: Caller must hold c.mu (read or write lock)
397+ func (c * Client ) requireDriver (operation string ) error {
398+ if c .driver == nil {
399+ return fmt .Errorf ("operation %s failed: driver is nil (connection closed)" , operation )
400+ }
401+ return nil
402+ }
403+
393404// Close closes the NETCONF session and cleans up resources
394405//
395406// This sends a close-session RPC to the server and closes the underlying
@@ -467,6 +478,11 @@ func (c *Client) Get(ctx context.Context, filter Filter, mods ...func(*Req)) (Re
467478 c .mu .RLock ()
468479 defer c .mu .RUnlock ()
469480
481+ // Defensive check: Verify driver still valid after acquiring lock
482+ if err := c .requireDriver ("get" ); err != nil {
483+ return Res {}, err
484+ }
485+
470486 // Build request
471487 req := & Req {
472488 Operation : "get" ,
@@ -501,6 +517,11 @@ func (c *Client) GetConfig(ctx context.Context, source string, filter Filter, mo
501517 c .mu .RLock ()
502518 defer c .mu .RUnlock ()
503519
520+ // Defensive check: Verify driver still valid after acquiring lock
521+ if err := c .requireDriver ("get-config" ); err != nil {
522+ return Res {}, err
523+ }
524+
504525 // Build request
505526 req := & Req {
506527 Operation : "get-config" ,
@@ -539,6 +560,11 @@ func (c *Client) EditConfig(ctx context.Context, target, config string, mods ...
539560 c .mu .Lock ()
540561 defer c .mu .Unlock ()
541562
563+ // Defensive check: Verify driver still valid after acquiring lock
564+ if err := c .requireDriver ("edit-config" ); err != nil {
565+ return Res {}, err
566+ }
567+
542568 // Build request
543569 req := & Req {
544570 Operation : "edit-config" ,
@@ -572,6 +598,11 @@ func (c *Client) CopyConfig(ctx context.Context, source, target string, mods ...
572598 c .mu .Lock ()
573599 defer c .mu .Unlock ()
574600
601+ // Defensive check: Verify driver still valid after acquiring lock
602+ if err := c .requireDriver ("copy-config" ); err != nil {
603+ return Res {}, err
604+ }
605+
575606 // Build request
576607 req := & Req {
577608 Operation : "copy-config" ,
@@ -607,6 +638,11 @@ func (c *Client) DeleteConfig(ctx context.Context, target string, mods ...func(*
607638 c .mu .Lock ()
608639 defer c .mu .Unlock ()
609640
641+ // Defensive check: Verify driver still valid after acquiring lock
642+ if err := c .requireDriver ("delete-config" ); err != nil {
643+ return Res {}, err
644+ }
645+
610646 // Additional check: only startup can be deleted per RFC 6241
611647 target = strings .TrimSpace (strings .ToLower (target ))
612648 if target != "startup" {
@@ -634,14 +670,21 @@ func (c *Client) DeleteConfig(ctx context.Context, target string, mods ...func(*
634670//
635671// Per RFC 6241 Section 7.5, a lock prevents other NETCONF sessions from
636672// performing configuration changes. If another session holds the lock,
637- // Lock() will block until the lock becomes available or the operation times out.
673+ // Lock() will automatically retry with exponential backoff until the lock
674+ // becomes available or the context deadline is reached.
675+ //
676+ // Connection Guarantee: This method ensures a stable connection exists
677+ // before attempting to acquire the lock. This prevents race conditions in
678+ // concurrent environments.
638679//
639680// IMPORTANT: Always use defer to ensure locks are released even if errors occur.
640681// Failure to unlock can cause deadlocks and prevent other sessions from operating.
641682//
642683// Example:
643684//
644- // ctx := context.Background()
685+ // ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
686+ // defer cancel()
687+ //
645688// res, err := client.Lock(ctx, "candidate")
646689// if err != nil {
647690// log.Fatal(err)
@@ -668,18 +711,27 @@ func (c *Client) Lock(ctx context.Context, target string, mods ...func(*Req)) (R
668711 mod (req )
669712 }
670713
671- // Send RPC without holding mutex - driver has its own synchronization
672- // and waitForLockRelease may recursively call Lock() to test availability
714+ // Send RPC without holding c.mu - driver has its own synchronization.
715+ // This differs from other operations (Get, EditConfig, etc.) which hold
716+ // c.mu during sendRPC(). Lock/Unlock operations are NETCONF protocol
717+ // operations that don't access client state, so driver-level
718+ // synchronization is sufficient. Connection validation happens in
719+ // executeRPC() (line 1582).
673720 return c .sendRPC (ctx , req )
674721}
675722
676723// Unlock unlocks the specified datastore
677724//
725+ // Connection Guarantee: Like Lock(), this method ensures a stable connection exists
726+ // before attempting to release the lock.
727+ //
678728// See Lock() for complete lock/unlock documentation and proper usage with defer.
679729//
680730// Example:
681731//
682- // ctx := context.Background()
732+ // ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
733+ // defer cancel()
734+ //
683735// res, err := client.Lock(ctx, "candidate")
684736// if err != nil {
685737// log.Fatal(err)
@@ -702,7 +754,8 @@ func (c *Client) Unlock(ctx context.Context, target string, mods ...func(*Req))
702754 mod (req )
703755 }
704756
705- // Send RPC without holding mutex - driver has its own synchronization
757+ // Send RPC without holding c.mu - driver has its own synchronization.
758+ // See Lock() for explanation of why this differs from other operations.
706759 return c .sendRPC (ctx , req )
707760}
708761
@@ -754,6 +807,11 @@ func (c *Client) Commit(ctx context.Context, mods ...func(*Req)) (Res, error) {
754807 c .mu .Lock ()
755808 defer c .mu .Unlock ()
756809
810+ // Defensive check: Verify driver still valid after acquiring lock
811+ if err := c .requireDriver ("commit" ); err != nil {
812+ return Res {}, err
813+ }
814+
757815 // Build request
758816 req := & Req {
759817 Operation : "commit" ,
@@ -785,6 +843,11 @@ func (c *Client) Discard(ctx context.Context, mods ...func(*Req)) (Res, error) {
785843 c .mu .Lock ()
786844 defer c .mu .Unlock ()
787845
846+ // Defensive check: Verify driver still valid after acquiring lock
847+ if err := c .requireDriver ("discard" ); err != nil {
848+ return Res {}, err
849+ }
850+
788851 // Build request
789852 req := & Req {
790853 Operation : "discard" ,
@@ -819,6 +882,11 @@ func (c *Client) Validate(ctx context.Context, source string, mods ...func(*Req)
819882 c .mu .RLock ()
820883 defer c .mu .RUnlock ()
821884
885+ // Defensive check: Verify driver still valid after acquiring lock
886+ if err := c .requireDriver ("validate" ); err != nil {
887+ return Res {}, err
888+ }
889+
822890 // Build request
823891 req := & Req {
824892 Operation : "validate" ,
@@ -866,6 +934,11 @@ func (c *Client) RPC(ctx context.Context, rpcXML string, mods ...func(*Req)) (Re
866934 c .mu .Lock ()
867935 defer c .mu .Unlock ()
868936
937+ // Defensive check: Verify driver still valid after acquiring lock
938+ if err := c .requireDriver ("rpc" ); err != nil {
939+ return Res {}, err
940+ }
941+
869942 // Build request
870943 req := & Req {
871944 Operation : "rpc" ,
@@ -1274,64 +1347,23 @@ func (c *Client) Backoff(attempt int) time.Duration {
12741347 return time .Duration (delay )
12751348}
12761349
1277- // waitForLockRelease waits for a datastore lock to be released
1278- //
1279- // This method polls the device to check if the lock is still held and waits
1280- // for it to be released. Used when a lock-denied error is encountered.
1281- //
1282- // Parameters:
1283- // - ctx: Context for cancellation
1284- // - target: Datastore name ("running", "candidate", "startup")
1285- //
1286- // Returns an error if the lock is not released within LockReleaseTimeout.
1287- func (c * Client ) waitForLockRelease (ctx context.Context , target string ) error {
1288- c .logger .Info (ctx , "NETCONF waiting for lock release" ,
1289- "target" , target ,
1290- "timeout" , c .LockReleaseTimeout .String ())
1291-
1292- // Apply lock release timeout
1293- ctx , cancel := context .WithTimeout (ctx , c .LockReleaseTimeout )
1294- defer cancel ()
1295-
1296- // Poll interval (1 second)
1297- ticker := time .NewTicker (1 * time .Second )
1298- defer ticker .Stop ()
1299-
1300- for {
1301- select {
1302- case <- ctx .Done ():
1303- return ErrLockReleaseTimeout
1304- case <- ticker .C :
1305- // Try to acquire lock
1306- res , err := c .Lock (ctx , target )
1307- if err == nil && res .OK {
1308- // Lock acquired, release it immediately to verify availability
1309- // Note: ignoring unlock errors is intentional - we proved lock availability
1310- _ , _ = c .Unlock (ctx , target ) //nolint:errcheck // Intentional: verifying lock availability only
1311-
1312- c .logger .Info (ctx , "NETCONF lock acquired" ,
1313- "target" , target )
1314-
1315- return nil
1316- }
1317- // Lock still held, continue waiting
1318- }
1319- }
1320- }
1321-
13221350// reconnect attempts to reconnect the NETCONF session
13231351//
13241352// This method closes the existing connection and establishes a new one,
13251353// re-negotiating capabilities. Used internally when transport errors are
13261354// detected during retry logic.
13271355//
1328- // PRECONDITION: Caller must hold c.mu.Lock() ( write lock)
1356+ // PRECONDITION: Caller must NOT hold any locks (acquires write lock internally )
13291357//
13301358// Returns an error if reconnection fails.
13311359func (c * Client ) reconnect () error {
13321360 c .logger .Info (context .Background (), "NETCONF reconnecting" ,
13331361 "host" , c .Host )
13341362
1363+ // Acquire write lock for exclusive access to driver state
1364+ c .mu .Lock ()
1365+ defer c .mu .Unlock ()
1366+
13351367 // Close existing connection (ignore errors - connection may already be broken)
13361368 if c .driver != nil {
13371369 _ = c .driver .Close () //nolint:errcheck // Explicitly ignore error (connection likely already broken)
@@ -1342,6 +1374,49 @@ func (c *Client) reconnect() error {
13421374 return c .connect ()
13431375}
13441376
1377+ // handleTransportErrorReconnect handles transport error reconnection with proper lock management.
1378+ // This helper reduces cyclomatic complexity by extracting the lock release/reacquire logic.
1379+ //
1380+ // Parameters:
1381+ // - ctx: Context for logging
1382+ // - req: Request that failed with transport error
1383+ //
1384+ // Returns an error if reconnection fails.
1385+ func (c * Client ) handleTransportErrorReconnect (ctx context.Context , req * Req ) error {
1386+ c .logger .Info (ctx , "NETCONF transport error detected, reconnecting" ,
1387+ "operation" , req .Operation )
1388+
1389+ // Determine lock type held by caller based on operation type
1390+ // Read operations: get, get-config, validate (hold RLock)
1391+ // Write operations: edit-config, copy-config, delete-config, commit, discard, rpc (hold Lock)
1392+ // Lock operations: lock, unlock (hold no lock)
1393+ isReadOp := req .Operation == opGet || req .Operation == opGetConfig || req .Operation == opValidate
1394+ isWriteOp := req .Operation == opEditConfig || req .Operation == opCopyConfig ||
1395+ req .Operation == opDeleteConfig || req .Operation == opCommit ||
1396+ req .Operation == opDiscard || req .Operation == opRPC
1397+
1398+ // Release lock before reconnect (reconnect acquires its own write lock)
1399+ if isReadOp {
1400+ c .mu .RUnlock ()
1401+ } else if isWriteOp {
1402+ c .mu .Unlock ()
1403+ }
1404+ // Lock/Unlock operations don't hold c.mu, so nothing to release
1405+
1406+ // Attempt to reconnect (acquires and releases its own write lock)
1407+ reconnectErr := c .reconnect ()
1408+
1409+ // Reacquire original lock type
1410+ if isReadOp {
1411+ c .mu .RLock ()
1412+ } else if isWriteOp {
1413+ c .mu .Lock ()
1414+ }
1415+ // Lock/Unlock operations don't hold c.mu, so nothing to reacquire
1416+
1417+ return reconnectErr
1418+ }
1419+
13451420// sendRPC sends a NETCONF RPC request via scrapligo and parses the response
13461421//
13471422// This method handles all NETCONF operations by dispatching to the appropriate
@@ -1444,38 +1519,6 @@ func (c *Client) sendRPC(ctx context.Context, req *Req) (Res, error) {
14441519 "error" , err .Error ())
14451520 }
14461521
1447- // Handle lock-denied errors with polling before general backoff
1448- if isTransient {
1449- // Check if this is a lock-denied error
1450- hasLockDenied := false
1451- var lockTarget string
1452- for _ , rpcErr := range res .Errors {
1453- if rpcErr .ErrorTag == "lock-denied" || rpcErr .ErrorTag == "in-use" {
1454- hasLockDenied = true
1455- // Extract target from request
1456- lockTarget = req .Target
1457- break
1458- }
1459- }
1460-
1461- if hasLockDenied && lockTarget != "" {
1462- // Use lock-specific polling instead of exponential backoff
1463- if waitErr := c .waitForLockRelease (ctx , lockTarget ); waitErr != nil {
1464- // Timeout waiting for lock, return error
1465- return res , & NetconfError {
1466- Operation : req .Operation ,
1467- Errors : res .Errors ,
1468- Message : "lock wait timeout" ,
1469- InternalMsg : waitErr .Error (),
1470- Retries : attempt ,
1471- IsTransient : true ,
1472- }
1473- }
1474- // Lock released, retry immediately without backoff
1475- continue
1476- }
1477- }
1478-
14791522 // Check for transport/connection errors that require reconnection
14801523 // This includes both NETCONF <rpc-error> transport errors and scrapligo Go errors
14811524 hasTransportError := c .hasTransportError (res .Errors , err )
@@ -1522,12 +1565,8 @@ func (c *Client) sendRPC(ctx context.Context, req *Req) (Res, error) {
15221565
15231566 // Handle transport errors with reconnection
15241567 if hasTransportError {
1525- c .logger .Info (ctx , "NETCONF transport error detected, reconnecting" ,
1526- "operation" , req .Operation ,
1527- "attempt" , attempt )
1528-
1529- // Attempt to reconnect
1530- if reconnectErr := c .reconnect (); reconnectErr != nil {
1568+ // Use helper to handle lock release/reacquire around reconnection
1569+ if reconnectErr := c .handleTransportErrorReconnect (ctx , req ); reconnectErr != nil {
15311570 // Reconnection failed, return original error
15321571 return res , & NetconfError {
15331572 Operation : req .Operation ,
@@ -1538,6 +1577,7 @@ func (c *Client) sendRPC(ctx context.Context, req *Req) (Res, error) {
15381577 IsTransient : true ,
15391578 }
15401579 }
1580+
15411581 c .logger .Info (ctx , "NETCONF reconnection successful" ,
15421582 "operation" , req .Operation ,
15431583 "sessionID" , c .sessionID )
0 commit comments