Skip to content

Commit 4dc16ff

Browse files
committed
nvmeof: treat "connecting" state as valid in path detection
When checking if a path to a gateway already exists, treat both "live" and "connecting" states as valid connections that should not be re-attempted. The "connecting" state indicates the NVMe kernel is actively trying to establish or re-establish a connection, which occurs in scenarios like: - Initial connection establishment - Gateway temporarily unavailable and kernel retrying - Subsystem deleted and recreated on the gateway The kernel's ctrl_loss_tmo mechanism will continue retry attempts for up to 30 minutes (as set by the -l parameter of the nvme connect command). Attempting nvme connect while a path is in "connecting" state results in "already connected" errors and can cause volume attachment failures during create/delete cycles. By treating "connecting" as a valid state, we allow the kernel's retry logic to handle reconnection automatically without interference. Signed-off-by: gadi-didi <gadi.didi@ibm.com>
1 parent 80c0474 commit 4dc16ff

File tree

2 files changed

+21
-6
lines changed

2 files changed

+21
-6
lines changed

internal/nvmeof/nvmeof_initiator.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ func (ni *nvmeInitiator) ConnectSubsystem(ctx context.Context, req *ConnectReque
158158

159159
// Check if already connected to this specific gateway
160160
if req.HostNQN != "" && existingConnections != nil {
161-
if existingConnections.hasLivePathToGateway(
161+
if existingConnections.hasPathToGateway(
162162
req.SubsystemNQN, req.HostNQN, listener.Address, portStr) {
163163
log.DebugLog(ctx, "Already connected to subsystem %s via %s:%s with HostNQN %s",
164164
req.SubsystemNQN, listener.Address, portStr, req.HostNQN)
@@ -247,8 +247,8 @@ func listSubsystems(ctx context.Context) (nvmeHostConnections, error) {
247247
return hosts, nil
248248
}
249249

250-
// hasLivePathToGateway checks if a live path exists to the specified gateway.
251-
func (nhc nvmeHostConnections) hasLivePathToGateway(subsystemNQN, hostNQN,
250+
// hasPathToGateway checks if a path in the "live" or "connecting" state exists to the specified gateway.
251+
func (nhc nvmeHostConnections) hasPathToGateway(subsystemNQN, hostNQN,
252252
gatewayIP, gatewayPort string,
253253
) bool {
254254
for _, host := range nhc {
@@ -261,10 +261,25 @@ func (nhc nvmeHostConnections) hasLivePathToGateway(subsystemNQN, hostNQN,
261261
continue
262262
}
263263

264+
// loop through paths to find matching path
264265
for _, path := range subsys.Paths {
266+
// Check if the path matches the gateway IP and port
267+
// and is in a usable state:
268+
// - "live": connection is active and working
269+
// - "connecting": kernel is actively trying to (re)connect
270+
//
271+
// The "connecting" state occurs when:
272+
// 1. Initial connection is being established
273+
// 2. Connection lost and kernel is retrying (ctrl_loss_tmo in effect)
274+
// 3. Subsystem was deleted/recreated on the gateway
275+
//
276+
// In all cases, the kernel's retry mechanism handles reconnection
277+
// for up to ctrl_loss_tmo seconds, so we should not attempt another
278+
// connection which would fail with "already connected" error.
265279
if path.Address.Traddr == gatewayIP &&
266280
path.Address.Trsvcid == gatewayPort &&
267-
path.State == "live" {
281+
(path.State == "live" ||
282+
path.State == "connecting") {
268283
return true
269284
}
270285
}

internal/nvmeof/nvmeof_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ func TestHasLivePathToGateway(t *testing.T) {
317317
hostNQN: "nqn.2014-08.org.nvmexpress:uuid:test-host",
318318
gatewayIP: "10.129.2.45",
319319
gatewayPort: "4420",
320-
want: false,
320+
want: true,
321321
},
322322
{
323323
name: "wrong gateway IP",
@@ -357,7 +357,7 @@ func TestHasLivePathToGateway(t *testing.T) {
357357
t.Run(tt.name, func(t *testing.T) {
358358
t.Parallel()
359359

360-
got := connections.hasLivePathToGateway(
360+
got := connections.hasPathToGateway(
361361
tt.subsystemNQN,
362362
tt.hostNQN,
363363
tt.gatewayIP,

0 commit comments

Comments
 (0)