pytest: fix flake in test_reconnect_signed

We can fail the fundchannel because of a reconnect race: funder tells us to reconnect so fast that we are still cleaning up from last time. We deliberately defer clean up, to give the subds a chance to process any final messages. However, on reconnection we force them to exit immediately: but this causes new `connect` commands to see the exit and fail. The workaround is to do this cleanup when a `connect` command is issued (as well as the current case, which covers an automatic reconnection or an incoming reconnection) ``` 2025-05-09T00:40:37.1769508Z def test_reconnect_signed(node_factory): 2025-05-09T00:40:37.1770273Z # This will fail *after* both sides consider channel opening. 2025-05-09T00:40:37.1770850Z disconnects = ['<WIRE_FUNDING_SIGNED'] 2025-05-09T00:40:37.1771298Z if EXPERIMENTAL_DUAL_FUND: 2025-05-09T00:40:37.1771735Z disconnects = ['<WIRE_COMMITMENT_SIGNED'] 2025-05-09T00:40:37.1772155Z 2025-05-09T00:40:37.1772598Z l1 = node_factory.get_node(may_reconnect=True, disconnect=disconnects) 2025-05-09T00:40:37.1773210Z l2 = node_factory.get_node(may_reconnect=True) 2025-05-09T00:40:37.1773632Z 2025-05-09T00:40:37.1773917Z l1.fundwallet(2000000) 2025-05-09T00:40:37.1774268Z 2025-05-09T00:40:37.1774611Z l1.rpc.connect(l2.info['id'], 'localhost', l2.port) 2025-05-09T00:40:37.1775151Z > l1.rpc.fundchannel(l2.info['id'], CHANNEL_SIZE) 2025-05-09T00:40:37.1775388Z 2025-05-09T00:40:37.1775485Z tests/test_connection.py:667: ... 2025-05-09T00:40:37.1799527Z > raise RpcError(method, payload, resp['error']) 2025-05-09T00:40:37.1800993Z E pyln.client.lightning.RpcError: RPC call failed: method: fundchannel, payload: {'id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', 'amount': 50000, 'announce': True}, error: {'code': -1, 'message': 'Disconnected', 'data': {'id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', 'method': 'openchannel_update'}} ``` Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
2025-05-10 20:11:47 +09:30
parent 9ce3f5dde4
commit 65dccea5bd
3 changed files with 19 additions and 12 deletions
--- a/lightningd/connect_control.c
+++ b/lightningd/connect_control.c
@@ -236,6 +236,12 @@ static struct command_result *json_connect(struct command *cmd,
 					   &peer->addr);
 	}

+	/* When a peer disconnects, we give subds time to clean themselves up
+	 * (this lets connectd ensure they've seen the final messages).  But
+	 * now it's going to try to reconnect, we've gotta force them out. */
+	if (peer)
+		peer_channels_cleanup(peer);
+
 	subd_send_msg(cmd->ld->connectd,
 		      take(towire_connectd_connect_to_peer(NULL, &id_addr.id, addr, true)));

--- a/lightningd/peer_control.c
+++ b/lightningd/peer_control.c
@@ -177,16 +177,10 @@ void maybe_delete_peer(struct peer *peer)
 	delete_peer(peer);
 }

-static void peer_channels_cleanup(struct lightningd *ld,
-				  const struct node_id *id)
+void peer_channels_cleanup(struct peer *peer)
 {
-	struct peer *peer;
 	struct channel *c, **channels;

-	peer = peer_by_id(ld, id);
-	if (!peer)
-		return;
-
 	/* Freeing channels can free peer, so gather first. */
 	channels = tal_arr(tmpctx, struct channel *, 0);
 	list_for_each(&peer->channels, c, list)
@@ -1726,11 +1720,6 @@ void peer_connected(struct lightningd *ld, const u8 *msg)
 		fatal("Connectd gave bad CONNECT_PEER_CONNECTED message %s",
 		      tal_hex(msg, msg));

-	/* When a peer disconnects, we give subds time to clean themselves up
-	 * (this lets connectd ensure they've seen the final messages).  But
-	 * now it's reconnected, we've gotta force them out. */
-	peer_channels_cleanup(ld, &id);
-
 	/* If we connected, and it's a normal address */
 	if (!hook_payload->incoming
 	    && hook_payload->addr.itype == ADDR_INTERNAL_WIREADDR
@@ -1743,6 +1732,15 @@ void peer_connected(struct lightningd *ld, const u8 *msg)
 	/* If we're already dealing with this peer, hand off to correct
 	 * subdaemon.  Otherwise, we'll hand to openingd to wait there. */
 	peer = peer_by_id(ld, &id);
+	if (peer) {
+		/* When a peer disconnects, we give subds time to clean themselves up
+		 * (this lets connectd ensure they've seen the final messages).  But
+		 * now it's reconnected, we've gotta force them out.  This might free
+		 * the peer! */
+		peer_channels_cleanup(peer);
+		peer = peer_by_id(ld, &id);
+	}
+
 	if (!peer) {
 		/* If we connected to them, we know this is a good address. */
 		peer = new_peer(ld, 0, &id, &hook_payload->addr,
--- a/lightningd/peer_control.h
+++ b/lightningd/peer_control.h
@@ -165,6 +165,9 @@ command_find_channel(struct command *cmd,
 		     const char *buffer, const jsmntok_t *tok,
 		     struct channel **channel);

+/* We do this lazily, when reconnecting */
+void peer_channels_cleanup(struct peer *peer);
+
 /* Ancient (0.7.0 and before) releases could create invalid commitment txs! */
 bool invalid_last_tx(const struct bitcoin_tx *tx);