pytest: fix flake in test_reconnect_signed

We can fail the fundchannel because of a reconnect race: funder tells
us to reconnect so fast that we are still cleaning up from last time.

We deliberately defer clean up, to give the subds a chance to process any
final messages.  However, on reconnection we force them to exit immediately:
but this causes new `connect` commands to see the exit and fail.

The workaround is to do this cleanup when a `connect` command is
issued (as well as the current case, which covers an automatic
reconnection or an incoming reconnection)

```
2025-05-09T00:40:37.1769508Z     def test_reconnect_signed(node_factory):
2025-05-09T00:40:37.1770273Z         # This will fail *after* both sides consider channel opening.
2025-05-09T00:40:37.1770850Z         disconnects = ['<WIRE_FUNDING_SIGNED']
2025-05-09T00:40:37.1771298Z         if EXPERIMENTAL_DUAL_FUND:
2025-05-09T00:40:37.1771735Z             disconnects = ['<WIRE_COMMITMENT_SIGNED']
2025-05-09T00:40:37.1772155Z     
2025-05-09T00:40:37.1772598Z         l1 = node_factory.get_node(may_reconnect=True, disconnect=disconnects)
2025-05-09T00:40:37.1773210Z         l2 = node_factory.get_node(may_reconnect=True)
2025-05-09T00:40:37.1773632Z     
2025-05-09T00:40:37.1773917Z         l1.fundwallet(2000000)
2025-05-09T00:40:37.1774268Z     
2025-05-09T00:40:37.1774611Z         l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
2025-05-09T00:40:37.1775151Z >       l1.rpc.fundchannel(l2.info['id'], CHANNEL_SIZE)
2025-05-09T00:40:37.1775388Z 
2025-05-09T00:40:37.1775485Z tests/test_connection.py:667:
...
2025-05-09T00:40:37.1799527Z >           raise RpcError(method, payload, resp['error'])
2025-05-09T00:40:37.1800993Z E           pyln.client.lightning.RpcError: RPC call failed: method: fundchannel, payload: {'id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', 'amount': 50000, 'announce': True}, error: {'code': -1, 'message': 'Disconnected', 'data': {'id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', 'method': 'openchannel_update'}}
```

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell
2025-05-10 20:11:47 +09:30
parent 9ce3f5dde4
commit 65dccea5bd
3 changed files with 19 additions and 12 deletions

View File

@@ -236,6 +236,12 @@ static struct command_result *json_connect(struct command *cmd,
&peer->addr);
}
/* When a peer disconnects, we give subds time to clean themselves up
* (this lets connectd ensure they've seen the final messages). But
* now it's going to try to reconnect, we've gotta force them out. */
if (peer)
peer_channels_cleanup(peer);
subd_send_msg(cmd->ld->connectd,
take(towire_connectd_connect_to_peer(NULL, &id_addr.id, addr, true)));

View File

@@ -177,16 +177,10 @@ void maybe_delete_peer(struct peer *peer)
delete_peer(peer);
}
static void peer_channels_cleanup(struct lightningd *ld,
const struct node_id *id)
void peer_channels_cleanup(struct peer *peer)
{
struct peer *peer;
struct channel *c, **channels;
peer = peer_by_id(ld, id);
if (!peer)
return;
/* Freeing channels can free peer, so gather first. */
channels = tal_arr(tmpctx, struct channel *, 0);
list_for_each(&peer->channels, c, list)
@@ -1726,11 +1720,6 @@ void peer_connected(struct lightningd *ld, const u8 *msg)
fatal("Connectd gave bad CONNECT_PEER_CONNECTED message %s",
tal_hex(msg, msg));
/* When a peer disconnects, we give subds time to clean themselves up
* (this lets connectd ensure they've seen the final messages). But
* now it's reconnected, we've gotta force them out. */
peer_channels_cleanup(ld, &id);
/* If we connected, and it's a normal address */
if (!hook_payload->incoming
&& hook_payload->addr.itype == ADDR_INTERNAL_WIREADDR
@@ -1743,6 +1732,15 @@ void peer_connected(struct lightningd *ld, const u8 *msg)
/* If we're already dealing with this peer, hand off to correct
* subdaemon. Otherwise, we'll hand to openingd to wait there. */
peer = peer_by_id(ld, &id);
if (peer) {
/* When a peer disconnects, we give subds time to clean themselves up
* (this lets connectd ensure they've seen the final messages). But
* now it's reconnected, we've gotta force them out. This might free
* the peer! */
peer_channels_cleanup(peer);
peer = peer_by_id(ld, &id);
}
if (!peer) {
/* If we connected to them, we know this is a good address. */
peer = new_peer(ld, 0, &id, &hook_payload->addr,

View File

@@ -165,6 +165,9 @@ command_find_channel(struct command *cmd,
const char *buffer, const jsmntok_t *tok,
struct channel **channel);
/* We do this lazily, when reconnecting */
void peer_channels_cleanup(struct peer *peer);
/* Ancient (0.7.0 and before) releases could create invalid commitment txs! */
bool invalid_last_tx(const struct bitcoin_tx *tx);