From 65dccea5bde426edc04b746b2c3073636dcd885e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 May 2025 20:11:47 +0930 Subject: [PATCH] pytest: fix flake in test_reconnect_signed We can fail the fundchannel because of a reconnect race: funder tells us to reconnect so fast that we are still cleaning up from last time. We deliberately defer clean up, to give the subds a chance to process any final messages. However, on reconnection we force them to exit immediately: but this causes new `connect` commands to see the exit and fail. The workaround is to do this cleanup when a `connect` command is issued (as well as the current case, which covers an automatic reconnection or an incoming reconnection) ``` 2025-05-09T00:40:37.1769508Z def test_reconnect_signed(node_factory): 2025-05-09T00:40:37.1770273Z # This will fail *after* both sides consider channel opening. 2025-05-09T00:40:37.1770850Z disconnects = [' l1.rpc.fundchannel(l2.info['id'], CHANNEL_SIZE) 2025-05-09T00:40:37.1775388Z 2025-05-09T00:40:37.1775485Z tests/test_connection.py:667: ... 2025-05-09T00:40:37.1799527Z > raise RpcError(method, payload, resp['error']) 2025-05-09T00:40:37.1800993Z E pyln.client.lightning.RpcError: RPC call failed: method: fundchannel, payload: {'id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', 'amount': 50000, 'announce': True}, error: {'code': -1, 'message': 'Disconnected', 'data': {'id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', 'method': 'openchannel_update'}} ``` Signed-off-by: Rusty Russell --- lightningd/connect_control.c | 6 ++++++ lightningd/peer_control.c | 22 ++++++++++------------ lightningd/peer_control.h | 3 +++ 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/lightningd/connect_control.c b/lightningd/connect_control.c index ceb41d302..761e6d307 100644 --- a/lightningd/connect_control.c +++ b/lightningd/connect_control.c @@ -236,6 +236,12 @@ static struct command_result *json_connect(struct command *cmd, &peer->addr); } + /* When a peer disconnects, we give subds time to clean themselves up + * (this lets connectd ensure they've seen the final messages). But + * now it's going to try to reconnect, we've gotta force them out. */ + if (peer) + peer_channels_cleanup(peer); + subd_send_msg(cmd->ld->connectd, take(towire_connectd_connect_to_peer(NULL, &id_addr.id, addr, true))); diff --git a/lightningd/peer_control.c b/lightningd/peer_control.c index 97f50b8b9..56ea80828 100644 --- a/lightningd/peer_control.c +++ b/lightningd/peer_control.c @@ -177,16 +177,10 @@ void maybe_delete_peer(struct peer *peer) delete_peer(peer); } -static void peer_channels_cleanup(struct lightningd *ld, - const struct node_id *id) +void peer_channels_cleanup(struct peer *peer) { - struct peer *peer; struct channel *c, **channels; - peer = peer_by_id(ld, id); - if (!peer) - return; - /* Freeing channels can free peer, so gather first. */ channels = tal_arr(tmpctx, struct channel *, 0); list_for_each(&peer->channels, c, list) @@ -1726,11 +1720,6 @@ void peer_connected(struct lightningd *ld, const u8 *msg) fatal("Connectd gave bad CONNECT_PEER_CONNECTED message %s", tal_hex(msg, msg)); - /* When a peer disconnects, we give subds time to clean themselves up - * (this lets connectd ensure they've seen the final messages). But - * now it's reconnected, we've gotta force them out. */ - peer_channels_cleanup(ld, &id); - /* If we connected, and it's a normal address */ if (!hook_payload->incoming && hook_payload->addr.itype == ADDR_INTERNAL_WIREADDR @@ -1743,6 +1732,15 @@ void peer_connected(struct lightningd *ld, const u8 *msg) /* If we're already dealing with this peer, hand off to correct * subdaemon. Otherwise, we'll hand to openingd to wait there. */ peer = peer_by_id(ld, &id); + if (peer) { + /* When a peer disconnects, we give subds time to clean themselves up + * (this lets connectd ensure they've seen the final messages). But + * now it's reconnected, we've gotta force them out. This might free + * the peer! */ + peer_channels_cleanup(peer); + peer = peer_by_id(ld, &id); + } + if (!peer) { /* If we connected to them, we know this is a good address. */ peer = new_peer(ld, 0, &id, &hook_payload->addr, diff --git a/lightningd/peer_control.h b/lightningd/peer_control.h index 88a5f8866..84d11f2ca 100644 --- a/lightningd/peer_control.h +++ b/lightningd/peer_control.h @@ -165,6 +165,9 @@ command_find_channel(struct command *cmd, const char *buffer, const jsmntok_t *tok, struct channel **channel); +/* We do this lazily, when reconnecting */ +void peer_channels_cleanup(struct peer *peer); + /* Ancient (0.7.0 and before) releases could create invalid commitment txs! */ bool invalid_last_tx(const struct bitcoin_tx *tx);