pytest: fix flake in test_reconnect_signed
We can fail the fundchannel because of a reconnect race: funder tells
us to reconnect so fast that we are still cleaning up from last time.
We deliberately defer clean up, to give the subds a chance to process any
final messages. However, on reconnection we force them to exit immediately:
but this causes new `connect` commands to see the exit and fail.
The workaround is to do this cleanup when a `connect` command is
issued (as well as the current case, which covers an automatic
reconnection or an incoming reconnection)
```
2025-05-09T00:40:37.1769508Z def test_reconnect_signed(node_factory):
2025-05-09T00:40:37.1770273Z # This will fail *after* both sides consider channel opening.
2025-05-09T00:40:37.1770850Z disconnects = ['<WIRE_FUNDING_SIGNED']
2025-05-09T00:40:37.1771298Z if EXPERIMENTAL_DUAL_FUND:
2025-05-09T00:40:37.1771735Z disconnects = ['<WIRE_COMMITMENT_SIGNED']
2025-05-09T00:40:37.1772155Z
2025-05-09T00:40:37.1772598Z l1 = node_factory.get_node(may_reconnect=True, disconnect=disconnects)
2025-05-09T00:40:37.1773210Z l2 = node_factory.get_node(may_reconnect=True)
2025-05-09T00:40:37.1773632Z
2025-05-09T00:40:37.1773917Z l1.fundwallet(2000000)
2025-05-09T00:40:37.1774268Z
2025-05-09T00:40:37.1774611Z l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
2025-05-09T00:40:37.1775151Z > l1.rpc.fundchannel(l2.info['id'], CHANNEL_SIZE)
2025-05-09T00:40:37.1775388Z
2025-05-09T00:40:37.1775485Z tests/test_connection.py:667:
...
2025-05-09T00:40:37.1799527Z > raise RpcError(method, payload, resp['error'])
2025-05-09T00:40:37.1800993Z E pyln.client.lightning.RpcError: RPC call failed: method: fundchannel, payload: {'id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', 'amount': 50000, 'announce': True}, error: {'code': -1, 'message': 'Disconnected', 'data': {'id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', 'method': 'openchannel_update'}}
```
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
@@ -236,6 +236,12 @@ static struct command_result *json_connect(struct command *cmd,
|
||||
&peer->addr);
|
||||
}
|
||||
|
||||
/* When a peer disconnects, we give subds time to clean themselves up
|
||||
* (this lets connectd ensure they've seen the final messages). But
|
||||
* now it's going to try to reconnect, we've gotta force them out. */
|
||||
if (peer)
|
||||
peer_channels_cleanup(peer);
|
||||
|
||||
subd_send_msg(cmd->ld->connectd,
|
||||
take(towire_connectd_connect_to_peer(NULL, &id_addr.id, addr, true)));
|
||||
|
||||
|
||||
@@ -177,16 +177,10 @@ void maybe_delete_peer(struct peer *peer)
|
||||
delete_peer(peer);
|
||||
}
|
||||
|
||||
static void peer_channels_cleanup(struct lightningd *ld,
|
||||
const struct node_id *id)
|
||||
void peer_channels_cleanup(struct peer *peer)
|
||||
{
|
||||
struct peer *peer;
|
||||
struct channel *c, **channels;
|
||||
|
||||
peer = peer_by_id(ld, id);
|
||||
if (!peer)
|
||||
return;
|
||||
|
||||
/* Freeing channels can free peer, so gather first. */
|
||||
channels = tal_arr(tmpctx, struct channel *, 0);
|
||||
list_for_each(&peer->channels, c, list)
|
||||
@@ -1726,11 +1720,6 @@ void peer_connected(struct lightningd *ld, const u8 *msg)
|
||||
fatal("Connectd gave bad CONNECT_PEER_CONNECTED message %s",
|
||||
tal_hex(msg, msg));
|
||||
|
||||
/* When a peer disconnects, we give subds time to clean themselves up
|
||||
* (this lets connectd ensure they've seen the final messages). But
|
||||
* now it's reconnected, we've gotta force them out. */
|
||||
peer_channels_cleanup(ld, &id);
|
||||
|
||||
/* If we connected, and it's a normal address */
|
||||
if (!hook_payload->incoming
|
||||
&& hook_payload->addr.itype == ADDR_INTERNAL_WIREADDR
|
||||
@@ -1743,6 +1732,15 @@ void peer_connected(struct lightningd *ld, const u8 *msg)
|
||||
/* If we're already dealing with this peer, hand off to correct
|
||||
* subdaemon. Otherwise, we'll hand to openingd to wait there. */
|
||||
peer = peer_by_id(ld, &id);
|
||||
if (peer) {
|
||||
/* When a peer disconnects, we give subds time to clean themselves up
|
||||
* (this lets connectd ensure they've seen the final messages). But
|
||||
* now it's reconnected, we've gotta force them out. This might free
|
||||
* the peer! */
|
||||
peer_channels_cleanup(peer);
|
||||
peer = peer_by_id(ld, &id);
|
||||
}
|
||||
|
||||
if (!peer) {
|
||||
/* If we connected to them, we know this is a good address. */
|
||||
peer = new_peer(ld, 0, &id, &hook_payload->addr,
|
||||
|
||||
@@ -165,6 +165,9 @@ command_find_channel(struct command *cmd,
|
||||
const char *buffer, const jsmntok_t *tok,
|
||||
struct channel **channel);
|
||||
|
||||
/* We do this lazily, when reconnecting */
|
||||
void peer_channels_cleanup(struct peer *peer);
|
||||
|
||||
/* Ancient (0.7.0 and before) releases could create invalid commitment txs! */
|
||||
bool invalid_last_tx(const struct bitcoin_tx *tx);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user