From 3c6504a99ed3f83b5f4fb63bc1e5764c23099761 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 16 Feb 2026 17:38:33 +1030 Subject: [PATCH] askrene: limit how many children we have. Queue them before we query local channels, so they don't use stale information. Changelog-Added: Config: `askrene-max-threads` to control how many CPUs we use for routing (default 4). Signed-off-by: Rusty Russell --- doc/lightningd-config.5.md | 4 ++ plugins/askrene/askrene.c | 75 ++++++++++++++++++++++++++++++++------ plugins/askrene/askrene.h | 6 +++ 3 files changed, 73 insertions(+), 12 deletions(-) diff --git a/doc/lightningd-config.5.md b/doc/lightningd-config.5.md index 27b6901a8..38f43312b 100644 --- a/doc/lightningd-config.5.md +++ b/doc/lightningd-config.5.md @@ -563,6 +563,10 @@ command, so they invoices can also be paid onchain. This option makes the `getroutes` call fail if it takes more than this many seconds. Setting it to zero is a fun way to ensure your node never makes payments. +* **askrene-max-threads**=*NUMBER* [plugin `askrene`, *dynamic*] + + This option controls how many routes askrene will calculate at once: this is only useful on nodes which make multiple payments at once, and setting the number higher than your number of cores/CPUs will not help. The default is 4. 
+ ### Networking options Note that for simple setups, the implicit *autolisten* option does the diff --git a/plugins/askrene/askrene.c b/plugins/askrene/askrene.c index 94fe5d715..4910b858a 100644 --- a/plugins/askrene/askrene.c +++ b/plugins/askrene/askrene.c @@ -338,6 +338,8 @@ param_algorithm(struct command *cmd, const char *name, const char *buffer, } struct getroutes_info { + /* We keep this around in askrene->waiters if we're busy */ + struct list_node list; struct command *cmd; struct node_id source, dest; struct amount_msat amount, maxfee; @@ -650,7 +652,6 @@ static struct command_result *do_getroutes(struct command *cmd, child->log_conn = io_new_conn(child, log_fd, child_log_init, child); child->cmd = cmd; - /* FIXME: limit parallelism! */ list_add_tail(&askrene->children, &child->list); tal_add_destructor(child, destroy_router_child); return command_still_pending(cmd); @@ -759,6 +760,49 @@ listpeerchannels_done(struct command *cmd, return do_getroutes(cmd, localmods, info); } +/* Mutual recursion */ +static struct command_result *begin_request(struct askrene *askrene, + struct getroutes_info *info); + +/* One is finished. 
Maybe wake up a waiter */ +static void destroy_live_command(struct command *cmd) +{ + struct askrene *askrene = get_askrene(cmd->plugin); + struct getroutes_info *info; + + assert(askrene->num_live_requests > 0); + askrene->num_live_requests--; + + if (askrene->num_live_requests >= askrene->max_children) + return; + + info = list_pop(&askrene->waiters, struct getroutes_info, list); + if (info) + begin_request(askrene, info); +} + +static struct command_result *begin_request(struct askrene *askrene, + struct getroutes_info *info) +{ + askrene->num_live_requests++; + + /* Wake any waiting ones when we're finished */ + tal_add_destructor(info->cmd, destroy_live_command); + + if (have_layer(info->layers, "auto.localchans")) { + struct out_req *req; + + req = jsonrpc_request_start(info->cmd, + "listpeerchannels", + listpeerchannels_done, + forward_error, info); + return send_outreq(req); + } else + info->local_layer = NULL; + + return do_getroutes(info->cmd, gossmap_localmods_new(info->cmd), info); +} + static struct command_result *json_getroutes(struct command *cmd, const char *buffer, const jsmntok_t *params) @@ -771,6 +815,7 @@ static struct command_result *json_getroutes(struct command *cmd, */ /* FIXME: Typo in spec for CLTV in descripton! 
But it breaks our spelling check, so we omit it above */ const u32 maxdelay_allowed = 2016; + struct askrene *askrene = get_askrene(cmd->plugin); const u32 default_maxparts = 100; struct getroutes_info *info = tal(cmd, struct getroutes_info); /* param functions require pointers */ @@ -828,18 +873,15 @@ static struct command_result *json_getroutes(struct command *cmd, info->additional_costs = new_htable(info, additional_cost_htable); info->maxparts = *maxparts; - if (have_layer(info->layers, "auto.localchans")) { - struct out_req *req; + if (askrene->num_live_requests >= askrene->max_children) { + cmd_log(tmpctx, cmd, LOG_INFORM, + "Too many running at once (%zu vs %u): waiting", + askrene->num_live_requests, askrene->max_children); + list_add_tail(&askrene->waiters, &info->list); + return command_still_pending(cmd); + } - req = jsonrpc_request_start(cmd, - "listpeerchannels", - listpeerchannels_done, - forward_error, info); - return send_outreq(req); - } else - info->local_layer = NULL; - - return do_getroutes(cmd, gossmap_localmods_new(cmd), info); + return begin_request(askrene, info); } static struct command_result *json_askrene_reserve(struct command *cmd, @@ -1355,6 +1397,8 @@ static const char *init(struct command *init_cmd, askrene->plugin = plugin; askrene->layers = new_layer_name_hash(askrene); list_head_init(&askrene->children); + list_head_init(&askrene->waiters); + askrene->num_live_requests = 0; askrene->reserved = new_reserve_htable(askrene); askrene->gossmap = gossmap_load(askrene, GOSSIP_STORE_FILENAME, plugin_gossmap_logcb, plugin); @@ -1382,6 +1426,7 @@ int main(int argc, char *argv[]) askrene = tal(NULL, struct askrene); askrene->route_seconds = 10; + askrene->max_children = 4; plugin_main(argv, init, take(askrene), PLUGIN_RESTARTABLE, true, NULL, commands, ARRAY_SIZE(commands), NULL, 0, NULL, 0, NULL, 0, plugin_option_dynamic("askrene-timeout", @@ -1390,5 +1435,11 @@ int main(int argc, char *argv[]) " Defaults to 10 seconds", u32_option, 
u32_jsonfmt, &askrene->route_seconds), + plugin_option_dynamic("askrene-max-threads", + "int", + "How many routes to calculate at once." + " Defaults to 4", + u32_option, u32_jsonfmt, + &askrene->max_children), NULL); } diff --git a/plugins/askrene/askrene.h b/plugins/askrene/askrene.h index a125102fc..97c65658f 100644 --- a/plugins/askrene/askrene.h +++ b/plugins/askrene/askrene.h @@ -28,8 +28,14 @@ struct askrene { struct command *layer_cmd; /* How long before we abort trying to find a route? */ u32 route_seconds; + /* Maximum number of routing children */ + u32 max_children; + /* How many requests live now? */ + size_t num_live_requests; /* Routing children currently in flight. */ struct list_head children; + /* Ones waiting */ + struct list_head waiters; }; /* Useful plugin->askrene mapping */