From 8c7ac33f882a9406d7e413d755c50669bfb61b77 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 16 Nov 2025 15:36:13 +1030 Subject: [PATCH] askrene: implement reduce_num_flows in refine, using increase_flows(). Now we simply call it at the end. We need to check it hasn't violated fee maxima, but otherwise it's simple. Signed-off-by: Rusty Russell Changelog-Fixed: Plugins: `askrene` now handles limits on number of htlcs much more gracefully. --- plugins/askrene/mcf.c | 79 +++++++++++++++------------------------- plugins/askrene/refine.c | 61 +++++++++++++++++++------------ plugins/askrene/refine.h | 9 +++-- tests/test_askrene.py | 21 +++++------ 4 files changed, 83 insertions(+), 87 deletions(-) diff --git a/plugins/askrene/mcf.c b/plugins/askrene/mcf.c index 44cdfc728..c04f72022 100644 --- a/plugins/askrene/mcf.c +++ b/plugins/askrene/mcf.c @@ -1377,9 +1377,6 @@ linear_routes(const tal_t *ctx, struct route_query *rq, struct reserve_hop *reservations = new_reservations(working_ctx, rq); while (!amount_msat_is_zero(amount_to_deliver)) { - size_t num_parts, parts_slots, excess_parts; - u32 bottleneck_idx; - if (timemono_after(time_mono(), deadline)) { error_message = rq_log(ctx, rq, LOG_BROKEN, "%s: timed out after deadline", @@ -1387,24 +1384,7 @@ linear_routes(const tal_t *ctx, struct route_query *rq, goto fail; } - /* FIXME: This algorithm to limit the number of parts is dumb - * for two reasons: - * 1. it does not take into account that several loop - * iterations here may produce two flows along the same - * path that after "squash_flows" become a single flow. - * 2. limiting the number of "slots" to 1 makes us fail to - * see some solutions that use more than one of those - * existing paths. - * - * A better approach could be to run MCF, remove the excess - * paths and then recompute a MCF on a network where each arc is - * one of the previously remaining paths, ie. redistributing the - * payment amount among the selected paths in a cost-efficient - * way. */ new_flows = tal_free(new_flows); - num_parts = tal_count(*flows); - assert(num_parts < rq->maxparts); - parts_slots = rq->maxparts - num_parts; /* If the amount_to_deliver is very small we better use a single * path computation because: @@ -1415,8 +1395,7 @@ linear_routes(const tal_t *ctx, struct route_query *rq, * do not deliver the entire payment amount by just a small * amount. */ if (amount_msat_less_eq(amount_to_deliver, - SINGLE_PATH_THRESHOLD) || - parts_slots == 1) { + SINGLE_PATH_THRESHOLD)) { new_flows = single_path_flow(working_ctx, rq, srcnode, dstnode, amount_to_deliver, mu, delay_feefactor); @@ -1433,7 +1412,7 @@ linear_routes(const tal_t *ctx, struct route_query *rq, } error_message = - refine_flows(ctx, rq, amount_to_deliver, &new_flows, &bottleneck_idx); + refine_flows(ctx, rq, amount_to_deliver, &new_flows, NULL); if (error_message) goto fail; @@ -1457,32 +1436,6 @@ linear_routes(const tal_t *ctx, struct route_query *rq, assert(!amount_msat_is_zero(new_flows[i]->delivers)); } - if (tal_count(new_flows) > parts_slots) { - /* Remove the excees of parts and leave one slot for the - * next round of computations. */ - excess_parts = 1 + tal_count(new_flows) - parts_slots; - } else if (tal_count(new_flows) == parts_slots && - amount_msat_less(all_deliver, amount_to_deliver)) { - /* Leave exactly 1 slot for the next round of - * computations. */ - excess_parts = 1; - } else - excess_parts = 0; - if (excess_parts > 0) { - /* If we removed all the flows we found, avoid selecting them again, - * by disabling one. */ - if (excess_parts == tal_count(new_flows)) - bitmap_set_bit(rq->disabled_chans, bottleneck_idx); - if (!remove_flows(&new_flows, excess_parts)) { - error_message = rq_log(ctx, rq, LOG_BROKEN, - "%s: failed to remove %zu" - " flows from a set of %zu", - __func__, excess_parts, - tal_count(new_flows)); - goto fail; - } - } - /* Is this set of flows too expensive? * We can check if the new flows are within the fee budget, * however in some cases we have discarded some flows at this @@ -1606,6 +1559,34 @@ linear_routes(const tal_t *ctx, struct route_query *rq, /* all set! Now squash flows that use the same path */ squash_flows(ctx, rq, flows); + /* If we're over the number of parts, try to cram excess into the + * largest-capacity parts */ + if (tal_count(*flows) > rq->maxparts) { + struct amount_msat fee; + + error_message = reduce_num_flows(rq, rq, flows, amount, rq->maxparts); + if (error_message) { + *flows = tal_free(*flows); + return error_message; + } + + /* Check fee budget! */ + fee = flowset_fee(rq->plugin, *flows); + if (amount_msat_greater(fee, maxfee)) { + error_message = rq_log(rq, rq, LOG_INFORM, + "After reducing the flows to %zu (i.e. maxparts)," + " we had a fee of %s, greater than " + "max of %s.", + tal_count(*flows), + fmt_amount_msat(tmpctx, fee), + fmt_amount_msat(tmpctx, maxfee)); + if (error_message) { + *flows = tal_free(*flows); + return error_message; + } + } + } + /* flows_probability re-does a temporary reservation so we need to call * it after we have cleaned the reservations we used to build the flows * hence after we freed working_ctx. */ diff --git a/plugins/askrene/refine.c b/plugins/askrene/refine.c index 052d125bf..2babc216a 100644 --- a/plugins/askrene/refine.c +++ b/plugins/askrene/refine.c @@ -1,5 +1,6 @@ #include "config.h" #include +#include #include #include #include @@ -597,29 +598,43 @@ double flows_probability(const tal_t *ctx, struct route_query *rq, return probability; } -/* Compare flows by deliver amount */ -static int reverse_cmp_flows(struct flow *const *fa, struct flow *const *fb, - void *unused UNUSED) +const char *reduce_num_flows(const tal_t *ctx, + const struct route_query *rq, + struct flow ***flows, + struct amount_msat deliver, + size_t num_parts) { - if (amount_msat_eq((*fa)->delivers, (*fb)->delivers)) - return 0; - if (amount_msat_greater((*fa)->delivers, (*fb)->delivers)) - return -1; - return 1; -} + /* Keep the largest flows (not as I originally implemented, the largest + * capacity flows). Here's Lagrang3's analysis: + * + * I think it is better to keep the largest-deliver flows. If we only + * go for the highest capacity we may throw away the low cost benefits + * of the MCF. -bool remove_flows(struct flow ***flows, u32 n) -{ - if (n == 0) - goto fail; - if (n > tal_count(*flows)) - goto fail; - asort(*flows, tal_count(*flows), reverse_cmp_flows, NULL); - for (size_t count = tal_count(*flows); n > 0; n--, count--) { - assert(count > 0); - tal_arr_remove(flows, count - 1); - } - return true; -fail: - return false; + * Hypothetical scenario: MCF finds 3 flows but maxparts=2, + * flow 1: deliver=10, cost=0, capacity=0 + * flow 2: deliver=7, cost=1, capacity=5 + * flow 3: deliver=1, cost=10, capacity=100 + * + * It is better to keep flows 1 and 2 by accomodating 1 more unit of + * flow in flow2 at 1 value expense (per flow), than to keep flows 2 and + * 3 by accomodating 5 more units of flow in flow2 at cost 1 and 5 in + * flow3 at cost 100. + * + * The trade-off is: if we prioritize the delivery value already + * computed by MCF then we find better solutions, but we might fail to + * find feasible solutions sometimes. If we prioritize capacity then we + * generally find bad solutions though we find feasibility more often + * than the alternative. + */ + size_t orig_num_flows = tal_count(*flows); + asort(*flows, orig_num_flows, revcmp_flows, NULL); + tal_resize(flows, num_parts); + + if (!increase_flows(rq, *flows, deliver, -1.0)) + return rq_log(ctx, rq, LOG_INFORM, + "Failed to reduce %zu flows down to maxparts (%zu)", + orig_num_flows, num_parts); + + return NULL; } diff --git a/plugins/askrene/refine.h b/plugins/askrene/refine.h index fcd64be21..eeed8f28e 100644 --- a/plugins/askrene/refine.h +++ b/plugins/askrene/refine.h @@ -38,7 +38,10 @@ void squash_flows(const tal_t *ctx, struct route_query *rq, double flows_probability(const tal_t *ctx, struct route_query *rq, struct flow ***flows); -/* Helper function: removes n flows from the set. It will remove those flows - * with the lowest amount values. */ -bool remove_flows(struct flow ***flows, u32 n); +/* Modify flows so only N remain, if we can. Returns an error if we cannot. */ +const char *reduce_num_flows(const tal_t *ctx, + const struct route_query *rq, + struct flow ***flows, + struct amount_msat deliver, + size_t num_parts); #endif /* LIGHTNING_PLUGINS_ASKRENE_REFINE_H */ diff --git a/tests/test_askrene.py b/tests/test_askrene.py index 59e0eb872..08b870547 100644 --- a/tests/test_askrene.py +++ b/tests/test_askrene.py @@ -1782,8 +1782,7 @@ def test_simple_dummy_channel(node_factory): def test_maxparts_infloop(node_factory, bitcoind): # Three paths from l1 -> l5. - # FIXME: enhance explain_failure! - l1, l2, l3, l4, l5 = node_factory.get_nodes(5, opts=[{'broken_log': 'plugin-cln-askrene.*the obvious route'}] + [{}] * 4) + l1, l2, l3, l4, l5 = node_factory.get_nodes(5) for intermediate in (l2, l3, l4): node_factory.join_nodes([l1, intermediate, l5]) @@ -1805,16 +1804,14 @@ def test_maxparts_infloop(node_factory, bitcoind): final_cltv=5) assert len(route['routes']) == 3 - # Now with maxparts == 2. Usually askrene can't figure out why it failed, - # but sometimes it gets a theory. - with pytest.raises(RpcError): - l1.rpc.getroutes(source=l1.info['id'], - destination=l5.info['id'], - amount_msat=amount, - layers=[], - maxfee_msat=amount, - final_cltv=5, - maxparts=2) + route = l1.rpc.getroutes(source=l1.info['id'], + destination=l5.info['id'], + amount_msat=amount, + layers=[], + maxfee_msat=amount, + final_cltv=5, + maxparts=2) + assert len(route['routes']) == 2 def test_askrene_timeout(node_factory, bitcoind):