diff --git a/plugins/askrene/Makefile b/plugins/askrene/Makefile index a55136f75..989905aee 100644 --- a/plugins/askrene/Makefile +++ b/plugins/askrene/Makefile @@ -1,5 +1,5 @@ -PLUGIN_ASKRENE_SRC := plugins/askrene/askrene.c plugins/askrene/layer.c plugins/askrene/reserve.c plugins/askrene/mcf.c plugins/askrene/dijkstra.c plugins/askrene/flow.c plugins/askrene/refine.c -PLUGIN_ASKRENE_HEADER := plugins/askrene/askrene.h plugins/askrene/layer.h plugins/askrene/reserve.h plugins/askrene/mcf.h plugins/askrene/dijkstra.h plugins/askrene/flow.h plugins/askrene/refine.h +PLUGIN_ASKRENE_SRC := plugins/askrene/askrene.c plugins/askrene/layer.c plugins/askrene/reserve.c plugins/askrene/mcf.c plugins/askrene/dijkstra.c plugins/askrene/flow.c plugins/askrene/refine.c plugins/askrene/explain_failure.c +PLUGIN_ASKRENE_HEADER := plugins/askrene/askrene.h plugins/askrene/layer.h plugins/askrene/reserve.h plugins/askrene/mcf.h plugins/askrene/dijkstra.h plugins/askrene/flow.h plugins/askrene/refine.h plugins/askrene/explain_failure.h PLUGIN_ASKRENE_OBJS := $(PLUGIN_ASKRENE_SRC:.c=.o) $(PLUGIN_ASKRENE_OBJS): $(PLUGIN_ASKRENE_HEADER) @@ -7,4 +7,4 @@ $(PLUGIN_ASKRENE_OBJS): $(PLUGIN_ASKRENE_HEADER) ALL_C_SOURCES += $(PLUGIN_ASKRENE_SRC) ALL_C_HEADERS += $(PLUGIN_ASKRENE_HEADER) -plugins/cln-askrene: $(PLUGIN_ASKRENE_OBJS) $(PLUGIN_LIB_OBJS) $(PLUGIN_COMMON_OBJS) $(JSMN_OBJS) $(CCAN_OBJS) bitcoin/chainparams.o common/gossmap.o common/sciddir_or_pubkey.o common/gossmods_listpeerchannels.o common/fp16.o common/dijkstra.o common/bolt12.o common/bolt12_merkle.o wire/bolt12_wiregen.o wire/onion_wiregen.o +plugins/cln-askrene: $(PLUGIN_ASKRENE_OBJS) $(PLUGIN_LIB_OBJS) $(PLUGIN_COMMON_OBJS) $(JSMN_OBJS) $(CCAN_OBJS) bitcoin/chainparams.o common/gossmap.o common/sciddir_or_pubkey.o common/gossmods_listpeerchannels.o common/fp16.o common/dijkstra.o common/bolt12.o common/bolt12_merkle.o wire/bolt12_wiregen.o wire/onion_wiregen.o common/route.o diff --git a/plugins/askrene/askrene.c b/plugins/askrene/askrene.c index 759004cfc..ab33b9f57 100644 --- a/plugins/askrene/askrene.c +++ b/plugins/askrene/askrene.c @@ -9,6 +9,7 @@ #include "config.h" #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -223,6 +225,7 @@ struct amount_msat get_additional_per_htlc_cost(const struct route_query *rq, return AMOUNT_MSAT(0); } + /* Returns an error message, or sets *routes */ static const char *get_routes(const tal_t *ctx, struct command *cmd, @@ -327,10 +330,7 @@ static const char *get_routes(const tal_t *ctx, flows = minflow(rq, rq, srcnode, dstnode, amount, mu, delay_feefactor, base_fee_penalty, prob_cost_factor); if (!flows) { - /* FIXME: disjktra here to see if there is any route, and - * diagnose problem (offline peers? Not enough capacity at - * our end? Not enough at theirs?) */ - ret = tal_fmt(ctx, "Could not find route"); + ret = explain_failure(ctx, rq, srcnode, dstnode, amount); goto out; } diff --git a/plugins/askrene/explain_failure.c b/plugins/askrene/explain_failure.c new file mode 100644 index 000000000..1369a88d2 --- /dev/null +++ b/plugins/askrene/explain_failure.c @@ -0,0 +1,290 @@ +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#define NO_USABLE_PATHS_STRING "We could not find a usable set of paths." + +/* Dijkstra, reduced to ignore anything but connectivity */ +static bool always_true(const struct gossmap *map, + const struct gossmap_chan *c, + int dir, + struct amount_msat amount, + void *unused) +{ + return true; +} + +static u64 route_score_one(struct amount_msat fee UNUSED, + struct amount_msat risk UNUSED, + struct amount_msat total UNUSED, + int dir UNUSED, + const struct gossmap_chan *c UNUSED) +{ + return 1; +} + +/* This mirrors get_constraints() */ +static const char *why_max_constrained(const tal_t *ctx, + const struct route_query *rq, + struct short_channel_id_dir *scidd, + struct amount_msat amount) +{ + char *ret = NULL; + const char *reservations; + const struct layer *constrains = NULL; + struct amount_msat max = amount; + + /* Figure out the layer that constrains us (most) */ + for (size_t i = 0; i < tal_count(rq->layers); i++) { + struct amount_msat min = AMOUNT_MSAT(0), new_max = max; + + layer_apply_constraints(rq->layers[i], scidd, &min, &new_max); + if (!amount_msat_eq(new_max, max)) + constrains = rq->layers[i]; + max = new_max; + } + + if (constrains) { + if (!ret) + ret = tal_strdup(ctx, ""); + else + tal_append_fmt(&ret, ", "); + tal_append_fmt(&ret, "layer %s says max is %s", + layer_name(constrains), + fmt_amount_msat(tmpctx, max)); + } + + reservations = fmt_reservations(tmpctx, rq->reserved, scidd); + if (reservations) { + if (!ret) + ret = tal_strdup(ctx, ""); + else + tal_append_fmt(&ret, " and "); + tal_append_fmt(&ret, "already reserved %s", reservations); + } + + /* If that doesn't explain it, perhaps it violates htlc_max? */ + if (!ret) { + struct gossmap_chan *c = gossmap_find_chan(rq->gossmap, &scidd->scid); + fp16_t htlc_max = c->half[scidd->dir].htlc_max; + if (amount_msat_greater_fp16(amount, htlc_max)) + ret = tal_fmt(ctx, "exceeds htlc_maximum_msat ~%s", + fmt_amount_msat(tmpctx, + amount_msat(fp16_to_u64(htlc_max)))); + } + + /* This seems unlikely, but don't return NULL. */ + if (!ret) + ret = tal_fmt(ctx, "is constrained"); + return ret; +} + +struct stat { + size_t num_channels; + struct amount_msat capacity; +}; + +struct node_stats { + struct stat total, gossip_known, enabled; +}; + +enum node_direction { + INTO_NODE, + OUT_OF_NODE, +}; + +static void add_stat(struct stat *stat, + struct amount_msat amount) +{ + stat->num_channels++; + if (!amount_msat_accumulate(&stat->capacity, amount)) + abort(); +} + +static void node_stats(const struct route_query *rq, + const struct gossmap_node *node, + enum node_direction node_direction, + struct node_stats *stats) +{ + memset(stats, 0, sizeof(*stats)); + for (size_t i = 0; i < node->num_chans; i++) { + int dir; + struct gossmap_chan *c; + struct amount_msat cap_msat; + + c = gossmap_nth_chan(rq->gossmap, node, i, &dir); + cap_msat = gossmap_chan_get_capacity(rq->gossmap, c); + + if (node_direction == INTO_NODE) + dir = !dir; + + add_stat(&stats->total, cap_msat); + if (gossmap_chan_set(c, dir)) + add_stat(&stats->gossip_known, cap_msat); + if (c->half[dir].enabled) + add_stat(&stats->enabled, cap_msat); + } +} + +static const char *check_capacity(const tal_t *ctx, + const struct route_query *rq, + const struct gossmap_node *node, + enum node_direction node_direction, + struct amount_msat amount, + const char *name) +{ + struct node_stats stats; + + node_stats(rq, node, node_direction, &stats); + if (amount_msat_greater(amount, stats.total.capacity)) { + return tal_fmt(ctx, + NO_USABLE_PATHS_STRING + " Total %s capacity is only %s" + " (in %zu channels).", + name, + fmt_amount_msat(tmpctx, stats.total.capacity), + stats.total.num_channels); + } + if (amount_msat_greater(amount, stats.gossip_known.capacity)) { + return tal_fmt(ctx, + NO_USABLE_PATHS_STRING + " Missing gossip for %s: only known %zu/%zu channels, leaving capacity only %s of %s.", + name, + stats.gossip_known.num_channels, + stats.total.num_channels, + fmt_amount_msat(tmpctx, stats.gossip_known.capacity), + fmt_amount_msat(tmpctx, stats.total.capacity)); + } + if (amount_msat_greater(amount, stats.enabled.capacity)) { + return tal_fmt(ctx, + NO_USABLE_PATHS_STRING + " The %s has disabled %zu of %zu channels, leaving capacity only %s of %s.", + name, + stats.total.num_channels - stats.enabled.num_channels, + stats.total.num_channels, + fmt_amount_msat(tmpctx, stats.enabled.capacity), + fmt_amount_msat(tmpctx, stats.total.capacity)); + } + return NULL; +} + +/* Return description of why scidd is disabled scidd */ +static const char *describe_disabled(const tal_t *ctx, + const struct route_query *rq, + const struct short_channel_id_dir *scidd) +{ + for (int i = tal_count(rq->layers) - 1; i >= 0; i--) { + if (layer_disables(rq->layers[i], scidd)) { + return tal_fmt(ctx, "marked disabled by layer %s.", + layer_name(rq->layers[i])); + } + } + + return tal_fmt(ctx, "marked disabled by gossip message."); +} + +static const char *describe_capacity(const tal_t *ctx, + const struct route_query *rq, + const struct short_channel_id_dir *scidd, + struct amount_msat amount) +{ + for (int i = tal_count(rq->layers) - 1; i >= 0; i--) { + if (layer_created(rq->layers[i], scidd->scid)) { + return tal_fmt(ctx, " (created by layer %s) isn't big enough to carry %s.", + layer_name(rq->layers[i]), + fmt_amount_msat(tmpctx, amount)); + } + } + + return tal_fmt(ctx, "isn't big enough to carry %s.", + fmt_amount_msat(tmpctx, amount)); +} + +/* We failed to find a flow at all. Why? */ +const char *explain_failure(const tal_t *ctx, + const struct route_query *rq, + const struct gossmap_node *srcnode, + const struct gossmap_node *dstnode, + struct amount_msat amount) +{ + const struct route_hop *hops; + const struct dijkstra *dij; + char *path; + const char *cap_check; + + /* Do we have enough funds? */ + cap_check = check_capacity(ctx, rq, srcnode, OUT_OF_NODE, + amount, "source"); + if (cap_check) + return cap_check; + + /* Does destination have enough capacity? */ + cap_check = check_capacity(ctx, rq, dstnode, INTO_NODE, + amount, "destination"); + if (cap_check) + return cap_check; + + /* OK, fall back to telling them why didn't shortest path + * work. This covers the "but I have a direct channel!" + * case. */ + dij = dijkstra(tmpctx, rq->gossmap, dstnode, AMOUNT_MSAT(0), 0, + always_true, route_score_one, NULL); + hops = route_from_dijkstra(tmpctx, rq->gossmap, dij, srcnode, + AMOUNT_MSAT(0), 0); + if (!hops) + return tal_fmt(ctx, "There is no connection between source and destination at all"); + + /* Description of shortest path */ + path = tal_strdup(tmpctx, ""); + for (size_t i = 0; i < tal_count(hops); i++) { + tal_append_fmt(&path, "%s%s", + i > 0 ? "->" : "", + fmt_short_channel_id(tmpctx, hops[i].scid)); + } + + /* Now walk through this: is it disabled? Insuff capacity? */ + for (size_t i = 0; i < tal_count(hops); i++) { + const char *explanation; + struct short_channel_id_dir scidd; + struct gossmap_chan *c; + struct amount_msat cap_msat; + + scidd.scid = hops[i].scid; + scidd.dir = hops[i].direction; + c = gossmap_find_chan(rq->gossmap, &scidd.scid); + cap_msat = gossmap_chan_get_capacity(rq->gossmap, c); + if (!gossmap_chan_set(c, scidd.dir)) + explanation = "has no gossip"; + else if (!c->half[scidd.dir].enabled) + explanation = describe_disabled(tmpctx, rq, &scidd); + else if (amount_msat_greater(amount, cap_msat)) + explanation = describe_capacity(tmpctx, rq, &scidd, amount); + else { + struct amount_msat min, max; + get_constraints(rq, c, scidd.dir, &min, &max); + if (amount_msat_less(max, amount)) { + explanation = why_max_constrained(tmpctx, rq, + &scidd, amount); + } else + continue; + } + + return tal_fmt(ctx, + NO_USABLE_PATHS_STRING + " The shortest path is %s, but %s %s", + path, + fmt_short_channel_id_dir(tmpctx, &scidd), + explanation); + } + + return tal_fmt(ctx, + "Actually, I'm not sure why we didn't find the" + " obvious route %s: perhaps this is a bug?", + path); +} diff --git a/plugins/askrene/explain_failure.h b/plugins/askrene/explain_failure.h new file mode 100644 index 000000000..ed470f829 --- /dev/null +++ b/plugins/askrene/explain_failure.h @@ -0,0 +1,16 @@ +#ifndef LIGHTNING_PLUGINS_ASKRENE_EXPLAIN_FAILURE_H +#define LIGHTNING_PLUGINS_ASKRENE_EXPLAIN_FAILURE_H +#include "config.h" +#include + +struct route_query; +struct gossmap_node; + +/* When MCF returns nothing, try to explain why */ +const char *explain_failure(const tal_t *ctx, + const struct route_query *rq, + const struct gossmap_node *srcnode, + const struct gossmap_node *dstnode, + struct amount_msat amount); + +#endif /* LIGHTNING_PLUGINS_ASKRENE_EXPLAIN_FAILURE_H */ diff --git a/plugins/askrene/layer.c b/plugins/askrene/layer.c index d70fff4fe..26d602853 100644 --- a/plugins/askrene/layer.c +++ b/plugins/askrene/layer.c @@ -502,6 +502,21 @@ void json_add_layers(struct json_stream *js, json_array_end(js); } +bool layer_created(const struct layer *layer, struct short_channel_id scid) +{ + return local_channel_hash_get(layer->local_channels, scid); +} + +bool layer_disables(const struct layer *layer, + const struct short_channel_id_dir *scidd) +{ + const struct local_update *lu; + + lu = local_update_hash_get(layer->local_updates, scidd); + + return (lu && lu->enabled && *lu->enabled == false); +} + void layer_memleak_mark(struct askrene *askrene, struct htable *memtable) { struct layer *l; diff --git a/plugins/askrene/layer.h b/plugins/askrene/layer.h index 468dd7326..b8e700fdc 100644 --- a/plugins/askrene/layer.h +++ b/plugins/askrene/layer.h @@ -102,6 +102,12 @@ void json_add_constraint(struct json_stream *js, const struct constraint *c, const struct layer *layer); +/* For explain_failure: did this layer create this scid? */ +bool layer_created(const struct layer *layer, struct short_channel_id scid); + +/* For explain_failure: did this layer disable this channel? */ +bool layer_disables(const struct layer *layer, const struct short_channel_id_dir *scidd); + /* Scan for memleaks */ void layer_memleak_mark(struct askrene *askrene, struct htable *memtable); #endif /* LIGHTNING_PLUGINS_ASKRENE_LAYER_H */ diff --git a/plugins/askrene/reserve.c b/plugins/askrene/reserve.c index a58bc6d5f..82c9096d3 100644 --- a/plugins/askrene/reserve.c +++ b/plugins/askrene/reserve.c @@ -151,6 +151,32 @@ void json_add_reservations(struct json_stream *js, json_array_end(js); } +const char *fmt_reservations(const tal_t *ctx, + const struct reserve_htable *reserved, + const struct short_channel_id_dir *scidd) +{ + struct reserve *r; + struct reserve_htable_iter rit; + char *ret = NULL; + + for (r = reserve_htable_getfirst(reserved, scidd, &rit); + r; + r = reserve_htable_getnext(reserved, scidd, &rit)) { + u64 seconds; + if (!ret) + ret = tal_strdup(ctx, ""); + else + tal_append_fmt(&ret, ", "); + tal_append_fmt(&ret, "%s by command %s", + fmt_amount_msat(tmpctx, r->rhop.amount), r->cmd_id); + seconds = timemono_between(time_mono(), r->timestamp).ts.tv_sec; + /* Add a note if it's old */ + if (seconds > 0) + tal_append_fmt(&ret, " (%"PRIu64" seconds ago)", seconds); + } + return ret; +} + void reserve_memleak_mark(struct askrene *askrene, struct htable *memtable) { memleak_scan_htable(memtable, &askrene->reserved->raw); diff --git a/plugins/askrene/reserve.h b/plugins/askrene/reserve.h index 103ed1108..1c9e71eaa 100644 --- a/plugins/askrene/reserve.h +++ b/plugins/askrene/reserve.h @@ -40,6 +40,11 @@ bool reserve_accumulate(const struct reserve_htable *reserved, const struct short_channel_id_dir *scidd, struct amount_msat *amount); +/* To explain why we couldn't route */ +const char *fmt_reservations(const tal_t *ctx, + const struct reserve_htable *reserved, + const struct short_channel_id_dir *scidd); + /* Print out a json object for all reservations */ void json_add_reservations(struct json_stream *js, const struct reserve_htable *reserved, diff --git a/tests/test_askrene.py b/tests/test_askrene.py index fe7711b79..fe065c1e0 100644 --- a/tests/test_askrene.py +++ b/tests/test_askrene.py @@ -20,8 +20,10 @@ def test_reserve(node_factory): l1, l2, l3 = node_factory.line_graph(3, wait_for_announce=True) assert l1.rpc.askrene_listreservations() == {'reservations': []} - scid12dir = f"{first_scid(l1, l2)}/{direction(l1.info['id'], l2.info['id'])}" - scid23dir = f"{first_scid(l2, l3)}/{direction(l2.info['id'], l3.info['id'])}" + scid12 = first_scid(l1, l2) + scid23 = first_scid(l2, l3) + scid12dir = f"{scid12}/{direction(l1.info['id'], l2.info['id'])}" + scid23dir = f"{scid23}/{direction(l2.info['id'], l3.info['id'])}" initial_prob = l1.rpc.getroutes(source=l1.info['id'], destination=l3.info['id'], @@ -60,8 +62,12 @@ def test_reserve(node_factory): {'short_channel_id_dir': scid23dir, 'amount_msat': 1000_000_000_000}]) - # FIXME: better error! - with pytest.raises(RpcError, match="Could not find route"): + # Keep it consistent: the below will mention a time if >= 1 seconds old, + # which might happen without the sleep on slow machines. + time.sleep(2) + + # Reservations can be in either order. + with pytest.raises(RpcError, match=rf'We could not find a usable set of paths. The shortest path is {scid12}->{scid23}, but {scid12dir} already reserved 10000000*msat by command ".*" \([0-9]* seconds ago\), 10000000*msat by command ".*" \([0-9]* seconds ago\)'): l1.rpc.getroutes(source=l1.info['id'], destination=l3.info['id'], amount_msat=1000000, @@ -305,18 +311,46 @@ def test_getroutes(node_factory): # Set up l1 with this as the gossip_store l1 = node_factory.get_node(gossip_store_file=gsfile.name) + # Too much should give a decent explanation. + with pytest.raises(RpcError, match=r"We could not find a usable set of paths\. The shortest path is 0x1x0, but 0x1x0/1 isn't big enough to carry 1000000001msat\."): + l1.rpc.getroutes(source=nodemap[0], + destination=nodemap[1], + amount_msat=1000000001, + layers=[], + maxfee_msat=100000000, + final_cltv=99) + + # This should tell us source doesn't have enough. + with pytest.raises(RpcError, match=r"We could not find a usable set of paths\. Total source capacity is only 1019000000msat \(in 3 channels\)\."): + l1.rpc.getroutes(source=nodemap[0], + destination=nodemap[1], + amount_msat=2000000001, + layers=[], + maxfee_msat=20000000, + final_cltv=99) + + # This should tell us dest doesn't have enough. + with pytest.raises(RpcError, match=r"We could not find a usable set of paths\. Total destination capacity is only 1000000000msat \(in 1 channels\)\."): + l1.rpc.getroutes(source=nodemap[0], + destination=nodemap[4], + amount_msat=1000000001, + layers=[], + maxfee_msat=30000000, + final_cltv=99) + # Disabling channels makes getroutes fail l1.rpc.askrene_create_layer('chans_disabled') l1.rpc.askrene_update_channel(layer="chans_disabled", short_channel_id_dir='0x1x0/1', enabled=False) - with pytest.raises(RpcError, match="Could not find route"): + with pytest.raises(RpcError, match=r"We could not find a usable set of paths\. The shortest path is 0x1x0, but 0x1x0/1 marked disabled by layer chans_disabled\."): l1.rpc.getroutes(source=nodemap[0], destination=nodemap[1], amount_msat=1000, layers=["chans_disabled"], maxfee_msat=1000, final_cltv=99) + # Start easy assert l1.rpc.getroutes(source=nodemap[0], destination=nodemap[1],