From 8b2d563e6693527e0747fbb22c5f01eeb89a6c53 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Tue, 7 Mar 2017 16:09:12 +0000 Subject: [PATCH 01/15] xenstored: apply a write transaction rate limit This avoids a rogue client being about to stall another client (eg the toolstack) indefinitely. This is XSA-206. Signed-off-by: Ian Jackson Backported to 4.8 (not entirely trivial). Reported-by: Juergen Gross Signed-off-by: George Dunlap Acked-by: Ian Jackson --- tools/xenstore/Makefile | 3 +- tools/xenstore/xenstored_core.c | 9 ++ tools/xenstore/xenstored_core.h | 6 + tools/xenstore/xenstored_domain.c | 215 +++++++++++++++++++++++++++++++++ tools/xenstore/xenstored_domain.h | 25 ++++ tools/xenstore/xenstored_transaction.c | 5 + 6 files changed, 262 insertions(+), 1 deletion(-) diff --git a/tools/xenstore/Makefile b/tools/xenstore/Makefile index 36b6fd4..9cb54de 100644 --- a/tools/xenstore/Makefile +++ b/tools/xenstore/Makefile @@ -32,6 +32,7 @@ XENSTORED_OBJS_$(CONFIG_FreeBSD) = xenstored_posix.o XENSTORED_OBJS_$(CONFIG_MiniOS) = xenstored_minios.o XENSTORED_OBJS += $(XENSTORED_OBJS_y) +LDLIBS_xenstored += -lrt ifneq ($(XENSTORE_STATIC_CLIENTS),y) LIBXENSTORE := libxenstore.so @@ -73,7 +74,7 @@ endif $(XENSTORED_OBJS): CFLAGS += $(CFLAGS_libxengnttab) xenstored: $(XENSTORED_OBJS) - $(CC) $^ $(LDFLAGS) $(LDLIBS_libxenevtchn) $(LDLIBS_libxengnttab) $(LDLIBS_libxenctrl) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS) + $(CC) $^ $(LDFLAGS) $(LDLIBS_libxenevtchn) $(LDLIBS_libxengnttab) $(LDLIBS_libxenctrl) $(LDLIBS_xenstored) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS) xenstored.a: $(XENSTORED_OBJS) $(AR) cr $@ $^ diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c index 3df977b..d14f096 100644 --- a/tools/xenstore/xenstored_core.c +++ b/tools/xenstore/xenstored_core.c @@ -358,6 +358,7 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx, int *ptimeout) { struct connection *conn; + struct wrl_timestampt now; if (fds) memset(fds, 0, sizeof(struct pollfd) * current_array_size); @@ -377,8 +378,11 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx, xce_pollfd_idx = set_fd(xenevtchn_fd(xce_handle), POLLIN|POLLPRI); + wrl_gettime_now(&now); + list_for_each_entry(conn, &connections, list) { if (conn->domain) { + wrl_check_timeout(conn->domain, now, ptimeout); if (domain_can_read(conn) || (domain_can_write(conn) && !list_empty(&conn->out_list))) @@ -833,6 +837,7 @@ static void delete_node_single(struct connection *conn, struct node *node) corrupt(conn, "Could not delete '%s'", node->name); return; } + domain_entry_dec(conn, node); } @@ -972,6 +977,7 @@ static void do_write(struct connection *conn, struct buffered_data *in) } add_change_node(conn->transaction, name, false); + wrl_apply_debit_direct(conn); fire_watches(conn, in, name, false); send_ack(conn, XS_WRITE); } @@ -1003,6 +1009,7 @@ static void do_mkdir(struct connection *conn, struct buffered_data *in) return; } add_change_node(conn->transaction, name, false); + wrl_apply_debit_direct(conn); fire_watches(conn, in, name, false); } send_ack(conn, XS_MKDIR); @@ -1129,6 +1136,7 @@ static void do_rm(struct connection *conn, struct buffered_data *in) if (_rm(conn, node, name)) { add_change_node(conn->transaction, name, true); + wrl_apply_debit_direct(conn); fire_watches(conn, in, name, true); send_ack(conn, XS_RM); } @@ -1205,6 +1213,7 @@ static void do_set_perms(struct connection *conn, struct buffered_data *in) } add_change_node(conn->transaction, name, false); + wrl_apply_debit_direct(conn); fire_watches(conn, in, name, false); send_ack(conn, XS_SET_PERMS); } diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h index ecc614f..9e9d960 100644 --- a/tools/xenstore/xenstored_core.h +++ b/tools/xenstore/xenstored_core.h @@ -33,6 +33,12 @@ #include "list.h" #include "tdb.h" +#define MIN(a, b) (((a) < (b))? (a) : (b)) + +typedef int32_t wrl_creditt; +#define WRL_CREDIT_MAX (1000*1000*1000) +/* ^ satisfies non-overflow condition for wrl_xfer_credit */ + struct buffered_data { struct list_head list; diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c index 5de93d4..012dfe6 100644 --- a/tools/xenstore/xenstored_domain.c +++ b/tools/xenstore/xenstored_domain.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "utils.h" #include "talloc.h" @@ -74,6 +75,10 @@ struct domain /* number of watch for this domain */ int nbwatch; + + /* write rate limit */ + wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */ + struct wrl_timestampt wrl_timestamp; }; static LIST_HEAD(domains); @@ -206,6 +211,8 @@ static int destroy_domain(void *_domain) fire_watches(NULL, domain, "@releaseDomain", false); + wrl_domain_destroy(domain); + return 0; } @@ -253,6 +260,9 @@ void handle_event(void) bool domain_can_read(struct connection *conn) { struct xenstore_domain_interface *intf = conn->domain->interface; + + if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0) + return false; return (intf->req_cons != intf->req_prod); } @@ -284,6 +294,8 @@ static struct domain *new_domain(void *context, unsigned int domid, domain->domid = domid; domain->path = talloc_domain_path(domain, domid); + wrl_domain_new(domain); + list_add(&domain->list, &domains); talloc_set_destructor(domain, destroy_domain); @@ -751,6 +763,209 @@ int domain_watch(struct connection *conn) : 0; } +static wrl_creditt wrl_config_writecost = WRL_FACTOR; +static wrl_creditt wrl_config_rate = WRL_RATE * WRL_FACTOR; +static wrl_creditt wrl_config_dburst = WRL_DBURST * WRL_FACTOR; +static wrl_creditt wrl_config_gburst = WRL_GBURST * WRL_FACTOR; +static wrl_creditt wrl_config_newdoms_dburst = + WRL_DBURST * WRL_NEWDOMS * WRL_FACTOR; + +long wrl_ntransactions; + +static long wrl_ndomains; +static wrl_creditt wrl_reserve; /* [-wrl_config_newdoms_dburst, +_gburst ] */ + +void wrl_gettime_now(struct wrl_timestampt *now_wt) +{ + struct timespec now_ts; + int r; + + r = clock_gettime(CLOCK_MONOTONIC, &now_ts); + if (r) + barf_perror("Could not find time (clock_gettime failed)"); + + now_wt->sec = now_ts.tv_sec; + now_wt->msec = now_ts.tv_nsec / 1000000; +} + +static void wrl_xfer_credit(wrl_creditt *debit, wrl_creditt debit_floor, + wrl_creditt *credit, wrl_creditt credit_ceil) + /* + * Transfers zero or more credit from "debit" to "credit". + * Transfers as much as possible while maintaining + * debit >= debit_floor and credit <= credit_ceil. + * (If that's violated already, does nothing.) + * + * Sufficient conditions to avoid overflow, either of: + * |every argument| <= 0x3fffffff + * |every argument| <= 1E9 + * |every argument| <= WRL_CREDIT_MAX + * (And this condition is preserved.) + */ +{ + wrl_creditt xfer = MIN( *debit - debit_floor, + credit_ceil - *credit ); + if (xfer > 0) { + *debit -= xfer; + *credit += xfer; + } +} + +void wrl_domain_new(struct domain *domain) +{ + domain->wrl_credit = 0; + wrl_gettime_now(&domain->wrl_timestamp); + wrl_ndomains++; + /* Steal up to DBURST from the reserve */ + wrl_xfer_credit(&wrl_reserve, -wrl_config_newdoms_dburst, + &domain->wrl_credit, wrl_config_dburst); +} + +void wrl_domain_destroy(struct domain *domain) +{ + wrl_ndomains--; + /* + * Don't bother recalculating domain's credit - this just + * means we don't give the reserve the ending domain's credit + * for time elapsed since last update. + */ + wrl_xfer_credit(&domain->wrl_credit, 0, + &wrl_reserve, wrl_config_dburst); +} + +void wrl_credit_update(struct domain *domain, struct wrl_timestampt now) +{ + /* + * We want to calculate + * credit += (now - timestamp) * RATE / ndoms; + * But we want it to saturate, and to avoid floating point. + * To avoid rounding errors from constantly adding small + * amounts of credit, we only add credit for whole milliseconds. + */ + long seconds = now.sec - domain->wrl_timestamp.sec; + long milliseconds = now.msec - domain->wrl_timestamp.msec; + long msec; + int64_t denom, num; + wrl_creditt surplus; + + seconds = MIN(seconds, 1000*1000); /* arbitrary, prevents overflow */ + msec = seconds * 1000 + milliseconds; + + if (msec < 0) + /* shouldn't happen with CLOCK_MONOTONIC */ + msec = 0; + + /* 32x32 -> 64 cannot overflow */ + denom = (int64_t)msec * wrl_config_rate; + num = (int64_t)wrl_ndomains * 1000; + /* denom / num <= 1E6 * wrl_config_rate, so with + reasonable wrl_config_rate, denom / num << 2^64 */ + + /* at last! */ + domain->wrl_credit = MIN( (int64_t)domain->wrl_credit + denom / num, + WRL_CREDIT_MAX ); + /* (maybe briefly violating the DBURST cap on wrl_credit) */ + + /* maybe take from the reserve to make us nonnegative */ + wrl_xfer_credit(&wrl_reserve, 0, + &domain->wrl_credit, 0); + + /* return any surplus (over DBURST) to the reserve */ + surplus = 0; + wrl_xfer_credit(&domain->wrl_credit, wrl_config_dburst, + &surplus, WRL_CREDIT_MAX); + wrl_xfer_credit(&surplus, 0, + &wrl_reserve, wrl_config_gburst); + /* surplus is now implicitly discarded */ + + domain->wrl_timestamp = now; + + trace("wrl: dom %4d %6ld msec %9ld credit %9ld reserve" + " %9ld discard\n", + domain->domid, + msec, + (long)domain->wrl_credit, (long)wrl_reserve, + (long)surplus); +} + +void wrl_check_timeout(struct domain *domain, + struct wrl_timestampt now, + int *ptimeout) +{ + uint64_t num, denom; + int wakeup; + + wrl_credit_update(domain, now); + + if (domain->wrl_credit >= 0) + /* not blocked */ + return; + + if (!*ptimeout) + /* already decided on immediate wakeup, + so no need to calculate our timeout */ + return; + + /* calculate wakeup = now + -credit / (RATE / ndoms); */ + + /* credit cannot go more -ve than one transaction, + * so the first multiplication cannot overflow even 32-bit */ + num = (uint64_t)(-domain->wrl_credit * 1000) * wrl_ndomains; + denom = wrl_config_rate; + + wakeup = MIN( num / denom /* uint64_t */, INT_MAX ); + if (*ptimeout==-1 || wakeup < *ptimeout) + *ptimeout = wakeup; + + trace("wrl: domain %u credit=%ld (reserve=%ld) SLEEPING for %d\n", + domain->domid, + (long)domain->wrl_credit, (long)wrl_reserve, + wakeup); +} + +void wrl_apply_debit_actual(struct domain *domain) +{ + struct wrl_timestampt now; + + if (!domain) + /* sockets escape the write rate limit */ + return; + + wrl_gettime_now(&now); + wrl_credit_update(domain, now); + + domain->wrl_credit -= wrl_config_writecost; + trace("wrl: domain %u credit=%ld (reserve=%ld)\n", + domain->domid, + (long)domain->wrl_credit, (long)wrl_reserve); +} + +void wrl_apply_debit_direct(struct connection *conn) +{ + if (!conn) + /* some writes are generated internally */ + return; + + if (conn->transaction) + /* these are accounted for when the transaction ends */ + return; + + if (!wrl_ntransactions) + /* we don't conflict with anyone */ + return; + + wrl_apply_debit_actual(conn->domain); +} + +void wrl_apply_debit_trans_commit(struct connection *conn) +{ + if (wrl_ntransactions <= 1) + /* our own transaction appears in the counter */ + return; + + wrl_apply_debit_actual(conn->domain); +} + /* * Local variables: * c-file-style: "linux" diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h index 2554423..cec341e 100644 --- a/tools/xenstore/xenstored_domain.h +++ b/tools/xenstore/xenstored_domain.h @@ -65,4 +65,29 @@ void domain_watch_inc(struct connection *conn); void domain_watch_dec(struct connection *conn); int domain_watch(struct connection *conn); +/* Write rate limiting */ + +#define WRL_FACTOR 1000 /* for fixed-point arithmetic */ +#define WRL_RATE 200 +#define WRL_DBURST 10 +#define WRL_GBURST 1000 +#define WRL_NEWDOMS 5 + +struct wrl_timestampt { + time_t sec; + int msec; +}; + +extern long wrl_ntransactions; + +void wrl_gettime_now(struct wrl_timestampt *now_ts); +void wrl_domain_new(struct domain *domain); +void wrl_domain_destroy(struct domain *domain); +void wrl_credit_update(struct domain *domain, struct wrl_timestampt now); +void wrl_check_timeout(struct domain *domain, + struct wrl_timestampt now, + int *ptimeout); +void wrl_apply_debit_direct(struct connection *conn); +void wrl_apply_debit_trans_commit(struct connection *conn); + #endif /* _XENSTORED_DOMAIN_H */ diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c index 84cb0bf..5059a11 100644 --- a/tools/xenstore/xenstored_transaction.c +++ b/tools/xenstore/xenstored_transaction.c @@ -120,6 +120,7 @@ static int destroy_transaction(void *_transaction) { struct transaction *trans = _transaction; + wrl_ntransactions--; trace_destroy(trans, "transaction"); if (trans->tdb) tdb_close(trans->tdb); @@ -183,6 +184,7 @@ void do_transaction_start(struct connection *conn, struct buffered_data *in) talloc_steal(conn, trans); talloc_set_destructor(trans, destroy_transaction); conn->transaction_started++; + wrl_ntransactions++; snprintf(id_str, sizeof(id_str), "%u", trans->id); send_reply(conn, XS_TRANSACTION_START, id_str, strlen(id_str)+1); @@ -218,6 +220,9 @@ void do_transaction_end(struct connection *conn, struct buffered_data *in) send_error(conn, EAGAIN); return; } + + wrl_apply_debit_trans_commit(conn); + if (!replace_tdb(trans->tdb_name, trans->tdb)) { send_error(conn, errno); return; -- 2.1.4