From 8fc56a989afed0c85eaeda409e947ff5ad827309 Mon Sep 17 00:00:00 2001
From: Mario Preksavec
Date: Sat, 24 Oct 2020 04:54:05 +0200
Subject: system/xen: XSA 333-347 update.

Signed-off-by: Mario Preksavec
Signed-off-by: Willy Sudiarto Raharjo
---
 system/xen/xsa/xsa333.patch                        |  39 ++
 system/xen/xsa/xsa334.patch                        |  51 +++
 system/xen/xsa/xsa335-qemu.patch                   |  84 +++++
 system/xen/xsa/xsa336.patch                        | 283 +++++++++++++++
 system/xen/xsa/xsa337-4.13-1.patch                 |  87 +++++
 system/xen/xsa/xsa337-4.13-2.patch                 | 181 ++++++++++
 system/xen/xsa/xsa338.patch                        |  42 +++
 system/xen/xsa/xsa339.patch                        |  76 ++++
 system/xen/xsa/xsa340.patch                        |  65 ++++
 system/xen/xsa/xsa342-4.13.patch                   | 145 ++++++++
 system/xen/xsa/xsa343-1.patch                      | 199 +++++++++++
 system/xen/xsa/xsa343-2.patch                      | 295 ++++++++++++++++
 system/xen/xsa/xsa343-3.patch                      | 392 +++++++++++++++++++++
 system/xen/xsa/xsa344-4.13-1.patch                 | 130 +++++++
 system/xen/xsa/xsa344-4.13-2.patch                 | 203 +++++++++++
 ...ctor-map_pages_to_xen-to-have-only-a-sing.patch |  94 +++++
 ...ctor-modify_xen_mappings-to-have-one-exit.patch |  68 ++++
 ...ent-some-races-in-hypervisor-mapping-upda.patch | 249 +++++++++++++
 system/xen/xsa/xsa346-4.13-1.patch                 |  50 +++
 system/xen/xsa/xsa346-4.13-2.patch                 | 204 +++++++++++
 system/xen/xsa/xsa347-4.13-1.patch                 | 149 ++++++++
 system/xen/xsa/xsa347-4.13-2.patch                 |  72 ++++
 system/xen/xsa/xsa347-4.13-3.patch                 |  59 ++++
 23 files changed, 3217 insertions(+)
 create mode 100644 system/xen/xsa/xsa333.patch
 create mode 100644 system/xen/xsa/xsa334.patch
 create mode 100644 system/xen/xsa/xsa335-qemu.patch
 create mode 100644 system/xen/xsa/xsa336.patch
 create mode 100644 system/xen/xsa/xsa337-4.13-1.patch
 create mode 100644 system/xen/xsa/xsa337-4.13-2.patch
 create mode 100644 system/xen/xsa/xsa338.patch
 create mode 100644 system/xen/xsa/xsa339.patch
 create mode 100644 system/xen/xsa/xsa340.patch
 create mode 100644 system/xen/xsa/xsa342-4.13.patch
 create mode 100644 system/xen/xsa/xsa343-1.patch
 create mode 100644 system/xen/xsa/xsa343-2.patch
 create mode 100644 system/xen/xsa/xsa343-3.patch
 create mode 100644 system/xen/xsa/xsa344-4.13-1.patch
 create mode 100644 system/xen/xsa/xsa344-4.13-2.patch
 create mode 100644 system/xen/xsa/xsa345-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch
 create mode 100644 system/xen/xsa/xsa345-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch
 create mode 100644 system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch
 create mode 100644 system/xen/xsa/xsa346-4.13-1.patch
 create mode 100644 system/xen/xsa/xsa346-4.13-2.patch
 create mode 100644 system/xen/xsa/xsa347-4.13-1.patch
 create mode 100644 system/xen/xsa/xsa347-4.13-2.patch
 create mode 100644 system/xen/xsa/xsa347-4.13-3.patch

diff --git a/system/xen/xsa/xsa333.patch b/system/xen/xsa/xsa333.patch
new file mode 100644
index 0000000000..6b86c942fa
--- /dev/null
+++ b/system/xen/xsa/xsa333.patch
@@ -0,0 +1,39 @@
+From: Andrew Cooper
+Subject: x86/pv: Handle the Intel-specific MSR_MISC_ENABLE correctly
+
+This MSR doesn't exist on AMD hardware, and switching away from the safe
+functions in the common MSR path was an erroneous change.
+
+Partially revert the change.
+
+This is XSA-333.
+ +Fixes: 4fdc932b3cc ("x86/Intel: drop another 32-bit leftover") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Reviewed-by: Wei Liu + +diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c +index efeb2a727e..6332c74b80 100644 +--- a/xen/arch/x86/pv/emul-priv-op.c ++++ b/xen/arch/x86/pv/emul-priv-op.c +@@ -924,7 +924,8 @@ static int read_msr(unsigned int reg, uint64_t *val, + return X86EMUL_OKAY; + + case MSR_IA32_MISC_ENABLE: +- rdmsrl(reg, *val); ++ if ( rdmsr_safe(reg, *val) ) ++ break; + *val = guest_misc_enable(*val); + return X86EMUL_OKAY; + +@@ -1059,7 +1060,8 @@ static int write_msr(unsigned int reg, uint64_t val, + break; + + case MSR_IA32_MISC_ENABLE: +- rdmsrl(reg, temp); ++ if ( rdmsr_safe(reg, temp) ) ++ break; + if ( val != guest_misc_enable(temp) ) + goto invalid; + return X86EMUL_OKAY; diff --git a/system/xen/xsa/xsa334.patch b/system/xen/xsa/xsa334.patch new file mode 100644 index 0000000000..4260cdb2b2 --- /dev/null +++ b/system/xen/xsa/xsa334.patch @@ -0,0 +1,51 @@ +From: Andrew Cooper +Subject: xen/memory: Don't skip the RCU unlock path in acquire_resource() + +In the case that an HVM Stubdomain makes an XENMEM_acquire_resource hypercall, +the FIXME path will bypass rcu_unlock_domain() on the way out of the function. + +Move the check to the start of the function. This does change the behaviour +of the get-size path for HVM Stubdomains, but that functionality is currently +broken and unused anyway, as well as being quite useless to entities which +can't actually map the resource anyway. + +This is XSA-334. + +Fixes: 83fa6552ce ("common: add a new mappable resource type: XENMEM_resource_grant_table") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/common/memory.c b/xen/common/memory.c +index 1a3c9ffb30..29741d8904 100644 +--- a/xen/common/memory.c ++++ b/xen/common/memory.c +@@ -1058,6 +1058,14 @@ static int acquire_resource( + xen_pfn_t mfn_list[32]; + int rc; + ++ /* ++ * FIXME: Until foreign pages inserted into the P2M are properly ++ * reference counted, it is unsafe to allow mapping of ++ * resource pages unless the caller is the hardware domain. ++ */ ++ if ( paging_mode_translate(currd) && !is_hardware_domain(currd) ) ++ return -EACCES; ++ + if ( copy_from_guest(&xmar, arg, 1) ) + return -EFAULT; + +@@ -1114,14 +1122,6 @@ static int acquire_resource( + xen_pfn_t gfn_list[ARRAY_SIZE(mfn_list)]; + unsigned int i; + +- /* +- * FIXME: Until foreign pages inserted into the P2M are properly +- * reference counted, it is unsafe to allow mapping of +- * resource pages unless the caller is the hardware domain. +- */ +- if ( !is_hardware_domain(currd) ) +- return -EACCES; +- + if ( copy_from_guest(gfn_list, xmar.frame_list, xmar.nr_frames) ) + rc = -EFAULT; + diff --git a/system/xen/xsa/xsa335-qemu.patch b/system/xen/xsa/xsa335-qemu.patch new file mode 100644 index 0000000000..5617502359 --- /dev/null +++ b/system/xen/xsa/xsa335-qemu.patch @@ -0,0 +1,84 @@ +From c5bd2924c6d6a5bcbffb8b5e7798a88970131c07 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 17 Aug 2020 08:34:22 +0200 +Subject: [PATCH] usb: fix setup_len init (CVE-2020-14364) + +Store calculated setup_len in a local variable, verify it, and only +write it to the struct (USBDevice->setup_len) in case it passed the +sanity checks. + +This prevents other code (do_token_{in,out} functions specifically) +from working with invalid USBDevice->setup_len values and overrunning +the USBDevice->setup_buf[] buffer. 
+ +Fixes: CVE-2020-14364 +Signed-off-by: Gerd Hoffmann +--- + hw/usb/core.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/hw/usb/core.c b/hw/usb/core.c +index 5abd128b6bc5..5234dcc73fea 100644 +--- a/hw/usb/core.c ++++ b/hw/usb/core.c +@@ -129,6 +129,7 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) + static void do_token_setup(USBDevice *s, USBPacket *p) + { + int request, value, index; ++ unsigned int setup_len; + + if (p->iov.size != 8) { + p->status = USB_RET_STALL; +@@ -138,14 +139,15 @@ static void do_token_setup(USBDevice *s, USBPacket *p) + usb_packet_copy(p, s->setup_buf, p->iov.size); + s->setup_index = 0; + p->actual_length = 0; +- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; +- if (s->setup_len > sizeof(s->data_buf)) { ++ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; ++ if (setup_len > sizeof(s->data_buf)) { + fprintf(stderr, + "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", +- s->setup_len, sizeof(s->data_buf)); ++ setup_len, sizeof(s->data_buf)); + p->status = USB_RET_STALL; + return; + } ++ s->setup_len = setup_len; + + request = (s->setup_buf[0] << 8) | s->setup_buf[1]; + value = (s->setup_buf[3] << 8) | s->setup_buf[2]; +@@ -259,26 +261,28 @@ static void do_token_out(USBDevice *s, USBPacket *p) + static void do_parameter(USBDevice *s, USBPacket *p) + { + int i, request, value, index; ++ unsigned int setup_len; + + for (i = 0; i < 8; i++) { + s->setup_buf[i] = p->parameter >> (i*8); + } + + s->setup_state = SETUP_STATE_PARAM; +- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; + s->setup_index = 0; + + request = (s->setup_buf[0] << 8) | s->setup_buf[1]; + value = (s->setup_buf[3] << 8) | s->setup_buf[2]; + index = (s->setup_buf[5] << 8) | s->setup_buf[4]; + +- if (s->setup_len > sizeof(s->data_buf)) { ++ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; ++ if (setup_len > sizeof(s->data_buf)) { + fprintf(stderr, + "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", +- s->setup_len, sizeof(s->data_buf)); ++ setup_len, sizeof(s->data_buf)); + p->status = USB_RET_STALL; + return; + } ++ s->setup_len = setup_len; + + if (p->pid == USB_TOKEN_OUT) { + usb_packet_copy(p, s->data_buf, s->setup_len); +-- +2.18.4 diff --git a/system/xen/xsa/xsa336.patch b/system/xen/xsa/xsa336.patch new file mode 100644 index 0000000000..b44c298b70 --- /dev/null +++ b/system/xen/xsa/xsa336.patch @@ -0,0 +1,283 @@ +From: Roger Pau Monné +Subject: x86/vpt: fix race when migrating timers between vCPUs + +The current vPT code will migrate the emulated timers between vCPUs +(change the pt->vcpu field) while just holding the destination lock, +either from create_periodic_time or pt_adjust_global_vcpu_target if +the global target is adjusted. Changing the periodic_timer vCPU field +in this way creates a race where a third party could grab the lock in +the unlocked region of pt_adjust_global_vcpu_target (or before +create_periodic_time performs the vcpu change) and then release the +lock from a different vCPU, creating a locking imbalance. + +Introduce a per-domain rwlock in order to protect periodic_time +migration between vCPU lists. Taking the lock in read mode prevents +any timer from being migrated to a different vCPU, while taking it in +write mode allows performing migration of timers across vCPUs. The +per-vcpu locks are still used to protect all the other fields from the +periodic_timer struct. 
+ +Note that such migration shouldn't happen frequently, and hence +there's no performance drop as a result of such locking. + +This is XSA-336. + +Reported-by: Igor Druzhinin +Tested-by: Igor Druzhinin +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +--- +Changes since v2: + - Re-order pt_adjust_vcpu to remove one if. + - Fix pt_lock to not call pt_vcpu_lock, as we might end up using a + stale value of pt->vcpu when taking the per-vcpu lock. + +Changes since v1: + - Use a per-domain rwlock to protect timer vCPU migration. + +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -658,6 +658,8 @@ int hvm_domain_initialise(struct domain + /* need link to containing domain */ + d->arch.hvm.pl_time->domain = d; + ++ rwlock_init(&d->arch.hvm.pl_time->pt_migrate); ++ + /* Set the default IO Bitmap. */ + if ( is_hardware_domain(d) ) + { +--- a/xen/arch/x86/hvm/vpt.c ++++ b/xen/arch/x86/hvm/vpt.c +@@ -153,23 +153,32 @@ static int pt_irq_masked(struct periodic + return 1; + } + +-static void pt_lock(struct periodic_time *pt) ++static void pt_vcpu_lock(struct vcpu *v) + { +- struct vcpu *v; ++ read_lock(&v->domain->arch.hvm.pl_time->pt_migrate); ++ spin_lock(&v->arch.hvm.tm_lock); ++} + +- for ( ; ; ) +- { +- v = pt->vcpu; +- spin_lock(&v->arch.hvm.tm_lock); +- if ( likely(pt->vcpu == v) ) +- break; +- spin_unlock(&v->arch.hvm.tm_lock); +- } ++static void pt_vcpu_unlock(struct vcpu *v) ++{ ++ spin_unlock(&v->arch.hvm.tm_lock); ++ read_unlock(&v->domain->arch.hvm.pl_time->pt_migrate); ++} ++ ++static void pt_lock(struct periodic_time *pt) ++{ ++ /* ++ * We cannot use pt_vcpu_lock here, because we need to acquire the ++ * per-domain lock first and then (re-)fetch the value of pt->vcpu, or ++ * else we might be using a stale value of pt->vcpu. 
++ */ ++ read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); ++ spin_lock(&pt->vcpu->arch.hvm.tm_lock); + } + + static void pt_unlock(struct periodic_time *pt) + { +- spin_unlock(&pt->vcpu->arch.hvm.tm_lock); ++ pt_vcpu_unlock(pt->vcpu); + } + + static void pt_process_missed_ticks(struct periodic_time *pt) +@@ -219,7 +228,7 @@ void pt_save_timer(struct vcpu *v) + if ( v->pause_flags & VPF_blocked ) + return; + +- spin_lock(&v->arch.hvm.tm_lock); ++ pt_vcpu_lock(v); + + list_for_each_entry ( pt, head, list ) + if ( !pt->do_not_freeze ) +@@ -227,7 +236,7 @@ void pt_save_timer(struct vcpu *v) + + pt_freeze_time(v); + +- spin_unlock(&v->arch.hvm.tm_lock); ++ pt_vcpu_unlock(v); + } + + void pt_restore_timer(struct vcpu *v) +@@ -235,7 +244,7 @@ void pt_restore_timer(struct vcpu *v) + struct list_head *head = &v->arch.hvm.tm_list; + struct periodic_time *pt; + +- spin_lock(&v->arch.hvm.tm_lock); ++ pt_vcpu_lock(v); + + list_for_each_entry ( pt, head, list ) + { +@@ -248,7 +257,7 @@ void pt_restore_timer(struct vcpu *v) + + pt_thaw_time(v); + +- spin_unlock(&v->arch.hvm.tm_lock); ++ pt_vcpu_unlock(v); + } + + static void pt_timer_fn(void *data) +@@ -309,7 +318,7 @@ int pt_update_irq(struct vcpu *v) + int irq, pt_vector = -1; + bool level; + +- spin_lock(&v->arch.hvm.tm_lock); ++ pt_vcpu_lock(v); + + earliest_pt = NULL; + max_lag = -1ULL; +@@ -339,7 +348,7 @@ int pt_update_irq(struct vcpu *v) + + if ( earliest_pt == NULL ) + { +- spin_unlock(&v->arch.hvm.tm_lock); ++ pt_vcpu_unlock(v); + return -1; + } + +@@ -347,7 +356,7 @@ int pt_update_irq(struct vcpu *v) + irq = earliest_pt->irq; + level = earliest_pt->level; + +- spin_unlock(&v->arch.hvm.tm_lock); ++ pt_vcpu_unlock(v); + + switch ( earliest_pt->source ) + { +@@ -394,7 +403,7 @@ int pt_update_irq(struct vcpu *v) + time_cb *cb = NULL; + void *cb_priv; + +- spin_lock(&v->arch.hvm.tm_lock); ++ pt_vcpu_lock(v); + /* Make sure the timer is still on the list. 
*/ + list_for_each_entry ( pt, &v->arch.hvm.tm_list, list ) + if ( pt == earliest_pt ) +@@ -404,7 +413,7 @@ int pt_update_irq(struct vcpu *v) + cb_priv = pt->priv; + break; + } +- spin_unlock(&v->arch.hvm.tm_lock); ++ pt_vcpu_unlock(v); + + if ( cb != NULL ) + cb(v, cb_priv); +@@ -441,12 +450,12 @@ void pt_intr_post(struct vcpu *v, struct + if ( intack.source == hvm_intsrc_vector ) + return; + +- spin_lock(&v->arch.hvm.tm_lock); ++ pt_vcpu_lock(v); + + pt = is_pt_irq(v, intack); + if ( pt == NULL ) + { +- spin_unlock(&v->arch.hvm.tm_lock); ++ pt_vcpu_unlock(v); + return; + } + +@@ -455,7 +464,7 @@ void pt_intr_post(struct vcpu *v, struct + cb = pt->cb; + cb_priv = pt->priv; + +- spin_unlock(&v->arch.hvm.tm_lock); ++ pt_vcpu_unlock(v); + + if ( cb != NULL ) + cb(v, cb_priv); +@@ -466,12 +475,12 @@ void pt_migrate(struct vcpu *v) + struct list_head *head = &v->arch.hvm.tm_list; + struct periodic_time *pt; + +- spin_lock(&v->arch.hvm.tm_lock); ++ pt_vcpu_lock(v); + + list_for_each_entry ( pt, head, list ) + migrate_timer(&pt->timer, v->processor); + +- spin_unlock(&v->arch.hvm.tm_lock); ++ pt_vcpu_unlock(v); + } + + void create_periodic_time( +@@ -490,7 +499,7 @@ void create_periodic_time( + + destroy_periodic_time(pt); + +- spin_lock(&v->arch.hvm.tm_lock); ++ write_lock(&v->domain->arch.hvm.pl_time->pt_migrate); + + pt->pending_intr_nr = 0; + pt->do_not_freeze = 0; +@@ -540,7 +549,7 @@ void create_periodic_time( + init_timer(&pt->timer, pt_timer_fn, pt, v->processor); + set_timer(&pt->timer, pt->scheduled); + +- spin_unlock(&v->arch.hvm.tm_lock); ++ write_unlock(&v->domain->arch.hvm.pl_time->pt_migrate); + } + + void destroy_periodic_time(struct periodic_time *pt) +@@ -565,30 +574,20 @@ void destroy_periodic_time(struct period + + static void pt_adjust_vcpu(struct periodic_time *pt, struct vcpu *v) + { +- int on_list; +- + ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic); + + if ( pt->vcpu == NULL ) + return; + +- pt_lock(pt); +- on_list = pt->on_list; +- if ( pt->on_list ) +- list_del(&pt->list); +- pt->on_list = 0; +- pt_unlock(pt); +- +- spin_lock(&v->arch.hvm.tm_lock); ++ write_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); + pt->vcpu = v; +- if ( on_list ) ++ if ( pt->on_list ) + { +- pt->on_list = 1; ++ list_del(&pt->list); + list_add(&pt->list, &v->arch.hvm.tm_list); +- + migrate_timer(&pt->timer, v->processor); + } +- spin_unlock(&v->arch.hvm.tm_lock); ++ write_unlock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); + } + + void pt_adjust_global_vcpu_target(struct vcpu *v) +--- a/xen/include/asm-x86/hvm/vpt.h ++++ b/xen/include/asm-x86/hvm/vpt.h +@@ -128,6 +128,13 @@ struct pl_time { /* platform time */ + struct RTCState vrtc; + struct HPETState vhpet; + struct PMTState vpmt; ++ /* ++ * rwlock to prevent periodic_time vCPU migration. Take the lock in read ++ * mode in order to prevent the vcpu field of periodic_time from changing. ++ * Lock must be taken in write mode when changes to the vcpu field are ++ * performed, as it allows exclusive access to all the timers of a domain. ++ */ ++ rwlock_t pt_migrate; + /* guest_time = Xen sys time + stime_offset */ + int64_t stime_offset; + /* Ensures monotonicity in appropriate timer modes. 
*/ diff --git a/system/xen/xsa/xsa337-4.13-1.patch b/system/xen/xsa/xsa337-4.13-1.patch new file mode 100644 index 0000000000..2091626f4f --- /dev/null +++ b/system/xen/xsa/xsa337-4.13-1.patch @@ -0,0 +1,87 @@ +From: Roger Pau Monné +Subject: x86/msi: get rid of read_msi_msg + +It's safer and faster to just use the cached last written +(untranslated) MSI message stored in msi_desc for the single user that +calls read_msi_msg. + +This also prevents relying on the data read from the device MSI +registers in order to figure out the index into the IOMMU interrupt +remapping table, which is not safe. + +This is part of XSA-337. + +Reported-by: Andrew Cooper +Requested-by: Andrew Cooper +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/msi.c ++++ b/xen/arch/x86/msi.c +@@ -183,54 +183,6 @@ void msi_compose_msg(unsigned vector, co + MSI_DATA_VECTOR(vector); + } + +-static bool read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) +-{ +- switch ( entry->msi_attrib.type ) +- { +- case PCI_CAP_ID_MSI: +- { +- struct pci_dev *dev = entry->dev; +- int pos = entry->msi_attrib.pos; +- uint16_t data; +- +- msg->address_lo = pci_conf_read32(dev->sbdf, +- msi_lower_address_reg(pos)); +- if ( entry->msi_attrib.is_64 ) +- { +- msg->address_hi = pci_conf_read32(dev->sbdf, +- msi_upper_address_reg(pos)); +- data = pci_conf_read16(dev->sbdf, msi_data_reg(pos, 1)); +- } +- else +- { +- msg->address_hi = 0; +- data = pci_conf_read16(dev->sbdf, msi_data_reg(pos, 0)); +- } +- msg->data = data; +- break; +- } +- case PCI_CAP_ID_MSIX: +- { +- void __iomem *base = entry->mask_base; +- +- if ( unlikely(!msix_memory_decoded(entry->dev, +- entry->msi_attrib.pos)) ) +- return false; +- msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); +- msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); +- msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); +- break; +- } +- default: +- BUG(); +- } +- +- if ( iommu_intremap ) +- iommu_read_msi_from_ire(entry, msg); +- +- return true; +-} +- + static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) + { + entry->msg = *msg; +@@ -302,10 +254,7 @@ void set_msi_affinity(struct irq_desc *d + + ASSERT(spin_is_locked(&desc->lock)); + +- memset(&msg, 0, sizeof(msg)); +- if ( !read_msi_msg(msi_desc, &msg) ) +- return; +- ++ msg = msi_desc->msg; + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(desc->arch.vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; diff --git a/system/xen/xsa/xsa337-4.13-2.patch b/system/xen/xsa/xsa337-4.13-2.patch new file mode 100644 index 0000000000..bdefd37cdc --- /dev/null +++ b/system/xen/xsa/xsa337-4.13-2.patch @@ -0,0 +1,181 @@ +From: Jan Beulich +Subject: x86/MSI-X: restrict reading of table/PBA bases from BARs + +When assigned to less trusted or un-trusted guests, devices may change +state behind our backs (they may e.g. get reset by means we may not know +about). Therefore we should avoid reading BARs from hardware once a +device is no longer owned by Dom0. Furthermore when we can't read a BAR, +or when we read zero, we shouldn't instead use the caller provided +address unless that caller can be trusted. + +Re-arrange the logic in msix_capability_init() such that only Dom0 (and +only if the device isn't DomU-owned yet) or calls through +PHYSDEVOP_prepare_msix will actually result in the reading of the +respective BAR register(s). 
Additionally do so only as long as in-use +table entries are known (note that invocation of PHYSDEVOP_prepare_msix +counts as a "pseudo" entry). In all other uses the value already +recorded will get used instead. + +Clear the recorded values in _pci_cleanup_msix() as well as on the one +affected error path. (Adjust this error path to also avoid blindly +disabling MSI-X when it was enabled on entry to the function.) + +While moving around variable declarations (in many cases to reduce their +scopes), also adjust some of their types. + +This is part of XSA-337. + +Signed-off-by: Jan Beulich +Reviewed-by: Roger Pau Monné + +--- a/xen/arch/x86/msi.c ++++ b/xen/arch/x86/msi.c +@@ -769,16 +769,14 @@ static int msix_capability_init(struct p + { + struct arch_msix *msix = dev->msix; + struct msi_desc *entry = NULL; +- int vf; + u16 control; + u64 table_paddr; + u32 table_offset; +- u8 bir, pbus, pslot, pfunc; + u16 seg = dev->seg; + u8 bus = dev->bus; + u8 slot = PCI_SLOT(dev->devfn); + u8 func = PCI_FUNC(dev->devfn); +- bool maskall = msix->host_maskall; ++ bool maskall = msix->host_maskall, zap_on_error = false; + unsigned int pos = pci_find_cap_offset(seg, bus, slot, func, + PCI_CAP_ID_MSIX); + +@@ -820,43 +818,45 @@ static int msix_capability_init(struct p + + /* Locate MSI-X table region */ + table_offset = pci_conf_read32(dev->sbdf, msix_table_offset_reg(pos)); +- bir = (u8)(table_offset & PCI_MSIX_BIRMASK); +- table_offset &= ~PCI_MSIX_BIRMASK; ++ if ( !msix->used_entries && ++ (!msi || ++ (is_hardware_domain(current->domain) && ++ (dev->domain == current->domain || dev->domain == dom_io))) ) ++ { ++ unsigned int bir = table_offset & PCI_MSIX_BIRMASK, pbus, pslot, pfunc; ++ int vf; ++ paddr_t pba_paddr; ++ unsigned int pba_offset; + +- if ( !dev->info.is_virtfn ) +- { +- pbus = bus; +- pslot = slot; +- pfunc = func; +- vf = -1; +- } +- else +- { +- pbus = dev->info.physfn.bus; +- pslot = PCI_SLOT(dev->info.physfn.devfn); +- pfunc = PCI_FUNC(dev->info.physfn.devfn); +- vf = PCI_BDF2(dev->bus, dev->devfn); +- } +- +- table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf); +- WARN_ON(msi && msi->table_base != table_paddr); +- if ( !table_paddr ) +- { +- if ( !msi || !msi->table_base ) ++ if ( !dev->info.is_virtfn ) + { +- pci_conf_write16(dev->sbdf, msix_control_reg(pos), +- control & ~PCI_MSIX_FLAGS_ENABLE); +- xfree(entry); +- return -ENXIO; ++ pbus = bus; ++ pslot = slot; ++ pfunc = func; ++ vf = -1; ++ } ++ else ++ { ++ pbus = dev->info.physfn.bus; ++ pslot = PCI_SLOT(dev->info.physfn.devfn); ++ pfunc = PCI_FUNC(dev->info.physfn.devfn); ++ vf = PCI_BDF2(dev->bus, dev->devfn); + } +- table_paddr = msi->table_base; +- } +- table_paddr += table_offset; + +- if ( !msix->used_entries ) +- { +- u64 pba_paddr; +- u32 pba_offset; ++ table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf); ++ WARN_ON(msi && msi->table_base != table_paddr); ++ if ( !table_paddr ) ++ { ++ if ( !msi || !msi->table_base ) ++ { ++ pci_conf_write16(dev->sbdf, msix_control_reg(pos), ++ control & ~PCI_MSIX_FLAGS_ENABLE); ++ xfree(entry); ++ return -ENXIO; ++ } ++ table_paddr = msi->table_base; ++ } ++ table_paddr += table_offset & ~PCI_MSIX_BIRMASK; + + msix->table.first = PFN_DOWN(table_paddr); + msix->table.last = PFN_DOWN(table_paddr + +@@ -875,7 +875,18 @@ static int msix_capability_init(struct p + BITS_TO_LONGS(msix->nr_entries) - 1); + WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->pba.first, + msix->pba.last)); ++ ++ zap_on_error = true; ++ } ++ else if ( !msix->table.first ) ++ { ++ 
pci_conf_write16(dev->sbdf, msix_control_reg(pos), control); ++ xfree(entry); ++ return -ENODATA; + } ++ else ++ table_paddr = (msix->table.first << PAGE_SHIFT) + ++ (table_offset & ~PCI_MSIX_BIRMASK & ~PAGE_MASK); + + if ( entry ) + { +@@ -886,8 +897,15 @@ static int msix_capability_init(struct p + + if ( idx < 0 ) + { +- pci_conf_write16(dev->sbdf, msix_control_reg(pos), +- control & ~PCI_MSIX_FLAGS_ENABLE); ++ if ( zap_on_error ) ++ { ++ msix->table.first = 0; ++ msix->pba.first = 0; ++ ++ control &= ~PCI_MSIX_FLAGS_ENABLE; ++ } ++ ++ pci_conf_write16(dev->sbdf, msix_control_reg(pos), control); + xfree(entry); + return idx; + } +@@ -1076,9 +1094,14 @@ static void _pci_cleanup_msix(struct arc + if ( rangeset_remove_range(mmio_ro_ranges, msix->table.first, + msix->table.last) ) + WARN(); ++ msix->table.first = 0; ++ msix->table.last = 0; ++ + if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first, + msix->pba.last) ) + WARN(); ++ msix->pba.first = 0; ++ msix->pba.last = 0; + } + } + diff --git a/system/xen/xsa/xsa338.patch b/system/xen/xsa/xsa338.patch new file mode 100644 index 0000000000..776521990e --- /dev/null +++ b/system/xen/xsa/xsa338.patch @@ -0,0 +1,42 @@ +From: Jan Beulich +Subject: evtchn: relax port_is_valid() + +To avoid ports potentially becoming invalid behind the back of certain +other functions (due to ->max_evtchn shrinking) because of +- a guest invoking evtchn_reset() and from a 2nd vCPU opening new + channels in parallel (see also XSA-343), +- alloc_unbound_xen_event_channel() produced channels living above the + 2-level range (see also XSA-342), +drop the max_evtchns check from port_is_valid(). For a port for which +the function once returned "true", the returned value may not turn into +"false" later on. The function's result may only depend on bounds which +can only ever grow (which is the case for d->valid_evtchns). + +This also eliminates a false sense of safety, utilized by some of the +users (see again XSA-343): Without a suitable lock held, d->max_evtchns +may change at any time, and hence deducing that certain other operations +are safe when port_is_valid() returned true is not legitimate. The +opportunities to abuse this may get widened by the change here +(depending on guest and host configuration), but will be taken care of +by the other XSA. + +This is XSA-338. + +Fixes: 48974e6ce52e ("evtchn: use a per-domain variable for the max number of event channels") +Signed-off-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Reviewed-by: Julien Grall +--- +v5: New, split from larger patch. + +--- a/xen/include/xen/event.h ++++ b/xen/include/xen/event.h +@@ -107,8 +107,6 @@ void notify_via_xen_event_channel(struct + + static inline bool_t port_is_valid(struct domain *d, unsigned int p) + { +- if ( p >= d->max_evtchns ) +- return 0; + return p < read_atomic(&d->valid_evtchns); + } + diff --git a/system/xen/xsa/xsa339.patch b/system/xen/xsa/xsa339.patch new file mode 100644 index 0000000000..3311ae093f --- /dev/null +++ b/system/xen/xsa/xsa339.patch @@ -0,0 +1,76 @@ +From: Andrew Cooper +Subject: x86/pv: Avoid double exception injection + +There is at least one path (SYSENTER with NT set, Xen converts to #GP) which +ends up injecting the #GP fault twice, first in compat_sysenter(), and then a +second time in compat_test_all_events(), due to the stale TBF_EXCEPTION left +in TRAPBOUNCE_flags. + +The guest kernel sees the second fault first, which is a kernel level #GP +pointing at the head of the #GP handler, and is therefore a userspace +trigger-able DoS. 
+ +This particular bug has bitten us several times before, so rearrange +{compat_,}create_bounce_frame() to clobber TRAPBOUNCE on success, rather than +leaving this task to one area of code which isn't used uniformly. + +Other scenarios which might result in a double injection (e.g. two calls +directly to compat_create_bounce_frame) will now crash the guest, which is far +more obvious than letting the kernel run with corrupt state. + +This is XSA-339 + +Fixes: fdac9515607b ("x86: clear EFLAGS.NT in SYSENTER entry path") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S +index c3e62f8734..73619f57ca 100644 +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -78,7 +78,6 @@ compat_process_softirqs: + sti + .Lcompat_bounce_exception: + call compat_create_bounce_frame +- movb $0, TRAPBOUNCE_flags(%rdx) + jmp compat_test_all_events + + ALIGN +@@ -352,7 +351,13 @@ __UNLIKELY_END(compat_bounce_null_selector) + movl %eax,UREGS_cs+8(%rsp) + movl TRAPBOUNCE_eip(%rdx),%eax + movl %eax,UREGS_rip+8(%rsp) ++ ++ /* Trapbounce complete. Clobber state to avoid an erroneous second injection. */ ++ xor %eax, %eax ++ mov %ax, TRAPBOUNCE_cs(%rdx) ++ mov %al, TRAPBOUNCE_flags(%rdx) + ret ++ + .section .fixup,"ax" + .Lfx13: + xorl %edi,%edi +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 1e880eb9f6..71a00e846b 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -90,7 +90,6 @@ process_softirqs: + sti + .Lbounce_exception: + call create_bounce_frame +- movb $0, TRAPBOUNCE_flags(%rdx) + jmp test_all_events + + ALIGN +@@ -512,6 +511,11 @@ UNLIKELY_START(z, create_bounce_frame_bad_bounce_ip) + jmp asm_domain_crash_synchronous /* Does not return */ + __UNLIKELY_END(create_bounce_frame_bad_bounce_ip) + movq %rax,UREGS_rip+8(%rsp) ++ ++ /* Trapbounce complete. Clobber state to avoid an erroneous second injection. */ ++ xor %eax, %eax ++ mov %rax, TRAPBOUNCE_eip(%rdx) ++ mov %al, TRAPBOUNCE_flags(%rdx) + ret + + .pushsection .fixup, "ax", @progbits diff --git a/system/xen/xsa/xsa340.patch b/system/xen/xsa/xsa340.patch new file mode 100644 index 0000000000..38d04da465 --- /dev/null +++ b/system/xen/xsa/xsa340.patch @@ -0,0 +1,65 @@ +From: Julien Grall +Subject: xen/evtchn: Add missing barriers when accessing/allocating an event channel + +While the allocation of a bucket is always performed with the per-domain +lock, the bucket may be accessed without the lock taken (for instance, see +evtchn_send()). + +Instead such sites relies on port_is_valid() to return a non-zero value +when the port has a struct evtchn associated to it. The function will +mostly check whether the port is less than d->valid_evtchns as all the +buckets/event channels should be allocated up to that point. + +Unfortunately a compiler is free to re-order the assignment in +evtchn_allocate_port() so it would be possible to have d->valid_evtchns +updated before the new bucket has finish to allocate. + +Additionally on Arm, even if this was compiled "correctly", the +processor can still re-order the memory access. + +Add a write memory barrier in the allocation side and a read memory +barrier when the port is valid to prevent any re-ordering issue. + +This is XSA-340. 
+ +Reported-by: Julien Grall +Signed-off-by: Julien Grall +Reviewed-by: Stefano Stabellini + +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -178,6 +178,13 @@ int evtchn_allocate_port(struct domain * + return -ENOMEM; + bucket_from_port(d, port) = chn; + ++ /* ++ * d->valid_evtchns is used to check whether the bucket can be ++ * accessed without the per-domain lock. Therefore, ++ * d->valid_evtchns should be seen *after* the new bucket has ++ * been setup. ++ */ ++ smp_wmb(); + write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET); + } + +--- a/xen/include/xen/event.h ++++ b/xen/include/xen/event.h +@@ -107,7 +107,17 @@ void notify_via_xen_event_channel(struct + + static inline bool_t port_is_valid(struct domain *d, unsigned int p) + { +- return p < read_atomic(&d->valid_evtchns); ++ if ( p >= read_atomic(&d->valid_evtchns) ) ++ return false; ++ ++ /* ++ * The caller will usually access the event channel afterwards and ++ * may be done without taking the per-domain lock. The barrier is ++ * going in pair the smp_wmb() barrier in evtchn_allocate_port(). ++ */ ++ smp_rmb(); ++ ++ return true; + } + + static inline struct evtchn *evtchn_from_port(struct domain *d, unsigned int p) diff --git a/system/xen/xsa/xsa342-4.13.patch b/system/xen/xsa/xsa342-4.13.patch new file mode 100644 index 0000000000..334baf1b69 --- /dev/null +++ b/system/xen/xsa/xsa342-4.13.patch @@ -0,0 +1,145 @@ +From: Jan Beulich +Subject: evtchn/x86: enforce correct upper limit for 32-bit guests + +The recording of d->max_evtchns in evtchn_2l_init(), in particular with +the limited set of callers of the function, is insufficient. Neither for +PV nor for HVM guests the bitness is known at domain_create() time, yet +the upper bound in 2-level mode depends upon guest bitness. Recording +too high a limit "allows" x86 32-bit domains to open not properly usable +event channels, management of which (inside Xen) would then result in +corruption of the shared info and vCPU info structures. + +Keep the upper limit dynamic for the 2-level case, introducing a helper +function to retrieve the effective limit. This helper is now supposed to +be private to the event channel code. The used in do_poll() and +domain_dump_evtchn_info() weren't consistent with port uses elsewhere +and hence get switched to port_is_valid(). + +Furthermore FIFO mode's setup_ports() gets adjusted to loop only up to +the prior ABI limit, rather than all the way up to the new one. + +Finally a word on the change to do_poll(): Accessing ->max_evtchns +without holding a suitable lock was never safe, as it as well as +->evtchn_port_ops may change behind do_poll()'s back. Using +port_is_valid() instead widens some the window for potential abuse, +until we've dealt with the race altogether (see XSA-343). + +This is XSA-342. 
+ +Reported-by: Julien Grall +Fixes: 48974e6ce52e ("evtchn: use a per-domain variable for the max number of event channels") +Signed-off-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Reviewed-by: Julien Grall + +--- a/xen/common/event_2l.c ++++ b/xen/common/event_2l.c +@@ -103,7 +103,6 @@ static const struct evtchn_port_ops evtc + void evtchn_2l_init(struct domain *d) + { + d->evtchn_port_ops = &evtchn_port_ops_2l; +- d->max_evtchns = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d); + } + + /* +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -151,7 +151,7 @@ static void free_evtchn_bucket(struct do + + int evtchn_allocate_port(struct domain *d, evtchn_port_t port) + { +- if ( port > d->max_evtchn_port || port >= d->max_evtchns ) ++ if ( port > d->max_evtchn_port || port >= max_evtchns(d) ) + return -ENOSPC; + + if ( port_is_valid(d, port) ) +@@ -1396,13 +1396,11 @@ static void domain_dump_evtchn_info(stru + + spin_lock(&d->event_lock); + +- for ( port = 1; port < d->max_evtchns; ++port ) ++ for ( port = 1; port_is_valid(d, port); ++port ) + { + const struct evtchn *chn; + char *ssid; + +- if ( !port_is_valid(d, port) ) +- continue; + chn = evtchn_from_port(d, port); + if ( chn->state == ECS_FREE ) + continue; +--- a/xen/common/event_fifo.c ++++ b/xen/common/event_fifo.c +@@ -478,7 +478,7 @@ static void cleanup_event_array(struct d + d->evtchn_fifo = NULL; + } + +-static void setup_ports(struct domain *d) ++static void setup_ports(struct domain *d, unsigned int prev_evtchns) + { + unsigned int port; + +@@ -488,7 +488,7 @@ static void setup_ports(struct domain *d + * - save its pending state. + * - set default priority. + */ +- for ( port = 1; port < d->max_evtchns; port++ ) ++ for ( port = 1; port < prev_evtchns; port++ ) + { + struct evtchn *evtchn; + +@@ -546,6 +546,8 @@ int evtchn_fifo_init_control(struct evtc + if ( !d->evtchn_fifo ) + { + struct vcpu *vcb; ++ /* Latch the value before it changes during setup_event_array(). */ ++ unsigned int prev_evtchns = max_evtchns(d); + + for_each_vcpu ( d, vcb ) { + rc = setup_control_block(vcb); +@@ -562,8 +564,7 @@ int evtchn_fifo_init_control(struct evtc + goto error; + + d->evtchn_port_ops = &evtchn_port_ops_fifo; +- d->max_evtchns = EVTCHN_FIFO_NR_CHANNELS; +- setup_ports(d); ++ setup_ports(d, prev_evtchns); + } + else + rc = map_control_block(v, gfn, offset); +--- a/xen/common/schedule.c ++++ b/xen/common/schedule.c +@@ -1434,7 +1434,7 @@ static long do_poll(struct sched_poll *s + goto out; + + rc = -EINVAL; +- if ( port >= d->max_evtchns ) ++ if ( !port_is_valid(d, port) ) + goto out; + + rc = 0; +--- a/xen/include/xen/event.h ++++ b/xen/include/xen/event.h +@@ -105,6 +105,12 @@ void notify_via_xen_event_channel(struct + #define bucket_from_port(d, p) \ + ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET]) + ++static inline unsigned int max_evtchns(const struct domain *d) ++{ ++ return d->evtchn_fifo ? EVTCHN_FIFO_NR_CHANNELS ++ : BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d); ++} ++ + static inline bool_t port_is_valid(struct domain *d, unsigned int p) + { + if ( p >= read_atomic(&d->valid_evtchns) ) +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -382,7 +382,6 @@ struct domain + /* Event channel information. 
*/ + struct evtchn *evtchn; /* first bucket only */ + struct evtchn **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */ +- unsigned int max_evtchns; /* number supported by ABI */ + unsigned int max_evtchn_port; /* max permitted port number */ + unsigned int valid_evtchns; /* number of allocated event channels */ + spinlock_t event_lock; diff --git a/system/xen/xsa/xsa343-1.patch b/system/xen/xsa/xsa343-1.patch new file mode 100644 index 0000000000..0abbc03e8d --- /dev/null +++ b/system/xen/xsa/xsa343-1.patch @@ -0,0 +1,199 @@ +From: Jan Beulich +Subject: evtchn: evtchn_reset() shouldn't succeed with still-open ports + +While the function closes all ports, it does so without holding any +lock, and hence racing requests may be issued causing new ports to get +opened. This would have been problematic in particular if such a newly +opened port had a port number above the new implementation limit (i.e. +when switching from FIFO to 2-level) after the reset, as prior to +"evtchn: relax port_is_valid()" this could have led to e.g. +evtchn_close()'s "BUG_ON(!port_is_valid(d2, port2))" to trigger. + +Introduce a counter of active ports and check that it's (still) no +larger then the number of Xen internally used ones after obtaining the +necessary lock in evtchn_reset(). + +As to the access model of the new {active,xen}_evtchns fields - while +all writes get done using write_atomic(), reads ought to use +read_atomic() only when outside of a suitably locked region. + +Note that as of now evtchn_bind_virq() and evtchn_bind_ipi() don't have +a need to call check_free_port(). + +This is part of XSA-343. + +Signed-off-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Reviewed-by: Julien Grall +--- +v7: Drop optimization from evtchn_reset(). +v6: Fix loop exit condition in evtchn_reset(). Use {read,write}_atomic() + also for xen_evtchns. +v5: Move increment in alloc_unbound_xen_event_channel() out of the inner + locked region. +v4: Account for Xen internal ports. +v3: Document intended access next to new struct field. +v2: Add comment to check_free_port(). Drop commented out calls. + +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -188,6 +188,8 @@ int evtchn_allocate_port(struct domain * + write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET); + } + ++ write_atomic(&d->active_evtchns, d->active_evtchns + 1); ++ + return 0; + } + +@@ -211,11 +213,26 @@ static int get_free_port(struct domain * + return -ENOSPC; + } + ++/* ++ * Check whether a port is still marked free, and if so update the domain ++ * counter accordingly. To be used on function exit paths. ++ */ ++static void check_free_port(struct domain *d, evtchn_port_t port) ++{ ++ if ( port_is_valid(d, port) && ++ evtchn_from_port(d, port)->state == ECS_FREE ) ++ write_atomic(&d->active_evtchns, d->active_evtchns - 1); ++} ++ + void evtchn_free(struct domain *d, struct evtchn *chn) + { + /* Clear pending event to avoid unexpected behavior on re-bind. */ + evtchn_port_clear_pending(d, chn); + ++ if ( consumer_is_xen(chn) ) ++ write_atomic(&d->xen_evtchns, d->xen_evtchns - 1); ++ write_atomic(&d->active_evtchns, d->active_evtchns - 1); ++ + /* Reset binding to vcpu0 when the channel is freed. 
*/ + chn->state = ECS_FREE; + chn->notify_vcpu_id = 0; +@@ -258,6 +275,7 @@ static long evtchn_alloc_unbound(evtchn_ + alloc->port = port; + + out: ++ check_free_port(d, port); + spin_unlock(&d->event_lock); + rcu_unlock_domain(d); + +@@ -351,6 +369,7 @@ static long evtchn_bind_interdomain(evtc + bind->local_port = lport; + + out: ++ check_free_port(ld, lport); + spin_unlock(&ld->event_lock); + if ( ld != rd ) + spin_unlock(&rd->event_lock); +@@ -488,7 +507,7 @@ static long evtchn_bind_pirq(evtchn_bind + struct domain *d = current->domain; + struct vcpu *v = d->vcpu[0]; + struct pirq *info; +- int port, pirq = bind->pirq; ++ int port = 0, pirq = bind->pirq; + long rc; + + if ( (pirq < 0) || (pirq >= d->nr_pirqs) ) +@@ -536,6 +555,7 @@ static long evtchn_bind_pirq(evtchn_bind + arch_evtchn_bind_pirq(d, pirq); + + out: ++ check_free_port(d, port); + spin_unlock(&d->event_lock); + + return rc; +@@ -1011,10 +1031,10 @@ int evtchn_unmask(unsigned int port) + return 0; + } + +- + int evtchn_reset(struct domain *d) + { + unsigned int i; ++ int rc = 0; + + if ( d != current->domain && !d->controller_pause_count ) + return -EINVAL; +@@ -1024,7 +1044,9 @@ int evtchn_reset(struct domain *d) + + spin_lock(&d->event_lock); + +- if ( d->evtchn_fifo ) ++ if ( d->active_evtchns > d->xen_evtchns ) ++ rc = -EAGAIN; ++ else if ( d->evtchn_fifo ) + { + /* Switching back to 2-level ABI. */ + evtchn_fifo_destroy(d); +@@ -1033,7 +1055,7 @@ int evtchn_reset(struct domain *d) + + spin_unlock(&d->event_lock); + +- return 0; ++ return rc; + } + + static long evtchn_set_priority(const struct evtchn_set_priority *set_priority) +@@ -1219,10 +1241,9 @@ int alloc_unbound_xen_event_channel( + + spin_lock(&ld->event_lock); + +- rc = get_free_port(ld); ++ port = rc = get_free_port(ld); + if ( rc < 0 ) + goto out; +- port = rc; + chn = evtchn_from_port(ld, port); + + rc = xsm_evtchn_unbound(XSM_TARGET, ld, chn, remote_domid); +@@ -1238,7 +1259,10 @@ int alloc_unbound_xen_event_channel( + + spin_unlock(&chn->lock); + ++ write_atomic(&ld->xen_evtchns, ld->xen_evtchns + 1); ++ + out: ++ check_free_port(ld, port); + spin_unlock(&ld->event_lock); + + return rc < 0 ? rc : port; +@@ -1314,6 +1338,7 @@ int evtchn_init(struct domain *d, unsign + return -EINVAL; + } + evtchn_from_port(d, 0)->state = ECS_RESERVED; ++ write_atomic(&d->active_evtchns, 0); + + #if MAX_VIRT_CPUS > BITS_PER_LONG + d->poll_mask = xzalloc_array(unsigned long, BITS_TO_LONGS(d->max_vcpus)); +@@ -1340,6 +1365,8 @@ void evtchn_destroy(struct domain *d) + for ( i = 0; port_is_valid(d, i); i++ ) + evtchn_close(d, i, 0); + ++ ASSERT(!d->active_evtchns); ++ + clear_global_virq_handlers(d); + + evtchn_fifo_destroy(d); +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -361,6 +361,16 @@ struct domain + struct evtchn **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */ + unsigned int max_evtchn_port; /* max permitted port number */ + unsigned int valid_evtchns; /* number of allocated event channels */ ++ /* ++ * Number of in-use event channels. Writers should use write_atomic(). ++ * Readers need to use read_atomic() only when not holding event_lock. ++ */ ++ unsigned int active_evtchns; ++ /* ++ * Number of event channels used internally by Xen (not subject to ++ * EVTCHNOP_reset). Read/write access like for active_evtchns. 
++ */ ++ unsigned int xen_evtchns; + spinlock_t event_lock; + const struct evtchn_port_ops *evtchn_port_ops; + struct evtchn_fifo_domain *evtchn_fifo; diff --git a/system/xen/xsa/xsa343-2.patch b/system/xen/xsa/xsa343-2.patch new file mode 100644 index 0000000000..b8eb4998f1 --- /dev/null +++ b/system/xen/xsa/xsa343-2.patch @@ -0,0 +1,295 @@ +From: Jan Beulich +Subject: evtchn: convert per-channel lock to be IRQ-safe + +... in order for send_guest_{global,vcpu}_virq() to be able to make use +of it. + +This is part of XSA-343. + +Signed-off-by: Jan Beulich +Acked-by: Julien Grall +--- +v6: New. +--- +TBD: This is the "dumb" conversion variant. In a couple of cases the + slightly simpler spin_{,un}lock_irq() could apparently be used. + +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -248,6 +248,7 @@ static long evtchn_alloc_unbound(evtchn_ + int port; + domid_t dom = alloc->dom; + long rc; ++ unsigned long flags; + + d = rcu_lock_domain_by_any_id(dom); + if ( d == NULL ) +@@ -263,14 +264,14 @@ static long evtchn_alloc_unbound(evtchn_ + if ( rc ) + goto out; + +- spin_lock(&chn->lock); ++ spin_lock_irqsave(&chn->lock, flags); + + chn->state = ECS_UNBOUND; + if ( (chn->u.unbound.remote_domid = alloc->remote_dom) == DOMID_SELF ) + chn->u.unbound.remote_domid = current->domain->domain_id; + evtchn_port_init(d, chn); + +- spin_unlock(&chn->lock); ++ spin_unlock_irqrestore(&chn->lock, flags); + + alloc->port = port; + +@@ -283,26 +284,32 @@ static long evtchn_alloc_unbound(evtchn_ + } + + +-static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn) ++static unsigned long double_evtchn_lock(struct evtchn *lchn, ++ struct evtchn *rchn) + { +- if ( lchn < rchn ) ++ unsigned long flags; ++ ++ if ( lchn <= rchn ) + { +- spin_lock(&lchn->lock); +- spin_lock(&rchn->lock); ++ spin_lock_irqsave(&lchn->lock, flags); ++ if ( lchn != rchn ) ++ spin_lock(&rchn->lock); + } + else + { +- if ( lchn != rchn ) +- spin_lock(&rchn->lock); ++ spin_lock_irqsave(&rchn->lock, flags); + spin_lock(&lchn->lock); + } ++ ++ return flags; + } + +-static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn) ++static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn, ++ unsigned long flags) + { +- spin_unlock(&lchn->lock); + if ( lchn != rchn ) +- spin_unlock(&rchn->lock); ++ spin_unlock(&lchn->lock); ++ spin_unlock_irqrestore(&rchn->lock, flags); + } + + static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind) +@@ -312,6 +319,7 @@ static long evtchn_bind_interdomain(evtc + int lport, rport = bind->remote_port; + domid_t rdom = bind->remote_dom; + long rc; ++ unsigned long flags; + + if ( rdom == DOMID_SELF ) + rdom = current->domain->domain_id; +@@ -347,7 +355,7 @@ static long evtchn_bind_interdomain(evtc + if ( rc ) + goto out; + +- double_evtchn_lock(lchn, rchn); ++ flags = double_evtchn_lock(lchn, rchn); + + lchn->u.interdomain.remote_dom = rd; + lchn->u.interdomain.remote_port = rport; +@@ -364,7 +372,7 @@ static long evtchn_bind_interdomain(evtc + */ + evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn); + +- double_evtchn_unlock(lchn, rchn); ++ double_evtchn_unlock(lchn, rchn, flags); + + bind->local_port = lport; + +@@ -387,6 +395,7 @@ int evtchn_bind_virq(evtchn_bind_virq_t + struct domain *d = current->domain; + int virq = bind->virq, vcpu = bind->vcpu; + int rc = 0; ++ unsigned long flags; + + if ( (virq < 0) || (virq >= ARRAY_SIZE(v->virq_to_evtchn)) ) + return -EINVAL; +@@ -424,14 +433,14 @@ int evtchn_bind_virq(evtchn_bind_virq_t + 
+ chn = evtchn_from_port(d, port); + +- spin_lock(&chn->lock); ++ spin_lock_irqsave(&chn->lock, flags); + + chn->state = ECS_VIRQ; + chn->notify_vcpu_id = vcpu; + chn->u.virq = virq; + evtchn_port_init(d, chn); + +- spin_unlock(&chn->lock); ++ spin_unlock_irqrestore(&chn->lock, flags); + + v->virq_to_evtchn[virq] = bind->port = port; + +@@ -448,6 +457,7 @@ static long evtchn_bind_ipi(evtchn_bind_ + struct domain *d = current->domain; + int port, vcpu = bind->vcpu; + long rc = 0; ++ unsigned long flags; + + if ( domain_vcpu(d, vcpu) == NULL ) + return -ENOENT; +@@ -459,13 +469,13 @@ static long evtchn_bind_ipi(evtchn_bind_ + + chn = evtchn_from_port(d, port); + +- spin_lock(&chn->lock); ++ spin_lock_irqsave(&chn->lock, flags); + + chn->state = ECS_IPI; + chn->notify_vcpu_id = vcpu; + evtchn_port_init(d, chn); + +- spin_unlock(&chn->lock); ++ spin_unlock_irqrestore(&chn->lock, flags); + + bind->port = port; + +@@ -509,6 +519,7 @@ static long evtchn_bind_pirq(evtchn_bind + struct pirq *info; + int port = 0, pirq = bind->pirq; + long rc; ++ unsigned long flags; + + if ( (pirq < 0) || (pirq >= d->nr_pirqs) ) + return -EINVAL; +@@ -541,14 +552,14 @@ static long evtchn_bind_pirq(evtchn_bind + goto out; + } + +- spin_lock(&chn->lock); ++ spin_lock_irqsave(&chn->lock, flags); + + chn->state = ECS_PIRQ; + chn->u.pirq.irq = pirq; + link_pirq_port(port, chn, v); + evtchn_port_init(d, chn); + +- spin_unlock(&chn->lock); ++ spin_unlock_irqrestore(&chn->lock, flags); + + bind->port = port; + +@@ -569,6 +580,7 @@ int evtchn_close(struct domain *d1, int + struct evtchn *chn1, *chn2; + int port2; + long rc = 0; ++ unsigned long flags; + + again: + spin_lock(&d1->event_lock); +@@ -668,14 +680,14 @@ int evtchn_close(struct domain *d1, int + BUG_ON(chn2->state != ECS_INTERDOMAIN); + BUG_ON(chn2->u.interdomain.remote_dom != d1); + +- double_evtchn_lock(chn1, chn2); ++ flags = double_evtchn_lock(chn1, chn2); + + evtchn_free(d1, chn1); + + chn2->state = ECS_UNBOUND; + chn2->u.unbound.remote_domid = d1->domain_id; + +- double_evtchn_unlock(chn1, chn2); ++ double_evtchn_unlock(chn1, chn2, flags); + + goto out; + +@@ -683,9 +695,9 @@ int evtchn_close(struct domain *d1, int + BUG(); + } + +- spin_lock(&chn1->lock); ++ spin_lock_irqsave(&chn1->lock, flags); + evtchn_free(d1, chn1); +- spin_unlock(&chn1->lock); ++ spin_unlock_irqrestore(&chn1->lock, flags); + + out: + if ( d2 != NULL ) +@@ -705,13 +717,14 @@ int evtchn_send(struct domain *ld, unsig + struct evtchn *lchn, *rchn; + struct domain *rd; + int rport, ret = 0; ++ unsigned long flags; + + if ( !port_is_valid(ld, lport) ) + return -EINVAL; + + lchn = evtchn_from_port(ld, lport); + +- spin_lock(&lchn->lock); ++ spin_lock_irqsave(&lchn->lock, flags); + + /* Guest cannot send via a Xen-attached event channel. 
*/ + if ( unlikely(consumer_is_xen(lchn)) ) +@@ -746,7 +759,7 @@ int evtchn_send(struct domain *ld, unsig + } + + out: +- spin_unlock(&lchn->lock); ++ spin_unlock_irqrestore(&lchn->lock, flags); + + return ret; + } +@@ -1238,6 +1251,7 @@ int alloc_unbound_xen_event_channel( + { + struct evtchn *chn; + int port, rc; ++ unsigned long flags; + + spin_lock(&ld->event_lock); + +@@ -1250,14 +1264,14 @@ int alloc_unbound_xen_event_channel( + if ( rc ) + goto out; + +- spin_lock(&chn->lock); ++ spin_lock_irqsave(&chn->lock, flags); + + chn->state = ECS_UNBOUND; + chn->xen_consumer = get_xen_consumer(notification_fn); + chn->notify_vcpu_id = lvcpu; + chn->u.unbound.remote_domid = remote_domid; + +- spin_unlock(&chn->lock); ++ spin_unlock_irqrestore(&chn->lock, flags); + + write_atomic(&ld->xen_evtchns, ld->xen_evtchns + 1); + +@@ -1280,11 +1294,12 @@ void notify_via_xen_event_channel(struct + { + struct evtchn *lchn, *rchn; + struct domain *rd; ++ unsigned long flags; + + ASSERT(port_is_valid(ld, lport)); + lchn = evtchn_from_port(ld, lport); + +- spin_lock(&lchn->lock); ++ spin_lock_irqsave(&lchn->lock, flags); + + if ( likely(lchn->state == ECS_INTERDOMAIN) ) + { +@@ -1294,7 +1309,7 @@ void notify_via_xen_event_channel(struct + evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn); + } + +- spin_unlock(&lchn->lock); ++ spin_unlock_irqrestore(&lchn->lock, flags); + } + + void evtchn_check_pollers(struct domain *d, unsigned int port) diff --git a/system/xen/xsa/xsa343-3.patch b/system/xen/xsa/xsa343-3.patch new file mode 100644 index 0000000000..e513e308eb --- /dev/null +++ b/system/xen/xsa/xsa343-3.patch @@ -0,0 +1,392 @@ +From: Jan Beulich +Subject: evtchn: address races with evtchn_reset() + +Neither d->evtchn_port_ops nor max_evtchns(d) may be used in an entirely +lock-less manner, as both may change by a racing evtchn_reset(). In the +common case, at least one of the domain's event lock or the per-channel +lock needs to be held. In the specific case of the inter-domain sending +by evtchn_send() and notify_via_xen_event_channel() holding the other +side's per-channel lock is sufficient, as the channel can't change state +without both per-channel locks held. Without such a channel changing +state, evtchn_reset() can't complete successfully. + +Lock-free accesses continue to be permitted for the shim (calling some +otherwise internal event channel functions), as this happens while the +domain is in effectively single-threaded mode. Special care also needs +taking for the shim's marking of in-use ports as ECS_RESERVED (allowing +use of such ports in the shim case is okay because switching into and +hence also out of FIFO mode is impossihble there). + +As a side effect, certain operations on Xen bound event channels which +were mistakenly permitted so far (e.g. unmask or poll) will be refused +now. + +This is part of XSA-343. + +Reported-by: Julien Grall +Signed-off-by: Jan Beulich +Acked-by: Julien Grall +--- +v9: Add arch_evtchn_is_special() to fix PV shim. +v8: Add BUILD_BUG_ON() in evtchn_usable(). +v7: Add locking related comment ahead of struct evtchn_port_ops. +v6: New. +--- +TBD: I've been considering to move some of the wrappers from xen/event.h + into event_channel.c (or even drop them altogether), when they + require external locking (e.g. evtchn_port_init() or + evtchn_port_set_priority()). Does anyone have a strong opinion + either way? 
+ +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2488,14 +2488,24 @@ static void dump_irqs(unsigned char key) + + for ( i = 0; i < action->nr_guests; ) + { ++ struct evtchn *evtchn; ++ unsigned int pending = 2, masked = 2; ++ + d = action->guest[i++]; + pirq = domain_irq_to_pirq(d, irq); + info = pirq_info(d, pirq); ++ evtchn = evtchn_from_port(d, info->evtchn); ++ local_irq_disable(); ++ if ( spin_trylock(&evtchn->lock) ) ++ { ++ pending = evtchn_is_pending(d, evtchn); ++ masked = evtchn_is_masked(d, evtchn); ++ spin_unlock(&evtchn->lock); ++ } ++ local_irq_enable(); + printk("d%d:%3d(%c%c%c)%c", +- d->domain_id, pirq, +- evtchn_port_is_pending(d, info->evtchn) ? 'P' : '-', +- evtchn_port_is_masked(d, info->evtchn) ? 'M' : '-', +- info->masked ? 'M' : '-', ++ d->domain_id, pirq, "-P?"[pending], ++ "-M?"[masked], info->masked ? 'M' : '-', + i < action->nr_guests ? ',' : '\n'); + } + } +--- a/xen/arch/x86/pv/shim.c ++++ b/xen/arch/x86/pv/shim.c +@@ -660,8 +660,11 @@ void pv_shim_inject_evtchn(unsigned int + if ( port_is_valid(guest, port) ) + { + struct evtchn *chn = evtchn_from_port(guest, port); ++ unsigned long flags; + ++ spin_lock_irqsave(&chn->lock, flags); + evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn); ++ spin_unlock_irqrestore(&chn->lock, flags); + } + } + +--- a/xen/common/event_2l.c ++++ b/xen/common/event_2l.c +@@ -63,8 +63,10 @@ static void evtchn_2l_unmask(struct doma + } + } + +-static bool evtchn_2l_is_pending(const struct domain *d, evtchn_port_t port) ++static bool evtchn_2l_is_pending(const struct domain *d, ++ const struct evtchn *evtchn) + { ++ evtchn_port_t port = evtchn->port; + unsigned int max_ports = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d); + + ASSERT(port < max_ports); +@@ -72,8 +74,10 @@ static bool evtchn_2l_is_pending(const s + guest_test_bit(d, port, &shared_info(d, evtchn_pending))); + } + +-static bool evtchn_2l_is_masked(const struct domain *d, evtchn_port_t port) ++static bool evtchn_2l_is_masked(const struct domain *d, ++ const struct evtchn *evtchn) + { ++ evtchn_port_t port = evtchn->port; + unsigned int max_ports = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d); + + ASSERT(port < max_ports); +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -156,8 +156,9 @@ int evtchn_allocate_port(struct domain * + + if ( port_is_valid(d, port) ) + { +- if ( evtchn_from_port(d, port)->state != ECS_FREE || +- evtchn_port_is_busy(d, port) ) ++ const struct evtchn *chn = evtchn_from_port(d, port); ++ ++ if ( chn->state != ECS_FREE || evtchn_is_busy(d, chn) ) + return -EBUSY; + } + else +@@ -774,6 +775,7 @@ void send_guest_vcpu_virq(struct vcpu *v + unsigned long flags; + int port; + struct domain *d; ++ struct evtchn *chn; + + ASSERT(!virq_is_global(virq)); + +@@ -784,7 +786,10 @@ void send_guest_vcpu_virq(struct vcpu *v + goto out; + + d = v->domain; +- evtchn_port_set_pending(d, v->vcpu_id, evtchn_from_port(d, port)); ++ chn = evtchn_from_port(d, port); ++ spin_lock(&chn->lock); ++ evtchn_port_set_pending(d, v->vcpu_id, chn); ++ spin_unlock(&chn->lock); + + out: + spin_unlock_irqrestore(&v->virq_lock, flags); +@@ -813,7 +818,9 @@ void send_guest_global_virq(struct domai + goto out; + + chn = evtchn_from_port(d, port); ++ spin_lock(&chn->lock); + evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); ++ spin_unlock(&chn->lock); + + out: + spin_unlock_irqrestore(&v->virq_lock, flags); +@@ -823,6 +830,7 @@ void send_guest_pirq(struct domain *d, c + { + int port; + struct evtchn *chn; ++ unsigned long flags; + + /* + 
* PV guests: It should not be possible to race with __evtchn_close(). The +@@ -837,7 +845,9 @@ void send_guest_pirq(struct domain *d, c + } + + chn = evtchn_from_port(d, port); ++ spin_lock_irqsave(&chn->lock, flags); + evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); ++ spin_unlock_irqrestore(&chn->lock, flags); + } + + static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly; +@@ -1034,12 +1044,15 @@ int evtchn_unmask(unsigned int port) + { + struct domain *d = current->domain; + struct evtchn *evtchn; ++ unsigned long flags; + + if ( unlikely(!port_is_valid(d, port)) ) + return -EINVAL; + + evtchn = evtchn_from_port(d, port); ++ spin_lock_irqsave(&evtchn->lock, flags); + evtchn_port_unmask(d, evtchn); ++ spin_unlock_irqrestore(&evtchn->lock, flags); + + return 0; + } +@@ -1449,8 +1462,8 @@ static void domain_dump_evtchn_info(stru + + printk(" %4u [%d/%d/", + port, +- evtchn_port_is_pending(d, port), +- evtchn_port_is_masked(d, port)); ++ evtchn_is_pending(d, chn), ++ evtchn_is_masked(d, chn)); + evtchn_port_print_state(d, chn); + printk("]: s=%d n=%d x=%d", + chn->state, chn->notify_vcpu_id, chn->xen_consumer); +--- a/xen/common/event_fifo.c ++++ b/xen/common/event_fifo.c +@@ -296,23 +296,26 @@ static void evtchn_fifo_unmask(struct do + evtchn_fifo_set_pending(v, evtchn); + } + +-static bool evtchn_fifo_is_pending(const struct domain *d, evtchn_port_t port) ++static bool evtchn_fifo_is_pending(const struct domain *d, ++ const struct evtchn *evtchn) + { +- const event_word_t *word = evtchn_fifo_word_from_port(d, port); ++ const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port); + + return word && guest_test_bit(d, EVTCHN_FIFO_PENDING, word); + } + +-static bool_t evtchn_fifo_is_masked(const struct domain *d, evtchn_port_t port) ++static bool_t evtchn_fifo_is_masked(const struct domain *d, ++ const struct evtchn *evtchn) + { +- const event_word_t *word = evtchn_fifo_word_from_port(d, port); ++ const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port); + + return !word || guest_test_bit(d, EVTCHN_FIFO_MASKED, word); + } + +-static bool_t evtchn_fifo_is_busy(const struct domain *d, evtchn_port_t port) ++static bool_t evtchn_fifo_is_busy(const struct domain *d, ++ const struct evtchn *evtchn) + { +- const event_word_t *word = evtchn_fifo_word_from_port(d, port); ++ const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port); + + return word && guest_test_bit(d, EVTCHN_FIFO_LINKED, word); + } +--- a/xen/include/asm-x86/event.h ++++ b/xen/include/asm-x86/event.h +@@ -47,4 +47,10 @@ static inline bool arch_virq_is_global(u + return true; + } + ++#ifdef CONFIG_PV_SHIM ++# include ++# define arch_evtchn_is_special(chn) \ ++ (pv_shim && (chn)->port && (chn)->state == ECS_RESERVED) ++#endif ++ + #endif +--- a/xen/include/xen/event.h ++++ b/xen/include/xen/event.h +@@ -133,6 +133,24 @@ static inline struct evtchn *evtchn_from + return bucket_from_port(d, p) + (p % EVTCHNS_PER_BUCKET); + } + ++/* ++ * "usable" as in "by a guest", i.e. Xen consumed channels are assumed to be ++ * taken care of separately where used for Xen's internal purposes. ++ */ ++static bool evtchn_usable(const struct evtchn *evtchn) ++{ ++ if ( evtchn->xen_consumer ) ++ return false; ++ ++#ifdef arch_evtchn_is_special ++ if ( arch_evtchn_is_special(evtchn) ) ++ return true; ++#endif ++ ++ BUILD_BUG_ON(ECS_FREE > ECS_RESERVED); ++ return evtchn->state > ECS_RESERVED; ++} ++ + /* Wait on a Xen-attached event channel. 
*/ + #define wait_on_xen_event_channel(port, condition) \ + do { \ +@@ -165,19 +183,24 @@ int evtchn_reset(struct domain *d); + + /* + * Low-level event channel port ops. ++ * ++ * All hooks have to be called with a lock held which prevents the channel ++ * from changing state. This may be the domain event lock, the per-channel ++ * lock, or in the case of sending interdomain events also the other side's ++ * per-channel lock. Exceptions apply in certain cases for the PV shim. + */ + struct evtchn_port_ops { + void (*init)(struct domain *d, struct evtchn *evtchn); + void (*set_pending)(struct vcpu *v, struct evtchn *evtchn); + void (*clear_pending)(struct domain *d, struct evtchn *evtchn); + void (*unmask)(struct domain *d, struct evtchn *evtchn); +- bool (*is_pending)(const struct domain *d, evtchn_port_t port); +- bool (*is_masked)(const struct domain *d, evtchn_port_t port); ++ bool (*is_pending)(const struct domain *d, const struct evtchn *evtchn); ++ bool (*is_masked)(const struct domain *d, const struct evtchn *evtchn); + /* + * Is the port unavailable because it's still being cleaned up + * after being closed? + */ +- bool (*is_busy)(const struct domain *d, evtchn_port_t port); ++ bool (*is_busy)(const struct domain *d, const struct evtchn *evtchn); + int (*set_priority)(struct domain *d, struct evtchn *evtchn, + unsigned int priority); + void (*print_state)(struct domain *d, const struct evtchn *evtchn); +@@ -193,38 +216,67 @@ static inline void evtchn_port_set_pendi + unsigned int vcpu_id, + struct evtchn *evtchn) + { +- d->evtchn_port_ops->set_pending(d->vcpu[vcpu_id], evtchn); ++ if ( evtchn_usable(evtchn) ) ++ d->evtchn_port_ops->set_pending(d->vcpu[vcpu_id], evtchn); + } + + static inline void evtchn_port_clear_pending(struct domain *d, + struct evtchn *evtchn) + { +- d->evtchn_port_ops->clear_pending(d, evtchn); ++ if ( evtchn_usable(evtchn) ) ++ d->evtchn_port_ops->clear_pending(d, evtchn); + } + + static inline void evtchn_port_unmask(struct domain *d, + struct evtchn *evtchn) + { +- d->evtchn_port_ops->unmask(d, evtchn); ++ if ( evtchn_usable(evtchn) ) ++ d->evtchn_port_ops->unmask(d, evtchn); + } + +-static inline bool evtchn_port_is_pending(const struct domain *d, +- evtchn_port_t port) ++static inline bool evtchn_is_pending(const struct domain *d, ++ const struct evtchn *evtchn) + { +- return d->evtchn_port_ops->is_pending(d, port); ++ return evtchn_usable(evtchn) && d->evtchn_port_ops->is_pending(d, evtchn); + } + +-static inline bool evtchn_port_is_masked(const struct domain *d, +- evtchn_port_t port) ++static inline bool evtchn_port_is_pending(struct domain *d, evtchn_port_t port) + { +- return d->evtchn_port_ops->is_masked(d, port); ++ struct evtchn *evtchn = evtchn_from_port(d, port); ++ bool rc; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&evtchn->lock, flags); ++ rc = evtchn_is_pending(d, evtchn); ++ spin_unlock_irqrestore(&evtchn->lock, flags); ++ ++ return rc; ++} ++ ++static inline bool evtchn_is_masked(const struct domain *d, ++ const struct evtchn *evtchn) ++{ ++ return !evtchn_usable(evtchn) || d->evtchn_port_ops->is_masked(d, evtchn); ++} ++ ++static inline bool evtchn_port_is_masked(struct domain *d, evtchn_port_t port) ++{ ++ struct evtchn *evtchn = evtchn_from_port(d, port); ++ bool rc; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&evtchn->lock, flags); ++ rc = evtchn_is_masked(d, evtchn); ++ spin_unlock_irqrestore(&evtchn->lock, flags); ++ ++ return rc; + } + +-static inline bool evtchn_port_is_busy(const struct domain *d, +- evtchn_port_t port) 
++static inline bool evtchn_is_busy(const struct domain *d, ++ const struct evtchn *evtchn) + { + return d->evtchn_port_ops->is_busy && +- d->evtchn_port_ops->is_busy(d, port); ++ d->evtchn_port_ops->is_busy(d, evtchn); + } + + static inline int evtchn_port_set_priority(struct domain *d, +@@ -233,6 +285,8 @@ static inline int evtchn_port_set_priori + { + if ( !d->evtchn_port_ops->set_priority ) + return -ENOSYS; ++ if ( !evtchn_usable(evtchn) ) ++ return -EACCES; + return d->evtchn_port_ops->set_priority(d, evtchn, priority); + } + diff --git a/system/xen/xsa/xsa344-4.13-1.patch b/system/xen/xsa/xsa344-4.13-1.patch new file mode 100644 index 0000000000..d8e9b3f43f --- /dev/null +++ b/system/xen/xsa/xsa344-4.13-1.patch @@ -0,0 +1,130 @@ +From: Jan Beulich +Subject: evtchn: arrange for preemption in evtchn_destroy() + +Especially closing of fully established interdomain channels can take +quite some time, due to the locking involved. Therefore we shouldn't +assume we can clean up still active ports all in one go. Besides adding +the necessary preemption check, also avoid pointlessly starting from +(or now really ending at) 0; 1 is the lowest numbered port which may +need closing. + +Since we're now reducing ->valid_evtchns, free_xen_event_channel(), +and (at least to be on the safe side) notify_via_xen_event_channel() +need to cope with attempts to close / unbind from / send through already +closed (and no longer valid, as per port_is_valid()) ports. + +This is part of XSA-344. + +Signed-off-by: Jan Beulich +Acked-by: Julien Grall +Reviewed-by: Stefano Stabellini + +--- a/xen/common/domain.c ++++ b/xen/common/domain.c +@@ -770,12 +770,14 @@ int domain_kill(struct domain *d) + return domain_kill(d); + d->is_dying = DOMDYING_dying; + argo_destroy(d); +- evtchn_destroy(d); + gnttab_release_mappings(d); + vnuma_destroy(d->vnuma); + domain_set_outstanding_pages(d, 0); + /* fallthrough */ + case DOMDYING_dying: ++ rc = evtchn_destroy(d); ++ if ( rc ) ++ break; + rc = domain_relinquish_resources(d); + if ( rc != 0 ) + break; +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -1297,7 +1297,16 @@ int alloc_unbound_xen_event_channel( + + void free_xen_event_channel(struct domain *d, int port) + { +- BUG_ON(!port_is_valid(d, port)); ++ if ( !port_is_valid(d, port) ) ++ { ++ /* ++ * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing ++ * with the spin_barrier() and BUG_ON() in evtchn_destroy(). ++ */ ++ smp_rmb(); ++ BUG_ON(!d->is_dying); ++ return; ++ } + + evtchn_close(d, port, 0); + } +@@ -1309,7 +1318,17 @@ void notify_via_xen_event_channel(struct + struct domain *rd; + unsigned long flags; + +- ASSERT(port_is_valid(ld, lport)); ++ if ( !port_is_valid(ld, lport) ) ++ { ++ /* ++ * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing ++ * with the spin_barrier() and BUG_ON() in evtchn_destroy(). ++ */ ++ smp_rmb(); ++ ASSERT(ld->is_dying); ++ return; ++ } ++ + lchn = evtchn_from_port(ld, lport); + + spin_lock_irqsave(&lchn->lock, flags); +@@ -1380,8 +1399,7 @@ int evtchn_init(struct domain *d, unsign + return 0; + } + +- +-void evtchn_destroy(struct domain *d) ++int evtchn_destroy(struct domain *d) + { + unsigned int i; + +@@ -1390,14 +1408,29 @@ void evtchn_destroy(struct domain *d) + spin_barrier(&d->event_lock); + + /* Close all existing event channels. 
*/ +- for ( i = 0; port_is_valid(d, i); i++ ) ++ for ( i = d->valid_evtchns; --i; ) ++ { + evtchn_close(d, i, 0); + ++ /* ++ * Avoid preempting when called from domain_create()'s error path, ++ * and don't check too often (choice of frequency is arbitrary). ++ */ ++ if ( i && !(i & 0x3f) && d->is_dying != DOMDYING_dead && ++ hypercall_preempt_check() ) ++ { ++ write_atomic(&d->valid_evtchns, i); ++ return -ERESTART; ++ } ++ } ++ + ASSERT(!d->active_evtchns); + + clear_global_virq_handlers(d); + + evtchn_fifo_destroy(d); ++ ++ return 0; + } + + +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -136,7 +136,7 @@ struct evtchn + } __attribute__((aligned(64))); + + int evtchn_init(struct domain *d, unsigned int max_port); +-void evtchn_destroy(struct domain *d); /* from domain_kill */ ++int evtchn_destroy(struct domain *d); /* from domain_kill */ + void evtchn_destroy_final(struct domain *d); /* from complete_domain_destroy */ + + struct waitqueue_vcpu; diff --git a/system/xen/xsa/xsa344-4.13-2.patch b/system/xen/xsa/xsa344-4.13-2.patch new file mode 100644 index 0000000000..3f0339498f --- /dev/null +++ b/system/xen/xsa/xsa344-4.13-2.patch @@ -0,0 +1,203 @@ +From: Jan Beulich +Subject: evtchn: arrange for preemption in evtchn_reset() + +Like for evtchn_destroy() looping over all possible event channels to +close them can take a significant amount of time. Unlike done there, we +can't alter domain properties (i.e. d->valid_evtchns) here. Borrow, in a +lightweight form, the paging domctl continuation concept, redirecting +the continuations to different sub-ops. Just like there this is to be +able to allow for predictable overall results of the involved sub-ops: +Racing requests should either complete or be refused. + +Note that a domain can't interfere with an already started (by a remote +domain) reset, due to being paused. It can prevent a remote reset from +happening by leaving a reset unfinished, but that's only going to affect +itself. + +This is part of XSA-344. 
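The continuation shape both XSA-344 patches rely on can be pictured with the following minimal, self-contained C sketch. NR_PORTS, close_port(), preempt_check() and the -1 return value are stand-ins for Xen's event channel state, evtchn_close(), hypercall_preempt_check() and -ERESTART; they are assumptions for illustration, not the real interfaces.

#include <stdbool.h>
#include <stdio.h>

#define NR_PORTS 1000            /* hypothetical port count, not Xen's */

static unsigned int next_port;   /* progress recorded for the continuation */

static void close_port(unsigned int port)
{
    (void)port;                  /* stand-in for evtchn_close() */
}

static bool preempt_check(unsigned int i)
{
    return i && !(i & 0x3f);     /* pretend preemption is due every 64 ports */
}

/* One pass over the ports; asks to be re-entered instead of running to
 * completion, the way the continuations above return -ERESTART. */
static int reset_ports(bool resuming)
{
    unsigned int i = resuming ? next_port : 1;

    for ( ; i < NR_PORTS; i++ )
    {
        close_port(i);

        if ( preempt_check(i) )
        {
            next_port = i + 1;   /* remember where to pick up again */
            return -1;           /* stand-in for -ERESTART */
        }
    }

    next_port = 0;
    return 0;
}

int main(void)
{
    bool resuming = false;

    /* The caller keeps re-invoking the sub-op until it completes. */
    while ( reset_ports(resuming) )
        resuming = true;

    puts("all ports closed");
    return 0;
}

The key property, as in the patches above, is that progress is recorded before bailing out, so a racing request either completes or is refused rather than observing a half-done reset.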
+ +Signed-off-by: Jan Beulich +Acked-by: Julien Grall +Reviewed-by: Stefano Stabellini + +--- a/xen/common/domain.c ++++ b/xen/common/domain.c +@@ -1214,7 +1214,7 @@ void domain_unpause_except_self(struct d + domain_unpause(d); + } + +-int domain_soft_reset(struct domain *d) ++int domain_soft_reset(struct domain *d, bool resuming) + { + struct vcpu *v; + int rc; +@@ -1228,7 +1228,7 @@ int domain_soft_reset(struct domain *d) + } + spin_unlock(&d->shutdown_lock); + +- rc = evtchn_reset(d); ++ rc = evtchn_reset(d, resuming); + if ( rc ) + return rc; + +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -572,12 +572,22 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe + } + + case XEN_DOMCTL_soft_reset: ++ case XEN_DOMCTL_soft_reset_cont: + if ( d == current->domain ) /* no domain_pause() */ + { + ret = -EINVAL; + break; + } +- ret = domain_soft_reset(d); ++ ret = domain_soft_reset(d, op->cmd == XEN_DOMCTL_soft_reset_cont); ++ if ( ret == -ERESTART ) ++ { ++ op->cmd = XEN_DOMCTL_soft_reset_cont; ++ if ( !__copy_field_to_guest(u_domctl, op, cmd) ) ++ ret = hypercall_create_continuation(__HYPERVISOR_domctl, ++ "h", u_domctl); ++ else ++ ret = -EFAULT; ++ } + break; + + case XEN_DOMCTL_destroydomain: +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -1057,7 +1057,7 @@ int evtchn_unmask(unsigned int port) + return 0; + } + +-int evtchn_reset(struct domain *d) ++int evtchn_reset(struct domain *d, bool resuming) + { + unsigned int i; + int rc = 0; +@@ -1065,11 +1065,40 @@ int evtchn_reset(struct domain *d) + if ( d != current->domain && !d->controller_pause_count ) + return -EINVAL; + +- for ( i = 0; port_is_valid(d, i); i++ ) ++ spin_lock(&d->event_lock); ++ ++ /* ++ * If we are resuming, then start where we stopped. Otherwise, check ++ * that a reset operation is not already in progress, and if none is, ++ * record that this is now the case. ++ */ ++ i = resuming ? d->next_evtchn : !d->next_evtchn; ++ if ( i > d->next_evtchn ) ++ d->next_evtchn = i; ++ ++ spin_unlock(&d->event_lock); ++ ++ if ( !i ) ++ return -EBUSY; ++ ++ for ( ; port_is_valid(d, i); i++ ) ++ { + evtchn_close(d, i, 1); + ++ /* NB: Choice of frequency is arbitrary. 
*/ ++ if ( !(i & 0x3f) && hypercall_preempt_check() ) ++ { ++ spin_lock(&d->event_lock); ++ d->next_evtchn = i; ++ spin_unlock(&d->event_lock); ++ return -ERESTART; ++ } ++ } ++ + spin_lock(&d->event_lock); + ++ d->next_evtchn = 0; ++ + if ( d->active_evtchns > d->xen_evtchns ) + rc = -EAGAIN; + else if ( d->evtchn_fifo ) +@@ -1204,7 +1233,8 @@ long do_event_channel_op(int cmd, XEN_GU + break; + } + +- case EVTCHNOP_reset: { ++ case EVTCHNOP_reset: ++ case EVTCHNOP_reset_cont: { + struct evtchn_reset reset; + struct domain *d; + +@@ -1217,9 +1247,13 @@ long do_event_channel_op(int cmd, XEN_GU + + rc = xsm_evtchn_reset(XSM_TARGET, current->domain, d); + if ( !rc ) +- rc = evtchn_reset(d); ++ rc = evtchn_reset(d, cmd == EVTCHNOP_reset_cont); + + rcu_unlock_domain(d); ++ ++ if ( rc == -ERESTART ) ++ rc = hypercall_create_continuation(__HYPERVISOR_event_channel_op, ++ "ih", EVTCHNOP_reset_cont, arg); + break; + } + +--- a/xen/include/public/domctl.h ++++ b/xen/include/public/domctl.h +@@ -1152,7 +1152,10 @@ struct xen_domctl { + #define XEN_DOMCTL_iomem_permission 20 + #define XEN_DOMCTL_ioport_permission 21 + #define XEN_DOMCTL_hypercall_init 22 +-#define XEN_DOMCTL_arch_setup 23 /* Obsolete IA64 only */ ++#ifdef __XEN__ ++/* #define XEN_DOMCTL_arch_setup 23 Obsolete IA64 only */ ++#define XEN_DOMCTL_soft_reset_cont 23 ++#endif + #define XEN_DOMCTL_settimeoffset 24 + #define XEN_DOMCTL_getvcpuaffinity 25 + #define XEN_DOMCTL_real_mode_area 26 /* Obsolete PPC only */ +--- a/xen/include/public/event_channel.h ++++ b/xen/include/public/event_channel.h +@@ -74,6 +74,9 @@ + #define EVTCHNOP_init_control 11 + #define EVTCHNOP_expand_array 12 + #define EVTCHNOP_set_priority 13 ++#ifdef __XEN__ ++#define EVTCHNOP_reset_cont 14 ++#endif + /* ` } */ + + typedef uint32_t evtchn_port_t; +--- a/xen/include/xen/event.h ++++ b/xen/include/xen/event.h +@@ -171,7 +171,7 @@ void evtchn_check_pollers(struct domain + void evtchn_2l_init(struct domain *d); + + /* Close all event channels and reset to 2-level ABI. */ +-int evtchn_reset(struct domain *d); ++int evtchn_reset(struct domain *d, bool resuming); + + /* + * Low-level event channel port ops. +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -394,6 +394,8 @@ struct domain + * EVTCHNOP_reset). Read/write access like for active_evtchns. + */ + unsigned int xen_evtchns; ++ /* Port to resume from in evtchn_reset(), when in a continuation. 
*/ ++ unsigned int next_evtchn; + spinlock_t event_lock; + const struct evtchn_port_ops *evtchn_port_ops; + struct evtchn_fifo_domain *evtchn_fifo; +@@ -663,7 +665,7 @@ int domain_shutdown(struct domain *d, u8 + void domain_resume(struct domain *d); + void domain_pause_for_debugger(void); + +-int domain_soft_reset(struct domain *d); ++int domain_soft_reset(struct domain *d, bool resuming); + + int vcpu_start_shutdown_deferral(struct vcpu *v); + void vcpu_end_shutdown_deferral(struct vcpu *v); diff --git a/system/xen/xsa/xsa345-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch b/system/xen/xsa/xsa345-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch new file mode 100644 index 0000000000..d325385a56 --- /dev/null +++ b/system/xen/xsa/xsa345-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch @@ -0,0 +1,94 @@ +From b3e0d4e37b7902533a463812374947d4d6d2e463 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Sat, 11 Jan 2020 21:57:41 +0000 +Subject: [PATCH 1/3] x86/mm: Refactor map_pages_to_xen to have only a single + exit path + +We will soon need to perform clean-ups before returning. + +No functional change. + +This is part of XSA-345. + +Reported-by: Hongyan Xia +Signed-off-by: Wei Liu +Signed-off-by: Hongyan Xia +Signed-off-by: George Dunlap +Acked-by: Jan Beulich +--- + xen/arch/x86/mm.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 30dffb68e8..133a393875 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -5187,6 +5187,7 @@ int map_pages_to_xen( + l2_pgentry_t *pl2e, ol2e; + l1_pgentry_t *pl1e, ol1e; + unsigned int i; ++ int rc = -ENOMEM; + + #define flush_flags(oldf) do { \ + unsigned int o_ = (oldf); \ +@@ -5207,7 +5208,8 @@ int map_pages_to_xen( + l3_pgentry_t ol3e, *pl3e = virt_to_xen_l3e(virt); + + if ( !pl3e ) +- return -ENOMEM; ++ goto out; ++ + ol3e = *pl3e; + + if ( cpu_has_page1gb && +@@ -5295,7 +5297,7 @@ int map_pages_to_xen( + + pl2e = alloc_xen_pagetable(); + if ( pl2e == NULL ) +- return -ENOMEM; ++ goto out; + + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + l2e_write(pl2e + i, +@@ -5324,7 +5326,7 @@ int map_pages_to_xen( + + pl2e = virt_to_xen_l2e(virt); + if ( !pl2e ) +- return -ENOMEM; ++ goto out; + + if ( ((((virt >> PAGE_SHIFT) | mfn_x(mfn)) & + ((1u << PAGETABLE_ORDER) - 1)) == 0) && +@@ -5367,7 +5369,7 @@ int map_pages_to_xen( + { + pl1e = virt_to_xen_l1e(virt); + if ( pl1e == NULL ) +- return -ENOMEM; ++ goto out; + } + else if ( l2e_get_flags(*pl2e) & _PAGE_PSE ) + { +@@ -5394,7 +5396,7 @@ int map_pages_to_xen( + + pl1e = alloc_xen_pagetable(); + if ( pl1e == NULL ) +- return -ENOMEM; ++ goto out; + + for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) + l1e_write(&pl1e[i], +@@ -5538,7 +5540,10 @@ int map_pages_to_xen( + + #undef flush_flags + +- return 0; ++ rc = 0; ++ ++ out: ++ return rc; + } + + int populate_pt_range(unsigned long virt, unsigned long nr_mfns) +-- +2.25.1 + diff --git a/system/xen/xsa/xsa345-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch b/system/xen/xsa/xsa345-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch new file mode 100644 index 0000000000..836bed681a --- /dev/null +++ b/system/xen/xsa/xsa345-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch @@ -0,0 +1,68 @@ +From 9f6f35b833d295acaaa2d8ff8cf309bf688cfd50 Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Sat, 11 Jan 2020 21:57:42 +0000 +Subject: [PATCH 2/3] x86/mm: Refactor modify_xen_mappings to have one exit + path + 
+We will soon need to perform clean-ups before returning. + +No functional change. + +This is part of XSA-345. + +Reported-by: Hongyan Xia +Signed-off-by: Wei Liu +Signed-off-by: Hongyan Xia +Signed-off-by: George Dunlap +Acked-by: Jan Beulich +--- + xen/arch/x86/mm.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 133a393875..af726d3274 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -5570,6 +5570,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) + l1_pgentry_t *pl1e; + unsigned int i; + unsigned long v = s; ++ int rc = -ENOMEM; + + /* Set of valid PTE bits which may be altered. */ + #define FLAGS_MASK (_PAGE_NX|_PAGE_RW|_PAGE_PRESENT) +@@ -5611,7 +5612,8 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) + /* PAGE1GB: shatter the superpage and fall through. */ + pl2e = alloc_xen_pagetable(); + if ( !pl2e ) +- return -ENOMEM; ++ goto out; ++ + for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) + l2e_write(pl2e + i, + l2e_from_pfn(l3e_get_pfn(*pl3e) + +@@ -5666,7 +5668,8 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) + /* PSE: shatter the superpage and try again. */ + pl1e = alloc_xen_pagetable(); + if ( !pl1e ) +- return -ENOMEM; ++ goto out; ++ + for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) + l1e_write(&pl1e[i], + l1e_from_pfn(l2e_get_pfn(*pl2e) + i, +@@ -5795,7 +5798,10 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) + flush_area(NULL, FLUSH_TLB_GLOBAL); + + #undef FLAGS_MASK +- return 0; ++ rc = 0; ++ ++ out: ++ return rc; + } + + #undef flush_area +-- +2.25.1 + diff --git a/system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch b/system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch new file mode 100644 index 0000000000..db407416b9 --- /dev/null +++ b/system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch @@ -0,0 +1,249 @@ +From 0ff9a8453dc47cd47eee9659d5916afb5094e871 Mon Sep 17 00:00:00 2001 +From: Hongyan Xia +Date: Sat, 11 Jan 2020 21:57:43 +0000 +Subject: [PATCH 3/3] x86/mm: Prevent some races in hypervisor mapping updates + +map_pages_to_xen will attempt to coalesce mappings into 2MiB and 1GiB +superpages if possible, to maximize TLB efficiency. This means both +replacing superpage entries with smaller entries, and replacing +smaller entries with superpages. + +Unfortunately, while some potential races are handled correctly, +others are not. These include: + +1. When one processor modifies a sub-superpage mapping while another +processor replaces the entire range with a superpage. + +Take the following example: + +Suppose L3[N] points to L2. And suppose we have two processors, A and +B. + +* A walks the pagetables, get a pointer to L2. +* B replaces L3[N] with a 1GiB mapping. +* B Frees L2 +* A writes L2[M] # + +This is race exacerbated by the fact that virt_to_xen_l[21]e doesn't +handle higher-level superpages properly: If you call virt_xen_to_l2e +on a virtual address within an L3 superpage, you'll either hit a BUG() +(most likely), or get a pointer into the middle of a data page; same +with virt_xen_to_l1 on a virtual address within either an L3 or L2 +superpage. + +So take the following example: + +* A reads pl3e and discovers it to point to an L2. +* B replaces L3[N] with a 1GiB mapping +* A calls virt_to_xen_l2e() and hits the BUG_ON() # + +2. 
When two processors simultaneously try to replace a sub-superpage +mapping with a superpage mapping. + +Take the following example: + +Suppose L3[N] points to L2. And suppose we have two processors, A and B, +both trying to replace L3[N] with a superpage. + +* A walks the pagetables, get a pointer to pl3e, and takes a copy ol3e pointing to L2. +* B walks the pagetables, gets a pointre to pl3e, and takes a copy ol3e pointing to L2. +* A writes the new value into L3[N] +* B writes the new value into L3[N] +* A recursively frees all the L1's under L2, then frees L2 +* B recursively double-frees all the L1's under L2, then double-frees L2 # + +Fix this by grabbing a lock for the entirety of the mapping update +operation. + +Rather than grabbing map_pgdir_lock for the entire operation, however, +repurpose the PGT_locked bit from L3's page->type_info as a lock. +This means that rather than locking the entire address space, we +"only" lock a single 512GiB chunk of hypervisor address space at a +time. + +There was a proposal for a lock-and-reverify approach, where we walk +the pagetables to the point where we decide what to do; then grab the +map_pgdir_lock, re-verify the information we collected without the +lock, and finally make the change (starting over again if anything had +changed). Without being able to guarantee that the L2 table wasn't +freed, however, that means every read would need to be considered +potentially unsafe. Thinking carefully about that is probably +something that wants to be done on public, not under time pressure. + +This is part of XSA-345. + +Reported-by: Hongyan Xia +Signed-off-by: Hongyan Xia +Signed-off-by: George Dunlap +Reviewed-by: Jan Beulich +--- + xen/arch/x86/mm.c | 92 +++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 89 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index af726d3274..d6a0761f43 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -2167,6 +2167,50 @@ void page_unlock(struct page_info *page) + current_locked_page_set(NULL); + } + ++/* ++ * L3 table locks: ++ * ++ * Used for serialization in map_pages_to_xen() and modify_xen_mappings(). ++ * ++ * For Xen PT pages, the page->u.inuse.type_info is unused and it is safe to ++ * reuse the PGT_locked flag. This lock is taken only when we move down to L3 ++ * tables and below, since L4 (and above, for 5-level paging) is still globally ++ * protected by map_pgdir_lock. ++ * ++ * PV MMU update hypercalls call map_pages_to_xen while holding a page's page_lock(). ++ * This has two implications: ++ * - We cannot reuse reuse current_locked_page_* for debugging ++ * - To avoid the chance of deadlock, even for different pages, we ++ * must never grab page_lock() after grabbing l3t_lock(). This ++ * includes any page_lock()-based locks, such as ++ * mem_sharing_page_lock(). ++ * ++ * Also note that we grab the map_pgdir_lock while holding the ++ * l3t_lock(), so to avoid deadlock we must avoid grabbing them in ++ * reverse order. 
++ */ ++static void l3t_lock(struct page_info *page) ++{ ++ unsigned long x, nx; ++ ++ do { ++ while ( (x = page->u.inuse.type_info) & PGT_locked ) ++ cpu_relax(); ++ nx = x | PGT_locked; ++ } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x ); ++} ++ ++static void l3t_unlock(struct page_info *page) ++{ ++ unsigned long x, nx, y = page->u.inuse.type_info; ++ ++ do { ++ x = y; ++ BUG_ON(!(x & PGT_locked)); ++ nx = x & ~PGT_locked; ++ } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); ++} ++ + #ifdef CONFIG_PV + /* + * PTE flags that a guest may change without re-validating the PTE. +@@ -5177,6 +5221,23 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v) + flush_area_local((const void *)v, f) : \ + flush_area_all((const void *)v, f)) + ++#define L3T_INIT(page) (page) = ZERO_BLOCK_PTR ++ ++#define L3T_LOCK(page) \ ++ do { \ ++ if ( locking ) \ ++ l3t_lock(page); \ ++ } while ( false ) ++ ++#define L3T_UNLOCK(page) \ ++ do { \ ++ if ( locking && (page) != ZERO_BLOCK_PTR ) \ ++ { \ ++ l3t_unlock(page); \ ++ (page) = ZERO_BLOCK_PTR; \ ++ } \ ++ } while ( false ) ++ + int map_pages_to_xen( + unsigned long virt, + mfn_t mfn, +@@ -5188,6 +5249,7 @@ int map_pages_to_xen( + l1_pgentry_t *pl1e, ol1e; + unsigned int i; + int rc = -ENOMEM; ++ struct page_info *current_l3page; + + #define flush_flags(oldf) do { \ + unsigned int o_ = (oldf); \ +@@ -5203,13 +5265,20 @@ int map_pages_to_xen( + } \ + } while (0) + ++ L3T_INIT(current_l3page); ++ + while ( nr_mfns != 0 ) + { +- l3_pgentry_t ol3e, *pl3e = virt_to_xen_l3e(virt); ++ l3_pgentry_t *pl3e, ol3e; + ++ L3T_UNLOCK(current_l3page); ++ ++ pl3e = virt_to_xen_l3e(virt); + if ( !pl3e ) + goto out; + ++ current_l3page = virt_to_page(pl3e); ++ L3T_LOCK(current_l3page); + ol3e = *pl3e; + + if ( cpu_has_page1gb && +@@ -5543,6 +5612,7 @@ int map_pages_to_xen( + rc = 0; + + out: ++ L3T_UNLOCK(current_l3page); + return rc; + } + +@@ -5571,6 +5641,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) + unsigned int i; + unsigned long v = s; + int rc = -ENOMEM; ++ struct page_info *current_l3page; + + /* Set of valid PTE bits which may be altered. */ + #define FLAGS_MASK (_PAGE_NX|_PAGE_RW|_PAGE_PRESENT) +@@ -5579,11 +5650,22 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) + ASSERT(IS_ALIGNED(s, PAGE_SIZE)); + ASSERT(IS_ALIGNED(e, PAGE_SIZE)); + ++ L3T_INIT(current_l3page); ++ + while ( v < e ) + { +- l3_pgentry_t *pl3e = virt_to_xen_l3e(v); ++ l3_pgentry_t *pl3e; ++ ++ L3T_UNLOCK(current_l3page); + +- if ( !pl3e || !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) ++ pl3e = virt_to_xen_l3e(v); ++ if ( !pl3e ) ++ goto out; ++ ++ current_l3page = virt_to_page(pl3e); ++ L3T_LOCK(current_l3page); ++ ++ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) + { + /* Confirm the caller isn't trying to create new mappings. 
*/ + ASSERT(!(nf & _PAGE_PRESENT)); +@@ -5801,9 +5883,13 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) + rc = 0; + + out: ++ L3T_UNLOCK(current_l3page); + return rc; + } + ++#undef L3T_LOCK ++#undef L3T_UNLOCK ++ + #undef flush_area + + int destroy_xen_mappings(unsigned long s, unsigned long e) +-- +2.25.1 + diff --git a/system/xen/xsa/xsa346-4.13-1.patch b/system/xen/xsa/xsa346-4.13-1.patch new file mode 100644 index 0000000000..a32e658e80 --- /dev/null +++ b/system/xen/xsa/xsa346-4.13-1.patch @@ -0,0 +1,50 @@ +From: Jan Beulich +Subject: IOMMU: suppress "iommu_dont_flush_iotlb" when about to free a page + +Deferring flushes to a single, wide range one - as is done when +handling XENMAPSPACE_gmfn_range - is okay only as long as +pages don't get freed ahead of the eventual flush. While the only +function setting the flag (xenmem_add_to_physmap()) suggests by its name +that it's only mapping new entries, in reality the way +xenmem_add_to_physmap_one() works means an unmap would happen not only +for the page being moved (but not freed) but, if the destination GFN is +populated, also for the page being displaced from that GFN. Collapsing +the two flushes for this GFN into just one (end even more so deferring +it to a batched invocation) is not correct. + +This is part of XSA-346. + +Fixes: cf95b2a9fd5a ("iommu: Introduce per cpu flag (iommu_dont_flush_iotlb) to avoid unnecessary iotlb... ") +Signed-off-by: Jan Beulich +Reviewed-by: Paul Durrant +Acked-by: Julien Grall + +--- a/xen/common/memory.c ++++ b/xen/common/memory.c +@@ -292,6 +292,7 @@ int guest_remove_page(struct domain *d, + p2m_type_t p2mt; + #endif + mfn_t mfn; ++ bool *dont_flush_p, dont_flush; + int rc; + + #ifdef CONFIG_X86 +@@ -378,8 +379,18 @@ int guest_remove_page(struct domain *d, + return -ENXIO; + } + ++ /* ++ * Since we're likely to free the page below, we need to suspend ++ * xenmem_add_to_physmap()'s suppressing of IOMMU TLB flushes. ++ */ ++ dont_flush_p = &this_cpu(iommu_dont_flush_iotlb); ++ dont_flush = *dont_flush_p; ++ *dont_flush_p = false; ++ + rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0); + ++ *dont_flush_p = dont_flush; ++ + /* + * With the lack of an IOMMU on some platforms, domains with DMA-capable + * device must retrieve the same pfn when the hypercall populate_physmap diff --git a/system/xen/xsa/xsa346-4.13-2.patch b/system/xen/xsa/xsa346-4.13-2.patch new file mode 100644 index 0000000000..6371b5c3db --- /dev/null +++ b/system/xen/xsa/xsa346-4.13-2.patch @@ -0,0 +1,204 @@ +From: Jan Beulich +Subject: IOMMU: hold page ref until after deferred TLB flush + +When moving around a page via XENMAPSPACE_gmfn_range, deferring the TLB +flush for the "from" GFN range requires that the page remains allocated +to the guest until the TLB flush has actually occurred. Otherwise a +parallel hypercall to remove the page would only flush the TLB for the +GFN it has been moved to, but not the one is was mapped at originally. + +This is part of XSA-346. + +Fixes: cf95b2a9fd5a ("iommu: Introduce per cpu flag (iommu_dont_flush_iotlb) to avoid unnecessary iotlb... 
") +Reported-by: Julien Grall +Signed-off-by: Jan Beulich +Acked-by: Julien Grall + +--- a/xen/arch/arm/mm.c ++++ b/xen/arch/arm/mm.c +@@ -1407,7 +1407,7 @@ void share_xen_page_with_guest(struct pa + int xenmem_add_to_physmap_one( + struct domain *d, + unsigned int space, +- union xen_add_to_physmap_batch_extra extra, ++ union add_to_physmap_extra extra, + unsigned long idx, + gfn_t gfn) + { +@@ -1480,10 +1480,6 @@ int xenmem_add_to_physmap_one( + break; + } + case XENMAPSPACE_dev_mmio: +- /* extra should be 0. Reserved for future use. */ +- if ( extra.res0 ) +- return -EOPNOTSUPP; +- + rc = map_dev_mmio_region(d, gfn, 1, _mfn(idx)); + return rc; + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -4617,7 +4617,7 @@ static int handle_iomem_range(unsigned l + int xenmem_add_to_physmap_one( + struct domain *d, + unsigned int space, +- union xen_add_to_physmap_batch_extra extra, ++ union add_to_physmap_extra extra, + unsigned long idx, + gfn_t gpfn) + { +@@ -4701,9 +4701,20 @@ int xenmem_add_to_physmap_one( + rc = guest_physmap_add_page(d, gpfn, mfn, PAGE_ORDER_4K); + + put_both: +- /* In the XENMAPSPACE_gmfn case, we took a ref of the gfn at the top. */ ++ /* ++ * In the XENMAPSPACE_gmfn case, we took a ref of the gfn at the top. ++ * We also may need to transfer ownership of the page reference to our ++ * caller. ++ */ + if ( space == XENMAPSPACE_gmfn ) ++ { + put_gfn(d, gfn); ++ if ( !rc && extra.ppage ) ++ { ++ *extra.ppage = page; ++ page = NULL; ++ } ++ } + + if ( page ) + put_page(page); +--- a/xen/common/memory.c ++++ b/xen/common/memory.c +@@ -814,13 +814,12 @@ int xenmem_add_to_physmap(struct domain + { + unsigned int done = 0; + long rc = 0; +- union xen_add_to_physmap_batch_extra extra; ++ union add_to_physmap_extra extra = {}; ++ struct page_info *pages[16]; + + ASSERT(paging_mode_translate(d)); + +- if ( xatp->space != XENMAPSPACE_gmfn_foreign ) +- extra.res0 = 0; +- else ++ if ( xatp->space == XENMAPSPACE_gmfn_foreign ) + extra.foreign_domid = DOMID_INVALID; + + if ( xatp->space != XENMAPSPACE_gmfn_range ) +@@ -835,7 +834,10 @@ int xenmem_add_to_physmap(struct domain + xatp->size -= start; + + if ( is_iommu_enabled(d) ) ++ { + this_cpu(iommu_dont_flush_iotlb) = 1; ++ extra.ppage = &pages[0]; ++ } + + while ( xatp->size > done ) + { +@@ -847,8 +849,12 @@ int xenmem_add_to_physmap(struct domain + xatp->idx++; + xatp->gpfn++; + ++ if ( extra.ppage ) ++ ++extra.ppage; ++ + /* Check for continuation if it's not the last iteration. */ +- if ( xatp->size > ++done && hypercall_preempt_check() ) ++ if ( (++done > ARRAY_SIZE(pages) && extra.ppage) || ++ (xatp->size > done && hypercall_preempt_check()) ) + { + rc = start + done; + break; +@@ -858,6 +864,7 @@ int xenmem_add_to_physmap(struct domain + if ( is_iommu_enabled(d) ) + { + int ret; ++ unsigned int i; + + this_cpu(iommu_dont_flush_iotlb) = 0; + +@@ -866,6 +873,15 @@ int xenmem_add_to_physmap(struct domain + if ( unlikely(ret) && rc >= 0 ) + rc = ret; + ++ /* ++ * Now that the IOMMU TLB flush was done for the original GFN, drop ++ * the page references. The 2nd flush below is fine to make later, as ++ * whoever removes the page again from its new GFN will have to do ++ * another flush anyway. 
++ */ ++ for ( i = 0; i < done; ++i ) ++ put_page(pages[i]); ++ + ret = iommu_iotlb_flush(d, _dfn(xatp->gpfn - done), done, + IOMMU_FLUSHF_added | IOMMU_FLUSHF_modified); + if ( unlikely(ret) && rc >= 0 ) +@@ -879,6 +895,8 @@ static int xenmem_add_to_physmap_batch(s + struct xen_add_to_physmap_batch *xatpb, + unsigned int extent) + { ++ union add_to_physmap_extra extra = {}; ++ + if ( unlikely(xatpb->size < extent) ) + return -EILSEQ; + +@@ -890,6 +908,19 @@ static int xenmem_add_to_physmap_batch(s + !guest_handle_subrange_okay(xatpb->errs, extent, xatpb->size - 1) ) + return -EFAULT; + ++ switch ( xatpb->space ) ++ { ++ case XENMAPSPACE_dev_mmio: ++ /* res0 is reserved for future use. */ ++ if ( xatpb->u.res0 ) ++ return -EOPNOTSUPP; ++ break; ++ ++ case XENMAPSPACE_gmfn_foreign: ++ extra.foreign_domid = xatpb->u.foreign_domid; ++ break; ++ } ++ + while ( xatpb->size > extent ) + { + xen_ulong_t idx; +@@ -902,8 +933,7 @@ static int xenmem_add_to_physmap_batch(s + extent, 1)) ) + return -EFAULT; + +- rc = xenmem_add_to_physmap_one(d, xatpb->space, +- xatpb->u, ++ rc = xenmem_add_to_physmap_one(d, xatpb->space, extra, + idx, _gfn(gpfn)); + + if ( unlikely(__copy_to_guest_offset(xatpb->errs, extent, &rc, 1)) ) +--- a/xen/include/xen/mm.h ++++ b/xen/include/xen/mm.h +@@ -588,8 +588,22 @@ void scrub_one_page(struct page_info *); + &(d)->xenpage_list : &(d)->page_list) + #endif + ++union add_to_physmap_extra { ++ /* ++ * XENMAPSPACE_gmfn: When deferring TLB flushes, a page reference needs ++ * to be kept until after the flush, so the page can't get removed from ++ * the domain (and re-used for another purpose) beforehand. By passing ++ * non-NULL, the caller of xenmem_add_to_physmap_one() indicates it wants ++ * to have ownership of such a reference transferred in the success case. ++ */ ++ struct page_info **ppage; ++ ++ /* XENMAPSPACE_gmfn_foreign */ ++ domid_t foreign_domid; ++}; ++ + int xenmem_add_to_physmap_one(struct domain *d, unsigned int space, +- union xen_add_to_physmap_batch_extra extra, ++ union add_to_physmap_extra extra, + unsigned long idx, gfn_t gfn); + + int xenmem_add_to_physmap(struct domain *d, struct xen_add_to_physmap *xatp, diff --git a/system/xen/xsa/xsa347-4.13-1.patch b/system/xen/xsa/xsa347-4.13-1.patch new file mode 100644 index 0000000000..e9f31a151f --- /dev/null +++ b/system/xen/xsa/xsa347-4.13-1.patch @@ -0,0 +1,149 @@ +From: Jan Beulich +Subject: AMD/IOMMU: convert amd_iommu_pte from struct to union + +This is to add a "raw" counterpart to the bitfield equivalent. Take the +opportunity and + - convert fields to bool / unsigned int, + - drop the naming of the reserved field, + - shorten the names of the ignored ones. + +This is part of XSA-347. 
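Why the raw counterpart matters becomes clearer with a cut-down sketch: building a whole entry in a local variable and then comparing and storing it through a single 64-bit view is what the follow-up patch uses to update live PTEs safely. The layout below is abbreviated and assumed for illustration only; it is not the real union amd_iommu_pte.

#include <stdint.h>
#include <stdio.h>

/* Cut-down PTE with a raw view next to the bitfields (layout abbreviated). */
union toy_pte {
    uint64_t raw;                /* whole-entry view: one load/store/compare */
    struct {
        uint64_t pr:1;           /* present */
        uint64_t next_level:3;
        uint64_t mfn:40;
        uint64_t iw:1;           /* write allowed */
        uint64_t ir:1;           /* read allowed */
        uint64_t :18;            /* remaining bits, ignored here */
    };
};

int main(void)
{
    union toy_pte pte = { .raw = 0 }, new = { .raw = 0 };

    /* Build the new entry fully in a local variable ... */
    new.mfn = 0x1234;
    new.iw = 1;
    new.ir = 1;
    new.pr = 1;

    /* ... compare the whole thing at once to decide on a flush ... */
    if ( pte.pr && pte.raw != new.raw )
        puts("live entry changed -> TLB flush needed");

    /* ... and publish it with a single 64-bit store rather than several
     * separate bitfield writes the compiler may split or reorder. */
    pte.raw = new.raw;

    printf("pte.raw = %#llx\n", (unsigned long long)pte.raw);
    return 0;
}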
+ +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Reviewed-by: Paul Durrant + +--- a/xen/drivers/passthrough/amd/iommu_map.c ++++ b/xen/drivers/passthrough/amd/iommu_map.c +@@ -38,7 +38,7 @@ static unsigned int pfn_to_pde_idx(unsig + static unsigned int clear_iommu_pte_present(unsigned long l1_mfn, + unsigned long dfn) + { +- struct amd_iommu_pte *table, *pte; ++ union amd_iommu_pte *table, *pte; + unsigned int flush_flags; + + table = map_domain_page(_mfn(l1_mfn)); +@@ -52,7 +52,7 @@ static unsigned int clear_iommu_pte_pres + return flush_flags; + } + +-static unsigned int set_iommu_pde_present(struct amd_iommu_pte *pte, ++static unsigned int set_iommu_pde_present(union amd_iommu_pte *pte, + unsigned long next_mfn, + unsigned int next_level, bool iw, + bool ir) +@@ -87,7 +87,7 @@ static unsigned int set_iommu_pte_presen + int pde_level, + bool iw, bool ir) + { +- struct amd_iommu_pte *table, *pde; ++ union amd_iommu_pte *table, *pde; + unsigned int flush_flags; + + table = map_domain_page(_mfn(pt_mfn)); +@@ -178,7 +178,7 @@ void iommu_dte_set_guest_cr3(struct amd_ + static int iommu_pde_from_dfn(struct domain *d, unsigned long dfn, + unsigned long pt_mfn[], bool map) + { +- struct amd_iommu_pte *pde, *next_table_vaddr; ++ union amd_iommu_pte *pde, *next_table_vaddr; + unsigned long next_table_mfn; + unsigned int level; + struct page_info *table; +@@ -458,7 +458,7 @@ int __init amd_iommu_quarantine_init(str + unsigned long end_gfn = + 1ul << (DEFAULT_DOMAIN_ADDRESS_WIDTH - PAGE_SHIFT); + unsigned int level = amd_iommu_get_paging_mode(end_gfn); +- struct amd_iommu_pte *table; ++ union amd_iommu_pte *table; + + if ( hd->arch.root_table ) + { +@@ -489,7 +489,7 @@ int __init amd_iommu_quarantine_init(str + + for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ ) + { +- struct amd_iommu_pte *pde = &table[i]; ++ union amd_iommu_pte *pde = &table[i]; + + /* + * PDEs are essentially a subset of PTEs, so this function +--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c ++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c +@@ -390,7 +390,7 @@ static void deallocate_next_page_table(s + + static void deallocate_page_table(struct page_info *pg) + { +- struct amd_iommu_pte *table_vaddr; ++ union amd_iommu_pte *table_vaddr; + unsigned int index, level = PFN_ORDER(pg); + + PFN_ORDER(pg) = 0; +@@ -405,7 +405,7 @@ static void deallocate_page_table(struct + + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) + { +- struct amd_iommu_pte *pde = &table_vaddr[index]; ++ union amd_iommu_pte *pde = &table_vaddr[index]; + + if ( pde->mfn && pde->next_level && pde->pr ) + { +@@ -557,7 +557,7 @@ static void amd_dump_p2m_table_level(str + paddr_t gpa, int indent) + { + paddr_t address; +- struct amd_iommu_pte *table_vaddr; ++ const union amd_iommu_pte *table_vaddr; + int index; + + if ( level < 1 ) +@@ -573,7 +573,7 @@ static void amd_dump_p2m_table_level(str + + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) + { +- struct amd_iommu_pte *pde = &table_vaddr[index]; ++ const union amd_iommu_pte *pde = &table_vaddr[index]; + + if ( !(index % 2) ) + process_pending_softirqs(); +--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h ++++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h +@@ -465,20 +465,23 @@ union amd_iommu_x2apic_control { + #define IOMMU_PAGE_TABLE_U32_PER_ENTRY (IOMMU_PAGE_TABLE_ENTRY_SIZE / 4) + #define IOMMU_PAGE_TABLE_ALIGNMENT 4096 + +-struct amd_iommu_pte { +- uint64_t pr:1; +- uint64_t ignored0:4; +- uint64_t a:1; +- uint64_t d:1; +- uint64_t ignored1:2; +- uint64_t next_level:3; +- uint64_t 
mfn:40; +- uint64_t reserved:7; +- uint64_t u:1; +- uint64_t fc:1; +- uint64_t ir:1; +- uint64_t iw:1; +- uint64_t ignored2:1; ++union amd_iommu_pte { ++ uint64_t raw; ++ struct { ++ bool pr:1; ++ unsigned int ign0:4; ++ bool a:1; ++ bool d:1; ++ unsigned int ign1:2; ++ unsigned int next_level:3; ++ uint64_t mfn:40; ++ unsigned int :7; ++ bool u:1; ++ bool fc:1; ++ bool ir:1; ++ bool iw:1; ++ unsigned int ign2:1; ++ }; + }; + + /* Paging modes */ diff --git a/system/xen/xsa/xsa347-4.13-2.patch b/system/xen/xsa/xsa347-4.13-2.patch new file mode 100644 index 0000000000..fbe7461636 --- /dev/null +++ b/system/xen/xsa/xsa347-4.13-2.patch @@ -0,0 +1,72 @@ +From: Jan Beulich +Subject: AMD/IOMMU: update live PTEs atomically + +Updating a live PTE bitfield by bitfield risks the compiler re-ordering +the individual updates as well as splitting individual updates into +multiple memory writes. Construct the new entry fully in a local +variable, do the check to determine the flushing needs on the thus +established new entry, and then write the new entry by a single insn. + +Similarly using memset() to clear a PTE is unsafe, as the order of +writes the function does is, at least in principle, undefined. + +This is part of XSA-347. + +Signed-off-by: Jan Beulich +Reviewed-by: Paul Durrant + +--- a/xen/drivers/passthrough/amd/iommu_map.c ++++ b/xen/drivers/passthrough/amd/iommu_map.c +@@ -45,7 +45,7 @@ static unsigned int clear_iommu_pte_pres + pte = &table[pfn_to_pde_idx(dfn, 1)]; + + flush_flags = pte->pr ? IOMMU_FLUSHF_modified : 0; +- memset(pte, 0, sizeof(*pte)); ++ write_atomic(&pte->raw, 0); + + unmap_domain_page(table); + +@@ -57,26 +57,30 @@ static unsigned int set_iommu_pde_presen + unsigned int next_level, bool iw, + bool ir) + { ++ union amd_iommu_pte new = {}, old; + unsigned int flush_flags = IOMMU_FLUSHF_added; + +- if ( pte->pr && +- (pte->mfn != next_mfn || +- pte->iw != iw || +- pte->ir != ir || +- pte->next_level != next_level) ) +- flush_flags |= IOMMU_FLUSHF_modified; +- + /* + * FC bit should be enabled in PTE, this helps to solve potential + * issues with ATS devices + */ +- pte->fc = !next_level; ++ new.fc = !next_level; ++ ++ new.mfn = next_mfn; ++ new.iw = iw; ++ new.ir = ir; ++ new.next_level = next_level; ++ new.pr = true; ++ ++ old.raw = read_atomic(&pte->raw); ++ old.ign0 = 0; ++ old.ign1 = 0; ++ old.ign2 = 0; ++ ++ if ( old.pr && old.raw != new.raw ) ++ flush_flags |= IOMMU_FLUSHF_modified; + +- pte->mfn = next_mfn; +- pte->iw = iw; +- pte->ir = ir; +- pte->next_level = next_level; +- pte->pr = 1; ++ write_atomic(&pte->raw, new.raw); + + return flush_flags; + } diff --git a/system/xen/xsa/xsa347-4.13-3.patch b/system/xen/xsa/xsa347-4.13-3.patch new file mode 100644 index 0000000000..90c8e66020 --- /dev/null +++ b/system/xen/xsa/xsa347-4.13-3.patch @@ -0,0 +1,59 @@ +From: Jan Beulich +Subject: AMD/IOMMU: ensure suitable ordering of DTE modifications + +DMA and interrupt translation should be enabled only after other +applicable DTE fields have been written. Similarly when disabling +translation or when moving a device between domains, translation should +first be disabled, before other entry fields get modified. Note however +that the "moving" aspect doesn't apply to the interrupt remapping side, +as domain specifics are maintained in the IRTEs here, not the DTE. We +also never disable interrupt remapping once it got enabled for a device +(the respective argument passed is always the immutable iommu_intremap). + +This is part of XSA-347. 
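The ordering rule can be sketched in self-contained C as follows, with toy_wmb() standing in for Xen's smp_wmb() and the DTE reduced to two fields plus a single valid bit; this is a simplified model of the idea, not the real code, which also maintains a separate tv bit and never clears interrupt remapping once enabled.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for Xen's smp_wmb(): order earlier stores before later ones. */
#define toy_wmb() atomic_thread_fence(memory_order_release)

/* Toy device table entry; the real DTE has many more fields. */
struct toy_dte {
    uint64_t pt_root;
    uint16_t domain_id;
    _Atomic bool v;              /* translation enabled */
};

static void toy_set_root(struct toy_dte *dte, uint64_t root, uint16_t domid)
{
    /* If the entry is live, disable translation before rewriting its fields. */
    if ( atomic_load_explicit(&dte->v, memory_order_relaxed) )
    {
        atomic_store_explicit(&dte->v, false, memory_order_relaxed);
        toy_wmb();
    }

    dte->pt_root = root;
    dte->domain_id = domid;

    /* Only once the fields are visible may the enable bit be observed set. */
    toy_wmb();
    atomic_store_explicit(&dte->v, true, memory_order_relaxed);
}

int main(void)
{
    struct toy_dte dte = { 0 };

    toy_set_root(&dte, 0x123000, 7);
    printf("root=%#llx dom=%d v=%d\n",
           (unsigned long long)dte.pt_root, (int)dte.domain_id,
           (int)atomic_load_explicit(&dte.v, memory_order_relaxed));
    return 0;
}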
+ +Signed-off-by: Jan Beulich +Reviewed-by: Paul Durrant + +--- a/xen/drivers/passthrough/amd/iommu_map.c ++++ b/xen/drivers/passthrough/amd/iommu_map.c +@@ -107,11 +107,18 @@ void amd_iommu_set_root_page_table(struc + uint64_t root_ptr, uint16_t domain_id, + uint8_t paging_mode, bool valid) + { ++ if ( valid || dte->v ) ++ { ++ dte->tv = false; ++ dte->v = true; ++ smp_wmb(); ++ } + dte->domain_id = domain_id; + dte->pt_root = paddr_to_pfn(root_ptr); + dte->iw = true; + dte->ir = true; + dte->paging_mode = paging_mode; ++ smp_wmb(); + dte->tv = true; + dte->v = valid; + } +@@ -134,6 +141,7 @@ void amd_iommu_set_intremap_table( + } + + dte->ig = false; /* unmapped interrupts result in i/o page faults */ ++ smp_wmb(); + dte->iv = valid; + } + +--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c ++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c +@@ -120,7 +120,10 @@ static void amd_iommu_setup_domain_devic + /* Undo what amd_iommu_disable_domain_device() may have done. */ + ivrs_dev = &get_ivrs_mappings(iommu->seg)[req_id]; + if ( dte->it_root ) ++ { + dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED; ++ smp_wmb(); ++ } + dte->iv = iommu_intremap; + dte->ex = ivrs_dev->dte_allow_exclusion; + dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT); -- cgit v1.2.3