summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--system/xen/dom0/README.dom02
-rw-r--r--system/xen/dom0/kernel-xen.sh2
-rw-r--r--system/xen/xen.SlackBuild2
-rw-r--r--system/xen/xen.info6
-rw-r--r--system/xen/xsa/xsa252.patch27
-rw-r--r--system/xen/xsa/xsa253.patch26
-rw-r--r--system/xen/xsa/xsa255-1.patch133
-rw-r--r--system/xen/xsa/xsa255-2.patch167
-rw-r--r--system/xen/xsa/xsa256.patch40
-rw-r--r--system/xen/xsa/xsa260-1.patch72
-rw-r--r--system/xen/xsa/xsa260-2.patch110
-rw-r--r--system/xen/xsa/xsa260-3.patch138
-rw-r--r--system/xen/xsa/xsa260-4.patch72
-rw-r--r--system/xen/xsa/xsa261.patch279
-rw-r--r--system/xen/xsa/xsa262-4.10.patch76
15 files changed, 753 insertions, 399 deletions
diff --git a/system/xen/dom0/README.dom0 b/system/xen/dom0/README.dom0
index 2114164f38..09f8373b8c 100644
--- a/system/xen/dom0/README.dom0
+++ b/system/xen/dom0/README.dom0
@@ -46,7 +46,7 @@ Xen EFI binary.
To make things a bit easier, a copy of Xen EFI binary can be found here:
- http://slackware.hr/~mario/xen/xen-4.10.0.efi.gz
+ http://slackware.hr/~mario/xen/xen-4.10.1.efi.gz
If an automatic boot to Xen kernel is desired, the binary should be renamed and
copied to the following location: /boot/efi/EFI/BOOT/bootx64.efi
diff --git a/system/xen/dom0/kernel-xen.sh b/system/xen/dom0/kernel-xen.sh
index dba4b3f91a..74075da12d 100644
--- a/system/xen/dom0/kernel-xen.sh
+++ b/system/xen/dom0/kernel-xen.sh
@@ -6,7 +6,7 @@
# Modified by Mario Preksavec <mario@slackware.hr>
KERNEL=${KERNEL:-4.4.118}
-XEN=${XEN:-4.10.0}
+XEN=${XEN:-4.10.1}
BOOTLOADER=${BOOTLOADER:-lilo}
ROOTMOD=${ROOTMOD:-ext4}
diff --git a/system/xen/xen.SlackBuild b/system/xen/xen.SlackBuild
index 726227650a..6984578b49 100644
--- a/system/xen/xen.SlackBuild
+++ b/system/xen/xen.SlackBuild
@@ -23,7 +23,7 @@
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PRGNAM=xen
-VERSION=${VERSION:-4.10.0}
+VERSION=${VERSION:-4.10.1}
BUILD=${BUILD:-1}
TAG=${TAG:-_SBo}
diff --git a/system/xen/xen.info b/system/xen/xen.info
index b2253a3497..906a067f60 100644
--- a/system/xen/xen.info
+++ b/system/xen/xen.info
@@ -1,7 +1,7 @@
PRGNAM="xen"
-VERSION="4.10.0"
+VERSION="4.10.1"
HOMEPAGE="http://www.xenproject.org/"
-DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.10.0.tar.gz \
+DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.10.1.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/ipxe-git-356f6c1b64d7a97746d1816cef8ca22bdd8d0b5d.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/lwip-1.3.0.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/zlib-1.2.3.tar.gz \
@@ -13,7 +13,7 @@ DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.10.0.tar.gz \
http://mirror.slackware.hr/sources/xen-extfiles/tpm_emulator-0.7.4.tar.gz \
http://mirror.slackware.hr/sources/xen-seabios/seabios-1.10.2.tar.gz \
http://mirror.slackware.hr/sources/xen-ovmf/xen-ovmf-20170920_947f3737a.tar.bz2"
-MD5SUM="ab9d320d02cb40f6b40506aed1a38d58 \
+MD5SUM="d1b1d14ce76622062c9977d9c8ba772e \
0061f103c84b25c2e6ac47649b909bde \
36cc57650cffda9a0269493be2a169bb \
debc62758716a169df9f62e6ab2bc634 \
diff --git a/system/xen/xsa/xsa252.patch b/system/xen/xsa/xsa252.patch
deleted file mode 100644
index 8615928142..0000000000
--- a/system/xen/xsa/xsa252.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: memory: don't implicitly unpin for decrease-reservation
-
-It very likely was a mistake (copy-and-paste from domain cleanup code)
-to implicitly unpin here: The caller should really unpin itself before
-(or after, if they so wish) requesting the page to be removed.
-
-This is XSA-252.
-
-Reported-by: Jann Horn <jannh@google.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-
---- a/xen/common/memory.c
-+++ b/xen/common/memory.c
-@@ -357,11 +357,6 @@ int guest_remove_page(struct domain *d,
-
- rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0);
-
--#ifdef _PGT_pinned
-- if ( !rc && test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
-- put_page_and_type(page);
--#endif
--
- /*
- * With the lack of an IOMMU on some platforms, domains with DMA-capable
- * device must retrieve the same pfn when the hypercall populate_physmap
diff --git a/system/xen/xsa/xsa253.patch b/system/xen/xsa/xsa253.patch
deleted file mode 100644
index 19e4269358..0000000000
--- a/system/xen/xsa/xsa253.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/msr: Free msr_vcpu_policy during vcpu destruction
-
-c/s 4187f79dc7 "x86/msr: introduce struct msr_vcpu_policy" introduced a
-per-vcpu memory allocation, but failed to free it in the clean vcpu
-destruction case.
-
-This is XSA-253
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
-index b17468c..0ae715d 100644
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -382,6 +382,9 @@ void vcpu_destroy(struct vcpu *v)
-
- vcpu_destroy_fpu(v);
-
-+ xfree(v->arch.msr);
-+ v->arch.msr = NULL;
-+
- if ( !is_idle_domain(v->domain) )
- vpmu_destroy(v);
-
diff --git a/system/xen/xsa/xsa255-1.patch b/system/xen/xsa/xsa255-1.patch
deleted file mode 100644
index f8bba9e516..0000000000
--- a/system/xen/xsa/xsa255-1.patch
+++ /dev/null
@@ -1,133 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: gnttab/ARM: don't corrupt shared GFN array
-
-... by writing status GFNs to it. Introduce a second array instead.
-Also implement gnttab_status_gmfn() properly now that the information is
-suitably being tracked.
-
-While touching it anyway, remove a misguided (but luckily benign) upper
-bound check from gnttab_shared_gmfn(): We should never access beyond the
-bounds of that array.
-
-This is part of XSA-255.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
----
-v3: Don't init the ARM GFN arrays to zero anymore, use INVALID_GFN.
-v2: New.
-
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -3775,6 +3775,7 @@ int gnttab_map_frame(struct domain *d, u
- {
- int rc = 0;
- struct grant_table *gt = d->grant_table;
-+ bool status = false;
-
- grant_write_lock(gt);
-
-@@ -3785,6 +3786,7 @@ int gnttab_map_frame(struct domain *d, u
- (idx & XENMAPIDX_grant_table_status) )
- {
- idx &= ~XENMAPIDX_grant_table_status;
-+ status = true;
- if ( idx < nr_status_frames(gt) )
- *mfn = _mfn(virt_to_mfn(gt->status[idx]));
- else
-@@ -3802,7 +3804,7 @@ int gnttab_map_frame(struct domain *d, u
- }
-
- if ( !rc )
-- gnttab_set_frame_gfn(gt, idx, gfn);
-+ gnttab_set_frame_gfn(gt, status, idx, gfn);
-
- grant_write_unlock(gt);
-
---- a/xen/include/asm-arm/grant_table.h
-+++ b/xen/include/asm-arm/grant_table.h
-@@ -9,7 +9,8 @@
- #define INITIAL_NR_GRANT_FRAMES 1U
-
- struct grant_table_arch {
-- gfn_t *gfn;
-+ gfn_t *shared_gfn;
-+ gfn_t *status_gfn;
- };
-
- void gnttab_clear_flag(unsigned long nr, uint16_t *addr);
-@@ -21,7 +22,6 @@ int replace_grant_host_mapping(unsigned
- unsigned long new_gpaddr, unsigned int flags);
- void gnttab_mark_dirty(struct domain *d, unsigned long l);
- #define gnttab_create_status_page(d, t, i) do {} while (0)
--#define gnttab_status_gmfn(d, t, i) (0)
- #define gnttab_release_host_mappings(domain) 1
- static inline int replace_grant_supported(void)
- {
-@@ -42,19 +42,35 @@ static inline unsigned int gnttab_dom0_m
-
- #define gnttab_init_arch(gt) \
- ({ \
-- (gt)->arch.gfn = xzalloc_array(gfn_t, (gt)->max_grant_frames); \
-- ( (gt)->arch.gfn ? 0 : -ENOMEM ); \
-+ unsigned int ngf_ = (gt)->max_grant_frames; \
-+ unsigned int nsf_ = grant_to_status_frames(ngf_); \
-+ \
-+ (gt)->arch.shared_gfn = xmalloc_array(gfn_t, ngf_); \
-+ (gt)->arch.status_gfn = xmalloc_array(gfn_t, nsf_); \
-+ if ( (gt)->arch.shared_gfn && (gt)->arch.status_gfn ) \
-+ { \
-+ while ( ngf_-- ) \
-+ (gt)->arch.shared_gfn[ngf_] = INVALID_GFN; \
-+ while ( nsf_-- ) \
-+ (gt)->arch.status_gfn[nsf_] = INVALID_GFN; \
-+ } \
-+ else \
-+ gnttab_destroy_arch(gt); \
-+ (gt)->arch.shared_gfn ? 0 : -ENOMEM; \
- })
-
- #define gnttab_destroy_arch(gt) \
- do { \
-- xfree((gt)->arch.gfn); \
-- (gt)->arch.gfn = NULL; \
-+ xfree((gt)->arch.shared_gfn); \
-+ (gt)->arch.shared_gfn = NULL; \
-+ xfree((gt)->arch.status_gfn); \
-+ (gt)->arch.status_gfn = NULL; \
- } while ( 0 )
-
--#define gnttab_set_frame_gfn(gt, idx, gfn) \
-+#define gnttab_set_frame_gfn(gt, st, idx, gfn) \
- do { \
-- (gt)->arch.gfn[idx] = gfn; \
-+ ((st) ? (gt)->arch.status_gfn : (gt)->arch.shared_gfn)[idx] = \
-+ (gfn); \
- } while ( 0 )
-
- #define gnttab_create_shared_page(d, t, i) \
-@@ -65,8 +81,10 @@ static inline unsigned int gnttab_dom0_m
- } while ( 0 )
-
- #define gnttab_shared_gmfn(d, t, i) \
-- ( ((i >= nr_grant_frames(t)) && \
-- (i < (t)->max_grant_frames))? 0 : gfn_x((t)->arch.gfn[i]))
-+ gfn_x(((i) >= nr_grant_frames(t)) ? INVALID_GFN : (t)->arch.shared_gfn[i])
-+
-+#define gnttab_status_gmfn(d, t, i) \
-+ gfn_x(((i) >= nr_status_frames(t)) ? INVALID_GFN : (t)->arch.status_gfn[i])
-
- #define gnttab_need_iommu_mapping(d) \
- (is_domain_direct_mapped(d) && need_iommu(d))
---- a/xen/include/asm-x86/grant_table.h
-+++ b/xen/include/asm-x86/grant_table.h
-@@ -46,7 +46,7 @@ static inline unsigned int gnttab_dom0_m
-
- #define gnttab_init_arch(gt) 0
- #define gnttab_destroy_arch(gt) do {} while ( 0 )
--#define gnttab_set_frame_gfn(gt, idx, gfn) do {} while ( 0 )
-+#define gnttab_set_frame_gfn(gt, st, idx, gfn) do {} while ( 0 )
-
- #define gnttab_create_shared_page(d, t, i) \
- do { \
diff --git a/system/xen/xsa/xsa255-2.patch b/system/xen/xsa/xsa255-2.patch
deleted file mode 100644
index 402b6efe98..0000000000
--- a/system/xen/xsa/xsa255-2.patch
+++ /dev/null
@@ -1,167 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: gnttab: don't blindly free status pages upon version change
-
-There may still be active mappings, which would trigger the respective
-BUG_ON(). Split the loop into one dealing with the page attributes and
-the second (when the first fully passed) freeing the pages. Return an
-error if any pages still have pending references.
-
-This is part of XSA-255.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
----
-v4: Add gprintk(XENLOG_ERR, ...) to domain_crash() invocations.
-v3: Call guest_physmap_remove_page() from gnttab_map_frame(), making the
- code unconditional at the same time. Re-base over changes to first
- patch.
-v2: Also deal with translated guests.
-
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -1636,23 +1636,74 @@ status_alloc_failed:
- return -ENOMEM;
- }
-
--static void
-+static int
- gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
- {
-- int i;
-+ unsigned int i;
-
- for ( i = 0; i < nr_status_frames(gt); i++ )
- {
- struct page_info *pg = virt_to_page(gt->status[i]);
-+ gfn_t gfn = gnttab_get_frame_gfn(gt, true, i);
-+
-+ /*
-+ * For translated domains, recovering from failure after partial
-+ * changes were made is more complicated than it seems worth
-+ * implementing at this time. Hence respective error paths below
-+ * crash the domain in such a case.
-+ */
-+ if ( paging_mode_translate(d) )
-+ {
-+ int rc = gfn_eq(gfn, INVALID_GFN)
-+ ? 0
-+ : guest_physmap_remove_page(d, gfn,
-+ _mfn(page_to_mfn(pg)), 0);
-+
-+ if ( rc )
-+ {
-+ gprintk(XENLOG_ERR,
-+ "Could not remove status frame %u (GFN %#lx) from P2M\n",
-+ i, gfn_x(gfn));
-+ domain_crash(d);
-+ return rc;
-+ }
-+ gnttab_set_frame_gfn(gt, true, i, INVALID_GFN);
-+ }
-
- BUG_ON(page_get_owner(pg) != d);
- if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) )
- put_page(pg);
-- BUG_ON(pg->count_info & ~PGC_xen_heap);
-+
-+ if ( pg->count_info & ~PGC_xen_heap )
-+ {
-+ if ( paging_mode_translate(d) )
-+ {
-+ gprintk(XENLOG_ERR,
-+ "Wrong page state %#lx of status frame %u (GFN %#lx)\n",
-+ pg->count_info, i, gfn_x(gfn));
-+ domain_crash(d);
-+ }
-+ else
-+ {
-+ if ( get_page(pg, d) )
-+ set_bit(_PGC_allocated, &pg->count_info);
-+ while ( i-- )
-+ gnttab_create_status_page(d, gt, i);
-+ }
-+ return -EBUSY;
-+ }
-+
-+ page_set_owner(pg, NULL);
-+ }
-+
-+ for ( i = 0; i < nr_status_frames(gt); i++ )
-+ {
- free_xenheap_page(gt->status[i]);
- gt->status[i] = NULL;
- }
- gt->nr_status_frames = 0;
-+
-+ return 0;
- }
-
- /*
-@@ -2962,8 +3013,9 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
- break;
- }
-
-- if ( op.version < 2 && gt->gt_version == 2 )
-- gnttab_unpopulate_status_frames(currd, gt);
-+ if ( op.version < 2 && gt->gt_version == 2 &&
-+ (res = gnttab_unpopulate_status_frames(currd, gt)) != 0 )
-+ goto out_unlock;
-
- /* Make sure there's no crud left over from the old version. */
- for ( i = 0; i < nr_grant_frames(gt); i++ )
-@@ -3803,6 +3855,11 @@ int gnttab_map_frame(struct domain *d, u
- rc = -EINVAL;
- }
-
-+ if ( !rc && paging_mode_translate(d) &&
-+ !gfn_eq(gnttab_get_frame_gfn(gt, status, idx), INVALID_GFN) )
-+ rc = guest_physmap_remove_page(d, gnttab_get_frame_gfn(gt, status, idx),
-+ *mfn, 0);
-+
- if ( !rc )
- gnttab_set_frame_gfn(gt, status, idx, gfn);
-
---- a/xen/include/asm-arm/grant_table.h
-+++ b/xen/include/asm-arm/grant_table.h
-@@ -73,6 +73,11 @@ static inline unsigned int gnttab_dom0_m
- (gfn); \
- } while ( 0 )
-
-+#define gnttab_get_frame_gfn(gt, st, idx) ({ \
-+ _gfn((st) ? gnttab_status_gmfn(NULL, gt, idx) \
-+ : gnttab_shared_gmfn(NULL, gt, idx)); \
-+})
-+
- #define gnttab_create_shared_page(d, t, i) \
- do { \
- share_xen_page_with_guest( \
---- a/xen/include/asm-x86/grant_table.h
-+++ b/xen/include/asm-x86/grant_table.h
-@@ -47,6 +47,12 @@ static inline unsigned int gnttab_dom0_m
- #define gnttab_init_arch(gt) 0
- #define gnttab_destroy_arch(gt) do {} while ( 0 )
- #define gnttab_set_frame_gfn(gt, st, idx, gfn) do {} while ( 0 )
-+#define gnttab_get_frame_gfn(gt, st, idx) ({ \
-+ unsigned long mfn_ = (st) ? gnttab_status_mfn(gt, idx) \
-+ : gnttab_shared_mfn(gt, idx); \
-+ unsigned long gpfn_ = get_gpfn_from_mfn(mfn_); \
-+ VALID_M2P(gpfn_) ? _gfn(gpfn_) : INVALID_GFN; \
-+})
-
- #define gnttab_create_shared_page(d, t, i) \
- do { \
-@@ -63,11 +69,11 @@ static inline unsigned int gnttab_dom0_m
- } while ( 0 )
-
-
--#define gnttab_shared_mfn(d, t, i) \
-+#define gnttab_shared_mfn(t, i) \
- ((virt_to_maddr((t)->shared_raw[i]) >> PAGE_SHIFT))
-
- #define gnttab_shared_gmfn(d, t, i) \
-- (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i)))
-+ (mfn_to_gmfn(d, gnttab_shared_mfn(t, i)))
-
-
- #define gnttab_status_mfn(t, i) \
diff --git a/system/xen/xsa/xsa256.patch b/system/xen/xsa/xsa256.patch
deleted file mode 100644
index 50ff24e17b..0000000000
--- a/system/xen/xsa/xsa256.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/hvm: Disallow the creation of HVM domains without Local APIC emulation
-
-There are multiple problems, not necesserily limited to:
-
- * Guests which configure event channels via hvmop_set_evtchn_upcall_vector(),
- or which hit %cr8 emulation will cause Xen to fall over a NULL vlapic->regs
- pointer.
-
- * On Intel hardware, disabling the TPR_SHADOW execution control without
- reenabling CR8_{LOAD,STORE} interception means that the guests %cr8
- accesses interact with the real TPR. Amongst other things, setting the
- real TPR to 0xf blocks even IPIs from interrupting this CPU.
-
- * On hardware which sets up the use of Interrupt Posting, including
- IOMMU-Posting, guests run without the appropriate non-root configuration,
- which at a minimum will result in dropped interrupts.
-
-Whether no-LAPIC mode is of any use at all remains to be seen.
-
-This is XSA-256.
-
-Reported-by: Ian Jackson <ian.jackson@eu.citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
-index f93327b..f65fc12 100644
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -413,7 +413,7 @@ static bool emulation_flags_ok(const struct domain *d, uint32_t emflags)
- if ( is_hardware_domain(d) &&
- emflags != (XEN_X86_EMU_LAPIC|XEN_X86_EMU_IOAPIC) )
- return false;
-- if ( !is_hardware_domain(d) && emflags &&
-+ if ( !is_hardware_domain(d) &&
- emflags != XEN_X86_EMU_ALL && emflags != XEN_X86_EMU_LAPIC )
- return false;
- }
diff --git a/system/xen/xsa/xsa260-1.patch b/system/xen/xsa/xsa260-1.patch
new file mode 100644
index 0000000000..21da59cddd
--- /dev/null
+++ b/system/xen/xsa/xsa260-1.patch
@@ -0,0 +1,72 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/traps: Fix %dr6 handling in #DB handler
+
+Most bits in %dr6 accumulate, rather than being set directly based on the
+current source of #DB. Have the handler follow the manuals' guidance, which
+avoids leaking hypervisor debugging activities into guest context.
+
+This is part of XSA-260 / CVE-2018-8897.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/arch/x86/traps.c 2018-04-13 15:29:36.006747135 +0200
++++ b/xen/arch/x86/traps.c 2018-04-13 15:44:57.015516185 +0200
+@@ -1761,11 +1761,36 @@ static void ler_enable(void)
+
+ void do_debug(struct cpu_user_regs *regs)
+ {
++ unsigned long dr6;
+ struct vcpu *v = current;
+
++ /* Stash dr6 as early as possible. */
++ dr6 = read_debugreg(6);
++
+ if ( debugger_trap_entry(TRAP_debug, regs) )
+ return;
+
++ /*
++ * At the time of writing (March 2018), on the subject of %dr6:
++ *
++ * The Intel manual says:
++ * Certain debug exceptions may clear bits 0-3. The remaining contents
++ * of the DR6 register are never cleared by the processor. To avoid
++ * confusion in identifying debug exceptions, debug handlers should
++ * clear the register (except bit 16, which they should set) before
++ * returning to the interrupted task.
++ *
++ * The AMD manual says:
++ * Bits 15:13 of the DR6 register are not cleared by the processor and
++ * must be cleared by software after the contents have been read.
++ *
++ * Some bits are reserved set, some are reserved clear, and some bits
++ * which were previously reserved set are reused and cleared by hardware.
++ * For future compatibility, reset to the default value, which will allow
++ * us to spot any bit being changed by hardware to its non-default value.
++ */
++ write_debugreg(6, X86_DR6_DEFAULT);
++
+ if ( !guest_mode(regs) )
+ {
+ if ( regs->eflags & X86_EFLAGS_TF )
+@@ -1798,7 +1823,8 @@ void do_debug(struct cpu_user_regs *regs
+ }
+
+ /* Save debug status register where guest OS can peek at it */
+- v->arch.debugreg[6] = read_debugreg(6);
++ v->arch.debugreg[6] |= (dr6 & ~X86_DR6_DEFAULT);
++ v->arch.debugreg[6] &= (dr6 | ~X86_DR6_DEFAULT);
+
+ ler_enable();
+ pv_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
+--- a/xen/include/asm-x86/debugreg.h 2015-02-11 09:36:29.000000000 +0100
++++ b/xen/include/asm-x86/debugreg.h 2018-04-13 15:44:57.015516185 +0200
+@@ -24,6 +24,8 @@
+ #define DR_STATUS_RESERVED_ZERO (~0xffffeffful) /* Reserved, read as zero */
+ #define DR_STATUS_RESERVED_ONE 0xffff0ff0ul /* Reserved, read as one */
+
++#define X86_DR6_DEFAULT 0xffff0ff0ul /* Default %dr6 value. */
++
+ /* Now define a bunch of things for manipulating the control register.
+ The top two bytes of the control register consist of 4 fields of 4
+ bits - each field corresponds to one of the four debug registers,
diff --git a/system/xen/xsa/xsa260-2.patch b/system/xen/xsa/xsa260-2.patch
new file mode 100644
index 0000000000..be71b2438f
--- /dev/null
+++ b/system/xen/xsa/xsa260-2.patch
@@ -0,0 +1,110 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/pv: Move exception injection into {,compat_}test_all_events()
+
+This allows paths to jump straight to {,compat_}test_all_events() and have
+injection of pending exceptions happen automatically, rather than requiring
+all calling paths to handle exceptions themselves.
+
+The normal exception path is simplified as a result, and
+compat_post_handle_exception() is removed entirely.
+
+This is part of XSA-260 / CVE-2018-8897.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/arch/x86/x86_64/compat/entry.S
++++ b/xen/arch/x86/x86_64/compat/entry.S
+@@ -39,6 +39,12 @@ ENTRY(compat_test_all_events)
+ leaq irq_stat+IRQSTAT_softirq_pending(%rip),%rcx
+ cmpl $0,(%rcx,%rax,1)
+ jne compat_process_softirqs
++
++ /* Inject exception if pending. */
++ lea VCPU_trap_bounce(%rbx), %rdx
++ testb $TBF_EXCEPTION, TRAPBOUNCE_flags(%rdx)
++ jnz .Lcompat_process_trapbounce
++
+ testb $1,VCPU_mce_pending(%rbx)
+ jnz compat_process_mce
+ .Lcompat_test_guest_nmi:
+@@ -68,6 +74,15 @@ compat_process_softirqs:
+ call do_softirq
+ jmp compat_test_all_events
+
++ ALIGN
++/* %rbx: struct vcpu, %rdx: struct trap_bounce */
++.Lcompat_process_trapbounce:
++ sti
++.Lcompat_bounce_exception:
++ call compat_create_bounce_frame
++ movb $0, TRAPBOUNCE_flags(%rdx)
++ jmp compat_test_all_events
++
+ ALIGN
+ /* %rbx: struct vcpu */
+ compat_process_mce:
+@@ -189,15 +204,6 @@ ENTRY(cr4_pv32_restore)
+ xor %eax, %eax
+ ret
+
+-/* %rdx: trap_bounce, %rbx: struct vcpu */
+-ENTRY(compat_post_handle_exception)
+- testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
+- jz compat_test_all_events
+-.Lcompat_bounce_exception:
+- call compat_create_bounce_frame
+- movb $0,TRAPBOUNCE_flags(%rdx)
+- jmp compat_test_all_events
+-
+ .section .text.entry, "ax", @progbits
+
+ /* See lstar_enter for entry register state. */
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -42,6 +42,12 @@ test_all_events:
+ leaq irq_stat+IRQSTAT_softirq_pending(%rip), %rcx
+ cmpl $0, (%rcx, %rax, 1)
+ jne process_softirqs
++
++ /* Inject exception if pending. */
++ lea VCPU_trap_bounce(%rbx), %rdx
++ testb $TBF_EXCEPTION, TRAPBOUNCE_flags(%rdx)
++ jnz .Lprocess_trapbounce
++
+ cmpb $0, VCPU_mce_pending(%rbx)
+ jne process_mce
+ .Ltest_guest_nmi:
+@@ -70,6 +76,15 @@ process_softirqs:
+ jmp test_all_events
+
+ ALIGN
++/* %rbx: struct vcpu, %rdx struct trap_bounce */
++.Lprocess_trapbounce:
++ sti
++.Lbounce_exception:
++ call create_bounce_frame
++ movb $0, TRAPBOUNCE_flags(%rdx)
++ jmp test_all_events
++
++ ALIGN
+ /* %rbx: struct vcpu */
+ process_mce:
+ testb $1 << VCPU_TRAP_MCE, VCPU_async_exception_mask(%rbx)
+@@ -667,15 +682,9 @@ handle_exception_saved:
+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ testb $3,UREGS_cs(%rsp)
+ jz restore_all_xen
+- leaq VCPU_trap_bounce(%rbx),%rdx
+ movq VCPU_domain(%rbx),%rax
+ testb $1,DOMAIN_is_32bit_pv(%rax)
+- jnz compat_post_handle_exception
+- testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
+- jz test_all_events
+-.Lbounce_exception:
+- call create_bounce_frame
+- movb $0,TRAPBOUNCE_flags(%rdx)
++ jnz compat_test_all_events
+ jmp test_all_events
+
+ /* No special register assumptions. */
diff --git a/system/xen/xsa/xsa260-3.patch b/system/xen/xsa/xsa260-3.patch
new file mode 100644
index 0000000000..f0a0a5687d
--- /dev/null
+++ b/system/xen/xsa/xsa260-3.patch
@@ -0,0 +1,138 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/traps: Use an Interrupt Stack Table for #DB
+
+PV guests can use architectural corner cases to cause #DB to be raised after
+transitioning into supervisor mode.
+
+Use an interrupt stack table for #DB to prevent the exception being taken with
+a guest controlled stack pointer.
+
+This is part of XSA-260 / CVE-2018-8897.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/arch/x86/cpu/common.c
++++ b/xen/arch/x86/cpu/common.c
+@@ -679,6 +679,7 @@ void load_system_tables(void)
+ [IST_MCE - 1] = stack_top + IST_MCE * PAGE_SIZE,
+ [IST_DF - 1] = stack_top + IST_DF * PAGE_SIZE,
+ [IST_NMI - 1] = stack_top + IST_NMI * PAGE_SIZE,
++ [IST_DB - 1] = stack_top + IST_DB * PAGE_SIZE,
+
+ [IST_MAX ... ARRAY_SIZE(tss->ist) - 1] =
+ 0x8600111111111111ul,
+@@ -706,6 +707,7 @@ void load_system_tables(void)
+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF);
+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI);
+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);
++ set_ist(&idt_tables[cpu][TRAP_debug], IST_DB);
+
+ /*
+ * Bottom-of-stack must be 16-byte aligned!
+--- a/xen/arch/x86/hvm/svm/svm.c
++++ b/xen/arch/x86/hvm/svm/svm.c
+@@ -1046,6 +1046,7 @@ static void svm_ctxt_switch_from(struct
+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF);
+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI);
+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);
++ set_ist(&idt_tables[cpu][TRAP_debug], IST_DB);
+ }
+
+ static void svm_ctxt_switch_to(struct vcpu *v)
+@@ -1067,6 +1068,7 @@ static void svm_ctxt_switch_to(struct vc
+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE);
+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE);
+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
++ set_ist(&idt_tables[cpu][TRAP_debug], IST_NONE);
+
+ svm_restore_dr(v);
+
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -964,6 +964,7 @@ static int cpu_smpboot_alloc(unsigned in
+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE);
+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE);
+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
++ set_ist(&idt_tables[cpu][TRAP_debug], IST_NONE);
+
+ for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
+ i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -325,13 +325,13 @@ static void show_guest_stack(struct vcpu
+ /*
+ * Notes for get_stack_trace_bottom() and get_stack_dump_bottom()
+ *
+- * Stack pages 0, 1 and 2:
++ * Stack pages 0 - 3:
+ * These are all 1-page IST stacks. Each of these stacks have an exception
+ * frame and saved register state at the top. The interesting bound for a
+ * trace is the word adjacent to this, while the bound for a dump is the
+ * very top, including the exception frame.
+ *
+- * Stack pages 3, 4 and 5:
++ * Stack pages 4 and 5:
+ * None of these are particularly interesting. With MEMORY_GUARD, page 5 is
+ * explicitly not present, so attempting to dump or trace it is
+ * counterproductive. Without MEMORY_GUARD, it is possible for a call chain
+@@ -352,12 +352,12 @@ unsigned long get_stack_trace_bottom(uns
+ {
+ switch ( get_stack_page(sp) )
+ {
+- case 0 ... 2:
++ case 0 ... 3:
+ return ROUNDUP(sp, PAGE_SIZE) -
+ offsetof(struct cpu_user_regs, es) - sizeof(unsigned long);
+
+ #ifndef MEMORY_GUARD
+- case 3 ... 5:
++ case 4 ... 5:
+ #endif
+ case 6 ... 7:
+ return ROUNDUP(sp, STACK_SIZE) -
+@@ -372,11 +372,11 @@ unsigned long get_stack_dump_bottom(unsi
+ {
+ switch ( get_stack_page(sp) )
+ {
+- case 0 ... 2:
++ case 0 ... 3:
+ return ROUNDUP(sp, PAGE_SIZE) - sizeof(unsigned long);
+
+ #ifndef MEMORY_GUARD
+- case 3 ... 5:
++ case 4 ... 5:
+ #endif
+ case 6 ... 7:
+ return ROUNDUP(sp, STACK_SIZE) - sizeof(unsigned long);
+@@ -1943,6 +1943,7 @@ void __init init_idt_traps(void)
+ set_ist(&idt_table[TRAP_double_fault], IST_DF);
+ set_ist(&idt_table[TRAP_nmi], IST_NMI);
+ set_ist(&idt_table[TRAP_machine_check], IST_MCE);
++ set_ist(&idt_table[TRAP_debug], IST_DB);
+
+ /* CPU0 uses the master IDT. */
+ idt_tables[0] = idt_table;
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -739,7 +739,7 @@ ENTRY(device_not_available)
+ ENTRY(debug)
+ pushq $0
+ movl $TRAP_debug,4(%rsp)
+- jmp handle_exception
++ jmp handle_ist_exception
+
+ ENTRY(int3)
+ pushq $0
+--- a/xen/include/asm-x86/processor.h
++++ b/xen/include/asm-x86/processor.h
+@@ -443,7 +443,8 @@ struct __packed __cacheline_aligned tss_
+ #define IST_DF 1UL
+ #define IST_NMI 2UL
+ #define IST_MCE 3UL
+-#define IST_MAX 3UL
++#define IST_DB 4UL
++#define IST_MAX 4UL
+
+ /* Set the interrupt stack table used by a particular interrupt
+ * descriptor table entry. */
diff --git a/system/xen/xsa/xsa260-4.patch b/system/xen/xsa/xsa260-4.patch
new file mode 100644
index 0000000000..c2fa02d6e1
--- /dev/null
+++ b/system/xen/xsa/xsa260-4.patch
@@ -0,0 +1,72 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/traps: Fix handling of #DB exceptions in hypervisor context
+
+The WARN_ON() can be triggered by guest activities, and emits a full stack
+trace without rate limiting. Swap it out for a ratelimited printk with just
+enough information to work out what is going on.
+
+Not all #DB exceptions are traps, so blindly continuing is not a safe action
+to take. We don't let PV guests select these settings in the real %dr7 to
+begin with, but for added safety against unexpected situations, detect the
+fault cases and crash in an obvious manner.
+
+This is part of XSA-260 / CVE-2018-8897.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -1809,16 +1809,44 @@ void do_debug(struct cpu_user_regs *regs
+ regs->eflags &= ~X86_EFLAGS_TF;
+ }
+ }
+- else
++
++ /*
++ * Check for fault conditions. General Detect, and instruction
++ * breakpoints are faults rather than traps, at which point attempting
++ * to ignore and continue will result in a livelock.
++ */
++ if ( dr6 & DR_GENERAL_DETECT )
++ {
++ printk(XENLOG_ERR "Hit General Detect in Xen context\n");
++ fatal_trap(regs, 0);
++ }
++
++ if ( dr6 & (DR_TRAP3 | DR_TRAP2 | DR_TRAP1 | DR_TRAP0) )
+ {
+- /*
+- * We ignore watchpoints when they trigger within Xen. This may
+- * happen when a buffer is passed to us which previously had a
+- * watchpoint set on it. No need to bump EIP; the only faulting
+- * trap is an instruction breakpoint, which can't happen to us.
+- */
+- WARN_ON(!search_exception_table(regs));
++ unsigned int bp, dr7 = read_debugreg(7) >> DR_CONTROL_SHIFT;
++
++ for ( bp = 0; bp < 4; ++bp )
++ {
++ if ( (dr6 & (1u << bp)) && /* Breakpoint triggered? */
++ ((dr7 & (3u << (bp * DR_CONTROL_SIZE))) == 0) /* Insn? */ )
++ {
++ printk(XENLOG_ERR
++ "Hit instruction breakpoint in Xen context\n");
++ fatal_trap(regs, 0);
++ }
++ }
+ }
++
++ /*
++ * Whatever caused this #DB should be a trap. Note it and continue.
++ * Guests can trigger this in certain corner cases, so ensure the
++ * message is ratelimited.
++ */
++ gprintk(XENLOG_WARNING,
++ "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, dr6 %lx\n",
++ regs->cs, _p(regs->rip), _p(regs->rip),
++ regs->ss, _p(regs->rsp), dr6);
++
+ goto out;
+ }
+
diff --git a/system/xen/xsa/xsa261.patch b/system/xen/xsa/xsa261.patch
new file mode 100644
index 0000000000..a51744b8d0
--- /dev/null
+++ b/system/xen/xsa/xsa261.patch
@@ -0,0 +1,279 @@
+From: Xen Project Security Team <security@xenproject.org>
+Subject: x86/vpt: add support for IO-APIC routed interrupts
+
+And modify the HPET code to make use of it. Currently HPET interrupts
+are always treated as ISA and thus injected through the vPIC. This is
+wrong because HPET interrupts when not in legacy mode should be
+injected from the IO-APIC.
+
+To make things worse, the supported interrupt routing values are set
+to [20..23], which clearly falls outside of the ISA range, thus
+leading to an ASSERT in debug builds or memory corruption in non-debug
+builds because the interrupt injection code will write out of the
+bounds of the arch.hvm_domain.vpic array.
+
+Since the HPET interrupt source can change between ISA and IO-APIC,
+always destroy the timer before changing the mode, or else Xen risks
+changing it while the timer is active.
+
+Note that vpt interrupt injection is racy in the sense that the
+vIO-APIC RTE entry can be written by the guest in between the call to
+pt_irq_masked and hvm_ioapic_assert, or the call to pt_update_irq and
+pt_intr_post. Those are not deemed to be security issues, but rather
+quirks of the current implementation. In the worst case the guest
+might lose interrupts or get multiple interrupt vectors injected for
+the same timer source.
+
+This is part of XSA-261.
+
+Address actual and potential compiler warnings. Fix formatting.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+---
+Changes since v2:
+ - Move fallthrough comment to be just above the case label.
+ - Fix now stale comment in pt_update_irq.
+ - Use NR_ISAIRQS instead of 16.
+ - Expand commit message to mention the quirkiness of vpt interrupt
+ injection.
+
+Changes since v1:
+ - Simplify usage of gsi in pt_irq_masked.
+ - Introduce hvm_ioapic_assert.
+ - Fix pt->source == PTSRC_isa in create_periodic_time.
+
+--- a/xen/arch/x86/hvm/hpet.c
++++ b/xen/arch/x86/hvm/hpet.c
+@@ -264,13 +264,20 @@ static void hpet_set_timer(HPETState *h,
+ diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN))
+ ? (uint32_t)diff : 0;
+
++ destroy_periodic_time(&h->pt[tn]);
+ if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) )
++ {
+ /* if LegacyReplacementRoute bit is set, HPET specification requires
+ timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
+ timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
+ irq = (tn == 0) ? 0 : 8;
++ h->pt[tn].source = PTSRC_isa;
++ }
+ else
++ {
+ irq = timer_int_route(h, tn);
++ h->pt[tn].source = PTSRC_ioapic;
++ }
+
+ /*
+ * diff is the time from now when the timer should fire, for a periodic
+--- a/xen/arch/x86/hvm/irq.c
++++ b/xen/arch/x86/hvm/irq.c
+@@ -41,6 +41,26 @@ static void assert_gsi(struct domain *d,
+ vioapic_irq_positive_edge(d, ioapic_gsi);
+ }
+
++int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level)
++{
++ struct hvm_irq *hvm_irq = hvm_domain_irq(d);
++ int vector;
++
++ if ( gsi >= hvm_irq->nr_gsis )
++ {
++ ASSERT_UNREACHABLE();
++ return -1;
++ }
++
++ spin_lock(&d->arch.hvm_domain.irq_lock);
++ if ( !level || hvm_irq->gsi_assert_count[gsi]++ == 0 )
++ assert_gsi(d, gsi);
++ vector = vioapic_get_vector(d, gsi);
++ spin_unlock(&d->arch.hvm_domain.irq_lock);
++
++ return vector;
++}
++
+ static void assert_irq(struct domain *d, unsigned ioapic_gsi, unsigned pic_irq)
+ {
+ assert_gsi(d, ioapic_gsi);
+--- a/xen/arch/x86/hvm/vpt.c
++++ b/xen/arch/x86/hvm/vpt.c
+@@ -107,31 +107,49 @@ static int pt_irq_vector(struct periodic
+ static int pt_irq_masked(struct periodic_time *pt)
+ {
+ struct vcpu *v = pt->vcpu;
+- unsigned int gsi, isa_irq;
+- int mask;
+- uint8_t pic_imr;
++ unsigned int gsi = pt->irq;
+
+- if ( pt->source == PTSRC_lapic )
++ switch ( pt->source )
++ {
++ case PTSRC_lapic:
+ {
+ struct vlapic *vlapic = vcpu_vlapic(v);
++
+ return (!vlapic_enabled(vlapic) ||
+ (vlapic_get_reg(vlapic, APIC_LVTT) & APIC_LVT_MASKED));
+ }
+
+- isa_irq = pt->irq;
+- gsi = hvm_isa_irq_to_gsi(isa_irq);
+- pic_imr = v->domain->arch.hvm_domain.vpic[isa_irq >> 3].imr;
+- mask = vioapic_get_mask(v->domain, gsi);
+- if ( mask < 0 )
+- {
+- dprintk(XENLOG_WARNING, "d%u: invalid GSI (%u) for platform timer\n",
+- v->domain->domain_id, gsi);
+- domain_crash(v->domain);
+- return -1;
++ case PTSRC_isa:
++ {
++ uint8_t pic_imr = v->domain->arch.hvm_domain.vpic[pt->irq >> 3].imr;
++
++ /* Check if the interrupt is unmasked in the PIC. */
++ if ( !(pic_imr & (1 << (pt->irq & 7))) && vlapic_accept_pic_intr(v) )
++ return 0;
++
++ gsi = hvm_isa_irq_to_gsi(pt->irq);
++ }
++
++ /* Fallthrough to check if the interrupt is masked on the IO APIC. */
++ case PTSRC_ioapic:
++ {
++ int mask = vioapic_get_mask(v->domain, gsi);
++
++ if ( mask < 0 )
++ {
++ dprintk(XENLOG_WARNING,
++ "d%d: invalid GSI (%u) for platform timer\n",
++ v->domain->domain_id, gsi);
++ domain_crash(v->domain);
++ return -1;
++ }
++
++ return mask;
++ }
+ }
+
+- return (((pic_imr & (1 << (isa_irq & 7))) || !vlapic_accept_pic_intr(v)) &&
+- mask);
++ ASSERT_UNREACHABLE();
++ return 1;
+ }
+
+ static void pt_lock(struct periodic_time *pt)
+@@ -252,7 +270,7 @@ int pt_update_irq(struct vcpu *v)
+ struct list_head *head = &v->arch.hvm_vcpu.tm_list;
+ struct periodic_time *pt, *temp, *earliest_pt;
+ uint64_t max_lag;
+- int irq, is_lapic, pt_vector;
++ int irq, pt_vector = -1;
+
+ spin_lock(&v->arch.hvm_vcpu.tm_lock);
+
+@@ -288,29 +306,26 @@ int pt_update_irq(struct vcpu *v)
+
+ earliest_pt->irq_issued = 1;
+ irq = earliest_pt->irq;
+- is_lapic = (earliest_pt->source == PTSRC_lapic);
+
+ spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+
+- /*
+- * If periodic timer interrut is handled by lapic, its vector in
+- * IRR is returned and used to set eoi_exit_bitmap for virtual
+- * interrupt delivery case. Otherwise return -1 to do nothing.
+- */
+- if ( is_lapic )
++ switch ( earliest_pt->source )
+ {
++ case PTSRC_lapic:
++ /*
++ * If periodic timer interrupt is handled by lapic, its vector in
++ * IRR is returned and used to set eoi_exit_bitmap for virtual
++ * interrupt delivery case. Otherwise return -1 to do nothing.
++ */
+ vlapic_set_irq(vcpu_vlapic(v), irq, 0);
+ pt_vector = irq;
+- }
+- else
+- {
++ break;
++
++ case PTSRC_isa:
+ hvm_isa_irq_deassert(v->domain, irq);
+ if ( platform_legacy_irq(irq) && vlapic_accept_pic_intr(v) &&
+ v->domain->arch.hvm_domain.vpic[irq >> 3].int_output )
+- {
+ hvm_isa_irq_assert(v->domain, irq, NULL);
+- pt_vector = -1;
+- }
+ else
+ {
+ pt_vector = hvm_isa_irq_assert(v->domain, irq, vioapic_get_vector);
+@@ -321,6 +336,17 @@ int pt_update_irq(struct vcpu *v)
+ if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
+ pt_vector = -1;
+ }
++ break;
++
++ case PTSRC_ioapic:
++ /*
++ * NB: At the moment IO-APIC routed interrupts generated by vpt devices
++ * (HPET) are edge-triggered.
++ */
++ pt_vector = hvm_ioapic_assert(v->domain, irq, false);
++ if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
++ pt_vector = -1;
++ break;
+ }
+
+ return pt_vector;
+@@ -418,7 +444,14 @@ void create_periodic_time(
+ struct vcpu *v, struct periodic_time *pt, uint64_t delta,
+ uint64_t period, uint8_t irq, time_cb *cb, void *data)
+ {
+- ASSERT(pt->source != 0);
++ if ( !pt->source ||
++ (pt->irq >= NR_ISAIRQS && pt->source == PTSRC_isa) ||
++ (pt->irq >= hvm_domain_irq(v->domain)->nr_gsis &&
++ pt->source == PTSRC_ioapic) )
++ {
++ ASSERT_UNREACHABLE();
++ return;
++ }
+
+ destroy_periodic_time(pt);
+
+@@ -498,7 +531,7 @@ static void pt_adjust_vcpu(struct period
+ {
+ int on_list;
+
+- ASSERT(pt->source == PTSRC_isa);
++ ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic);
+
+ if ( pt->vcpu == NULL )
+ return;
+--- a/xen/include/asm-x86/hvm/irq.h
++++ b/xen/include/asm-x86/hvm/irq.h
+@@ -207,6 +207,9 @@ int hvm_set_pci_link_route(struct domain
+
+ int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data);
+
++/* Assert an IO APIC pin. */
++int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level);
++
+ void hvm_maybe_deassert_evtchn_irq(void);
+ void hvm_assert_evtchn_irq(struct vcpu *v);
+ void hvm_set_callback_via(struct domain *d, uint64_t via);
+--- a/xen/include/asm-x86/hvm/vpt.h
++++ b/xen/include/asm-x86/hvm/vpt.h
+@@ -44,6 +44,7 @@ struct periodic_time {
+ bool_t warned_timeout_too_short;
+ #define PTSRC_isa 1 /* ISA time source */
+ #define PTSRC_lapic 2 /* LAPIC time source */
++#define PTSRC_ioapic 3 /* IOAPIC time source */
+ u8 source; /* PTSRC_ */
+ u8 irq;
+ struct vcpu *vcpu; /* vcpu timer interrupt delivers to */
diff --git a/system/xen/xsa/xsa262-4.10.patch b/system/xen/xsa/xsa262-4.10.patch
new file mode 100644
index 0000000000..ba9a8ffa22
--- /dev/null
+++ b/system/xen/xsa/xsa262-4.10.patch
@@ -0,0 +1,76 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/HVM: guard against emulator driving ioreq state in weird ways
+
+In the case where hvm_wait_for_io() calls wait_on_xen_event_channel(),
+p->state ends up being read twice in succession: once to determine that
+state != p->state, and then again at the top of the loop. This gives a
+compromised emulator a chance to change the state back between the two
+reads, potentially keeping Xen in a loop indefinitely.
+
+Instead:
+* Read p->state once in each of the wait_on_xen_event_channel() tests,
+* re-use that value the next time around,
+* and insist that the states continue to transition "forward" (with the
+ exception of the transition to STATE_IOREQ_NONE).
+
+This is XSA-262.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+
+--- a/xen/arch/x86/hvm/ioreq.c
++++ b/xen/arch/x86/hvm/ioreq.c
+@@ -87,14 +87,17 @@ static void hvm_io_assist(struct hvm_ior
+
+ static bool hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
+ {
++ unsigned int prev_state = STATE_IOREQ_NONE;
++
+ while ( sv->pending )
+ {
+ unsigned int state = p->state;
+
+- rmb();
+- switch ( state )
++ smp_rmb();
++
++ recheck:
++ if ( unlikely(state == STATE_IOREQ_NONE) )
+ {
+- case STATE_IOREQ_NONE:
+ /*
+ * The only reason we should see this case is when an
+ * emulator is dying and it races with an I/O being
+@@ -102,14 +105,30 @@ static bool hvm_wait_for_io(struct hvm_i
+ */
+ hvm_io_assist(sv, ~0ul);
+ break;
++ }
++
++ if ( unlikely(state < prev_state) )
++ {
++ gdprintk(XENLOG_ERR, "Weird HVM ioreq state transition %u -> %u\n",
++ prev_state, state);
++ sv->pending = false;
++ domain_crash(sv->vcpu->domain);
++ return false; /* bail */
++ }
++
++ switch ( prev_state = state )
++ {
+ case STATE_IORESP_READY: /* IORESP_READY -> NONE */
+ p->state = STATE_IOREQ_NONE;
+ hvm_io_assist(sv, p->data);
+ break;
+ case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
+ case STATE_IOREQ_INPROCESS:
+- wait_on_xen_event_channel(sv->ioreq_evtchn, p->state != state);
+- break;
++ wait_on_xen_event_channel(sv->ioreq_evtchn,
++ ({ state = p->state;
++ smp_rmb();
++ state != prev_state; }));
++ goto recheck;
+ default:
+ gdprintk(XENLOG_ERR, "Weird HVM iorequest state %u\n", state);
+ sv->pending = false;