summaryrefslogtreecommitdiff
path: root/system/xen/xsa/xsa206-4.8-0015-oxenstored-transaction-conflicts-improve-logging.patch
blob: bc46ed1ac5ee39d34519a229a81687263711dc10 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
From d7d0c021115d40177035a0626ed47681b034b584 Mon Sep 17 00:00:00 2001
From: Thomas Sanders <thomas.sanders@citrix.com>
Date: Mon, 27 Mar 2017 14:36:34 +0100
Subject: [PATCH 15/15] oxenstored transaction conflicts: improve logging

For information related to transaction conflicts, potentially frequent
logging at "info" priority has been changed to "debug" priority, and
once per two minutes there is an "info" priority summary.

Additional detailed logging has been added at "debug" priority.

Reported-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com>
---
 tools/ocaml/xenstored/domain.ml      | 8 ++++++++
 tools/ocaml/xenstored/domains.ml     | 5 +++++
 tools/ocaml/xenstored/process.ml     | 6 +++++-
 tools/ocaml/xenstored/transaction.ml | 5 +++++
 tools/ocaml/xenstored/xenstored.ml   | 6 ++++++
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml
index e677aa3..4515650 100644
--- a/tools/ocaml/xenstored/domain.ml
+++ b/tools/ocaml/xenstored/domain.ml
@@ -34,6 +34,7 @@ type t =
 	mutable conflict_credit: float; (* Must be positive to perform writes; a commit
 	                                   that later causes conflict with another
 	                                   domain's transaction costs credit. *)
+	mutable caused_conflicts: int64;
 }
 
 let is_dom0 d = d.id = 0
@@ -93,4 +94,11 @@ let make id mfn remote_port interface eventchn = {
 	bad_client = false;
 	io_credit = 0;
 	conflict_credit = !Define.conflict_burst_limit;
+	caused_conflicts = 0L;
 }
+
+let log_and_reset_conflict_stats logfn dom =
+	if dom.caused_conflicts > 0L then (
+		logfn dom.id dom.caused_conflicts;
+		dom.caused_conflicts <- 0L
+	)
diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml
index 61d1e2e..fdae298 100644
--- a/tools/ocaml/xenstored/domains.ml
+++ b/tools/ocaml/xenstored/domains.ml
@@ -146,8 +146,10 @@ let create0 doms =
 	dom
 
 let decr_conflict_credit doms dom =
+	dom.Domain.caused_conflicts <- Int64.add 1L dom.Domain.caused_conflicts;
 	let before = dom.Domain.conflict_credit in
 	let after = max (-1.0) (before -. 1.0) in
+	debug "decr_conflict_credit dom%d %F -> %F" (Domain.get_id dom) before after;
 	dom.Domain.conflict_credit <- after;
 	let newly_penalised =
 		before >= !Define.conflict_burst_limit
@@ -178,7 +180,9 @@ let decr_conflict_credit doms dom =
 let incr_conflict_credit_from_queue doms =
 	let process_queue q requeue_test =
 		let d = pop q in
+		let before = d.Domain.conflict_credit in (* just for debug-logging *)
 		d.Domain.conflict_credit <- min (d.Domain.conflict_credit +. 1.0) !Define.conflict_burst_limit;
+		debug "incr_conflict_credit_from_queue: dom%d: %F -> %F" (Domain.get_id d) before d.Domain.conflict_credit;
 		if requeue_test d.Domain.conflict_credit then (
 			push d q (* Make it queue up again for its next point of credit. *)
 		)
@@ -200,6 +204,7 @@ let incr_conflict_credit doms =
 			let before = dom.Domain.conflict_credit in
 			let after = min (before +. 1.0) !Define.conflict_burst_limit in
 			dom.Domain.conflict_credit <- after;
+			debug "incr_conflict_credit dom%d: %F -> %F" (Domain.get_id dom) before after;
 
 			if before <= 0.0 && after > 0.0
 			then doms.n_paused <- doms.n_paused - 1;
diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
index adfc7a4..8a688c4 100644
--- a/tools/ocaml/xenstored/process.ml
+++ b/tools/ocaml/xenstored/process.ml
@@ -330,6 +330,7 @@ let transaction_replay c t doms cons =
 				Transaction.commit ~con replay_t
 			with
 			| Transaction_again -> (
+				Transaction.failed_commits := Int64.add !Transaction.failed_commits 1L;
 				let victim_domstr = Connection.get_domstr c in
 				debug "Apportioning blame for EAGAIN in txn %d, domain=%s" id victim_domstr;
 				let punish guilty_con =
@@ -351,7 +352,10 @@ let transaction_replay c t doms cons =
 					else false
 				) in
 				let guilty_cons = History.filter_connections ~ignore:c ~since:t.Transaction.start_count ~f:judge_and_sentence in
-				if Hashtbl.length guilty_cons = 0 then debug "Found no culprit for conflict in %s: must be self or not in history." con;
+				if Hashtbl.length guilty_cons = 0 then (
+					debug "Found no culprit for conflict in %s: must be self or not in history." con;
+					Transaction.failed_commits_no_culprit := Int64.add !Transaction.failed_commits_no_culprit 1L
+				);
 				false
 			)
 			| e ->
diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml
index 8f95301..da4a3e3 100644
--- a/tools/ocaml/xenstored/transaction.ml
+++ b/tools/ocaml/xenstored/transaction.ml
@@ -90,6 +90,11 @@ type t = {
 let get_id t = match t.ty with No -> none | Full (id, _, _) -> id
 
 let counter = ref 0L
+let failed_commits = ref 0L
+let failed_commits_no_culprit = ref 0L
+let reset_conflict_stats () =
+	failed_commits := 0L;
+	failed_commits_no_culprit := 0L
 
 (* Scope for optimisation: different data-structure and functions to search/filter it *)
 let short_running_txns = ref []
diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
index 06387a8..05ace4d 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -376,6 +376,7 @@ let _ =
 	let last_scan_time = ref 0. in
 
 	let periodic_ops now =
+		debug "periodic_ops starting";
 		(* we garbage collect the string->int dictionary after a sizeable amount of operations,
 		 * there's no need to be really fast even if we got loose
 		 * objects since names are often reuse.
@@ -395,7 +396,11 @@ let _ =
 
 		(* make sure we don't print general stats faster than 2 min *)
 		if now > (!last_stat_time +. 120.) then (
+			info "Transaction conflict statistics for last %F seconds:" (now -. !last_stat_time);
 			last_stat_time := now;
+			Domains.iter domains (Domain.log_and_reset_conflict_stats (info "Dom%d caused %Ld conflicts"));
+			info "%Ld failed transactions; of these no culprit was found for %Ld" !Transaction.failed_commits !Transaction.failed_commits_no_culprit;
+			Transaction.reset_conflict_stats ();
 
 			let gc = Gc.stat () in
 			let (lanon, lanon_ops, lanon_watchs,
@@ -415,6 +420,7 @@ let _ =
 			     gc.Gc.free_words gc.Gc.free_blocks
 		);
 		let elapsed = Unix.gettimeofday () -. now in
+		debug "periodic_ops took %F seconds." elapsed;
 		delay_next_frequent_ops_by elapsed
 	in
 
-- 
2.1.4