Thu Apr 23 20:38:33 2020 UTC
When computing TSC skew, make 8 measurements and use the average.


(ad)
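The change replaces the single post-warmup skew measurement in tsc_sync_bp() and
tsc_sync_ap() with eight, accumulating the boot processor and AP readings and
computing the skew as (bsum - asum) >> 3. Below is a minimal userspace sketch of
that arithmetic with made-up sample values; the names bsum/asum mirror the
patch, but everything else here is illustrative only, not NetBSD code:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        /* Hypothetical paired readings: bp[i] from the boot processor,
         * ap[i] from the AP, as a tsc_read_bp()-style handshake would
         * produce them.  The values are invented for illustration. */
        int64_t bp[8] = { 1000, 1012,  998, 1005, 1003,  996, 1001, 1009 };
        int64_t ap[8] = { 1040, 1050, 1041, 1043, 1046, 1035, 1042, 1047 };
        int64_t bsum = 0, asum = 0;

        for (int i = 0; i < 8; i++) {
                bsum += bp[i];
                asum += ap[i];
        }

        /* Summing first and dividing once keeps full precision until the
         * final step; >> 3 divides by 8 (an arithmetic shift on signed
         * values with the compilers the kernel supports). */
        printf("skew = %lld cycles\n", (long long)((bsum - asum) >> 3));
        return 0;
}

Averaging eight paired samples damps per-measurement noise (bus contention,
cache effects) that a single sample would pass straight into cpu_cc_skew.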
diff -r1.41 -r1.42 src/sys/arch/x86/x86/tsc.c

--- src/sys/arch/x86/x86/tsc.c 2020/04/21 02:56:37 1.41
+++ src/sys/arch/x86/x86/tsc.c 2020/04/23 20:38:33 1.42
@@ -1,326 +1,332 @@
-/* $NetBSD: tsc.c,v 1.41 2020/04/21 02:56:37 msaitoh Exp $ */
+/* $NetBSD: tsc.c,v 1.42 2020/04/23 20:38:33 ad Exp $ */
 
 /*-
- * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tsc.c,v 1.41 2020/04/21 02:56:37 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tsc.c,v 1.42 2020/04/23 20:38:33 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 #include <sys/timetc.h>
 #include <sys/lwp.h>
 #include <sys/atomic.h>
 #include <sys/kernel.h>
 #include <sys/cpu.h>
 #include <sys/xcall.h>
 
 #include <machine/cpu_counter.h>
 #include <machine/cpuvar.h>
 #include <machine/cpufunc.h>
 #include <machine/specialreg.h>
 #include <machine/cputypes.h>
 
 #include "tsc.h"
 
 u_int tsc_get_timecount(struct timecounter *);
 
 uint64_t tsc_freq; /* exported for sysctl */
 static int64_t tsc_drift_max = 250; /* max cycles */
 static int64_t tsc_drift_observed;
 static bool tsc_good;
 
 int tsc_user_enabled = 1;
 
 static volatile int64_t tsc_sync_val;
 static volatile struct cpu_info *tsc_sync_cpu;
 
 static struct timecounter tsc_timecounter = {
         .tc_get_timecount = tsc_get_timecount,
         .tc_counter_mask = ~0U,
         .tc_name = "TSC",
         .tc_quality = 3000,
 };
 
 bool
 tsc_is_invariant(void)
 {
         struct cpu_info *ci;
         uint32_t descs[4];
         uint32_t family;
         bool invariant;
 
         if (!cpu_hascounter())
                 return false;
 
         ci = curcpu();
         invariant = false;
 
         if (cpu_vendor == CPUVENDOR_INTEL) {
                 /*
                  * From Intel(tm) 64 and IA-32 Architectures Software
                  * Developer's Manual Volume 3A: System Programming Guide,
                  * Part 1, 17.13 TIME_STAMP COUNTER, these are the processors
                  * where the TSC is known invariant:
                  *
                  * Pentium 4, Intel Xeon (family 0f, models 03 and higher)
                  * Core Solo and Core Duo processors (family 06, model 0e)
                  * Xeon 5100 series and Core 2 Duo (family 06, model 0f)
                  * Core 2 and Xeon (family 06, model 17)
                  * Atom (family 06, model 1c)
                  *
                  * We'll also assume that it's safe on the Pentium, and
                  * that it's safe on P-II and P-III Xeons due to the
                  * typical configuration of those systems.
                  *
                  */
                 switch (CPUID_TO_BASEFAMILY(ci->ci_signature)) {
                 case 0x05:
                         invariant = true;
                         break;
                 case 0x06:
                         invariant = CPUID_TO_MODEL(ci->ci_signature) == 0x0e ||
                             CPUID_TO_MODEL(ci->ci_signature) == 0x0f ||
                             CPUID_TO_MODEL(ci->ci_signature) == 0x17 ||
                             CPUID_TO_MODEL(ci->ci_signature) == 0x1c;
                         break;
                 case 0x0f:
                         invariant = CPUID_TO_MODEL(ci->ci_signature) >= 0x03;
                         break;
                 }
         } else if (cpu_vendor == CPUVENDOR_AMD) {
                 /*
                  * TSC and Power Management Events on AMD Processors
                  * Nov 2, 2005 Rich Brunner, AMD Fellow
                  * http://lkml.org/lkml/2005/11/4/173
                  *
                  * See Appendix E.4.7 CPUID Fn8000_0007_EDX Advanced Power
                  * Management Features, AMD64 Architecture Programmer's
                  * Manual Volume 3: General-Purpose and System Instructions.
                  * The check is done below.
                  */
         }
 
         /*
          * The best way to check whether the TSC counter is invariant or not
          * is to check CPUID 80000007.
          */
         family = CPUID_TO_BASEFAMILY(ci->ci_signature);
         if (((cpu_vendor == CPUVENDOR_INTEL) || (cpu_vendor == CPUVENDOR_AMD))
             && ((family == 0x06) || (family == 0x0f))) {
                 x86_cpuid(0x80000000, descs);
                 if (descs[0] >= 0x80000007) {
                         x86_cpuid(0x80000007, descs);
                         invariant = (descs[3] & CPUID_APM_ITSC) != 0;
                 }
         }
 
         return invariant;
 }
 
 /*
  * Initialize timecounter(9) of TSC.
  * This function is called after all secondary processors were up and
  * calculated the drift.
  */
 void
 tsc_tc_init(void)
 {
         struct cpu_info *ci;
         bool invariant;
 
         if (!cpu_hascounter())
                 return;
 
         ci = curcpu();
         tsc_freq = ci->ci_data.cpu_cc_freq;
         tsc_good = (cpu_feature[0] & CPUID_MSR) != 0 &&
             (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
 
         invariant = tsc_is_invariant();
         if (!invariant) {
                 aprint_debug("TSC not known invariant on this CPU\n");
                 tsc_timecounter.tc_quality = -100;
         } else if (tsc_drift_observed > tsc_drift_max) {
                 aprint_error("ERROR: %lld cycle TSC drift observed\n",
                     (long long)tsc_drift_observed);
                 tsc_timecounter.tc_quality = -100;
                 invariant = false;
         }
 
         if (tsc_freq != 0) {
                 tsc_timecounter.tc_frequency = tsc_freq;
                 tc_init(&tsc_timecounter);
         }
 }
 
 /*
  * Record drift (in clock cycles). Called during AP startup.
  */
 void
 tsc_sync_drift(int64_t drift)
 {
 
         if (drift < 0)
                 drift = -drift;
         if (drift > tsc_drift_observed)
                 tsc_drift_observed = drift;
 }
 
 /*
  * Called during startup of APs, by the boot processor. Interrupts
  * are disabled on entry.
  */
 static void
 tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
 {
         uint64_t bptsc;
 
         if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL) {
                 panic("tsc_sync_bp: 1");
         }
 
         /* Flag it and read our TSC. */
         atomic_or_uint(&ci->ci_flags, CPUF_SYNCTSC);
         bptsc = (rdtsc() >> 1);
 
         /* Wait for remote to complete, and read ours again. */
         while ((ci->ci_flags & CPUF_SYNCTSC) != 0) {
                 __insn_barrier();
         }
         bptsc += (rdtsc() >> 1);
 
         /* Wait for the results to come in. */
         while (tsc_sync_cpu == ci) {
                 x86_pause();
         }
         if (tsc_sync_cpu != NULL) {
                 panic("tsc_sync_bp: 2");
         }
 
         *bptscp = bptsc;
         *aptscp = tsc_sync_val;
 }
 
 void
 tsc_sync_bp(struct cpu_info *ci)
 {
-        uint64_t bptsc, aptsc;
+        int64_t bptsc, aptsc, bsum = 0, asum = 0;
 
         tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
-        tsc_read_bp(ci, &bptsc, &aptsc);
+        for (int i = 0; i < 8; i++) {
+                tsc_read_bp(ci, &bptsc, &aptsc);
+                bsum += bptsc;
+                asum += aptsc;
+        }
 
         /* Compute final value to adjust for skew. */
-        ci->ci_data.cpu_cc_skew = bptsc - aptsc;
+        ci->ci_data.cpu_cc_skew = (bsum - asum) >> 3;
 }
 
 /*
  * Called during startup of AP, by the AP itself. Interrupts are
  * disabled on entry.
  */
 static void
 tsc_post_ap(struct cpu_info *ci)
 {
         uint64_t tsc;
 
         /* Wait for go-ahead from primary. */
         while ((ci->ci_flags & CPUF_SYNCTSC) == 0) {
                 __insn_barrier();
         }
         tsc = (rdtsc() >> 1);
 
         /* Instruct primary to read its counter. */
         atomic_and_uint(&ci->ci_flags, ~CPUF_SYNCTSC);
         tsc += (rdtsc() >> 1);
 
         /* Post result. Ensure the whole value goes out atomically. */
         (void)atomic_swap_64(&tsc_sync_val, tsc);
 
         if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci) {
                 panic("tsc_sync_ap");
         }
 }
 
 void
 tsc_sync_ap(struct cpu_info *ci)
 {
 
         tsc_post_ap(ci);
-        tsc_post_ap(ci);
+        for (int i = 0; i < 8; i++) {
+                tsc_post_ap(ci);
+        }
 }
 
 static void
 tsc_apply_cpu(void *arg1, void *arg2)
 {
         bool enable = arg1 != NULL;
         if (enable) {
                 lcr4(rcr4() & ~CR4_TSD);
         } else {
                 lcr4(rcr4() | CR4_TSD);
         }
 }
 
 void
 tsc_user_enable(void)
 {
         uint64_t xc;
 
         xc = xc_broadcast(0, tsc_apply_cpu, (void *)true, NULL);
         xc_wait(xc);
 }
 
 void
 tsc_user_disable(void)
 {
         uint64_t xc;
 
         xc = xc_broadcast(0, tsc_apply_cpu, (void *)false, NULL);
         xc_wait(xc);
 }
 
 uint64_t
 cpu_frequency(struct cpu_info *ci)
 {
 
         return ci->ci_data.cpu_cc_freq;
 }
 
 int
 cpu_hascounter(void)
 {
 
         return cpu_feature[0] & CPUID_TSC;
 }
 
 uint64_t
 cpu_counter_serializing(void)
 {
         if (tsc_good)
                 return rdmsr(MSR_TSC);
         else
                 return cpu_counter();
 }