Fri Jul 28 10:37:28 2023 UTC
timecounter(9): Link to phk's timecounter paper for reference.

No functional change intended.


(riastradh)
cvs diff -r1.74 -r1.75 src/sys/kern/kern_tc.c

--- src/sys/kern/kern_tc.c 2023/07/27 01:48:49 1.74
+++ src/sys/kern/kern_tc.c 2023/07/28 10:37:28 1.75
@@ -1,1042 +1,1046 @@
-/*	$NetBSD: kern_tc.c,v 1.74 2023/07/27 01:48:49 riastradh Exp $	*/
+/*	$NetBSD: kern_tc.c,v 1.75 2023/07/28 10:37:28 riastradh Exp $	*/

/*-
 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ---------------------------------------------------------------------------
 */

+/*
+ * https://papers.freebsd.org/2002/phk-timecounters.files/timecounter.pdf
+ */
+
#include <sys/cdefs.h>
/* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
-__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.74 2023/07/27 01:48:49 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.75 2023/07/28 10:37:28 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_ntp.h"
#endif

#include <sys/param.h>

#include <sys/atomic.h>
#include <sys/evcnt.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/reboot.h>	/* XXX just to get AB_VERBOSE */
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timepps.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/xcall.h>

/*
 * A large step happens on boot.  This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define LARGE_STEP	200

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return ++now;
}

static struct timecounter dummy_timecounter = {
	.tc_get_timecount = dummy_get_timecount,
	.tc_counter_mask = ~0u,
	.tc_frequency = 1000000,
	.tc_name = "dummy",
	.tc_quality = -1000000,
	.tc_priv = NULL,
};
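
[The dummy counter above is only a bootstrap.  For orientation, a minimal
sketch of how a machine-dependent driver plugs a real free-running counter
into this framework; the register, name, frequency, and quality here are
hypothetical, not taken from any actual NetBSD driver:]

	/* Editor's sketch -- hypothetical driver, not part of kern_tc.c. */
	static volatile uint32_t *example_counter_reg;	/* assumed MMIO register */

	static u_int
	example_get_timecount(struct timecounter *tc)
	{

		return *example_counter_reg;	/* free-running hardware count */
	}

	static struct timecounter example_timecounter = {
		.tc_get_timecount = example_get_timecount,
		.tc_counter_mask = ~0u,		/* full 32-bit counter */
		.tc_frequency = 24000000,	/* hypothetical 24 MHz clock */
		.tc_name = "exampletimer",
		.tc_quality = 100,	/* >= 0: eligible for auto-selection */
	};

	/* Called once from the driver's attach routine: */
	/*	tc_init(&example_timecounter); */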

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;     /* active timecounter */
	int64_t			th_adjustment;   /* frequency adjustment */
						 /* (NTP/adjtime) */
	uint64_t		th_scale;        /* scale factor (counter */
						 /* tick->time) */
	uint64_t		th_offset_count; /* offset at last time */
						 /* update (tc_windup()) */
	struct bintime		th_offset;       /* bin (up)time at windup */
	struct timeval		th_microtime;    /* cached microtime */
	struct timespec		th_nanotime;     /* cached nanotime */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int		th_generation;   /* current generation */
	struct timehands	*th_next;        /* next timehand */
};

static struct timehands th0;
static struct timehands th9 = { .th_next = &th0, };
static struct timehands th8 = { .th_next = &th9, };
static struct timehands th7 = { .th_next = &th8, };
static struct timehands th6 = { .th_next = &th7, };
static struct timehands th5 = { .th_next = &th6, };
static struct timehands th4 = { .th_next = &th5, };
static struct timehands th3 = { .th_next = &th4, };
static struct timehands th2 = { .th_next = &th3, };
static struct timehands th1 = { .th_next = &th2, };
static struct timehands th0 = {
	.th_counter = &dummy_timecounter,
	.th_scale = (uint64_t)-1 / 1000000,
	.th_offset = { .sec = 1, .frac = 0 },
	.th_generation = 1,
	.th_next = &th1,
};

static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

/* used by savecore(8) */
time_t time_second_legacy asm("time_second");

#ifdef __HAVE_ATOMIC64_LOADSTORE
volatile time_t time__second __cacheline_aligned = 1;
volatile time_t time__uptime __cacheline_aligned = 1;
#else
static volatile struct {
	uint32_t lo, hi;
} time__uptime32 __cacheline_aligned = {
	.lo = 1,
}, time__second32 __cacheline_aligned = {
	.lo = 1,
};
#endif

static struct {
	struct bintime bin;
	volatile unsigned gen;	/* even when stable, odd when changing */
} timebase __cacheline_aligned;

static int timestepwarnings;

kmutex_t timecounter_lock;
static u_int timecounter_mods;
static volatile int timecounter_removals = 1;
static u_int timecounter_bad;

#ifdef __HAVE_ATOMIC64_LOADSTORE

static inline void
setrealuptime(time_t second, time_t uptime)
{

	time_second_legacy = second;

	atomic_store_relaxed(&time__second, second);
	atomic_store_relaxed(&time__uptime, uptime);
}

#else

static inline void
setrealuptime(time_t second, time_t uptime)
{
	uint32_t seclo = second & 0xffffffff, sechi = second >> 32;
	uint32_t uplo = uptime & 0xffffffff, uphi = uptime >> 32;

	KDASSERT(mutex_owned(&timecounter_lock));

	time_second_legacy = second;

	/*
	 * Fast path -- no wraparound, just updating the low bits, so
	 * no need for seqlocked access.
	 */
	if (__predict_true(sechi == time__second32.hi) &&
	    __predict_true(uphi == time__uptime32.hi)) {
		atomic_store_relaxed(&time__second32.lo, seclo);
		atomic_store_relaxed(&time__uptime32.lo, uplo);
		return;
	}

	atomic_store_relaxed(&time__second32.hi, 0xffffffff);
	atomic_store_relaxed(&time__uptime32.hi, 0xffffffff);
	membar_producer();
	atomic_store_relaxed(&time__second32.lo, seclo);
	atomic_store_relaxed(&time__uptime32.lo, uplo);
	membar_producer();
	atomic_store_relaxed(&time__second32.hi, sechi);
	atomic_store_relaxed(&time__uptime32.hi, uphi);
}

time_t
getrealtime(void)
{
	uint32_t lo, hi;

	do {
		for (;;) {
			hi = atomic_load_relaxed(&time__second32.hi);
			if (__predict_true(hi != 0xffffffff))
				break;
			SPINLOCK_BACKOFF_HOOK;
		}
		membar_consumer();
		lo = atomic_load_relaxed(&time__second32.lo);
		membar_consumer();
	} while (hi != atomic_load_relaxed(&time__second32.hi));

	return ((time_t)hi << 32) | lo;
}

time_t
getuptime(void)
{
	uint32_t lo, hi;

	do {
		for (;;) {
			hi = atomic_load_relaxed(&time__uptime32.hi);
			if (__predict_true(hi != 0xffffffff))
				break;
			SPINLOCK_BACKOFF_HOOK;
		}
		membar_consumer();
		lo = atomic_load_relaxed(&time__uptime32.lo);
		membar_consumer();
	} while (hi != atomic_load_relaxed(&time__uptime32.hi));

	return ((time_t)hi << 32) | lo;
}

time_t
getboottime(void)
{

	return getrealtime() - getuptime();
}

uint32_t
getuptime32(void)
{

	return atomic_load_relaxed(&time__uptime32.lo);
}

#endif	/* !defined(__HAVE_ATOMIC64_LOADSTORE) */

/*
 * sysctl helper routine for kern.timecounter.hardware
 */
static int
sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error;
	char newname[MAX_TCNAMELEN];
	struct timecounter *newtc, *tc;

	tc = timecounter;

	strlcpy(newname, tc->tc_name, sizeof(newname));

	node = *rnode;
	node.sysctl_data = newname;
	node.sysctl_size = sizeof(newname);

	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (error ||
	    newp == NULL ||
	    strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
		return error;

	if (l != NULL && (error = kauth_authorize_system(l->l_cred,
	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
	    NULL, NULL)) != 0)
		return error;

	if (!cold)
		mutex_spin_enter(&timecounter_lock);
	error = EINVAL;
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;
		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);
		timecounter = newtc;
		error = 0;
		break;
	}
	if (!cold)
		mutex_spin_exit(&timecounter_lock);
	return error;
}
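
[A usage sketch from userland, not part of this file: the handler above is
what a sysctlbyname(3) write to kern.timecounter.hardware reaches.  The
counter name passed in would come from kern.timecounter.choice; everything
here besides the two sysctl names is hypothetical:]

	/* Editor's sketch -- hypothetical userland caller. */
	#include <sys/param.h>
	#include <sys/sysctl.h>

	#include <string.h>

	static int
	select_timecounter(const char *name)
	{

		/* Write the NUL-terminated counter name as the new value. */
		return sysctlbyname("kern.timecounter.hardware", NULL, NULL,
		    name, strlen(name) + 1);
	}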
313 317
314static int 318static int
315sysctl_kern_timecounter_choice(SYSCTLFN_ARGS) 319sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
316{ 320{
317 char buf[MAX_TCNAMELEN+48]; 321 char buf[MAX_TCNAMELEN+48];
318 char *where; 322 char *where;
319 const char *spc; 323 const char *spc;
320 struct timecounter *tc; 324 struct timecounter *tc;
321 size_t needed, left, slen; 325 size_t needed, left, slen;
322 int error, mods; 326 int error, mods;
323 327
324 if (newp != NULL) 328 if (newp != NULL)
325 return EPERM; 329 return EPERM;
326 if (namelen != 0) 330 if (namelen != 0)
327 return EINVAL; 331 return EINVAL;
328 332
329 mutex_spin_enter(&timecounter_lock); 333 mutex_spin_enter(&timecounter_lock);
330 retry: 334 retry:
331 spc = ""; 335 spc = "";
332 error = 0; 336 error = 0;
333 needed = 0; 337 needed = 0;
334 left = *oldlenp; 338 left = *oldlenp;
335 where = oldp; 339 where = oldp;
336 for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) { 340 for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
337 if (where == NULL) { 341 if (where == NULL) {
338 needed += sizeof(buf); /* be conservative */ 342 needed += sizeof(buf); /* be conservative */
339 } else { 343 } else {
340 slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64 344 slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
341 " Hz)", spc, tc->tc_name, tc->tc_quality, 345 " Hz)", spc, tc->tc_name, tc->tc_quality,
342 tc->tc_frequency); 346 tc->tc_frequency);
343 if (left < slen + 1) 347 if (left < slen + 1)
344 break; 348 break;
345 mods = timecounter_mods; 349 mods = timecounter_mods;
346 mutex_spin_exit(&timecounter_lock); 350 mutex_spin_exit(&timecounter_lock);
347 error = copyout(buf, where, slen + 1); 351 error = copyout(buf, where, slen + 1);
348 mutex_spin_enter(&timecounter_lock); 352 mutex_spin_enter(&timecounter_lock);
349 if (mods != timecounter_mods) { 353 if (mods != timecounter_mods) {
350 goto retry; 354 goto retry;
351 } 355 }
352 spc = " "; 356 spc = " ";
353 where += slen; 357 where += slen;
354 needed += slen; 358 needed += slen;
355 left -= slen; 359 left -= slen;
356 } 360 }
357 } 361 }
358 mutex_spin_exit(&timecounter_lock); 362 mutex_spin_exit(&timecounter_lock);
359 363
360 *oldlenp = needed; 364 *oldlenp = needed;
361 return error; 365 return error;
362} 366}
363 367
364SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup") 368SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
365{ 369{
366 const struct sysctlnode *node; 370 const struct sysctlnode *node;
367 371
368 sysctl_createv(clog, 0, NULL, &node, 372 sysctl_createv(clog, 0, NULL, &node,
369 CTLFLAG_PERMANENT, 373 CTLFLAG_PERMANENT,
370 CTLTYPE_NODE, "timecounter", 374 CTLTYPE_NODE, "timecounter",
371 SYSCTL_DESCR("time counter information"), 375 SYSCTL_DESCR("time counter information"),
372 NULL, 0, NULL, 0, 376 NULL, 0, NULL, 0,
373 CTL_KERN, CTL_CREATE, CTL_EOL); 377 CTL_KERN, CTL_CREATE, CTL_EOL);
374 378
375 if (node != NULL) { 379 if (node != NULL) {
376 sysctl_createv(clog, 0, NULL, NULL, 380 sysctl_createv(clog, 0, NULL, NULL,
377 CTLFLAG_PERMANENT, 381 CTLFLAG_PERMANENT,
378 CTLTYPE_STRING, "choice", 382 CTLTYPE_STRING, "choice",
379 SYSCTL_DESCR("available counters"), 383 SYSCTL_DESCR("available counters"),
380 sysctl_kern_timecounter_choice, 0, NULL, 0, 384 sysctl_kern_timecounter_choice, 0, NULL, 0,
381 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL); 385 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
382 386
383 sysctl_createv(clog, 0, NULL, NULL, 387 sysctl_createv(clog, 0, NULL, NULL,
384 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 388 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
385 CTLTYPE_STRING, "hardware", 389 CTLTYPE_STRING, "hardware",
386 SYSCTL_DESCR("currently active time counter"), 390 SYSCTL_DESCR("currently active time counter"),
387 sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN, 391 sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
388 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL); 392 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
389 393
390 sysctl_createv(clog, 0, NULL, NULL, 394 sysctl_createv(clog, 0, NULL, NULL,
391 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 395 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
392 CTLTYPE_INT, "timestepwarnings", 396 CTLTYPE_INT, "timestepwarnings",
393 SYSCTL_DESCR("log time steps"), 397 SYSCTL_DESCR("log time steps"),
394 NULL, 0, &timestepwarnings, 0, 398 NULL, 0, &timestepwarnings, 0,
395 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL); 399 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
396 } 400 }
397} 401}
398 402
399#ifdef TC_COUNTERS 403#ifdef TC_COUNTERS
400#define TC_STATS(name) \ 404#define TC_STATS(name) \
401static struct evcnt n##name = \ 405static struct evcnt n##name = \
402 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name); \ 406 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name); \
403EVCNT_ATTACH_STATIC(n##name) 407EVCNT_ATTACH_STATIC(n##name)
404TC_STATS(binuptime); TC_STATS(nanouptime); TC_STATS(microuptime); 408TC_STATS(binuptime); TC_STATS(nanouptime); TC_STATS(microuptime);
405TC_STATS(bintime); TC_STATS(nanotime); TC_STATS(microtime); 409TC_STATS(bintime); TC_STATS(nanotime); TC_STATS(microtime);
406TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime); 410TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime);
407TC_STATS(getbintime); TC_STATS(getnanotime); TC_STATS(getmicrotime); 411TC_STATS(getbintime); TC_STATS(getnanotime); TC_STATS(getmicrotime);
408TC_STATS(setclock); 412TC_STATS(setclock);
409#define TC_COUNT(var) var.ev_count++ 413#define TC_COUNT(var) var.ev_count++
410#undef TC_STATS 414#undef TC_STATS
411#else 415#else
412#define TC_COUNT(var) /* nothing */ 416#define TC_COUNT(var) /* nothing */
413#endif /* TC_COUNTERS */ 417#endif /* TC_COUNTERS */
414 418
415static void tc_windup(void); 419static void tc_windup(void);
416 420
417/* 421/*
418 * Return the difference between the timehands' counter value now and what 422 * Return the difference between the timehands' counter value now and what
419 * was when we copied it to the timehands' offset_count. 423 * was when we copied it to the timehands' offset_count.
420 */ 424 */
421static inline u_int 425static inline u_int
422tc_delta(struct timehands *th) 426tc_delta(struct timehands *th)
423{ 427{
424 struct timecounter *tc; 428 struct timecounter *tc;
425 429
426 tc = th->th_counter; 430 tc = th->th_counter;
427 return (tc->tc_get_timecount(tc) - 431 return (tc->tc_get_timecount(tc) -
428 th->th_offset_count) & tc->tc_counter_mask; 432 th->th_offset_count) & tc->tc_counter_mask;
429} 433}
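
[The unsigned subtraction plus mask is what makes counter wraparound
harmless, provided tc_windup() runs at least once per counter period.
A worked example with a hypothetical 16-bit counter:]

	/* Editor's sketch -- hypothetical values, not part of kern_tc.c. */
	uint32_t mask = 0xffff;		/* 16-bit tc_counter_mask */
	uint32_t offset_count = 0xfff0;	/* counter at the last windup */
	uint32_t now = 0x0010;		/* counter has wrapped past zero */
	uint32_t delta = (now - offset_count) & mask;	/* 0x0020 = 32 ticks */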

/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/timevar.h> for a description of these 12 functions.
 */

void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	lwp_t *l;
	u_int lgen, gen;

	TC_COUNT(nbinuptime);

	/*
	 * Provide exclusion against tc_detach().
	 *
	 * We record the number of timecounter removals before accessing
	 * timecounter state.  Note that the LWP can be using multiple
	 * "generations" at once, due to interrupts (interrupted while in
	 * this function).  Hardware interrupts will borrow the interrupted
	 * LWP's l_tcgen value for this purpose, and can themselves be
	 * interrupted by higher priority interrupts.  In this case we need
	 * to ensure that the oldest generation in use is recorded.
	 *
	 * splsched() is too expensive to use, so we take care to structure
	 * this code in such a way that it is not required.  Likewise, we
	 * do not disable preemption.
	 *
	 * Memory barriers are also too expensive to use for such a
	 * performance critical function.  The good news is that we do not
	 * need memory barriers for this type of exclusion, as the thread
	 * updating timecounter_removals will issue a broadcast cross call
	 * before inspecting our l_tcgen value (this elides memory ordering
	 * issues).
	 *
	 * XXX If the author of the above comment knows how to make it
	 * safe to avoid memory barriers around the access to
	 * th->th_generation, I'm all ears.
	 */
	l = curlwp;
	lgen = l->l_tcgen;
	if (__predict_true(lgen == 0)) {
		l->l_tcgen = timecounter_removals;
	}
	__insn_barrier();

	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);

	__insn_barrier();
	l->l_tcgen = lgen;
}
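
[A minimal usage sketch, assuming a kernel context (not from this file):
the uptime clock read by binuptime() is monotonic, which makes it the
natural base for interval measurement with the bintime helpers from
<sys/time.h>:]

	/* Editor's sketch -- measuring an interval with binuptime(9). */
	struct bintime start, elapsed;

	binuptime(&start);
	/* ... the work being timed ... */
	binuptime(&elapsed);
	bintime_sub(&elapsed, &start);	/* elapsed = end - start */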

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	TC_COUNT(nnanouptime);
	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	TC_COUNT(nmicrouptime);
	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{
	struct bintime boottime;

	TC_COUNT(nbintime);
	binuptime(bt);
	getbinboottime(&boottime);
	bintime_add(bt, &boottime);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	TC_COUNT(nnanotime);
	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	TC_COUNT(nmicrotime);
	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetbinuptime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanouptime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		bintime2timespec(&th->th_offset, tsp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrouptime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		bintime2timeval(&th->th_offset, tvp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getbintime(struct bintime *bt)
{
	struct timehands *th;
	struct bintime boottime;
	u_int gen;

	TC_COUNT(ngetbintime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
	getbinboottime(&boottime);
	bintime_add(bt, &boottime);
}

static inline void
dogetnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanotime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*tsp = th->th_nanotime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanotime(struct timespec *tsp)
{

	dogetnanotime(tsp);
}

void dtrace_getnanotime(struct timespec *tsp);

void
dtrace_getnanotime(struct timespec *tsp)
{

	dogetnanotime(tsp);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrotime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*tvp = th->th_microtime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanoboottime(struct timespec *tsp)
{
	struct bintime bt;

	getbinboottime(&bt);
	bintime2timespec(&bt, tsp);
}

void
getmicroboottime(struct timeval *tvp)
{
	struct bintime bt;

	getbinboottime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getbinboottime(struct bintime *basep)
{
	struct bintime base;
	unsigned gen;

	do {
		/* Spin until the timebase isn't changing. */
		while ((gen = atomic_load_relaxed(&timebase.gen)) & 1)
			SPINLOCK_BACKOFF_HOOK;

		/* Read out a snapshot of the timebase. */
		membar_consumer();
		base = timebase.bin;
		membar_consumer();

		/* Restart if it changed while we were reading. */
	} while (gen != atomic_load_relaxed(&timebase.gen));

	*basep = base;
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	KASSERTMSG(tc->tc_next == NULL, "timecounter %s already initialised",
	    tc->tc_name);

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (u > hz && tc->tc_quality >= 0) {
		tc->tc_quality = -2000;
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz",
		    tc->tc_name, (uintmax_t)tc->tc_frequency);
		aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
	} else if (tc->tc_quality >= 0 || bootverbose) {
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz "
		    "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
		    tc->tc_quality);
	}

	mutex_spin_enter(&timecounter_lock);
	tc->tc_next = timecounters;
	timecounters = tc;
	timecounter_mods++;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
	    (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency > timecounter->tc_frequency))) {
		(void)tc->tc_get_timecount(tc);
		(void)tc->tc_get_timecount(tc);
		timecounter = tc;
		tc_windup();
	}
	mutex_spin_exit(&timecounter_lock);
}
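
[The 10% margin check in tc_init() is easiest to see with numbers.  A
hedged worked example, again with a hypothetical 16-bit counter at about
3.58 MHz:]

	/* Editor's sketch -- hypothetical numbers, not part of kern_tc.c. */
	uint64_t freq = 3579545;	/* counter input clock, Hz */
	u_int mask = 0xffff;		/* 16-bit counter */
	u_int u = freq / mask;		/* ~54 wraps per second */
	u = u * 11 / 10;		/* ~60 after the 10% safety margin */
	/* Trusted only if hz >= u: tc_windup() must outpace counter wraps. */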

/*
 * Pick a new timecounter due to the existing counter going bad.
 */
static void
tc_pick(void)
{
	struct timecounter *best, *tc;

	KASSERT(mutex_owned(&timecounter_lock));

	for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
		if (tc->tc_quality > best->tc_quality)
			best = tc;
		else if (tc->tc_quality < best->tc_quality)
			continue;
		else if (tc->tc_frequency > best->tc_frequency)
			best = tc;
	}
	(void)best->tc_get_timecount(best);
	(void)best->tc_get_timecount(best);
	timecounter = best;
}

/*
 * A timecounter has gone bad, arrange to pick a new one at the next
 * clock tick.
 */
void
tc_gonebad(struct timecounter *tc)
{

	tc->tc_quality = -100;
	membar_producer();
	atomic_inc_uint(&timecounter_bad);
}

/*
 * Stop using a timecounter and remove it from the timecounters list.
 */
int
tc_detach(struct timecounter *target)
{
	struct timecounter *tc;
	struct timecounter **tcp = NULL;
	int removals;
	lwp_t *l;

	/* First, find the timecounter. */
	mutex_spin_enter(&timecounter_lock);
	for (tcp = &timecounters, tc = timecounters;
	     tc != NULL;
	     tcp = &tc->tc_next, tc = tc->tc_next) {
		if (tc == target)
			break;
	}
	if (tc == NULL) {
		mutex_spin_exit(&timecounter_lock);
		return ESRCH;
	}

	/* And now, remove it. */
	*tcp = tc->tc_next;
	if (timecounter == target) {
		tc_pick();
		tc_windup();
	}
	timecounter_mods++;
	removals = timecounter_removals++;
	mutex_spin_exit(&timecounter_lock);

	/*
	 * We now have to determine if any threads in the system are still
	 * making use of this timecounter.
	 *
	 * We issue a broadcast cross call to elide memory ordering issues,
	 * then scan all LWPs in the system looking at each's timecounter
	 * generation number.  We need to see a value of zero (not actively
	 * using a timecounter) or a value greater than our removal value.
	 *
	 * We may race with threads that read `timecounter_removals' and
	 * then get preempted before updating `l_tcgen'.  This is not
	 * a problem, since it means that these threads have not yet started
	 * accessing timecounter state.  All we do need is one clean
	 * snapshot of the system where every thread appears not to be using
	 * old timecounter state.
	 */
	for (;;) {
		xc_barrier(0);

		mutex_enter(&proc_lock);
		LIST_FOREACH(l, &alllwp, l_list) {
			if (l->l_tcgen == 0 || l->l_tcgen > removals) {
				/*
				 * Not using timecounter or old timecounter
				 * state at time of our xcall or later.
				 */
				continue;
			}
			break;
		}
		mutex_exit(&proc_lock);

		/*
		 * If the timecounter is still in use, wait at least 10ms
		 * before retrying.
		 */
		if (l == NULL) {
			break;
		}
		(void)kpause("tcdetach", false, mstohz(10), NULL);
	}

	tc->tc_next = NULL;
	return 0;
}

/* Report the frequency of the current timecounter. */
uint64_t
tc_getfrequency(void)
{

	return atomic_load_consume(&timehands)->th_counter->tc_frequency;
}

/*
 * Step our concept of UTC.  This is done by modifying our estimate of
 * when we booted.
 */
void
tc_setclock(const struct timespec *ts)
{
	struct timespec ts2;
	struct bintime bt, bt2;

	mutex_spin_enter(&timecounter_lock);
	TC_COUNT(nsetclock);
	binuptime(&bt2);
	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &bt2);
	bintime_add(&bt2, &timebase.bin);
	timebase.gen |= 1;	/* change in progress */
	membar_producer();
	timebase.bin = bt;
	membar_producer();
	timebase.gen++;		/* commit change */
	tc_windup();
	mutex_spin_exit(&timecounter_lock);

	if (timestepwarnings) {
		bintime2timespec(&bt2, &ts2);
		log(LOG_INFO,
		    "Time stepped from %lld.%09ld to %lld.%09ld\n",
		    (long long)ts2.tv_sec, ts2.tv_nsec,
		    (long long)ts->tv_sec, ts->tv_nsec);
	}
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
static void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	uint64_t scale;
	u_int delta, ncount, ogen;
	int i, s_update;
	time_t t;

	KASSERT(mutex_owned(&timecounter_lock));

	s_update = 0;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.  Ensure global
	 * visibility of the generation before proceeding.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	membar_producer();
	bcopy(tho, th, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 * If NTP is not compiled in ntp_update_second still calculates
	 * the adjustment resulting from adjtime() calls.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &timebase.bin);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--) {
		t = bt.sec;
		ntp_update_second(&th->th_adjustment, &bt.sec);
		s_update = 1;
		if (bt.sec != t) {
			timebase.gen |= 1;	/* change in progress */
			membar_producer();
			timebase.bin.sec += bt.sec - t;
			membar_producer();
			timebase.gen++;		/* commit change */
		}
	}

	/* Update the UTC timestamps used by the get*() functions. */
	/* XXX shouldn't do this here.  Should force non-`get' versions. */
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);
	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
		s_update = 1;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
	if (s_update) {
		scale = (uint64_t)1 << 63;
		scale += (th->th_adjustment / 1024) * 2199;
		scale /= th->th_counter->tc_frequency;
		th->th_scale = scale * 2;
	}
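
[A worked check of the 10PPM figure in the comment above; this arithmetic
is the editor's, not from the source:]

	/*
	 * The code reaches 2199/512 as (x / 1024) * 2199 followed by the
	 * final scale * 2:
	 *
	 *	exact factor:	2^32 / 10^9 = 4.294967296
	 *	code's factor:	2199 / 512  = 4.294921875
	 *
	 * relative error = (4.294967296 - 4.294921875) / 4.294967296
	 *              ~= 1.06e-5, i.e. the ~10PPM systematic
	 * undercompensation; at the +/- 5000PPM adjustment limit that
	 * amounts to ~0.05PPM of absolute error.
	 */
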
	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.  Ensure
	 * changes are globally visible before changing.
	 */
	if (++ogen == 0)
		ogen = 1;
	membar_producer();
	th->th_generation = ogen;

	/*
	 * Go live with the new struct timehands.  Ensure changes are
	 * globally visible before changing.
	 */
	setrealuptime(th->th_microtime.tv_sec, th->th_offset.sec);