| @@ -1,19 +1,22 @@ | | | @@ -1,19 +1,22 @@ |
1 | /* $NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $ */ | | 1 | /* $NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2008 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
| | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
| | | 8 | * by Andrew Doran. |
| | | 9 | * |
7 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
9 | * are met: | | 12 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
15 | * | | 18 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
17 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
18 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
| @@ -27,43 +30,44 @@ | | | @@ -27,43 +30,44 @@ |
27 | */ | | 30 | */ |
28 | | | 31 | |
29 | /*- | | 32 | /*- |
30 | * ---------------------------------------------------------------------------- | | 33 | * ---------------------------------------------------------------------------- |
31 | * "THE BEER-WARE LICENSE" (Revision 42): | | 34 | * "THE BEER-WARE LICENSE" (Revision 42): |
32 | * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you | | 35 | * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you |
33 | * can do whatever you want with this stuff. If we meet some day, and you think | | 36 | * can do whatever you want with this stuff. If we meet some day, and you think |
34 | * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp | | 37 | * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp |
35 | * --------------------------------------------------------------------------- | | 38 | * --------------------------------------------------------------------------- |
36 | */ | | 39 | */ |
37 | | | 40 | |
38 | #include <sys/cdefs.h> | | 41 | #include <sys/cdefs.h> |
39 | /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */ | | 42 | /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */ |
40 | __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $"); | | 43 | __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $"); |
41 | | | 44 | |
42 | #include "opt_ntp.h" | | 45 | #include "opt_ntp.h" |
43 | | | 46 | |
44 | #include <sys/param.h> | | 47 | #include <sys/param.h> |
45 | #include <sys/kernel.h> | | 48 | #include <sys/kernel.h> |
46 | #include <sys/reboot.h> /* XXX just to get AB_VERBOSE */ | | 49 | #include <sys/reboot.h> /* XXX just to get AB_VERBOSE */ |
47 | #include <sys/sysctl.h> | | 50 | #include <sys/sysctl.h> |
48 | #include <sys/syslog.h> | | 51 | #include <sys/syslog.h> |
49 | #include <sys/systm.h> | | 52 | #include <sys/systm.h> |
50 | #include <sys/timepps.h> | | 53 | #include <sys/timepps.h> |
51 | #include <sys/timetc.h> | | 54 | #include <sys/timetc.h> |
52 | #include <sys/timex.h> | | 55 | #include <sys/timex.h> |
53 | #include <sys/evcnt.h> | | 56 | #include <sys/evcnt.h> |
54 | #include <sys/kauth.h> | | 57 | #include <sys/kauth.h> |
55 | #include <sys/mutex.h> | | 58 | #include <sys/mutex.h> |
56 | #include <sys/atomic.h> | | 59 | #include <sys/atomic.h> |
| | | 60 | #include <sys/xcall.h> |
57 | | | 61 | |
58 | /* | | 62 | /* |
59 | * A large step happens on boot. This constant detects such steps. | | 63 | * A large step happens on boot. This constant detects such steps. |
60 | * It is relatively small so that ntp_update_second gets called enough | | 64 | * It is relatively small so that ntp_update_second gets called enough |
61 | * in the typical 'missed a couple of seconds' case, but doesn't loop | | 65 | * in the typical 'missed a couple of seconds' case, but doesn't loop |
62 | * forever when the time step is large. | | 66 | * forever when the time step is large. |
63 | */ | | 67 | */ |
64 | #define LARGE_STEP 200 | | 68 | #define LARGE_STEP 200 |
65 | | | 69 | |
66 | /* | | 70 | /* |
67 | * Implement a dummy timecounter which we can use until we get a real one | | 71 | * Implement a dummy timecounter which we can use until we get a real one |
68 | * in the air. This allows the console and other early stuff to use | | 72 | * in the air. This allows the console and other early stuff to use |
69 | * time services. | | 73 | * time services. |
| @@ -116,26 +120,27 @@ static struct timehands th0 = { | | | @@ -116,26 +120,27 @@ static struct timehands th0 = { |
116 | static struct timehands *volatile timehands = &th0; | | 120 | static struct timehands *volatile timehands = &th0; |
117 | struct timecounter *timecounter = &dummy_timecounter; | | 121 | struct timecounter *timecounter = &dummy_timecounter; |
118 | static struct timecounter *timecounters = &dummy_timecounter; | | 122 | static struct timecounter *timecounters = &dummy_timecounter; |
119 | | | 123 | |
120 | time_t time_second = 1; | | 124 | time_t time_second = 1; |
121 | time_t time_uptime = 1; | | 125 | time_t time_uptime = 1; |
122 | | | 126 | |
123 | static struct bintime timebasebin; | | 127 | static struct bintime timebasebin; |
124 | | | 128 | |
125 | static int timestepwarnings; | | 129 | static int timestepwarnings; |
126 | | | 130 | |
127 | kmutex_t timecounter_lock; | | 131 | kmutex_t timecounter_lock; |
128 | static u_int timecounter_mods; | | 132 | static u_int timecounter_mods; |
| | | 133 | static volatile int timecounter_removals = 1; |
129 | static u_int timecounter_bad; | | 134 | static u_int timecounter_bad; |
130 | | | 135 | |
131 | #ifdef __FreeBSD__ | | 136 | #ifdef __FreeBSD__ |
132 | SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW, | | 137 | SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW, |
133 | &timestepwarnings, 0, ""); | | 138 | &timestepwarnings, 0, ""); |
134 | #endif /* __FreeBSD__ */ | | 139 | #endif /* __FreeBSD__ */ |
135 | | | 140 | |
136 | /* | | 141 | /* |
137 | * sysctl helper routine for kern.timecounter.hardware | | 142 | * sysctl helper routine for kern.timecounter.hardware |
138 | */ | | 143 | */ |
139 | static int | | 144 | static int |
140 | sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS) | | 145 | sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS) |
141 | { | | 146 | { |
| @@ -299,35 +304,69 @@ tc_delta(struct timehands *th) | | | @@ -299,35 +304,69 @@ tc_delta(struct timehands *th) |
299 | th->th_offset_count) & tc->tc_counter_mask); | | 304 | th->th_offset_count) & tc->tc_counter_mask); |
300 | } | | 305 | } |
301 | | | 306 | |
302 | /* | | 307 | /* |
303 | * Functions for reading the time. We have to loop until we are sure that | | 308 | * Functions for reading the time. We have to loop until we are sure that |
304 | * the timehands that we operated on was not updated under our feet. See | | 309 | * the timehands that we operated on was not updated under our feet. See |
305 | * the comment in <sys/timevar.h> for a description of these 12 functions. | | 310 | * the comment in <sys/timevar.h> for a description of these 12 functions. |
306 | */ | | 311 | */ |
307 | | | 312 | |
308 | void | | 313 | void |
309 | binuptime(struct bintime *bt) | | 314 | binuptime(struct bintime *bt) |
310 | { | | 315 | { |
311 | struct timehands *th; | | 316 | struct timehands *th; |
312 | u_int gen; | | 317 | lwp_t *l; |
| | | 318 | u_int lgen, gen; |
313 | | | 319 | |
314 | TC_COUNT(nbinuptime); | | 320 | TC_COUNT(nbinuptime); |
| | | 321 | |
| | | 322 | /* |
| | | 323 | * Provide exclusion against tc_detach(). |
| | | 324 | * |
| | | 325 | * We record the number of timecounter removals before accessing |
| | | 326 | * timecounter state. Note that the LWP can be using multiple |
| | | 327 | * "generations" at once, due to interrupts (interrupted while in |
| | | 328 | * this function). Hardware interrupts will borrow the interrupted |
| | | 329 | * LWP's l_tcgen value for this purpose, and can themselves be |
| | | 330 | * interrupted by higher priority interrupts. In this case we need |
| | | 331 | * to ensure that the oldest generation in use is recorded. |
| | | 332 | * |
| | | 333 | * splsched() is too expensive to use, so we take care to structure |
| | | 334 | * this code in such a way that it is not required. Likewise, we |
| | | 335 | * do not disable preemption. |
| | | 336 | * |
| | | 337 | * Memory barriers are also too expensive to use for such a |
| | | 338 | * performance critical function. The good news is that we do not |
| | | 339 | * need memory barriers for this type of exclusion, as the thread |
| | | 340 | * updating timecounter_removals will issue a broadcast cross call |
| | | 341 | * before inspecting our l_tcgen value (this elides memory ordering |
| | | 342 | * issues). |
| | | 343 | */ |
| | | 344 | l = curlwp; |
| | | 345 | lgen = l->l_tcgen; |
| | | 346 | if (__predict_true(lgen == 0)) { |
| | | 347 | l->l_tcgen = timecounter_removals; |
| | | 348 | } |
| | | 349 | __insn_barrier(); |
| | | 350 | |
315 | do { | | 351 | do { |
316 | th = timehands; | | 352 | th = timehands; |
317 | gen = th->th_generation; | | 353 | gen = th->th_generation; |
318 | *bt = th->th_offset; | | 354 | *bt = th->th_offset; |
319 | bintime_addx(bt, th->th_scale * tc_delta(th)); | | 355 | bintime_addx(bt, th->th_scale * tc_delta(th)); |
320 | } while (gen == 0 || gen != th->th_generation); | | 356 | } while (gen == 0 || gen != th->th_generation); |
| | | 357 | |
| | | 358 | __insn_barrier(); |
| | | 359 | l->l_tcgen = lgen; |
321 | } | | 360 | } |
322 | | | 361 | |
323 | void | | 362 | void |
324 | nanouptime(struct timespec *tsp) | | 363 | nanouptime(struct timespec *tsp) |
325 | { | | 364 | { |
326 | struct bintime bt; | | 365 | struct bintime bt; |
327 | | | 366 | |
328 | TC_COUNT(nnanouptime); | | 367 | TC_COUNT(nnanouptime); |
329 | binuptime(&bt); | | 368 | binuptime(&bt); |
330 | bintime2timespec(&bt, tsp); | | 369 | bintime2timespec(&bt, tsp); |
331 | } | | 370 | } |
332 | | | 371 | |
333 | void | | 372 | void |
| @@ -533,47 +572,95 @@ tc_gonebad(struct timecounter *tc) | | | @@ -533,47 +572,95 @@ tc_gonebad(struct timecounter *tc) |
533 | tc->tc_quality = -100; | | 572 | tc->tc_quality = -100; |
534 | membar_producer(); | | 573 | membar_producer(); |
535 | atomic_inc_uint(&timecounter_bad); | | 574 | atomic_inc_uint(&timecounter_bad); |
536 | } | | 575 | } |
537 | | | 576 | |
538 | /* | | 577 | /* |
539 | * Stop using a timecounter and remove it from the timecounters list. | | 578 | * Stop using a timecounter and remove it from the timecounters list. |
540 | */ | | 579 | */ |
541 | int | | 580 | int |
542 | tc_detach(struct timecounter *target) | | 581 | tc_detach(struct timecounter *target) |
543 | { | | 582 | { |
544 | struct timecounter *tc; | | 583 | struct timecounter *tc; |
545 | struct timecounter **tcp = NULL; | | 584 | struct timecounter **tcp = NULL; |
546 | int rc = 0; | | 585 | int removals; |
| | | 586 | uint64_t where; |
| | | 587 | lwp_t *l; |
547 | | | 588 | |
| | | 589 | /* First, find the timecounter. */ |
548 | mutex_spin_enter(&timecounter_lock); | | 590 | mutex_spin_enter(&timecounter_lock); |
549 | for (tcp = &timecounters, tc = timecounters; | | 591 | for (tcp = &timecounters, tc = timecounters; |
550 | tc != NULL; | | 592 | tc != NULL; |
551 | tcp = &tc->tc_next, tc = tc->tc_next) { | | 593 | tcp = &tc->tc_next, tc = tc->tc_next) { |
552 | if (tc == target) | | 594 | if (tc == target) |
553 | break; | | 595 | break; |
554 | } | | 596 | } |
555 | if (tc == NULL) { | | 597 | if (tc == NULL) { |
556 | rc = ESRCH; | | 598 | mutex_spin_exit(&timecounter_lock); |
557 | } else { | | 599 | return ESRCH; |
558 | *tcp = tc->tc_next; | | 600 | } |
559 | if (timecounter == target) { | | 601 | |
560 | tc_pick(); | | 602 | /* And now, remove it. */ |
561 | tc_windup(); | | 603 | *tcp = tc->tc_next; |
562 | } | | 604 | if (timecounter == target) { |
563 | timecounter_mods++; | | 605 | tc_pick(); |
| | | 606 | tc_windup(); |
564 | } | | 607 | } |
| | | 608 | timecounter_mods++; |
| | | 609 | removals = timecounter_removals++; |
565 | mutex_spin_exit(&timecounter_lock); | | 610 | mutex_spin_exit(&timecounter_lock); |
566 | return rc; | | 611 | |
| | | 612 | /* |
| | | 613 | * We now have to determine if any threads in the system are still |
| | | 614 | * making use of this timecounter. |
| | | 615 | * |
| | | 616 | * We issue a broadcast cross call to elide memory ordering issues, |
| | | 617 | * then scan all LWPs in the system looking at each's timecounter |
| | | 618 | * generation number. We need to see a value of zero (not actively |
| | | 619 | * using a timecounter) or a value greater than our removal value. |
| | | 620 | * |
| | | 621 | * We may race with threads that read `timecounter_removals' and |
| | | 622 | * then get preempted before updating `l_tcgen'. This is not |
| | | 623 | * a problem, since it means that these threads have not yet started |
| | | 624 | * accessing timecounter state. All we do need is one clean |
| | | 625 | * snapshot of the system where every thread appears not to be using |
| | | 626 | * old timecounter state. |
| | | 627 | */ |
| | | 628 | for (;;) { |
| | | 629 | where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); |
| | | 630 | xc_wait(where); |
| | | 631 | |
| | | 632 | mutex_enter(proc_lock); |
| | | 633 | LIST_FOREACH(l, &alllwp, l_list) { |
| | | 634 | if (l->l_tcgen == 0 || l->l_tcgen > removals) { |
| | | 635 | /* |
| | | 636 | * Not using timecounter or old timecounter |
| | | 637 | * state at time of our xcall or later. |
| | | 638 | */ |
| | | 639 | continue; |
| | | 640 | } |
| | | 641 | break; |
| | | 642 | } |
| | | 643 | mutex_exit(proc_lock); |
| | | 644 | |
| | | 645 | /* |
| | | 646 | * If the timecounter is still in use, wait at least 10ms |
| | | 647 | * before retrying. |
| | | 648 | */ |
| | | 649 | if (l == NULL) { |
| | | 650 | return 0; |
| | | 651 | } |
| | | 652 | (void)kpause("tcdetach", false, mstohz(10), NULL); |
| | | 653 | } |
567 | } | | 654 | } |
568 | | | 655 | |
569 | /* Report the frequency of the current timecounter. */ | | 656 | /* Report the frequency of the current timecounter. */ |
570 | u_int64_t | | 657 | u_int64_t |
571 | tc_getfrequency(void) | | 658 | tc_getfrequency(void) |
572 | { | | 659 | { |
573 | | | 660 | |
574 | return (timehands->th_counter->tc_frequency); | | 661 | return (timehands->th_counter->tc_frequency); |
575 | } | | 662 | } |
576 | | | 663 | |
577 | /* | | 664 | /* |
578 | * Step our concept of UTC. This is done by modifying our estimate of | | 665 | * Step our concept of UTC. This is done by modifying our estimate of |
579 | * when we booted. | | 666 | * when we booted. |