Thu Sep 3 02:05:03 2020 UTC
- Remove redundant memory barriers.  For the ones that remain,
  use the membar_ops(3) names to make it clear how they pair up (even
  though most of them expand to the MB instruction anyway).


(thorpej)
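
The membar_ops(3) names referred to above give each barrier a role, so a
reader can see which barrier pairs with which, even though on alpha they
all emit the same MB instruction.  As a minimal, hypothetical sketch of
the producer/consumer handoff idiom the diff below adopts for the TLB
shootdown context (the names "ctx", "producer_done", and "consumer_wait"
are illustrative, not taken from pmap.c):

    #include <sys/atomic.h>

    static void * volatile ctx;     /* handoff cell; NULL means "done" */

    /* Producer: make prior stores visible, then signal completion. */
    static void
    producer_done(void)
    {
            membar_producer();      /* order the stores above... */
            atomic_store_relaxed(&ctx, NULL);   /* ...before this one */
    }

    /* Consumer: wait for the signal, then read what was written. */
    static void
    consumer_wait(void)
    {
            while (atomic_load_relaxed(&ctx) != NULL)
                    continue;       /* real code would back off here */
            membar_consumer();      /* pairs with membar_producer() */
            /* the producer's earlier stores are now visible */
    }
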
diff -r1.269 -r1.270 src/sys/arch/alpha/alpha/pmap.c

--- src/sys/arch/alpha/alpha/pmap.c 2020/08/29 20:06:59 1.269
+++ src/sys/arch/alpha/alpha/pmap.c 2020/09/03 02:05:03 1.270
@@ -1,14 +1,14 @@
-/* $NetBSD: pmap.c,v 1.269 2020/08/29 20:06:59 thorpej Exp $ */
+/* $NetBSD: pmap.c,v 1.270 2020/09/03 02:05:03 thorpej Exp $ */
 
 /*-
  * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008, 2020
  * The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center, by Andrew Doran and Mindaugas Rasiukevicius,
  * and by Chris G. Demetriou.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -125,27 +125,27 @@
  * this module may delay invalidate or reduced protection
  * operations until such time as they are actually
  * necessary.  This module is given full information as
  * to which processors are currently using which maps,
  * and to when physical maps must be made correct.
  */
 
 #include "opt_lockdebug.h"
 #include "opt_sysv.h"
 #include "opt_multiprocessor.h"
 
 #include <sys/cdefs.h>		/* RCS ID & Copyright macro defns */
 
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.269 2020/08/29 20:06:59 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.270 2020/09/03 02:05:03 thorpej Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
 #include <sys/pool.h>
 #include <sys/buf.h>
 #include <sys/evcnt.h>
 #include <sys/atomic.h>
 #include <sys/cpu.h>
 
 #include <uvm/uvm.h>
@@ -941,27 +941,26 @@ pmap_tlb_shootnow(const struct pmap_tlb_
         }
 
 #if defined(MULTIPROCESSOR)
         /*
          * If there are remote CPUs that need to do work, get them
          * started now.
          */
         const u_long remote_cpus = active_cpus & ~this_cpu;
         KASSERT(tlb_context == NULL);
         if (remote_cpus) {
                 TLB_COUNT(shootnow_remote);
                 tlb_context = tlbctx;
                 tlb_pending = remote_cpus;
-                alpha_wmb();
                 alpha_multicast_ipi(remote_cpus, ALPHA_IPI_SHOOTDOWN);
         }
 #endif /* MULTIPROCESSOR */
 
         /*
          * Now that the remotes have been notified, release the
          * activation lock.
          */
         if (activation_locked) {
                 KASSERT(tlbctx->t_pmap != NULL);
                 PMAP_ACT_UNLOCK(tlbctx->t_pmap);
         }
 
@@ -973,39 +972,39 @@ pmap_tlb_shootnow(const struct pmap_tlb_
         if (active_cpus & this_cpu) {
                 pmap_tlb_invalidate(tlbctx, ci);
         }
 
 #if defined(MULTIPROCESSOR)
         /* Wait for remote CPUs to finish. */
         if (remote_cpus) {
                 int backoff = SPINLOCK_BACKOFF_MIN;
                 u_int spins = 0;
 
                 while (atomic_load_relaxed(&tlb_context) != NULL) {
                         SPINLOCK_BACKOFF(backoff);
                         if (spins++ > 0x0fffffff) {
-                                alpha_mb();
                                 printf("TLB LOCAL MASK = 0x%016lx\n",
                                     this_cpu);
                                 printf("TLB REMOTE MASK = 0x%016lx\n",
                                     remote_cpus);
                                 printf("TLB REMOTE PENDING = 0x%016lx\n",
                                     tlb_pending);
                                 printf("TLB CONTEXT = %p\n", tlb_context);
                                 printf("TLB LOCAL IPL = %lu\n",
                                     alpha_pal_rdps() & ALPHA_PSL_IPL_MASK);
                                 panic("pmap_tlb_shootnow");
                         }
                 }
+                membar_consumer();
         }
         KASSERT(tlb_context == NULL);
 #endif /* MULTIPROCESSOR */
 
         mutex_spin_exit(&tlb_lock);
 
         if (__predict_false(TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_PV)) {
                 /*
                  * P->V TLB operations may operate on multiple pmaps.
                  * The shootdown takes a reference on the first pmap it
                  * encounters, in order to prevent it from disappearing,
                  * in the hope that we end up with a single-pmap P->V
                  * operation (instrumentation shows this is not rare).
@@ -1016,27 +1015,27 @@ pmap_tlb_shootnow(const struct pmap_tlb_
                 KASSERT(tlbctx->t_pmap != NULL);
                 pmap_destroy(tlbctx->t_pmap);
         }
 }
 
 #if defined(MULTIPROCESSOR)
 void
 pmap_tlb_shootdown_ipi(struct cpu_info * const ci,
     struct trapframe * const tf __unused)
 {
         KASSERT(tlb_context != NULL);
         pmap_tlb_invalidate(tlb_context, ci);
         if (atomic_and_ulong_nv(&tlb_pending, ~(1UL << ci->ci_cpuid)) == 0) {
-                alpha_wmb();
+                membar_producer();
                 atomic_store_relaxed(&tlb_context, NULL);
         }
 }
 #endif /* MULTIPROCESSOR */
 
 static void
 pmap_tlb_physpage_free(paddr_t const ptpa,
     struct pmap_tlb_context * const tlbctx)
 {
         struct vm_page * const pg = PHYS_TO_VM_PAGE(ptpa);
 
         KASSERT(pg != NULL);
 
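
The previous two hunks are the pairing in action: pmap_tlb_shootdown_ipi()
issues membar_producer() before the store that clears tlb_context, and the
spin loop in pmap_tlb_shootnow() issues membar_consumer() after it observes
that store, so the initiating CPU cannot read shootdown state from before
the remote invalidations completed (compare the sketch after the log
message above).
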
@@ -1604,27 +1603,27 @@ pmap_create(void)
  *
  * Drop the reference count on the specified pmap, releasing
  * all resources if the reference count drops to zero.
  */
 void
 pmap_destroy(pmap_t pmap)
 {
 
 #ifdef DEBUG
         if (pmapdebug & PDB_FOLLOW)
                 printf("pmap_destroy(%p)\n", pmap);
 #endif
 
-        PMAP_MP(alpha_mb());
+        PMAP_MP(membar_exit());
         if (atomic_dec_ulong_nv(&pmap->pm_count) > 0)
                 return;
 
         rw_enter(&pmap_growkernel_lock, RW_READER);
 
         /*
          * Remove it from the global list of all pmaps.
          */
         mutex_enter(&pmap_all_pmaps_lock);
         TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
         mutex_exit(&pmap_all_pmaps_lock);
 
         pool_cache_put(&pmap_l1pt_cache, pmap->pm_lev1map);
@@ -1640,27 +1639,27 @@ pmap_destroy(pmap_t pmap)
  *
  * Add a reference to the specified pmap.
  */
 void
 pmap_reference(pmap_t pmap)
 {
 
 #ifdef DEBUG
         if (pmapdebug & PDB_FOLLOW)
                 printf("pmap_reference(%p)\n", pmap);
 #endif
 
         atomic_inc_ulong(&pmap->pm_count);
-        PMAP_MP(alpha_mb());
+        PMAP_MP(membar_enter());
 }
 
 /*
  * pmap_remove: [ INTERFACE ]
  *
  * Remove the given range of addresses from the specified map.
  *
  * It is assumed that the start and end are properly
  * rounded to the page size.
  */
 static void
 pmap_remove_internal(pmap_t pmap, vaddr_t sva, vaddr_t eva,
     struct pmap_tlb_context * const tlbctx)
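
The pmap_destroy()/pmap_reference() hunks above show the other pairing:
membar_exit() before dropping a reference and membar_enter() after taking
one.  A minimal, hypothetical sketch of that reference-count idiom ("obj",
"obj_reference", and "obj_release" are illustrative; the real code wraps
the barriers in PMAP_MP() so they vanish on non-MULTIPROCESSOR kernels):

    #include <sys/atomic.h>
    #include <sys/kmem.h>

    struct obj {
            volatile unsigned long o_refcnt;
            /* ... payload ... */
    };

    static void
    obj_reference(struct obj *o)
    {
            atomic_inc_ulong(&o->o_refcnt);
            membar_enter();         /* pairs with membar_exit() below */
    }

    static void
    obj_release(struct obj *o)
    {
            /* Release: payload stores become visible before the drop. */
            membar_exit();
            if (atomic_dec_ulong_nv(&o->o_refcnt) > 0)
                    return;
            /* Last reference is gone; tear-down is now safe. */
            kmem_free(o, sizeof(*o));
    }
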
@@ -2296,27 +2295,27 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, v
 
         KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
 
         pt_entry_t * const pte = PMAP_KERNEL_PTE(va);
 
         /* Build the new PTE. */
         const pt_entry_t npte =
             ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
             PG_V | PG_WIRED;
 
         /* Set the new PTE. */
         const pt_entry_t opte = atomic_load_relaxed(pte);
         atomic_store_relaxed(pte, npte);
-        PMAP_MP(alpha_mb());
+        PMAP_MP(membar_enter());
 
         PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
         PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
 
         /*
          * There should not have been anything here, previously,
          * so we can skip TLB shootdowns, etc. in the common case.
          */
         if (__predict_false(opte & PG_V)) {
                 const pt_entry_t diff = npte ^ opte;
 
                 printf_nolog("%s: mapping already present\n", __func__);
                 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
@@ -2359,27 +2358,26 @@ pmap_kremove(vaddr_t va, vsize_t size)
                 opte = atomic_load_relaxed(pte);
                 if (opte & PG_V) {
                         KASSERT((opte & PG_PVLIST) == 0);
 
                         /* Zap the mapping. */
                         atomic_store_relaxed(pte, PG_NV);
                         pmap_tlb_shootdown(pmap, va, opte, &tlbctx);
 
                         /* Update stats. */
                         PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
                         PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
                 }
         }
-        PMAP_MP(alpha_wmb());
 
         pmap_tlb_shootnow(&tlbctx);
         TLB_COUNT(reason_kremove);
 }
 
 /*
  * pmap_unwire: [ INTERFACE ]
  *
  * Clear the wired attribute for a map/virtual-address pair.
  *
  * The mapping must already exist in the pmap.
  */
 void
@@ -2604,27 +2602,26 @@ pmap_deactivate(struct lwp *l)
         KASSERT(pmap == ci->ci_pmap);
 
         /*
          * There is no need to switch to a different PTBR here,
          * because a pmap_activate() or SWPCTX is guaranteed
          * before whatever lev1map we're on now is invalidated
          * or before user space is accessed again.
          *
          * Because only kernel mappings will be accessed before the
          * next pmap_activate() call, we consider our CPU to be on
          * the kernel pmap.
          */
         ci->ci_pmap = pmap_kernel();
-        PMAP_MP(alpha_mb());
         KASSERT(atomic_load_relaxed(&pmap->pm_count) > 1);
         pmap_destroy(pmap);
 }
 
 /*
  * pmap_zero_page: [ INTERFACE ]
  *
  * Zero the specified (machine independent) page by mapping the page
  * into virtual memory and clear its contents, one machine dependent
  * page at a time.
  *
  * Note: no locking is necessary in this function.
  */