Wed Jan 1 01:18:34 2020 UTC
explicitly include sys/atomic.h for atomic operations.


(mlelstv)
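The include backs the relaxed atomic load already present in this file: uvmpdpol_pageisqueued_p() reads pg->pqflags with atomic_load_relaxed(), which is declared in <sys/atomic.h>, so the commit pulls the header in directly instead of relying on it arriving via another include. The fragment below is a minimal sketch of that dependency, not the kernel code itself; it assumes a NetBSD-style <sys/atomic.h> that provides atomic_load_relaxed(), and the names vm_page_sketch and pageisqueued_sketch are made up for illustration (the PQ_ACTIVE/PQ_INACTIVE values are the ones defined in this file).

#include <sys/atomic.h>			/* atomic_load_relaxed() */
#include <stdbool.h>
#include <stdint.h>

#define PQ_INACTIVE	0x00000010	/* page is in inactive list (as in this file) */
#define PQ_ACTIVE	0x00000020	/* page is in active list (as in this file) */

struct vm_page_sketch {			/* hypothetical stand-in for struct vm_page */
	uint32_t pqflags;		/* queue flags, read without the queue lock */
};

/* unlocked check, mirroring the shape of uvmpdpol_pageisqueued_p() */
static bool
pageisqueued_sketch(struct vm_page_sketch *pg)
{
	uint32_t pqflags = atomic_load_relaxed(&pg->pqflags);

	return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
}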
diff -r1.28 -r1.29 src/sys/uvm/uvm_pdpolicy_clock.c

cvs diff -r1.28 -r1.29 src/sys/uvm/uvm_pdpolicy_clock.c

--- src/sys/uvm/uvm_pdpolicy_clock.c 2019/12/31 22:42:51 1.28
+++ src/sys/uvm/uvm_pdpolicy_clock.c 2020/01/01 01:18:34 1.29
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.28 2019/12/31 22:42:51 ad Exp $	*/
+/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.29 2020/01/01 01:18:34 mlelstv Exp $	*/
 /*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/
 
 /*-
@@ -98,13 +98,14 @@
 #else /* defined(PDSIM) */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.28 2019/12/31 22:42:51 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.29 2020/01/01 01:18:34 mlelstv Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/kmem.h>
+#include <sys/atomic.h>
 
 #include <uvm/uvm.h>
 #include <uvm/uvm_pdpolicy.h>