Sun Mar 8 15:01:50 2020 UTC
Don't zap the non-pdpolicy bits in pg->pqflags.


(ad)
diff -r1.33 -r1.34 src/sys/uvm/uvm_pdpolicy_clock.c

cvs diff -r1.33 -r1.34 src/sys/uvm/uvm_pdpolicy_clock.c

--- src/sys/uvm/uvm_pdpolicy_clock.c 2020/02/23 15:46:43 1.33
+++ src/sys/uvm/uvm_pdpolicy_clock.c 2020/03/08 15:01:50 1.34
@@ -1,8 +1,8 @@
-/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.33 2020/02/23 15:46:43 ad Exp $	*/
+/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.34 2020/03/08 15:01:50 ad Exp $	*/
 /*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/
 
 /*-
- * Copyright (c) 2019 The NetBSD Foundation, Inc.
+ * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -98,7 +98,7 @@
 #else /* defined(PDSIM) */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.33 2020/02/23 15:46:43 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.34 2020/03/08 15:01:50 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
@@ -455,7 +455,8 @@
 		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
 		pdpol_state.s_inactive++;
 	}
-	pg->pqflags = (pg->pqflags & PQ_INTENT_QUEUED) | PQ_INACTIVE;
+	pg->pqflags &= ~(PQ_ACTIVE | PQ_INTENT_SET);
+	pg->pqflags |= PQ_INACTIVE;
 }
 
 void
@@ -486,7 +487,8 @@
 	uvmpdpol_pagedequeue_locked(pg);
 	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
 	pdpol_state.s_active++;
-	pg->pqflags = (pg->pqflags & PQ_INTENT_QUEUED) | PQ_ACTIVE;
+	pg->pqflags &= ~(PQ_INACTIVE | PQ_INTENT_SET);
+	pg->pqflags |= PQ_ACTIVE;
 }
 
 void
@@ -517,7 +519,7 @@
 		KASSERT(pdpol_state.s_inactive > 0);
 		pdpol_state.s_inactive--;
 	}
-	pg->pqflags &= PQ_INTENT_QUEUED;
+	pg->pqflags &= ~(PQ_ACTIVE | PQ_INACTIVE | PQ_INTENT_SET);
 }
 
 void
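
The gist of the fix, as a small stand-alone C sketch (the PQ_INTENT_* values, PQ_OTHER_BIT, and the helper functions below are illustrative stand-ins, not the kernel's actual definitions; only PQ_ACTIVE and PQ_INACTIVE match the values in this file): the old assignments rebuilt pg->pqflags from scratch, so any flag bit not owned by the pdpolicy code was zapped along the way, while the new read-modify-write form clears only the pdpolicy-owned bits and then sets the new queue state, leaving unrelated bits intact.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative flag values only -- PQ_ACTIVE/PQ_INACTIVE are as defined in
 * uvm_pdpolicy_clock.c; the intent bits and the "other" bit are stand-ins.
 */
#define PQ_INACTIVE      0x00000010u	/* page is in inactive list */
#define PQ_ACTIVE        0x00000020u	/* page is in active list */
#define PQ_INTENT_SET    0x00000004u	/* stand-in: an intent has been set */
#define PQ_INTENT_QUEUED 0x00000008u	/* stand-in: page on a per-CPU queue */
#define PQ_OTHER_BIT     0x00010000u	/* stand-in: bit owned by other code */

/* old style: rebuild pqflags wholesale, keeping only PQ_INTENT_QUEUED */
static uint32_t
deactivate_old(uint32_t pqflags)
{

	return (pqflags & PQ_INTENT_QUEUED) | PQ_INACTIVE;
}

/* new style: clear only the pdpolicy-owned bits, then set the new state */
static uint32_t
deactivate_new(uint32_t pqflags)
{

	pqflags &= ~(PQ_ACTIVE | PQ_INTENT_SET);
	pqflags |= PQ_INACTIVE;
	return pqflags;
}

int
main(void)
{
	uint32_t before = PQ_ACTIVE | PQ_INTENT_SET | PQ_OTHER_BIT;

	/* the old form silently drops PQ_OTHER_BIT */
	printf("old: 0x%08" PRIx32 "\n", deactivate_old(before));
	/* the new form preserves it */
	printf("new: 0x%08" PRIx32 "\n", deactivate_new(before));
	return 0;
}

Run as written, the old form prints 0x00000010 (the hypothetical foreign bit is gone) while the new form prints 0x00010010 (queue state updated, foreign bit preserved).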