Sat Dec 13 11:34:43 2008 UTC
Kernel reserve pages are more easily consumed these days because the
pagedaemon serves as less of a barrier than it once did. Restrict provision
of kernel reserve pages to kmem and one of these cases (illustrated by the
sketch after this message):

- doing a NOWAIT allocation
- caller is a realtime thread
- caller is a kernel thread
- explicitly requested, for example by the pmap


(ad)
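
To make the policy concrete, here is a hedged sketch of what it means for a
caller of uvm_km_alloc(). The helper example_grab_page() and its error
handling are invented for illustration; the flags and the uvm_km_alloc()
signature are the ones the diffs below operate on. A NOWAIT allocation is
the only way an ordinary caller still reaches the reserve; kernel threads
and realtime threads are detected inside uvm_pagealloc_strat() (see the
uvm_page.c hunk), so they need no special flag.

#include <sys/param.h>
#include <sys/systm.h>
#include <uvm/uvm_extern.h>

/*
 * Illustrative sketch only, not part of the commit: with uvm_km.c
 * r1.103 below, UVM_KMF_NOWAIT is what makes uvm_km_alloc() pass
 * UVM_PGA_USERESERVE down to the page allocator; a waiting
 * allocation no longer touches the kernel reserve at all.
 */
static vaddr_t
example_grab_page(bool can_sleep)	/* hypothetical helper */
{
	uvm_flag_t flags = UVM_KMF_WIRED;

	if (!can_sleep)
		flags |= UVM_KMF_NOWAIT;	/* may dip into the reserve */

	/* 0 means failure; only expected here with UVM_KMF_NOWAIT set. */
	return uvm_km_alloc(kernel_map, PAGE_SIZE, 0, flags);
}

uvm_km_alloc() itself still retries around uvm_wait() for waiting callers,
so the reserve restriction shows up only as a failure mode for
UVM_KMF_NOWAIT allocations.
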
diff -r1.102 -r1.103 src/sys/uvm/uvm_km.c
diff -r1.264 -r1.265 src/sys/uvm/uvm_map.c
diff -r1.140 -r1.141 src/sys/uvm/uvm_page.c

cvs diff -r1.102 -r1.103 src/sys/uvm/uvm_km.c

--- src/sys/uvm/uvm_km.c 2008/12/01 10:54:57 1.102
+++ src/sys/uvm/uvm_km.c 2008/12/13 11:34:43 1.103
@@ -1,14 +1,14 @@
-/*	$NetBSD: uvm_km.c,v 1.102 2008/12/01 10:54:57 ad Exp $	*/
+/*	$NetBSD: uvm_km.c,v 1.103 2008/12/13 11:34:43 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
  * Copyright (c) 1991, 1993, The Regents of the University of California.
  *
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -118,27 +118,27 @@
  * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the
  * kernel map].  if uvm_km_alloc returns virtual address 0xf8235000,
  * then that means that the page at offset 0x235000 in kernel_object is
  * mapped at 0xf8235000.
  *
  * kernel object have one other special property: when the kernel virtual
  * memory mapping them is unmapped, the backing memory in the object is
  * freed right away.  this is done with the uvm_km_pgremove() function.
  * this has to be done because there is no backing store for kernel pages
  * and no need to save them after they are no longer referenced.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.102 2008/12/01 10:54:57 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.103 2008/12/13 11:34:43 ad Exp $");
 
 #include "opt_uvmhist.h"
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/pool.h>
 
 #include <uvm/uvm.h>
 
 /*
  * global data structures
@@ -579,27 +579,29 @@ uvm_km_alloc(struct vm_map *map, vsize_t
 	 */
 
 	offset = kva - vm_map_min(kernel_map);
 	UVMHIST_LOG(maphist, "  kva=0x%x, offset=0x%x", kva, offset,0,0);
 
 	/*
 	 * now allocate and map in the memory... note that we are the only ones
 	 * whom should ever get a handle on this area of VM.
 	 */
 
 	loopva = kva;
 	loopsize = size;
 
-	pgaflags = UVM_PGA_USERESERVE;
+	pgaflags = 0;
+	if (flags & UVM_KMF_NOWAIT)
+		pgaflags |= UVM_PGA_USERESERVE;
 	if (flags & UVM_KMF_ZERO)
 		pgaflags |= UVM_PGA_ZERO;
 	prot = VM_PROT_READ | VM_PROT_WRITE;
 	if (flags & UVM_KMF_EXEC)
 		prot |= VM_PROT_EXECUTE;
 	while (loopsize) {
 		KASSERT(!pmap_extract(pmap_kernel(), loopva, NULL));
 
 		pg = uvm_pagealloc(NULL, offset, NULL, pgaflags);
 
 		/*
 		 * out of memory?
 		 */
@@ -688,53 +690,53 @@ uvm_km_alloc_poolpage_cache(struct vm_ma
 #else
 	struct vm_page *pg;
 	struct pool *pp = &vm_map_to_kernel(map)->vmk_vacache;
 	vaddr_t va;
 
 	if ((map->flags & VM_MAP_VACACHE) == 0)
 		return uvm_km_alloc_poolpage(map, waitok);
 
 	va = (vaddr_t)pool_get(pp, waitok ? PR_WAITOK : PR_NOWAIT);
 	if (va == 0)
 		return 0;
 	KASSERT(!pmap_extract(pmap_kernel(), va, NULL));
 again:
-	pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE);
+	pg = uvm_pagealloc(NULL, 0, NULL, waitok ? 0 : UVM_PGA_USERESERVE);
 	if (__predict_false(pg == NULL)) {
 		if (waitok) {
 			uvm_wait("plpg");
 			goto again;
 		} else {
 			pool_put(pp, (void *)va);
 			return 0;
 		}
 	}
 	pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
 	    VM_PROT_READ|VM_PROT_WRITE|PMAP_KMPAGE);
 	pmap_update(pmap_kernel());
 
 	return va;
 #endif /* PMAP_MAP_POOLPAGE */
 }
 
 vaddr_t
 uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
 {
 #if defined(PMAP_MAP_POOLPAGE)
 	struct vm_page *pg;
 	vaddr_t va;
 
  again:
-	pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE);
+	pg = uvm_pagealloc(NULL, 0, NULL, waitok ? 0 : UVM_PGA_USERESERVE);
 	if (__predict_false(pg == NULL)) {
 		if (waitok) {
 			uvm_wait("plpg");
 			goto again;
 		} else
 			return (0);
 	}
 	va = PMAP_MAP_POOLPAGE(VM_PAGE_TO_PHYS(pg));
 	if (__predict_false(va == 0))
 		uvm_pagefree(pg);
 	return (va);
 #else
 	vaddr_t va;
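
The two poolpage allocators above now encode the same rule inline: a caller
that may sleep gets no reserve access and loops on uvm_wait() instead, while
a non-sleeping caller may dip into the reserve but must tolerate failure. A
hedged sketch of that contract follows; the wrapper name
poolpage_alloc_model is invented here and is not part of the commit.

#include <sys/param.h>
#include <uvm/uvm.h>

/*
 * Illustration only: mirrors the waitok contract now used by
 * uvm_km_alloc_poolpage() and uvm_km_alloc_poolpage_cache() above.
 * waitok callers never touch the reserve and retry after uvm_wait();
 * !waitok callers may take reserve pages but can be handed NULL.
 */
static struct vm_page *
poolpage_alloc_model(bool waitok)
{
	struct vm_page *pg;

	for (;;) {
		pg = uvm_pagealloc(NULL, 0, NULL,
		    waitok ? 0 : UVM_PGA_USERESERVE);
		if (pg != NULL || !waitok)
			return pg;		/* NULL only if !waitok */
		uvm_wait("plpgmodel");		/* let the pagedaemon run */
	}
}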

cvs diff -r1.264 -r1.265 src/sys/uvm/uvm_map.c

--- src/sys/uvm/uvm_map.c 2008/12/01 10:54:57 1.264
+++ src/sys/uvm/uvm_map.c 2008/12/13 11:34:43 1.265
@@ -1,14 +1,14 @@
-/*	$NetBSD: uvm_map.c,v 1.264 2008/12/01 10:54:57 ad Exp $	*/
+/*	$NetBSD: uvm_map.c,v 1.265 2008/12/13 11:34:43 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
  * Copyright (c) 1991, 1993, The Regents of the University of California.
  *
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -61,27 +61,27 @@
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  * uvm_map.c: uvm map operations
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.264 2008/12/01 10:54:57 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.265 2008/12/13 11:34:43 ad Exp $");
 
 #include "opt_ddb.h"
 #include "opt_uvmhist.h"
 #include "opt_uvm.h"
 #include "opt_sysv.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mman.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
 #include <sys/pool.h>
 #include <sys/kernel.h>
@@ -4606,27 +4606,28 @@ again:
 		LIST_REMOVE(ukh, ukh_listq);
 	}
 	mutex_spin_exit(&uvm_kentry_lock);
 
 	if (entry)
 		return entry;
 
 	/*
 	 * there's no free entry for this vm_map.
 	 * now we need to allocate some vm_map_entry.
 	 * for simplicity, always allocate one page chunk of them at once.
 	 */
 
-	pg = uvm_pagealloc(NULL, 0, NULL, 0);
+	pg = uvm_pagealloc(NULL, 0, NULL,
+	    (flags & UVM_KMF_NOWAIT) != 0 ? UVM_PGA_USERESERVE : 0);
 	if (__predict_false(pg == NULL)) {
 		if (flags & UVM_FLAG_NOWAIT)
 			return NULL;
 		uvm_wait("kme_alloc");
 		goto again;
 	}
 
 	error = uvm_map_prepare(map, 0, PAGE_SIZE, NULL, UVM_UNKNOWN_OFFSET,
 	    0, mapflags, &args);
 	if (error) {
 		uvm_pagefree(pg);
 		return NULL;
 	}

cvs diff -r1.140 -r1.141 src/sys/uvm/uvm_page.c

--- src/sys/uvm/uvm_page.c 2008/07/04 10:56:59 1.140
+++ src/sys/uvm/uvm_page.c 2008/12/13 11:34:43 1.141
@@ -1,14 +1,14 @@
-/*	$NetBSD: uvm_page.c,v 1.140 2008/07/04 10:56:59 ad Exp $	*/
+/*	$NetBSD: uvm_page.c,v 1.141 2008/12/13 11:34:43 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
  * Copyright (c) 1991, 1993, The Regents of the University of California.
  *
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -61,27 +61,27 @@
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  * uvm_page.c: page ops.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.140 2008/07/04 10:56:59 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.141 2008/12/13 11:34:43 ad Exp $");
 
 #include "opt_uvmhist.h"
 #include "opt_readahead.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/sched.h>
 #include <sys/kernel.h>
 #include <sys/vnode.h>
 #include <sys/proc.h>
 #include <sys/atomic.h>
 #include <sys/cpu.h>
@@ -1061,27 +1061,27 @@ uvm_pagealloc_pgfl(struct uvm_cpu *ucpu,
  * unknown contents page.  This is because we live with the
  * consequences of a bad free list decision for the entire
  * lifetime of the page, e.g. if the page comes from memory that
  * is slower to access.
  */
 
 struct vm_page *
 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
     int flags, int strat, int free_list)
 {
 	int lcv, try1, try2, zeroit = 0, color;
 	struct uvm_cpu *ucpu;
 	struct vm_page *pg;
-	bool use_reserve;
+	lwp_t *l;
 
 	KASSERT(obj == NULL || anon == NULL);
 	KASSERT(anon == NULL || off == 0);
 	KASSERT(off == trunc_page(off));
 	KASSERT(obj == NULL || mutex_owned(&obj->vmobjlock));
 	KASSERT(anon == NULL || mutex_owned(&anon->an_lock));
 
 	mutex_spin_enter(&uvm_fpageqlock);
 
 	/*
 	 * This implements a global round-robin page coloring
 	 * algorithm.
 	 *
@@ -1092,36 +1092,40 @@ uvm_pagealloc_strat(struct uvm_object *o
 	color = ucpu->page_free_nextcolor;
 
 	/*
 	 * check to see if we need to generate some free pages waking
 	 * the pagedaemon.
 	 */
 
 	uvm_kick_pdaemon();
 
 	/*
 	 * fail if any of these conditions is true:
 	 * [1]  there really are no free pages, or
 	 * [2]  only kernel "reserved" pages remain and
-	 *        the page isn't being allocated to a kernel object.
+	 *        reserved pages have not been requested.
 	 * [3]  only pagedaemon "reserved" pages remain and
 	 *        the requestor isn't the pagedaemon.
+	 * we make kernel reserve pages available if called by a
+	 * kernel thread or a realtime thread.
 	 */
-
-	use_reserve = (flags & UVM_PGA_USERESERVE) ||
-	    (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
-	if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
+	l = curlwp;
+	if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) {
+		flags |= UVM_PGA_USERESERVE;
+	}
+	if ((uvmexp.free <= uvmexp.reserve_kernel &&
+	    (flags & UVM_PGA_USERESERVE) == 0) ||
 	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
-	     !(use_reserve && curlwp == uvm.pagedaemon_lwp)))
+	     curlwp != uvm.pagedaemon_lwp))
 		goto fail;
 
 #if PGFL_NQUEUES != 2
 #error uvm_pagealloc_strat needs to be updated
 #endif
 
 	/*
 	 * If we want a zero'd page, try the ZEROS queue first, otherwise
 	 * we try the UNKNOWN queue first.
 	 */
 	if (flags & UVM_PGA_ZERO) {
 		try1 = PGFL_ZEROS;
 		try2 = PGFL_UNKNOWN;
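
The heart of the change is the new gate in uvm_pagealloc_strat() above.
Below is a hedged, self-contained model of that decision; the struct, the
constants, and the name reserve_gate_model are stand-ins invented here (for
the kernel's uvmexp fields, UVM_PGA_USERESERVE, and PRI_KTHREAD) so the
logic can be read, or unit-tested, in isolation.

#include <stdbool.h>

/* Stand-ins for kernel state and constants; illustrative only. */
#define MODEL_PGA_USERESERVE	0x0004
#define MODEL_PRI_KTHREAD	96

struct free_counters {
	int free;			/* pages on the free list */
	int reserve_kernel;		/* floor kept for the kernel */
	int reserve_pagedaemon;		/* floor kept for the pagedaemon */
};

/*
 * Model of the r1.141 gate: a sufficiently high effective priority
 * (kernel threads and realtime threads) grants reserve access, then
 * the two reserve floors are applied in order.
 */
static bool
reserve_gate_model(const struct free_counters *c, int flags, int eprio,
    bool is_pagedaemon)
{
	if (eprio >= MODEL_PRI_KTHREAD)
		flags |= MODEL_PGA_USERESERVE;

	/* [2] only the kernel reserve remains and it was not requested */
	if (c->free <= c->reserve_kernel &&
	    (flags & MODEL_PGA_USERESERVE) == 0)
		return false;

	/* [3] only the pagedaemon reserve remains and we are not it */
	if (c->free <= c->reserve_pagedaemon && !is_pagedaemon)
		return false;

	return true;
}

Note the asymmetry: priority grants only kernel-reserve access; the deeper
pagedaemon reserve still requires being the pagedaemon itself.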