| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: vm.c,v 1.103 2010/12/01 11:19:18 pooka Exp $ */ | | 1 | /* $NetBSD: vm.c,v 1.104 2010/12/01 20:29:57 pooka Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. | | 4 | * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved. |
5 | * | | 5 | * |
6 | * Development of this software was supported by | | 6 | * Development of this software was supported by |
7 | * The Finnish Cultural Foundation and the Research Foundation of | | 7 | * The Finnish Cultural Foundation and the Research Foundation of |
8 | * The Helsinki University of Technology. | | 8 | * The Helsinki University of Technology. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
| @@ -31,27 +31,27 @@ | | | @@ -31,27 +31,27 @@ |
31 | | | 31 | |
32 | /* | | 32 | /* |
33 | * Virtual memory emulation routines. | | 33 | * Virtual memory emulation routines. |
34 | */ | | 34 | */ |
35 | | | 35 | |
36 | /* | | 36 | /* |
37 | * XXX: we abuse pg->uanon for the virtual address of the storage | | 37 | * XXX: we abuse pg->uanon for the virtual address of the storage |
38 | * for each page. phys_addr would fit the job description better, | | 38 | * for each page. phys_addr would fit the job description better, |
39 | * except that it will create unnecessary lossage on some platforms | | 39 | * except that it will create unnecessary lossage on some platforms |
40 | * due to not being a pointer type. | | 40 | * due to not being a pointer type. |
41 | */ | | 41 | */ |
42 | | | 42 | |
43 | #include <sys/cdefs.h> | | 43 | #include <sys/cdefs.h> |
44 | __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.103 2010/12/01 11:19:18 pooka Exp $"); | | 44 | __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.104 2010/12/01 20:29:57 pooka Exp $"); |
45 | | | 45 | |
46 | #include <sys/param.h> | | 46 | #include <sys/param.h> |
47 | #include <sys/atomic.h> | | 47 | #include <sys/atomic.h> |
48 | #include <sys/buf.h> | | 48 | #include <sys/buf.h> |
49 | #include <sys/kernel.h> | | 49 | #include <sys/kernel.h> |
50 | #include <sys/kmem.h> | | 50 | #include <sys/kmem.h> |
51 | #include <sys/mman.h> | | 51 | #include <sys/mman.h> |
52 | #include <sys/null.h> | | 52 | #include <sys/null.h> |
53 | #include <sys/vnode.h> | | 53 | #include <sys/vnode.h> |
54 | | | 54 | |
55 | #include <machine/pmap.h> | | 55 | #include <machine/pmap.h> |
56 | | | 56 | |
57 | #include <rump/rumpuser.h> | | 57 | #include <rump/rumpuser.h> |
| @@ -164,28 +164,29 @@ static struct pool_cache pagecache; | | | @@ -164,28 +164,29 @@ static struct pool_cache pagecache; |
164 | /* | | 164 | /* |
165 | * Called with the object locked. We don't support anons. | | 165 | * Called with the object locked. We don't support anons. |
166 | */ | | 166 | */ |
167 | struct vm_page * | | 167 | struct vm_page * |
168 | uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon, | | 168 | uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon, |
169 | int flags, int strat, int free_list) | | 169 | int flags, int strat, int free_list) |
170 | { | | 170 | { |
171 | struct vm_page *pg; | | 171 | struct vm_page *pg; |
172 | | | 172 | |
173 | KASSERT(uobj && mutex_owned(&uobj->vmobjlock)); | | 173 | KASSERT(uobj && mutex_owned(&uobj->vmobjlock)); |
174 | KASSERT(anon == NULL); | | 174 | KASSERT(anon == NULL); |
175 | | | 175 | |
176 | pg = pool_cache_get(&pagecache, PR_NOWAIT); | | 176 | pg = pool_cache_get(&pagecache, PR_NOWAIT); |
177 | if (__predict_false(pg == NULL)) | | 177 | if (__predict_false(pg == NULL)) { |
178 | return NULL; | | 178 | return NULL; |
| | | 179 | } |
179 | | | 180 | |
180 | pg->offset = off; | | 181 | pg->offset = off; |
181 | pg->uobject = uobj; | | 182 | pg->uobject = uobj; |
182 | | | 183 | |
183 | pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE; | | 184 | pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE; |
184 | if (flags & UVM_PGA_ZERO) { | | 185 | if (flags & UVM_PGA_ZERO) { |
185 | uvm_pagezero(pg); | | 186 | uvm_pagezero(pg); |
186 | } | | 187 | } |
187 | | | 188 | |
188 | TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); | | 189 | TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); |
189 | (void)rb_tree_insert_node(&uobj->rb_tree, pg); | | 190 | (void)rb_tree_insert_node(&uobj->rb_tree, pg); |
190 | | | 191 | |
191 | /* | | 192 | /* |
| @@ -856,75 +857,90 @@ uvm_pageout_start(int npages) | | | @@ -856,75 +857,90 @@ uvm_pageout_start(int npages) |
856 | { | | 857 | { |
857 | | | 858 | |
858 | /* we don't have the heuristics */ | | 859 | /* we don't have the heuristics */ |
859 | } | | 860 | } |
860 | | | 861 | |
861 | void | | 862 | void |
862 | uvm_pageout_done(int npages) | | 863 | uvm_pageout_done(int npages) |
863 | { | | 864 | { |
864 | | | 865 | |
865 | /* could wake up waiters, but just let the pagedaemon do it */ | | 866 | /* could wake up waiters, but just let the pagedaemon do it */ |
866 | } | | 867 | } |
867 | | | 868 | |
868 | static bool | | 869 | static bool |
869 | processpage(struct vm_page *pg) | | 870 | processpage(struct vm_page *pg, bool *lockrunning) |
870 | { | | 871 | { |
871 | struct uvm_object *uobj; | | 872 | struct uvm_object *uobj; |
872 | | | 873 | |
873 | uobj = pg->uobject; | | 874 | uobj = pg->uobject; |
874 | if (mutex_tryenter(&uobj->vmobjlock)) { | | 875 | if (mutex_tryenter(&uobj->vmobjlock)) { |
875 | if ((pg->flags & PG_BUSY) == 0) { | | 876 | if ((pg->flags & PG_BUSY) == 0) { |
876 | mutex_exit(&uvm_pageqlock); | | 877 | mutex_exit(&uvm_pageqlock); |
877 | uobj->pgops->pgo_put(uobj, pg->offset, | | 878 | uobj->pgops->pgo_put(uobj, pg->offset, |
878 | pg->offset + PAGE_SIZE, | | 879 | pg->offset + PAGE_SIZE, |
879 | PGO_CLEANIT|PGO_FREE); | | 880 | PGO_CLEANIT|PGO_FREE); |
880 | KASSERT(!mutex_owned(&uobj->vmobjlock)); | | 881 | KASSERT(!mutex_owned(&uobj->vmobjlock)); |
881 | return true; | | 882 | return true; |
882 | } else { | | 883 | } else { |
883 | mutex_exit(&uobj->vmobjlock); | | 884 | mutex_exit(&uobj->vmobjlock); |
884 | } | | 885 | } |
| | | 886 | } else if (*lockrunning == false && ncpu > 1) { |
| | | 887 | CPU_INFO_ITERATOR cii; |
| | | 888 | struct cpu_info *ci; |
| | | 889 | struct lwp *l; |
| | | 890 | |
| | | 891 | l = mutex_owner(&uobj->vmobjlock); |
| | | 892 | for (CPU_INFO_FOREACH(cii, ci)) { |
| | | 893 | if (ci->ci_curlwp == l) { |
| | | 894 | *lockrunning = true; |
| | | 895 | break; |
| | | 896 | } |
| | | 897 | } |
885 | } | | 898 | } |
886 | | | 899 | |
887 | return false; | | 900 | return false; |
888 | } | | 901 | } |
889 | | | 902 | |
890 | /* | | 903 | /* |
891 | * The Diabolical pageDaemon Director (DDD). | | 904 | * The Diabolical pageDaemon Director (DDD). |
892 | */ | | 905 | */ |
893 | void | | 906 | void |
894 | uvm_pageout(void *arg) | | 907 | uvm_pageout(void *arg) |
895 | { | | 908 | { |
896 | struct vm_page *pg; | | 909 | struct vm_page *pg; |
897 | struct pool *pp, *pp_first; | | 910 | struct pool *pp, *pp_first; |
898 | uint64_t where; | | 911 | uint64_t where; |
899 | int timo = 0; | | 912 | int timo = 0; |
900 | int cleaned, skip, skipped; | | 913 | int cleaned, skip, skipped; |
901 | bool succ = false; | | 914 | bool succ = false; |
| | | 915 | bool lockrunning; |
902 | | | 916 | |
903 | mutex_enter(&pdaemonmtx); | | 917 | mutex_enter(&pdaemonmtx); |
904 | for (;;) { | | 918 | for (;;) { |
905 | if (succ) { | | 919 | if (succ) { |
906 | kernel_map->flags &= ~VM_MAP_WANTVA; | | 920 | kernel_map->flags &= ~VM_MAP_WANTVA; |
907 | kmem_map->flags &= ~VM_MAP_WANTVA; | | 921 | kmem_map->flags &= ~VM_MAP_WANTVA; |
908 | timo = 0; | | 922 | timo = 0; |
909 | if (pdaemon_waiters) { | | 923 | if (pdaemon_waiters) { |
910 | pdaemon_waiters = 0; | | 924 | pdaemon_waiters = 0; |
911 | cv_broadcast(&oomwait); | | 925 | cv_broadcast(&oomwait); |
912 | } | | 926 | } |
913 | } | | 927 | } |
914 | succ = false; | | 928 | succ = false; |
915 | | | 929 | |
916 | cv_timedwait(&pdaemoncv, &pdaemonmtx, timo); | | 930 | if (pdaemon_waiters == 0) { |
917 | uvmexp.pdwoke++; | | 931 | cv_timedwait(&pdaemoncv, &pdaemonmtx, timo); |
| | | 932 | uvmexp.pdwoke++; |
| | | 933 | } |
918 | | | 934 | |
919 | /* tell the world that we are hungry */ | | 935 | /* tell the world that we are hungry */ |
920 | kernel_map->flags |= VM_MAP_WANTVA; | | 936 | kernel_map->flags |= VM_MAP_WANTVA; |
921 | kmem_map->flags |= VM_MAP_WANTVA; | | 937 | kmem_map->flags |= VM_MAP_WANTVA; |
922 | | | 938 | |
923 | if (pdaemon_waiters == 0 && !NEED_PAGEDAEMON()) | | 939 | if (pdaemon_waiters == 0 && !NEED_PAGEDAEMON()) |
924 | continue; | | 940 | continue; |
925 | mutex_exit(&pdaemonmtx); | | 941 | mutex_exit(&pdaemonmtx); |
926 | | | 942 | |
927 | /* | | 943 | /* |
928 | * step one: reclaim the page cache. this should give | | 944 | * step one: reclaim the page cache. this should give |
929 | * us the biggest earnings since whole pages are released | | 945 | * us the biggest earnings since whole pages are released |
930 | * into backing memory. | | 946 | * into backing memory. |
| @@ -933,53 +949,76 @@ uvm_pageout(void *arg) | | | @@ -933,53 +949,76 @@ uvm_pageout(void *arg) |
933 | if (!NEED_PAGEDAEMON()) { | | 949 | if (!NEED_PAGEDAEMON()) { |
934 | succ = true; | | 950 | succ = true; |
935 | mutex_enter(&pdaemonmtx); | | 951 | mutex_enter(&pdaemonmtx); |
936 | continue; | | 952 | continue; |
937 | } | | 953 | } |
938 | | | 954 | |
939 | /* | | 955 | /* |
940 | * Ok, so that didn't help. Next, try to hunt memory | | 956 | * Ok, so that didn't help. Next, try to hunt memory |
941 | * by pushing out vnode pages. The pages might contain | | 957 | * by pushing out vnode pages. The pages might contain |
942 | * useful cached data, but we need the memory. | | 958 | * useful cached data, but we need the memory. |
943 | */ | | 959 | */ |
944 | cleaned = 0; | | 960 | cleaned = 0; |
945 | skip = 0; | | 961 | skip = 0; |
| | | 962 | lockrunning = false; |
946 | again: | | 963 | again: |
947 | mutex_enter(&uvm_pageqlock); | | 964 | mutex_enter(&uvm_pageqlock); |
948 | while (cleaned < PAGEDAEMON_OBJCHUNK) { | | 965 | while (cleaned < PAGEDAEMON_OBJCHUNK) { |
949 | skipped = 0; | | 966 | skipped = 0; |
950 | TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) { | | 967 | TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) { |
951 | | | 968 | |
952 | /* | | 969 | /* |
953 | * skip over pages we _might_ have tried | | 970 | * skip over pages we _might_ have tried |
954 | * to handle earlier. they might not be | | 971 | * to handle earlier. they might not be |
955 | * exactly the same ones, but I'm not too | | 972 | * exactly the same ones, but I'm not too |
956 | * concerned. | | 973 | * concerned. |
957 | */ | | 974 | */ |
958 | while (skipped++ < skip) | | 975 | while (skipped++ < skip) |
959 | continue; | | 976 | continue; |
960 | | | 977 | |
961 | if (processpage(pg)) { | | 978 | if (processpage(pg, &lockrunning)) { |
962 | cleaned++; | | 979 | cleaned++; |
963 | goto again; | | 980 | goto again; |
964 | } | | 981 | } |
965 | | | 982 | |
966 | skip++; | | 983 | skip++; |
967 | } | | 984 | } |
968 | break; | | 985 | break; |
969 | } | | 986 | } |
970 | mutex_exit(&uvm_pageqlock); | | 987 | mutex_exit(&uvm_pageqlock); |
971 | | | 988 | |
972 | /* | | 989 | /* |
| | | 990 | * Ok, someone is running with an object lock held. |
| | | 991 | * We want to yield the host CPU to make sure the |
| | | 992 | * thread is not parked on the host. Since sched_yield() |
| | | 993 | * doesn't appear to do anything on NetBSD, nanosleep |
| | | 994 | * for the smallest possible time and hope we're back in |
| | | 995 | * the game soon. |
| | | 996 | */ |
| | | 997 | if (cleaned == 0 && lockrunning) { |
| | | 998 | uint64_t sec, nsec; |
| | | 999 | |
| | | 1000 | sec = 0; |
| | | 1001 | nsec = 1; |
| | | 1002 | rumpuser_nanosleep(&sec, &nsec, NULL); |
| | | 1003 | |
| | | 1004 | lockrunning = false; |
| | | 1005 | skip = 0; |
| | | 1006 | |
| | | 1007 | /* and here we go again */ |
| | | 1008 | goto again; |
| | | 1009 | } |
| | | 1010 | |
| | | 1011 | /* |
973 | * And of course we need to reclaim the page cache | | 1012 | * And of course we need to reclaim the page cache |
974 | * again to actually release memory. | | 1013 | * again to actually release memory. |
975 | */ | | 1014 | */ |
976 | pool_cache_reclaim(&pagecache); | | 1015 | pool_cache_reclaim(&pagecache); |
977 | if (!NEED_PAGEDAEMON()) { | | 1016 | if (!NEED_PAGEDAEMON()) { |
978 | succ = true; | | 1017 | succ = true; |
979 | mutex_enter(&pdaemonmtx); | | 1018 | mutex_enter(&pdaemonmtx); |
980 | continue; | | 1019 | continue; |
981 | } | | 1020 | } |
982 | | | 1021 | |
983 | /* | | 1022 | /* |
984 | * Still not there? sleeves come off right about now. | | 1023 | * Still not there? sleeves come off right about now. |
985 | * First: do reclaim on kernel/kmem map. | | 1024 | * First: do reclaim on kernel/kmem map. |
| @@ -1002,27 +1041,27 @@ uvm_pageout(void *arg) | | | @@ -1002,27 +1041,27 @@ uvm_pageout(void *arg) |
1002 | break; | | 1041 | break; |
1003 | pool_drain_start(&pp, &where); | | 1042 | pool_drain_start(&pp, &where); |
1004 | if (pp == pp_first) { | | 1043 | if (pp == pp_first) { |
1005 | succ = pool_drain_end(pp, where); | | 1044 | succ = pool_drain_end(pp, where); |
1006 | break; | | 1045 | break; |
1007 | } | | 1046 | } |
1008 | } | | 1047 | } |
1009 | | | 1048 | |
1010 | /* | | 1049 | /* |
1011 | * Need to use PYEC on our bag of tricks. | | 1050 | * Need to use PYEC on our bag of tricks. |
1012 | * Unfortunately, the wife just borrowed it. | | 1051 | * Unfortunately, the wife just borrowed it. |
1013 | */ | | 1052 | */ |
1014 | | | 1053 | |
1015 | if (!succ) { | | 1054 | if (!succ && cleaned == 0) { |
1016 | rumpuser_dprintf("pagedaemoness: failed to reclaim " | | 1055 | rumpuser_dprintf("pagedaemoness: failed to reclaim " |
1017 | "memory ... sleeping (deadlock?)\n"); | | 1056 | "memory ... sleeping (deadlock?)\n"); |
1018 | timo = hz; | | 1057 | timo = hz; |
1019 | } | | 1058 | } |
1020 | | | 1059 | |
1021 | mutex_enter(&pdaemonmtx); | | 1060 | mutex_enter(&pdaemonmtx); |
1022 | } | | 1061 | } |
1023 | | | 1062 | |
1024 | panic("you can swap out any time you like, but you can never leave"); | | 1063 | panic("you can swap out any time you like, but you can never leave"); |
1025 | } | | 1064 | } |
1026 | | | 1065 | |
1027 | void | | 1066 | void |
1028 | uvm_kick_pdaemon() | | 1067 | uvm_kick_pdaemon() |