Sat Aug 27 16:23:44 2011 UTC
Implement sparse dumps for amd64 (copied from i386). Disabled by default
for now; the feature can be switched on at runtime via sysctl.
XXX: most of the code can be merged.


(christos)
diff -r1.164 -r1.165 src/sys/arch/amd64/amd64/machdep.c
diff -r1.25 -r1.26 src/sys/arch/amd64/include/pmap.h
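
For reference: the diff creates the new knob as a read-write integer node
under CTL_MACHDEP, so on a kernel built with this change it should show up
in userland as machdep.sparse_dump (assuming the usual name resolution for
CTL_CREATE nodes), e.g.

    sysctl -w machdep.sparse_dump=1

The default of 0 keeps the traditional full dump; any nonzero value makes
dodumpsys() take the sparse path.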

cvs diff -r1.164 -r1.165 src/sys/arch/amd64/amd64/machdep.c

--- src/sys/arch/amd64/amd64/machdep.c 2011/08/11 18:11:17 1.164
+++ src/sys/arch/amd64/amd64/machdep.c 2011/08/27 16:23:44 1.165
@@ -1,24 +1,28 @@
-/*	$NetBSD: machdep.c,v 1.164 2011/08/11 18:11:17 cherry Exp $	*/
+/*	$NetBSD: machdep.c,v 1.165 2011/08/27 16:23:44 christos Exp $	*/
 
 /*-
- * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008
+ * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
  *     The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace
  * Simulation Facility, NASA Ames Research Center.
  *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Coyote Point Systems, Inc. which was written under contract to Coyote
+ * Point by Jed Davis and Devon O'Dell.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
@@ -97,27 +101,27 @@
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)machdep.c	7.4 (Berkeley) 6/3/91
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.164 2011/08/11 18:11:17 cherry Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.165 2011/08/27 16:23:44 christos Exp $");
 
 /* #define XENDEBUG_LOW */
 
 #include "opt_modular.h"
 #include "opt_user_ldt.h"
 #include "opt_ddb.h"
 #include "opt_kgdb.h"
 #include "opt_cpureset_delay.h"
 #include "opt_mtrr.h"
 #include "opt_realmem.h"
 #include "opt_xen.h"
 #ifndef XEN
 #include "opt_physmem.h"
@@ -229,26 +233,45 @@ int cpureset_delay = 2000;	/* defaul
 #endif
 
 int cpu_class = CPUCLASS_686;
 
 #ifdef MTRR
 struct mtrr_funcs *mtrr_funcs;
 #endif
 
 int physmem;
 uint64_t dumpmem_low;
 uint64_t dumpmem_high;
 int cpu_class;
 
+
+#ifndef NO_SPARSE_DUMP
+int sparse_dump = 0;
+
+paddr_t max_paddr = 0;
+unsigned char *sparse_dump_physmap;
+#endif
+
+char *dump_headerbuf, *dump_headerbuf_ptr;
+#define dump_headerbuf_size PAGE_SIZE
+#define dump_headerbuf_end (dump_headerbuf + dump_headerbuf_size)
+#define dump_headerbuf_avail (dump_headerbuf_end - dump_headerbuf_ptr)
+daddr_t dump_header_blkno;
+
+size_t dump_nmemsegs;
+size_t dump_npages;
+size_t dump_header_size;
+size_t dump_totalbytesleft;
+
 vaddr_t msgbuf_vaddr;
 paddr_t msgbuf_paddr;
 
 struct {
         paddr_t paddr;
         psize_t sz;
 } msgbuf_p_seg[VM_PHYSSEG_MAX];
 unsigned int msgbuf_p_cnt = 0;
 
 vaddr_t idt_vaddr;
 paddr_t idt_paddr;
 
 vaddr_t lo32_vaddr;
@@ -280,28 +303,48 @@ struct mtrr_funcs *mtrr_funcs;
 #endif
 
 /*
  * Size of memory segments, before any memory is stolen.
  */
 phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
 int mem_cluster_cnt;
 
 char x86_64_doubleflt_stack[4096];
 
 int cpu_dump(void);
 int cpu_dumpsize(void);
 u_long cpu_dump_mempagecnt(void);
-void dumpsys(void);
 void dodumpsys(void);
+void dumpsys(void);
+
+void dump_misc_init(void);
+void dump_seg_prep(void);
+int dump_seg_iter(int (*)(paddr_t, paddr_t));
+
+#ifndef NO_SPARSE_DUMP
+void sparse_dump_reset(void);
+void sparse_dump_mark(vaddr_t, vaddr_t, int);
+void cpu_dump_prep_sparse(void);
+#endif
+
+void dump_header_start(void);
+int dump_header_flush(void);
+int dump_header_addbytes(const void*, size_t);
+int dump_header_addseg(paddr_t, paddr_t);
+int dump_header_finish(void);
+
+int dump_seg_count_range(paddr_t, paddr_t);
+int dumpsys_seg(paddr_t, paddr_t);
+
 void init_x86_64(paddr_t);
 
 /*
  * Machine-dependent startup code
  */
 void
 cpu_startup(void)
 {
         int x, y;
         vaddr_t minaddr, maxaddr;
         psize_t sz;
 
         /*
@@ -520,26 +563,34 @@ SYSCTL_SETUP(sysctl_machdep_setup, "sysc
                        NULL, 1, NULL, 0,
                        CTL_MACHDEP, CPU_SSE2, CTL_EOL);
         sysctl_createv(clog, 0, NULL, NULL,
                        CTLFLAG_PERMANENT,
                        CTLTYPE_QUAD, "tsc_freq", NULL,
                        NULL, 0, &tsc_freq, 0,
                        CTL_MACHDEP, CTL_CREATE, CTL_EOL);
         sysctl_createv(clog, 0, NULL, NULL,
                        CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
                        CTLTYPE_INT, "pae",
                        SYSCTL_DESCR("Whether the kernel uses PAE"),
                        NULL, 1, NULL, 0,
                        CTL_MACHDEP, CTL_CREATE, CTL_EOL);
+#ifndef NO_SPARSE_DUMP
+        /* XXXjld Does this really belong under machdep, and not e.g. kern? */
+        sysctl_createv(clog, 0, NULL, NULL,
+                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+                       CTLTYPE_INT, "sparse_dump", NULL,
+                       NULL, 0, &sparse_dump, 0,
+                       CTL_MACHDEP, CTL_CREATE, CTL_EOL);
+#endif
 }
 
 void
 buildcontext(struct lwp *l, void *catcher, void *f)
 {
         struct trapframe *tf = l->l_md.md_regs;
 
         tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
         tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
         tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
         tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
 
         tf->tf_rip = (uint64_t)catcher;
@@ -736,254 +787,494 @@ haltsys:
 
         printf("rebooting...\n");
         if (cpureset_delay > 0)
                 delay(cpureset_delay * 1000);
         cpu_reset();
         for(;;) ;
         /*NOTREACHED*/
 }
 
 /*
  * XXXfvdl share dumpcode.
  */
 
+/*
+ * Perform assorted dump-related initialization tasks.  Assumes that
+ * the maximum physical memory address will not increase afterwards.
+ */
+void
+dump_misc_init(void)
+{
+#ifndef NO_SPARSE_DUMP
+        int i;
+#endif
+
+        if (dump_headerbuf != NULL)
+                return; /* already called */
+
+#ifndef NO_SPARSE_DUMP
+        for (i = 0; i < mem_cluster_cnt; ++i) {
+                paddr_t top = mem_clusters[i].start + mem_clusters[i].size;
+                if (max_paddr < top)
+                        max_paddr = top;
+        }
+#ifdef DEBUG
+        printf("dump_misc_init: max_paddr = 0x%lx\n",
+            (unsigned long)max_paddr);
+#endif
+
+        sparse_dump_physmap = (void*)uvm_km_alloc(kernel_map,
+            roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE),
+            PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
+#endif
+        dump_headerbuf = (void*)uvm_km_alloc(kernel_map,
+            dump_headerbuf_size,
+            PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
+        /* XXXjld should check for failure here, disable dumps if so. */
+}
+
+#ifndef NO_SPARSE_DUMP
+/*
+ * Clear the set of pages to include in a sparse dump.
+ */
+void
+sparse_dump_reset(void)
+{
+        memset(sparse_dump_physmap, 0,
+            roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE));
+}
+
+/*
+ * Include or exclude pages in a sparse dump, by half-open virtual
+ * address interval (which may wrap around the end of the space).
+ */
+void
+sparse_dump_mark(vaddr_t vbegin, vaddr_t vend, int includep)
+{
+        pmap_t pmap;
+        paddr_t p;
+        vaddr_t v;
+
+        /*
+         * If a partial page is called for, the whole page must be included.
+         */
+        if (includep) {
+                vbegin = rounddown(vbegin, PAGE_SIZE);
+                vend = roundup(vend, PAGE_SIZE);
+        } else {
+                vbegin = roundup(vbegin, PAGE_SIZE);
+                vend = rounddown(vend, PAGE_SIZE);
+        }
+
+        pmap = pmap_kernel();
+        for (v = vbegin; v != vend; v += PAGE_SIZE) {
+                if (pmap_extract(pmap, v, &p)) {
+                        if (includep)
+                                setbit(sparse_dump_physmap, p/PAGE_SIZE);
+                        else
+                                clrbit(sparse_dump_physmap, p/PAGE_SIZE);
+                }
+        }
+}
+
+/*
+ * Machine-dependently decides on the contents of a sparse dump, using
+ * the above.
+ */
+void
+cpu_dump_prep_sparse(void)
+{
+        sparse_dump_reset();
+        /* XXX could the alternate recursive page table be skipped? */
+        sparse_dump_mark((vaddr_t)PTE_BASE, (vaddr_t)KERN_BASE, 1);
+        /* Memory for I/O buffers could be unmarked here, for example. */
+        /* The kernel text could also be unmarked, but gdb would be upset. */
+}
+#endif
+
+/*
+ * Abstractly iterate over the collection of memory segments to be
+ * dumped; the callback lacks the customary environment-pointer
+ * argument because none of the current users really need one.
+ *
+ * To be used only after dump_seg_prep is called to set things up.
+ */
+int
+dump_seg_iter(int (*callback)(paddr_t, paddr_t))
+{
+        int error, i;
+
+#define CALLBACK(start,size) do {             \
+        error = callback(start,size);         \
+        if (error)                            \
+                return error;                 \
+} while(0)
+
+        for (i = 0; i < mem_cluster_cnt; ++i) {
+#ifndef NO_SPARSE_DUMP
+                /*
+                 * The bitmap is scanned within each memory segment,
+                 * rather than over its entire domain, in case any
+                 * pages outside of the memory proper have been mapped
+                 * into kva; they might be devices that wouldn't
+                 * appreciate being arbitrarily read, and including
+                 * them could also break the assumption that a sparse
+                 * dump will always be smaller than a full one.
+                 */
+                if (sparse_dump) {
+                        paddr_t p, start, end;
+                        int lastset;
+
+                        start = mem_clusters[i].start;
+                        end = start + mem_clusters[i].size;
+                        start = rounddown(start, PAGE_SIZE); /* unnecessary? */
+                        lastset = 0;
+                        for (p = start; p < end; p += PAGE_SIZE) {
+                                int thisset = isset(sparse_dump_physmap,
+                                    p/PAGE_SIZE);
+
+                                if (!lastset && thisset)
+                                        start = p;
+                                if (lastset && !thisset)
+                                        CALLBACK(start, p - start);
+                                lastset = thisset;
+                        }
+                        if (lastset)
+                                CALLBACK(start, p - start);
+                } else
+#endif
+                        CALLBACK(mem_clusters[i].start, mem_clusters[i].size);
+        }
+        return 0;
+#undef CALLBACK
+}
+
+/*
+ * Prepare for an impending core dump: decide what's being dumped and
+ * how much space it will take up.
+ */
+void
+dump_seg_prep(void)
+{
+#ifndef NO_SPARSE_DUMP
+        if (sparse_dump)
+                cpu_dump_prep_sparse();
+#endif
+
+        dump_nmemsegs = 0;
+        dump_npages = 0;
+        dump_seg_iter(dump_seg_count_range);
+
+        dump_header_size = ALIGN(sizeof(kcore_seg_t)) +
+            ALIGN(sizeof(cpu_kcore_hdr_t)) +
+            ALIGN(dump_nmemsegs * sizeof(phys_ram_seg_t));
+        dump_header_size = roundup(dump_header_size, dbtob(1));
+
+        /*
+         * savecore(8) will read this to decide how many pages to
+         * copy, and cpu_dumpconf has already used the pessimistic
+         * value to set dumplo, so it's time to tell the truth.
+         */
+        dumpsize = dump_npages; /* XXX could these just be one variable? */
+}
+
+int
+dump_seg_count_range(paddr_t start, paddr_t size)
+{
+        ++dump_nmemsegs;
+        dump_npages += size / PAGE_SIZE;
+        return 0;
+}
+
+/*
+ * A sparse dump's header may be rather large, due to the number of
+ * "segments" emitted.  These routines manage a simple output buffer,
+ * so that the header can be written to disk incrementally.
+ */
+void
+dump_header_start(void)
+{
+        dump_headerbuf_ptr = dump_headerbuf;
+        dump_header_blkno = dumplo;
+}
+
+int
+dump_header_flush(void)
+{
+        const struct bdevsw *bdev;
+        size_t to_write;
+        int error;
+
+        bdev = bdevsw_lookup(dumpdev);
+        to_write = roundup(dump_headerbuf_ptr - dump_headerbuf, dbtob(1));
+        error = bdev->d_dump(dumpdev, dump_header_blkno,
+            dump_headerbuf, to_write);
+        dump_header_blkno += btodb(to_write);
+        dump_headerbuf_ptr = dump_headerbuf;
+        return error;
+}
+
+int
+dump_header_addbytes(const void* vptr, size_t n)
+{
+        const char* ptr = vptr;
+        int error;
+
+        while (n > dump_headerbuf_avail) {
+                memcpy(dump_headerbuf_ptr, ptr, dump_headerbuf_avail);
+                ptr += dump_headerbuf_avail;
+                n -= dump_headerbuf_avail;
+                dump_headerbuf_ptr = dump_headerbuf_end;
+                error = dump_header_flush();
+                if (error)
+                        return error;
+        }
+        memcpy(dump_headerbuf_ptr, ptr, n);
+        dump_headerbuf_ptr += n;
+
+        return 0;
+}
+
+int
+dump_header_addseg(paddr_t start, paddr_t size)
+{
+        phys_ram_seg_t seg = { start, size };
+
+        return dump_header_addbytes(&seg, sizeof(seg));
+}
+
+int
+dump_header_finish(void)
+{
+        memset(dump_headerbuf_ptr, 0, dump_headerbuf_avail);
+        return dump_header_flush();
+}
+
+
 /*
  * These variables are needed by /sbin/savecore
  */
 uint32_t dumpmag = 0x8fca0101;  /* magic number */
 int     dumpsize = 0;           /* pages */
 long    dumplo = 0;             /* blocks */
 
 /*
- * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers.
+ * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers
+ * for a full (non-sparse) dump.
  */
 int
 cpu_dumpsize(void)
 {
         int size;
 
         size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) +
             ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t));
         if (roundup(size, dbtob(1)) != dbtob(1))
                 return (-1);
 
         return (1);
 }
 
 /*
- * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped.
+ * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped
+ * for a full (non-sparse) dump.
  */
 u_long
 cpu_dump_mempagecnt(void)
 {
         u_long i, n;
 
         n = 0;
         for (i = 0; i < mem_cluster_cnt; i++)
                 n += atop(mem_clusters[i].size);
         return (n);
 }
 
 /*
  * cpu_dump: dump the machine-dependent kernel core dump headers.
  */
 int
 cpu_dump(void)
 {
         int (*dump)(dev_t, daddr_t, void *, size_t);
-        char buf[dbtob(1)];
-        kcore_seg_t *segp;
-        cpu_kcore_hdr_t *cpuhdrp;
-        phys_ram_seg_t *memsegp;
+        kcore_seg_t seg;
+        cpu_kcore_hdr_t cpuhdr;
         const struct bdevsw *bdev;
-        int i;
 
         bdev = bdevsw_lookup(dumpdev);
         if (bdev == NULL)
                 return (ENXIO);
 
         dump = bdev->d_dump;
 
-        memset(buf, 0, sizeof buf);
-        segp = (kcore_seg_t *)buf;
-        cpuhdrp = (cpu_kcore_hdr_t *)&buf[ALIGN(sizeof(*segp))];
-        memsegp = (phys_ram_seg_t *)&buf[ ALIGN(sizeof(*segp)) +
-            ALIGN(sizeof(*cpuhdrp))];
-
         /*
          * Generate a segment header.
          */
-        CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
-        segp->c_size = dbtob(1) - ALIGN(sizeof(*segp));
+        CORE_SETMAGIC(seg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
+        seg.c_size = dump_header_size - ALIGN(sizeof(seg));
+        (void)dump_header_addbytes(&seg, ALIGN(sizeof(seg)));
 
         /*
          * Add the machine-dependent header info.
          */
-        cpuhdrp->ptdpaddr = PDPpaddr;
-        cpuhdrp->nmemsegs = mem_cluster_cnt;
+        cpuhdr.ptdpaddr = PDPpaddr;
+        cpuhdr.nmemsegs = dump_nmemsegs;
+        (void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr)));
 
         /*
-         * Fill in the memory segment descriptors.
+         * Write out the memory segment descriptors.
          */
-        for (i = 0; i < mem_cluster_cnt; i++) {
-                memsegp[i].start = mem_clusters[i].start;
-                memsegp[i].size = mem_clusters[i].size;
-        }
-
-        return (dump(dumpdev, dumplo, (void *)buf, dbtob(1)));
-}
-
-/*
- * This is called by main to set dumplo and dumpsize.
- * Dumps always skip the first PAGE_SIZE of disk space
- * in case there might be a disk label stored there.
- * If there is extra space, put dump at the end to
- * reduce the chance that swapping trashes it.
- */
-void
-cpu_dumpconf(void)
-{
-        const struct bdevsw *bdev;
-        int nblks, dumpblks;    /* size of dump area */
-
-        if (dumpdev == NODEV)
-                goto bad;
-        bdev = bdevsw_lookup(dumpdev);
-        if (bdev == NULL) {
-                dumpdev = NODEV;
-                goto bad;
-        }
-        if (bdev->d_psize == NULL)
-                goto bad;
-        nblks = (*bdev->d_psize)(dumpdev);
-        if (nblks <= ctod(1))
-                goto bad;
-
-        dumpblks = cpu_dumpsize();
-        if (dumpblks < 0)
-                goto bad;
-        dumpblks += ctod(cpu_dump_mempagecnt());
-
-        /* If dump won't fit (incl. room for possible label), punt. */
-        if (dumpblks > (nblks - ctod(1)))
-                goto bad;
-
-        /* Put dump at end of partition */
-        dumplo = nblks - dumpblks;
-
-        /* dumpsize is in page units, and doesn't include headers. */
-        dumpsize = cpu_dump_mempagecnt();
-        return;
-
- bad:
-        dumpsize = 0;
+        return dump_seg_iter(dump_header_addseg);
 }
 
 /*
  * Doadump comes here after turning off memory management and
  * getting on the dump stack, either when called above, or by
  * the auto-restart code.
  */
 #define BYTES_PER_DUMP  PAGE_SIZE /* must be a multiple of pagesize XXX small */
 static vaddr_t dumpspace;
 
 vaddr_t
 reserve_dumppages(vaddr_t p)
 {
 
         dumpspace = p;
         return (p + BYTES_PER_DUMP);
 }
 
+int
+dumpsys_seg(paddr_t maddr, paddr_t bytes)
+{
+        u_long i, m, n;
+        daddr_t blkno;
+        const struct bdevsw *bdev;
+        int (*dump)(dev_t, daddr_t, void *, size_t);
+        int error;
+
+        if (dumpdev == NODEV)
+                return ENODEV;
+        bdev = bdevsw_lookup(dumpdev);
+        if (bdev == NULL || bdev->d_psize == NULL)
+                return ENODEV;
+
+        dump = bdev->d_dump;
+
+        blkno = dump_header_blkno;
+        for (i = 0; i < bytes; i += n, dump_totalbytesleft -= n) {
+                /* Print out how many MBs we have left to go. */
+                if ((dump_totalbytesleft % (1024*1024)) == 0)
+                        printf("%lu ", (unsigned long)
+                            (dump_totalbytesleft / (1024 * 1024)));
+
+                /* Limit size for next transfer. */
+                n = bytes - i;
+                if (n > BYTES_PER_DUMP)
+                        n = BYTES_PER_DUMP;
+
+                for (m = 0; m < n; m += NBPG)
+                        pmap_kenter_pa(dumpspace + m, maddr + m,
+                            VM_PROT_READ, 0);
+                pmap_update(pmap_kernel());
+
+                error = (*dump)(dumpdev, blkno, (void *)dumpspace, n);
+                if (error)
+                        return error;
+                maddr += n;
+                blkno += btodb(n);              /* XXX? */
+
+#if 0   /* XXX this doesn't work.  grr. */
+                /* operator aborting dump? */
+                if (sget() != NULL)
+                        return EINTR;
+#endif
+        }
+        dump_header_blkno = blkno;
+
+        return 0;
+}
+
 void
 dodumpsys(void)
 {
         const struct bdevsw *bdev;
-        u_long totalbytesleft, bytes, i, n, memseg;
-        u_long maddr;
-        int psize;
-        daddr_t blkno;
-        int (*dump)(dev_t, daddr_t, void *, size_t);
+        int dumpend, psize;
         int error;
 
         if (dumpdev == NODEV)
                 return;
+
         bdev = bdevsw_lookup(dumpdev);
         if (bdev == NULL || bdev->d_psize == NULL)
                 return;
-
         /*
          * For dumps during autoconfiguration,
          * if dump device has already configured...
         */
         if (dumpsize == 0)
                 cpu_dumpconf();
         if (dumplo <= 0 || dumpsize == 0) {
                 printf("\ndump to dev %u,%u not possible\n", major(dumpdev),
                     minor(dumpdev));
                 return;
         }
-        printf("\ndumping to dev %u,%u offset %ld\n", major(dumpdev),
-            minor(dumpdev), dumplo);
+        printf("\ndumping to dev %llu,%llu offset %ld\n",
+            (unsigned long long)major(dumpdev),
+            (unsigned long long)minor(dumpdev), dumplo);
 
         psize = (*bdev->d_psize)(dumpdev);
         printf("dump ");
         if (psize == -1) {
                 printf("area unavailable\n");
                 return;
         }
 
+#if 0   /* XXX this doesn't work.  grr. */
+        /* toss any characters present prior to dump */
+        while (sget() != NULL); /*syscons and pccons differ */
+#endif
+
+        dump_seg_prep();
+        dumpend = dumplo + btodb(dump_header_size) + ctod(dump_npages);
+        if (dumpend > psize) {
+                printf("failed: insufficient space (%d < %d)\n",
+                    psize, dumpend);
+                goto failed;
+        }
+
+        dump_header_start();
         if ((error = cpu_dump()) != 0)
                 goto err;
+        if ((error = dump_header_finish()) != 0)
+                goto err;
 
-        totalbytesleft = ctob(cpu_dump_mempagecnt());
-        blkno = dumplo + cpu_dumpsize();
-        dump = bdev->d_dump;
-        error = 0;
+        if (dump_header_blkno != dumplo + btodb(dump_header_size)) {
+                printf("BAD header size (%ld [written] != %ld [expected])\n",
+                    (long)(dump_header_blkno - dumplo),
+                    (long)btodb(dump_header_size));
+                goto failed;
+        }
 
-        for (memseg = 0; memseg < mem_cluster_cnt; memseg++) {
-                maddr = mem_clusters[memseg].start;
-                bytes = mem_clusters[memseg].size;
-
-                for (i = 0; i < bytes; i += n, totalbytesleft -= n) {
-                        /* Print out how many MBs we have left to go. */
-                        if ((totalbytesleft % (1024*1024)) == 0)
-                                printf("%ld ", totalbytesleft / (1024 * 1024));
-
-                        /* Limit size for next transfer. */
-                        n = bytes - i;
-                        if (n > BYTES_PER_DUMP)
-                                n = BYTES_PER_DUMP;
-
-                        (void) pmap_map(dumpspace, maddr, maddr + n,
-                            VM_PROT_READ);
-
-                        error = (*dump)(dumpdev, blkno, (void *)dumpspace, n);
-                        if (error)
-                                goto err;
-                        maddr += n;
-                        blkno += btodb(n);              /* XXX? */
+        dump_totalbytesleft = roundup(ptoa(dump_npages), BYTES_PER_DUMP);
+        error = dump_seg_iter(dumpsys_seg);
 
-#if 0   /* XXX this doesn't work.  grr. */
-                        /* operator aborting dump? */
-                        if (sget() != NULL) {
-                                error = EINTR;
-                                break;
-                        }
-#endif
-                }
+        if (error == 0 && dump_header_blkno != dumpend) {
+                printf("BAD dump size (%ld [written] != %ld [expected])\n",
+                    (long)(dumpend - dumplo),
+                    (long)(dump_header_blkno - dumplo));
+                goto failed;
         }
 
- err:
+err:
         switch (error) {
 
         case ENXIO:
                 printf("device bad\n");
                 break;
 
         case EFAULT:
                 printf("device not ready\n");
                 break;
 
         case EINVAL:
                 printf("area improper\n");
                 break;
@@ -994,31 +1285,96 @@ dodumpsys(void)
 
         case EINTR:
                 printf("aborted from console\n");
                 break;
 
         case 0:
                 printf("succeeded\n");
                 break;
 
         default:
                 printf("error %d\n", error);
                 break;
         }
+failed:
         printf("\n\n");
         delay(5000000);         /* 5 seconds */
 }
 
 /*
+ * This is called by main to set dumplo and dumpsize.
+ * Dumps always skip the first PAGE_SIZE of disk space
+ * in case there might be a disk label stored there.
+ * If there is extra space, put dump at the end to
+ * reduce the chance that swapping trashes it.
+ *
+ * Sparse dumps can't be placed as close to the end as possible, because
+ * savecore(8) has to know where to start reading in the dump device
+ * before it has access to any of the crashed system's state.
+ *
+ * Note also that a sparse dump will never be larger than a full one:
+ * in order to add a phys_ram_seg_t to the header, at least one page
+ * must be removed.
+ */
+void
+cpu_dumpconf(void)
+{
+        const struct bdevsw *bdev;
+        int nblks, dumpblks;    /* size of dump area */
+
+        if (dumpdev == NODEV)
+                goto bad;
+        bdev = bdevsw_lookup(dumpdev);
+        if (bdev == NULL) {
+                dumpdev = NODEV;
+                goto bad;
+        }
+        if (bdev->d_psize == NULL)
+                goto bad;
+        nblks = (*bdev->d_psize)(dumpdev);
+        if (nblks <= ctod(1))
+                goto bad;
+
+        dumpblks = cpu_dumpsize();
+        if (dumpblks < 0)
+                goto bad;
+        dumpblks += ctod(cpu_dump_mempagecnt());
+
+        /* If dump won't fit (incl. room for possible label), punt. */
+        if (dumpblks > (nblks - ctod(1))) {
+#ifndef NO_SPARSE_DUMP
+                /* A sparse dump might (and hopefully will) fit. */
+                dumplo = ctod(1);
+#else
+                /* But if we're not configured for that, punt. */
+                goto bad;
+#endif
+        } else {
+                /* Put dump at end of partition */
+                dumplo = nblks - dumpblks;
+        }
+
+        /* dumpsize is in page units, and doesn't include headers. */
+        dumpsize = cpu_dump_mempagecnt();
+
+        /* Now that we've decided this will work, init ancillary stuff. */
+        dump_misc_init();
+        return;
+
+ bad:
+        dumpsize = 0;
+}
+
+/*
  * Clear registers on exec
  */
 void
 setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
 {
         struct pcb *pcb = lwp_getpcb(l);
         struct trapframe *tf;
 
         /* If we were using the FPU, forget about it. */
         if (pcb->pcb_fpcpu != NULL) {
                 fpusave_lwp(l, false);
         }
 

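The heart of the sparse path in the machdep.c diff above is dump_seg_iter():
within each RAM cluster it run-length scans the sparse_dump_physmap bitmap
and hands every maximal run of marked pages to a callback as a (start, size)
extent. The following is a minimal standalone sketch of that scan, not
NetBSD code: PAGE_SZ, NPAGES, print_seg and the bit macros are invented for
illustration (the kernel uses PAGE_SIZE and the setbit()/isset() macros from
<sys/param.h>).

    #include <limits.h>
    #include <stdio.h>
    #include <string.h>

    #define PAGE_SZ 4096UL                  /* illustrative page size */
    #define NPAGES  64                      /* pretend RAM is 64 pages */
    #define TESTBIT(map, i) (((map)[(i) / CHAR_BIT] >> ((i) % CHAR_BIT)) & 1)
    #define MARKBIT(map, i) ((map)[(i) / CHAR_BIT] |= 1u << ((i) % CHAR_BIT))

    /* stand-in for the kernel's per-extent callbacks */
    static int
    print_seg(unsigned long start, unsigned long size)
    {
            printf("segment: start=0x%lx size=0x%lx\n", start, size);
            return 0;
    }

    int
    main(void)
    {
            unsigned char physmap[NPAGES / CHAR_BIT];
            unsigned long p, start = 0;
            int lastset = 0, thisset;

            memset(physmap, 0, sizeof(physmap));
            /* pretend pages 3-5 and 10 were marked by sparse_dump_mark() */
            MARKBIT(physmap, 3); MARKBIT(physmap, 4); MARKBIT(physmap, 5);
            MARKBIT(physmap, 10);

            /* emit one callback per contiguous run of marked pages */
            for (p = 0; p < NPAGES; p++) {
                    thisset = TESTBIT(physmap, p);
                    if (!lastset && thisset)
                            start = p;              /* a run begins */
                    if (lastset && !thisset)        /* a run ended at p-1 */
                            print_seg(start * PAGE_SZ, (p - start) * PAGE_SZ);
                    lastset = thisset;
            }
            if (lastset)                    /* run reached the end of the map */
                    print_seg(start * PAGE_SZ, (NPAGES - start) * PAGE_SZ);
            return 0;
    }

Compiled as an ordinary program this prints two extents (pages 3-5 and
page 10), which is exactly the coalescing dump_seg_iter() performs before
phys_ram_seg_t records are written via dump_header_addseg().
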
cvs diff -r1.25 -r1.26 src/sys/arch/amd64/include/pmap.h

--- src/sys/arch/amd64/include/pmap.h 2011/08/13 12:09:38 1.25
+++ src/sys/arch/amd64/include/pmap.h 2011/08/27 16:23:44 1.26
@@ -1,14 +1,14 @@
-/*	$NetBSD: pmap.h,v 1.25 2011/08/13 12:09:38 cherry Exp $	*/
+/*	$NetBSD: pmap.h,v 1.26 2011/08/27 16:23:44 christos Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
@@ -167,26 +167,27 @@
 #define PDIR_SLOT_PTE           L4_SLOT_PTE
 #define PDIR_SLOT_APTE          L4_SLOT_APTE
 
 /*
  * the following defines give the virtual addresses of various MMU
  * data structures:
  * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings
  * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD
  * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP
  *
  */
 
 #define PTE_BASE  ((pt_entry_t *) (L4_SLOT_PTE * NBPD_L4))
+#define KERN_BASE  ((pt_entry_t *) (L4_SLOT_KERN * NBPD_L4))
 #define APTE_BASE ((pt_entry_t *) (VA_SIGN_NEG((L4_SLOT_APTE * NBPD_L4))))
 
 #define L1_BASE         PTE_BASE
 #define AL1_BASE        APTE_BASE
 
 #define L2_BASE ((pd_entry_t *)((char *)L1_BASE + L4_SLOT_PTE * NBPD_L3))
 #define L3_BASE ((pd_entry_t *)((char *)L2_BASE + L4_SLOT_PTE * NBPD_L2))
 #define L4_BASE ((pd_entry_t *)((char *)L3_BASE + L4_SLOT_PTE * NBPD_L1))
 
 #define AL2_BASE ((pd_entry_t *)((char *)AL1_BASE + L4_SLOT_PTE * NBPD_L3))
 #define AL3_BASE ((pd_entry_t *)((char *)AL2_BASE + L4_SLOT_PTE * NBPD_L2))
 #define AL4_BASE ((pd_entry_t *)((char *)AL3_BASE + L4_SLOT_PTE * NBPD_L1))
 
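
A closing note on the machdep.c change: a sparse dump's header can exceed
the single disk block that a full dump's header fits in (there is one
phys_ram_seg_t per run of marked pages), which is what the dump_header_*
routines are for: an append buffer that is flushed to the dump device each
time it fills. Below is a standalone sketch of the same pattern, not NetBSD
code: BUFSZ, flush_block() and addbytes() are invented stand-ins, where the
kernel buffers PAGE_SIZE bytes, pads to dbtob(1) blocks, and writes through
bdev->d_dump.

    #include <stdio.h>
    #include <string.h>

    #define BUFSZ 16        /* deliberately tiny, to force several flushes */

    static char buf[BUFSZ];
    static size_t used;

    /* stand-in for dump_header_flush(): write out and reset the buffer */
    static int
    flush_block(void)
    {
            printf("flush %zu bytes: %.*s\n", used, (int)used, buf);
            used = 0;
            return 0;
    }

    /* stand-in for dump_header_addbytes(): append, flushing when full */
    static int
    addbytes(const void *vptr, size_t n)
    {
            const char *ptr = vptr;
            int error;

            while (n > BUFSZ - used) {
                    size_t avail = BUFSZ - used;

                    memcpy(buf + used, ptr, avail);
                    ptr += avail;
                    n -= avail;
                    used = BUFSZ;
                    if ((error = flush_block()) != 0)
                            return error;
            }
            memcpy(buf + used, ptr, n);
            used += n;
            return 0;
    }

    int
    main(void)
    {
            addbytes("segment-header:", 15);
            addbytes("0123456789abcdefghij", 20); /* spans two flushes */
            if (used > 0)           /* like dump_header_finish() */
                    flush_block();
            return 0;
    }

The real dump_header_finish() additionally zero-pads the final partial
block before flushing, since the device write is always block-sized.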