Thu May 10 12:35:53 2012 UTC ()
xc_wait() does not wait for all cpus to finish
their callback. That means the ucode buffer is released while still in use
and this causes a crash.
Quick fix: check if the ucode buffer has been freed and abort.
You may need to run 'cpuctl ucode' twice to apply it to all cpus.

Per discussion with rmind@ use low priority xcalls and splhigh.


(cegger)
diff -r1.2 -r1.3 src/sys/arch/x86/x86/cpu_ucode_amd.c

cvs diff -r1.2 -r1.3 src/sys/arch/x86/x86/cpu_ucode_amd.c (expand / switch to unified diff)

--- src/sys/arch/x86/x86/cpu_ucode_amd.c 2012/05/09 13:58:09 1.2
+++ src/sys/arch/x86/x86/cpu_ucode_amd.c 2012/05/10 12:35:53 1.3
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: cpu_ucode_amd.c,v 1.2 2012/05/09 13:58:09 cegger Exp $ */ 1/* $NetBSD: cpu_ucode_amd.c,v 1.3 2012/05/10 12:35:53 cegger Exp $ */
2/* 2/*
3 * Copyright (c) 2012 The NetBSD Foundation, Inc. 3 * Copyright (c) 2012 The NetBSD Foundation, Inc.
4 * All rights reserved. 4 * All rights reserved.
5 * 5 *
6 * This code is derived from software contributed to The NetBSD Foundation 6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Christoph Egger. 7 * by Christoph Egger.
8 * 8 *
9 * Redistribution and use in source and binary forms, with or without 9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions 10 * modification, are permitted provided that the following conditions
11 * are met: 11 * are met:
12 * 1. Redistributions of source code must retain the above copyright 12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer. 13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright 14 * 2. Redistributions in binary form must reproduce the above copyright
@@ -19,27 +19,27 @@ @@ -19,27 +19,27 @@
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE. 28 * POSSIBILITY OF SUCH DAMAGE.
29 */ 29 */
30 30
31#include <sys/cdefs.h> 31#include <sys/cdefs.h>
32__KERNEL_RCSID(0, "$NetBSD: cpu_ucode_amd.c,v 1.2 2012/05/09 13:58:09 cegger Exp $"); 32__KERNEL_RCSID(0, "$NetBSD: cpu_ucode_amd.c,v 1.3 2012/05/10 12:35:53 cegger Exp $");
33 33
34#include "opt_xen.h" 34#include "opt_xen.h"
35#include "opt_cpu_ucode.h" 35#include "opt_cpu_ucode.h"
36 36
37#include <sys/param.h> 37#include <sys/param.h>
38#include <sys/conf.h> 38#include <sys/conf.h>
39#include <sys/cpuio.h> 39#include <sys/cpuio.h>
40#include <sys/cpu.h> 40#include <sys/cpu.h>
41#include <sys/kmem.h> 41#include <sys/kmem.h>
42#include <sys/xcall.h> 42#include <sys/xcall.h>
43 43
44#include <machine/cpufunc.h> 44#include <machine/cpufunc.h>
45#include <machine/specialreg.h> 45#include <machine/specialreg.h>
@@ -140,32 +140,34 @@ struct mc_buf { @@ -140,32 +140,34 @@ struct mc_buf {
140 struct mpbhdr *mc_mpbuf; 140 struct mpbhdr *mc_mpbuf;
141 struct microcode_amd *mc_amd; 141 struct microcode_amd *mc_amd;
142 int mc_error; 142 int mc_error;
143}; 143};
144 144
145static void 145static void
146cpu_apply_cb(void *arg0, void *arg1) 146cpu_apply_cb(void *arg0, void *arg1)
147{ 147{
148 int error = 0; 148 int error = 0;
149 const struct cpu_ucode_softc *sc = arg0; 149 const struct cpu_ucode_softc *sc = arg0;
150 struct microcode_amd mc_amd; 150 struct microcode_amd mc_amd;
151 struct mc_buf mc; 151 struct mc_buf mc;
152 device_t dev; 152 device_t dev;
 153 int s;
153 154
154 memcpy(&mc, arg1, sizeof(mc)); 155 memcpy(&mc, arg1, sizeof(mc));
155 mc_amd.mpb = mc.mc_amd->mpb; 156 mc_amd.mpb = mc.mc_amd->mpb;
156 mc_amd.mpb_size = mc.mc_amd->mpb_size; 157 mc_amd.mpb_size = mc.mc_amd->mpb_size;
157 158
158 dev = curcpu()->ci_dev; 159 dev = curcpu()->ci_dev;
 160 s = splhigh();
159 161
160 do { 162 do {
161 uint64_t patchlevel; 163 uint64_t patchlevel;
162 struct microcode_amd_header *hdr; 164 struct microcode_amd_header *hdr;
163 165
164 if (mc.mc_mpbuf->mpb_type != UCODE_TYPE_PATCH) { 166 if (mc.mc_mpbuf->mpb_type != UCODE_TYPE_PATCH) {
165 aprint_debug_dev(dev, "ucode: patch type expected\n"); 167 aprint_debug_dev(dev, "ucode: patch type expected\n");
166 goto next; 168 goto next;
167 } 169 }
168 170
169 hdr = (struct microcode_amd_header *)mc_amd.mpb; 171 hdr = (struct microcode_amd_header *)mc_amd.mpb;
170 if (hdr->ah_processor_rev_id != mc.mc_equiv_cpuid) { 172 if (hdr->ah_processor_rev_id != mc.mc_equiv_cpuid) {
171 aprint_debug_dev(dev, "ucode: patch does not " 173 aprint_debug_dev(dev, "ucode: patch does not "
@@ -186,43 +188,52 @@ cpu_apply_cb(void *arg0, void *arg1) @@ -186,43 +188,52 @@ cpu_apply_cb(void *arg0, void *arg1)
186 if (patchlevel == rdmsr(MSR_UCODE_AMD_PATCHLEVEL)) { 188 if (patchlevel == rdmsr(MSR_UCODE_AMD_PATCHLEVEL)) {
187 aprint_debug_dev(dev, "ucode: update from revision " 189 aprint_debug_dev(dev, "ucode: update from revision "
188 "0x%"PRIx64" to 0x%x failed\n", 190 "0x%"PRIx64" to 0x%x failed\n",
189 patchlevel, hdr->ah_patch_id); 191 patchlevel, hdr->ah_patch_id);
190 error = EIO; 192 error = EIO;
191 goto out; 193 goto out;
192 } else { 194 } else {
193 /* Success */ 195 /* Success */
194 error = 0; 196 error = 0;
195 goto out; 197 goto out;
196 } 198 }
197 199
198next: 200next:
199 if (mc.mc_mpbuf == NULL) 201 /* Check for race:
 202 * When we booted with -x a cpu might already finished
 203 * (why doesn't xc_wait() wait for *all* cpus?)
 204 * and sc->sc_blob is already freed.
 205 * In this case the calculation below touches
 206 * non-allocated memory and results in a crash.
 207 */
 208 if (sc->sc_blob == NULL)
200 break; 209 break;
 210
201 mc.mc_buf += mc.mc_mpbuf->mpb_len + 211 mc.mc_buf += mc.mc_mpbuf->mpb_len +
202 sizeof(mc.mc_mpbuf->mpb_type) + 212 sizeof(mc.mc_mpbuf->mpb_type) +
203 sizeof(mc.mc_mpbuf->mpb_len); 213 sizeof(mc.mc_mpbuf->mpb_len);
204 mc.mc_mpbuf = (struct mpbhdr *)mc.mc_buf; 214 mc.mc_mpbuf = (struct mpbhdr *)mc.mc_buf;
205 mc_amd.mpb = (uint8_t *)mc.mc_mpbuf->mpb_data; 215 mc_amd.mpb = (uint8_t *)mc.mc_mpbuf->mpb_data;
206 mc_amd.mpb_size = mc.mc_mpbuf->mpb_len; 216 mc_amd.mpb_size = mc.mc_mpbuf->mpb_len;
207 217
208 } while ((uintptr_t)((mc.mc_buf) - (uint8_t *)sc->sc_blob) < sc->sc_blobsize); 218 } while ((uintptr_t)((mc.mc_buf) - (uint8_t *)sc->sc_blob) < sc->sc_blobsize);
209 aprint_error_dev(dev, "ucode: No newer patch available " 219 aprint_error_dev(dev, "ucode: No newer patch available "
210 "for this cpu than is already installed.\n"); 220 "for this cpu than is already installed.\n");
211 error = ENOENT; 221 error = ENOENT;
212 222
213out: 223out:
214 if (error) 224 if (error)
215 ((struct mc_buf *)(arg1))->mc_error = error; 225 ((struct mc_buf *)(arg1))->mc_error = error;
 226 splx(s);
216} 227}
217 228
218int 229int
219cpu_ucode_amd_apply(struct cpu_ucode_softc *sc) 230cpu_ucode_amd_apply(struct cpu_ucode_softc *sc)
220{ 231{
221 int i, error = 0; 232 int i, error = 0;
222 uint32_t *magic; 233 uint32_t *magic;
223 uint32_t cpu_signature; 234 uint32_t cpu_signature;
224 uint32_t equiv_cpuid = 0; 235 uint32_t equiv_cpuid = 0;
225 struct mc_buf mc; 236 struct mc_buf mc;
226 int where; 237 int where;
227 238
228 cpu_signature = curcpu()->ci_signature; 239 cpu_signature = curcpu()->ci_signature;
@@ -269,26 +280,26 @@ cpu_ucode_amd_apply(struct cpu_ucode_sof @@ -269,26 +280,26 @@ cpu_ucode_amd_apply(struct cpu_ucode_sof
269 error = ENOENT; 280 error = ENOENT;
270 goto err1; 281 goto err1;
271 } 282 }
272 283
273 mc.mc_equiv_cpuid = equiv_cpuid; 284 mc.mc_equiv_cpuid = equiv_cpuid;
274 mc.mc_buf += mc.mc_mpbuf->mpb_len + sizeof(mc.mc_mpbuf->mpb_type) + 285 mc.mc_buf += mc.mc_mpbuf->mpb_len + sizeof(mc.mc_mpbuf->mpb_type) +
275 sizeof(mc.mc_mpbuf->mpb_len); 286 sizeof(mc.mc_mpbuf->mpb_len);
276 mc.mc_mpbuf = (struct mpbhdr *)mc.mc_buf; 287 mc.mc_mpbuf = (struct mpbhdr *)mc.mc_buf;
277 mc.mc_amd->mpb = (uint8_t *)mc.mc_mpbuf->mpb_data; 288 mc.mc_amd->mpb = (uint8_t *)mc.mc_mpbuf->mpb_data;
278 mc.mc_amd->mpb_size = mc.mc_mpbuf->mpb_len; 289 mc.mc_amd->mpb_size = mc.mc_mpbuf->mpb_len;
279 290
280 /* Apply it on all cpus */ 291 /* Apply it on all cpus */
281 mc.mc_error = 0; 292 mc.mc_error = 0;
282 where = xc_broadcast(XC_HIGHPRI, cpu_apply_cb, sc, &mc); 293 where = xc_broadcast(0, cpu_apply_cb, sc, &mc);
283 294
284 /* Wait for completion */ 295 /* Wait for completion */
285 xc_wait(where); 296 xc_wait(where);
286 error = mc.mc_error; 297 error = mc.mc_error;
287 298
288err1: 299err1:
289 kmem_free(mc.mc_amd->ect, mc.mc_amd->ect_size); 300 kmem_free(mc.mc_amd->ect, mc.mc_amd->ect_size);
290err0: 301err0:
291 kmem_free(mc.mc_amd, sizeof(*mc.mc_amd)); 302 kmem_free(mc.mc_amd, sizeof(*mc.mc_amd));
292 return error; 303 return error;
293} 304}
294#endif 305#endif