| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: cprng_fast.c,v 1.15 2020/04/30 03:29:45 riastradh Exp $ */ | | 1 | /* $NetBSD: cprng_fast.c,v 1.16 2020/07/28 20:15:07 riastradh Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2014 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2014 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Taylor R. Campbell. | | 8 | * by Taylor R. Campbell. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
| @@ -20,259 +20,109 @@ | | | @@ -20,259 +20,109 @@ |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. | | 29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | #include <sys/cdefs.h> | | 32 | #include <sys/cdefs.h> |
33 | __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.15 2020/04/30 03:29:45 riastradh Exp $"); | | 33 | __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.16 2020/07/28 20:15:07 riastradh Exp $"); |
34 | | | 34 | |
35 | #include <sys/types.h> | | 35 | #include <sys/types.h> |
36 | #include <sys/param.h> | | 36 | #include <sys/param.h> |
37 | #include <sys/bitops.h> | | 37 | #include <sys/bitops.h> |
38 | #include <sys/cprng.h> | | 38 | #include <sys/cprng.h> |
39 | #include <sys/cpu.h> | | 39 | #include <sys/cpu.h> |
40 | #include <sys/entropy.h> | | 40 | #include <sys/entropy.h> |
41 | #include <sys/evcnt.h> | | 41 | #include <sys/evcnt.h> |
42 | #include <sys/intr.h> | | 42 | #include <sys/intr.h> |
43 | #include <sys/kmem.h> | | 43 | #include <sys/kmem.h> |
44 | #include <sys/percpu.h> | | 44 | #include <sys/percpu.h> |
45 | | | | |
46 | /* ChaCha core */ | | | |
47 | | | | |
48 | #define crypto_core_OUTPUTWORDS 16 | | | |
49 | #define crypto_core_INPUTWORDS 4 | | | |
50 | #define crypto_core_KEYWORDS 8 | | | |
51 | #define crypto_core_CONSTWORDS 4 | | | |
52 | | | | |
53 | #define crypto_core_ROUNDS 8 | | | |
54 | | | | |
55 | static uint32_t | | | |
56 | rotate(uint32_t u, unsigned c) | | | |
57 | { | | | |
58 | | | | |
59 | return (u << c) | (u >> (32 - c)); | | | |
60 | } | | | |
61 | | | | |
62 | #define QUARTERROUND(a, b, c, d) do { \ | | | |
63 | (a) += (b); (d) ^= (a); (d) = rotate((d), 16); \ | | | |
64 | (c) += (d); (b) ^= (c); (b) = rotate((b), 12); \ | | | |
65 | (a) += (b); (d) ^= (a); (d) = rotate((d), 8); \ | | | |
66 | (c) += (d); (b) ^= (c); (b) = rotate((b), 7); \ | | | |
67 | } while (0) | | | |
68 | | | | |
69 | static void | | | |
70 | crypto_core(uint32_t *out, const uint32_t *in, const uint32_t *k, | | | |
71 | const uint32_t *c) | | | |
72 | { | | | |
73 | uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15; | | | |
74 | int i; | | | |
75 | | | | |
76 | x0 = c[0]; | | | |
77 | x1 = c[1]; | | | |
78 | x2 = c[2]; | | | |
79 | x3 = c[3]; | | | |
80 | x4 = k[0]; | | | |
81 | x5 = k[1]; | | | |
82 | x6 = k[2]; | | | |
83 | x7 = k[3]; | | | |
84 | x8 = k[4]; | | | |
85 | x9 = k[5]; | | | |
86 | x10 = k[6]; | | | |
87 | x11 = k[7]; | | | |
88 | x12 = in[0]; | | | |
89 | x13 = in[1]; | | | |
90 | x14 = in[2]; | | | |
91 | x15 = in[3]; | | | |
92 | | | | |
93 | for (i = crypto_core_ROUNDS; i > 0; i -= 2) { | | | |
94 | QUARTERROUND( x0, x4, x8,x12); | | | |
95 | QUARTERROUND( x1, x5, x9,x13); | | | |
96 | QUARTERROUND( x2, x6,x10,x14); | | | |
97 | QUARTERROUND( x3, x7,x11,x15); | | | |
98 | QUARTERROUND( x0, x5,x10,x15); | | | |
99 | QUARTERROUND( x1, x6,x11,x12); | | | |
100 | QUARTERROUND( x2, x7, x8,x13); | | | |
101 | QUARTERROUND( x3, x4, x9,x14); | | | |
102 | } | | | |
103 | | | | |
104 | out[0] = x0 + c[0]; | | | |
105 | out[1] = x1 + c[1]; | | | |
106 | out[2] = x2 + c[2]; | | | |
107 | out[3] = x3 + c[3]; | | | |
108 | out[4] = x4 + k[0]; | | | |
109 | out[5] = x5 + k[1]; | | | |
110 | out[6] = x6 + k[2]; | | | |
111 | out[7] = x7 + k[3]; | | | |
112 | out[8] = x8 + k[4]; | | | |
113 | out[9] = x9 + k[5]; | | | |
114 | out[10] = x10 + k[6]; | | | |
115 | out[11] = x11 + k[7]; | | | |
116 | out[12] = x12 + in[0]; | | | |
117 | out[13] = x13 + in[1]; | | | |
118 | out[14] = x14 + in[2]; | | | |
119 | out[15] = x15 + in[3]; | | | |
120 | } | | | |
121 | | | | |
122 | /* `expand 32-byte k' */ | | | |
123 | static const uint32_t crypto_core_constant32[4] = { | | | |
124 | 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U, | | | |
125 | }; | | | |
126 | | | | |
127 | /* | | | |
128 | * Test vector for ChaCha20 from | | | |
129 | * <http://tools.ietf.org/html/draft-strombergson-chacha-test-vectors-00>, | | | |
130 | * test vectors for ChaCha12 and ChaCha8 generated by the same | | | |
131 | * crypto_core code with crypto_core_ROUNDS varied. | | | |
132 | */ | | | |
133 | | | | |
134 | #define check(E) do \ | | | |
135 | { \ | | | |
136 | if (!(E)) \ | | | |
137 | panic("crypto self-test failed: %s", #E); \ | | | |
138 | } while (0) | | | |
139 | | | | |
140 | static void | | | |
141 | crypto_core_selftest(void) | | | |
142 | { | | | |
143 | const uint32_t zero32[8] = {0}; | | | |
144 | const uint8_t sigma[] = "expand 32-byte k"; | | | |
145 | uint32_t block[16]; | | | |
146 | unsigned i; | | | |
147 | | | | |
148 | #if crypto_core_ROUNDS == 8 | | | |
149 | static const uint8_t out[64] = { | | | |
150 | 0x3e,0x00,0xef,0x2f,0x89,0x5f,0x40,0xd6, | | | |
151 | 0x7f,0x5b,0xb8,0xe8,0x1f,0x09,0xa5,0xa1, | | | |
152 | 0x2c,0x84,0x0e,0xc3,0xce,0x9a,0x7f,0x3b, | | | |
153 | 0x18,0x1b,0xe1,0x88,0xef,0x71,0x1a,0x1e, | | | |
154 | 0x98,0x4c,0xe1,0x72,0xb9,0x21,0x6f,0x41, | | | |
155 | 0x9f,0x44,0x53,0x67,0x45,0x6d,0x56,0x19, | | | |
156 | 0x31,0x4a,0x42,0xa3,0xda,0x86,0xb0,0x01, | | | |
157 | 0x38,0x7b,0xfd,0xb8,0x0e,0x0c,0xfe,0x42, | | | |
158 | }; | | | |
159 | #elif crypto_core_ROUNDS == 12 | | | |
160 | static const uint8_t out[64] = { | | | |
161 | 0x9b,0xf4,0x9a,0x6a,0x07,0x55,0xf9,0x53, | | | |
162 | 0x81,0x1f,0xce,0x12,0x5f,0x26,0x83,0xd5, | | | |
163 | 0x04,0x29,0xc3,0xbb,0x49,0xe0,0x74,0x14, | | | |
164 | 0x7e,0x00,0x89,0xa5,0x2e,0xae,0x15,0x5f, | | | |
165 | 0x05,0x64,0xf8,0x79,0xd2,0x7a,0xe3,0xc0, | | | |
166 | 0x2c,0xe8,0x28,0x34,0xac,0xfa,0x8c,0x79, | | | |
167 | 0x3a,0x62,0x9f,0x2c,0xa0,0xde,0x69,0x19, | | | |
168 | 0x61,0x0b,0xe8,0x2f,0x41,0x13,0x26,0xbe, | | | |
169 | }; | | | |
170 | #elif crypto_core_ROUNDS == 20 | | | |
171 | static const uint8_t out[64] = { | | | |
172 | 0x76,0xb8,0xe0,0xad,0xa0,0xf1,0x3d,0x90, | | | |
173 | 0x40,0x5d,0x6a,0xe5,0x53,0x86,0xbd,0x28, | | | |
174 | 0xbd,0xd2,0x19,0xb8,0xa0,0x8d,0xed,0x1a, | | | |
175 | 0xa8,0x36,0xef,0xcc,0x8b,0x77,0x0d,0xc7, | | | |
176 | 0xda,0x41,0x59,0x7c,0x51,0x57,0x48,0x8d, | | | |
177 | 0x77,0x24,0xe0,0x3f,0xb8,0xd8,0x4a,0x37, | | | |
178 | 0x6a,0x43,0xb8,0xf4,0x15,0x18,0xa1,0x1c, | | | |
179 | 0xc3,0x87,0xb6,0x69,0xb2,0xee,0x65,0x86, | | | |
180 | }; | | | |
181 | #else | | | |
182 | #error crypto_core_ROUNDS must be 8, 12, or 20. | | | |
183 | #endif | | | |
184 | | | 45 | |
185 | check(crypto_core_constant32[0] == le32dec(&sigma[0])); | | 46 | #include <crypto/chacha/chacha.h> |
186 | check(crypto_core_constant32[1] == le32dec(&sigma[4])); | | | |
187 | check(crypto_core_constant32[2] == le32dec(&sigma[8])); | | | |
188 | check(crypto_core_constant32[3] == le32dec(&sigma[12])); | | | |
189 | | | | |
190 | crypto_core(block, zero32, zero32, crypto_core_constant32); | | | |
191 | for (i = 0; i < 16; i++) | | | |
192 | check(block[i] == le32dec(&out[i*4])); | | | |
193 | } | | | |
194 | | | | |
195 | #undef check | | | |
196 | | | 47 | |
197 | #define CPRNG_FAST_SEED_BYTES (crypto_core_KEYWORDS * sizeof(uint32_t)) | | 48 | #define CPRNG_FAST_SEED_BYTES CHACHA_STREAM_KEYBYTES |
198 | | | 49 | |
199 | struct cprng_fast { | | 50 | struct cprng_fast { |
200 | uint32_t buffer[crypto_core_OUTPUTWORDS]; | | 51 | /* 128-bit vector unit generates 256 bytes at once */ |
201 | uint32_t key[crypto_core_KEYWORDS]; | | 52 | uint8_t buf[256]; |
202 | uint32_t nonce[crypto_core_INPUTWORDS]; | | 53 | uint8_t key[CPRNG_FAST_SEED_BYTES]; |
| | | 54 | uint8_t nonce[CHACHA_STREAM_NONCEBYTES]; |
| | | 55 | unsigned i; |
203 | struct evcnt *reseed_evcnt; | | 56 | struct evcnt *reseed_evcnt; |
204 | unsigned epoch; | | 57 | unsigned epoch; |
205 | }; | | 58 | }; |
206 | | | 59 | |
207 | __CTASSERT(sizeof ((struct cprng_fast *)0)->key == CPRNG_FAST_SEED_BYTES); | | | |
208 | | | | |
209 | static void cprng_fast_init_cpu(void *, void *, struct cpu_info *); | | 60 | static void cprng_fast_init_cpu(void *, void *, struct cpu_info *); |
210 | static void cprng_fast_schedule_reseed(struct cprng_fast *); | | 61 | static void cprng_fast_schedule_reseed(struct cprng_fast *); |
211 | static void cprng_fast_intr(void *); | | 62 | static void cprng_fast_intr(void *); |
212 | | | 63 | |
213 | static void cprng_fast_seed(struct cprng_fast *, const void *); | | 64 | static void cprng_fast_seed(struct cprng_fast *, const void *); |
214 | static void cprng_fast_buf(struct cprng_fast *, void *, unsigned); | | 65 | static void cprng_fast_buf(struct cprng_fast *, void *, unsigned); |
215 | | | 66 | |
216 | static void cprng_fast_buf_short(void *, size_t); | | 67 | static void cprng_fast_buf_short(void *, size_t); |
217 | static void cprng_fast_buf_long(void *, size_t); | | 68 | static void cprng_fast_buf_long(void *, size_t); |
218 | | | 69 | |
219 | static percpu_t *cprng_fast_percpu __read_mostly; | | 70 | static percpu_t *cprng_fast_percpu __read_mostly; |
220 | static void *cprng_fast_softint __read_mostly; | | 71 | static void *cprng_fast_softint __read_mostly; |
221 | | | 72 | |
222 | void | | 73 | void |
223 | cprng_fast_init(void) | | 74 | cprng_fast_init(void) |
224 | { | | 75 | { |
225 | | | 76 | |
226 | crypto_core_selftest(); | | | |
227 | cprng_fast_percpu = percpu_create(sizeof(struct cprng_fast), | | 77 | cprng_fast_percpu = percpu_create(sizeof(struct cprng_fast), |
228 | cprng_fast_init_cpu, NULL, NULL); | | 78 | cprng_fast_init_cpu, NULL, NULL); |
229 | cprng_fast_softint = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE, | | 79 | cprng_fast_softint = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE, |
230 | &cprng_fast_intr, NULL); | | 80 | &cprng_fast_intr, NULL); |
231 | } | | 81 | } |
232 | | | 82 | |
233 | static void | | 83 | static void |
234 | cprng_fast_init_cpu(void *p, void *arg __unused, struct cpu_info *ci) | | 84 | cprng_fast_init_cpu(void *p, void *arg __unused, struct cpu_info *ci) |
235 | { | | 85 | { |
236 | struct cprng_fast *const cprng = p; | | 86 | struct cprng_fast *const cprng = p; |
237 | uint8_t seed[CPRNG_FAST_SEED_BYTES]; | | 87 | uint8_t seed[CPRNG_FAST_SEED_BYTES]; |
238 | | | 88 | |
239 | cprng->epoch = entropy_epoch(); | | 89 | cprng->epoch = entropy_epoch(); |
240 | cprng_strong(kern_cprng, seed, sizeof seed, 0); | | 90 | cprng_strong(kern_cprng, seed, sizeof seed, 0); |
241 | cprng_fast_seed(cprng, seed); | | 91 | cprng_fast_seed(cprng, seed); |
242 | (void)explicit_memset(seed, 0, sizeof seed); | | 92 | (void)explicit_memset(seed, 0, sizeof seed); |
243 | | | 93 | |
244 | cprng->reseed_evcnt = kmem_alloc(sizeof(*cprng->reseed_evcnt), | | 94 | cprng->reseed_evcnt = kmem_alloc(sizeof(*cprng->reseed_evcnt), |
245 | KM_SLEEP); | | 95 | KM_SLEEP); |
246 | evcnt_attach_dynamic(cprng->reseed_evcnt, EVCNT_TYPE_MISC, NULL, | | 96 | evcnt_attach_dynamic(cprng->reseed_evcnt, EVCNT_TYPE_MISC, NULL, |
247 | ci->ci_cpuname, "cprng_fast reseed"); | | 97 | ci->ci_cpuname, "cprng_fast reseed"); |
248 | } | | 98 | } |
249 | | | 99 | |
250 | static inline int | | 100 | static int |
251 | cprng_fast_get(struct cprng_fast **cprngp) | | 101 | cprng_fast_get(struct cprng_fast **cprngp) |
252 | { | | 102 | { |
253 | struct cprng_fast *cprng; | | 103 | struct cprng_fast *cprng; |
254 | int s; | | 104 | int s; |
255 | | | 105 | |
256 | *cprngp = cprng = percpu_getref(cprng_fast_percpu); | | 106 | *cprngp = cprng = percpu_getref(cprng_fast_percpu); |
257 | s = splvm(); | | 107 | s = splvm(); |
258 | | | 108 | |
259 | if (__predict_false(cprng->epoch != entropy_epoch())) | | 109 | if (__predict_false(cprng->epoch != entropy_epoch())) |
260 | cprng_fast_schedule_reseed(cprng); | | 110 | cprng_fast_schedule_reseed(cprng); |
261 | | | 111 | |
262 | return s; | | 112 | return s; |
263 | } | | 113 | } |
264 | | | 114 | |
265 | static inline void | | 115 | static void |
266 | cprng_fast_put(struct cprng_fast *cprng, int s) | | 116 | cprng_fast_put(struct cprng_fast *cprng, int s) |
267 | { | | 117 | { |
268 | | | 118 | |
269 | KASSERT((cprng == percpu_getref(cprng_fast_percpu)) && | | 119 | KASSERT((cprng == percpu_getref(cprng_fast_percpu)) && |
270 | (percpu_putref(cprng_fast_percpu), true)); | | 120 | (percpu_putref(cprng_fast_percpu), true)); |
271 | splx(s); | | 121 | splx(s); |
272 | percpu_putref(cprng_fast_percpu); | | 122 | percpu_putref(cprng_fast_percpu); |
273 | } | | 123 | } |
274 | | | 124 | |
275 | static void | | 125 | static void |
276 | cprng_fast_schedule_reseed(struct cprng_fast *cprng __unused) | | 126 | cprng_fast_schedule_reseed(struct cprng_fast *cprng __unused) |
277 | { | | 127 | { |
278 | | | 128 | |
| @@ -292,218 +142,117 @@ cprng_fast_intr(void *cookie __unused) | | | @@ -292,218 +142,117 @@ cprng_fast_intr(void *cookie __unused) |
292 | cprng = percpu_getref(cprng_fast_percpu); | | 142 | cprng = percpu_getref(cprng_fast_percpu); |
293 | s = splvm(); | | 143 | s = splvm(); |
294 | cprng_fast_seed(cprng, seed); | | 144 | cprng_fast_seed(cprng, seed); |
295 | cprng->epoch = epoch; | | 145 | cprng->epoch = epoch; |
296 | cprng->reseed_evcnt->ev_count++; | | 146 | cprng->reseed_evcnt->ev_count++; |
297 | splx(s); | | 147 | splx(s); |
298 | percpu_putref(cprng_fast_percpu); | | 148 | percpu_putref(cprng_fast_percpu); |
299 | | | 149 | |
300 | explicit_memset(seed, 0, sizeof(seed)); | | 150 | explicit_memset(seed, 0, sizeof(seed)); |
301 | } | | 151 | } |
302 | | | 152 | |
303 | /* CPRNG algorithm */ | | 153 | /* CPRNG algorithm */ |
304 | | | 154 | |
305 | /* | | | |
306 | * The state consists of a key, the current nonce, and a 64-byte buffer | | | |
307 | * of output. Since we fill the buffer only when we need output, and | | | |
308 | * eat a 32-bit word at a time, one 32-bit word of the buffer would be | | | |
309 | * wasted. Instead, we repurpose it to count the number of entries in | | | |
310 | * the buffer remaining, counting from high to low in order to allow | | | |
311 | * comparison to zero to detect when we need to refill it. | | | |
312 | */ | | | |
313 | #define CPRNG_FAST_BUFIDX (crypto_core_OUTPUTWORDS - 1) | | | |
314 | | | | |
315 | static void | | 155 | static void |
316 | cprng_fast_seed(struct cprng_fast *cprng, const void *seed) | | 156 | cprng_fast_seed(struct cprng_fast *cprng, const void *seed) |
317 | { | | 157 | { |
318 | | | 158 | |
319 | (void)memset(cprng->buffer, 0, sizeof cprng->buffer); | | 159 | (void)memset(cprng->buf, 0, sizeof cprng->buf); |
320 | (void)memcpy(cprng->key, seed, sizeof cprng->key); | | 160 | (void)memcpy(cprng->key, seed, sizeof cprng->key); |
321 | (void)memset(cprng->nonce, 0, sizeof cprng->nonce); | | 161 | (void)memset(cprng->nonce, 0, sizeof cprng->nonce); |
| | | 162 | cprng->i = sizeof cprng->buf; |
322 | } | | 163 | } |
323 | | | 164 | |
324 | static inline uint32_t | | 165 | static void |
325 | cprng_fast_word(struct cprng_fast *cprng) | | 166 | cprng_fast_buf(struct cprng_fast *cprng, void *buf, unsigned len) |
326 | { | | 167 | { |
327 | uint32_t v; | | 168 | uint8_t *p = buf; |
| | | 169 | unsigned n = len, n0; |
328 | | | 170 | |
329 | if (__predict_true(0 < cprng->buffer[CPRNG_FAST_BUFIDX])) { | | 171 | KASSERT(cprng->i <= sizeof(cprng->buf)); |
330 | v = cprng->buffer[--cprng->buffer[CPRNG_FAST_BUFIDX]]; | | 172 | KASSERT(len <= sizeof(cprng->buf)); |
331 | } else { | | | |
332 | /* If we don't have enough words, refill the buffer. */ | | | |
333 | crypto_core(cprng->buffer, cprng->nonce, cprng->key, | | | |
334 | crypto_core_constant32); | | | |
335 | if (__predict_false(++cprng->nonce[0] == 0)) { | | | |
336 | cprng->nonce[1]++; | | | |
337 | cprng_fast_schedule_reseed(cprng); | | | |
338 | } | | | |
339 | v = cprng->buffer[CPRNG_FAST_BUFIDX]; | | | |
340 | cprng->buffer[CPRNG_FAST_BUFIDX] = CPRNG_FAST_BUFIDX; | | | |
341 | } | | | |
342 | | | 173 | |
343 | return v; | | 174 | n0 = MIN(n, sizeof(cprng->buf) - cprng->i); |
| | | 175 | memcpy(p, &cprng->buf[cprng->i], n0); |
| | | 176 | if ((n -= n0) == 0) { |
| | | 177 | cprng->i += n0; |
| | | 178 | KASSERT(cprng->i <= sizeof(cprng->buf)); |
| | | 179 | return; |
| | | 180 | } |
| | | 181 | p += n0; |
| | | 182 | le64enc(cprng->nonce, 1 + le64dec(cprng->nonce)); |
| | | 183 | chacha_stream(cprng->buf, sizeof(cprng->buf), 0, cprng->nonce, |
| | | 184 | cprng->key, 8); |
| | | 185 | memcpy(p, cprng->buf, n); |
| | | 186 | cprng->i = n; |
344 | } | | 187 | } |
| | | 188 | |
| | | 189 | /* Public API */ |
345 | | | 190 | |
346 | static inline void | | 191 | static void |
347 | cprng_fast_buf(struct cprng_fast *cprng, void *buf, unsigned n) | | 192 | cprng_fast_buf_short(void *buf, size_t len) |
348 | { | | 193 | { |
349 | uint8_t *p = buf; | | 194 | struct cprng_fast *cprng; |
350 | uint32_t v; | | 195 | int s; |
351 | unsigned w, r; | | | |
352 | | | 196 | |
353 | w = n / sizeof(uint32_t); | | 197 | KASSERT(len <= sizeof(cprng->buf)); |
354 | while (w--) { | | | |
355 | v = cprng_fast_word(cprng); | | | |
356 | (void)memcpy(p, &v, 4); | | | |
357 | p += 4; | | | |
358 | } | | | |
359 | | | 198 | |
360 | r = n % sizeof(uint32_t); | | 199 | s = cprng_fast_get(&cprng); |
361 | if (r) { | | 200 | cprng_fast_buf(cprng, buf, len); |
362 | v = cprng_fast_word(cprng); | | 201 | cprng_fast_put(cprng, s); |
363 | while (r--) { | | | |
364 | *p++ = (v & 0xff); | | | |
365 | v >>= 8; | | | |
366 | } | | | |
367 | } | | | |
368 | } | | 202 | } |
369 | | | 203 | |
370 | /* | | | |
371 | * crypto_onetimestream: Expand a short unpredictable one-time seed | | | |
372 | * into a long unpredictable output. | | | |
373 | */ | | | |
374 | static void | | 204 | static void |
375 | crypto_onetimestream(const uint32_t seed[crypto_core_KEYWORDS], void *buf, | | 205 | cprng_fast_buf_long(void *buf, size_t len) |
376 | size_t n) | | | |
377 | { | | 206 | { |
378 | uint32_t block[crypto_core_OUTPUTWORDS]; | | 207 | uint8_t seed[CHACHA_STREAM_KEYBYTES]; |
379 | uint32_t nonce[crypto_core_INPUTWORDS] = {0}; | | 208 | uint8_t nonce[CHACHA_STREAM_NONCEBYTES] = {0}; |
380 | uint8_t *p8; | | | |
381 | uint32_t *p32; | | | |
382 | size_t ni, nb, nf; | | | |
383 | | | 209 | |
384 | /* | | 210 | CTASSERT(sizeof(seed) <= sizeof(((struct cprng_fast *)0)->buf)); |
385 | * Guarantee we can generate up to n bytes. We have | | | |
386 | * 2^(32*INPUTWORDS) possible inputs yielding output of | | | |
387 | * 4*OUTPUTWORDS*2^(32*INPUTWORDS) bytes. It suffices to | | | |
388 | * require that sizeof n > (1/CHAR_BIT) log_2 n be less than | | | |
389 | * (1/CHAR_BIT) log_2 of the total output stream length. We | | | |
390 | * have | | | |
391 | * | | | |
392 | * log_2 (4 o 2^(32 i)) = log_2 (4 o) + log_2 2^(32 i) | | | |
393 | * = 2 + log_2 o + 32 i. | | | |
394 | */ | | | |
395 | __CTASSERT(CHAR_BIT*sizeof n <= | | | |
396 | (2 + ilog2(crypto_core_OUTPUTWORDS) + 32*crypto_core_INPUTWORDS)); | | | |
397 | | | 211 | |
398 | p8 = buf; | | 212 | #if SIZE_MAX >= 0x3fffffffff |
399 | p32 = (uint32_t *)roundup2((uintptr_t)p8, sizeof(uint32_t)); | | 213 | /* >=256 GB is not reasonable */ |
400 | ni = (uint8_t *)p32 - p8; | | 214 | KASSERT(len <= 0x3fffffffff); |
401 | if (n < ni) | | 215 | #endif |
402 | ni = n; | | | |
403 | nb = (n - ni) / sizeof block; | | | |
404 | nf = (n - ni) % sizeof block; | | | |
405 | | | | |
406 | KASSERT(((uintptr_t)p32 & 3) == 0); | | | |
407 | KASSERT(ni <= n); | | | |
408 | KASSERT(nb <= (n / sizeof block)); | | | |
409 | KASSERT(nf <= n); | | | |
410 | KASSERT(n == (ni + (nb * sizeof block) + nf)); | | | |
411 | KASSERT(ni < sizeof(uint32_t)); | | | |
412 | KASSERT(nf < sizeof block); | | | |
413 | | | | |
414 | if (ni) { | | | |
415 | crypto_core(block, nonce, seed, crypto_core_constant32); | | | |
416 | nonce[0]++; | | | |
417 | (void)memcpy(p8, block, ni); | | | |
418 | } | | | |
419 | while (nb--) { | | | |
420 | crypto_core(p32, nonce, seed, crypto_core_constant32); | | | |
421 | if (++nonce[0] == 0) | | | |
422 | nonce[1]++; | | | |
423 | p32 += crypto_core_OUTPUTWORDS; | | | |
424 | } | | | |
425 | if (nf) { | | | |
426 | crypto_core(block, nonce, seed, crypto_core_constant32); | | | |
427 | if (++nonce[0] == 0) | | | |
428 | nonce[1]++; | | | |
429 | (void)memcpy(p32, block, nf); | | | |
430 | } | | | |
431 | | | 216 | |
432 | if (ni | nf) | | 217 | cprng_fast_buf_short(seed, sizeof seed); |
433 | (void)explicit_memset(block, 0, sizeof block); | | 218 | chacha_stream(buf, len, 0, nonce, seed, 8); |
| | | 219 | |
| | | 220 | (void)explicit_memset(seed, 0, sizeof seed); |
434 | } | | 221 | } |
435 | | | | |
436 | /* Public API */ | | | |
437 | | | 222 | |
438 | uint32_t | | 223 | uint32_t |
439 | cprng_fast32(void) | | 224 | cprng_fast32(void) |
440 | { | | 225 | { |
441 | struct cprng_fast *cprng; | | | |
442 | uint32_t v; | | 226 | uint32_t v; |
443 | int s; | | | |
444 | | | 227 | |
445 | s = cprng_fast_get(&cprng); | | 228 | cprng_fast_buf_short(&v, sizeof v); |
446 | v = cprng_fast_word(cprng); | | | |
447 | cprng_fast_put(cprng, s); | | | |
448 | | | 229 | |
449 | return v; | | 230 | return v; |
450 | } | | 231 | } |
451 | | | 232 | |
452 | uint64_t | | 233 | uint64_t |
453 | cprng_fast64(void) | | 234 | cprng_fast64(void) |
454 | { | | 235 | { |
455 | struct cprng_fast *cprng; | | 236 | uint64_t v; |
456 | uint32_t hi, lo; | | | |
457 | int s; | | | |
458 | | | | |
459 | s = cprng_fast_get(&cprng); | | | |
460 | hi = cprng_fast_word(cprng); | | | |
461 | lo = cprng_fast_word(cprng); | | | |
462 | cprng_fast_put(cprng, s); | | | |
463 | | | | |
464 | return ((uint64_t)hi << 32) | lo; | | | |
465 | } | | | |
466 | | | | |
467 | static void | | | |
468 | cprng_fast_buf_short(void *buf, size_t len) | | | |
469 | { | | | |
470 | struct cprng_fast *cprng; | | | |
471 | int s; | | | |
472 | | | | |
473 | s = cprng_fast_get(&cprng); | | | |
474 | cprng_fast_buf(cprng, buf, len); | | | |
475 | cprng_fast_put(cprng, s); | | | |
476 | } | | | |
477 | | | | |
478 | static __noinline void | | | |
479 | cprng_fast_buf_long(void *buf, size_t len) | | | |
480 | { | | | |
481 | uint32_t seed[crypto_core_KEYWORDS]; | | | |
482 | struct cprng_fast *cprng; | | | |
483 | int s; | | | |
484 | | | | |
485 | s = cprng_fast_get(&cprng); | | | |
486 | cprng_fast_buf(cprng, seed, sizeof seed); | | | |
487 | cprng_fast_put(cprng, s); | | | |
488 | | | 237 | |
489 | crypto_onetimestream(seed, buf, len); | | 238 | cprng_fast_buf_short(&v, sizeof v); |
490 | | | 239 | |
491 | (void)explicit_memset(seed, 0, sizeof seed); | | 240 | return v; |
492 | } | | 241 | } |
493 | | | 242 | |
494 | size_t | | 243 | size_t |
495 | cprng_fast(void *buf, size_t len) | | 244 | cprng_fast(void *buf, size_t len) |
496 | { | | 245 | { |
497 | | | 246 | |
498 | /* | | 247 | /* |
499 | * We don't want to hog the CPU, so we use the short version, | | 248 | * We don't want to hog the CPU, so we use the short version, |
500 | * to generate output without preemption, only if we can do it | | 249 | * to generate output without preemption, only if we can do it |
501 | * with at most one crypto_core. | | 250 | * with at most one ChaCha call. |
502 | */ | | 251 | */ |
503 | if (len <= (sizeof(uint32_t) * crypto_core_OUTPUTWORDS)) | | 252 | if (len <= sizeof(((struct cprng_fast *)0)->buf)) |
504 | cprng_fast_buf_short(buf, len); | | 253 | cprng_fast_buf_short(buf, len); |
505 | else | | 254 | else |
506 | cprng_fast_buf_long(buf, len); | | 255 | cprng_fast_buf_long(buf, len); |
507 | | | 256 | |
508 | return len; | | 257 | return len; /* hysterical raisins */ |
509 | } | | 258 | } |