Fri Apr 10 21:56:41 2020 UTC ()
Rename radix_tree_node_clean_p() to radix_tree_node_sum() and have it return
the computed sum.  Use to replace any_children_tagmask().  Simpler & faster.


(ad)
diff -r1.23 -r1.24 src/common/lib/libc/gen/radixtree.c

cvs diff -r1.23 -r1.24 src/common/lib/libc/gen/radixtree.c (expand / switch to unified diff)

--- src/common/lib/libc/gen/radixtree.c 2020/01/28 22:20:45 1.23
+++ src/common/lib/libc/gen/radixtree.c 2020/04/10 21:56:41 1.24
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: radixtree.c,v 1.23 2020/01/28 22:20:45 ad Exp $ */ 1/* $NetBSD: radixtree.c,v 1.24 2020/04/10 21:56:41 ad Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c)2011,2012,2013 YAMAMOTO Takashi, 4 * Copyright (c)2011,2012,2013 YAMAMOTO Takashi,
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
@@ -102,37 +102,37 @@ @@ -102,37 +102,37 @@
102 * leaf nodes with the given tag. To reduce amount of nodes to visit for 102 * leaf nodes with the given tag. To reduce amount of nodes to visit for
103 * these functions, this implementation keeps tagging information in internal 103 * these functions, this implementation keeps tagging information in internal
104 * intermediate nodes and quickly skips uninterested parts of a tree. 104 * intermediate nodes and quickly skips uninterested parts of a tree.
105 * 105 *
106 * A tree has RADIX_TREE_TAG_ID_MAX independent tag spaces, each of which are 106 * A tree has RADIX_TREE_TAG_ID_MAX independent tag spaces, each of which are
107 * identified by an zero-origin numbers, tagid. For the current implementation, 107 * identified by an zero-origin numbers, tagid. For the current implementation,
108 * RADIX_TREE_TAG_ID_MAX is 2. A set of tags is described as a bitmask tagmask, 108 * RADIX_TREE_TAG_ID_MAX is 2. A set of tags is described as a bitmask tagmask,
109 * which is a bitwise OR of (1 << tagid). 109 * which is a bitwise OR of (1 << tagid).
110 */ 110 */
111 111
112#include <sys/cdefs.h> 112#include <sys/cdefs.h>
113 113
114#if defined(_KERNEL) || defined(_STANDALONE) 114#if defined(_KERNEL) || defined(_STANDALONE)
115__KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.23 2020/01/28 22:20:45 ad Exp $"); 115__KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.24 2020/04/10 21:56:41 ad Exp $");
116#include <sys/param.h> 116#include <sys/param.h>
117#include <sys/errno.h> 117#include <sys/errno.h>
118#include <sys/pool.h> 118#include <sys/pool.h>
119#include <sys/radixtree.h> 119#include <sys/radixtree.h>
120#include <lib/libkern/libkern.h> 120#include <lib/libkern/libkern.h>
121#if defined(_STANDALONE) 121#if defined(_STANDALONE)
122#include <lib/libsa/stand.h> 122#include <lib/libsa/stand.h>
123#endif /* defined(_STANDALONE) */ 123#endif /* defined(_STANDALONE) */
124#else /* defined(_KERNEL) || defined(_STANDALONE) */ 124#else /* defined(_KERNEL) || defined(_STANDALONE) */
125__RCSID("$NetBSD: radixtree.c,v 1.23 2020/01/28 22:20:45 ad Exp $"); 125__RCSID("$NetBSD: radixtree.c,v 1.24 2020/04/10 21:56:41 ad Exp $");
126#include <assert.h> 126#include <assert.h>
127#include <errno.h> 127#include <errno.h>
128#include <stdbool.h> 128#include <stdbool.h>
129#include <stdlib.h> 129#include <stdlib.h>
130#include <string.h> 130#include <string.h>
131#if 1 131#if 1
132#define KASSERT assert 132#define KASSERT assert
133#else 133#else
134#define KASSERT(a) /* nothing */ 134#define KASSERT(a) /* nothing */
135#endif 135#endif
136#endif /* defined(_KERNEL) || defined(_STANDALONE) */ 136#endif /* defined(_KERNEL) || defined(_STANDALONE) */
137 137
138#include <sys/radixtree.h> 138#include <sys/radixtree.h>
@@ -188,45 +188,26 @@ entry_match_p(void *p, unsigned int tagm @@ -188,45 +188,26 @@ entry_match_p(void *p, unsigned int tagm
188 * 188 *
189 * we used to maintain a count of non-NULL nodes in this structure, but it 189 * we used to maintain a count of non-NULL nodes in this structure, but it
190 * prevented it from being aligned to a cache line boundary; the performance 190 * prevented it from being aligned to a cache line boundary; the performance
191 * benefit from being cache friendly is greater than the benefit of having 191 * benefit from being cache friendly is greater than the benefit of having
192 * a dedicated count value, especially in multi-processor situations where 192 * a dedicated count value, especially in multi-processor situations where
193 * we need to avoid intra-pool-page false sharing. 193 * we need to avoid intra-pool-page false sharing.
194 */ 194 */
195 195
196struct radix_tree_node { 196struct radix_tree_node {
197 void *n_ptrs[RADIX_TREE_PTR_PER_NODE]; 197 void *n_ptrs[RADIX_TREE_PTR_PER_NODE];
198}; 198};
199 199
200/* 200/*
201 * any_children_tagmask: 
202 * 
203 * return OR'ed tagmask of the given node's children. 
204 */ 
205 
206static unsigned int 
207any_children_tagmask(const struct radix_tree_node *n) 
208{ 
209 unsigned int mask; 
210 int i; 
211 
212 mask = 0; 
213 for (i = 0; i < RADIX_TREE_PTR_PER_NODE; i++) { 
214 mask |= (unsigned int)(uintptr_t)n->n_ptrs[i]; 
215 } 
216 return mask & RADIX_TREE_TAG_MASK; 
217} 
218 
219/* 
220 * p_refs[0].pptr == &t->t_root 201 * p_refs[0].pptr == &t->t_root
221 * : 202 * :
222 * p_refs[n].pptr == &(*p_refs[n-1])->n_ptrs[x] 203 * p_refs[n].pptr == &(*p_refs[n-1])->n_ptrs[x]
223 * : 204 * :
224 * : 205 * :
225 * p_refs[t->t_height].pptr == &leaf_pointer 206 * p_refs[t->t_height].pptr == &leaf_pointer
226 */ 207 */
227 208
228struct radix_tree_path { 209struct radix_tree_path {
229 struct radix_tree_node_ref { 210 struct radix_tree_node_ref {
230 void **pptr; 211 void **pptr;
231 } p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */ 212 } p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */
232 /* 213 /*
@@ -358,38 +339,44 @@ radix_tree_init(void) @@ -358,38 +339,44 @@ radix_tree_init(void)
358 */ 339 */
359 340
360void 341void
361radix_tree_await_memory(void) 342radix_tree_await_memory(void)
362{ 343{
363 struct radix_tree_node *n; 344 struct radix_tree_node *n;
364 345
365 n = pool_cache_get(radix_tree_node_cache, PR_WAITOK); 346 n = pool_cache_get(radix_tree_node_cache, PR_WAITOK);
366 pool_cache_put(radix_tree_node_cache, n); 347 pool_cache_put(radix_tree_node_cache, n);
367} 348}
368 349
369#endif /* defined(_KERNEL) */ 350#endif /* defined(_KERNEL) */
370 351
371static bool __unused 352/*
372radix_tree_node_clean_p(const struct radix_tree_node *n) 353 * radix_tree_node_sum:
 354 *
 355 * return the logical sum of all entries in the given node. used to quickly
 356 * check for tag masks or empty nodes.
 357 */
 358
 359static uintptr_t
 360radix_tree_node_sum(const struct radix_tree_node *n)
373{ 361{
374#if RADIX_TREE_PTR_PER_NODE > 16 362#if RADIX_TREE_PTR_PER_NODE > 16
375 unsigned int i; 363 uintptr_t sum;
 364 unsigned i;
376 365
377 for (i = 0; i < RADIX_TREE_PTR_PER_NODE; i++) { 366 for (i = 0, sum = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
378 if (n->n_ptrs[i] != NULL) { 367 sum |= (uintptr_t)n->n_ptrs[i];
379 return false; 
380 } 
381 } 368 }
382 return true; 369 return sum;
383#else /* RADIX_TREE_PTR_PER_NODE > 16 */ 370#else /* RADIX_TREE_PTR_PER_NODE > 16 */
384 uintptr_t sum; 371 uintptr_t sum;
385 372
386 /* 373 /*
387 * Unrolling the above is much better than a tight loop with two 374 * Unrolling the above is much better than a tight loop with two
388 * test+branch pairs. On x86 with gcc 5.5.0 this compiles into 19 375 * test+branch pairs. On x86 with gcc 5.5.0 this compiles into 19
389 * deterministic instructions including the "return" and prologue & 376 * deterministic instructions including the "return" and prologue &
390 * epilogue. 377 * epilogue.
391 */ 378 */
392 sum = (uintptr_t)n->n_ptrs[0]; 379 sum = (uintptr_t)n->n_ptrs[0];
393 sum |= (uintptr_t)n->n_ptrs[1]; 380 sum |= (uintptr_t)n->n_ptrs[1];
394 sum |= (uintptr_t)n->n_ptrs[2]; 381 sum |= (uintptr_t)n->n_ptrs[2];
395 sum |= (uintptr_t)n->n_ptrs[3]; 382 sum |= (uintptr_t)n->n_ptrs[3];
@@ -399,27 +386,27 @@ radix_tree_node_clean_p(const struct rad @@ -399,27 +386,27 @@ radix_tree_node_clean_p(const struct rad
399 sum |= (uintptr_t)n->n_ptrs[6]; 386 sum |= (uintptr_t)n->n_ptrs[6];
400 sum |= (uintptr_t)n->n_ptrs[7]; 387 sum |= (uintptr_t)n->n_ptrs[7];
401#endif 388#endif
402#if RADIX_TREE_PTR_PER_NODE > 8 389#if RADIX_TREE_PTR_PER_NODE > 8
403 sum |= (uintptr_t)n->n_ptrs[8]; 390 sum |= (uintptr_t)n->n_ptrs[8];
404 sum |= (uintptr_t)n->n_ptrs[9]; 391 sum |= (uintptr_t)n->n_ptrs[9];
405 sum |= (uintptr_t)n->n_ptrs[10]; 392 sum |= (uintptr_t)n->n_ptrs[10];
406 sum |= (uintptr_t)n->n_ptrs[11]; 393 sum |= (uintptr_t)n->n_ptrs[11];
407 sum |= (uintptr_t)n->n_ptrs[12]; 394 sum |= (uintptr_t)n->n_ptrs[12];
408 sum |= (uintptr_t)n->n_ptrs[13]; 395 sum |= (uintptr_t)n->n_ptrs[13];
409 sum |= (uintptr_t)n->n_ptrs[14]; 396 sum |= (uintptr_t)n->n_ptrs[14];
410 sum |= (uintptr_t)n->n_ptrs[15]; 397 sum |= (uintptr_t)n->n_ptrs[15];
411#endif 398#endif
412 return sum == 0; 399 return sum;
413#endif /* RADIX_TREE_PTR_PER_NODE > 16 */ 400#endif /* RADIX_TREE_PTR_PER_NODE > 16 */
414} 401}
415 402
416static int __unused 403static int __unused
417radix_tree_node_count_ptrs(const struct radix_tree_node *n) 404radix_tree_node_count_ptrs(const struct radix_tree_node *n)
418{ 405{
419 unsigned int i, c; 406 unsigned int i, c;
420 407
421 for (i = c = 0; i < RADIX_TREE_PTR_PER_NODE; i++) { 408 for (i = c = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
422 c += (n->n_ptrs[i] != NULL); 409 c += (n->n_ptrs[i] != NULL);
423 } 410 }
424 return c; 411 return c;
425} 412}
@@ -434,35 +421,35 @@ radix_tree_alloc_node(void) @@ -434,35 +421,35 @@ radix_tree_alloc_node(void)
434 * note that pool_cache_get can block. 421 * note that pool_cache_get can block.
435 */ 422 */
436 n = pool_cache_get(radix_tree_node_cache, PR_NOWAIT); 423 n = pool_cache_get(radix_tree_node_cache, PR_NOWAIT);
437#else /* defined(_KERNEL) */ 424#else /* defined(_KERNEL) */
438#if defined(_STANDALONE) 425#if defined(_STANDALONE)
439 n = alloc(sizeof(*n)); 426 n = alloc(sizeof(*n));
440#else /* defined(_STANDALONE) */ 427#else /* defined(_STANDALONE) */
441 n = malloc(sizeof(*n)); 428 n = malloc(sizeof(*n));
442#endif /* defined(_STANDALONE) */ 429#endif /* defined(_STANDALONE) */
443 if (n != NULL) { 430 if (n != NULL) {
444 radix_tree_node_init(n); 431 radix_tree_node_init(n);
445 } 432 }
446#endif /* defined(_KERNEL) */ 433#endif /* defined(_KERNEL) */
447 KASSERT(n == NULL || radix_tree_node_clean_p(n)); 434 KASSERT(n == NULL || radix_tree_node_sum(n) == 0);
448 return n; 435 return n;
449} 436}
450 437
451static void 438static void
452radix_tree_free_node(struct radix_tree_node *n) 439radix_tree_free_node(struct radix_tree_node *n)
453{ 440{
454 441
455 KASSERT(radix_tree_node_clean_p(n)); 442 KASSERT(radix_tree_node_sum(n) == 0);
456#if defined(_KERNEL) 443#if defined(_KERNEL)
457 pool_cache_put(radix_tree_node_cache, n); 444 pool_cache_put(radix_tree_node_cache, n);
458#elif defined(_STANDALONE) 445#elif defined(_STANDALONE)
459 dealloc(n, sizeof(*n)); 446 dealloc(n, sizeof(*n));
460#else 447#else
461 free(n); 448 free(n);
462#endif 449#endif
463} 450}
464 451
465static int 452static int
466radix_tree_grow(struct radix_tree *t, unsigned int newheight) 453radix_tree_grow(struct radix_tree *t, unsigned int newheight)
467{ 454{
468 const unsigned int tagmask = entry_tagmask(t->t_root); 455 const unsigned int tagmask = entry_tagmask(t->t_root);
@@ -677,55 +664,55 @@ radix_tree_remove_node(struct radix_tree @@ -677,55 +664,55 @@ radix_tree_remove_node(struct radix_tree
677 KASSERT(path.p_lastidx == t->t_height); 664 KASSERT(path.p_lastidx == t->t_height);
678 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx)); 665 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
679 *vpp = NULL; 666 *vpp = NULL;
680 for (i = t->t_height - 1; i >= 0; i--) { 667 for (i = t->t_height - 1; i >= 0; i--) {
681 void *entry; 668 void *entry;
682 struct radix_tree_node ** const pptr = 669 struct radix_tree_node ** const pptr =
683 (struct radix_tree_node **)path_pptr(t, &path, i); 670 (struct radix_tree_node **)path_pptr(t, &path, i);
684 struct radix_tree_node *n; 671 struct radix_tree_node *n;
685 672
686 KASSERT(pptr != NULL); 673 KASSERT(pptr != NULL);
687 entry = *pptr; 674 entry = *pptr;
688 n = entry_ptr(entry); 675 n = entry_ptr(entry);
689 KASSERT(n != NULL); 676 KASSERT(n != NULL);
690 if (!radix_tree_node_clean_p(n)) { 677 if (radix_tree_node_sum(n) != 0) {
691 break; 678 break;
692 } 679 }
693 radix_tree_free_node(n); 680 radix_tree_free_node(n);
694 *pptr = NULL; 681 *pptr = NULL;
695 } 682 }
696 /* 683 /*
697 * fix up height 684 * fix up height
698 */ 685 */
699 if (i < 0) { 686 if (i < 0) {
700 KASSERT(t->t_root == NULL); 687 KASSERT(t->t_root == NULL);
701 t->t_height = 0; 688 t->t_height = 0;
702 } 689 }
703 /* 690 /*
704 * update tags 691 * update tags
705 */ 692 */
706 for (; i >= 0; i--) { 693 for (; i >= 0; i--) {
707 void *entry; 694 void *entry;
708 struct radix_tree_node ** const pptr = 695 struct radix_tree_node ** const pptr =
709 (struct radix_tree_node **)path_pptr(t, &path, i); 696 (struct radix_tree_node **)path_pptr(t, &path, i);
710 struct radix_tree_node *n; 697 struct radix_tree_node *n;
711 unsigned int newmask; 698 unsigned int newmask;
712 699
713 KASSERT(pptr != NULL); 700 KASSERT(pptr != NULL);
714 entry = *pptr; 701 entry = *pptr;
715 n = entry_ptr(entry); 702 n = entry_ptr(entry);
716 KASSERT(n != NULL); 703 KASSERT(n != NULL);
717 KASSERT(!radix_tree_node_clean_p(n)); 704 KASSERT(radix_tree_node_sum(n) != 0);
718 newmask = any_children_tagmask(n); 705 newmask = radix_tree_node_sum(n) & RADIX_TREE_TAG_MASK;
719 if (newmask == entry_tagmask(entry)) { 706 if (newmask == entry_tagmask(entry)) {
720 break; 707 break;
721 } 708 }
722 *pptr = entry_compose(n, newmask); 709 *pptr = entry_compose(n, newmask);
723 } 710 }
724 /* 711 /*
725 * XXX is it worth to try to reduce height? 712 * XXX is it worth to try to reduce height?
726 * if we do that, make radix_tree_grow rollback its change as well. 713 * if we do that, make radix_tree_grow rollback its change as well.
727 */ 714 */
728 return entry_ptr(oldp); 715 return entry_ptr(oldp);
729} 716}
730 717
731/* 718/*
@@ -1081,27 +1068,27 @@ radix_tree_clear_tag(struct radix_tree * @@ -1081,27 +1068,27 @@ radix_tree_clear_tag(struct radix_tree *
1081 void *entry; 1068 void *entry;
1082 1069
1083 KASSERT(pptr != NULL); 1070 KASSERT(pptr != NULL);
1084 entry = *pptr; 1071 entry = *pptr;
1085 KASSERT((entry_tagmask(entry) & tagmask) != 0); 1072 KASSERT((entry_tagmask(entry) & tagmask) != 0);
1086 *pptr = entry_compose(entry_ptr(entry), 1073 *pptr = entry_compose(entry_ptr(entry),
1087 entry_tagmask(entry) & ~tagmask); 1074 entry_tagmask(entry) & ~tagmask);
1088 /* 1075 /*
1089 * check if we should proceed to process the next level. 1076 * check if we should proceed to process the next level.
1090 */ 1077 */
1091 if (0 < i) { 1078 if (0 < i) {
1092 struct radix_tree_node *n = path_node(t, &path, i - 1); 1079 struct radix_tree_node *n = path_node(t, &path, i - 1);
1093 1080
1094 if ((any_children_tagmask(n) & tagmask) != 0) { 1081 if ((radix_tree_node_sum(n) & tagmask) != 0) {
1095 break; 1082 break;
1096 } 1083 }
1097 } 1084 }
1098 } 1085 }
1099} 1086}
1100 1087
1101#if defined(UNITTEST) 1088#if defined(UNITTEST)
1102 1089
1103#include <inttypes.h> 1090#include <inttypes.h>
1104#include <stdio.h> 1091#include <stdio.h>
1105 1092
1106static void 1093static void
1107radix_tree_dump_node(const struct radix_tree *t, void *vp, 1094radix_tree_dump_node(const struct radix_tree *t, void *vp,
@@ -1114,27 +1101,28 @@ radix_tree_dump_node(const struct radix_ @@ -1114,27 +1101,28 @@ radix_tree_dump_node(const struct radix_
1114 printf(" "); 1101 printf(" ");
1115 } 1102 }
1116 if (entry_tagmask(vp) == 0) { 1103 if (entry_tagmask(vp) == 0) {
1117 printf("[%" PRIu64 "] %p", offset, entry_ptr(vp)); 1104 printf("[%" PRIu64 "] %p", offset, entry_ptr(vp));
1118 } else { 1105 } else {
1119 printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp), 1106 printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp),
1120 entry_tagmask(vp)); 1107 entry_tagmask(vp));
1121 } 1108 }
1122 if (height == 0) { 1109 if (height == 0) {
1123 printf(" (leaf)\n"); 1110 printf(" (leaf)\n");
1124 return; 1111 return;
1125 } 1112 }
1126 n = entry_ptr(vp); 1113 n = entry_ptr(vp);
1127 assert(any_children_tagmask(n) == entry_tagmask(vp)); 1114 assert((radix_tree_node_sum(n) & RADIX_TREE_TAG_MASK) ==
 1115 entry_tagmask(vp));
1128 printf(" (%u children)\n", radix_tree_node_count_ptrs(n)); 1116 printf(" (%u children)\n", radix_tree_node_count_ptrs(n));
1129 for (i = 0; i < __arraycount(n->n_ptrs); i++) { 1117 for (i = 0; i < __arraycount(n->n_ptrs); i++) {
1130 void *c; 1118 void *c;
1131 1119
1132 c = n->n_ptrs[i]; 1120 c = n->n_ptrs[i];
1133 if (c == NULL) { 1121 if (c == NULL) {
1134 continue; 1122 continue;
1135 } 1123 }
1136 radix_tree_dump_node(t, c, 1124 radix_tree_dump_node(t, c,
1137 offset + i * (UINT64_C(1) << 1125 offset + i * (UINT64_C(1) <<
1138 (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1); 1126 (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1);
1139 } 1127 }
1140} 1128}