| @@ -1,1013 +1,1014 @@ | | | @@ -1,1013 +1,1014 @@ |
1 | /* $NetBSD: i915_active.c,v 1.10 2021/12/24 00:14:03 riastradh Exp $ */ | | 1 | /* $NetBSD: i915_active.c,v 1.11 2022/02/14 20:37:51 riastradh Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * SPDX-License-Identifier: MIT | | 4 | * SPDX-License-Identifier: MIT |
5 | * | | 5 | * |
6 | * Copyright © 2019 Intel Corporation | | 6 | * Copyright © 2019 Intel Corporation |
7 | */ | | 7 | */ |
8 | | | 8 | |
9 | #include <sys/cdefs.h> | | 9 | #include <sys/cdefs.h> |
10 | __KERNEL_RCSID(0, "$NetBSD: i915_active.c,v 1.10 2021/12/24 00:14:03 riastradh Exp $"); | | 10 | __KERNEL_RCSID(0, "$NetBSD: i915_active.c,v 1.11 2022/02/14 20:37:51 riastradh Exp $"); |
11 | | | 11 | |
12 | #include <linux/debugobjects.h> | | 12 | #include <linux/debugobjects.h> |
13 | | | 13 | |
14 | #include "gt/intel_context.h" | | 14 | #include "gt/intel_context.h" |
15 | #include "gt/intel_engine_pm.h" | | 15 | #include "gt/intel_engine_pm.h" |
16 | #include "gt/intel_ring.h" | | 16 | #include "gt/intel_ring.h" |
17 | | | 17 | |
18 | #include "i915_drv.h" | | 18 | #include "i915_drv.h" |
19 | #include "i915_active.h" | | 19 | #include "i915_active.h" |
20 | #include "i915_globals.h" | | 20 | #include "i915_globals.h" |
21 | | | 21 | |
22 | #include <linux/nbsd-namespace.h> | | 22 | #include <linux/nbsd-namespace.h> |
23 | | | 23 | |
24 | /* | | 24 | /* |
25 | * Active refs memory management | | 25 | * Active refs memory management |
26 | * | | 26 | * |
27 | * To be more economical with memory, we reap all the i915_active trees as | | 27 | * To be more economical with memory, we reap all the i915_active trees as |
28 | * they idle (when we know the active requests are inactive) and allocate the | | 28 | * they idle (when we know the active requests are inactive) and allocate the |
29 | * nodes from a local slab cache to hopefully reduce the fragmentation. | | 29 | * nodes from a local slab cache to hopefully reduce the fragmentation. |
30 | */ | | 30 | */ |
31 | static struct i915_global_active { | | 31 | static struct i915_global_active { |
32 | struct i915_global base; | | 32 | struct i915_global base; |
33 | struct kmem_cache *slab_cache; | | 33 | struct kmem_cache *slab_cache; |
34 | } global; | | 34 | } global; |
35 | | | 35 | |
36 | struct active_node { | | 36 | struct active_node { |
37 | struct i915_active_fence base; | | 37 | struct i915_active_fence base; |
38 | struct i915_active *ref; | | 38 | struct i915_active *ref; |
39 | struct rb_node node; | | 39 | struct rb_node node; |
40 | u64 timeline; | | 40 | u64 timeline; |
41 | struct intel_engine_cs *engine; | | 41 | struct intel_engine_cs *engine; |
42 | }; | | 42 | }; |
43 | | | 43 | |
44 | static inline struct active_node * | | 44 | static inline struct active_node * |
45 | node_from_active(struct i915_active_fence *active) | | 45 | node_from_active(struct i915_active_fence *active) |
46 | { | | 46 | { |
47 | return container_of(active, struct active_node, base); | | 47 | return container_of(active, struct active_node, base); |
48 | } | | 48 | } |
49 | | | 49 | |
50 | #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) | | 50 | #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) |
51 | | | 51 | |
52 | static inline bool is_barrier(const struct i915_active_fence *active) | | 52 | static inline bool is_barrier(const struct i915_active_fence *active) |
53 | { | | 53 | { |
54 | return IS_ERR(rcu_access_pointer(active->fence)); | | 54 | return IS_ERR(rcu_access_pointer(active->fence)); |
55 | } | | 55 | } |
56 | | | 56 | |
57 | static inline struct llist_node *barrier_to_ll(struct active_node *node) | | 57 | static inline struct llist_node *barrier_to_ll(struct active_node *node) |
58 | { | | 58 | { |
59 | GEM_BUG_ON(!is_barrier(&node->base)); | | 59 | GEM_BUG_ON(!is_barrier(&node->base)); |
60 | return &node->base.llist; | | 60 | return &node->base.llist; |
61 | } | | 61 | } |
62 | | | 62 | |
63 | static inline struct intel_engine_cs * | | 63 | static inline struct intel_engine_cs * |
64 | __barrier_to_engine(struct active_node *node) | | 64 | __barrier_to_engine(struct active_node *node) |
65 | { | | 65 | { |
66 | return READ_ONCE(node->engine); | | 66 | return READ_ONCE(node->engine); |
67 | } | | 67 | } |
68 | | | 68 | |
69 | static inline struct intel_engine_cs * | | 69 | static inline struct intel_engine_cs * |
70 | barrier_to_engine(struct active_node *node) | | 70 | barrier_to_engine(struct active_node *node) |
71 | { | | 71 | { |
72 | GEM_BUG_ON(!is_barrier(&node->base)); | | 72 | GEM_BUG_ON(!is_barrier(&node->base)); |
73 | return __barrier_to_engine(node); | | 73 | return __barrier_to_engine(node); |
74 | } | | 74 | } |
75 | | | 75 | |
76 | static inline struct active_node *barrier_from_ll(struct llist_node *x) | | 76 | static inline struct active_node *barrier_from_ll(struct llist_node *x) |
77 | { | | 77 | { |
78 | return container_of(x, struct active_node, base.llist); | | 78 | return container_of(x, struct active_node, base.llist); |
79 | } | | 79 | } |
80 | | | 80 | |
81 | #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) | | 81 | #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) |
82 | | | 82 | |
83 | static void *active_debug_hint(void *addr) | | 83 | static void *active_debug_hint(void *addr) |
84 | { | | 84 | { |
85 | struct i915_active *ref = addr; | | 85 | struct i915_active *ref = addr; |
86 | | | 86 | |
87 | return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref; | | 87 | return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref; |
88 | } | | 88 | } |
89 | | | 89 | |
90 | static struct debug_obj_descr active_debug_desc = { | | 90 | static struct debug_obj_descr active_debug_desc = { |
91 | .name = "i915_active", | | 91 | .name = "i915_active", |
92 | .debug_hint = active_debug_hint, | | 92 | .debug_hint = active_debug_hint, |
93 | }; | | 93 | }; |
94 | | | 94 | |
95 | static void debug_active_init(struct i915_active *ref) | | 95 | static void debug_active_init(struct i915_active *ref) |
96 | { | | 96 | { |
97 | debug_object_init(ref, &active_debug_desc); | | 97 | debug_object_init(ref, &active_debug_desc); |
98 | } | | 98 | } |
99 | | | 99 | |
100 | static void debug_active_activate(struct i915_active *ref) | | 100 | static void debug_active_activate(struct i915_active *ref) |
101 | { | | 101 | { |
102 | lockdep_assert_held(&ref->tree_lock); | | 102 | lockdep_assert_held(&ref->tree_lock); |
103 | if (!atomic_read(&ref->count)) /* before the first inc */ | | 103 | if (!atomic_read(&ref->count)) /* before the first inc */ |
104 | debug_object_activate(ref, &active_debug_desc); | | 104 | debug_object_activate(ref, &active_debug_desc); |
105 | } | | 105 | } |
106 | | | 106 | |
107 | static void debug_active_deactivate(struct i915_active *ref) | | 107 | static void debug_active_deactivate(struct i915_active *ref) |
108 | { | | 108 | { |
109 | lockdep_assert_held(&ref->tree_lock); | | 109 | lockdep_assert_held(&ref->tree_lock); |
110 | if (!atomic_read(&ref->count)) /* after the last dec */ | | 110 | if (!atomic_read(&ref->count)) /* after the last dec */ |
111 | debug_object_deactivate(ref, &active_debug_desc); | | 111 | debug_object_deactivate(ref, &active_debug_desc); |
112 | } | | 112 | } |
113 | | | 113 | |
114 | static void debug_active_fini(struct i915_active *ref) | | 114 | static void debug_active_fini(struct i915_active *ref) |
115 | { | | 115 | { |
116 | debug_object_free(ref, &active_debug_desc); | | 116 | debug_object_free(ref, &active_debug_desc); |
117 | } | | 117 | } |
118 | | | 118 | |
119 | static void debug_active_assert(struct i915_active *ref) | | 119 | static void debug_active_assert(struct i915_active *ref) |
120 | { | | 120 | { |
121 | debug_object_assert_init(ref, &active_debug_desc); | | 121 | debug_object_assert_init(ref, &active_debug_desc); |
122 | } | | 122 | } |
123 | | | 123 | |
124 | #else | | 124 | #else |
125 | | | 125 | |
126 | static inline void debug_active_init(struct i915_active *ref) { } | | 126 | static inline void debug_active_init(struct i915_active *ref) { } |
127 | static inline void debug_active_activate(struct i915_active *ref) { } | | 127 | static inline void debug_active_activate(struct i915_active *ref) { } |
128 | static inline void debug_active_deactivate(struct i915_active *ref) { } | | 128 | static inline void debug_active_deactivate(struct i915_active *ref) { } |
129 | static inline void debug_active_fini(struct i915_active *ref) { } | | 129 | static inline void debug_active_fini(struct i915_active *ref) { } |
130 | static inline void debug_active_assert(struct i915_active *ref) { } | | 130 | static inline void debug_active_assert(struct i915_active *ref) { } |
131 | | | 131 | |
132 | #endif | | 132 | #endif |
133 | | | 133 | |
134 | #ifdef __NetBSD__ | | 134 | #ifdef __NetBSD__ |
135 | | | 135 | |
136 | static int | | 136 | static int |
137 | compare_nodes(void *cookie, const void *va, const void *vb) | | 137 | compare_nodes(void *cookie, const void *va, const void *vb) |
138 | { | | 138 | { |
139 | const struct active_node *a = va; | | 139 | const struct active_node *a = va; |
140 | const struct active_node *b = vb; | | 140 | const struct active_node *b = vb; |
141 | | | 141 | |
142 | if (a->timeline < b->timeline) | | 142 | if (a->timeline < b->timeline) |
143 | return -1; | | 143 | return -1; |
144 | if (a->timeline > b->timeline) | | 144 | if (a->timeline > b->timeline) |
145 | return +1; | | 145 | return +1; |
146 | if ((uintptr_t)a < (uintptr_t)b) | | 146 | if ((uintptr_t)a < (uintptr_t)b) |
147 | return -1; | | 147 | return -1; |
148 | if ((uintptr_t)a > (uintptr_t)b) | | 148 | if ((uintptr_t)a > (uintptr_t)b) |
149 | return +1; | | 149 | return +1; |
150 | return 0; | | 150 | return 0; |
151 | } | | 151 | } |
152 | | | 152 | |
153 | static int | | 153 | static int |
154 | compare_node_key(void *cookie, const void *vn, const void *vk) | | 154 | compare_node_key(void *cookie, const void *vn, const void *vk) |
155 | { | | 155 | { |
156 | const struct active_node *a = vn; | | 156 | const struct active_node *a = vn; |
157 | const uint64_t *k = vk; | | 157 | const uint64_t *k = vk; |
158 | | | 158 | |
159 | if (a->timeline < *k) | | 159 | if (a->timeline < *k) |
160 | return -1; | | 160 | return -1; |
161 | if (a->timeline > *k) | | 161 | if (a->timeline > *k) |
162 | return +1; | | 162 | return +1; |
163 | return 0; | | 163 | return 0; |
164 | } | | 164 | } |
165 | | | 165 | |
166 | static const rb_tree_ops_t active_rb_ops = { | | 166 | static const rb_tree_ops_t active_rb_ops = { |
167 | .rbto_compare_nodes = compare_nodes, | | 167 | .rbto_compare_nodes = compare_nodes, |
168 | .rbto_compare_key = compare_node_key, | | 168 | .rbto_compare_key = compare_node_key, |
169 | .rbto_node_offset = offsetof(struct active_node, node), | | 169 | .rbto_node_offset = offsetof(struct active_node, node), |
170 | }; | | 170 | }; |
171 | | | 171 | |
172 | #endif | | 172 | #endif |
173 | | | 173 | |
174 | static void | | 174 | static void |
175 | __active_retire(struct i915_active *ref) | | 175 | __active_retire(struct i915_active *ref) |
176 | { | | 176 | { |
177 | struct active_node *it, *n; | | 177 | struct active_node *it, *n; |
178 | struct rb_root root; | | 178 | struct rb_root root; |
179 | unsigned long flags; | | 179 | unsigned long flags; |
180 | | | 180 | |
181 | GEM_BUG_ON(i915_active_is_idle(ref)); | | 181 | GEM_BUG_ON(i915_active_is_idle(ref)); |
182 | | | 182 | |
183 | /* return the unused nodes to our slabcache -- flushing the allocator */ | | 183 | /* return the unused nodes to our slabcache -- flushing the allocator */ |
184 | if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags)) | | 184 | if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags)) |
185 | return; | | 185 | return; |
186 | | | 186 | |
187 | GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); | | 187 | GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); |
188 | debug_active_deactivate(ref); | | 188 | debug_active_deactivate(ref); |
189 | | | 189 | |
190 | root = ref->tree; | | 190 | root = ref->tree; |
191 | #ifdef __NetBSD__ | | 191 | #ifdef __NetBSD__ |
192 | rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops); | | 192 | rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops); |
193 | #else | | 193 | #else |
194 | ref->tree = RB_ROOT; | | 194 | ref->tree = RB_ROOT; |
195 | #endif | | 195 | #endif |
196 | ref->cache = NULL; | | 196 | ref->cache = NULL; |
197 | | | 197 | |
198 | DRM_SPIN_WAKEUP_ALL(&ref->tree_wq, &ref->tree_lock); | | | |
199 | | | | |
200 | spin_unlock_irqrestore(&ref->tree_lock, flags); | | 198 | spin_unlock_irqrestore(&ref->tree_lock, flags); |
201 | | | 199 | |
202 | /* After the final retire, the entire struct may be freed */ | | 200 | /* After the final retire, the entire struct may be freed */ |
203 | if (ref->retire) | | 201 | if (ref->retire) |
204 | ref->retire(ref); | | 202 | ref->retire(ref); |
205 | | | 203 | |
206 | /* ... except if you wait on it, you must manage your own references! */ | | 204 | /* ... except if you wait on it, you must manage your own references! */ |
| | | 205 | spin_lock(&ref->tree_lock); |
| | | 206 | DRM_SPIN_WAKEUP_ALL(&ref->tree_wq, &ref->tree_lock); |
| | | 207 | spin_unlock(&ref->tree_lock); |
207 | | | 208 | |
208 | rbtree_postorder_for_each_entry_safe(it, n, &root, node) { | | 209 | rbtree_postorder_for_each_entry_safe(it, n, &root, node) { |
209 | GEM_BUG_ON(i915_active_fence_isset(&it->base)); | | 210 | GEM_BUG_ON(i915_active_fence_isset(&it->base)); |
210 | kmem_cache_free(global.slab_cache, it); | | 211 | kmem_cache_free(global.slab_cache, it); |
211 | } | | 212 | } |
212 | } | | 213 | } |
213 | | | 214 | |
214 | static void | | 215 | static void |
215 | active_work(struct work_struct *wrk) | | 216 | active_work(struct work_struct *wrk) |
216 | { | | 217 | { |
217 | struct i915_active *ref = container_of(wrk, typeof(*ref), work); | | 218 | struct i915_active *ref = container_of(wrk, typeof(*ref), work); |
218 | | | 219 | |
219 | GEM_BUG_ON(!atomic_read(&ref->count)); | | 220 | GEM_BUG_ON(!atomic_read(&ref->count)); |
220 | if (atomic_add_unless(&ref->count, -1, 1)) | | 221 | if (atomic_add_unless(&ref->count, -1, 1)) |
221 | return; | | 222 | return; |
222 | | | 223 | |
223 | __active_retire(ref); | | 224 | __active_retire(ref); |
224 | } | | 225 | } |
225 | | | 226 | |
226 | static void | | 227 | static void |
227 | active_retire(struct i915_active *ref) | | 228 | active_retire(struct i915_active *ref) |
228 | { | | 229 | { |
229 | GEM_BUG_ON(!atomic_read(&ref->count)); | | 230 | GEM_BUG_ON(!atomic_read(&ref->count)); |
230 | if (atomic_add_unless(&ref->count, -1, 1)) | | 231 | if (atomic_add_unless(&ref->count, -1, 1)) |
231 | return; | | 232 | return; |
232 | | | 233 | |
233 | if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) { | | 234 | if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) { |
234 | queue_work(system_unbound_wq, &ref->work); | | 235 | queue_work(system_unbound_wq, &ref->work); |
235 | return; | | 236 | return; |
236 | } | | 237 | } |
237 | | | 238 | |
238 | __active_retire(ref); | | 239 | __active_retire(ref); |
239 | } | | 240 | } |
240 | | | 241 | |
241 | static inline struct dma_fence ** | | 242 | static inline struct dma_fence ** |
242 | __active_fence_slot(struct i915_active_fence *active) | | 243 | __active_fence_slot(struct i915_active_fence *active) |
243 | { | | 244 | { |
244 | return (struct dma_fence ** __force)&active->fence; | | 245 | return (struct dma_fence ** __force)&active->fence; |
245 | } | | 246 | } |
246 | | | 247 | |
247 | static inline bool | | 248 | static inline bool |
248 | active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) | | 249 | active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) |
249 | { | | 250 | { |
250 | struct i915_active_fence *active = | | 251 | struct i915_active_fence *active = |
251 | container_of(cb, typeof(*active), cb); | | 252 | container_of(cb, typeof(*active), cb); |
252 | | | 253 | |
253 | return cmpxchg(__active_fence_slot(active), fence, NULL) == fence; | | 254 | return cmpxchg(__active_fence_slot(active), fence, NULL) == fence; |
254 | } | | 255 | } |
255 | | | 256 | |
256 | static void | | 257 | static void |
257 | node_retire(struct dma_fence *fence, struct dma_fence_cb *cb) | | 258 | node_retire(struct dma_fence *fence, struct dma_fence_cb *cb) |
258 | { | | 259 | { |
259 | if (active_fence_cb(fence, cb)) | | 260 | if (active_fence_cb(fence, cb)) |
260 | active_retire(container_of(cb, struct active_node, base.cb)->ref); | | 261 | active_retire(container_of(cb, struct active_node, base.cb)->ref); |
261 | } | | 262 | } |
262 | | | 263 | |
263 | static void | | 264 | static void |
264 | excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) | | 265 | excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) |
265 | { | | 266 | { |
266 | if (active_fence_cb(fence, cb)) | | 267 | if (active_fence_cb(fence, cb)) |
267 | active_retire(container_of(cb, struct i915_active, excl.cb)); | | 268 | active_retire(container_of(cb, struct i915_active, excl.cb)); |
268 | } | | 269 | } |
269 | | | 270 | |
270 | static struct i915_active_fence * | | 271 | static struct i915_active_fence * |
271 | active_instance(struct i915_active *ref, struct intel_timeline *tl) | | 272 | active_instance(struct i915_active *ref, struct intel_timeline *tl) |
272 | { | | 273 | { |
273 | struct active_node *node, *prealloc; | | 274 | struct active_node *node, *prealloc; |
274 | struct rb_node **p, *parent; | | 275 | struct rb_node **p, *parent; |
275 | u64 idx = tl->fence_context; | | 276 | u64 idx = tl->fence_context; |
276 | | | 277 | |
277 | /* | | 278 | /* |
278 | * We track the most recently used timeline to skip a rbtree search | | 279 | * We track the most recently used timeline to skip a rbtree search |
279 | * for the common case, under typical loads we never need the rbtree | | 280 | * for the common case, under typical loads we never need the rbtree |
280 | * at all. We can reuse the last slot if it is empty, that is | | 281 | * at all. We can reuse the last slot if it is empty, that is |
281 | * after the previous activity has been retired, or if it matches the | | 282 | * after the previous activity has been retired, or if it matches the |
282 | * current timeline. | | 283 | * current timeline. |
283 | */ | | 284 | */ |
284 | node = READ_ONCE(ref->cache); | | 285 | node = READ_ONCE(ref->cache); |
285 | if (node && node->timeline == idx) | | 286 | if (node && node->timeline == idx) |
286 | return &node->base; | | 287 | return &node->base; |
287 | | | 288 | |
288 | /* Preallocate a replacement, just in case */ | | 289 | /* Preallocate a replacement, just in case */ |
289 | prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); | | 290 | prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); |
290 | if (!prealloc) | | 291 | if (!prealloc) |
291 | return NULL; | | 292 | return NULL; |
292 | memset(prealloc, 0, sizeof(*prealloc)); | | 293 | memset(prealloc, 0, sizeof(*prealloc)); |
293 | | | 294 | |
294 | spin_lock_irq(&ref->tree_lock); | | 295 | spin_lock_irq(&ref->tree_lock); |
295 | GEM_BUG_ON(i915_active_is_idle(ref)); | | 296 | GEM_BUG_ON(i915_active_is_idle(ref)); |
296 | | | 297 | |
297 | #ifdef __NetBSD__ | | 298 | #ifdef __NetBSD__ |
298 | __USE(parent); | | 299 | __USE(parent); |
299 | __USE(p); | | 300 | __USE(p); |
300 | node = rb_tree_find_node(&ref->tree.rbr_tree, &idx); | | 301 | node = rb_tree_find_node(&ref->tree.rbr_tree, &idx); |
301 | if (node) { | | 302 | if (node) { |
302 | KASSERT(node->timeline == idx); | | 303 | KASSERT(node->timeline == idx); |
303 | kmem_cache_free(global.slab_cache, prealloc); | | 304 | kmem_cache_free(global.slab_cache, prealloc); |
304 | goto out; | | 305 | goto out; |
305 | } | | 306 | } |
306 | #else | | 307 | #else |
307 | parent = NULL; | | 308 | parent = NULL; |
308 | p = &ref->tree.rb_node; | | 309 | p = &ref->tree.rb_node; |
309 | while (*p) { | | 310 | while (*p) { |
310 | parent = *p; | | 311 | parent = *p; |
311 | | | 312 | |
312 | node = rb_entry(parent, struct active_node, node); | | 313 | node = rb_entry(parent, struct active_node, node); |
313 | if (node->timeline == idx) { | | 314 | if (node->timeline == idx) { |
314 | kmem_cache_free(global.slab_cache, prealloc); | | 315 | kmem_cache_free(global.slab_cache, prealloc); |
315 | goto out; | | 316 | goto out; |
316 | } | | 317 | } |
317 | | | 318 | |
318 | if (node->timeline < idx) | | 319 | if (node->timeline < idx) |
319 | p = &parent->rb_right; | | 320 | p = &parent->rb_right; |
320 | else | | 321 | else |
321 | p = &parent->rb_left; | | 322 | p = &parent->rb_left; |
322 | } | | 323 | } |
323 | #endif | | 324 | #endif |
324 | | | 325 | |
325 | node = prealloc; | | 326 | node = prealloc; |
326 | __i915_active_fence_init(&node->base, NULL, node_retire); | | 327 | __i915_active_fence_init(&node->base, NULL, node_retire); |
327 | node->ref = ref; | | 328 | node->ref = ref; |
328 | node->timeline = idx; | | 329 | node->timeline = idx; |
329 | | | 330 | |
330 | #ifdef __NetBSD__ | | 331 | #ifdef __NetBSD__ |
331 | struct active_node *collision __diagused; | | 332 | struct active_node *collision __diagused; |
332 | collision = rb_tree_insert_node(&ref->tree.rbr_tree, node); | | 333 | collision = rb_tree_insert_node(&ref->tree.rbr_tree, node); |
333 | KASSERT(collision == node); | | 334 | KASSERT(collision == node); |
334 | #else | | 335 | #else |
335 | rb_link_node(&node->node, parent, p); | | 336 | rb_link_node(&node->node, parent, p); |
336 | rb_insert_color(&node->node, &ref->tree); | | 337 | rb_insert_color(&node->node, &ref->tree); |
337 | #endif | | 338 | #endif |
338 | | | 339 | |
339 | out: | | 340 | out: |
340 | ref->cache = node; | | 341 | ref->cache = node; |
341 | spin_unlock_irq(&ref->tree_lock); | | 342 | spin_unlock_irq(&ref->tree_lock); |
342 | | | 343 | |
343 | BUILD_BUG_ON(offsetof(typeof(*node), base)); | | 344 | BUILD_BUG_ON(offsetof(typeof(*node), base)); |
344 | return &node->base; | | 345 | return &node->base; |
345 | } | | 346 | } |
346 | | | 347 | |
347 | void __i915_active_init(struct i915_active *ref, | | 348 | void __i915_active_init(struct i915_active *ref, |
348 | int (*active)(struct i915_active *ref), | | 349 | int (*active)(struct i915_active *ref), |
349 | void (*retire)(struct i915_active *ref), | | 350 | void (*retire)(struct i915_active *ref), |
350 | struct lock_class_key *mkey, | | 351 | struct lock_class_key *mkey, |
351 | struct lock_class_key *wkey) | | 352 | struct lock_class_key *wkey) |
352 | { | | 353 | { |
353 | unsigned long bits; | | 354 | unsigned long bits; |
354 | | | 355 | |
355 | debug_active_init(ref); | | 356 | debug_active_init(ref); |
356 | | | 357 | |
357 | ref->flags = 0; | | 358 | ref->flags = 0; |
358 | ref->active = active; | | 359 | ref->active = active; |
359 | ref->retire = ptr_unpack_bits(retire, &bits, 2); | | 360 | ref->retire = ptr_unpack_bits(retire, &bits, 2); |
360 | if (bits & I915_ACTIVE_MAY_SLEEP) | | 361 | if (bits & I915_ACTIVE_MAY_SLEEP) |
361 | ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; | | 362 | ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; |
362 | | | 363 | |
363 | spin_lock_init(&ref->tree_lock); | | 364 | spin_lock_init(&ref->tree_lock); |
364 | DRM_INIT_WAITQUEUE(&ref->tree_wq, "i915act"); | | 365 | DRM_INIT_WAITQUEUE(&ref->tree_wq, "i915act"); |
365 | #ifdef __NetBSD__ | | 366 | #ifdef __NetBSD__ |
366 | rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops); | | 367 | rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops); |
367 | #else | | 368 | #else |
368 | ref->tree = RB_ROOT; | | 369 | ref->tree = RB_ROOT; |
369 | #endif | | 370 | #endif |
370 | ref->cache = NULL; | | 371 | ref->cache = NULL; |
371 | | | 372 | |
372 | init_llist_head(&ref->preallocated_barriers); | | 373 | init_llist_head(&ref->preallocated_barriers); |
373 | atomic_set(&ref->count, 0); | | 374 | atomic_set(&ref->count, 0); |
374 | __mutex_init(&ref->mutex, "i915_active", mkey); | | 375 | __mutex_init(&ref->mutex, "i915_active", mkey); |
375 | __i915_active_fence_init(&ref->excl, NULL, excl_retire); | | 376 | __i915_active_fence_init(&ref->excl, NULL, excl_retire); |
376 | INIT_WORK(&ref->work, active_work); | | 377 | INIT_WORK(&ref->work, active_work); |
377 | #if IS_ENABLED(CONFIG_LOCKDEP) | | 378 | #if IS_ENABLED(CONFIG_LOCKDEP) |
378 | lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0); | | 379 | lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0); |
379 | #endif | | 380 | #endif |
380 | } | | 381 | } |
381 | | | 382 | |
382 | static bool ____active_del_barrier(struct i915_active *ref, | | 383 | static bool ____active_del_barrier(struct i915_active *ref, |
383 | struct active_node *node, | | 384 | struct active_node *node, |
384 | struct intel_engine_cs *engine) | | 385 | struct intel_engine_cs *engine) |
385 | | | 386 | |
386 | { | | 387 | { |
387 | struct llist_node *head = NULL, *tail = NULL; | | 388 | struct llist_node *head = NULL, *tail = NULL; |
388 | struct llist_node *pos, *next; | | 389 | struct llist_node *pos, *next; |
389 | | | 390 | |
390 | GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context); | | 391 | GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context); |
391 | | | 392 | |
392 | /* | | 393 | /* |
393 | * Rebuild the llist excluding our node. We may perform this | | 394 | * Rebuild the llist excluding our node. We may perform this |
394 | * outside of the kernel_context timeline mutex and so someone | | 395 | * outside of the kernel_context timeline mutex and so someone |
395 | * else may be manipulating the engine->barrier_tasks, in | | 396 | * else may be manipulating the engine->barrier_tasks, in |
396 | * which case either we or they will be upset :) | | 397 | * which case either we or they will be upset :) |
397 | * | | 398 | * |
398 | * A second __active_del_barrier() will report failure to claim | | 399 | * A second __active_del_barrier() will report failure to claim |
399 | * the active_node and the caller will just shrug and know not to | | 400 | * the active_node and the caller will just shrug and know not to |
400 | * claim ownership of its node. | | 401 | * claim ownership of its node. |
401 | * | | 402 | * |
402 | * A concurrent i915_request_add_active_barriers() will miss adding | | 403 | * A concurrent i915_request_add_active_barriers() will miss adding |
403 | * any of the tasks, but we will try again on the next -- and since | | 404 | * any of the tasks, but we will try again on the next -- and since |
404 | * we are actively using the barrier, we know that there will be | | 405 | * we are actively using the barrier, we know that there will be |
405 | * at least another opportunity when we idle. | | 406 | * at least another opportunity when we idle. |
406 | */ | | 407 | */ |
407 | llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) { | | 408 | llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) { |
408 | if (node == barrier_from_ll(pos)) { | | 409 | if (node == barrier_from_ll(pos)) { |
409 | node = NULL; | | 410 | node = NULL; |
410 | continue; | | 411 | continue; |
411 | } | | 412 | } |
412 | | | 413 | |
413 | pos->next = head; | | 414 | pos->next = head; |
414 | head = pos; | | 415 | head = pos; |
415 | if (!tail) | | 416 | if (!tail) |
416 | tail = pos; | | 417 | tail = pos; |
417 | } | | 418 | } |
418 | if (head) | | 419 | if (head) |
419 | llist_add_batch(head, tail, &engine->barrier_tasks); | | 420 | llist_add_batch(head, tail, &engine->barrier_tasks); |
420 | | | 421 | |
421 | return !node; | | 422 | return !node; |
422 | } | | 423 | } |
423 | | | 424 | |
424 | static bool | | 425 | static bool |
425 | __active_del_barrier(struct i915_active *ref, struct active_node *node) | | 426 | __active_del_barrier(struct i915_active *ref, struct active_node *node) |
426 | { | | 427 | { |
427 | return ____active_del_barrier(ref, node, barrier_to_engine(node)); | | 428 | return ____active_del_barrier(ref, node, barrier_to_engine(node)); |
428 | } | | 429 | } |
429 | | | 430 | |
430 | int i915_active_ref(struct i915_active *ref, | | 431 | int i915_active_ref(struct i915_active *ref, |
431 | struct intel_timeline *tl, | | 432 | struct intel_timeline *tl, |
432 | struct dma_fence *fence) | | 433 | struct dma_fence *fence) |
433 | { | | 434 | { |
434 | struct i915_active_fence *active; | | 435 | struct i915_active_fence *active; |
435 | int err; | | 436 | int err; |
436 | | | 437 | |
437 | lockdep_assert_held(&tl->mutex); | | 438 | lockdep_assert_held(&tl->mutex); |
438 | | | 439 | |
439 | /* Prevent reaping in case we malloc/wait while building the tree */ | | 440 | /* Prevent reaping in case we malloc/wait while building the tree */ |
440 | err = i915_active_acquire(ref); | | 441 | err = i915_active_acquire(ref); |
441 | if (err) | | 442 | if (err) |
442 | return err; | | 443 | return err; |
443 | | | 444 | |
444 | active = active_instance(ref, tl); | | 445 | active = active_instance(ref, tl); |
445 | if (!active) { | | 446 | if (!active) { |
446 | err = -ENOMEM; | | 447 | err = -ENOMEM; |
447 | goto out; | | 448 | goto out; |
448 | } | | 449 | } |
449 | | | 450 | |
450 | if (is_barrier(active)) { /* proto-node used by our idle barrier */ | | 451 | if (is_barrier(active)) { /* proto-node used by our idle barrier */ |
451 | /* | | 452 | /* |
452 | * This request is on the kernel_context timeline, and so | | 453 | * This request is on the kernel_context timeline, and so |
453 | * we can use it to substitute for the pending idle-barrier | | 454 | * we can use it to substitute for the pending idle-barrier |
454 | * request that we want to emit on the kernel_context. | | 455 | * request that we want to emit on the kernel_context. |
455 | */ | | 456 | */ |
456 | __active_del_barrier(ref, node_from_active(active)); | | 457 | __active_del_barrier(ref, node_from_active(active)); |
457 | RCU_INIT_POINTER(active->fence, NULL); | | 458 | RCU_INIT_POINTER(active->fence, NULL); |
458 | atomic_dec(&ref->count); | | 459 | atomic_dec(&ref->count); |
459 | } | | 460 | } |
460 | if (!__i915_active_fence_set(active, fence)) | | 461 | if (!__i915_active_fence_set(active, fence)) |
461 | atomic_inc(&ref->count); | | 462 | atomic_inc(&ref->count); |
462 | | | 463 | |
463 | out: | | 464 | out: |
464 | i915_active_release(ref); | | 465 | i915_active_release(ref); |
465 | return err; | | 466 | return err; |
466 | } | | 467 | } |
467 | | | 468 | |
468 | void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) | | 469 | void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) |
469 | { | | 470 | { |
470 | /* We expect the caller to manage the exclusive timeline ordering */ | | 471 | /* We expect the caller to manage the exclusive timeline ordering */ |
471 | GEM_BUG_ON(i915_active_is_idle(ref)); | | 472 | GEM_BUG_ON(i915_active_is_idle(ref)); |
472 | | | 473 | |
473 | if (!__i915_active_fence_set(&ref->excl, f)) | | 474 | if (!__i915_active_fence_set(&ref->excl, f)) |
474 | atomic_inc(&ref->count); | | 475 | atomic_inc(&ref->count); |
475 | } | | 476 | } |
476 | | | 477 | |
477 | bool i915_active_acquire_if_busy(struct i915_active *ref) | | 478 | bool i915_active_acquire_if_busy(struct i915_active *ref) |
478 | { | | 479 | { |
479 | debug_active_assert(ref); | | 480 | debug_active_assert(ref); |
480 | return atomic_add_unless(&ref->count, 1, 0); | | 481 | return atomic_add_unless(&ref->count, 1, 0); |
481 | } | | 482 | } |
482 | | | 483 | |
483 | int i915_active_acquire(struct i915_active *ref) | | 484 | int i915_active_acquire(struct i915_active *ref) |
484 | { | | 485 | { |
485 | int err; | | 486 | int err; |
486 | | | 487 | |
487 | if (i915_active_acquire_if_busy(ref)) | | 488 | if (i915_active_acquire_if_busy(ref)) |
488 | return 0; | | 489 | return 0; |
489 | | | 490 | |
490 | err = mutex_lock_interruptible(&ref->mutex); | | 491 | err = mutex_lock_interruptible(&ref->mutex); |
491 | if (err) | | 492 | if (err) |
492 | return err; | | 493 | return err; |
493 | | | 494 | |
494 | if (likely(!i915_active_acquire_if_busy(ref))) { | | 495 | if (likely(!i915_active_acquire_if_busy(ref))) { |
495 | if (ref->active) | | 496 | if (ref->active) |
496 | err = ref->active(ref); | | 497 | err = ref->active(ref); |
497 | if (!err) { | | 498 | if (!err) { |
498 | spin_lock_irq(&ref->tree_lock); /* __active_retire() */ | | 499 | spin_lock_irq(&ref->tree_lock); /* __active_retire() */ |
499 | debug_active_activate(ref); | | 500 | debug_active_activate(ref); |
500 | atomic_inc(&ref->count); | | 501 | atomic_inc(&ref->count); |
501 | spin_unlock_irq(&ref->tree_lock); | | 502 | spin_unlock_irq(&ref->tree_lock); |
502 | } | | 503 | } |
503 | } | | 504 | } |
504 | | | 505 | |
505 | mutex_unlock(&ref->mutex); | | 506 | mutex_unlock(&ref->mutex); |
506 | | | 507 | |
507 | return err; | | 508 | return err; |
508 | } | | 509 | } |
509 | | | 510 | |
510 | void i915_active_release(struct i915_active *ref) | | 511 | void i915_active_release(struct i915_active *ref) |
511 | { | | 512 | { |
512 | debug_active_assert(ref); | | 513 | debug_active_assert(ref); |
513 | active_retire(ref); | | 514 | active_retire(ref); |
514 | } | | 515 | } |
515 | | | 516 | |
516 | static void enable_signaling(struct i915_active_fence *active) | | 517 | static void enable_signaling(struct i915_active_fence *active) |
517 | { | | 518 | { |
518 | struct dma_fence *fence; | | 519 | struct dma_fence *fence; |
519 | | | 520 | |
520 | fence = i915_active_fence_get(active); | | 521 | fence = i915_active_fence_get(active); |
521 | if (!fence) | | 522 | if (!fence) |
522 | return; | | 523 | return; |
523 | | | 524 | |
524 | dma_fence_enable_sw_signaling(fence); | | 525 | dma_fence_enable_sw_signaling(fence); |
525 | dma_fence_put(fence); | | 526 | dma_fence_put(fence); |
526 | } | | 527 | } |
527 | | | 528 | |
528 | int i915_active_wait(struct i915_active *ref) | | 529 | int i915_active_wait(struct i915_active *ref) |
529 | { | | 530 | { |
530 | struct active_node *it, *n; | | 531 | struct active_node *it, *n; |
531 | int err = 0; | | 532 | int err = 0; |
532 | | | 533 | |
533 | might_sleep(); | | 534 | might_sleep(); |
534 | | | 535 | |
535 | if (!i915_active_acquire_if_busy(ref)) | | 536 | if (!i915_active_acquire_if_busy(ref)) |
536 | return 0; | | 537 | return 0; |
537 | | | 538 | |
538 | /* Flush lazy signals */ | | 539 | /* Flush lazy signals */ |
539 | enable_signaling(&ref->excl); | | 540 | enable_signaling(&ref->excl); |
540 | rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { | | 541 | rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { |
541 | if (is_barrier(&it->base)) /* unconnected idle barrier */ | | 542 | if (is_barrier(&it->base)) /* unconnected idle barrier */ |
542 | continue; | | 543 | continue; |
543 | | | 544 | |
544 | enable_signaling(&it->base); | | 545 | enable_signaling(&it->base); |
545 | } | | 546 | } |
546 | /* Any fence added after the wait begins will not be auto-signaled */ | | 547 | /* Any fence added after the wait begins will not be auto-signaled */ |
547 | | | 548 | |
548 | i915_active_release(ref); | | 549 | i915_active_release(ref); |
549 | if (err) | | 550 | if (err) |
550 | return err; | | 551 | return err; |
551 | | | 552 | |
552 | spin_lock(&ref->tree_lock); | | 553 | spin_lock(&ref->tree_lock); |
553 | DRM_SPIN_WAIT_UNTIL(err, &ref->tree_wq, &ref->tree_lock, | | 554 | DRM_SPIN_WAIT_UNTIL(err, &ref->tree_wq, &ref->tree_lock, |
554 | i915_active_is_idle(ref)); | | 555 | i915_active_is_idle(ref)); |
555 | spin_unlock(&ref->tree_lock); | | 556 | spin_unlock(&ref->tree_lock); |
556 | if (err) | | 557 | if (err) |
557 | return err; | | 558 | return err; |
558 | | | 559 | |
559 | flush_work(&ref->work); | | 560 | flush_work(&ref->work); |
560 | return 0; | | 561 | return 0; |
561 | } | | 562 | } |
562 | | | 563 | |
563 | int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) | | 564 | int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) |
564 | { | | 565 | { |
565 | int err = 0; | | 566 | int err = 0; |
566 | | | 567 | |
567 | if (rcu_access_pointer(ref->excl.fence)) { | | 568 | if (rcu_access_pointer(ref->excl.fence)) { |
568 | struct dma_fence *fence; | | 569 | struct dma_fence *fence; |
569 | | | 570 | |
570 | rcu_read_lock(); | | 571 | rcu_read_lock(); |
571 | fence = dma_fence_get_rcu_safe(&ref->excl.fence); | | 572 | fence = dma_fence_get_rcu_safe(&ref->excl.fence); |
572 | rcu_read_unlock(); | | 573 | rcu_read_unlock(); |
573 | if (fence) { | | 574 | if (fence) { |
574 | err = i915_request_await_dma_fence(rq, fence); | | 575 | err = i915_request_await_dma_fence(rq, fence); |
575 | dma_fence_put(fence); | | 576 | dma_fence_put(fence); |
576 | } | | 577 | } |
577 | } | | 578 | } |
578 | | | 579 | |
579 | /* In the future we may choose to await on all fences */ | | 580 | /* In the future we may choose to await on all fences */ |
580 | | | 581 | |
581 | return err; | | 582 | return err; |
582 | } | | 583 | } |
583 | | | 584 | |
584 | void i915_active_fini(struct i915_active *ref) | | 585 | void i915_active_fini(struct i915_active *ref) |
585 | { | | 586 | { |
586 | debug_active_fini(ref); | | 587 | debug_active_fini(ref); |
587 | GEM_BUG_ON(atomic_read(&ref->count)); | | 588 | GEM_BUG_ON(atomic_read(&ref->count)); |
588 | GEM_BUG_ON(work_pending(&ref->work)); | | 589 | GEM_BUG_ON(work_pending(&ref->work)); |
589 | GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); | | 590 | GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); |
590 | mutex_destroy(&ref->mutex); | | 591 | mutex_destroy(&ref->mutex); |
591 | spin_lock_destroy(&ref->tree_lock); | | 592 | spin_lock_destroy(&ref->tree_lock); |
592 | } | | 593 | } |
593 | | | 594 | |
594 | static inline bool is_idle_barrier(struct active_node *node, u64 idx) | | 595 | static inline bool is_idle_barrier(struct active_node *node, u64 idx) |
595 | { | | 596 | { |
596 | return node->timeline == idx && !i915_active_fence_isset(&node->base); | | 597 | return node->timeline == idx && !i915_active_fence_isset(&node->base); |
597 | } | | 598 | } |
598 | | | 599 | |
599 | static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) | | 600 | static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) |
600 | { | | 601 | { |
601 | struct rb_node *prev, *p; | | 602 | struct rb_node *prev, *p; |
602 | | | 603 | |
603 | if (RB_EMPTY_ROOT(&ref->tree)) | | 604 | if (RB_EMPTY_ROOT(&ref->tree)) |
604 | return NULL; | | 605 | return NULL; |
605 | | | 606 | |
606 | spin_lock_irq(&ref->tree_lock); | | 607 | spin_lock_irq(&ref->tree_lock); |
607 | GEM_BUG_ON(i915_active_is_idle(ref)); | | 608 | GEM_BUG_ON(i915_active_is_idle(ref)); |
608 | | | 609 | |
609 | /* | | 610 | /* |
610 | * Try to reuse any existing barrier nodes already allocated for this | | 611 | * Try to reuse any existing barrier nodes already allocated for this |
611 | * i915_active, due to overlapping active phases there is likely a | | 612 | * i915_active, due to overlapping active phases there is likely a |
612 | * node kept alive (as we reuse before parking). We prefer to reuse | | 613 | * node kept alive (as we reuse before parking). We prefer to reuse |
613 | * completely idle barriers (less hassle in manipulating the llists), | | 614 | * completely idle barriers (less hassle in manipulating the llists), |
614 | * but otherwise any will do. | | 615 | * but otherwise any will do. |
615 | */ | | 616 | */ |
616 | if (ref->cache && is_idle_barrier(ref->cache, idx)) { | | 617 | if (ref->cache && is_idle_barrier(ref->cache, idx)) { |
617 | p = &ref->cache->node; | | 618 | p = &ref->cache->node; |
618 | goto match; | | 619 | goto match; |
619 | } | | 620 | } |
620 | | | 621 | |
621 | #ifdef __NetBSD__ | | 622 | #ifdef __NetBSD__ |
622 | { | | 623 | { |
623 | struct active_node *node = | | 624 | struct active_node *node = |
624 | rb_tree_find_node_leq(&ref->tree.rbr_tree, &idx); | | 625 | rb_tree_find_node_leq(&ref->tree.rbr_tree, &idx); |
625 | if (node) { | | 626 | if (node) { |
626 | if (node->timeline == idx && is_idle_barrier(node, idx)) { | | 627 | if (node->timeline == idx && is_idle_barrier(node, idx)) { |
627 | p = &node->node; | | 628 | p = &node->node; |
628 | goto match; | | 629 | goto match; |
629 | } | | 630 | } |
630 | prev = &node->node; | | 631 | prev = &node->node; |
631 | } else { | | 632 | } else { |
632 | prev = NULL; | | 633 | prev = NULL; |
633 | } | | 634 | } |
634 | } | | 635 | } |
635 | #else | | 636 | #else |
636 | prev = NULL; | | 637 | prev = NULL; |
637 | p = ref->tree.rb_node; | | 638 | p = ref->tree.rb_node; |
638 | while (p) { | | 639 | while (p) { |
639 | struct active_node *node = | | 640 | struct active_node *node = |
640 | rb_entry(p, struct active_node, node); | | 641 | rb_entry(p, struct active_node, node); |
641 | | | 642 | |
642 | if (is_idle_barrier(node, idx)) | | 643 | if (is_idle_barrier(node, idx)) |
643 | goto match; | | 644 | goto match; |
644 | | | 645 | |
645 | prev = p; | | 646 | prev = p; |
646 | if (node->timeline < idx) | | 647 | if (node->timeline < idx) |
647 | p = p->rb_right; | | 648 | p = p->rb_right; |
648 | else | | 649 | else |
649 | p = p->rb_left; | | 650 | p = p->rb_left; |
650 | } | | 651 | } |
651 | #endif | | 652 | #endif |
652 | | | 653 | |
653 | /* | | 654 | /* |
654 | * No quick match, but we did find the leftmost rb_node for the | | 655 | * No quick match, but we did find the leftmost rb_node for the |
655 | * kernel_context. Walk the rb_tree in-order to see if there were | | 656 | * kernel_context. Walk the rb_tree in-order to see if there were |
656 | * any idle-barriers on this timeline that we missed, or just use | | 657 | * any idle-barriers on this timeline that we missed, or just use |
657 | * the first pending barrier. | | 658 | * the first pending barrier. |
658 | */ | | 659 | */ |
659 | for (p = prev; p; p = rb_next2(&ref->tree, p)) { | | 660 | for (p = prev; p; p = rb_next2(&ref->tree, p)) { |
660 | struct active_node *node = | | 661 | struct active_node *node = |
661 | rb_entry(p, struct active_node, node); | | 662 | rb_entry(p, struct active_node, node); |
662 | struct intel_engine_cs *engine; | | 663 | struct intel_engine_cs *engine; |
663 | | | 664 | |
664 | if (node->timeline > idx) | | 665 | if (node->timeline > idx) |
665 | break; | | 666 | break; |
666 | | | 667 | |
667 | if (node->timeline < idx) | | 668 | if (node->timeline < idx) |
668 | continue; | | 669 | continue; |
669 | | | 670 | |
670 | if (is_idle_barrier(node, idx)) | | 671 | if (is_idle_barrier(node, idx)) |
671 | goto match; | | 672 | goto match; |
672 | | | 673 | |
673 | /* | | 674 | /* |
674 | * The list of pending barriers is protected by the | | 675 | * The list of pending barriers is protected by the |
675 | * kernel_context timeline, which notably we do not hold | | 676 | * kernel_context timeline, which notably we do not hold |
676 | * here. i915_request_add_active_barriers() may consume | | 677 | * here. i915_request_add_active_barriers() may consume |
677 | * the barrier before we claim it, so we have to check | | 678 | * the barrier before we claim it, so we have to check |
678 | * for success. | | 679 | * for success. |
679 | */ | | 680 | */ |
680 | engine = __barrier_to_engine(node); | | 681 | engine = __barrier_to_engine(node); |
681 | smp_rmb(); /* serialise with add_active_barriers */ | | 682 | smp_rmb(); /* serialise with add_active_barriers */ |
682 | if (is_barrier(&node->base) && | | 683 | if (is_barrier(&node->base) && |
683 | ____active_del_barrier(ref, node, engine)) | | 684 | ____active_del_barrier(ref, node, engine)) |
684 | goto match; | | 685 | goto match; |
685 | } | | 686 | } |
686 | | | 687 | |
687 | spin_unlock_irq(&ref->tree_lock); | | 688 | spin_unlock_irq(&ref->tree_lock); |
688 | | | 689 | |
689 | return NULL; | | 690 | return NULL; |
690 | | | 691 | |
691 | match: | | 692 | match: |
692 | rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ | | 693 | rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ |
693 | if (p == &ref->cache->node) | | 694 | if (p == &ref->cache->node) |
694 | ref->cache = NULL; | | 695 | ref->cache = NULL; |
695 | spin_unlock_irq(&ref->tree_lock); | | 696 | spin_unlock_irq(&ref->tree_lock); |
696 | | | 697 | |
697 | return rb_entry(p, struct active_node, node); | | 698 | return rb_entry(p, struct active_node, node); |
698 | } | | 699 | } |
699 | | | 700 | |
700 | int i915_active_acquire_preallocate_barrier(struct i915_active *ref, | | 701 | int i915_active_acquire_preallocate_barrier(struct i915_active *ref, |
701 | struct intel_engine_cs *engine) | | 702 | struct intel_engine_cs *engine) |
702 | { | | 703 | { |
703 | intel_engine_mask_t tmp, mask = engine->mask; | | 704 | intel_engine_mask_t tmp, mask = engine->mask; |
704 | struct llist_node *first = NULL, *last = NULL; | | 705 | struct llist_node *first = NULL, *last = NULL; |
705 | struct intel_gt *gt = engine->gt; | | 706 | struct intel_gt *gt = engine->gt; |
706 | int err; | | 707 | int err; |
707 | | | 708 | |
708 | GEM_BUG_ON(i915_active_is_idle(ref)); | | 709 | GEM_BUG_ON(i915_active_is_idle(ref)); |
709 | | | 710 | |
710 | /* Wait until the previous preallocation is completed */ | | 711 | /* Wait until the previous preallocation is completed */ |
711 | while (!llist_empty(&ref->preallocated_barriers)) | | 712 | while (!llist_empty(&ref->preallocated_barriers)) |
712 | cond_resched(); | | 713 | cond_resched(); |
713 | | | 714 | |
714 | /* | | 715 | /* |
715 | * Preallocate a node for each physical engine supporting the target | | 716 | * Preallocate a node for each physical engine supporting the target |
716 | * engine (remember virtual engines have more than one sibling). | | 717 | * engine (remember virtual engines have more than one sibling). |
717 | * We can then use the preallocated nodes in | | 718 | * We can then use the preallocated nodes in |
718 | * i915_active_acquire_barrier() | | 719 | * i915_active_acquire_barrier() |
719 | */ | | 720 | */ |
720 | for_each_engine_masked(engine, gt, mask, tmp) { | | 721 | for_each_engine_masked(engine, gt, mask, tmp) { |
721 | u64 idx = engine->kernel_context->timeline->fence_context; | | 722 | u64 idx = engine->kernel_context->timeline->fence_context; |
722 | struct llist_node *prev = first; | | 723 | struct llist_node *prev = first; |
723 | struct active_node *node; | | 724 | struct active_node *node; |
724 | | | 725 | |
725 | node = reuse_idle_barrier(ref, idx); | | 726 | node = reuse_idle_barrier(ref, idx); |
726 | if (!node) { | | 727 | if (!node) { |
727 | node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); | | 728 | node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); |
728 | if (!node) { | | 729 | if (!node) { |
729 | err = ENOMEM; | | 730 | err = ENOMEM; |
730 | goto unwind; | | 731 | goto unwind; |
731 | } | | 732 | } |
732 | | | 733 | |
733 | memset(node, 0, sizeof(*node)); | | 734 | memset(node, 0, sizeof(*node)); |
734 | RCU_INIT_POINTER(node->base.fence, NULL); | | 735 | RCU_INIT_POINTER(node->base.fence, NULL); |
735 | node->base.cb.func = node_retire; | | 736 | node->base.cb.func = node_retire; |
736 | node->timeline = idx; | | 737 | node->timeline = idx; |
737 | node->ref = ref; | | 738 | node->ref = ref; |
738 | } | | 739 | } |
739 | | | 740 | |
740 | if (!i915_active_fence_isset(&node->base)) { | | 741 | if (!i915_active_fence_isset(&node->base)) { |
741 | /* | | 742 | /* |
742 | * Mark this as being *our* unconnected proto-node. | | 743 | * Mark this as being *our* unconnected proto-node. |
743 | * | | 744 | * |
744 | * Since this node is not in any list, and we have | | 745 | * Since this node is not in any list, and we have |
745 | * decoupled it from the rbtree, we can reuse the | | 746 | * decoupled it from the rbtree, we can reuse the |
746 | * request to indicate this is an idle-barrier node | | 747 | * request to indicate this is an idle-barrier node |
747 | * and then we can use the rb_node and list pointers | | 748 | * and then we can use the rb_node and list pointers |
748 | * for our tracking of the pending barrier. | | 749 | * for our tracking of the pending barrier. |
749 | */ | | 750 | */ |
750 | RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); | | 751 | RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); |
751 | node->engine = engine; | | 752 | node->engine = engine; |
752 | atomic_inc(&ref->count); | | 753 | atomic_inc(&ref->count); |
753 | } | | 754 | } |
754 | GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); | | 755 | GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); |
755 | | | 756 | |
756 | GEM_BUG_ON(barrier_to_engine(node) != engine); | | 757 | GEM_BUG_ON(barrier_to_engine(node) != engine); |
757 | first = barrier_to_ll(node); | | 758 | first = barrier_to_ll(node); |
758 | first->next = prev; | | 759 | first->next = prev; |
759 | if (!last) | | 760 | if (!last) |
760 | last = first; | | 761 | last = first; |
761 | intel_engine_pm_get(engine); | | 762 | intel_engine_pm_get(engine); |
762 | } | | 763 | } |
763 | | | 764 | |
764 | GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); | | 765 | GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); |
765 | llist_add_batch(first, last, &ref->preallocated_barriers); | | 766 | llist_add_batch(first, last, &ref->preallocated_barriers); |
766 | | | 767 | |
767 | return 0; | | 768 | return 0; |
768 | | | 769 | |
769 | unwind: | | 770 | unwind: |
770 | while (first) { | | 771 | while (first) { |
771 | struct active_node *node = barrier_from_ll(first); | | 772 | struct active_node *node = barrier_from_ll(first); |
772 | | | 773 | |
773 | first = first->next; | | 774 | first = first->next; |
774 | | | 775 | |
775 | atomic_dec(&ref->count); | | 776 | atomic_dec(&ref->count); |
776 | intel_engine_pm_put(barrier_to_engine(node)); | | 777 | intel_engine_pm_put(barrier_to_engine(node)); |
777 | | | 778 | |
778 | kmem_cache_free(global.slab_cache, node); | | 779 | kmem_cache_free(global.slab_cache, node); |
779 | } | | 780 | } |
780 | return err; | | 781 | return err; |
781 | } | | 782 | } |
782 | | | 783 | |
783 | void i915_active_acquire_barrier(struct i915_active *ref) | | 784 | void i915_active_acquire_barrier(struct i915_active *ref) |
784 | { | | 785 | { |
785 | struct llist_node *pos, *next; | | 786 | struct llist_node *pos, *next; |
786 | unsigned long flags; | | 787 | unsigned long flags; |
787 | | | 788 | |
788 | GEM_BUG_ON(i915_active_is_idle(ref)); | | 789 | GEM_BUG_ON(i915_active_is_idle(ref)); |
789 | | | 790 | |
790 | /* | | 791 | /* |
791 | * Transfer the list of preallocated barriers into the | | 792 | * Transfer the list of preallocated barriers into the |
792 | * i915_active rbtree, but only as proto-nodes. They will be | | 793 | * i915_active rbtree, but only as proto-nodes. They will be |
793 | * populated by i915_request_add_active_barriers() to point to the | | 794 | * populated by i915_request_add_active_barriers() to point to the |
794 | * request that will eventually release them. | | 795 | * request that will eventually release them. |
795 | */ | | 796 | */ |
796 | llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { | | 797 | llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { |
797 | struct active_node *node = barrier_from_ll(pos); | | 798 | struct active_node *node = barrier_from_ll(pos); |
798 | struct intel_engine_cs *engine = barrier_to_engine(node); | | 799 | struct intel_engine_cs *engine = barrier_to_engine(node); |
799 | struct rb_node **p, *parent; | | 800 | struct rb_node **p, *parent; |
800 | | | 801 | |
801 | spin_lock_irqsave_nested(&ref->tree_lock, flags, | | 802 | spin_lock_irqsave_nested(&ref->tree_lock, flags, |
802 | SINGLE_DEPTH_NESTING); | | 803 | SINGLE_DEPTH_NESTING); |
803 | #ifdef __NetBSD__ | | 804 | #ifdef __NetBSD__ |
804 | __USE(p); | | 805 | __USE(p); |
805 | __USE(parent); | | 806 | __USE(parent); |
806 | struct active_node *collision __diagused; | | 807 | struct active_node *collision __diagused; |
807 | collision = rb_tree_insert_node(&ref->tree.rbr_tree, node); | | 808 | collision = rb_tree_insert_node(&ref->tree.rbr_tree, node); |
808 | KASSERT(collision == node); | | 809 | KASSERT(collision == node); |
809 | #else | | 810 | #else |
810 | parent = NULL; | | 811 | parent = NULL; |
811 | p = &ref->tree.rb_node; | | 812 | p = &ref->tree.rb_node; |
812 | while (*p) { | | 813 | while (*p) { |
813 | struct active_node *it; | | 814 | struct active_node *it; |
814 | | | 815 | |
815 | parent = *p; | | 816 | parent = *p; |
816 | | | 817 | |
817 | it = rb_entry(parent, struct active_node, node); | | 818 | it = rb_entry(parent, struct active_node, node); |
818 | if (it->timeline < node->timeline) | | 819 | if (it->timeline < node->timeline) |
819 | p = &parent->rb_right; | | 820 | p = &parent->rb_right; |
820 | else | | 821 | else |
821 | p = &parent->rb_left; | | 822 | p = &parent->rb_left; |
822 | } | | 823 | } |
823 | rb_link_node(&node->node, parent, p); | | 824 | rb_link_node(&node->node, parent, p); |
824 | rb_insert_color(&node->node, &ref->tree); | | 825 | rb_insert_color(&node->node, &ref->tree); |
825 | #endif | | 826 | #endif |
826 | spin_unlock_irqrestore(&ref->tree_lock, flags); | | 827 | spin_unlock_irqrestore(&ref->tree_lock, flags); |
827 | | | 828 | |
828 | GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); | | 829 | GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); |
829 | llist_add(barrier_to_ll(node), &engine->barrier_tasks); | | 830 | llist_add(barrier_to_ll(node), &engine->barrier_tasks); |
830 | intel_engine_pm_put(engine); | | 831 | intel_engine_pm_put(engine); |
831 | } | | 832 | } |
832 | } | | 833 | } |
833 | | | 834 | |
834 | static struct dma_fence **ll_to_fence_slot(struct llist_node *node) | | 835 | static struct dma_fence **ll_to_fence_slot(struct llist_node *node) |
835 | { | | 836 | { |
836 | return __active_fence_slot(&barrier_from_ll(node)->base); | | 837 | return __active_fence_slot(&barrier_from_ll(node)->base); |
837 | } | | 838 | } |
838 | | | 839 | |
839 | void i915_request_add_active_barriers(struct i915_request *rq) | | 840 | void i915_request_add_active_barriers(struct i915_request *rq) |
840 | { | | 841 | { |
841 | struct intel_engine_cs *engine = rq->engine; | | 842 | struct intel_engine_cs *engine = rq->engine; |
842 | struct llist_node *node, *next; | | 843 | struct llist_node *node, *next; |
843 | unsigned long flags; | | 844 | unsigned long flags; |
844 | | | 845 | |
845 | GEM_BUG_ON(!intel_context_is_barrier(rq->context)); | | 846 | GEM_BUG_ON(!intel_context_is_barrier(rq->context)); |
846 | GEM_BUG_ON(intel_engine_is_virtual(engine)); | | 847 | GEM_BUG_ON(intel_engine_is_virtual(engine)); |
847 | GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline); | | 848 | GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline); |
848 | | | 849 | |
849 | node = llist_del_all(&engine->barrier_tasks); | | 850 | node = llist_del_all(&engine->barrier_tasks); |
850 | if (!node) | | 851 | if (!node) |
851 | return; | | 852 | return; |
852 | /* | | 853 | /* |
853 | * Attach the list of proto-fences to the in-flight request such | | 854 | * Attach the list of proto-fences to the in-flight request such |
854 | * that the parent i915_active will be released when this request | | 855 | * that the parent i915_active will be released when this request |
855 | * is retired. | | 856 | * is retired. |
856 | */ | | 857 | */ |
857 | spin_lock_irqsave(&rq->lock, flags); | | 858 | spin_lock_irqsave(&rq->lock, flags); |
858 | llist_for_each_safe(node, next, node) { | | 859 | llist_for_each_safe(node, next, node) { |
859 | /* serialise with reuse_idle_barrier */ | | 860 | /* serialise with reuse_idle_barrier */ |
860 | smp_store_mb(*ll_to_fence_slot(node), &rq->fence); | | 861 | smp_store_mb(*ll_to_fence_slot(node), &rq->fence); |
861 | #ifdef __NetBSD__ | | 862 | #ifdef __NetBSD__ |
862 | /* XXX ugh bletch */ | | 863 | /* XXX ugh bletch */ |
863 | struct i915_active_fence *active = | | 864 | struct i915_active_fence *active = |
864 | container_of(node, struct i915_active_fence, llist); | | 865 | container_of(node, struct i915_active_fence, llist); |
865 | /* XXX something bad went wrong in making this code */ | | 866 | /* XXX something bad went wrong in making this code */ |
866 | KASSERT(active->cb.func == node_retire || | | 867 | KASSERT(active->cb.func == node_retire || |
867 | active->cb.func == excl_retire || | | 868 | active->cb.func == excl_retire || |
868 | active->cb.func == i915_active_noop); | | 869 | active->cb.func == i915_active_noop); |
869 | KASSERTMSG(active->fence == &rq->fence, | | 870 | KASSERTMSG(active->fence == &rq->fence, |
870 | "active=%p fence=%p; rq=%p fence=%p", | | 871 | "active=%p fence=%p; rq=%p fence=%p", |
871 | active, active->fence, rq, &rq->fence); | | 872 | active, active->fence, rq, &rq->fence); |
872 | KASSERTMSG(!active->cb.fcb_onqueue, "active=%p", active); | | 873 | KASSERTMSG(!active->cb.fcb_onqueue, "active=%p", active); |
873 | active->cb.fcb_onqueue = true; | | 874 | active->cb.fcb_onqueue = true; |
874 | TAILQ_INSERT_TAIL(&rq->fence.f_callbacks, &active->cb, | | 875 | TAILQ_INSERT_TAIL(&rq->fence.f_callbacks, &active->cb, |
875 | fcb_entry); | | 876 | fcb_entry); |
876 | #else | | 877 | #else |
877 | list_add_tail((struct list_head *)node, &rq->fence.cb_list); | | 878 | list_add_tail((struct list_head *)node, &rq->fence.cb_list); |
878 | #endif | | 879 | #endif |
879 | } | | 880 | } |
880 | spin_unlock_irqrestore(&rq->lock, flags); | | 881 | spin_unlock_irqrestore(&rq->lock, flags); |
881 | } | | 882 | } |
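/*
 * A sketch of the barrier lifecycle this function completes (the acquire
 * helpers named here live earlier in this file; the ordering shown is
 * illustrative, not a verbatim caller):
 *
 *	err = i915_active_acquire_preallocate_barrier(ref, engine);
 *	if (err == 0)
 *		i915_active_acquire_barrier(ref);
 *	// the proto-fences now sit on engine->barrier_tasks
 *	...
 *	i915_request_add_active_barriers(rq);
 *	// a kernel-context request on that engine adopts them, so they are
 *	// signaled, and the parent i915_active released, when rq retires
 */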
882 | | | 883 | |
883 | /* | | 884 | /* |
884 | * __i915_active_fence_set: Update the last active fence along its timeline | | 885 | * __i915_active_fence_set: Update the last active fence along its timeline |
885 | * @active: the active tracker | | 886 | * @active: the active tracker |
886 | * @fence: the new fence (under construction) | | 887 | * @fence: the new fence (under construction) |
887 | * | | 888 | * |
888 | * Records the new @fence as the last active fence along its timeline in | | 889 | * Records the new @fence as the last active fence along its timeline in |
889 | * this active tracker, moving the tracking callbacks from the previous | | 890 | * this active tracker, moving the tracking callbacks from the previous |
890 | * fence onto this one. Returns the previous fence (if not already completed), | | 891 | * fence onto this one. Returns the previous fence (if not already completed), |
891 | * which the caller must ensure is executed before the new fence. To ensure | | 892 | * which the caller must ensure is executed before the new fence. To ensure |
892 | * that the order of fences within the timeline of the i915_active_fence is | | 893 | * that the order of fences within the timeline of the i915_active_fence is |
893 | * understood, it should be locked by the caller. | | 894 | * understood, it should be locked by the caller. |
894 | */ | | 895 | */ |
895 | struct dma_fence * | | 896 | struct dma_fence * |
896 | __i915_active_fence_set(struct i915_active_fence *active, | | 897 | __i915_active_fence_set(struct i915_active_fence *active, |
897 | struct dma_fence *fence) | | 898 | struct dma_fence *fence) |
898 | { | | 899 | { |
899 | struct dma_fence *prev; | | 900 | struct dma_fence *prev; |
900 | unsigned long flags; | | 901 | unsigned long flags; |
901 | | | 902 | |
902 | if (fence == rcu_access_pointer(active->fence)) | | 903 | if (fence == rcu_access_pointer(active->fence)) |
903 | return fence; | | 904 | return fence; |
904 | | | 905 | |
905 | GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); | | 906 | GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); |
906 | | | 907 | |
907 | /* | | 908 | /* |
908 | * Consider that we have two threads arriving (A and B), with | | 909 | * Consider that we have two threads arriving (A and B), with |
909 | * C already resident as the active->fence. | | 910 | * C already resident as the active->fence. |
910 | * | | 911 | * |
911 | * A does the xchg first, and so it sees C or NULL depending | | 912 | * A does the xchg first, and so it sees C or NULL depending |
912 | * on the timing of the interrupt handler. If it is NULL, the | | 913 | * on the timing of the interrupt handler. If it is NULL, the |
913 | * previous fence must have been signaled and we know that | | 914 | * previous fence must have been signaled and we know that |
914 | * we are first on the timeline. If it is still present, | | 915 | * we are first on the timeline. If it is still present, |
915 | * we acquire the lock on that fence and serialise with the interrupt | | 916 | * we acquire the lock on that fence and serialise with the interrupt |
916 | * handler, in the process removing it from any future interrupt | | 917 | * handler, in the process removing it from any future interrupt |
917 | * callback. A will then wait on C before executing (if present). | | 918 | * callback. A will then wait on C before executing (if present). |
918 | * | | 919 | * |
919 | * As B is second, it sees A as the previous fence and so waits for | | 920 | * As B is second, it sees A as the previous fence and so waits for |
920 | * it to complete its transition and takes over the occupancy for | | 921 | * it to complete its transition and takes over the occupancy for |
921 | * itself -- remembering that it needs to wait on A before executing. | | 922 | * itself -- remembering that it needs to wait on A before executing. |
922 | * | | 923 | * |
923 | * Note the strong ordering of the timeline also provides consistent | | 924 | * Note the strong ordering of the timeline also provides consistent |
924 | * nesting rules for the fence->lock; the inner lock is always the | | 925 | * nesting rules for the fence->lock; the inner lock is always the |
925 | * older lock. | | 926 | * older lock. |
926 | */ | | 927 | */ |
927 | spin_lock_irqsave(fence->lock, flags); | | 928 | spin_lock_irqsave(fence->lock, flags); |
928 | prev = xchg(__active_fence_slot(active), fence); | | 929 | prev = xchg(__active_fence_slot(active), fence); |
929 | if (prev) { | | 930 | if (prev) { |
930 | GEM_BUG_ON(prev == fence); | | 931 | GEM_BUG_ON(prev == fence); |
931 | spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); | | 932 | spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); |
932 | #ifdef __NetBSD__ | | 933 | #ifdef __NetBSD__ |
933 | /* XXX ugh bletch */ | | 934 | /* XXX ugh bletch */ |
934 | KASSERT(active->cb.func == node_retire || | | 935 | KASSERT(active->cb.func == node_retire || |
935 | active->cb.func == excl_retire || | | 936 | active->cb.func == excl_retire || |
936 | active->cb.func == i915_active_noop); | | 937 | active->cb.func == i915_active_noop); |
937 | if (active->cb.fcb_onqueue) { | | 938 | if (active->cb.fcb_onqueue) { |
938 | TAILQ_REMOVE(&prev->f_callbacks, &active->cb, | | 939 | TAILQ_REMOVE(&prev->f_callbacks, &active->cb, |
939 | fcb_entry); | | 940 | fcb_entry); |
940 | active->cb.fcb_onqueue = false; | | 941 | active->cb.fcb_onqueue = false; |
941 | } | | 942 | } |
942 | #else | | 943 | #else |
943 | __list_del_entry(&active->cb.node); | | 944 | __list_del_entry(&active->cb.node); |
944 | #endif | | 945 | #endif |
945 | spin_unlock(prev->lock); /* serialise with prev->cb_list */ | | 946 | spin_unlock(prev->lock); /* serialise with prev->cb_list */ |
946 | } | | 947 | } |
947 | GEM_BUG_ON(rcu_access_pointer(active->fence) != fence); | | 948 | GEM_BUG_ON(rcu_access_pointer(active->fence) != fence); |
948 | #ifdef __NetBSD__ | | 949 | #ifdef __NetBSD__ |
949 | /* XXX ugh bletch */ | | 950 | /* XXX ugh bletch */ |
950 | KASSERT(!active->cb.fcb_onqueue); | | 951 | KASSERT(!active->cb.fcb_onqueue); |
951 | active->cb.fcb_onqueue = true; | | 952 | active->cb.fcb_onqueue = true; |
952 | TAILQ_INSERT_TAIL(&fence->f_callbacks, &active->cb, fcb_entry); | | 953 | TAILQ_INSERT_TAIL(&fence->f_callbacks, &active->cb, fcb_entry); |
953 | #else | | 954 | #else |
954 | list_add_tail(&active->cb.node, &fence->cb_list); | | 955 | list_add_tail(&active->cb.node, &fence->cb_list); |
955 | #endif | | 956 | #endif |
956 | spin_unlock_irqrestore(fence->lock, flags); | | 957 | spin_unlock_irqrestore(fence->lock, flags); |
957 | | | 958 | |
958 | return prev; | | 959 | return prev; |
959 | } | | 960 | } |
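/*
 * A minimal sketch of a non-request caller (the exclusive slot retired by
 * excl_retire above takes roughly this shape; the surrounding error
 * handling is illustrative only):
 *
 *	rcu_read_lock();
 *	prev = __i915_active_fence_set(&ref->excl, fence);
 *	rcu_read_unlock();
 *	// if prev is non-NULL, the caller must order fence after prev,
 *	// e.g. by awaiting it, before fence is allowed to execute
 */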
960 | | | 961 | |
961 | int i915_active_fence_set(struct i915_active_fence *active, | | 962 | int i915_active_fence_set(struct i915_active_fence *active, |
962 | struct i915_request *rq) | | 963 | struct i915_request *rq) |
963 | { | | 964 | { |
964 | struct dma_fence *fence; | | 965 | struct dma_fence *fence; |
965 | int err = 0; | | 966 | int err = 0; |
966 | | | 967 | |
967 | /* Must maintain timeline ordering wrt previous active requests */ | | 968 | /* Must maintain timeline ordering wrt previous active requests */ |
968 | rcu_read_lock(); | | 969 | rcu_read_lock(); |
969 | fence = __i915_active_fence_set(active, &rq->fence); | | 970 | fence = __i915_active_fence_set(active, &rq->fence); |
970 | if (fence) /* but the previous fence may not belong to that timeline! */ | | 971 | if (fence) /* but the previous fence may not belong to that timeline! */ |
971 | fence = dma_fence_get_rcu(fence); | | 972 | fence = dma_fence_get_rcu(fence); |
972 | rcu_read_unlock(); | | 973 | rcu_read_unlock(); |
973 | if (fence) { | | 974 | if (fence) { |
974 | err = i915_request_await_dma_fence(rq, fence); | | 975 | err = i915_request_await_dma_fence(rq, fence); |
975 | dma_fence_put(fence); | | 976 | dma_fence_put(fence); |
976 | } | | 977 | } |
977 | | | 978 | |
978 | return err; | | 979 | return err; |
979 | } | | 980 | } |
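/*
 * A minimal usage sketch (the tracker field name is hypothetical): make
 * @rq the fence tracked by tl->last_request, awaiting whatever request
 * was tracked there before so the timeline remains ordered:
 *
 *	err = i915_active_fence_set(&tl->last_request, rq);
 *	if (err)
 *		return err;
 */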
980 | | | 981 | |
981 | void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) | | 982 | void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) |
982 | { | | 983 | { |
983 | active_fence_cb(fence, cb); | | 984 | active_fence_cb(fence, cb); |
984 | } | | 985 | } |
985 | | | 986 | |
986 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) | | 987 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
987 | #include "selftests/i915_active.c" | | 988 | #include "selftests/i915_active.c" |
988 | #endif | | 989 | #endif |
989 | | | 990 | |
990 | static void i915_global_active_shrink(void) | | 991 | static void i915_global_active_shrink(void) |
991 | { | | 992 | { |
992 | kmem_cache_shrink(global.slab_cache); | | 993 | kmem_cache_shrink(global.slab_cache); |
993 | } | | 994 | } |
994 | | | 995 | |
995 | static void i915_global_active_exit(void) | | 996 | static void i915_global_active_exit(void) |
996 | { | | 997 | { |
997 | kmem_cache_destroy(global.slab_cache); | | 998 | kmem_cache_destroy(global.slab_cache); |
998 | } | | 999 | } |
999 | | | 1000 | |
1000 | static struct i915_global_active global = { { | | 1001 | static struct i915_global_active global = { { |
1001 | .shrink = i915_global_active_shrink, | | 1002 | .shrink = i915_global_active_shrink, |
1002 | .exit = i915_global_active_exit, | | 1003 | .exit = i915_global_active_exit, |
1003 | } }; | | 1004 | } }; |
1004 | | | 1005 | |
1005 | int __init i915_global_active_init(void) | | 1006 | int __init i915_global_active_init(void) |
1006 | { | | 1007 | { |
1007 | global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); | | 1008 | global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); |
1008 | if (!global.slab_cache) | | 1009 | if (!global.slab_cache) |
1009 | return -ENOMEM; | | 1010 | return -ENOMEM; |
1010 | | | 1011 | |
1011 | i915_global_register(&global.base); | | 1012 | i915_global_register(&global.base); |
1012 | return 0; | | 1013 | return 0; |
1013 | } | | 1014 | } |
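/*
 * Module wiring, assumed rather than shown here: i915_globals_init() is
 * expected to call this during driver load, after which the registered
 * .shrink/.exit hooks above let the shared globals machinery trim the
 * active_node cache under memory pressure and destroy it on unload:
 *
 *	ret = i915_global_active_init();
 *	if (ret)
 *		return ret;
 */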