Mon Feb 14 20:37:51 2022 UTC
i915: Defer final wakeup on active until after retirement.

Not sure what I was thinking when I moved this earlier!


(riastradh)
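
The change moves the DRM_SPIN_WAKEUP_ALL() call in __active_retire() so that waiters on ref->tree_wq are only woken after the retire callback has run. For orientation, the tail of __active_retire() before and after, condensed from the hunk below (the diff itself is authoritative):

	Before (rev 1.10):

		DRM_SPIN_WAKEUP_ALL(&ref->tree_wq, &ref->tree_lock);

		spin_unlock_irqrestore(&ref->tree_lock, flags);

		/* After the final retire, the entire struct may be freed */
		if (ref->retire)
			ref->retire(ref);

	After (rev 1.11):

		spin_unlock_irqrestore(&ref->tree_lock, flags);

		/* After the final retire, the entire struct may be freed */
		if (ref->retire)
			ref->retire(ref);

		/* ... except if you wait on it, you must manage your own references! */
		spin_lock(&ref->tree_lock);
		DRM_SPIN_WAKEUP_ALL(&ref->tree_wq, &ref->tree_lock);
		spin_unlock(&ref->tree_lock);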
cvs diff -r1.10 -r1.11 src/sys/external/bsd/drm2/dist/drm/i915/i915_active.c

--- src/sys/external/bsd/drm2/dist/drm/i915/i915_active.c 2021/12/24 00:14:03 1.10
+++ src/sys/external/bsd/drm2/dist/drm/i915/i915_active.c 2022/02/14 20:37:51 1.11
@@ -1,1013 +1,1014 @@
1/* $NetBSD: i915_active.c,v 1.10 2021/12/24 00:14:03 riastradh Exp $ */ 1/* $NetBSD: i915_active.c,v 1.11 2022/02/14 20:37:51 riastradh Exp $ */
2 2
3/* 3/*
4 * SPDX-License-Identifier: MIT 4 * SPDX-License-Identifier: MIT
5 * 5 *
6 * Copyright © 2019 Intel Corporation 6 * Copyright © 2019 Intel Corporation
7 */ 7 */
8 8
9#include <sys/cdefs.h> 9#include <sys/cdefs.h>
10__KERNEL_RCSID(0, "$NetBSD: i915_active.c,v 1.10 2021/12/24 00:14:03 riastradh Exp $"); 10__KERNEL_RCSID(0, "$NetBSD: i915_active.c,v 1.11 2022/02/14 20:37:51 riastradh Exp $");
11 11
12#include <linux/debugobjects.h> 12#include <linux/debugobjects.h>
13 13
14#include "gt/intel_context.h" 14#include "gt/intel_context.h"
15#include "gt/intel_engine_pm.h" 15#include "gt/intel_engine_pm.h"
16#include "gt/intel_ring.h" 16#include "gt/intel_ring.h"
17 17
18#include "i915_drv.h" 18#include "i915_drv.h"
19#include "i915_active.h" 19#include "i915_active.h"
20#include "i915_globals.h" 20#include "i915_globals.h"
21 21
22#include <linux/nbsd-namespace.h> 22#include <linux/nbsd-namespace.h>
23 23
24/* 24/*
25 * Active refs memory management 25 * Active refs memory management
26 * 26 *
27 * To be more economical with memory, we reap all the i915_active trees as 27 * To be more economical with memory, we reap all the i915_active trees as
28 * they idle (when we know the active requests are inactive) and allocate the 28 * they idle (when we know the active requests are inactive) and allocate the
29 * nodes from a local slab cache to hopefully reduce the fragmentation. 29 * nodes from a local slab cache to hopefully reduce the fragmentation.
30 */ 30 */
31static struct i915_global_active { 31static struct i915_global_active {
32 struct i915_global base; 32 struct i915_global base;
33 struct kmem_cache *slab_cache; 33 struct kmem_cache *slab_cache;
34} global; 34} global;
35 35
36struct active_node { 36struct active_node {
37 struct i915_active_fence base; 37 struct i915_active_fence base;
38 struct i915_active *ref; 38 struct i915_active *ref;
39 struct rb_node node; 39 struct rb_node node;
40 u64 timeline; 40 u64 timeline;
41 struct intel_engine_cs *engine; 41 struct intel_engine_cs *engine;
42}; 42};
43 43
44static inline struct active_node * 44static inline struct active_node *
45node_from_active(struct i915_active_fence *active) 45node_from_active(struct i915_active_fence *active)
46{ 46{
47 return container_of(active, struct active_node, base); 47 return container_of(active, struct active_node, base);
48} 48}
49 49
50#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) 50#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
51 51
52static inline bool is_barrier(const struct i915_active_fence *active) 52static inline bool is_barrier(const struct i915_active_fence *active)
53{ 53{
54 return IS_ERR(rcu_access_pointer(active->fence)); 54 return IS_ERR(rcu_access_pointer(active->fence));
55} 55}
56 56
57static inline struct llist_node *barrier_to_ll(struct active_node *node) 57static inline struct llist_node *barrier_to_ll(struct active_node *node)
58{ 58{
59 GEM_BUG_ON(!is_barrier(&node->base)); 59 GEM_BUG_ON(!is_barrier(&node->base));
60 return &node->base.llist; 60 return &node->base.llist;
61} 61}
62 62
63static inline struct intel_engine_cs * 63static inline struct intel_engine_cs *
64__barrier_to_engine(struct active_node *node) 64__barrier_to_engine(struct active_node *node)
65{ 65{
66 return READ_ONCE(node->engine); 66 return READ_ONCE(node->engine);
67} 67}
68 68
69static inline struct intel_engine_cs * 69static inline struct intel_engine_cs *
70barrier_to_engine(struct active_node *node) 70barrier_to_engine(struct active_node *node)
71{ 71{
72 GEM_BUG_ON(!is_barrier(&node->base)); 72 GEM_BUG_ON(!is_barrier(&node->base));
73 return __barrier_to_engine(node); 73 return __barrier_to_engine(node);
74} 74}
75 75
76static inline struct active_node *barrier_from_ll(struct llist_node *x) 76static inline struct active_node *barrier_from_ll(struct llist_node *x)
77{ 77{
78 return container_of(x, struct active_node, base.llist); 78 return container_of(x, struct active_node, base.llist);
79} 79}
80 80
81#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) 81#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
82 82
83static void *active_debug_hint(void *addr) 83static void *active_debug_hint(void *addr)
84{ 84{
85 struct i915_active *ref = addr; 85 struct i915_active *ref = addr;
86 86
87 return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref; 87 return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
88} 88}
89 89
90static struct debug_obj_descr active_debug_desc = { 90static struct debug_obj_descr active_debug_desc = {
91 .name = "i915_active", 91 .name = "i915_active",
92 .debug_hint = active_debug_hint, 92 .debug_hint = active_debug_hint,
93}; 93};
94 94
95static void debug_active_init(struct i915_active *ref) 95static void debug_active_init(struct i915_active *ref)
96{ 96{
97 debug_object_init(ref, &active_debug_desc); 97 debug_object_init(ref, &active_debug_desc);
98} 98}
99 99
100static void debug_active_activate(struct i915_active *ref) 100static void debug_active_activate(struct i915_active *ref)
101{ 101{
102 lockdep_assert_held(&ref->tree_lock); 102 lockdep_assert_held(&ref->tree_lock);
103 if (!atomic_read(&ref->count)) /* before the first inc */ 103 if (!atomic_read(&ref->count)) /* before the first inc */
104 debug_object_activate(ref, &active_debug_desc); 104 debug_object_activate(ref, &active_debug_desc);
105} 105}
106 106
107static void debug_active_deactivate(struct i915_active *ref) 107static void debug_active_deactivate(struct i915_active *ref)
108{ 108{
109 lockdep_assert_held(&ref->tree_lock); 109 lockdep_assert_held(&ref->tree_lock);
110 if (!atomic_read(&ref->count)) /* after the last dec */ 110 if (!atomic_read(&ref->count)) /* after the last dec */
111 debug_object_deactivate(ref, &active_debug_desc); 111 debug_object_deactivate(ref, &active_debug_desc);
112} 112}
113 113
114static void debug_active_fini(struct i915_active *ref) 114static void debug_active_fini(struct i915_active *ref)
115{ 115{
116 debug_object_free(ref, &active_debug_desc); 116 debug_object_free(ref, &active_debug_desc);
117} 117}
118 118
119static void debug_active_assert(struct i915_active *ref) 119static void debug_active_assert(struct i915_active *ref)
120{ 120{
121 debug_object_assert_init(ref, &active_debug_desc); 121 debug_object_assert_init(ref, &active_debug_desc);
122} 122}
123 123
124#else 124#else
125 125
126static inline void debug_active_init(struct i915_active *ref) { } 126static inline void debug_active_init(struct i915_active *ref) { }
127static inline void debug_active_activate(struct i915_active *ref) { } 127static inline void debug_active_activate(struct i915_active *ref) { }
128static inline void debug_active_deactivate(struct i915_active *ref) { } 128static inline void debug_active_deactivate(struct i915_active *ref) { }
129static inline void debug_active_fini(struct i915_active *ref) { } 129static inline void debug_active_fini(struct i915_active *ref) { }
130static inline void debug_active_assert(struct i915_active *ref) { } 130static inline void debug_active_assert(struct i915_active *ref) { }
131 131
132#endif 132#endif
133 133
134#ifdef __NetBSD__ 134#ifdef __NetBSD__
135 135
136static int 136static int
137compare_nodes(void *cookie, const void *va, const void *vb) 137compare_nodes(void *cookie, const void *va, const void *vb)
138{ 138{
139 const struct active_node *a = va; 139 const struct active_node *a = va;
140 const struct active_node *b = vb; 140 const struct active_node *b = vb;
141 141
142 if (a->timeline < b->timeline) 142 if (a->timeline < b->timeline)
143 return -1; 143 return -1;
144 if (a->timeline > b->timeline) 144 if (a->timeline > b->timeline)
145 return +1; 145 return +1;
146 if ((uintptr_t)a < (uintptr_t)b) 146 if ((uintptr_t)a < (uintptr_t)b)
147 return -1; 147 return -1;
148 if ((uintptr_t)a > (uintptr_t)b) 148 if ((uintptr_t)a > (uintptr_t)b)
149 return +1; 149 return +1;
150 return 0; 150 return 0;
151} 151}
152 152
153static int 153static int
154compare_node_key(void *cookie, const void *vn, const void *vk) 154compare_node_key(void *cookie, const void *vn, const void *vk)
155{ 155{
156 const struct active_node *a = vn; 156 const struct active_node *a = vn;
157 const uint64_t *k = vk; 157 const uint64_t *k = vk;
158 158
159 if (a->timeline < *k) 159 if (a->timeline < *k)
160 return -1; 160 return -1;
161 if (a->timeline > *k) 161 if (a->timeline > *k)
162 return +1; 162 return +1;
163 return 0; 163 return 0;
164} 164}
165 165
166static const rb_tree_ops_t active_rb_ops = { 166static const rb_tree_ops_t active_rb_ops = {
167 .rbto_compare_nodes = compare_nodes, 167 .rbto_compare_nodes = compare_nodes,
168 .rbto_compare_key = compare_node_key, 168 .rbto_compare_key = compare_node_key,
169 .rbto_node_offset = offsetof(struct active_node, node), 169 .rbto_node_offset = offsetof(struct active_node, node),
170}; 170};
171 171
172#endif 172#endif
173 173
174static void 174static void
175__active_retire(struct i915_active *ref) 175__active_retire(struct i915_active *ref)
176{ 176{
177 struct active_node *it, *n; 177 struct active_node *it, *n;
178 struct rb_root root; 178 struct rb_root root;
179 unsigned long flags; 179 unsigned long flags;
180 180
181 GEM_BUG_ON(i915_active_is_idle(ref)); 181 GEM_BUG_ON(i915_active_is_idle(ref));
182 182
183 /* return the unused nodes to our slabcache -- flushing the allocator */ 183 /* return the unused nodes to our slabcache -- flushing the allocator */
184 if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags)) 184 if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
185 return; 185 return;
186 186
187 GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); 187 GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
188 debug_active_deactivate(ref); 188 debug_active_deactivate(ref);
189 189
190 root = ref->tree; 190 root = ref->tree;
191#ifdef __NetBSD__ 191#ifdef __NetBSD__
192 rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops); 192 rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops);
193#else 193#else
194 ref->tree = RB_ROOT; 194 ref->tree = RB_ROOT;
195#endif 195#endif
196 ref->cache = NULL; 196 ref->cache = NULL;
197 197
198 DRM_SPIN_WAKEUP_ALL(&ref->tree_wq, &ref->tree_lock); 
199 
200 spin_unlock_irqrestore(&ref->tree_lock, flags); 198 spin_unlock_irqrestore(&ref->tree_lock, flags);
201 199
202 /* After the final retire, the entire struct may be freed */ 200 /* After the final retire, the entire struct may be freed */
203 if (ref->retire) 201 if (ref->retire)
204 ref->retire(ref); 202 ref->retire(ref);
205 203
206 /* ... except if you wait on it, you must manage your own references! */ 204 /* ... except if you wait on it, you must manage your own references! */
 205 spin_lock(&ref->tree_lock);
 206 DRM_SPIN_WAKEUP_ALL(&ref->tree_wq, &ref->tree_lock);
 207 spin_unlock(&ref->tree_lock);
207 208
208 rbtree_postorder_for_each_entry_safe(it, n, &root, node) { 209 rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
209 GEM_BUG_ON(i915_active_fence_isset(&it->base)); 210 GEM_BUG_ON(i915_active_fence_isset(&it->base));
210 kmem_cache_free(global.slab_cache, it); 211 kmem_cache_free(global.slab_cache, it);
211 } 212 }
212} 213}
213 214
214static void 215static void
215active_work(struct work_struct *wrk) 216active_work(struct work_struct *wrk)
216{ 217{
217 struct i915_active *ref = container_of(wrk, typeof(*ref), work); 218 struct i915_active *ref = container_of(wrk, typeof(*ref), work);
218 219
219 GEM_BUG_ON(!atomic_read(&ref->count)); 220 GEM_BUG_ON(!atomic_read(&ref->count));
220 if (atomic_add_unless(&ref->count, -1, 1)) 221 if (atomic_add_unless(&ref->count, -1, 1))
221 return; 222 return;
222 223
223 __active_retire(ref); 224 __active_retire(ref);
224} 225}
225 226
226static void 227static void
227active_retire(struct i915_active *ref) 228active_retire(struct i915_active *ref)
228{ 229{
229 GEM_BUG_ON(!atomic_read(&ref->count)); 230 GEM_BUG_ON(!atomic_read(&ref->count));
230 if (atomic_add_unless(&ref->count, -1, 1)) 231 if (atomic_add_unless(&ref->count, -1, 1))
231 return; 232 return;
232 233
233 if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) { 234 if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
234 queue_work(system_unbound_wq, &ref->work); 235 queue_work(system_unbound_wq, &ref->work);
235 return; 236 return;
236 } 237 }
237 238
238 __active_retire(ref); 239 __active_retire(ref);
239} 240}
240 241
241static inline struct dma_fence ** 242static inline struct dma_fence **
242__active_fence_slot(struct i915_active_fence *active) 243__active_fence_slot(struct i915_active_fence *active)
243{ 244{
244 return (struct dma_fence ** __force)&active->fence; 245 return (struct dma_fence ** __force)&active->fence;
245} 246}
246 247
247static inline bool 248static inline bool
248active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) 249active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
249{ 250{
250 struct i915_active_fence *active = 251 struct i915_active_fence *active =
251 container_of(cb, typeof(*active), cb); 252 container_of(cb, typeof(*active), cb);
252 253
253 return cmpxchg(__active_fence_slot(active), fence, NULL) == fence; 254 return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
254} 255}
255 256
256static void 257static void
257node_retire(struct dma_fence *fence, struct dma_fence_cb *cb) 258node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
258{ 259{
259 if (active_fence_cb(fence, cb)) 260 if (active_fence_cb(fence, cb))
260 active_retire(container_of(cb, struct active_node, base.cb)->ref); 261 active_retire(container_of(cb, struct active_node, base.cb)->ref);
261} 262}
262 263
263static void 264static void
264excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) 265excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
265{ 266{
266 if (active_fence_cb(fence, cb)) 267 if (active_fence_cb(fence, cb))
267 active_retire(container_of(cb, struct i915_active, excl.cb)); 268 active_retire(container_of(cb, struct i915_active, excl.cb));
268} 269}
269 270
270static struct i915_active_fence * 271static struct i915_active_fence *
271active_instance(struct i915_active *ref, struct intel_timeline *tl) 272active_instance(struct i915_active *ref, struct intel_timeline *tl)
272{ 273{
273 struct active_node *node, *prealloc; 274 struct active_node *node, *prealloc;
274 struct rb_node **p, *parent; 275 struct rb_node **p, *parent;
275 u64 idx = tl->fence_context; 276 u64 idx = tl->fence_context;
276 277
277 /* 278 /*
278 * We track the most recently used timeline to skip a rbtree search 279 * We track the most recently used timeline to skip a rbtree search
279 * for the common case, under typical loads we never need the rbtree 280 * for the common case, under typical loads we never need the rbtree
280 * at all. We can reuse the last slot if it is empty, that is 281 * at all. We can reuse the last slot if it is empty, that is
281 * after the previous activity has been retired, or if it matches the 282 * after the previous activity has been retired, or if it matches the
282 * current timeline. 283 * current timeline.
283 */ 284 */
284 node = READ_ONCE(ref->cache); 285 node = READ_ONCE(ref->cache);
285 if (node && node->timeline == idx) 286 if (node && node->timeline == idx)
286 return &node->base; 287 return &node->base;
287 288
288 /* Preallocate a replacement, just in case */ 289 /* Preallocate a replacement, just in case */
289 prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); 290 prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
290 if (!prealloc) 291 if (!prealloc)
291 return NULL; 292 return NULL;
292 memset(prealloc, 0, sizeof(*prealloc)); 293 memset(prealloc, 0, sizeof(*prealloc));
293 294
294 spin_lock_irq(&ref->tree_lock); 295 spin_lock_irq(&ref->tree_lock);
295 GEM_BUG_ON(i915_active_is_idle(ref)); 296 GEM_BUG_ON(i915_active_is_idle(ref));
296 297
297#ifdef __NetBSD__ 298#ifdef __NetBSD__
298 __USE(parent); 299 __USE(parent);
299 __USE(p); 300 __USE(p);
300 node = rb_tree_find_node(&ref->tree.rbr_tree, &idx); 301 node = rb_tree_find_node(&ref->tree.rbr_tree, &idx);
301 if (node) { 302 if (node) {
302 KASSERT(node->timeline == idx); 303 KASSERT(node->timeline == idx);
303 kmem_cache_free(global.slab_cache, prealloc); 304 kmem_cache_free(global.slab_cache, prealloc);
304 goto out; 305 goto out;
305 } 306 }
306#else 307#else
307 parent = NULL; 308 parent = NULL;
308 p = &ref->tree.rb_node; 309 p = &ref->tree.rb_node;
309 while (*p) { 310 while (*p) {
310 parent = *p; 311 parent = *p;
311 312
312 node = rb_entry(parent, struct active_node, node); 313 node = rb_entry(parent, struct active_node, node);
313 if (node->timeline == idx) { 314 if (node->timeline == idx) {
314 kmem_cache_free(global.slab_cache, prealloc); 315 kmem_cache_free(global.slab_cache, prealloc);
315 goto out; 316 goto out;
316 } 317 }
317 318
318 if (node->timeline < idx) 319 if (node->timeline < idx)
319 p = &parent->rb_right; 320 p = &parent->rb_right;
320 else 321 else
321 p = &parent->rb_left; 322 p = &parent->rb_left;
322 } 323 }
323#endif 324#endif
324 325
325 node = prealloc; 326 node = prealloc;
326 __i915_active_fence_init(&node->base, NULL, node_retire); 327 __i915_active_fence_init(&node->base, NULL, node_retire);
327 node->ref = ref; 328 node->ref = ref;
328 node->timeline = idx; 329 node->timeline = idx;
329 330
330#ifdef __NetBSD__ 331#ifdef __NetBSD__
331 struct active_node *collision __diagused; 332 struct active_node *collision __diagused;
332 collision = rb_tree_insert_node(&ref->tree.rbr_tree, node); 333 collision = rb_tree_insert_node(&ref->tree.rbr_tree, node);
333 KASSERT(collision == node); 334 KASSERT(collision == node);
334#else 335#else
335 rb_link_node(&node->node, parent, p); 336 rb_link_node(&node->node, parent, p);
336 rb_insert_color(&node->node, &ref->tree); 337 rb_insert_color(&node->node, &ref->tree);
337#endif 338#endif
338 339
339out: 340out:
340 ref->cache = node; 341 ref->cache = node;
341 spin_unlock_irq(&ref->tree_lock); 342 spin_unlock_irq(&ref->tree_lock);
342 343
343 BUILD_BUG_ON(offsetof(typeof(*node), base)); 344 BUILD_BUG_ON(offsetof(typeof(*node), base));
344 return &node->base; 345 return &node->base;
345} 346}
346 347
347void __i915_active_init(struct i915_active *ref, 348void __i915_active_init(struct i915_active *ref,
348 int (*active)(struct i915_active *ref), 349 int (*active)(struct i915_active *ref),
349 void (*retire)(struct i915_active *ref), 350 void (*retire)(struct i915_active *ref),
350 struct lock_class_key *mkey, 351 struct lock_class_key *mkey,
351 struct lock_class_key *wkey) 352 struct lock_class_key *wkey)
352{ 353{
353 unsigned long bits; 354 unsigned long bits;
354 355
355 debug_active_init(ref); 356 debug_active_init(ref);
356 357
357 ref->flags = 0; 358 ref->flags = 0;
358 ref->active = active; 359 ref->active = active;
359 ref->retire = ptr_unpack_bits(retire, &bits, 2); 360 ref->retire = ptr_unpack_bits(retire, &bits, 2);
360 if (bits & I915_ACTIVE_MAY_SLEEP) 361 if (bits & I915_ACTIVE_MAY_SLEEP)
361 ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; 362 ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
362 363
363 spin_lock_init(&ref->tree_lock); 364 spin_lock_init(&ref->tree_lock);
364 DRM_INIT_WAITQUEUE(&ref->tree_wq, "i915act"); 365 DRM_INIT_WAITQUEUE(&ref->tree_wq, "i915act");
365#ifdef __NetBSD__ 366#ifdef __NetBSD__
366 rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops); 367 rb_tree_init(&ref->tree.rbr_tree, &active_rb_ops);
367#else 368#else
368 ref->tree = RB_ROOT; 369 ref->tree = RB_ROOT;
369#endif 370#endif
370 ref->cache = NULL; 371 ref->cache = NULL;
371 372
372 init_llist_head(&ref->preallocated_barriers); 373 init_llist_head(&ref->preallocated_barriers);
373 atomic_set(&ref->count, 0); 374 atomic_set(&ref->count, 0);
374 __mutex_init(&ref->mutex, "i915_active", mkey); 375 __mutex_init(&ref->mutex, "i915_active", mkey);
375 __i915_active_fence_init(&ref->excl, NULL, excl_retire); 376 __i915_active_fence_init(&ref->excl, NULL, excl_retire);
376 INIT_WORK(&ref->work, active_work); 377 INIT_WORK(&ref->work, active_work);
377#if IS_ENABLED(CONFIG_LOCKDEP) 378#if IS_ENABLED(CONFIG_LOCKDEP)
378 lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0); 379 lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
379#endif 380#endif
380} 381}
381 382
382static bool ____active_del_barrier(struct i915_active *ref, 383static bool ____active_del_barrier(struct i915_active *ref,
383 struct active_node *node, 384 struct active_node *node,
384 struct intel_engine_cs *engine) 385 struct intel_engine_cs *engine)
385 386
386{ 387{
387 struct llist_node *head = NULL, *tail = NULL; 388 struct llist_node *head = NULL, *tail = NULL;
388 struct llist_node *pos, *next; 389 struct llist_node *pos, *next;
389 390
390 GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context); 391 GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);
391 392
392 /* 393 /*
393 * Rebuild the llist excluding our node. We may perform this 394 * Rebuild the llist excluding our node. We may perform this
394 * outside of the kernel_context timeline mutex and so someone 395 * outside of the kernel_context timeline mutex and so someone
395 * else may be manipulating the engine->barrier_tasks, in 396 * else may be manipulating the engine->barrier_tasks, in
396 * which case either we or they will be upset :) 397 * which case either we or they will be upset :)
397 * 398 *
398 * A second __active_del_barrier() will report failure to claim 399 * A second __active_del_barrier() will report failure to claim
399 * the active_node and the caller will just shrug and know not to 400 * the active_node and the caller will just shrug and know not to
400 * claim ownership of its node. 401 * claim ownership of its node.
401 * 402 *
402 * A concurrent i915_request_add_active_barriers() will miss adding 403 * A concurrent i915_request_add_active_barriers() will miss adding
403 * any of the tasks, but we will try again on the next -- and since 404 * any of the tasks, but we will try again on the next -- and since
404 * we are actively using the barrier, we know that there will be 405 * we are actively using the barrier, we know that there will be
405 * at least another opportunity when we idle. 406 * at least another opportunity when we idle.
406 */ 407 */
407 llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) { 408 llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
408 if (node == barrier_from_ll(pos)) { 409 if (node == barrier_from_ll(pos)) {
409 node = NULL; 410 node = NULL;
410 continue; 411 continue;
411 } 412 }
412 413
413 pos->next = head; 414 pos->next = head;
414 head = pos; 415 head = pos;
415 if (!tail) 416 if (!tail)
416 tail = pos; 417 tail = pos;
417 } 418 }
418 if (head) 419 if (head)
419 llist_add_batch(head, tail, &engine->barrier_tasks); 420 llist_add_batch(head, tail, &engine->barrier_tasks);
420 421
421 return !node; 422 return !node;
422} 423}
423 424
424static bool 425static bool
425__active_del_barrier(struct i915_active *ref, struct active_node *node) 426__active_del_barrier(struct i915_active *ref, struct active_node *node)
426{ 427{
427 return ____active_del_barrier(ref, node, barrier_to_engine(node)); 428 return ____active_del_barrier(ref, node, barrier_to_engine(node));
428} 429}
429 430
430int i915_active_ref(struct i915_active *ref, 431int i915_active_ref(struct i915_active *ref,
431 struct intel_timeline *tl, 432 struct intel_timeline *tl,
432 struct dma_fence *fence) 433 struct dma_fence *fence)
433{ 434{
434 struct i915_active_fence *active; 435 struct i915_active_fence *active;
435 int err; 436 int err;
436 437
437 lockdep_assert_held(&tl->mutex); 438 lockdep_assert_held(&tl->mutex);
438 439
439 /* Prevent reaping in case we malloc/wait while building the tree */ 440 /* Prevent reaping in case we malloc/wait while building the tree */
440 err = i915_active_acquire(ref); 441 err = i915_active_acquire(ref);
441 if (err) 442 if (err)
442 return err; 443 return err;
443 444
444 active = active_instance(ref, tl); 445 active = active_instance(ref, tl);
445 if (!active) { 446 if (!active) {
446 err = -ENOMEM; 447 err = -ENOMEM;
447 goto out; 448 goto out;
448 } 449 }
449 450
450 if (is_barrier(active)) { /* proto-node used by our idle barrier */ 451 if (is_barrier(active)) { /* proto-node used by our idle barrier */
451 /* 452 /*
452 * This request is on the kernel_context timeline, and so 453 * This request is on the kernel_context timeline, and so
453 * we can use it to substitute for the pending idle-barrer 454 * we can use it to substitute for the pending idle-barrer
454 * request that we want to emit on the kernel_context. 455 * request that we want to emit on the kernel_context.
455 */ 456 */
456 __active_del_barrier(ref, node_from_active(active)); 457 __active_del_barrier(ref, node_from_active(active));
457 RCU_INIT_POINTER(active->fence, NULL); 458 RCU_INIT_POINTER(active->fence, NULL);
458 atomic_dec(&ref->count); 459 atomic_dec(&ref->count);
459 } 460 }
460 if (!__i915_active_fence_set(active, fence)) 461 if (!__i915_active_fence_set(active, fence))
461 atomic_inc(&ref->count); 462 atomic_inc(&ref->count);
462 463
463out: 464out:
464 i915_active_release(ref); 465 i915_active_release(ref);
465 return err; 466 return err;
466} 467}
467 468
468void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) 469void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
469{ 470{
470 /* We expect the caller to manage the exclusive timeline ordering */ 471 /* We expect the caller to manage the exclusive timeline ordering */
471 GEM_BUG_ON(i915_active_is_idle(ref)); 472 GEM_BUG_ON(i915_active_is_idle(ref));
472 473
473 if (!__i915_active_fence_set(&ref->excl, f)) 474 if (!__i915_active_fence_set(&ref->excl, f))
474 atomic_inc(&ref->count); 475 atomic_inc(&ref->count);
475} 476}
476 477
477bool i915_active_acquire_if_busy(struct i915_active *ref) 478bool i915_active_acquire_if_busy(struct i915_active *ref)
478{ 479{
479 debug_active_assert(ref); 480 debug_active_assert(ref);
480 return atomic_add_unless(&ref->count, 1, 0); 481 return atomic_add_unless(&ref->count, 1, 0);
481} 482}
482 483
483int i915_active_acquire(struct i915_active *ref) 484int i915_active_acquire(struct i915_active *ref)
484{ 485{
485 int err; 486 int err;
486 487
487 if (i915_active_acquire_if_busy(ref)) 488 if (i915_active_acquire_if_busy(ref))
488 return 0; 489 return 0;
489 490
490 err = mutex_lock_interruptible(&ref->mutex); 491 err = mutex_lock_interruptible(&ref->mutex);
491 if (err) 492 if (err)
492 return err; 493 return err;
493 494
494 if (likely(!i915_active_acquire_if_busy(ref))) { 495 if (likely(!i915_active_acquire_if_busy(ref))) {
495 if (ref->active) 496 if (ref->active)
496 err = ref->active(ref); 497 err = ref->active(ref);
497 if (!err) { 498 if (!err) {
498 spin_lock_irq(&ref->tree_lock); /* __active_retire() */ 499 spin_lock_irq(&ref->tree_lock); /* __active_retire() */
499 debug_active_activate(ref); 500 debug_active_activate(ref);
500 atomic_inc(&ref->count); 501 atomic_inc(&ref->count);
501 spin_unlock_irq(&ref->tree_lock); 502 spin_unlock_irq(&ref->tree_lock);
502 } 503 }
503 } 504 }
504 505
505 mutex_unlock(&ref->mutex); 506 mutex_unlock(&ref->mutex);
506 507
507 return err; 508 return err;
508} 509}
509 510
510void i915_active_release(struct i915_active *ref) 511void i915_active_release(struct i915_active *ref)
511{ 512{
512 debug_active_assert(ref); 513 debug_active_assert(ref);
513 active_retire(ref); 514 active_retire(ref);
514} 515}
515 516
516static void enable_signaling(struct i915_active_fence *active) 517static void enable_signaling(struct i915_active_fence *active)
517{ 518{
518 struct dma_fence *fence; 519 struct dma_fence *fence;
519 520
520 fence = i915_active_fence_get(active); 521 fence = i915_active_fence_get(active);
521 if (!fence) 522 if (!fence)
522 return; 523 return;
523 524
524 dma_fence_enable_sw_signaling(fence); 525 dma_fence_enable_sw_signaling(fence);
525 dma_fence_put(fence); 526 dma_fence_put(fence);
526} 527}
527 528
528int i915_active_wait(struct i915_active *ref) 529int i915_active_wait(struct i915_active *ref)
529{ 530{
530 struct active_node *it, *n; 531 struct active_node *it, *n;
531 int err = 0; 532 int err = 0;
532 533
533 might_sleep(); 534 might_sleep();
534 535
535 if (!i915_active_acquire_if_busy(ref)) 536 if (!i915_active_acquire_if_busy(ref))
536 return 0; 537 return 0;
537 538
538 /* Flush lazy signals */ 539 /* Flush lazy signals */
539 enable_signaling(&ref->excl); 540 enable_signaling(&ref->excl);
540 rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { 541 rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
541 if (is_barrier(&it->base)) /* unconnected idle barrier */ 542 if (is_barrier(&it->base)) /* unconnected idle barrier */
542 continue; 543 continue;
543 544
544 enable_signaling(&it->base); 545 enable_signaling(&it->base);
545 } 546 }
546 /* Any fence added after the wait begins will not be auto-signaled */ 547 /* Any fence added after the wait begins will not be auto-signaled */
547 548
548 i915_active_release(ref); 549 i915_active_release(ref);
549 if (err) 550 if (err)
550 return err; 551 return err;
551 552
552 spin_lock(&ref->tree_lock); 553 spin_lock(&ref->tree_lock);
553 DRM_SPIN_WAIT_UNTIL(err, &ref->tree_wq, &ref->tree_lock, 554 DRM_SPIN_WAIT_UNTIL(err, &ref->tree_wq, &ref->tree_lock,
554 i915_active_is_idle(ref)); 555 i915_active_is_idle(ref));
555 spin_unlock(&ref->tree_lock); 556 spin_unlock(&ref->tree_lock);
556 if (err) 557 if (err)
557 return err; 558 return err;
558 559
559 flush_work(&ref->work); 560 flush_work(&ref->work);
560 return 0; 561 return 0;
561} 562}
562 563
563int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) 564int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
564{ 565{
565 int err = 0; 566 int err = 0;
566 567
567 if (rcu_access_pointer(ref->excl.fence)) { 568 if (rcu_access_pointer(ref->excl.fence)) {
568 struct dma_fence *fence; 569 struct dma_fence *fence;
569 570
570 rcu_read_lock(); 571 rcu_read_lock();
571 fence = dma_fence_get_rcu_safe(&ref->excl.fence); 572 fence = dma_fence_get_rcu_safe(&ref->excl.fence);
572 rcu_read_unlock(); 573 rcu_read_unlock();
573 if (fence) { 574 if (fence) {
574 err = i915_request_await_dma_fence(rq, fence); 575 err = i915_request_await_dma_fence(rq, fence);
575 dma_fence_put(fence); 576 dma_fence_put(fence);
576 } 577 }
577 } 578 }
578 579
579 /* In the future we may choose to await on all fences */ 580 /* In the future we may choose to await on all fences */
580 581
581 return err; 582 return err;
582} 583}
583 584
584void i915_active_fini(struct i915_active *ref) 585void i915_active_fini(struct i915_active *ref)
585{ 586{
586 debug_active_fini(ref); 587 debug_active_fini(ref);
587 GEM_BUG_ON(atomic_read(&ref->count)); 588 GEM_BUG_ON(atomic_read(&ref->count));
588 GEM_BUG_ON(work_pending(&ref->work)); 589 GEM_BUG_ON(work_pending(&ref->work));
589 GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); 590 GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
590 mutex_destroy(&ref->mutex); 591 mutex_destroy(&ref->mutex);
591 spin_lock_destroy(&ref->tree_lock); 592 spin_lock_destroy(&ref->tree_lock);
592} 593}
593 594
594static inline bool is_idle_barrier(struct active_node *node, u64 idx) 595static inline bool is_idle_barrier(struct active_node *node, u64 idx)
595{ 596{
596 return node->timeline == idx && !i915_active_fence_isset(&node->base); 597 return node->timeline == idx && !i915_active_fence_isset(&node->base);
597} 598}
598 599
599static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) 600static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
600{ 601{
601 struct rb_node *prev, *p; 602 struct rb_node *prev, *p;
602 603
603 if (RB_EMPTY_ROOT(&ref->tree)) 604 if (RB_EMPTY_ROOT(&ref->tree))
604 return NULL; 605 return NULL;
605 606
606 spin_lock_irq(&ref->tree_lock); 607 spin_lock_irq(&ref->tree_lock);
607 GEM_BUG_ON(i915_active_is_idle(ref)); 608 GEM_BUG_ON(i915_active_is_idle(ref));
608 609
609 /* 610 /*
610 * Try to reuse any existing barrier nodes already allocated for this 611 * Try to reuse any existing barrier nodes already allocated for this
611 * i915_active, due to overlapping active phases there is likely a 612 * i915_active, due to overlapping active phases there is likely a
612 * node kept alive (as we reuse before parking). We prefer to reuse 613 * node kept alive (as we reuse before parking). We prefer to reuse
613 * completely idle barriers (less hassle in manipulating the llists), 614 * completely idle barriers (less hassle in manipulating the llists),
614 * but otherwise any will do. 615 * but otherwise any will do.
615 */ 616 */
616 if (ref->cache && is_idle_barrier(ref->cache, idx)) { 617 if (ref->cache && is_idle_barrier(ref->cache, idx)) {
617 p = &ref->cache->node; 618 p = &ref->cache->node;
618 goto match; 619 goto match;
619 } 620 }
620 621
621#ifdef __NetBSD__ 622#ifdef __NetBSD__
622 { 623 {
623 struct active_node *node = 624 struct active_node *node =
624 rb_tree_find_node_leq(&ref->tree.rbr_tree, &idx); 625 rb_tree_find_node_leq(&ref->tree.rbr_tree, &idx);
625 if (node) { 626 if (node) {
626 if (node->timeline == idx && is_idle_barrier(node, idx)) { 627 if (node->timeline == idx && is_idle_barrier(node, idx)) {
627 p = &node->node; 628 p = &node->node;
628 goto match; 629 goto match;
629 } 630 }
630 prev = &node->node; 631 prev = &node->node;
631 } else { 632 } else {
632 prev = NULL; 633 prev = NULL;
633 } 634 }
634 } 635 }
635#else 636#else
636 prev = NULL; 637 prev = NULL;
637 p = ref->tree.rb_node; 638 p = ref->tree.rb_node;
638 while (p) { 639 while (p) {
639 struct active_node *node = 640 struct active_node *node =
640 rb_entry(p, struct active_node, node); 641 rb_entry(p, struct active_node, node);
641 642
642 if (is_idle_barrier(node, idx)) 643 if (is_idle_barrier(node, idx))
643 goto match; 644 goto match;
644 645
645 prev = p; 646 prev = p;
646 if (node->timeline < idx) 647 if (node->timeline < idx)
647 p = p->rb_right; 648 p = p->rb_right;
648 else 649 else
649 p = p->rb_left; 650 p = p->rb_left;
650 } 651 }
651#endif 652#endif
652 653
653 /* 654 /*
654 * No quick match, but we did find the leftmost rb_node for the 655 * No quick match, but we did find the leftmost rb_node for the
655 * kernel_context. Walk the rb_tree in-order to see if there were 656 * kernel_context. Walk the rb_tree in-order to see if there were
656 * any idle-barriers on this timeline that we missed, or just use 657 * any idle-barriers on this timeline that we missed, or just use
657 * the first pending barrier. 658 * the first pending barrier.
658 */ 659 */
659 for (p = prev; p; p = rb_next2(&ref->tree, p)) { 660 for (p = prev; p; p = rb_next2(&ref->tree, p)) {
660 struct active_node *node = 661 struct active_node *node =
661 rb_entry(p, struct active_node, node); 662 rb_entry(p, struct active_node, node);
662 struct intel_engine_cs *engine; 663 struct intel_engine_cs *engine;
663 664
664 if (node->timeline > idx) 665 if (node->timeline > idx)
665 break; 666 break;
666 667
667 if (node->timeline < idx) 668 if (node->timeline < idx)
668 continue; 669 continue;
669 670
670 if (is_idle_barrier(node, idx)) 671 if (is_idle_barrier(node, idx))
671 goto match; 672 goto match;
672 673
673 /* 674 /*
674 * The list of pending barriers is protected by the 675 * The list of pending barriers is protected by the
675 * kernel_context timeline, which notably we do not hold 676 * kernel_context timeline, which notably we do not hold
676 * here. i915_request_add_active_barriers() may consume 677 * here. i915_request_add_active_barriers() may consume
677 * the barrier before we claim it, so we have to check 678 * the barrier before we claim it, so we have to check
678 * for success. 679 * for success.
679 */ 680 */
680 engine = __barrier_to_engine(node); 681 engine = __barrier_to_engine(node);
681 smp_rmb(); /* serialise with add_active_barriers */ 682 smp_rmb(); /* serialise with add_active_barriers */
682 if (is_barrier(&node->base) && 683 if (is_barrier(&node->base) &&
683 ____active_del_barrier(ref, node, engine)) 684 ____active_del_barrier(ref, node, engine))
684 goto match; 685 goto match;
685 } 686 }
686 687
687 spin_unlock_irq(&ref->tree_lock); 688 spin_unlock_irq(&ref->tree_lock);
688 689
689 return NULL; 690 return NULL;
690 691
691match: 692match:
692 rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ 693 rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
693 if (p == &ref->cache->node) 694 if (p == &ref->cache->node)
694 ref->cache = NULL; 695 ref->cache = NULL;
695 spin_unlock_irq(&ref->tree_lock); 696 spin_unlock_irq(&ref->tree_lock);
696 697
697 return rb_entry(p, struct active_node, node); 698 return rb_entry(p, struct active_node, node);
698} 699}
699 700
700int i915_active_acquire_preallocate_barrier(struct i915_active *ref, 701int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
701 struct intel_engine_cs *engine) 702 struct intel_engine_cs *engine)
702{ 703{
703 intel_engine_mask_t tmp, mask = engine->mask; 704 intel_engine_mask_t tmp, mask = engine->mask;
704 struct llist_node *first = NULL, *last = NULL; 705 struct llist_node *first = NULL, *last = NULL;
705 struct intel_gt *gt = engine->gt; 706 struct intel_gt *gt = engine->gt;
706 int err; 707 int err;
707 708
708 GEM_BUG_ON(i915_active_is_idle(ref)); 709 GEM_BUG_ON(i915_active_is_idle(ref));
709 710
710 /* Wait until the previous preallocation is completed */ 711 /* Wait until the previous preallocation is completed */
711 while (!llist_empty(&ref->preallocated_barriers)) 712 while (!llist_empty(&ref->preallocated_barriers))
712 cond_resched(); 713 cond_resched();
713 714
714 /* 715 /*
715 * Preallocate a node for each physical engine supporting the target 716 * Preallocate a node for each physical engine supporting the target
716 * engine (remember virtual engines have more than one sibling). 717 * engine (remember virtual engines have more than one sibling).
717 * We can then use the preallocated nodes in 718 * We can then use the preallocated nodes in
718 * i915_active_acquire_barrier() 719 * i915_active_acquire_barrier()
719 */ 720 */
720 for_each_engine_masked(engine, gt, mask, tmp) { 721 for_each_engine_masked(engine, gt, mask, tmp) {
721 u64 idx = engine->kernel_context->timeline->fence_context; 722 u64 idx = engine->kernel_context->timeline->fence_context;
722 struct llist_node *prev = first; 723 struct llist_node *prev = first;
723 struct active_node *node; 724 struct active_node *node;
724 725
725 node = reuse_idle_barrier(ref, idx); 726 node = reuse_idle_barrier(ref, idx);
726 if (!node) { 727 if (!node) {
727 node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); 728 node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
728 if (!node) { 729 if (!node) {
729 err = ENOMEM; 730 err = ENOMEM;
730 goto unwind; 731 goto unwind;
731 } 732 }
732 733
733 memset(node, 0, sizeof(*node)); 734 memset(node, 0, sizeof(*node));
734 RCU_INIT_POINTER(node->base.fence, NULL); 735 RCU_INIT_POINTER(node->base.fence, NULL);
735 node->base.cb.func = node_retire; 736 node->base.cb.func = node_retire;
736 node->timeline = idx; 737 node->timeline = idx;
737 node->ref = ref; 738 node->ref = ref;
738 } 739 }
739 740
740 if (!i915_active_fence_isset(&node->base)) { 741 if (!i915_active_fence_isset(&node->base)) {
741 /* 742 /*
742 * Mark this as being *our* unconnected proto-node. 743 * Mark this as being *our* unconnected proto-node.
743 * 744 *
744 * Since this node is not in any list, and we have 745 * Since this node is not in any list, and we have
745 * decoupled it from the rbtree, we can reuse the 746 * decoupled it from the rbtree, we can reuse the
746 * request to indicate this is an idle-barrier node 747 * request to indicate this is an idle-barrier node
747 * and then we can use the rb_node and list pointers 748 * and then we can use the rb_node and list pointers
748 * for our tracking of the pending barrier. 749 * for our tracking of the pending barrier.
749 */ 750 */
750 RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); 751 RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
751 node->engine = engine; 752 node->engine = engine;
752 atomic_inc(&ref->count); 753 atomic_inc(&ref->count);
753 } 754 }
754 GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); 755 GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
755 756
756 GEM_BUG_ON(barrier_to_engine(node) != engine); 757 GEM_BUG_ON(barrier_to_engine(node) != engine);
757 first = barrier_to_ll(node); 758 first = barrier_to_ll(node);
758 first->next = prev; 759 first->next = prev;
759 if (!last) 760 if (!last)
760 last = first; 761 last = first;
761 intel_engine_pm_get(engine); 762 intel_engine_pm_get(engine);
762 } 763 }
763 764
764 GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); 765 GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
765 llist_add_batch(first, last, &ref->preallocated_barriers); 766 llist_add_batch(first, last, &ref->preallocated_barriers);
766 767
767 return 0; 768 return 0;
768 769
769unwind: 770unwind:
770 while (first) { 771 while (first) {
771 struct active_node *node = barrier_from_ll(first); 772 struct active_node *node = barrier_from_ll(first);
772 773
773 first = first->next; 774 first = first->next;
774 775
775 atomic_dec(&ref->count); 776 atomic_dec(&ref->count);
776 intel_engine_pm_put(barrier_to_engine(node)); 777 intel_engine_pm_put(barrier_to_engine(node));
777 778
778 kmem_cache_free(global.slab_cache, node); 779 kmem_cache_free(global.slab_cache, node);
779 } 780 }
780 return err; 781 return err;
781} 782}
782 783
783void i915_active_acquire_barrier(struct i915_active *ref) 784void i915_active_acquire_barrier(struct i915_active *ref)
784{ 785{
785 struct llist_node *pos, *next; 786 struct llist_node *pos, *next;
786 unsigned long flags; 787 unsigned long flags;
787 788
788 GEM_BUG_ON(i915_active_is_idle(ref)); 789 GEM_BUG_ON(i915_active_is_idle(ref));
789 790
790 /* 791 /*
791 * Transfer the list of preallocated barriers into the 792 * Transfer the list of preallocated barriers into the
792 * i915_active rbtree, but only as proto-nodes. They will be 793 * i915_active rbtree, but only as proto-nodes. They will be
793 * populated by i915_request_add_active_barriers() to point to the 794 * populated by i915_request_add_active_barriers() to point to the
794 * request that will eventually release them. 795 * request that will eventually release them.
795 */ 796 */
796 llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { 797 llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
797 struct active_node *node = barrier_from_ll(pos); 798 struct active_node *node = barrier_from_ll(pos);
798 struct intel_engine_cs *engine = barrier_to_engine(node); 799 struct intel_engine_cs *engine = barrier_to_engine(node);
799 struct rb_node **p, *parent; 800 struct rb_node **p, *parent;
800 801
801 spin_lock_irqsave_nested(&ref->tree_lock, flags, 802 spin_lock_irqsave_nested(&ref->tree_lock, flags,
802 SINGLE_DEPTH_NESTING); 803 SINGLE_DEPTH_NESTING);
803#ifdef __NetBSD__ 804#ifdef __NetBSD__
804 __USE(p); 805 __USE(p);
805 __USE(parent); 806 __USE(parent);
806 struct active_node *collision __diagused; 807 struct active_node *collision __diagused;
807 collision = rb_tree_insert_node(&ref->tree.rbr_tree, node); 808 collision = rb_tree_insert_node(&ref->tree.rbr_tree, node);
808 KASSERT(collision == node); 809 KASSERT(collision == node);
809#else 810#else
810 parent = NULL; 811 parent = NULL;
811 p = &ref->tree.rb_node; 812 p = &ref->tree.rb_node;
812 while (*p) { 813 while (*p) {
813 struct active_node *it; 814 struct active_node *it;
814 815
815 parent = *p; 816 parent = *p;
816 817
817 it = rb_entry(parent, struct active_node, node); 818 it = rb_entry(parent, struct active_node, node);
818 if (it->timeline < node->timeline) 819 if (it->timeline < node->timeline)
819 p = &parent->rb_right; 820 p = &parent->rb_right;
820 else 821 else
821 p = &parent->rb_left; 822 p = &parent->rb_left;
822 } 823 }
823 rb_link_node(&node->node, parent, p); 824 rb_link_node(&node->node, parent, p);
824 rb_insert_color(&node->node, &ref->tree); 825 rb_insert_color(&node->node, &ref->tree);
825#endif 826#endif
826 spin_unlock_irqrestore(&ref->tree_lock, flags); 827 spin_unlock_irqrestore(&ref->tree_lock, flags);
827 828
828 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 829 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
829 llist_add(barrier_to_ll(node), &engine->barrier_tasks); 830 llist_add(barrier_to_ll(node), &engine->barrier_tasks);
830 intel_engine_pm_put(engine); 831 intel_engine_pm_put(engine);
831 } 832 }
832} 833}
833 834
834static struct dma_fence **ll_to_fence_slot(struct llist_node *node) 835static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
835{ 836{
836 return __active_fence_slot(&barrier_from_ll(node)->base); 837 return __active_fence_slot(&barrier_from_ll(node)->base);
837} 838}
838 839
839void i915_request_add_active_barriers(struct i915_request *rq) 840void i915_request_add_active_barriers(struct i915_request *rq)
840{ 841{
841 struct intel_engine_cs *engine = rq->engine; 842 struct intel_engine_cs *engine = rq->engine;
842 struct llist_node *node, *next; 843 struct llist_node *node, *next;
843 unsigned long flags; 844 unsigned long flags;
844 845
845 GEM_BUG_ON(!intel_context_is_barrier(rq->context)); 846 GEM_BUG_ON(!intel_context_is_barrier(rq->context));
846 GEM_BUG_ON(intel_engine_is_virtual(engine)); 847 GEM_BUG_ON(intel_engine_is_virtual(engine));
847 GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline); 848 GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
848 849
849 node = llist_del_all(&engine->barrier_tasks); 850 node = llist_del_all(&engine->barrier_tasks);
850 if (!node) 851 if (!node)
851 return; 852 return;
852 /* 853 /*
853 * Attach the list of proto-fences to the in-flight request such 854 * Attach the list of proto-fences to the in-flight request such
854 * that the parent i915_active will be released when this request 855 * that the parent i915_active will be released when this request
855 * is retired. 856 * is retired.
856 */ 857 */
857 spin_lock_irqsave(&rq->lock, flags); 858 spin_lock_irqsave(&rq->lock, flags);
858 llist_for_each_safe(node, next, node) { 859 llist_for_each_safe(node, next, node) {
859 /* serialise with reuse_idle_barrier */ 860 /* serialise with reuse_idle_barrier */
860 smp_store_mb(*ll_to_fence_slot(node), &rq->fence); 861 smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
861#ifdef __NetBSD__ 862#ifdef __NetBSD__
862 /* XXX ugh bletch */ 863 /* XXX ugh bletch */
863 struct i915_active_fence *active = 864 struct i915_active_fence *active =
864 container_of(node, struct i915_active_fence, llist); 865 container_of(node, struct i915_active_fence, llist);
865 /* XXX something bad went wrong in making this code */ 866 /* XXX something bad went wrong in making this code */
866 KASSERT(active->cb.func == node_retire || 867 KASSERT(active->cb.func == node_retire ||
867 active->cb.func == excl_retire || 868 active->cb.func == excl_retire ||
868 active->cb.func == i915_active_noop); 869 active->cb.func == i915_active_noop);
869 KASSERTMSG(active->fence == &rq->fence, 870 KASSERTMSG(active->fence == &rq->fence,
870 "active=%p fence=%p; rq=%p fence=%p", 871 "active=%p fence=%p; rq=%p fence=%p",
871 active, active->fence, rq, &rq->fence); 872 active, active->fence, rq, &rq->fence);
872 KASSERTMSG(!active->cb.fcb_onqueue, "active=%p", active); 873 KASSERTMSG(!active->cb.fcb_onqueue, "active=%p", active);
873 active->cb.fcb_onqueue = true; 874 active->cb.fcb_onqueue = true;
874 TAILQ_INSERT_TAIL(&rq->fence.f_callbacks, &active->cb, 875 TAILQ_INSERT_TAIL(&rq->fence.f_callbacks, &active->cb,
875 fcb_entry); 876 fcb_entry);
876#else 877#else
877 list_add_tail((struct list_head *)node, &rq->fence.cb_list); 878 list_add_tail((struct list_head *)node, &rq->fence.cb_list);
878#endif 879#endif
879 } 880 }
880 spin_unlock_irqrestore(&rq->lock, flags); 881 spin_unlock_irqrestore(&rq->lock, flags);
881} 882}
882 883
883/* 884/*
884 * __i915_active_fence_set: Update the last active fence along its timeline 885 * __i915_active_fence_set: Update the last active fence along its timeline
885 * @active: the active tracker 886 * @active: the active tracker
886 * @fence: the new fence (under construction) 887 * @fence: the new fence (under construction)
887 * 888 *
888 * Records the new @fence as the last active fence along its timeline in 889 * Records the new @fence as the last active fence along its timeline in
889 * this active tracker, moving the tracking callbacks from the previous 890 * this active tracker, moving the tracking callbacks from the previous
890 * fence onto this one. Returns the previous fence (if not already completed), 891 * fence onto this one. Returns the previous fence (if not already completed),
891 * which the caller must ensure is executed before the new fence. To ensure 892 * which the caller must ensure is executed before the new fence. To ensure
892 * that the order of fences within the timeline of the i915_active_fence is 893 * that the order of fences within the timeline of the i915_active_fence is
893 * understood, it should be locked by the caller. 894 * understood, it should be locked by the caller.
894 */ 895 */
895struct dma_fence * 896struct dma_fence *
896__i915_active_fence_set(struct i915_active_fence *active, 897__i915_active_fence_set(struct i915_active_fence *active,
897 struct dma_fence *fence) 898 struct dma_fence *fence)
898{ 899{
899 struct dma_fence *prev; 900 struct dma_fence *prev;
900 unsigned long flags; 901 unsigned long flags;
901 902
902 if (fence == rcu_access_pointer(active->fence)) 903 if (fence == rcu_access_pointer(active->fence))
903 return fence; 904 return fence;
904 905
905 GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); 906 GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
906 907
907 /* 908 /*
908 * Consider that we have two threads arriving (A and B), with 909 * Consider that we have two threads arriving (A and B), with
909 * C already resident as the active->fence. 910 * C already resident as the active->fence.
910 * 911 *
911 * A does the xchg first, and so it sees C or NULL depending 912 * A does the xchg first, and so it sees C or NULL depending
912 * on the timing of the interrupt handler. If it is NULL, the 913 * on the timing of the interrupt handler. If it is NULL, the
913 * previous fence must have been signaled and we know that 914 * previous fence must have been signaled and we know that
914 * we are first on the timeline. If it is still present, 915 * we are first on the timeline. If it is still present,
915 * we acquire the lock on that fence and serialise with the interrupt 916 * we acquire the lock on that fence and serialise with the interrupt
916 * handler, in the process removing it from any future interrupt 917 * handler, in the process removing it from any future interrupt
917 * callback. A will then wait on C before executing (if present). 918 * callback. A will then wait on C before executing (if present).
918 * 919 *
919 * As B is second, it sees A as the previous fence and so waits for 920 * As B is second, it sees A as the previous fence and so waits for
920 * it to complete its transition and takes over the occupancy for 921 * it to complete its transition and takes over the occupancy for
921 * itself -- remembering that it needs to wait on A before executing. 922 * itself -- remembering that it needs to wait on A before executing.
922 * 923 *
923 * Note the strong ordering of the timeline also provides consistent 924 * Note the strong ordering of the timeline also provides consistent
924 * nesting rules for the fence->lock; the inner lock is always the 925 * nesting rules for the fence->lock; the inner lock is always the
925 * older lock. 926 * older lock.
926 */ 927 */
927 spin_lock_irqsave(fence->lock, flags); 928 spin_lock_irqsave(fence->lock, flags);
928 prev = xchg(__active_fence_slot(active), fence); 929 prev = xchg(__active_fence_slot(active), fence);
929 if (prev) { 930 if (prev) {
930 GEM_BUG_ON(prev == fence); 931 GEM_BUG_ON(prev == fence);
931 spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); 932 spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
932#ifdef __NetBSD__ 933#ifdef __NetBSD__
933 /* XXX ugh bletch */ 934 /* XXX ugh bletch */
934 KASSERT(active->cb.func == node_retire || 935 KASSERT(active->cb.func == node_retire ||
935 active->cb.func == excl_retire || 936 active->cb.func == excl_retire ||
936 active->cb.func == i915_active_noop); 937 active->cb.func == i915_active_noop);
937 if (active->cb.fcb_onqueue) { 938 if (active->cb.fcb_onqueue) {
938 TAILQ_REMOVE(&prev->f_callbacks, &active->cb, 939 TAILQ_REMOVE(&prev->f_callbacks, &active->cb,
939 fcb_entry); 940 fcb_entry);
940 active->cb.fcb_onqueue = false; 941 active->cb.fcb_onqueue = false;
941 } 942 }
942#else 943#else
943 __list_del_entry(&active->cb.node); 944 __list_del_entry(&active->cb.node);
944#endif 945#endif
945 spin_unlock(prev->lock); /* serialise with prev->cb_list */ 946 spin_unlock(prev->lock); /* serialise with prev->cb_list */
946 } 947 }
947 GEM_BUG_ON(rcu_access_pointer(active->fence) != fence); 948 GEM_BUG_ON(rcu_access_pointer(active->fence) != fence);
948#ifdef __NetBSD__ 949#ifdef __NetBSD__
949 /* XXX ugh bletch */ 950 /* XXX ugh bletch */
950 KASSERT(!active->cb.fcb_onqueue); 951 KASSERT(!active->cb.fcb_onqueue);
951 active->cb.fcb_onqueue = true; 952 active->cb.fcb_onqueue = true;
952 TAILQ_INSERT_TAIL(&fence->f_callbacks, &active->cb, fcb_entry); 953 TAILQ_INSERT_TAIL(&fence->f_callbacks, &active->cb, fcb_entry);
953#else 954#else
954 list_add_tail(&active->cb.node, &fence->cb_list); 955 list_add_tail(&active->cb.node, &fence->cb_list);
955#endif 956#endif
956 spin_unlock_irqrestore(fence->lock, flags); 957 spin_unlock_irqrestore(fence->lock, flags);
957 958
958 return prev; 959 return prev;
959} 960}
960 961
961int i915_active_fence_set(struct i915_active_fence *active, 962int i915_active_fence_set(struct i915_active_fence *active,
962 struct i915_request *rq) 963 struct i915_request *rq)
963{ 964{
964 struct dma_fence *fence; 965 struct dma_fence *fence;
965 int err = 0; 966 int err = 0;
966 967
967 /* Must maintain timeline ordering wrt previous active requests */ 968 /* Must maintain timeline ordering wrt previous active requests */
968 rcu_read_lock(); 969 rcu_read_lock();
969 fence = __i915_active_fence_set(active, &rq->fence); 970 fence = __i915_active_fence_set(active, &rq->fence);
970 if (fence) /* but the previous fence may not belong to that timeline! */ 971 if (fence) /* but the previous fence may not belong to that timeline! */
971 fence = dma_fence_get_rcu(fence); 972 fence = dma_fence_get_rcu(fence);
972 rcu_read_unlock(); 973 rcu_read_unlock();
973 if (fence) { 974 if (fence) {
974 err = i915_request_await_dma_fence(rq, fence); 975 err = i915_request_await_dma_fence(rq, fence);
975 dma_fence_put(fence); 976 dma_fence_put(fence);
976 } 977 }
977 978
978 return err; 979 return err;
979} 980}
980 981
981void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) 982void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
982{ 983{
983 active_fence_cb(fence, cb); 984 active_fence_cb(fence, cb);
984} 985}
985 986
986#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 987#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
987#include "selftests/i915_active.c" 988#include "selftests/i915_active.c"
988#endif 989#endif
989 990
990static void i915_global_active_shrink(void) 991static void i915_global_active_shrink(void)
991{ 992{
992 kmem_cache_shrink(global.slab_cache); 993 kmem_cache_shrink(global.slab_cache);
993} 994}
994 995
995static void i915_global_active_exit(void) 996static void i915_global_active_exit(void)
996{ 997{
997 kmem_cache_destroy(global.slab_cache); 998 kmem_cache_destroy(global.slab_cache);
998} 999}
999 1000
1000static struct i915_global_active global = { { 1001static struct i915_global_active global = { {
1001 .shrink = i915_global_active_shrink, 1002 .shrink = i915_global_active_shrink,
1002 .exit = i915_global_active_exit, 1003 .exit = i915_global_active_exit,
1003} }; 1004} };
1004 1005
1005int __init i915_global_active_init(void) 1006int __init i915_global_active_init(void)
1006{ 1007{
1007 global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); 1008 global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
1008 if (!global.slab_cache) 1009 if (!global.slab_cache)
1009 return -ENOMEM; 1010 return -ENOMEM;
1010 1011
1011 i915_global_register(&global.base); 1012 i915_global_register(&global.base);
1012 return 0; 1013 return 0;
1013} 1014}