Skip to content

Commit 09c62f0

Browse files
Sahar A. Kashi authored and brechtvl committed
Fix: SSS and Motion Blur or Curves not working on HIP-RT
This change fixes the remaining failing tests with SSS when using HIP-RT. This includes a crash when SSS is used on curves, and objects with motion blur and SSS rendering black. The root cause for both cases was that traversal always assumed a regular BVH (built for triangles), while curves and motion triangles use custom primitives, which require specialized BVH traversal. This change includes: - Early return from `scene_intersect_local()` for non-triangle and non-motion-triangle primitives. This fixes the `sss_hair.blend` test, and also avoids unnecessary BVH traversal when the local intersection is requested from a curve object. The same early return could be added to other BVH traversal implementations. - Use `hiprtGeomCustomTraversalAnyHitCustomStack` for motion triangle primitives. This fixes objects with motion blur and SSS rendering black. Fixes #135856 Co-authored-by: Sahar A. Kashi <sahar.alipourkashi@amd.com> Co-authored-by: Sergey Sharybin <sergey@blender.org> Pull Request: https://projects.blender.org/blender/blender/pulls/135943
1 parent ebcd987 commit 09c62f0

File tree

3 files changed

+109
-119
lines changed

3 files changed

+109
-119
lines changed

src/kernel/device/hiprt/bvh.h

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -84,24 +84,28 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
8484
ccl_private uint *lcg_state,
8585
const int max_hits)
8686
{
87+
if (local_isect != nullptr) {
88+
local_isect->num_hits = 0;
89+
}
90+
8791
if (!scene_intersect_valid(ray)) {
88-
if (local_isect) {
89-
local_isect->num_hits = 0;
90-
}
92+
return false;
93+
}
94+
95+
const int primitive_type = kernel_data_fetch(objects, local_object).primitive_type;
96+
if (!(primitive_type & PRIMITIVE_TRIANGLE)) {
97+
/* Local intersection functions are only considering triangle and motion triangle primitives.
98+
* If the local intersection is requested from other primitives (curve or point cloud) perform
99+
* an early return to avoid tree traversal with no primitive intersection. */
91100
return false;
92101
}
93102

94103
float3 P = ray->P;
95104
float3 dir = bvh_clamp_direction(ray->D);
96105
float3 idir = bvh_inverse_direction(dir);
97106

98-
if (local_isect != nullptr) {
99-
local_isect->num_hits = 0;
100-
}
101-
102107
const int object_flag = kernel_data_fetch(object_flag, local_object);
103108
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
104-
105109
# if BVH_FEATURE(BVH_MOTION)
106110
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
107111
# else
@@ -127,20 +131,30 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
127131
GET_TRAVERSAL_STACK()
128132

129133
void *local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object));
130-
// we don't need custom intersection functions for SSR
131-
# ifdef HIPRT_SHARED_STACK
132-
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
133-
ray_hip,
134-
stack,
135-
hiprtTraversalHintDefault,
136-
&payload,
137-
kernel_params.table_local_intersect,
138-
2);
139-
# else
140-
hiprtGeomTraversalAnyHit traversal(
141-
local_geom, ray_hip, table, hiprtTraversalHintDefault, &payload);
142-
# endif
143-
hiprtHit hit = traversal.getNextHit();
134+
135+
hiprtHit hit;
136+
if (primitive_type == PRIMITIVE_MOTION_TRIANGLE) {
137+
/* Motion triangle BVH uses custom primitives which requires custom traversal. */
138+
hiprtGeomCustomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
139+
ray_hip,
140+
stack,
141+
hiprtTraversalHintDefault,
142+
&payload,
143+
kernel_params.table_local_intersect,
144+
2);
145+
hit = traversal.getNextHit();
146+
}
147+
else {
148+
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
149+
ray_hip,
150+
stack,
151+
hiprtTraversalHintDefault,
152+
&payload,
153+
kernel_params.table_local_intersect,
154+
2);
155+
hit = traversal.getNextHit();
156+
}
157+
144158
return hit.hasHit();
145159
}
146160
#endif //__BVH_LOCAL__

src/kernel/device/hiprt/common.h

Lines changed: 66 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ struct ShadowPayload {
2828
struct LocalPayload {
2929
KernelGlobals kg;
3030
RaySelfPrimitives self;
31-
int prim_type;
3231
float ray_time;
3332
int local_object;
3433
uint max_hits;
@@ -42,56 +41,35 @@ struct LocalPayload {
4241
RAY_RT.maxT = RAY->tmax; \
4342
RAY_RT.minT = RAY->tmin;
4443

45-
# if defined(HIPRT_SHARED_STACK)
46-
# define GET_TRAVERSAL_STACK() \
47-
Stack stack(kg->global_stack_buffer, kg->shared_stack); \
48-
Instance_Stack instance_stack;
49-
# else
50-
# define GET_TRAVERSAL_STACK()
51-
# endif
52-
53-
# ifdef HIPRT_SHARED_STACK
54-
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
55-
hiprtSceneTraversalAnyHitCustomStack<Stack, Instance_Stack> traversal( \
56-
(hiprtScene)kernel_data.device_bvh, \
57-
ray_hip, \
58-
stack, \
59-
instance_stack, \
60-
visibility, \
61-
hiprtTraversalHintDefault, \
62-
&payload, \
63-
kernel_params.FUNCTION_TABLE, \
64-
RAY_TYPE, \
65-
RAY_TIME);
66-
67-
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
68-
hiprtSceneTraversalClosestCustomStack<Stack, Instance_Stack> traversal( \
69-
(hiprtScene)kernel_data.device_bvh, \
70-
ray_hip, \
71-
stack, \
72-
instance_stack, \
73-
visibility, \
74-
hiprtTraversalHintDefault, \
75-
&payload, \
76-
kernel_params.FUNCTION_TABLE, \
77-
RAY_TYPE, \
78-
RAY_TIME);
79-
# else
80-
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE) \
81-
hiprtSceneTraversalAnyHit traversal(kernel_data.device_bvh, \
82-
ray_hip, \
83-
visibility, \
84-
FUNCTION_TABLE, \
85-
hiprtTraversalHintDefault, \
86-
&payload);
87-
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE) \
88-
hiprtSceneTraversalClosest traversal(kernel_data.device_bvh, \
89-
ray_hip, \
90-
visibility, \
91-
FUNCTION_TABLE, \
92-
hiprtTraversalHintDefault, \
93-
&payload);
94-
# endif
44+
# define GET_TRAVERSAL_STACK() \
45+
Stack stack(kg->global_stack_buffer, kg->shared_stack); \
46+
Instance_Stack instance_stack;
47+
48+
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
49+
hiprtSceneTraversalAnyHitCustomStack<Stack, Instance_Stack> traversal( \
50+
(hiprtScene)kernel_data.device_bvh, \
51+
ray_hip, \
52+
stack, \
53+
instance_stack, \
54+
visibility, \
55+
hiprtTraversalHintDefault, \
56+
&payload, \
57+
kernel_params.FUNCTION_TABLE, \
58+
RAY_TYPE, \
59+
RAY_TIME);
60+
61+
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
62+
hiprtSceneTraversalClosestCustomStack<Stack, Instance_Stack> traversal( \
63+
(hiprtScene)kernel_data.device_bvh, \
64+
ray_hip, \
65+
stack, \
66+
instance_stack, \
67+
visibility, \
68+
hiprtTraversalHintDefault, \
69+
&payload, \
70+
kernel_params.FUNCTION_TABLE, \
71+
RAY_TYPE, \
72+
RAY_TIME);
9573

9674
ccl_device_inline void set_intersect_point(KernelGlobals kg,
9775
hiprtHit &hit,
@@ -243,23 +221,19 @@ ccl_device_inline bool motion_triangle_custom_local_intersect(const hiprtRay &ra
243221

244222
LocalIntersection *local_isect = local_payload->local_isect;
245223

246-
bool b_hit = motion_triangle_intersect_local(kg,
247-
local_isect,
248-
ray.origin,
249-
ray.direction,
250-
local_payload->ray_time,
251-
object_id,
252-
prim_id_global,
253-
prim_id_local,
254-
ray.minT,
255-
ray.maxT,
256-
local_payload->lcg_state,
257-
local_payload->max_hits);
224+
return motion_triangle_intersect_local(kg,
225+
local_isect,
226+
ray.origin,
227+
ray.direction,
228+
local_payload->ray_time,
229+
object_id,
230+
prim_id_global,
231+
prim_id_local,
232+
ray.minT,
233+
ray.maxT,
234+
local_payload->lcg_state,
235+
local_payload->max_hits);
258236

259-
if (b_hit) {
260-
local_payload->prim_type = PRIMITIVE_MOTION_TRIANGLE;
261-
}
262-
return b_hit;
263237
# else
264238
return false;
265239
# endif
@@ -585,18 +559,33 @@ ccl_device_inline bool local_intersection_filter(const hiprtRay &ray,
585559
# ifdef __BVH_LOCAL__
586560
LocalPayload *payload = (LocalPayload *)user_data;
587561
KernelGlobals kg = payload->kg;
588-
int object_id = payload->local_object;
589-
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
590-
int prim = hit.primID + prim_offset;
562+
const int object_id = payload->local_object;
563+
const uint max_hits = payload->max_hits;
564+
565+
/* Triangle primitive uses hardware intersection, other primitives do custom intersection
566+
* which does reservoir sampling for intersections. For the custom primitives only check
567+
* whether we can stop traversal early on. The rest of the checks here are only done for the
568+
* regular triangles. */
569+
const int primitive_type = kernel_data_fetch(objects, object_id).primitive_type;
570+
if (primitive_type != PRIMITIVE_TRIANGLE) {
571+
if (max_hits == 0) {
572+
return false;
573+
}
574+
return true;
575+
}
576+
577+
const int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
578+
const int prim = hit.primID + prim_offset;
591579
# ifndef __RAY_OFFSET__
592580
if (intersection_skip_self_local(payload->self, prim)) {
593581
return true; // continue search
594582
}
595583
# endif
596-
uint max_hits = payload->max_hits;
584+
597585
if (max_hits == 0) {
598586
return false; // stop search
599587
}
588+
600589
int hit_index = 0;
601590
if (payload->lcg_state) {
602591
for (int i = min(max_hits, payload->local_isect->num_hits) - 1; i >= 0; --i) {
@@ -618,19 +607,20 @@ ccl_device_inline bool local_intersection_filter(const hiprtRay &ray,
618607
}
619608
payload->local_isect->num_hits = 1;
620609
}
610+
621611
Intersection *isect = &payload->local_isect->hits[hit_index];
622612
isect->t = hit.t;
623-
isect->prim = prim;
624-
isect->object = object_id;
625-
isect->type = PRIMITIVE_TRIANGLE; // kernel_data_fetch(__objects, object_id).primitive_type;
626-
627613
isect->u = hit.uv.x;
628614
isect->v = hit.uv.y;
615+
isect->prim = prim;
616+
isect->object = object_id;
617+
isect->type = primitive_type;
629618

630619
payload->local_isect->Ng[hit_index] = hit.normal;
631620

632621
return true;
633-
622+
# else
623+
return false;
634624
# endif
635625
}
636626

src/kernel/device/hiprt/globals.h

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414
#include "util/color.h" // IWYU pragma: export
1515
#include "util/texture.h" // IWYU pragma: export
1616

17-
#define HIPRT_SHARED_STACK
18-
1917
/* The size of global stack available to each thread (memory reserved for each thread in
2018
* global_stack_buffer). */
2119
#define HIPRT_THREAD_STACK_SIZE 64
@@ -36,29 +34,20 @@ CCL_NAMESPACE_BEGIN
3634

3735
struct KernelGlobalsGPU {
3836
hiprtGlobalStackBuffer global_stack_buffer;
39-
#ifdef HIPRT_SHARED_STACK
4037
hiprtSharedStackBuffer shared_stack;
41-
#endif
4238
};
4339

4440
using KernelGlobals = ccl_global KernelGlobalsGPU *ccl_restrict;
4541

46-
#if defined(HIPRT_SHARED_STACK)
47-
4842
/* This macro allocates shared memory. KernelGlobals is used to pass the shared memory down to
4943
* the intersection functions. */
50-
# define HIPRT_INIT_KERNEL_GLOBAL() \
51-
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
52-
ccl_global KernelGlobalsGPU kg_gpu; \
53-
KernelGlobals kg = &kg_gpu; \
54-
kg->shared_stack.stackData = &shared_stack[0]; \
55-
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
56-
kg->global_stack_buffer = stack_buffer;
57-
#else
58-
# define HIPRT_INIT_KERNEL_GLOBAL() \
59-
KernelGlobals kg = nullptr; \
60-
kg->global_stack_buffer = stack_buffer;
61-
#endif
44+
#define HIPRT_INIT_KERNEL_GLOBAL() \
45+
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
46+
ccl_global KernelGlobalsGPU kg_gpu; \
47+
KernelGlobals kg = &kg_gpu; \
48+
kg->shared_stack.stackData = &shared_stack[0]; \
49+
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
50+
kg->global_stack_buffer = stack_buffer;
6251

6352
struct KernelParamsHIPRT {
6453
KernelData data;
@@ -149,11 +138,8 @@ enum Filter_Function_Table_Index {
149138
#ifdef __KERNEL_GPU__
150139
__constant__ KernelParamsHIPRT kernel_params;
151140

152-
# ifdef HIPRT_SHARED_STACK
153141
typedef hiprtGlobalStack Stack;
154142
typedef hiprtEmptyInstanceStack Instance_Stack;
155-
# endif
156-
157143
#endif
158144

159145
/* Abstraction macros */

0 commit comments

Comments
 (0)