/* * IntersectSSE -- * * SIMD/SSE-intrinsic kdtree traversal. * * NOTE: It's critical that for each dimension the sign of all the * ray.d[] for each packet is the same. * * Returns: * Void, but hit->tHit and hit->triNum are filled in. */ static void IntersectSSE(const __m128& rayO[4], const __m128& rayD[4], KDTree kdtree[], __m128 *tHit, uint32 triHit[4]) { __m128 tMin, tMax; __m128 activeMask, liveMask; uint32 node; // Precompute invD[0..2] = 1/rayD[0..2] // Precompute rightward[0..2] = NODE_SIZE * (rayD[0..2] < 0) // Intersect ray against scene bounding box and produce tMin, tMax *tHit = _mm_set1_ps(FLT_MAX); triHit[0..3] = -1; liveMask = activeMask = _mm_cmple_ps(tMin, tMax); if (_mm_movemask_ps(liveMask) == 0) { /* All missed scene bbox */ return; } node = 0; while (true) { while (true) { /* Traversing an interior node */ uint32 near, far, rightward, splitAxis; __m128 tSplit, plane, wantNear, wantFar; /* * NOTE: 'left' is a byte offset, not an index. This saves a shift * when masking off the splitAxis and when doing address * computations, but then nodes have to be fetched using pointer * arithmetic. That's hidden behind GET_NODE() here. */ near = GET_NODE(kdtree, node)->left; plane = _mm_set1_ps(GET_NODE(kdtree, node)->plane); splitAxis = near & 0x3; near = near & (~0x3); if (splitAxis == 3) return node; /* At a leaf */ tSplit = _mm_mul_ps(_mm_sub_ps(plane, rayO[splitAxis]), invD[splitAxis]); wantNear = _mm_and_ps(_mm_cmpge_ps(tSplit, tMin), activeMask); wantFar = _mm_and_ps(_mm_cmple_ps(tSplit, tMax), activeMask); /* * NOTE: This relies upon left + 1 being the index of the right child! * * NOTE2: With more registers and CMOVs, this can still be branch-free * and independent of packing. */ far = near + NODE_SIZE - rightward; near += rightward; if (_mm_movemask_ps(wantNear) == 0) { /* Only near to go far */ node = far; continue; } if (_mm_movemask_ps(wantFar) == 0) { /* Only near to go near */ node = near; continue; } stack.Push(_mm_max_ps(tSplit, tMin), tMax, far); tMax = _mm_min_ps(tSplit, tMax); activeMask = _mm_cmple_ps(tMin, tMax); node = near; } /* * Traversing a leaf */ triIndex = (GET_NODE(kdtree, node)->left)>>2; numTris = *((int *) GET_NODE(kdtree, node)->plane); IntersectLeafTriangles(ray, triIndex, numTris, tHit, triHit); liveMask = _mm_cmpgt_ps(*tHit, _mm_max_ps(tMax, tMin)); if (_mm_movemask_ps(liveMask) == 0) { /* Early exit, all hit */ return; } /* * Sometimes a node is popped after the subset of rays that caused it * to be pushed have already hit geometry. In that case, the computed * activeMask will be zero (one tMax is masked with liveMask) and we * just skip the node and pop the next one. */ do { if (stack.IsEmpty()) { /* Nothing else to do, missed everything */ return; } stack.Pop(&tMin, &tMax, &node); tMax = _mm_and_ps(tMax, liveMask); /* Force tMax = 0 for done rays */ activeMask = _mm_cmple_ps(tMin, tMax); } while (_mm_movemask_ps(activeMask) == 0); } }