Nori
|
00001 /* 00002 This class contains a generic implementation of a KD-Tree for shapes in 00003 n dimensions. It was originally part of Mitsuba and slightly adapted for 00004 use in Nori. 00005 00006 Copyright (c) 2007-2012 by Wenzel Jakob 00007 00008 Mitsuba is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License Version 3 00010 as published by the Free Software Foundation. 00011 00012 Mitsuba is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 GNU General Public License for more details. 00016 00017 You should have received a copy of the GNU General Public License 00018 along with this program. If not, see <http://www.gnu.org/licenses/>. 00019 */ 00020 00021 /* 00022 * ======================================================================= 00023 * WARNING WARNING WARNING WARNING WARNING WARNING 00024 * ======================================================================= 00025 * Remember to put on SAFETY GOGGLES before looking at this file. You 00026 * are most certainly not expected to read or understand any of it. 00027 * ======================================================================= 00028 */ 00029 00030 #if !defined(__KDTREE_GENERIC_H) 00031 #define __KDTREE_GENERIC_H 00032 00033 #include <nori/bbox.h> 00034 #include <boost/static_assert.hpp> 00035 #include <boost/tuple/tuple.hpp> 00036 #include <QElapsedTimer> 00037 #include <QWaitCondition> 00038 #include <QMutex> 00039 #include <QThread> 00040 #include <stack> 00041 #include <map> 00042 00043 /** Compile-time KD-tree depth limit. Allows to put certain 00044 data structures on the stack */ 00045 #define NORI_KD_MAXDEPTH 48 00046 00047 /// Set to 1 to display various statistics about the kd-tree 00048 #define NORI_KD_VERBOSE 0 00049 00050 /// OrderedChunkAllocator: don't create chunks smaller than 512 KiB 00051 #define NORI_KD_MIN_ALLOC 512*1024 00052 00053 /// Allocate nodes & index lists in blocks of 512 KiB 00054 #define NORI_KD_BLOCKSIZE_KD (512*1024/sizeof(KDNode)) 00055 #define NORI_KD_BLOCKSIZE_IDX (512*1024/sizeof(uint32_t)) 00056 00057 /** 00058 * \brief To avoid numerical issues, the size of the scene 00059 * bounding box is increased by this amount 00060 */ 00061 #define NORI_KD_BBOX_EPSILON 1e-3f 00062 00063 NORI_NAMESPACE_BEGIN 00064 00065 /** 00066 * \brief Special "ordered" memory allocator 00067 * 00068 * During kd-tree construction, large amounts of memory are required 00069 * to temporarily hold index and edge event lists. When not implemented 00070 * properly, these allocations can become a critical bottleneck. 00071 * The class \ref OrderedChunkAllocator provides a specialized 00072 * memory allocator, which reserves memory in chunks of at least 00073 * 128KiB. An important assumption made by the allocator is that 00074 * memory will be released in the exact same order, in which it was 00075 * previously allocated. This makes it possible to create an 00076 * implementation with a very low memory overhead. Note that no locking 00077 * is done, hence each thread will need its own allocator. 00078 * 00079 * \author Wenzel Jakob 00080 */ 00081 class OrderedChunkAllocator { 00082 public: 00083 inline OrderedChunkAllocator(size_t minAllocation = NORI_KD_MIN_ALLOC) 00084 : m_minAllocation(minAllocation) { 00085 m_chunks.reserve(16); 00086 } 00087 00088 ~OrderedChunkAllocator() { 00089 cleanup(); 00090 } 00091 00092 /** 00093 * \brief Release all memory used by the allocator 00094 */ 00095 void cleanup() { 00096 for (std::vector<Chunk>::iterator it = m_chunks.begin(); 00097 it != m_chunks.end(); ++it) 00098 freeAligned((*it).start); 00099 m_chunks.clear(); 00100 } 00101 00102 /** 00103 * \brief Merge the chunks of another allocator into this one 00104 */ 00105 void merge(const OrderedChunkAllocator &other) { 00106 m_chunks.reserve(m_chunks.size() + other.m_chunks.size()); 00107 m_chunks.insert(m_chunks.end(), other.m_chunks.begin(), 00108 other.m_chunks.end()); 00109 } 00110 00111 /** 00112 * \brief Forget about all chunks without actually freeing them. 00113 * This is useful when the chunks have been merged into another 00114 * allocator. 00115 */ 00116 void forget() { 00117 m_chunks.clear(); 00118 } 00119 00120 /** 00121 * \brief Request a block of memory from the allocator 00122 * 00123 * Walks through the list of chunks to find one with enough 00124 * free memory. If no chunk could be found, a new one is created. 00125 */ 00126 template <typename T> T * allocate(size_t size) { 00127 size *= sizeof(T); 00128 for (std::vector<Chunk>::iterator it = m_chunks.begin(); 00129 it != m_chunks.end(); ++it) { 00130 Chunk &chunk = *it; 00131 if (chunk.remainder() >= size) { 00132 T* result = reinterpret_cast<T *>(chunk.cur); 00133 chunk.cur += size; 00134 return result; 00135 } 00136 } 00137 00138 /* No chunk had enough free memory */ 00139 size_t allocSize = std::max(size, 00140 m_minAllocation); 00141 00142 Chunk chunk; 00143 chunk.start = (uint8_t *) allocAligned(allocSize); 00144 chunk.cur = chunk.start + size; 00145 chunk.size = allocSize; 00146 m_chunks.push_back(chunk); 00147 00148 return reinterpret_cast<T *>(chunk.start); 00149 } 00150 00151 template <typename T> void release(T *ptr) { 00152 for (std::vector<Chunk>::iterator it = m_chunks.begin(); 00153 it != m_chunks.end(); ++it) { 00154 Chunk &chunk = *it; 00155 if ((uint8_t *) ptr >= chunk.start && 00156 (uint8_t *) ptr < chunk.start + chunk.size) { 00157 chunk.cur = (uint8_t *) ptr; 00158 return; 00159 } 00160 } 00161 } 00162 00163 /** 00164 * \brief Shrink the size of the last allocated chunk 00165 */ 00166 template <typename T> void shrinkAllocation(T *ptr, size_t newSize) { 00167 newSize *= sizeof(T); 00168 for (std::vector<Chunk>::iterator it = m_chunks.begin(); 00169 it != m_chunks.end(); ++it) { 00170 Chunk &chunk = *it; 00171 if ((uint8_t *) ptr >= chunk.start && 00172 (uint8_t *) ptr < chunk.start + chunk.size) { 00173 chunk.cur = (uint8_t *) ptr + newSize; 00174 return; 00175 } 00176 } 00177 } 00178 00179 inline size_t getChunkCount() const { return m_chunks.size(); } 00180 00181 /** 00182 * \brief Return the total amount of chunk memory in bytes 00183 */ 00184 size_t size() const { 00185 size_t result = 0; 00186 for (std::vector<Chunk>::const_iterator it = m_chunks.begin(); 00187 it != m_chunks.end(); ++it) 00188 result += (*it).size; 00189 return result; 00190 } 00191 00192 /** 00193 * \brief Return the total amount of used memory in bytes 00194 */ 00195 size_t used() const { 00196 size_t result = 0; 00197 for (std::vector<Chunk>::const_iterator it = m_chunks.begin(); 00198 it != m_chunks.end(); ++it) 00199 result += (*it).used(); 00200 return result; 00201 } 00202 private: 00203 struct Chunk { 00204 size_t size; 00205 uint8_t *start, *cur; 00206 00207 inline size_t used() const { 00208 return cur - start; 00209 } 00210 00211 inline size_t remainder() const { 00212 return size - used(); 00213 } 00214 }; 00215 00216 size_t m_minAllocation; 00217 std::vector<Chunk> m_chunks; 00218 }; 00219 00220 /** 00221 * \brief Basic vector implementation, which stores all data 00222 * in a list of fixed-sized blocks. 00223 * 00224 * This leads to a more conservative memory usage when the 00225 * final size of a (possibly very large) growing vector is 00226 * unknown. Also, frequent reallocations & copies are avoided. 00227 * 00228 * \author Wenzel Jakob 00229 */ 00230 template <typename T, size_t BlockSize> class BlockedVector { 00231 public: 00232 BlockedVector() : m_pos(0) {} 00233 00234 ~BlockedVector() { 00235 clear(); 00236 } 00237 00238 /** 00239 * \brief Append an element to the end 00240 */ 00241 inline void push_back(const T &value) { 00242 size_t blockIdx = m_pos / BlockSize; 00243 size_t offset = m_pos % BlockSize; 00244 if (blockIdx == m_blocks.size()) 00245 m_blocks.push_back(new T[BlockSize]); 00246 m_blocks[blockIdx][offset] = value; 00247 m_pos++; 00248 } 00249 00250 /** 00251 * \brief Allocate a certain number of elements and 00252 * return a pointer to the first one. 00253 * 00254 * The implementation will ensure that they lie 00255 * contiguous in memory -- note that this can potentially 00256 * create unused elements in the previous block if a new 00257 * one has to be allocated. 00258 */ 00259 inline T * allocate(size_t size) { 00260 size_t blockIdx = m_pos / BlockSize; 00261 size_t offset = m_pos % BlockSize; 00262 T *result; 00263 if (EXPECT_TAKEN(offset + size <= BlockSize)) { 00264 if (blockIdx == m_blocks.size()) 00265 m_blocks.push_back(new T[BlockSize]); 00266 result = m_blocks[blockIdx] + offset; 00267 m_pos += size; 00268 } else { 00269 ++blockIdx; 00270 if (blockIdx == m_blocks.size()) 00271 m_blocks.push_back(new T[BlockSize]); 00272 result = m_blocks[blockIdx]; 00273 m_pos += BlockSize - offset + size; 00274 } 00275 return result; 00276 } 00277 00278 inline T &operator[](size_t index) { 00279 return *(m_blocks[index / BlockSize] + 00280 (index % BlockSize)); 00281 } 00282 00283 inline const T &operator[](size_t index) const { 00284 return *(m_blocks[index / BlockSize] + 00285 (index % BlockSize)); 00286 } 00287 00288 00289 /** 00290 * \brief Return the currently used number of items 00291 */ 00292 inline size_t size() const { 00293 return m_pos; 00294 } 00295 00296 /** 00297 * \brief Return the number of allocated blocks 00298 */ 00299 inline size_t blockCount() const { 00300 return m_blocks.size(); 00301 } 00302 00303 /** 00304 * \brief Return the total capacity 00305 */ 00306 inline size_t capacity() const { 00307 return m_blocks.size() * BlockSize; 00308 } 00309 00310 /** 00311 * \brief Resize the vector to the given size. 00312 * 00313 * Note: this implementation doesn't support 00314 * enlarging the vector and simply changes the 00315 * last item pointer. 00316 */ 00317 inline void resize(size_t pos) { 00318 m_pos = pos; 00319 } 00320 00321 /** 00322 * \brief Release all memory 00323 */ 00324 void clear() { 00325 for (typename std::vector<T *>::iterator it = m_blocks.begin(); 00326 it != m_blocks.end(); ++it) 00327 delete[] *it; 00328 m_blocks.clear(); 00329 m_pos = 0; 00330 } 00331 private: 00332 std::vector<T *> m_blocks; 00333 size_t m_pos; 00334 }; 00335 00336 /** 00337 * \brief Compact storage for primitive classifcation 00338 * 00339 * When classifying primitives with respect to a split plane, 00340 * a data structure is needed to hold the tertiary result of 00341 * this operation. This class implements a compact storage 00342 * (2 bits per entry) in the spirit of the std::vector<bool> 00343 * specialization. 00344 * 00345 * \author Wenzel Jakob 00346 */ 00347 class ClassificationStorage { 00348 public: 00349 ClassificationStorage() 00350 : m_buffer(NULL), m_bufferSize(0) { } 00351 00352 ~ClassificationStorage() { 00353 if (m_buffer) 00354 delete[] m_buffer; 00355 } 00356 00357 void setPrimitiveCount(size_t size) { 00358 if (m_buffer) 00359 delete[] m_buffer; 00360 if (size > 0) { 00361 m_bufferSize = size/4 + ((size % 4) > 0 ? 1 : 0); 00362 m_buffer = new uint8_t[m_bufferSize]; 00363 } else { 00364 m_buffer = NULL; 00365 } 00366 } 00367 00368 inline void set(uint32_t index, int value) { 00369 uint8_t *ptr = m_buffer + (index >> 2); 00370 uint8_t shift = (index & 3) << 1; 00371 *ptr = (*ptr & ~(3 << shift)) | (value << shift); 00372 } 00373 00374 inline int get(uint32_t index) const { 00375 uint8_t *ptr = m_buffer + (index >> 2); 00376 uint8_t shift = (index & 3) << 1; 00377 return (*ptr >> shift) & 3; 00378 } 00379 00380 inline size_t size() const { 00381 return m_bufferSize; 00382 } 00383 private: 00384 uint8_t *m_buffer; 00385 size_t m_bufferSize; 00386 }; 00387 00388 /** 00389 * \brief Base class of all kd-trees. 00390 * 00391 * This class defines the byte layout for KD-tree nodes and 00392 * provides methods for querying the tree structure. 00393 */ 00394 template <typename BoundingBoxType> class KDTreeBase { 00395 public: 00396 /// Index number format (max 2^32 prims) 00397 typedef uint32_t IndexType; 00398 00399 /// Size number format 00400 typedef uint32_t SizeType; 00401 00402 /** 00403 * \brief KD-tree node in 8 bytes. 00404 */ 00405 struct KDNode { 00406 union { 00407 /* Inner node */ 00408 struct { 00409 /* Bit layout: 00410 31 : False (inner node) 00411 30 : Indirection node flag 00412 29-3 : Offset to the left child 00413 or indirection table entry 00414 2-0 : Split axis 00415 */ 00416 uint32_t combined; 00417 00418 /// Split plane coordinate 00419 float split; 00420 } inner; 00421 00422 /* Leaf node */ 00423 struct { 00424 /* Bit layout: 00425 31 : True (leaf node) 00426 30-0 : Offset to the node's primitive list 00427 */ 00428 uint32_t combined; 00429 00430 /// End offset of the primitive list 00431 uint32_t end; 00432 } leaf; 00433 }; 00434 00435 enum EMask { 00436 ETypeMask = 1 << 31, 00437 EIndirectionMask = 1 << 30, 00438 ELeafOffsetMask = ~ETypeMask, 00439 EInnerAxisMask = 0x3, 00440 EInnerOffsetMask = ~(EInnerAxisMask + EIndirectionMask), 00441 ERelOffsetLimit = (1<<28) - 1 00442 }; 00443 00444 /// Initialize a leaf kd-Tree node 00445 inline void initLeafNode(unsigned int offset, unsigned int numPrims) { 00446 leaf.combined = (uint32_t) ETypeMask | offset; 00447 leaf.end = offset + numPrims; 00448 } 00449 00450 /** 00451 * Initialize an interior kd-Tree node. Reports a failure if the 00452 * relative offset to the left child node is too large. 00453 */ 00454 inline bool initInnerNode(int axis, float split, ptrdiff_t relOffset) { 00455 if (relOffset < 0 || relOffset > ERelOffsetLimit) 00456 return false; 00457 inner.combined = axis | ((uint32_t) relOffset << 2); 00458 inner.split = split; 00459 return true; 00460 } 00461 00462 /** 00463 * \brief Initialize an interior indirection node. 00464 * 00465 * Indirections are necessary whenever the children cannot be 00466 * referenced using a relative pointer, which can happen when 00467 * they lie in different memory chunks. In this case, the node 00468 * stores an index into a globally shared pointer list. 00469 */ 00470 inline void initIndirectionNode(int axis, float split, 00471 uint32_t indirectionEntry) { 00472 inner.combined = EIndirectionMask 00473 | ((uint32_t) indirectionEntry << 2) 00474 | axis; 00475 inner.split = split; 00476 } 00477 00478 /// Is this a leaf node? 00479 inline bool isLeaf() const { 00480 return leaf.combined & (uint32_t) ETypeMask; 00481 } 00482 00483 /// Is this an indirection node? 00484 inline bool isIndirection() const { 00485 return leaf.combined & (uint32_t) EIndirectionMask; 00486 } 00487 00488 /// Assuming this is a leaf node, return the first primitive index 00489 inline IndexType getPrimStart() const { 00490 return leaf.combined & (uint32_t) ELeafOffsetMask; 00491 } 00492 00493 /// Assuming this is a leaf node, return the last primitive index 00494 inline IndexType getPrimEnd() const { 00495 return leaf.end; 00496 } 00497 00498 /// Return the index of an indirection node 00499 inline IndexType getIndirectionIndex() const { 00500 return(inner.combined & (uint32_t) EInnerOffsetMask) >> 2; 00501 } 00502 00503 /// Return the left child (assuming that this is an interior node) 00504 inline const KDNode * getLeft() const { 00505 return this + 00506 ((inner.combined & (uint32_t) EInnerOffsetMask) >> 2); 00507 } 00508 00509 /// Return the sibling of the current node 00510 inline const KDNode * getSibling() const { 00511 return (const KDNode *) ((ptrdiff_t) this ^ (ptrdiff_t) 8); 00512 } 00513 00514 /// Return the left child (assuming that this is an interior node) 00515 inline KDNode * getLeft() { 00516 return this + 00517 ((inner.combined & (uint32_t) EInnerOffsetMask) >> 2); 00518 } 00519 00520 /// Return the left child (assuming that this is an interior node) 00521 inline const KDNode * getRight() const { 00522 return getLeft() + 1; 00523 } 00524 00525 /** 00526 * \brief Return the split plane location (assuming that this 00527 * is an interior node) 00528 */ 00529 inline float getSplit() const { 00530 return inner.split; 00531 } 00532 00533 /// Return the split axis (assuming that this is an interior node) 00534 inline int getAxis() const { 00535 return inner.combined & (uint32_t) EInnerAxisMask; 00536 } 00537 }; 00538 00539 BOOST_STATIC_ASSERT(sizeof(KDNode) == 8); 00540 00541 /// Return the root node of the kd-tree 00542 inline const KDNode *getRoot() const { 00543 return m_nodes; 00544 } 00545 00546 /// Return whether or not the kd-tree has been built 00547 inline bool isBuilt() const { 00548 return m_nodes != NULL; 00549 } 00550 00551 /// Return a (slightly enlarged) axis-aligned bounding box containing all primitives 00552 inline const BoundingBoxType &getBoundingBox() const { return m_bbox; } 00553 00554 /// Return a tight axis-aligned bounding box containing all primitives 00555 inline const BoundingBoxType &getTightBoundingBox() const { return m_tightBBox;} 00556 protected: 00557 KDNode *m_nodes; 00558 BoundingBoxType m_bbox, m_tightBBox; 00559 }; 00560 00561 #if defined(WIN32) 00562 /* Use strict IEEE 754 floating point computations 00563 for the following kd-tree building code */ 00564 NORI_NAMESPACE_END 00565 #pragma float_control(precise, on) 00566 NORI_NAMESPACE_BEGIN 00567 #endif 00568 00569 /** 00570 * \brief Optimized KD-tree acceleration data structure for 00571 * n-dimensional (n<=4) shapes and various queries on them. 00572 * 00573 * Note that this class mainly concerns itself with data that cover a 00574 * region of space. For point data, other implementations will be more 00575 * suitable. The most important application in Mitsuba is the fast 00576 * construction of high-quality trees for ray tracing. See the class 00577 * \ref SAHKDTree for this specialization. 00578 * 00579 * The code in this class is a fully generic kd-tree implementation, which 00580 * can theoretically support any kind of shape. However, subclasses still 00581 * need to provide the following signatures for a functional implementation: 00582 * 00583 * \code 00584 * /// Return the total number of primitives 00585 * inline SizeType getPrimitiveCount() const; 00586 * 00587 * /// Return the axis-aligned bounding box of a certain primitive 00588 * inline BoundingBoxType getBoundingBox(IndexType primIdx) const; 00589 * 00590 * /// Return the bounding box of a primitive when clipped to another box 00591 * inline BoundingBoxType getClippedBoundingBox(IndexType primIdx, const BoundingBoxType &bbox) const; 00592 * \endcode 00593 * 00594 * This class follows the "Curiously recurring template" design pattern 00595 * so that the above functions can be inlined (in particular, no virtual 00596 * calls will be necessary!). 00597 * 00598 * When the kd-tree is initially built, this class optimizes a cost 00599 * heuristic every time a split plane has to be chosen. For ray tracing, 00600 * the heuristic is usually the surface area heuristic (SAH), but other 00601 * choices are possible as well. The tree construction heuristic must be 00602 * passed as a template argument, which can use a supplied bounding box and 00603 * split candidate to compute approximate probabilities of recursing into 00604 * the left and right subrees during a typical kd-tree query operation. 00605 * See \ref SurfaceAreaHeuristic for an example of the interface that 00606 * must be implemented. 00607 * 00608 * The kd-tree construction algorithm creates 'perfect split' trees as 00609 * outlined in the paper "On Building fast kd-Trees for Ray Tracing, and on 00610 * doing that in O(N log N)" by Ingo Wald and Vlastimil Havran. This works 00611 * even when the tree is not meant to be used for ray tracing. 00612 * For polygonal meshes, the involved Sutherland-Hodgman iterations can be 00613 * quite expensive in terms of the overall construction time. The 00614 * \ref setClip method can be used to deactivate perfect splits at the 00615 * cost of a lower-quality tree. 00616 * 00617 * Because the O(N log N) construction algorithm tends to cause many 00618 * incoherent memory accesses, a fast approximate technique (Min-max 00619 * binning) is used near the top of the tree, which significantly reduces 00620 * cache misses. Once the input data has been narrowed down to a 00621 * reasonable amount, the implementation switches over to the O(N log N) 00622 * builder. When multiple processors are available, the build process runs 00623 * in parallel. 00624 * 00625 * \author Wenzel Jakob 00626 * \ingroup librender 00627 */ 00628 template <typename BoundingBoxType, typename TreeConstructionHeuristic, typename Derived> 00629 class GenericKDTree : public KDTreeBase<BoundingBoxType> { 00630 protected: 00631 // Some forward declarations 00632 struct MinMaxBins; 00633 struct EdgeEvent; 00634 struct EdgeEventOrdering; 00635 00636 public: 00637 typedef KDTreeBase<BoundingBoxType> Parent; 00638 typedef typename Parent::SizeType SizeType; 00639 typedef typename Parent::IndexType IndexType; 00640 typedef typename Parent::KDNode KDNode; 00641 typedef typename BoundingBoxType::Scalar Scalar; 00642 typedef typename BoundingBoxType::PointType PointType; 00643 typedef typename BoundingBoxType::VectorType VectorType; 00644 00645 using Parent::m_nodes; 00646 using Parent::m_bbox; 00647 using Parent::m_tightBBox; 00648 using Parent::isBuilt; 00649 00650 /** 00651 * \brief Create a new kd-tree instance initialized with 00652 * the default parameters. 00653 */ 00654 GenericKDTree() : m_indices(NULL) { 00655 m_nodes = NULL; 00656 m_traversalCost = 15; 00657 m_queryCost = 20; 00658 m_emptySpaceBonus = 0.9f; 00659 m_clip = true; 00660 m_stopPrims = 6; 00661 m_maxBadRefines = 3; 00662 m_exactPrimThreshold = 65536; 00663 m_maxDepth = 0; 00664 m_retract = true; 00665 m_parallelBuild = true; 00666 m_minMaxBins = 128; 00667 } 00668 00669 /** 00670 * \brief Release all memory 00671 */ 00672 virtual ~GenericKDTree() { 00673 if (m_indices) 00674 delete[] m_indices; 00675 if (m_nodes) 00676 freeAligned(m_nodes-1); // undo alignment shift 00677 } 00678 00679 /** 00680 * \brief Set the traversal cost used by the tree construction heuristic 00681 */ 00682 inline void setTraversalCost(float traversalCost) { 00683 m_traversalCost = traversalCost; 00684 } 00685 00686 /** 00687 * \brief Return the traversal cost used by the tree construction heuristic 00688 */ 00689 inline float getTraversalCost() const { 00690 return m_traversalCost; 00691 } 00692 00693 /** 00694 * \brief Set the query cost used by the tree construction heuristic 00695 * (This is the average cost for testing a contained shape against 00696 * a kd-tree search query) 00697 */ 00698 inline void setQueryCost(float queryCost) { 00699 m_queryCost = queryCost; 00700 } 00701 00702 /** 00703 * \brief Return the query cost used by the tree construction heuristic 00704 * (This is the average cost for testing a contained shape against 00705 * a kd-tree search query) 00706 */ 00707 inline float getQueryCost() const { 00708 return m_queryCost; 00709 } 00710 00711 /** 00712 * \brief Set the bonus factor for empty space used by the 00713 * tree construction heuristic 00714 */ 00715 inline void setEmptySpaceBonus(float emptySpaceBonus) { 00716 m_emptySpaceBonus = emptySpaceBonus; 00717 } 00718 00719 /** 00720 * \brief Return the bonus factor for empty space used by the 00721 * tree construction heuristic 00722 */ 00723 inline float getEmptySpaceBonus() const { 00724 return m_emptySpaceBonus; 00725 } 00726 00727 /** 00728 * \brief Set the maximum tree depth (0 = use heuristic) 00729 */ 00730 inline void setMaxDepth(SizeType maxDepth) { 00731 m_maxDepth = maxDepth; 00732 } 00733 00734 /** 00735 * \brief Set the number of bins used for Min-Max binning 00736 */ 00737 inline void setMinMaxBins(SizeType minMaxBins) { 00738 m_minMaxBins = minMaxBins; 00739 } 00740 00741 /** 00742 * \brief Return the number of bins used for Min-Max binning 00743 */ 00744 inline SizeType getMinMaxBins() const { 00745 return m_minMaxBins; 00746 } 00747 00748 /** 00749 * \brief Return maximum tree depth (0 = use heuristic) 00750 */ 00751 inline SizeType getMaxDepth() const { 00752 return m_maxDepth; 00753 } 00754 00755 /** 00756 * \brief Specify whether or not to use primitive clipping will 00757 * be used in the tree construction. 00758 */ 00759 inline void setClip(bool clip) { 00760 m_clip = clip; 00761 } 00762 00763 /** 00764 * \brief Return whether or not to use primitive clipping will 00765 * be used in the tree construction. 00766 */ 00767 inline bool getClip() const { 00768 return m_clip; 00769 } 00770 00771 /** 00772 * \brief Specify whether or not bad splits can be "retracted". 00773 */ 00774 inline void setRetract(bool retract) { 00775 m_retract = retract; 00776 } 00777 00778 /** 00779 * \brief Return whether or not bad splits can be "retracted". 00780 */ 00781 inline bool getRetract() const { 00782 return m_retract; 00783 } 00784 00785 /** 00786 * \brief Set the number of bad refines allowed to happen 00787 * in succession before a leaf node will be created. 00788 */ 00789 inline void setMaxBadRefines(SizeType maxBadRefines) { 00790 m_maxBadRefines = maxBadRefines; 00791 } 00792 00793 /** 00794 * \brief Return the number of bad refines allowed to happen 00795 * in succession before a leaf node will be created. 00796 */ 00797 inline SizeType getMaxBadRefines() const { 00798 return m_maxBadRefines; 00799 } 00800 00801 /** 00802 * \brief Set the number of primitives, at which recursion will 00803 * stop when building the tree. 00804 */ 00805 inline void setStopPrims(SizeType stopPrims) { 00806 m_stopPrims = stopPrims; 00807 } 00808 00809 /** 00810 * \brief Return the number of primitives, at which recursion will 00811 * stop when building the tree. 00812 */ 00813 inline SizeType getStopPrims() const { 00814 return m_stopPrims; 00815 } 00816 00817 /** 00818 * \brief Specify whether or not tree construction 00819 * should run in parallel. 00820 */ 00821 inline void setParallelBuild(bool parallel) { 00822 m_parallelBuild = parallel; 00823 } 00824 00825 /** 00826 * \brief Return whether or not tree construction 00827 * will run in parallel. 00828 */ 00829 inline bool getParallelBuild() const { 00830 return m_parallelBuild; 00831 } 00832 00833 /** 00834 * \brief Specify the number of primitives, at which the builder will 00835 * switch from (approximate) Min-Max binning to the accurate 00836 * O(n log n) optimization method. 00837 */ 00838 inline void setExactPrimitiveThreshold(SizeType exactPrimThreshold) { 00839 m_exactPrimThreshold = exactPrimThreshold; 00840 } 00841 00842 /** 00843 * \brief Return the number of primitives, at which the builder will 00844 * switch from (approximate) Min-Max binning to the accurate 00845 * O(n log n) optimization method. 00846 */ 00847 inline SizeType getExactPrimitiveThreshold() const { 00848 return m_exactPrimThreshold; 00849 } 00850 protected: 00851 /** 00852 * \brief Build a KD-tree over the supplied geometry 00853 * 00854 * To be called by the subclass. 00855 */ 00856 void buildInternal() { 00857 /* Some samity checks */ 00858 if (isBuilt()) 00859 throw NoriException("The kd-tree has already been built!"); 00860 if (m_traversalCost <= 0) 00861 throw NoriException("The traveral cost must be > 0"); 00862 if (m_queryCost <= 0) 00863 throw NoriException("The query cost must be > 0"); 00864 if (m_emptySpaceBonus <= 0 || m_emptySpaceBonus > 1) 00865 throw NoriException("The empty space bonus must be in [0, 1]"); 00866 if (m_minMaxBins <= 1) 00867 throw NoriException("The number of min-max bins must be > 2"); 00868 00869 SizeType primCount = cast()->getPrimitiveCount(); 00870 if (primCount == 0) { 00871 cout << "Warning: kd-tree contains no geometry!" << endl; 00872 // +1 shift is for alignment purposes (see KDNode::getSibling) 00873 m_nodes = static_cast<KDNode *>(allocAligned(sizeof(KDNode) * 2))+1; 00874 m_nodes[0].initLeafNode(0, 0); 00875 return; 00876 } 00877 00878 if (primCount <= m_exactPrimThreshold) 00879 m_parallelBuild = false; 00880 00881 BuildContext ctx(primCount, m_minMaxBins); 00882 00883 /* Establish an ad-hoc depth cutoff value (Formula from PBRT) */ 00884 if (m_maxDepth == 0) 00885 m_maxDepth = (int) (8 + 1.3f * std::log((float) primCount)/std::log(2.0f)); 00886 m_maxDepth = std::min(m_maxDepth, (SizeType) NORI_KD_MAXDEPTH); 00887 00888 OrderedChunkAllocator &leftAlloc = ctx.leftAlloc; 00889 IndexType *indices = leftAlloc.allocate<IndexType>(primCount); 00890 00891 QElapsedTimer timer; 00892 timer.start(); 00893 00894 BoundingBoxType &bbox = m_bbox; 00895 bbox.reset(); 00896 for (IndexType i=0; i<primCount; ++i) { 00897 bbox.expandBy(cast()->getBoundingBox(i)); 00898 indices[i] = i; 00899 } 00900 00901 #if NORI_KD_VERBOSE == 1 00902 cout << "kd-tree configuration" << endl 00903 << " Traversal cost : " << m_traversalCost << endl 00904 << " Query cost : " << m_queryCost << endl 00905 << " Empty space bonus : " << m_emptySpaceBonus << endl 00906 << " Max. tree depth : " << m_maxDepth << endl 00907 << " Scene bouning box (min) : " << qPrintable(bbox.min.toString()) << endl 00908 << " Scene bouning box (max) : " << qPrintable(bbox.max.toString()) << endl 00909 << " Min-max bins : " << m_minMaxBins << endl 00910 << " O(n log n method) : use for " << m_exactPrimThreshold << " primitives" << endl 00911 << " Perfect splits : " << m_clip << endl 00912 << " Retract bad splits : " << m_retract << endl 00913 << " Stopping primitive count : " << m_stopPrims << endl 00914 << " Build tree in parallel : " << m_parallelBuild << endl << endl; 00915 #endif 00916 00917 SizeType procCount = getCoreCount(); 00918 if (procCount == 1) 00919 m_parallelBuild = false; 00920 00921 if (m_parallelBuild) { 00922 m_builders.resize(procCount); 00923 for (SizeType i=0; i<procCount; ++i) { 00924 m_builders[i] = new TreeBuilder(i, this); 00925 m_builders[i]->start(); 00926 } 00927 } 00928 00929 KDNode *prelimRoot = ctx.nodes.allocate(1); 00930 buildTreeMinMax(ctx, 1, prelimRoot, bbox, bbox, 00931 indices, primCount, true, 0); 00932 ctx.leftAlloc.release(indices); 00933 00934 if (m_parallelBuild) { 00935 m_interface.mutex.lock(); 00936 m_interface.done = true; 00937 m_interface.cond.wakeAll(); 00938 m_interface.mutex.unlock(); 00939 for (SizeType i=0; i<m_builders.size(); ++i) 00940 m_builders[i]->wait(); 00941 } 00942 00943 size_t totalUsage = m_indirections.capacity() 00944 * sizeof(KDNode *) + ctx.size(); 00945 00946 /// Clean up event lists and print statistics 00947 ctx.leftAlloc.cleanup(); 00948 ctx.rightAlloc.cleanup(); 00949 for (SizeType i=0; i<m_builders.size(); ++i) { 00950 BuildContext &subCtx = m_builders[i]->getContext(); 00951 totalUsage += subCtx.size(); 00952 subCtx.leftAlloc.cleanup(); 00953 subCtx.rightAlloc.cleanup(); 00954 ctx.accumulateStatisticsFrom(subCtx); 00955 } 00956 00957 std::stack<boost::tuple<const KDNode *, KDNode *, 00958 const BuildContext *, BoundingBoxType> > stack; 00959 00960 float expTraversalSteps = 0; 00961 float expLeavesVisited = 0; 00962 float expPrimitivesIntersected = 0; 00963 float heuristicCost = 0; 00964 00965 SizeType nodePtr = 0, indexPtr = 0; 00966 SizeType maxPrimsInLeaf = 0; 00967 const SizeType primBucketCount = 16; 00968 SizeType primBuckets[primBucketCount]; 00969 memset(primBuckets, 0, sizeof(SizeType)*primBucketCount); 00970 m_nodeCount = ctx.innerNodeCount + ctx.leafNodeCount; 00971 m_indexCount = ctx.primIndexCount; 00972 00973 // +1 shift is for alignment purposes (see KDNode::getSibling) 00974 m_nodes = static_cast<KDNode *> (allocAligned( 00975 sizeof(KDNode) * (m_nodeCount+1)))+1; 00976 m_indices = new IndexType[m_indexCount]; 00977 00978 /* The following code rewrites all tree nodes with proper relative 00979 indices. It also computes the final tree cost and some other 00980 useful heuristics */ 00981 stack.push(boost::make_tuple(prelimRoot, &m_nodes[nodePtr++], 00982 &ctx, bbox)); 00983 while (!stack.empty()) { 00984 const KDNode *node = boost::get<0>(stack.top()); 00985 KDNode *target = boost::get<1>(stack.top()); 00986 const BuildContext *context = boost::get<2>(stack.top()); 00987 BoundingBoxType bbox = boost::get<3>(stack.top()); 00988 stack.pop(); 00989 typename std::map<const KDNode *, IndexType>::const_iterator it 00990 = m_interface.threadMap.find(node); 00991 // Check if we're switching to a subtree built by a worker thread 00992 if (it != m_interface.threadMap.end()) 00993 context = &m_builders[(*it).second]->getContext(); 00994 00995 if (node->isLeaf()) { 00996 SizeType primStart = node->getPrimStart(), 00997 primEnd = node->getPrimEnd(), 00998 primsInLeaf = primEnd-primStart; 00999 target->initLeafNode(indexPtr, primsInLeaf); 01000 01001 float quantity = TreeConstructionHeuristic::getQuantity(bbox), 01002 weightedQuantity = quantity * primsInLeaf; 01003 expLeavesVisited += quantity; 01004 expPrimitivesIntersected += weightedQuantity; 01005 heuristicCost += weightedQuantity * m_queryCost; 01006 if (primsInLeaf < primBucketCount) 01007 primBuckets[primsInLeaf]++; 01008 if (primsInLeaf > maxPrimsInLeaf) 01009 maxPrimsInLeaf = primsInLeaf; 01010 01011 const BlockedVector<IndexType, NORI_KD_BLOCKSIZE_IDX> &indices 01012 = context->indices; 01013 for (SizeType idx = primStart; idx<primEnd; ++idx) { 01014 m_indices[indexPtr++] = indices[idx]; 01015 } 01016 } else { 01017 float quantity = TreeConstructionHeuristic::getQuantity(bbox); 01018 expTraversalSteps += quantity; 01019 heuristicCost += quantity * m_traversalCost; 01020 01021 const KDNode * left; 01022 if (EXPECT_TAKEN(!node->isIndirection())) 01023 left = node->getLeft(); 01024 else 01025 left = m_indirections[node->getIndirectionIndex()]; 01026 01027 KDNode * children = &m_nodes[nodePtr]; 01028 nodePtr += 2; 01029 int axis = node->getAxis(); 01030 float split = node->getSplit(); 01031 bool result = target->initInnerNode(axis, split, children - target); 01032 if (!result) 01033 throw NoriException("Cannot represent relative pointer -- " 01034 "too many primitives?"); 01035 01036 float tmp = bbox.min[axis]; 01037 bbox.min[axis] = split; 01038 stack.push(boost::make_tuple(left+1, children+1, context, bbox)); 01039 bbox.min[axis] = tmp; 01040 bbox.max[axis] = split; 01041 stack.push(boost::make_tuple(left, children, context, bbox)); 01042 } 01043 } 01044 01045 /* Free some more memory */ 01046 ctx.nodes.clear(); 01047 ctx.indices.clear(); 01048 for (SizeType i=0; i<m_builders.size(); ++i) { 01049 BuildContext &subCtx = m_builders[i]->getContext(); 01050 subCtx.nodes.clear(); 01051 subCtx.indices.clear(); 01052 } 01053 std::vector<KDNode *>().swap(m_indirections); 01054 01055 if (m_builders.size() > 0) { 01056 for (SizeType i=0; i<m_builders.size(); ++i) 01057 delete m_builders[i]; 01058 m_builders.clear(); 01059 } 01060 01061 float rootQuantity = TreeConstructionHeuristic::getQuantity(bbox); 01062 expTraversalSteps /= rootQuantity; 01063 expLeavesVisited /= rootQuantity; 01064 expPrimitivesIntersected /= rootQuantity; 01065 heuristicCost /= rootQuantity; 01066 01067 /* Slightly enlarge the bounding box 01068 (necessary e.g. when the scene is planar) */ 01069 m_tightBBox = bbox; 01070 01071 const float eps = NORI_KD_BBOX_EPSILON; 01072 01073 bbox.min -= (bbox.max-bbox.min) * eps + VectorType(eps); 01074 bbox.max += (bbox.max-bbox.min) * eps + VectorType(eps); 01075 01076 #if NORI_KD_VERBOSE == 1 01077 cout << "Structural kd-tree statistics" << endl 01078 << " Parallel work units : " << m_interface.threadMap.size() << endl 01079 << " Node storage cost : " << (nodePtr * sizeof(KDNode)) / 1024 << " KiB" << endl 01080 << " Index storage cost : " << (indexPtr * sizeof(IndexType)) / 1024 << " KiB" << endl 01081 << " Inner nodes : " << ctx.innerNodeCount << endl 01082 << " Leaf nodes : " << ctx.leafNodeCount << endl 01083 << " Nonempty leaf nodes : " << ctx.nonemptyLeafNodeCount << endl << endl; 01084 01085 cout << "Qualitative kd-tree statistics" << endl 01086 << " Retracted splits : " << ctx.retractedSplits << endl 01087 << " Pruned primitives : " << ctx.pruned << endl 01088 << " Largest leaf node : " << maxPrimsInLeaf << endl 01089 << " Avg. prims / nonempty leaf : " << ctx.primIndexCount / (float) ctx.nonemptyLeafNodeCount << endl 01090 << " Expected traversals/query : " << expTraversalSteps << endl 01091 << " Expected leaf visits/query : " << expLeavesVisited << endl 01092 << " Expected prim. visits/query : " << expPrimitivesIntersected << endl 01093 << " Final cost : " << heuristicCost << endl << endl; 01094 #endif 01095 01096 cout << "Finished after " << timer.elapsed() << " ms (used " 01097 << totalUsage/1024 << " KiB of temp. memory)" << endl 01098 << "The final kd-tree requires " << (nodePtr*sizeof(KDNode) + 01099 indexPtr * sizeof(IndexType)) / 1024 << " KiB of memory" << endl; 01100 } 01101 01102 protected: 01103 /// Primitive classification during tree-construction 01104 enum EClassificationResult { 01105 /// Straddling primitive 01106 EBothSides = 0, 01107 /// Primitive is entirely on the left side of the split 01108 ELeftSide = 1, 01109 /// Primitive is entirely on the right side of the split 01110 ERightSide = 2, 01111 /// Edge events have been generated for the straddling primitive 01112 EBothSidesProcessed = 3 01113 }; 01114 01115 /** 01116 * \brief Describes the beginning or end of a primitive 01117 * when projected onto a certain dimension. 01118 */ 01119 struct EdgeEvent { 01120 /// Possible event types 01121 enum EEventType { 01122 EEdgeEnd = 0, 01123 EEdgePlanar = 1, 01124 EEdgeStart = 2 01125 }; 01126 01127 /// Dummy constructor 01128 inline EdgeEvent() { } 01129 01130 /// Create a new edge event 01131 inline EdgeEvent(uint16_t type, int axis, float pos, IndexType index) 01132 : pos(pos), index(index), type(type), axis(axis) { } 01133 01134 /// Plane position 01135 float pos; 01136 /// Primitive index 01137 IndexType index; 01138 /// Event type: end/planar/start 01139 unsigned int type:2; 01140 /// Event axis 01141 unsigned int axis:2; 01142 }; 01143 01144 BOOST_STATIC_ASSERT(sizeof(EdgeEvent) == 12); 01145 01146 /// Edge event comparison functor 01147 struct EdgeEventOrdering : public std::binary_function<EdgeEvent, EdgeEvent, bool> { 01148 inline bool operator()(const EdgeEvent &a, const EdgeEvent &b) const { 01149 if (a.axis != b.axis) 01150 return a.axis < b.axis; 01151 if (a.pos != b.pos) 01152 return a.pos < b.pos; 01153 return a.type < b.type; 01154 } 01155 }; 01156 01157 /** 01158 * \brief Data type for split candidates computed by 01159 * the O(n log n) greedy optimization method. 01160 * */ 01161 struct SplitCandidate { 01162 float cost; 01163 float pos; 01164 int axis; 01165 SizeType numLeft, numRight; 01166 bool planarLeft; 01167 01168 inline SplitCandidate() : 01169 cost(std::numeric_limits<float>::infinity()), 01170 pos(0), axis(0), numLeft(0), numRight(0), planarLeft(false) { 01171 } 01172 }; 01173 01174 /** 01175 * \brief Per-thread context used to manage memory allocations, 01176 * also records some useful statistics. 01177 */ 01178 struct BuildContext { 01179 OrderedChunkAllocator leftAlloc, rightAlloc; 01180 BlockedVector<KDNode, NORI_KD_BLOCKSIZE_KD> nodes; 01181 BlockedVector<IndexType, NORI_KD_BLOCKSIZE_IDX> indices; 01182 ClassificationStorage classStorage; 01183 MinMaxBins minMaxBins; 01184 01185 SizeType leafNodeCount; 01186 SizeType nonemptyLeafNodeCount; 01187 SizeType innerNodeCount; 01188 SizeType primIndexCount; 01189 SizeType retractedSplits; 01190 SizeType pruned; 01191 01192 BuildContext(SizeType primCount, SizeType binCount) 01193 : minMaxBins(binCount) { 01194 classStorage.setPrimitiveCount(primCount); 01195 leafNodeCount = 0; 01196 nonemptyLeafNodeCount = 0; 01197 innerNodeCount = 0; 01198 primIndexCount = 0; 01199 retractedSplits = 0; 01200 pruned = 0; 01201 } 01202 01203 size_t size() { 01204 return leftAlloc.size() + rightAlloc.size() 01205 + nodes.capacity() * sizeof(KDNode) 01206 + indices.capacity() * sizeof(IndexType) 01207 + classStorage.size(); 01208 } 01209 01210 void accumulateStatisticsFrom(const BuildContext &ctx) { 01211 leafNodeCount += ctx.leafNodeCount; 01212 nonemptyLeafNodeCount += ctx.nonemptyLeafNodeCount; 01213 innerNodeCount += ctx.innerNodeCount; 01214 primIndexCount += ctx.primIndexCount; 01215 retractedSplits += ctx.retractedSplits; 01216 pruned += ctx.pruned; 01217 } 01218 }; 01219 01220 /** 01221 * \brief Communication data structure used to pass jobs to 01222 * kd-tree builder threads 01223 */ 01224 struct BuildInterface { 01225 /* Communcation */ 01226 QMutex mutex; 01227 QWaitCondition cond, condJobTaken; 01228 std::map<const KDNode *, IndexType> threadMap; 01229 bool done; 01230 01231 /* Job description for building a subtree */ 01232 int depth; 01233 KDNode *node; 01234 BoundingBoxType nodeBoundingBox; 01235 EdgeEvent *eventStart, *eventEnd; 01236 SizeType primCount; 01237 int badRefines; 01238 01239 inline BuildInterface() { 01240 node = NULL; 01241 done = false; 01242 } 01243 }; 01244 01245 /** 01246 * \brief kd-tree builder thread 01247 */ 01248 class TreeBuilder : public QThread { 01249 public: 01250 TreeBuilder(IndexType id, GenericKDTree *parent) 01251 : QThread(), m_id(id), 01252 m_parent(parent), 01253 m_context(parent->cast()->getPrimitiveCount(), 01254 parent->getMinMaxBins()), 01255 m_interface(parent->m_interface) { 01256 } 01257 01258 void run() { 01259 OrderedChunkAllocator &leftAlloc = m_context.leftAlloc; 01260 while (true) { 01261 m_interface.mutex.lock(); 01262 while (!m_interface.done && !m_interface.node) 01263 m_interface.cond.wait(&m_interface.mutex); 01264 if (m_interface.done) { 01265 m_interface.mutex.unlock(); 01266 break; 01267 } 01268 int depth = m_interface.depth; 01269 KDNode *node = m_interface.node; 01270 BoundingBoxType nodeBoundingBox = m_interface.nodeBoundingBox; 01271 size_t eventCount = m_interface.eventEnd - m_interface.eventStart; 01272 SizeType primCount = m_interface.primCount; 01273 int badRefines = m_interface.badRefines; 01274 EdgeEvent *eventStart = leftAlloc.allocate<EdgeEvent>(eventCount), 01275 *eventEnd = eventStart + eventCount; 01276 memcpy(eventStart, m_interface.eventStart, 01277 eventCount * sizeof(EdgeEvent)); 01278 m_interface.threadMap[node] = m_id; 01279 m_interface.node = NULL; 01280 m_interface.condJobTaken.wakeOne(); 01281 m_interface.mutex.unlock(); 01282 01283 std::sort(eventStart, eventEnd, EdgeEventOrdering()); 01284 m_parent->buildTree(m_context, depth, node, 01285 nodeBoundingBox, eventStart, eventEnd, primCount, true, badRefines); 01286 leftAlloc.release(eventStart); 01287 } 01288 } 01289 01290 inline BuildContext &getContext() { 01291 return m_context; 01292 } 01293 01294 private: 01295 IndexType m_id; 01296 GenericKDTree *m_parent; 01297 BuildContext m_context; 01298 BuildInterface &m_interface; 01299 }; 01300 01301 /// Cast to the derived class 01302 inline Derived *cast() { 01303 return static_cast<Derived *>(this); 01304 } 01305 01306 /// Cast to the derived class (const version) 01307 inline const Derived *cast() const { 01308 return static_cast<const Derived *>(this); 01309 } 01310 01311 /** 01312 * \brief Create an edge event list for a given list of primitives. 01313 * 01314 * This is necessary when passing from Min-Max binning to the more 01315 * accurate O(n log n) optimizier. 01316 */ 01317 boost::tuple<EdgeEvent *, EdgeEvent *, SizeType> createEventList( 01318 OrderedChunkAllocator &alloc, const BoundingBoxType &nodeBoundingBox, 01319 IndexType *prims, SizeType primCount) { 01320 SizeType initialSize = primCount * 2 * PointType::Dimension, actualPrimCount = 0; 01321 EdgeEvent *eventStart = alloc.allocate<EdgeEvent>(initialSize); 01322 EdgeEvent *eventEnd = eventStart; 01323 01324 for (SizeType i=0; i<primCount; ++i) { 01325 IndexType index = prims[i]; 01326 BoundingBoxType bbox; 01327 if (m_clip) { 01328 bbox = cast()->getClippedBoundingBox(index, nodeBoundingBox); 01329 if (!bbox.isValid() || bbox.getSurfaceArea() == 0) 01330 continue; 01331 } else { 01332 bbox = cast()->getBoundingBox(index); 01333 } 01334 01335 for (int axis=0; axis<PointType::Dimension; ++axis) { 01336 float min = (float) bbox.min[axis], max = (float) bbox.max[axis]; 01337 01338 if (min == max) { 01339 *eventEnd++ = EdgeEvent(EdgeEvent::EEdgePlanar, axis, 01340 min, index); 01341 } else { 01342 *eventEnd++ = EdgeEvent(EdgeEvent::EEdgeStart, axis, 01343 min, index); 01344 *eventEnd++ = EdgeEvent(EdgeEvent::EEdgeEnd, axis, 01345 max, index); 01346 } 01347 } 01348 ++actualPrimCount; 01349 } 01350 01351 SizeType newSize = (SizeType) (eventEnd - eventStart); 01352 if (newSize != initialSize) 01353 alloc.shrinkAllocation<EdgeEvent>(eventStart, newSize); 01354 01355 return boost::make_tuple(eventStart, eventEnd, actualPrimCount); 01356 } 01357 01358 /** 01359 * \brief Leaf node creation helper function 01360 * 01361 * \param ctx 01362 * Thread-specific build context containing allocators etc. 01363 * \param node 01364 * KD-tree node entry to be filled 01365 * \param eventStart 01366 * Start pointer of an edge event list 01367 * \param eventEnd 01368 * End pointer of an edge event list 01369 * \param primCount 01370 * Total primitive count for the current node 01371 */ 01372 void createLeaf(BuildContext &ctx, KDNode *node, EdgeEvent *eventStart, 01373 EdgeEvent *eventEnd, SizeType primCount) { 01374 node->initLeafNode((SizeType) ctx.indices.size(), primCount); 01375 if (primCount > 0) { 01376 SizeType seenPrims = 0; 01377 ctx.nonemptyLeafNodeCount++; 01378 01379 for (EdgeEvent *event = eventStart; event != eventEnd 01380 && event->axis == 0; ++event) { 01381 if (event->type == EdgeEvent::EEdgeStart || 01382 event->type == EdgeEvent::EEdgePlanar) { 01383 ctx.indices.push_back(event->index); 01384 seenPrims++; 01385 } 01386 } 01387 ctx.primIndexCount += primCount; 01388 } 01389 ctx.leafNodeCount++; 01390 } 01391 01392 /** 01393 * \brief Leaf node creation helper function 01394 * 01395 * \param ctx 01396 * Thread-specific build context containing allocators etc. 01397 * \param node 01398 * KD-tree node entry to be filled 01399 * \param indices 01400 * Start pointer of an index list 01401 * \param primCount 01402 * Total primitive count for the current node 01403 */ 01404 void createLeaf(BuildContext &ctx, KDNode *node, SizeType *indices, 01405 SizeType primCount) { 01406 node->initLeafNode((SizeType) ctx.indices.size(), primCount); 01407 if (primCount > 0) { 01408 ctx.nonemptyLeafNodeCount++; 01409 for (SizeType i=0; i<primCount; ++i) 01410 ctx.indices.push_back(indices[i]); 01411 ctx.primIndexCount += primCount; 01412 } 01413 ctx.leafNodeCount++; 01414 } 01415 01416 /** 01417 * \brief Leaf node creation helper function. 01418 * 01419 * Creates a unique index list by collapsing 01420 * a subtree with a bad cost. 01421 * 01422 * \param ctx 01423 * Thread-specific build context containing allocators etc. 01424 * \param node 01425 * KD-tree node entry to be filled 01426 * \param start 01427 * Start pointer of the subtree indices 01428 */ 01429 void createLeafAfterRetraction(BuildContext &ctx, KDNode *node, SizeType start) { 01430 SizeType indexCount = (SizeType) (ctx.indices.size() - start); 01431 01432 OrderedChunkAllocator &alloc = ctx.leftAlloc; 01433 01434 /* A temporary list is allocated to do the sorting (the indices 01435 are not guaranteed to be contiguous in memory) */ 01436 IndexType *tempStart = alloc.allocate<IndexType>(indexCount), 01437 *tempEnd = tempStart + indexCount, 01438 *ptr = tempStart; 01439 01440 for (SizeType i=start, end = start + indexCount; i<end; ++i) 01441 *ptr++ = ctx.indices[i]; 01442 01443 /* Generate an index list without duplicate entries */ 01444 std::sort(tempStart, tempEnd, std::less<IndexType>()); 01445 ptr = tempStart; 01446 01447 int idx = start; 01448 while (ptr < tempEnd) { 01449 ctx.indices[idx] = *ptr++; 01450 while (ptr < tempEnd && *ptr == ctx.indices[idx]) 01451 ++ptr; 01452 idx++; 01453 } 01454 01455 int nSeen = idx-start; 01456 ctx.primIndexCount = ctx.primIndexCount - indexCount + nSeen; 01457 ctx.indices.resize(idx); 01458 alloc.release(tempStart); 01459 node->initLeafNode(start, nSeen); 01460 ctx.nonemptyLeafNodeCount++; 01461 ctx.leafNodeCount++; 01462 } 01463 01464 /** 01465 * \brief Implements the transition from min-max-binning to the 01466 * O(n log n) optimization. 01467 * 01468 * \param ctx 01469 * Thread-specific build context containing allocators etc. 01470 * \param depth 01471 * Current tree depth (1 == root node) 01472 * \param node 01473 * KD-tree node entry to be filled 01474 * \param nodeBoundingBox 01475 * Axis-aligned bounding box of the current node 01476 * \param indices 01477 * Index list of all triangles in the current node (for min-max binning) 01478 * \param primCount 01479 * Total primitive count for the current node 01480 * \param isLeftChild 01481 * Is this node the left child of its parent? This is important for 01482 * memory management using the \ref OrderedChunkAllocator. 01483 * \param badRefines 01484 * Number of "probable bad refines" further up the tree. This makes 01485 * it possible to split along an initially bad-looking candidate in 01486 * the hope that the cost was significantly overestimated. The 01487 * counter makes sure that only a limited number of such splits can 01488 * happen in succession. 01489 * \returns 01490 * Final cost of the node 01491 */ 01492 inline float transitionToNLogN(BuildContext &ctx, unsigned int depth, KDNode *node, 01493 const BoundingBoxType &nodeBoundingBox, IndexType *indices, 01494 SizeType primCount, bool isLeftChild, SizeType badRefines) { 01495 OrderedChunkAllocator &alloc = isLeftChild 01496 ? ctx.leftAlloc : ctx.rightAlloc; 01497 boost::tuple<EdgeEvent *, EdgeEvent *, SizeType> events 01498 = createEventList(alloc, nodeBoundingBox, indices, primCount); 01499 float cost; 01500 if (m_parallelBuild) { 01501 m_interface.mutex.lock(); 01502 m_interface.depth = depth; 01503 m_interface.node = node; 01504 m_interface.nodeBoundingBox = nodeBoundingBox; 01505 m_interface.eventStart = boost::get<0>(events); 01506 m_interface.eventEnd = boost::get<1>(events); 01507 m_interface.primCount = boost::get<2>(events); 01508 m_interface.badRefines = badRefines; 01509 m_interface.cond.wakeOne(); 01510 01511 /* Wait for a worker thread to take this job */ 01512 while (m_interface.node) 01513 m_interface.condJobTaken.wait(&m_interface.mutex); 01514 m_interface.mutex.unlock(); 01515 01516 // Never tear down this subtree (return a cost of -infinity) 01517 cost = -std::numeric_limits<float>::infinity(); 01518 } else { 01519 std::sort(boost::get<0>(events), boost::get<1>(events), 01520 EdgeEventOrdering()); 01521 01522 cost = buildTree(ctx, depth, node, nodeBoundingBox, 01523 boost::get<0>(events), boost::get<1>(events), 01524 boost::get<2>(events), isLeftChild, badRefines); 01525 } 01526 alloc.release(boost::get<0>(events)); 01527 return cost; 01528 } 01529 01530 /** 01531 * \brief Build helper function (min-max binning) 01532 * 01533 * \param ctx 01534 * Thread-specific build context containing allocators etc. 01535 * \param depth 01536 * Current tree depth (1 == root node) 01537 * \param node 01538 * KD-tree node entry to be filled 01539 * \param nodeBoundingBox 01540 * Axis-aligned bounding box of the current node 01541 * \param tightBBox 01542 * Tight bounding box of the contained geometry (for min-max binning) 01543 * \param indices 01544 * Index list of all triangles in the current node (for min-max binning) 01545 * \param primCount 01546 * Total primitive count for the current node 01547 * \param isLeftChild 01548 * Is this node the left child of its parent? This is important for 01549 * memory management using the \ref OrderedChunkAllocator. 01550 * \param badRefines 01551 * Number of "probable bad refines" further up the tree. This makes 01552 * it possible to split along an initially bad-looking candidate in 01553 * the hope that the cost was significantly overestimated. The 01554 * counter makes sure that only a limited number of such splits can 01555 * happen in succession. 01556 * \returns 01557 * Final cost of the node 01558 */ 01559 float buildTreeMinMax(BuildContext &ctx, unsigned int depth, KDNode *node, 01560 const BoundingBoxType &nodeBoundingBox, const BoundingBoxType &tightBBox, IndexType *indices, 01561 SizeType primCount, bool isLeftChild, SizeType badRefines) { 01562 01563 float leafCost = primCount * m_queryCost; 01564 if (primCount <= m_stopPrims || depth >= m_maxDepth) { 01565 createLeaf(ctx, node, indices, primCount); 01566 return leafCost; 01567 } 01568 01569 if (primCount <= m_exactPrimThreshold) 01570 return transitionToNLogN(ctx, depth, node, nodeBoundingBox, indices, 01571 primCount, isLeftChild, badRefines); 01572 01573 /* ==================================================================== */ 01574 /* Binning */ 01575 /* ==================================================================== */ 01576 01577 ctx.minMaxBins.setBoundingBox(tightBBox); 01578 ctx.minMaxBins.bin(cast(), indices, primCount); 01579 01580 /* ==================================================================== */ 01581 /* Split candidate search */ 01582 /* ==================================================================== */ 01583 SplitCandidate bestSplit = ctx.minMaxBins.minimizeCost(m_traversalCost, 01584 m_queryCost); 01585 01586 if (bestSplit.cost == std::numeric_limits<float>::infinity()) { 01587 /* This is bad: we have either run out of floating point precision to 01588 accurately represent split planes (e.g. 'tightBBox' is almost collapsed 01589 along an axis), or the compiler made overly liberal use of floating point 01590 optimizations, causing the two stages of the min-max binning code to 01591 become inconsistent. The two ways to proceed at this point are to 01592 either create a leaf (bad) or switch over to the O(n log n) greedy 01593 optimization, which is done below */ 01594 cout << "Min-max binning failed. Retrying with the O(n log n) greedy algorithm." << endl; 01595 return transitionToNLogN(ctx, depth, node, nodeBoundingBox, indices, 01596 primCount, isLeftChild, badRefines); 01597 } 01598 01599 /* "Bad refines" heuristic from PBRT */ 01600 if (bestSplit.cost >= leafCost) { 01601 if ((bestSplit.cost > 4 * leafCost && primCount < 16) 01602 || badRefines >= m_maxBadRefines) { 01603 createLeaf(ctx, node, indices, primCount); 01604 return leafCost; 01605 } 01606 ++badRefines; 01607 } 01608 01609 /* ==================================================================== */ 01610 /* Partitioning */ 01611 /* ==================================================================== */ 01612 01613 boost::tuple<BoundingBoxType, IndexType *, BoundingBoxType, IndexType *> partition = 01614 ctx.minMaxBins.partition(ctx, cast(), indices, bestSplit, 01615 isLeftChild, m_traversalCost, m_queryCost); 01616 01617 /* ==================================================================== */ 01618 /* Recursion */ 01619 /* ==================================================================== */ 01620 01621 KDNode * children = ctx.nodes.allocate(2); 01622 01623 SizeType nodePosBeforeSplit = (SizeType) ctx.nodes.size(); 01624 SizeType indexPosBeforeSplit = (SizeType) ctx.indices.size(); 01625 SizeType leafNodeCountBeforeSplit = ctx.leafNodeCount; 01626 SizeType nonemptyLeafNodeCountBeforeSplit = ctx.nonemptyLeafNodeCount; 01627 SizeType innerNodeCountBeforeSplit = ctx.innerNodeCount; 01628 01629 if (!node->initInnerNode(bestSplit.axis, bestSplit.pos, children-node)) { 01630 m_indirectionLock.lock(); 01631 SizeType indirectionIdx = (SizeType) m_indirections.size(); 01632 m_indirections.push_back(children); 01633 /* Unable to store relative offset -- create an indirection 01634 table entry */ 01635 node->initIndirectionNode(bestSplit.axis, bestSplit.pos, 01636 indirectionIdx); 01637 m_indirectionLock.unlock(); 01638 } 01639 ctx.innerNodeCount++; 01640 01641 BoundingBoxType childBoundingBox(nodeBoundingBox); 01642 childBoundingBox.max[bestSplit.axis] = bestSplit.pos; 01643 01644 float leftCost = buildTreeMinMax(ctx, depth+1, children, 01645 childBoundingBox, boost::get<0>(partition), boost::get<1>(partition), 01646 bestSplit.numLeft, true, badRefines); 01647 01648 childBoundingBox.min[bestSplit.axis] = bestSplit.pos; 01649 childBoundingBox.max[bestSplit.axis] = nodeBoundingBox.max[bestSplit.axis]; 01650 01651 float rightCost = buildTreeMinMax(ctx, depth+1, children + 1, 01652 childBoundingBox, boost::get<2>(partition), boost::get<3>(partition), 01653 bestSplit.numRight, false, badRefines); 01654 01655 TreeConstructionHeuristic tch(nodeBoundingBox); 01656 std::pair<float, float> prob = tch(bestSplit.axis, 01657 bestSplit.pos - nodeBoundingBox.min[bestSplit.axis], 01658 nodeBoundingBox.max[bestSplit.axis] - bestSplit.pos); 01659 01660 /* Compute the final cost given the updated cost 01661 values received from the children */ 01662 float finalCost = m_traversalCost + 01663 (prob.first * leftCost + prob.second * rightCost); 01664 01665 /* Release the index lists not needed by the children anymore */ 01666 if (isLeftChild) 01667 ctx.rightAlloc.release(boost::get<3>(partition)); 01668 else 01669 ctx.leftAlloc.release(boost::get<1>(partition)); 01670 01671 /* ==================================================================== */ 01672 /* Final decision */ 01673 /* ==================================================================== */ 01674 01675 if (!m_retract || finalCost < primCount * m_queryCost) { 01676 return finalCost; 01677 } else { 01678 /* In the end, splitting didn't help to reduce the cost. 01679 Tear up everything below this node and create a leaf */ 01680 ctx.nodes.resize(nodePosBeforeSplit); 01681 ctx.retractedSplits++; 01682 ctx.leafNodeCount = leafNodeCountBeforeSplit; 01683 ctx.nonemptyLeafNodeCount = nonemptyLeafNodeCountBeforeSplit; 01684 ctx.innerNodeCount = innerNodeCountBeforeSplit; 01685 createLeafAfterRetraction(ctx, node, indexPosBeforeSplit); 01686 return leafCost; 01687 } 01688 } 01689 01690 /* 01691 * \brief Build helper function (greedy O(n log n) optimization) 01692 * 01693 * \param ctx 01694 * Thread-specific build context containing allocators etc. 01695 * \param depth 01696 * Current tree depth (1 == root node) 01697 * \param node 01698 * KD-tree node entry to be filled 01699 * \param nodeBoundingBox 01700 * Axis-aligned bounding box of the current node 01701 * \param eventStart 01702 * Pointer to the beginning of a sorted edge event list 01703 * \param eventEnd 01704 * Pointer to the end of a sorted edge event list 01705 * \param primCount 01706 * Total primitive count for the current node 01707 * \param isLeftChild 01708 * Is this node the left child of its parent? This is important for 01709 * memory management using the \ref OrderedChunkAllocator. 01710 * \param badRefines 01711 * Number of "probable bad refines" further up the tree. This makes 01712 * it possible to split along an initially bad-looking candidate in 01713 * the hope that the cost was significantly overestimated. The 01714 * counter makes sure that only a limited number of such splits can 01715 * happen in succession. 01716 * \returns 01717 * Final cost of the node 01718 */ 01719 float buildTree(BuildContext &ctx, unsigned int depth, KDNode *node, 01720 const BoundingBoxType &nodeBoundingBox, EdgeEvent *eventStart, EdgeEvent *eventEnd, 01721 SizeType primCount, bool isLeftChild, SizeType badRefines) { 01722 01723 float leafCost = primCount * m_queryCost; 01724 if (primCount <= m_stopPrims || depth >= m_maxDepth) { 01725 createLeaf(ctx, node, eventStart, eventEnd, primCount); 01726 return leafCost; 01727 } 01728 01729 SplitCandidate bestSplit; 01730 01731 /* ==================================================================== */ 01732 /* Split candidate search */ 01733 /* ==================================================================== */ 01734 01735 /* First, find the optimal splitting plane according to the 01736 tree construction heuristic. To do this in O(n), the search is 01737 implemented as a sweep over the edge events */ 01738 01739 /* Initially, the split plane is placed left of the scene 01740 and thus all geometry is on its right side */ 01741 SizeType numLeft[PointType::Dimension], 01742 numRight[PointType::Dimension]; 01743 01744 for (int i=0; i<PointType::Dimension; ++i) { 01745 numLeft[i] = 0; 01746 numRight[i] = primCount; 01747 } 01748 01749 EdgeEvent *eventsByAxis[PointType::Dimension]; 01750 int eventsByAxisCtr = 1; 01751 eventsByAxis[0] = eventStart; 01752 TreeConstructionHeuristic tch(nodeBoundingBox); 01753 01754 /* Iterate over all events on the current axis */ 01755 for (EdgeEvent *event = eventStart; event < eventEnd; ) { 01756 /* Record the current position and count the number 01757 and type of remaining events, which are also here. 01758 Due to the sort ordering, there is no need to worry 01759 about an axis change in the loops below */ 01760 int axis = event->axis; 01761 float pos = event->pos; 01762 SizeType numStart = 0, numEnd = 0, numPlanar = 0; 01763 01764 /* Count "end" events */ 01765 while (event < eventEnd && event->pos == pos 01766 && event->axis == axis 01767 && event->type == EdgeEvent::EEdgeEnd) { 01768 ++numEnd; ++event; 01769 } 01770 01771 /* Count "planar" events */ 01772 while (event < eventEnd && event->pos == pos 01773 && event->axis == axis 01774 && event->type == EdgeEvent::EEdgePlanar) { 01775 ++numPlanar; ++event; 01776 } 01777 01778 /* Count "start" events */ 01779 while (event < eventEnd && event->pos == pos 01780 && event->axis == axis 01781 && event->type == EdgeEvent::EEdgeStart) { 01782 ++numStart; ++event; 01783 } 01784 01785 /* Keep track of the beginning of dimensions */ 01786 if (event < eventEnd && event->axis != axis) { 01787 eventsByAxis[eventsByAxisCtr++] = event; 01788 } 01789 01790 /* The split plane can now be moved onto 't'. Accordingly, all planar 01791 and ending primitives are removed from the right side */ 01792 numRight[axis] -= numPlanar + numEnd; 01793 01794 /* Calculate a score using the tree construction heuristic */ 01795 if (EXPECT_TAKEN(pos > nodeBoundingBox.min[axis] && pos < nodeBoundingBox.max[axis])) { 01796 const SizeType nL = numLeft[axis], nR = numRight[axis]; 01797 const float nLF = (float) nL, nRF = (float) nR; 01798 01799 std::pair<float, float> prob = tch(axis, 01800 pos - nodeBoundingBox.min[axis], 01801 nodeBoundingBox.max[axis] - pos); 01802 01803 if (numPlanar == 0) { 01804 float cost = m_traversalCost + m_queryCost 01805 * (prob.first * nLF + prob.second * nRF); 01806 if (nL == 0 || nR == 0) 01807 cost *= m_emptySpaceBonus; 01808 if (cost < bestSplit.cost) { 01809 bestSplit.pos = pos; 01810 bestSplit.axis = axis; 01811 bestSplit.cost = cost; 01812 bestSplit.numLeft = nL; 01813 bestSplit.numRight = nR; 01814 } 01815 } else { 01816 float costPlanarLeft = m_traversalCost 01817 + m_queryCost * (prob.first * (nL+numPlanar) + prob.second * nRF); 01818 float costPlanarRight = m_traversalCost 01819 + m_queryCost * (prob.first * nLF + prob.second * (nR+numPlanar)); 01820 01821 if (nL + numPlanar == 0 || nR == 0) 01822 costPlanarLeft *= m_emptySpaceBonus; 01823 if (nL == 0 || nR + numPlanar == 0) 01824 costPlanarRight *= m_emptySpaceBonus; 01825 01826 if (costPlanarLeft < bestSplit.cost || 01827 costPlanarRight < bestSplit.cost) { 01828 bestSplit.pos = pos; 01829 bestSplit.axis = axis; 01830 01831 if (costPlanarLeft < costPlanarRight) { 01832 bestSplit.cost = costPlanarLeft; 01833 bestSplit.numLeft = nL + numPlanar; 01834 bestSplit.numRight = nR; 01835 bestSplit.planarLeft = true; 01836 } else { 01837 bestSplit.cost = costPlanarRight; 01838 bestSplit.numLeft = nL; 01839 bestSplit.numRight = nR + numPlanar; 01840 bestSplit.planarLeft = false; 01841 } 01842 } 01843 } 01844 } 01845 01846 /* The split plane is moved past 't'. All prims, 01847 which were planar on 't', are moved to the left 01848 side. Also, starting prims are now also left of 01849 the split plane. */ 01850 numLeft[axis] += numStart + numPlanar; 01851 } 01852 01853 /* "Bad refines" heuristic from PBRT */ 01854 if (bestSplit.cost >= leafCost) { 01855 if ((bestSplit.cost > 4 * leafCost && primCount < 16) 01856 || badRefines >= m_maxBadRefines 01857 || bestSplit.cost == std::numeric_limits<float>::infinity()) { 01858 createLeaf(ctx, node, eventStart, eventEnd, primCount); 01859 return leafCost; 01860 } 01861 ++badRefines; 01862 } 01863 01864 /* ==================================================================== */ 01865 /* Primitive Classification */ 01866 /* ==================================================================== */ 01867 01868 ClassificationStorage &storage = ctx.classStorage; 01869 01870 /* Initially mark all prims as being located on both sides */ 01871 for (EdgeEvent *event = eventsByAxis[bestSplit.axis]; 01872 event < eventEnd && event->axis == bestSplit.axis; ++event) 01873 storage.set(event->index, EBothSides); 01874 01875 SizeType primsLeft = 0, primsRight = 0, primsBoth = primCount; 01876 /* Sweep over all edge events and classify the primitives wrt. the split */ 01877 for (EdgeEvent *event = eventsByAxis[bestSplit.axis]; 01878 event < eventEnd && event->axis == bestSplit.axis; ++event) { 01879 if (event->type == EdgeEvent::EEdgeEnd && event->pos <= bestSplit.pos) { 01880 /* The primitive's interval ends before or on the split plane 01881 -> classify to the left side */ 01882 storage.set(event->index, ELeftSide); 01883 primsBoth--; 01884 primsLeft++; 01885 } else if (event->type == EdgeEvent::EEdgeStart 01886 && event->pos >= bestSplit.pos) { 01887 /* The primitive's interval starts after or on the split plane 01888 -> classify to the right side */ 01889 storage.set(event->index, ERightSide); 01890 primsBoth--; 01891 primsRight++; 01892 } else if (event->type == EdgeEvent::EEdgePlanar) { 01893 /* If the planar primitive is not on the split plane, the 01894 classification is easy. Otherwise, place it on the side with 01895 the lower cost */ 01896 if (event->pos < bestSplit.pos || (event->pos == bestSplit.pos 01897 && bestSplit.planarLeft)) { 01898 storage.set(event->index, ELeftSide); 01899 primsBoth--; 01900 primsLeft++; 01901 } else if (event->pos > bestSplit.pos 01902 || (event->pos == bestSplit.pos && !bestSplit.planarLeft)) { 01903 storage.set(event->index, ERightSide); 01904 primsBoth--; 01905 primsRight++; 01906 } else { 01907 } 01908 } 01909 } 01910 01911 /* Some sanity checks */ 01912 01913 OrderedChunkAllocator &leftAlloc = ctx.leftAlloc, 01914 &rightAlloc = ctx.rightAlloc; 01915 01916 EdgeEvent *leftEventsStart, *rightEventsStart; 01917 if (isLeftChild) { 01918 leftEventsStart = eventStart; 01919 rightEventsStart = rightAlloc.allocate<EdgeEvent>(bestSplit.numRight * 2 * PointType::Dimension); 01920 } else { 01921 leftEventsStart = leftAlloc.allocate<EdgeEvent>(bestSplit.numLeft * 2 * PointType::Dimension); 01922 rightEventsStart = eventStart; 01923 } 01924 01925 EdgeEvent *leftEventsEnd = leftEventsStart, *rightEventsEnd = rightEventsStart; 01926 01927 BoundingBoxType leftNodeBoundingBox = nodeBoundingBox, rightNodeBoundingBox = nodeBoundingBox; 01928 leftNodeBoundingBox.max[bestSplit.axis] = bestSplit.pos; 01929 rightNodeBoundingBox.min[bestSplit.axis] = bestSplit.pos; 01930 01931 SizeType prunedLeft = 0, prunedRight = 0; 01932 01933 /* ==================================================================== */ 01934 /* Partitioning */ 01935 /* ==================================================================== */ 01936 01937 if (m_clip) { 01938 EdgeEvent 01939 *leftEventsTempStart = leftAlloc.allocate<EdgeEvent>(primsLeft * 2 * PointType::Dimension), 01940 *rightEventsTempStart = rightAlloc.allocate<EdgeEvent>(primsRight * 2 * PointType::Dimension), 01941 *newEventsLeftStart = leftAlloc.allocate<EdgeEvent>(primsBoth * 2 * PointType::Dimension), 01942 *newEventsRightStart = rightAlloc.allocate<EdgeEvent>(primsBoth * 2 * PointType::Dimension); 01943 01944 EdgeEvent *leftEventsTempEnd = leftEventsTempStart, 01945 *rightEventsTempEnd = rightEventsTempStart, 01946 *newEventsLeftEnd = newEventsLeftStart, 01947 *newEventsRightEnd = newEventsRightStart; 01948 01949 for (EdgeEvent *event = eventStart; event<eventEnd; ++event) { 01950 int classification = storage.get(event->index); 01951 01952 if (classification == ELeftSide) { 01953 /* Left-only primitive. Move to the left list and advance */ 01954 *leftEventsTempEnd++ = *event; 01955 } else if (classification == ERightSide) { 01956 /* Right-only primitive. Move to the right list and advance */ 01957 *rightEventsTempEnd++ = *event; 01958 } else if (classification == EBothSides) { 01959 /* The primitive overlaps the split plane. Re-clip and 01960 generate new events for each side */ 01961 const IndexType index = event->index; 01962 01963 BoundingBoxType clippedLeft = cast()->getClippedBoundingBox(index, leftNodeBoundingBox); 01964 BoundingBoxType clippedRight = cast()->getClippedBoundingBox(index, rightNodeBoundingBox); 01965 01966 if (clippedLeft.isValid() && clippedLeft.getSurfaceArea() > 0) { 01967 for (int axis=0; axis<PointType::Dimension; ++axis) { 01968 float min = (float) clippedLeft.min[axis], 01969 max = (float) clippedLeft.max[axis]; 01970 01971 if (min == max) { 01972 *newEventsLeftEnd++ = EdgeEvent( 01973 EdgeEvent::EEdgePlanar, 01974 axis, min, index); 01975 } else { 01976 *newEventsLeftEnd++ = EdgeEvent( 01977 EdgeEvent::EEdgeStart, 01978 axis, min, index); 01979 *newEventsLeftEnd++ = EdgeEvent( 01980 EdgeEvent::EEdgeEnd, 01981 axis, max, index); 01982 } 01983 } 01984 } else { 01985 prunedLeft++; 01986 } 01987 01988 if (clippedRight.isValid() && clippedRight.getSurfaceArea() > 0) { 01989 for (int axis=0; axis<PointType::Dimension; ++axis) { 01990 float min = (float) clippedRight.min[axis], 01991 max = (float) clippedRight.max[axis]; 01992 01993 if (min == max) { 01994 *newEventsRightEnd++ = EdgeEvent( 01995 EdgeEvent::EEdgePlanar, 01996 axis, min, index); 01997 } else { 01998 *newEventsRightEnd++ = EdgeEvent( 01999 EdgeEvent::EEdgeStart, 02000 axis, min, index); 02001 *newEventsRightEnd++ = EdgeEvent( 02002 EdgeEvent::EEdgeEnd, 02003 axis, max, index); 02004 } 02005 } 02006 } else { 02007 prunedRight++; 02008 } 02009 02010 /* Mark this primitive as processed so that clipping 02011 is only done once */ 02012 storage.set(index, EBothSidesProcessed); 02013 } 02014 } 02015 02016 ctx.pruned += prunedLeft + prunedRight; 02017 02018 /* Sort the events from overlapping prims */ 02019 std::sort(newEventsLeftStart, newEventsLeftEnd, EdgeEventOrdering()); 02020 std::sort(newEventsRightStart, newEventsRightEnd, EdgeEventOrdering()); 02021 02022 /* Merge the left list */ 02023 leftEventsEnd = std::merge(leftEventsTempStart, 02024 leftEventsTempEnd, newEventsLeftStart, newEventsLeftEnd, 02025 leftEventsStart, EdgeEventOrdering()); 02026 02027 /* Merge the right list */ 02028 rightEventsEnd = std::merge(rightEventsTempStart, 02029 rightEventsTempEnd, newEventsRightStart, newEventsRightEnd, 02030 rightEventsStart, EdgeEventOrdering()); 02031 02032 /* Release temporary memory */ 02033 leftAlloc.release(newEventsLeftStart); 02034 leftAlloc.release(leftEventsTempStart); 02035 rightAlloc.release(newEventsRightStart); 02036 rightAlloc.release(rightEventsTempStart); 02037 } else { 02038 for (EdgeEvent *event = eventStart; event < eventEnd; ++event) { 02039 int classification = storage.get(event->index); 02040 02041 if (classification == ELeftSide) { 02042 /* Left-only primitive. Move to the left list and advance */ 02043 *leftEventsEnd++ = *event; 02044 } else if (classification == ERightSide) { 02045 /* Right-only primitive. Move to the right list and advance */ 02046 *rightEventsEnd++ = *event; 02047 } else if (classification == EBothSides) { 02048 /* The primitive overlaps the split plane. Its edge events 02049 must be added to both lists. */ 02050 *leftEventsEnd++ = *event; 02051 *rightEventsEnd++ = *event; 02052 } 02053 } 02054 } 02055 02056 /* Shrink the edge event storage now that we know exactly how 02057 many are on each side */ 02058 ctx.leftAlloc.shrinkAllocation(leftEventsStart, 02059 leftEventsEnd - leftEventsStart); 02060 02061 ctx.rightAlloc.shrinkAllocation(rightEventsStart, 02062 rightEventsEnd - rightEventsStart); 02063 02064 /* ==================================================================== */ 02065 /* Recursion */ 02066 /* ==================================================================== */ 02067 02068 KDNode * children = ctx.nodes.allocate(2); 02069 02070 SizeType nodePosBeforeSplit = (SizeType) ctx.nodes.size(); 02071 SizeType indexPosBeforeSplit = (SizeType) ctx.indices.size(); 02072 SizeType leafNodeCountBeforeSplit = ctx.leafNodeCount; 02073 SizeType nonemptyLeafNodeCountBeforeSplit = ctx.nonemptyLeafNodeCount; 02074 SizeType innerNodeCountBeforeSplit = ctx.innerNodeCount; 02075 02076 if (!node->initInnerNode(bestSplit.axis, bestSplit.pos, children-node)) { 02077 m_indirectionLock.lock(); 02078 SizeType indirectionIdx = (SizeType) m_indirections.size(); 02079 m_indirections.push_back(children); 02080 /* Unable to store relative offset -- create an indirection 02081 table entry */ 02082 node->initIndirectionNode(bestSplit.axis, bestSplit.pos, 02083 indirectionIdx); 02084 m_indirectionLock.unlock(); 02085 } 02086 ctx.innerNodeCount++; 02087 02088 float leftCost = buildTree(ctx, depth+1, children, 02089 leftNodeBoundingBox, leftEventsStart, leftEventsEnd, 02090 bestSplit.numLeft - prunedLeft, true, badRefines); 02091 02092 float rightCost = buildTree(ctx, depth+1, children+1, 02093 rightNodeBoundingBox, rightEventsStart, rightEventsEnd, 02094 bestSplit.numRight - prunedRight, false, badRefines); 02095 02096 std::pair<float, float> prob = tch(bestSplit.axis, 02097 bestSplit.pos - nodeBoundingBox.min[bestSplit.axis], 02098 nodeBoundingBox.max[bestSplit.axis] - bestSplit.pos); 02099 02100 /* Compute the final cost given the updated cost 02101 values received from the children */ 02102 float finalCost = m_traversalCost + 02103 (prob.first * leftCost + prob.second * rightCost); 02104 02105 /* Release the index lists not needed by the children anymore */ 02106 if (isLeftChild) 02107 ctx.rightAlloc.release(rightEventsStart); 02108 else 02109 ctx.leftAlloc.release(leftEventsStart); 02110 02111 /* ==================================================================== */ 02112 /* Final decision */ 02113 /* ==================================================================== */ 02114 02115 if (!m_retract || finalCost < primCount * m_queryCost) { 02116 return finalCost; 02117 } else { 02118 /* In the end, splitting didn't help to reduce the SAH cost. 02119 Tear up everything below this node and create a leaf */ 02120 ctx.nodes.resize(nodePosBeforeSplit); 02121 ctx.retractedSplits++; 02122 ctx.leafNodeCount = leafNodeCountBeforeSplit; 02123 ctx.nonemptyLeafNodeCount = nonemptyLeafNodeCountBeforeSplit; 02124 ctx.innerNodeCount = innerNodeCountBeforeSplit; 02125 createLeafAfterRetraction(ctx, node, indexPosBeforeSplit); 02126 return leafCost; 02127 } 02128 02129 return bestSplit.cost; 02130 } 02131 02132 /** 02133 * \brief Min-max binning as described in 02134 * "Highly Parallel Fast KD-tree Construction for Interactive 02135 * Ray Tracing of Dynamic Scenes" 02136 * by M. Shevtsov, A. Soupikov and A. Kapustin 02137 */ 02138 struct MinMaxBins { 02139 MinMaxBins(SizeType nBins) : m_binCount(nBins) { 02140 m_minBins = new SizeType[m_binCount*PointType::Dimension]; 02141 m_maxBins = new SizeType[m_binCount*PointType::Dimension]; 02142 } 02143 02144 ~MinMaxBins() { 02145 delete[] m_minBins; 02146 delete[] m_maxBins; 02147 } 02148 02149 /** 02150 * \brief Prepare to bin for the specified bounds 02151 */ 02152 void setBoundingBox(const BoundingBoxType &bbox) { 02153 m_bbox = bbox; 02154 m_binSize = m_bbox.getExtents() / (float) m_binCount; 02155 for (int axis=0; axis<PointType::Dimension; ++axis) 02156 m_invBinSize[axis] = 1/m_binSize[axis]; 02157 } 02158 02159 /** 02160 * \brief Run min-max binning 02161 * 02162 * \param derived Derived class to be used to determine the BoundingBox for 02163 * a given list of primitives 02164 * \param indices Primitive indirection list 02165 * \param primCount Specifies the length of \a indices 02166 */ 02167 void bin(const Derived *derived, IndexType *indices, 02168 SizeType primCount) { 02169 m_primCount = primCount; 02170 memset(m_minBins, 0, sizeof(SizeType) * PointType::Dimension * m_binCount); 02171 memset(m_maxBins, 0, sizeof(SizeType) * PointType::Dimension * m_binCount); 02172 const int64_t maxBin = m_binCount-1; 02173 02174 for (SizeType i=0; i<m_primCount; ++i) { 02175 const BoundingBoxType bbox = derived->getBoundingBox(indices[i]); 02176 for (int axis=0; axis<PointType::Dimension; ++axis) { 02177 int64_t minIdx = (int64_t) ((bbox.min[axis] - m_bbox.min[axis]) 02178 * m_invBinSize[axis]); 02179 int64_t maxIdx = (int64_t) ((bbox.max[axis] - m_bbox.min[axis]) 02180 * m_invBinSize[axis]); 02181 m_maxBins[axis * m_binCount 02182 + std::max((int64_t) 0, std::min(maxIdx, maxBin))]++; 02183 m_minBins[axis * m_binCount 02184 + std::max((int64_t) 0, std::min(minIdx, maxBin))]++; 02185 } 02186 } 02187 } 02188 02189 /** 02190 * \brief Evaluate the tree construction heuristic at each bin boundary 02191 * and return the minimizer for the given cost constants. Min-max 02192 * binning uses no "empty space bonus" since it cannot create such 02193 * splits. 02194 */ 02195 SplitCandidate minimizeCost(float traversalCost, float queryCost) { 02196 SplitCandidate candidate; 02197 int binIdx = 0, leftBin = 0; 02198 TreeConstructionHeuristic tch(m_bbox); 02199 02200 for (int axis=0; axis<PointType::Dimension; ++axis) { 02201 VectorType extents = m_bbox.getExtents(); 02202 SizeType numLeft = 0, numRight = m_primCount; 02203 float leftWidth = 0, rightWidth = extents[axis]; 02204 const float binSize = m_binSize[axis]; 02205 02206 for (int i=0; i<m_binCount-1; ++i) { 02207 numLeft += m_minBins[binIdx]; 02208 numRight -= m_maxBins[binIdx]; 02209 leftWidth += binSize; 02210 rightWidth -= binSize; 02211 std::pair<float, float> prob = 02212 tch(axis, leftWidth, rightWidth); 02213 02214 float cost = traversalCost + queryCost 02215 * (prob.first * numLeft + prob.second * numRight); 02216 02217 if (cost < candidate.cost) { 02218 candidate.cost = cost; 02219 candidate.axis = axis; 02220 candidate.numLeft = numLeft; 02221 candidate.numRight = numRight; 02222 leftBin = i; 02223 } 02224 02225 binIdx++; 02226 } 02227 binIdx++; 02228 } 02229 02230 const int axis = candidate.axis; 02231 const float min = m_bbox.min[axis]; 02232 02233 /* The following part may seem a bit paranoid. It is ensures that the 02234 * returned split plane is consistent with the floating point calculations 02235 * done by the binning code in \ref bin(). Since reciprocals and 02236 * various floating point roundoff errors are involved, simply setting 02237 * 02238 * candidate.pos = m_bbox.min[axis] + (leftBin+1) * m_binSize[axis]; 02239 * 02240 * can potentially lead to a slightly different number of primitives being 02241 * classified to the left and right if we were to do check each 02242 * primitive against this split position. We can't have that, however, 02243 * since the partitioning code assumes that these numbers are correct. 02244 * This lets it avoid doing another costly sweep, hence all the 02245 * floating point madness below. 02246 */ 02247 float invBinSize = m_invBinSize[axis]; 02248 float split = min + (leftBin + 1) * m_binSize[axis]; 02249 float splitNext = nextafterf(split, 02250 std::numeric_limits<float>::max()); 02251 int idx = (int) ((split - min) * invBinSize); 02252 int idxNext = (int) ((splitNext - min) * invBinSize); 02253 02254 /** 02255 * The split plane should pass through the last discrete floating 02256 * floating value, which would still be classified into 02257 * the left bin. If this is not computed correctly, do binary 02258 * search. 02259 */ 02260 if (!(idx == leftBin && idxNext == leftBin+1)) { 02261 float left = m_bbox.min[axis]; 02262 float right = m_bbox.max[axis]; 02263 int it = 0; 02264 while (true) { 02265 split = left + (right-left)/2; 02266 splitNext = nextafterf(split, 02267 std::numeric_limits<float>::max()); 02268 idx = (int) ((split - min) * invBinSize); 02269 idxNext = (int) ((splitNext - min) * invBinSize); 02270 02271 if (idx == leftBin && idxNext == leftBin+1) { 02272 /* Got it! */ 02273 break; 02274 } else if (std::abs(idx-idxNext) > 1 || ++it > 50) { 02275 /* Insufficient floating point resolution 02276 -> a leaf will be created. */ 02277 candidate.cost = std::numeric_limits<float>::infinity(); 02278 break; 02279 } 02280 02281 if (idx <= leftBin) 02282 left = split; 02283 else 02284 right = split; 02285 } 02286 } 02287 02288 if (split <= m_bbox.min[axis] || split >= m_bbox.max[axis]) { 02289 /* Insufficient floating point resolution 02290 -> a leaf will be created. */ 02291 candidate.cost = std::numeric_limits<float>::infinity(); 02292 } 02293 02294 candidate.pos = split; 02295 02296 return candidate; 02297 } 02298 02299 /** 02300 * \brief Given a suitable split candiate, compute tight bounding 02301 * boxes for the left and right subtrees and return associated 02302 * primitive lists. 02303 */ 02304 boost::tuple<BoundingBoxType, IndexType *, BoundingBoxType, IndexType *> partition( 02305 BuildContext &ctx, const Derived *derived, IndexType *primIndices, 02306 SplitCandidate &split, bool isLeftChild, float traversalCost, 02307 float queryCost) { 02308 const float splitPos = split.pos; 02309 const int axis = split.axis; 02310 SizeType numLeft = 0, numRight = 0; 02311 BoundingBoxType leftBounds, rightBounds; 02312 02313 IndexType *leftIndices, *rightIndices; 02314 if (isLeftChild) { 02315 OrderedChunkAllocator &rightAlloc = ctx.rightAlloc; 02316 leftIndices = primIndices; 02317 rightIndices = rightAlloc.allocate<IndexType>(split.numRight); 02318 } else { 02319 OrderedChunkAllocator &leftAlloc = ctx.leftAlloc; 02320 leftIndices = leftAlloc.allocate<IndexType>(split.numLeft); 02321 rightIndices = primIndices; 02322 } 02323 02324 for (SizeType i=0; i<m_primCount; ++i) { 02325 const IndexType primIndex = primIndices[i]; 02326 const BoundingBoxType bbox = derived->getBoundingBox(primIndex); 02327 02328 if (bbox.max[axis] <= splitPos) { 02329 leftBounds.expandBy(bbox); 02330 leftIndices[numLeft++] = primIndex; 02331 } else if (bbox.min[axis] > splitPos) { 02332 rightBounds.expandBy(bbox); 02333 rightIndices[numRight++] = primIndex; 02334 } else { 02335 leftBounds.expandBy(bbox); 02336 rightBounds.expandBy(bbox); 02337 leftIndices[numLeft++] = primIndex; 02338 rightIndices[numRight++] = primIndex; 02339 } 02340 } 02341 02342 leftBounds.clip(m_bbox); 02343 rightBounds.clip(m_bbox); 02344 02345 /// Release the unused memory regions 02346 if (isLeftChild) 02347 ctx.leftAlloc.shrinkAllocation(leftIndices, split.numLeft); 02348 else 02349 ctx.rightAlloc.shrinkAllocation(rightIndices, split.numRight); 02350 02351 leftBounds.max[axis] = std::min(leftBounds.max[axis], (float) splitPos); 02352 rightBounds.min[axis] = std::max(rightBounds.min[axis], (float) splitPos); 02353 02354 if (leftBounds.max[axis] != rightBounds.min[axis]) { 02355 /* There is some space between the child nodes -- move 02356 the split plane onto one of the BoundingBoxs so that the 02357 heuristic cost is minimized */ 02358 TreeConstructionHeuristic tch(m_bbox); 02359 02360 std::pair<float, float> prob1 = tch(axis, 02361 leftBounds.max[axis] - m_bbox.min[axis], 02362 m_bbox.max[axis] - leftBounds.max[axis]); 02363 std::pair<float, float> prob2 = tch(axis, 02364 rightBounds.min[axis] - m_bbox.min[axis], 02365 m_bbox.max[axis] - rightBounds.min[axis]); 02366 float cost1 = traversalCost + queryCost 02367 * (prob1.first * numLeft + prob1.second * numRight); 02368 float cost2 = traversalCost + queryCost 02369 * (prob2.first * numLeft + prob2.second * numRight); 02370 02371 if (cost1 <= cost2) { 02372 split.cost = cost1; 02373 split.pos = leftBounds.max[axis]; 02374 } else { 02375 split.cost = cost2; 02376 split.pos = rightBounds.min[axis]; 02377 } 02378 02379 leftBounds.max[axis] = std::min(leftBounds.max[axis], 02380 (float) split.pos); 02381 rightBounds.min[axis] = std::max(rightBounds.min[axis], 02382 (float) split.pos); 02383 } 02384 02385 return boost::make_tuple(leftBounds, leftIndices, 02386 rightBounds, rightIndices); 02387 } 02388 private: 02389 SizeType *m_minBins; 02390 SizeType *m_maxBins; 02391 SizeType m_primCount; 02392 int m_binCount; 02393 BoundingBoxType m_bbox; 02394 VectorType m_binSize; 02395 VectorType m_invBinSize; 02396 }; 02397 02398 protected: 02399 IndexType *m_indices; 02400 float m_traversalCost; 02401 float m_queryCost; 02402 float m_emptySpaceBonus; 02403 bool m_clip, m_retract, m_parallelBuild; 02404 SizeType m_maxDepth; 02405 SizeType m_stopPrims; 02406 SizeType m_maxBadRefines; 02407 SizeType m_exactPrimThreshold; 02408 SizeType m_minMaxBins; 02409 SizeType m_nodeCount; 02410 SizeType m_indexCount; 02411 std::vector<TreeBuilder *> m_builders; 02412 std::vector<KDNode *> m_indirections; 02413 QMutex m_indirectionLock; 02414 BuildInterface m_interface; 02415 }; 02416 02417 /** 02418 * \brief Implements the 3D surface area heuristic so that it can be 02419 * used by the \ref GenericKDTree construction algorithm. 02420 */ 02421 class SurfaceAreaHeuristic3 { 02422 public: 02423 /** 02424 * \brief Initialize the surface area heuristic with the bounds of 02425 * a parent node 02426 * 02427 * Precomputes some information so that traversal probabilities 02428 * of potential split planes can be evaluated efficiently 02429 */ 02430 inline SurfaceAreaHeuristic3(const BoundingBox3f &aabb) { 02431 const Vector3f extents(aabb.getExtents()); 02432 const float temp = 1.0f / (extents[0] * extents[1] 02433 + extents[1]*extents[2] + extents[0]*extents[2]); 02434 m_temp0 = Vector3f( 02435 extents[1] * extents[2], 02436 extents[0] * extents[2], 02437 extents[0] * extents[1]) * temp; 02438 m_temp1 = Vector3f( 02439 extents[1] + extents[2], 02440 extents[0] + extents[2], 02441 extents[0] + extents[1]) * temp; 02442 } 02443 02444 /** 02445 * Given a split on axis \a axis that produces children having extents 02446 * \a leftWidth and \a rightWidth along \a axis, compute the probability 02447 * of traversing the left and right child during a typical query 02448 * operation. 02449 */ 02450 inline std::pair<float, float> operator()(int axis, float leftWidth, float rightWidth) const { 02451 return std::pair<float, float>( 02452 m_temp0[axis] + m_temp1[axis] * leftWidth, 02453 m_temp0[axis] + m_temp1[axis] * rightWidth); 02454 } 02455 02456 /** 02457 * Compute the underlying quantity used by the tree construction 02458 * heuristic. This is used to compute the final cost of a kd-tree. 02459 */ 02460 inline static float getQuantity(const BoundingBox3f &aabb) { 02461 return aabb.getSurfaceArea(); 02462 } 02463 private: 02464 Vector3f m_temp0, m_temp1; 02465 }; 02466 02467 #if defined(WIN32) 02468 /* Revert back to fast / non-strict IEEE 754 02469 floating point computations */ 02470 NORI_NAMESPACE_END 02471 #pragma float_control(precise, off) 02472 NORI_NAMESPACE_BEGIN 02473 #endif 02474 02475 NORI_NAMESPACE_END 02476 02477 #endif /* __KDTREE_GENERIC_H */