nori/api/gkdtree_8h_source.html

00001 /*
00002     This class contains a generic implementation of a KD-Tree for shapes in
00003         n dimensions. It was originally part of Mitsuba and slightly adapted for
00004         use in Nori.
00005
00006     Copyright (c) 2007-2012 by Wenzel Jakob
00007
00008     Mitsuba is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License Version 3
00010     as published by the Free Software Foundation.
00011
00012     Mitsuba is distributed in the hope that it will be useful,
00013     but WITHOUT ANY WARRANTY; without even the implied warranty of
00014     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00015     GNU General Public License for more details.
00016
00017     You should have received a copy of the GNU General Public License
00018     along with this program. If not, see <http://www.gnu.org/licenses/>.
00019 */
00020
00021 /*
00022  * =======================================================================
00023  *   WARNING    WARNING    WARNING    WARNING    WARNING    WARNING
00024  * =======================================================================
00025  *   Remember to put on SAFETY GOGGLES before looking at this file. You
00026  *   are most certainly not expected to read or understand any of it.
00027  * =======================================================================
00028  */
00029
00030 #if !defined(__KDTREE_GENERIC_H)
00031 #define __KDTREE_GENERIC_H
00032
00033 #include <nori/bbox.h>
00034 #include <boost/static_assert.hpp>
00035 #include <boost/tuple/tuple.hpp>
00036 #include <QElapsedTimer>
00037 #include <QWaitCondition>
00038 #include <QMutex>
00039 #include <QThread>
00040 #include <stack>
00041 #include <map>
00042
00043 /** Compile-time KD-tree depth limit. Allows to put certain
00044     data structures on the stack */
00045 #define NORI_KD_MAXDEPTH 48
00046
00047 /// Set to 1 to display various statistics about the kd-tree
00048 #define NORI_KD_VERBOSE 0
00049
00050 /// OrderedChunkAllocator: don't create chunks smaller than 512 KiB
00051 #define NORI_KD_MIN_ALLOC 512*1024
00052
00053 /// Allocate nodes & index lists in blocks of 512 KiB
00054 #define NORI_KD_BLOCKSIZE_KD  (512*1024/sizeof(KDNode))
00055 #define NORI_KD_BLOCKSIZE_IDX (512*1024/sizeof(uint32_t))
00056
00057 /**
00058  * \brief To avoid numerical issues, the size of the scene
00059  * bounding box is increased by this amount
00060  */
00061 #define NORI_KD_BBOX_EPSILON 1e-3f
00062
00063 NORI_NAMESPACE_BEGIN
00064
00065 /**
00066  * \brief Special "ordered" memory allocator
00067  *
00068  * During kd-tree construction, large amounts of memory are required
00069  * to temporarily hold index and edge event lists. When not implemented
00070  * properly, these allocations can become a critical bottleneck.
00071  * The class \ref OrderedChunkAllocator provides a specialized
00072  * memory allocator, which reserves memory in chunks of at least
00073  * 128KiB. An important assumption made by the allocator is that
00074  * memory will be released in the exact same order, in which it was
00075  * previously allocated. This makes it possible to create an
00076  * implementation with a very low memory overhead. Note that no locking
00077  * is done, hence each thread will need its own allocator.
00078  *
00079  * \author Wenzel Jakob
00080  */
00081 class OrderedChunkAllocator {
00082 public:
00083         inline OrderedChunkAllocator(size_t minAllocation = NORI_KD_MIN_ALLOC)
00084                         : m_minAllocation(minAllocation) {
00085                 m_chunks.reserve(16);
00086         }
00087
00088         ~OrderedChunkAllocator() {
00089                 cleanup();
00090         }
00091
00092         /**
00093          * \brief Release all memory used by the allocator
00094          */
00095         void cleanup() {
00096                 for (std::vector<Chunk>::iterator it = m_chunks.begin();
00097                                 it != m_chunks.end(); ++it)
00098                         freeAligned((*it).start);
00099                 m_chunks.clear();
00100         }
00101
00102         /**
00103          * \brief Merge the chunks of another allocator into this one
00104          */
00105         void merge(const OrderedChunkAllocator &other) {
00106                 m_chunks.reserve(m_chunks.size() + other.m_chunks.size());
00107                 m_chunks.insert(m_chunks.end(), other.m_chunks.begin(),
00108                                 other.m_chunks.end());
00109         }
00110
00111         /**
00112          * \brief Forget about all chunks without actually freeing them.
00113          * This is useful when the chunks have been merged into another
00114          * allocator.
00115          */
00116         void forget() {
00117                 m_chunks.clear();
00118         }
00119
00120         /**
00121          * \brief Request a block of memory from the allocator
00122          *
00123          * Walks through the list of chunks to find one with enough
00124          * free memory. If no chunk could be found, a new one is created.
00125          */
00126         template <typename T> T * allocate(size_t size) {
00127                 size *= sizeof(T);
00128                 for (std::vector<Chunk>::iterator it = m_chunks.begin();
00129                                 it != m_chunks.end(); ++it) {
00130                         Chunk &chunk = *it;
00131                         if (chunk.remainder() >= size) {
00132                                 T* result = reinterpret_cast<T *>(chunk.cur);
00133                                 chunk.cur += size;
00134                                 return result;
00135                         }
00136                 }
00137
00138                 /* No chunk had enough free memory */
00139                 size_t allocSize = std::max(size,
00140                         m_minAllocation);
00141
00142                 Chunk chunk;
00143                 chunk.start = (uint8_t *) allocAligned(allocSize);
00144                 chunk.cur = chunk.start + size;
00145                 chunk.size = allocSize;
00146                 m_chunks.push_back(chunk);
00147
00148                 return reinterpret_cast<T *>(chunk.start);
00149         }
00150
00151         template <typename T> void release(T *ptr) {
00152                 for (std::vector<Chunk>::iterator it = m_chunks.begin();
00153                                 it != m_chunks.end(); ++it) {
00154                         Chunk &chunk = *it;
00155                         if ((uint8_t *) ptr >= chunk.start &&
00156                                 (uint8_t *) ptr < chunk.start + chunk.size) {
00157                                 chunk.cur = (uint8_t *) ptr;
00158                                 return;
00159                         }
00160                 }
00161         }
00162
00163         /**
00164          * \brief Shrink the size of the last allocated chunk
00165          */
00166         template <typename T> void shrinkAllocation(T *ptr, size_t newSize) {
00167                 newSize *= sizeof(T);
00168                 for (std::vector<Chunk>::iterator it = m_chunks.begin();
00169                                 it != m_chunks.end(); ++it) {
00170                         Chunk &chunk = *it;
00171                         if ((uint8_t *) ptr >= chunk.start &&
00172                                 (uint8_t *) ptr < chunk.start + chunk.size) {
00173                                 chunk.cur = (uint8_t *) ptr + newSize;
00174                                 return;
00175                         }
00176                 }
00177         }
00178
00179         inline size_t getChunkCount() const { return m_chunks.size(); }
00180
00181         /**
00182          * \brief Return the total amount of chunk memory in bytes
00183          */
00184         size_t size() const {
00185                 size_t result = 0;
00186                 for (std::vector<Chunk>::const_iterator it = m_chunks.begin();
00187                                 it != m_chunks.end(); ++it)
00188                         result += (*it).size;
00189                 return result;
00190         }
00191
00192         /**
00193          * \brief Return the total amount of used memory in bytes
00194          */
00195         size_t used() const {
00196                 size_t result = 0;
00197                 for (std::vector<Chunk>::const_iterator it = m_chunks.begin();
00198                                 it != m_chunks.end(); ++it)
00199                         result += (*it).used();
00200                 return result;
00201         }
00202 private:
00203         struct Chunk {
00204                 size_t size;
00205                 uint8_t *start, *cur;
00206
00207                 inline size_t used() const {
00208                         return cur - start;
00209                 }
00210
00211                 inline size_t remainder() const {
00212                         return size - used();
00213                 }
00214         };
00215
00216         size_t m_minAllocation;
00217         std::vector<Chunk> m_chunks;
00218 };
00219
00220 /**
00221  * \brief Basic vector implementation, which stores all data
00222  * in a list of fixed-sized blocks.
00223  *
00224  * This leads to a more conservative memory usage when the
00225  * final size of a (possibly very large) growing vector is
00226  * unknown. Also, frequent reallocations & copies are avoided.
00227  *
00228  * \author Wenzel Jakob
00229  */
00230 template <typename T, size_t BlockSize> class BlockedVector {
00231 public:
00232         BlockedVector() : m_pos(0) {}
00233
00234         ~BlockedVector() {
00235                 clear();
00236         }
00237
00238         /**
00239          * \brief Append an element to the end
00240          */
00241         inline void push_back(const T &value) {
00242                 size_t blockIdx = m_pos / BlockSize;
00243                 size_t offset = m_pos % BlockSize;
00244                 if (blockIdx == m_blocks.size())
00245                         m_blocks.push_back(new T[BlockSize]);
00246                 m_blocks[blockIdx][offset] = value;
00247                 m_pos++;
00248         }
00249
00250         /**
00251          * \brief Allocate a certain number of elements and
00252          * return a pointer to the first one.
00253          *
00254          * The implementation will ensure that they lie
00255          * contiguous in memory -- note that this can potentially
00256          * create unused elements in the previous block if a new
00257          * one has to be allocated.
00258          */
00259         inline T * allocate(size_t size) {
00260                 size_t blockIdx = m_pos / BlockSize;
00261                 size_t offset = m_pos % BlockSize;
00262                 T *result;
00263                 if (EXPECT_TAKEN(offset + size <= BlockSize)) {
00264                         if (blockIdx == m_blocks.size())
00265                                 m_blocks.push_back(new T[BlockSize]);
00266                         result = m_blocks[blockIdx] + offset;
00267                         m_pos += size;
00268                 } else {
00269                         ++blockIdx;
00270                         if (blockIdx == m_blocks.size())
00271                                 m_blocks.push_back(new T[BlockSize]);
00272                         result = m_blocks[blockIdx];
00273                         m_pos += BlockSize - offset + size;
00274                 }
00275                 return result;
00276         }
00277
00278         inline T &operator[](size_t index) {
00279                 return *(m_blocks[index / BlockSize] +
00280                         (index % BlockSize));
00281         }
00282
00283         inline const T &operator[](size_t index) const {
00284                 return *(m_blocks[index / BlockSize] +
00285                         (index % BlockSize));
00286         }
00287
00288
00289         /**
00290          * \brief Return the currently used number of items
00291          */
00292         inline size_t size() const {
00293                 return m_pos;
00294         }
00295
00296         /**
00297          * \brief Return the number of allocated blocks
00298          */
00299         inline size_t blockCount() const {
00300                 return m_blocks.size();
00301         }
00302
00303         /**
00304          * \brief Return the total capacity
00305          */
00306         inline size_t capacity() const {
00307                 return m_blocks.size() * BlockSize;
00308         }
00309
00310         /**
00311          * \brief Resize the vector to the given size.
00312          *
00313          * Note: this implementation doesn't support
00314          * enlarging the vector and simply changes the
00315          * last item pointer.
00316          */
00317         inline void resize(size_t pos) {
00318                 m_pos = pos;
00319         }
00320
00321         /**
00322          * \brief Release all memory
00323          */
00324         void clear() {
00325                 for (typename std::vector<T *>::iterator it = m_blocks.begin();
00326                                 it != m_blocks.end(); ++it)
00327                         delete[] *it;
00328                 m_blocks.clear();
00329                 m_pos = 0;
00330         }
00331 private:
00332         std::vector<T *> m_blocks;
00333         size_t m_pos;
00334 };
00335
00336 /**
00337  * \brief Compact storage for primitive classifcation
00338  *
00339  * When classifying primitives with respect to a split plane,
00340  * a data structure is needed to hold the tertiary result of
00341  * this operation. This class implements a compact storage
00342  * (2 bits per entry) in the spirit of the std::vector<bool>
00343  * specialization.
00344  *
00345  * \author Wenzel Jakob
00346  */
00347 class ClassificationStorage {
00348 public:
00349         ClassificationStorage()
00350                 : m_buffer(NULL), m_bufferSize(0) { }
00351
00352         ~ClassificationStorage() {
00353                 if (m_buffer)
00354                         delete[] m_buffer;
00355         }
00356
00357         void setPrimitiveCount(size_t size) {
00358                 if (m_buffer)
00359                         delete[] m_buffer;
00360                 if (size > 0) {
00361                         m_bufferSize = size/4 + ((size % 4) > 0 ? 1 : 0);
00362                         m_buffer = new uint8_t[m_bufferSize];
00363                 } else {
00364                         m_buffer = NULL;
00365                 }
00366         }
00367
00368         inline void set(uint32_t index, int value) {
00369                 uint8_t *ptr = m_buffer + (index >> 2);
00370                 uint8_t shift = (index & 3) << 1;
00371                 *ptr = (*ptr & ~(3 << shift)) | (value << shift);
00372         }
00373
00374         inline int get(uint32_t index) const {
00375                 uint8_t *ptr = m_buffer + (index >> 2);
00376                 uint8_t shift = (index & 3) << 1;
00377                 return (*ptr >> shift) & 3;
00378         }
00379
00380         inline size_t size() const {
00381                 return m_bufferSize;
00382         }
00383 private:
00384         uint8_t *m_buffer;
00385         size_t m_bufferSize;
00386 };
00387
00388 /**
00389  * \brief Base class of all kd-trees.
00390  *
00391  * This class defines the byte layout for KD-tree nodes and
00392  * provides methods for querying the tree structure.
00393  */
00394 template <typename BoundingBoxType> class KDTreeBase {
00395 public:
00396         /// Index number format (max 2^32 prims)
00397         typedef uint32_t IndexType;
00398
00399         /// Size number format
00400         typedef uint32_t SizeType;
00401
00402         /**
00403          * \brief KD-tree node in 8 bytes.
00404          */
00405         struct KDNode {
00406                 union {
00407                         /* Inner node */
00408                         struct {
00409                                 /* Bit layout:
00410                                    31   : False (inner node)
00411                                    30   : Indirection node flag
00412                                    29-3 : Offset to the left child
00413                                           or indirection table entry
00414                                    2-0  : Split axis
00415                                 */
00416                                 uint32_t combined;
00417
00418                                 /// Split plane coordinate
00419                                 float split;
00420                         } inner;
00421
00422                         /* Leaf node */
00423                         struct {
00424                                 /* Bit layout:
00425                                    31   : True (leaf node)
00426                                    30-0 : Offset to the node's primitive list
00427                                 */
00428                                 uint32_t combined;
00429
00430                                 /// End offset of the primitive list
00431                                 uint32_t end;
00432                         } leaf;
00433                 };
00434
00435                 enum EMask {
00436                         ETypeMask = 1 << 31,
00437                         EIndirectionMask = 1 << 30,
00438                         ELeafOffsetMask = ~ETypeMask,
00439                         EInnerAxisMask = 0x3,
00440                         EInnerOffsetMask = ~(EInnerAxisMask + EIndirectionMask),
00441                         ERelOffsetLimit = (1<<28) - 1
00442                 };
00443
00444                 /// Initialize a leaf kd-Tree node
00445                 inline void initLeafNode(unsigned int offset, unsigned int numPrims) {
00446                         leaf.combined = (uint32_t) ETypeMask | offset;
00447                         leaf.end = offset + numPrims;
00448                 }
00449
00450                 /**
00451                  * Initialize an interior kd-Tree node. Reports a failure if the
00452                  * relative offset to the left child node is too large.
00453                  */
00454                 inline bool initInnerNode(int axis, float split, ptrdiff_t relOffset) {
00455                         if (relOffset < 0 || relOffset > ERelOffsetLimit)
00456                                 return false;
00457                         inner.combined = axis | ((uint32_t) relOffset << 2);
00458                         inner.split = split;
00459                         return true;
00460                 }
00461
00462                 /**
00463                  * \brief Initialize an interior indirection node.
00464                  *
00465                  * Indirections are necessary whenever the children cannot be
00466                  * referenced using a relative pointer, which can happen when
00467                  * they lie in different memory chunks. In this case, the node
00468                  * stores an index into a globally shared pointer list.
00469                  */
00470                 inline void initIndirectionNode(int axis, float split,
00471                                 uint32_t indirectionEntry) {
00472                         inner.combined = EIndirectionMask
00473                                 | ((uint32_t) indirectionEntry << 2)
00474                                 | axis;
00475                         inner.split = split;
00476                 }
00477
00478                 /// Is this a leaf node?
00479                 inline bool isLeaf() const {
00480                         return leaf.combined & (uint32_t) ETypeMask;
00481                 }
00482
00483                 /// Is this an indirection node?
00484                 inline bool isIndirection() const {
00485                         return leaf.combined & (uint32_t) EIndirectionMask;
00486                 }
00487
00488                 /// Assuming this is a leaf node, return the first primitive index
00489                 inline IndexType getPrimStart() const {
00490                         return leaf.combined & (uint32_t) ELeafOffsetMask;
00491                 }
00492
00493                 /// Assuming this is a leaf node, return the last primitive index
00494                 inline IndexType getPrimEnd() const {
00495                         return leaf.end;
00496                 }
00497
00498                 /// Return the index of an indirection node
00499                 inline IndexType getIndirectionIndex() const {
00500                         return(inner.combined & (uint32_t) EInnerOffsetMask) >> 2;
00501                 }
00502
00503                 /// Return the left child (assuming that this is an interior node)
00504                 inline const KDNode * getLeft() const {
00505                         return this +
00506                                 ((inner.combined & (uint32_t) EInnerOffsetMask) >> 2);
00507                 }
00508
00509                 /// Return the sibling of the current node
00510                 inline const KDNode * getSibling() const {
00511                         return (const KDNode *) ((ptrdiff_t) this ^ (ptrdiff_t) 8);
00512                 }
00513
00514                 /// Return the left child (assuming that this is an interior node)
00515                 inline KDNode * getLeft() {
00516                         return this +
00517                                 ((inner.combined & (uint32_t) EInnerOffsetMask) >> 2);
00518                 }
00519
00520                 /// Return the left child (assuming that this is an interior node)
00521                 inline const KDNode * getRight() const {
00522                         return getLeft() + 1;
00523                 }
00524
00525                 /**
00526                  * \brief Return the split plane location (assuming that this
00527                  * is an interior node)
00528                  */
00529                 inline float getSplit() const {
00530                         return inner.split;
00531                 }
00532
00533                 /// Return the split axis (assuming that this is an interior node)
00534                 inline int getAxis() const {
00535                         return inner.combined & (uint32_t) EInnerAxisMask;
00536                 }
00537         };
00538
00539         BOOST_STATIC_ASSERT(sizeof(KDNode) == 8);
00540
00541         /// Return the root node of the kd-tree
00542         inline const KDNode *getRoot() const {
00543                 return m_nodes;
00544         }
00545
00546         /// Return whether or not the kd-tree has been built
00547         inline bool isBuilt() const {
00548                 return m_nodes != NULL;
00549         }
00550
00551         /// Return a (slightly enlarged) axis-aligned bounding box containing all primitives
00552         inline const BoundingBoxType &getBoundingBox() const { return m_bbox; }
00553
00554         /// Return a tight axis-aligned bounding box containing all primitives
00555         inline const BoundingBoxType &getTightBoundingBox() const { return m_tightBBox;}
00556 protected:
00557         KDNode *m_nodes;
00558         BoundingBoxType m_bbox, m_tightBBox;
00559 };
00560
00561 #if defined(WIN32)
00562 /* Use strict IEEE 754 floating point computations
00563    for the following kd-tree building code */
00564 NORI_NAMESPACE_END
00565 #pragma float_control(precise, on)
00566 NORI_NAMESPACE_BEGIN
00567 #endif
00568
00569 /**
00570  * \brief Optimized KD-tree acceleration data structure for
00571  * n-dimensional (n<=4) shapes and various queries on them.
00572  *
00573  * Note that this class mainly concerns itself with data that cover a
00574  * region of space. For point data, other implementations will be more
00575  * suitable. The most important application in Mitsuba is the fast
00576  * construction of high-quality trees for ray tracing. See the class
00577  * \ref SAHKDTree for this specialization.
00578  *
00579  * The code in this class is a fully generic kd-tree implementation, which
00580  * can theoretically support any kind of shape. However, subclasses still
00581  * need to provide the following signatures for a functional implementation:
00582  *
00583  * \code
00584  * /// Return the total number of primitives
00585  * inline SizeType getPrimitiveCount() const;
00586  *
00587  * /// Return the axis-aligned bounding box of a certain primitive
00588  * inline BoundingBoxType getBoundingBox(IndexType primIdx) const;
00589  *
00590  * /// Return the bounding box of a primitive when clipped to another box
00591  * inline BoundingBoxType getClippedBoundingBox(IndexType primIdx, const BoundingBoxType &bbox) const;
00592  * \endcode
00593  *
00594  * This class follows the "Curiously recurring template" design pattern
00595  * so that the above functions can be inlined (in particular, no virtual
00596  * calls will be necessary!).
00597  *
00598  * When the kd-tree is initially built, this class optimizes a cost
00599  * heuristic every time a split plane has to be chosen. For ray tracing,
00600  * the heuristic is usually the surface area heuristic (SAH), but other
00601  * choices are possible as well. The tree construction heuristic must be
00602  * passed as a template argument, which can use a supplied bounding box and
00603  * split candidate to compute approximate probabilities of recursing into
00604  * the left and right subrees during a typical kd-tree query operation.
00605  * See \ref SurfaceAreaHeuristic for an example of the interface that
00606  * must be implemented.
00607  *
00608  * The kd-tree construction algorithm creates 'perfect split' trees as
00609  * outlined in the paper "On Building fast kd-Trees for Ray Tracing, and on
00610  * doing that in O(N log N)" by Ingo Wald and Vlastimil Havran. This works
00611  * even when the tree is not meant to be used for ray tracing.
00612  * For polygonal meshes, the involved Sutherland-Hodgman iterations can be
00613  * quite expensive in terms of the overall construction time. The
00614  * \ref setClip method can be used to deactivate perfect splits at the
00615  * cost of a lower-quality tree.
00616  *
00617  * Because the O(N log N) construction algorithm tends to cause many
00618  * incoherent memory accesses, a fast approximate technique (Min-max
00619  * binning) is used near the top of the tree, which significantly reduces
00620  * cache misses. Once the input data has been narrowed down to a
00621  * reasonable amount, the implementation switches over to the O(N log N)
00622  * builder. When multiple processors are available, the build process runs
00623  * in parallel.
00624  *
00625  * \author Wenzel Jakob
00626  * \ingroup librender
00627  */
00628 template <typename BoundingBoxType, typename TreeConstructionHeuristic, typename Derived>
00629         class GenericKDTree : public KDTreeBase<BoundingBoxType> {
00630 protected:
00631         // Some forward declarations
00632         struct MinMaxBins;
00633         struct EdgeEvent;
00634         struct EdgeEventOrdering;
00635
00636 public:
00637         typedef KDTreeBase<BoundingBoxType>            Parent;
00638         typedef typename Parent::SizeType       SizeType;
00639         typedef typename Parent::IndexType      IndexType;
00640         typedef typename Parent::KDNode         KDNode;
00641         typedef typename BoundingBoxType::Scalar       Scalar;
00642         typedef typename BoundingBoxType::PointType    PointType;
00643         typedef typename BoundingBoxType::VectorType   VectorType;
00644
00645         using Parent::m_nodes;
00646         using Parent::m_bbox;
00647         using Parent::m_tightBBox;
00648         using Parent::isBuilt;
00649
00650         /**
00651          * \brief Create a new kd-tree instance initialized with
00652          * the default parameters.
00653          */
00654         GenericKDTree() : m_indices(NULL) {
00655                 m_nodes = NULL;
00656                 m_traversalCost = 15;
00657                 m_queryCost = 20;
00658                 m_emptySpaceBonus = 0.9f;
00659                 m_clip = true;
00660                 m_stopPrims = 6;
00661                 m_maxBadRefines = 3;
00662                 m_exactPrimThreshold = 65536;
00663                 m_maxDepth = 0;
00664                 m_retract = true;
00665                 m_parallelBuild = true;
00666                 m_minMaxBins = 128;
00667         }
00668
00669         /**
00670          * \brief Release all memory
00671          */
00672         virtual ~GenericKDTree() {
00673                 if (m_indices)
00674                         delete[] m_indices;
00675                 if (m_nodes)
00676                         freeAligned(m_nodes-1); // undo alignment shift
00677         }
00678
00679         /**
00680          * \brief Set the traversal cost used by the tree construction heuristic
00681          */
00682         inline void setTraversalCost(float traversalCost) {
00683                 m_traversalCost = traversalCost;
00684         }
00685
00686         /**
00687          * \brief Return the traversal cost used by the tree construction heuristic
00688          */
00689         inline float getTraversalCost() const {
00690                 return m_traversalCost;
00691         }
00692
00693         /**
00694          * \brief Set the query cost used by the tree construction heuristic
00695          * (This is the average cost for testing a contained shape against
00696          *  a kd-tree search query)
00697          */
00698         inline void setQueryCost(float queryCost) {
00699                 m_queryCost = queryCost;
00700         }
00701
00702         /**
00703          * \brief Return the query cost used by the tree construction heuristic
00704          * (This is the average cost for testing a contained shape against
00705          *  a kd-tree search query)
00706          */
00707         inline float getQueryCost() const {
00708                 return m_queryCost;
00709         }
00710
00711         /**
00712          * \brief Set the bonus factor for empty space used by the
00713          * tree construction heuristic
00714          */
00715         inline void setEmptySpaceBonus(float emptySpaceBonus) {
00716                 m_emptySpaceBonus = emptySpaceBonus;
00717         }
00718
00719         /**
00720          * \brief Return the bonus factor for empty space used by the
00721          * tree construction heuristic
00722          */
00723         inline float getEmptySpaceBonus() const {
00724                 return m_emptySpaceBonus;
00725         }
00726
00727         /**
00728          * \brief Set the maximum tree depth (0 = use heuristic)
00729          */
00730         inline void setMaxDepth(SizeType maxDepth) {
00731                 m_maxDepth = maxDepth;
00732         }
00733
00734         /**
00735          * \brief Set the number of bins used for Min-Max binning
00736          */
00737         inline void setMinMaxBins(SizeType minMaxBins) {
00738                 m_minMaxBins = minMaxBins;
00739         }
00740
00741         /**
00742          * \brief Return the number of bins used for Min-Max binning
00743          */
00744         inline SizeType getMinMaxBins() const {
00745                 return m_minMaxBins;
00746         }
00747
00748         /**
00749          * \brief Return maximum tree depth (0 = use heuristic)
00750          */
00751         inline SizeType getMaxDepth() const {
00752                 return m_maxDepth;
00753         }
00754
00755         /**
00756          * \brief Specify whether or not to use primitive clipping will
00757          * be used in the tree construction.
00758          */
00759         inline void setClip(bool clip) {
00760                 m_clip = clip;
00761         }
00762
00763         /**
00764          * \brief Return whether or not to use primitive clipping will
00765          * be used in the tree construction.
00766          */
00767         inline bool getClip() const {
00768                 return m_clip;
00769         }
00770
00771         /**
00772          * \brief Specify whether or not bad splits can be "retracted".
00773          */
00774         inline void setRetract(bool retract) {
00775                 m_retract = retract;
00776         }
00777
00778         /**
00779          * \brief Return whether or not bad splits can be "retracted".
00780          */
00781         inline bool getRetract() const {
00782                 return m_retract;
00783         }
00784
00785         /**
00786          * \brief Set the number of bad refines allowed to happen
00787          * in succession before a leaf node will be created.
00788          */
00789         inline void setMaxBadRefines(SizeType maxBadRefines) {
00790                 m_maxBadRefines = maxBadRefines;
00791         }
00792
00793         /**
00794          * \brief Return the number of bad refines allowed to happen
00795          * in succession before a leaf node will be created.
00796          */
00797         inline SizeType getMaxBadRefines() const {
00798                 return m_maxBadRefines;
00799         }
00800
00801         /**
00802          * \brief Set the number of primitives, at which recursion will
00803          * stop when building the tree.
00804          */
00805         inline void setStopPrims(SizeType stopPrims) {
00806                 m_stopPrims = stopPrims;
00807         }
00808
00809         /**
00810          * \brief Return the number of primitives, at which recursion will
00811          * stop when building the tree.
00812          */
00813         inline SizeType getStopPrims() const {
00814                 return m_stopPrims;
00815         }
00816
00817         /**
00818          * \brief Specify whether or not tree construction
00819          * should run in parallel.
00820          */
00821         inline void setParallelBuild(bool parallel) {
00822                 m_parallelBuild = parallel;
00823         }
00824
00825         /**
00826          * \brief Return whether or not tree construction
00827          * will run in parallel.
00828          */
00829         inline bool getParallelBuild() const {
00830                 return m_parallelBuild;
00831         }
00832
00833         /**
00834          * \brief Specify the number of primitives, at which the builder will
00835          * switch from (approximate) Min-Max binning to the accurate
00836          * O(n log n) optimization method.
00837          */
00838         inline void setExactPrimitiveThreshold(SizeType exactPrimThreshold) {
00839                 m_exactPrimThreshold = exactPrimThreshold;
00840         }
00841
00842         /**
00843          * \brief Return the number of primitives, at which the builder will
00844          * switch from (approximate) Min-Max binning to the accurate
00845          * O(n log n) optimization method.
00846          */
00847         inline SizeType getExactPrimitiveThreshold() const {
00848                 return m_exactPrimThreshold;
00849         }
00850 protected:
00851         /**
00852          * \brief Build a KD-tree over the supplied geometry
00853          *
00854          * To be called by the subclass.
00855          */
00856         void buildInternal() {
00857                 /* Some samity checks */
00858                 if (isBuilt())
00859                         throw NoriException("The kd-tree has already been built!");
00860                 if (m_traversalCost <= 0)
00861                         throw NoriException("The traveral cost must be > 0");
00862                 if (m_queryCost <= 0)
00863                         throw NoriException("The query cost must be > 0");
00864                 if (m_emptySpaceBonus <= 0 || m_emptySpaceBonus > 1)
00865                         throw NoriException("The empty space bonus must be in [0, 1]");
00866                 if (m_minMaxBins <= 1)
00867                         throw NoriException("The number of min-max bins must be > 2");
00868
00869                 SizeType primCount = cast()->getPrimitiveCount();
00870                 if (primCount == 0) {
00871                         cout << "Warning: kd-tree contains no geometry!" << endl;
00872                         // +1 shift is for alignment purposes (see KDNode::getSibling)
00873                         m_nodes = static_cast<KDNode *>(allocAligned(sizeof(KDNode) * 2))+1;
00874                         m_nodes[0].initLeafNode(0, 0);
00875                         return;
00876                 }
00877
00878                 if (primCount <= m_exactPrimThreshold)
00879                         m_parallelBuild = false;
00880
00881                 BuildContext ctx(primCount, m_minMaxBins);
00882
00883                 /* Establish an ad-hoc depth cutoff value (Formula from PBRT) */
00884                 if (m_maxDepth == 0)
00885                         m_maxDepth = (int) (8 + 1.3f * std::log((float) primCount)/std::log(2.0f));
00886                 m_maxDepth = std::min(m_maxDepth, (SizeType) NORI_KD_MAXDEPTH);
00887
00888                 OrderedChunkAllocator &leftAlloc = ctx.leftAlloc;
00889                 IndexType *indices = leftAlloc.allocate<IndexType>(primCount);
00890
00891                 QElapsedTimer timer;
00892                 timer.start();
00893
00894                 BoundingBoxType &bbox = m_bbox;
00895                 bbox.reset();
00896                 for (IndexType i=0; i<primCount; ++i) {
00897                         bbox.expandBy(cast()->getBoundingBox(i));
00898                         indices[i] = i;
00899                 }
00900
00901                 #if NORI_KD_VERBOSE == 1
00902                         cout << "kd-tree configuration" << endl
00903                                 << "  Traversal cost             : " << m_traversalCost << endl
00904                                 << "  Query cost                 : " << m_queryCost << endl
00905                                 << "  Empty space bonus          : " << m_emptySpaceBonus << endl
00906                                 << "  Max. tree depth            : " << m_maxDepth << endl
00907                                 << "  Scene bouning box (min)    : " << qPrintable(bbox.min.toString()) << endl
00908                                 << "  Scene bouning box (max)    : " << qPrintable(bbox.max.toString()) << endl
00909                                 << "  Min-max bins               : " << m_minMaxBins << endl
00910                                 << "  O(n log n method)          : use for " << m_exactPrimThreshold << " primitives" << endl
00911                                 << "  Perfect splits             : " << m_clip << endl
00912                                 << "  Retract bad splits         : " << m_retract << endl
00913                                 << "  Stopping primitive count   : " << m_stopPrims << endl
00914                                 << "  Build tree in parallel     : " << m_parallelBuild << endl << endl;
00915                 #endif
00916
00917                 SizeType procCount = getCoreCount();
00918                 if (procCount == 1)
00919                         m_parallelBuild = false;
00920
00921                 if (m_parallelBuild) {
00922                         m_builders.resize(procCount);
00923                         for (SizeType i=0; i<procCount; ++i) {
00924                                 m_builders[i] = new TreeBuilder(i, this);
00925                                 m_builders[i]->start();
00926                         }
00927                 }
00928
00929                 KDNode *prelimRoot = ctx.nodes.allocate(1);
00930                 buildTreeMinMax(ctx, 1, prelimRoot, bbox, bbox,
00931                                 indices, primCount, true, 0);
00932                 ctx.leftAlloc.release(indices);
00933
00934                 if (m_parallelBuild) {
00935                         m_interface.mutex.lock();
00936                         m_interface.done = true;
00937                         m_interface.cond.wakeAll();
00938                         m_interface.mutex.unlock();
00939                         for (SizeType i=0; i<m_builders.size(); ++i)
00940                                 m_builders[i]->wait();
00941                 }
00942
00943                 size_t totalUsage = m_indirections.capacity()
00944                         * sizeof(KDNode *) + ctx.size();
00945
00946                 /// Clean up event lists and print statistics
00947                 ctx.leftAlloc.cleanup();
00948                 ctx.rightAlloc.cleanup();
00949                 for (SizeType i=0; i<m_builders.size(); ++i) {
00950                         BuildContext &subCtx = m_builders[i]->getContext();
00951                         totalUsage += subCtx.size();
00952                         subCtx.leftAlloc.cleanup();
00953                         subCtx.rightAlloc.cleanup();
00954                         ctx.accumulateStatisticsFrom(subCtx);
00955                 }
00956
00957                 std::stack<boost::tuple<const KDNode *, KDNode *,
00958                                 const BuildContext *, BoundingBoxType> > stack;
00959
00960                 float expTraversalSteps = 0;
00961                 float expLeavesVisited = 0;
00962                 float expPrimitivesIntersected = 0;
00963                 float heuristicCost = 0;
00964
00965                 SizeType nodePtr = 0, indexPtr = 0;
00966                 SizeType maxPrimsInLeaf = 0;
00967                 const SizeType primBucketCount = 16;
00968                 SizeType primBuckets[primBucketCount];
00969                 memset(primBuckets, 0, sizeof(SizeType)*primBucketCount);
00970                 m_nodeCount = ctx.innerNodeCount + ctx.leafNodeCount;
00971                 m_indexCount = ctx.primIndexCount;
00972
00973                 // +1 shift is for alignment purposes (see KDNode::getSibling)
00974                 m_nodes = static_cast<KDNode *> (allocAligned(
00975                                 sizeof(KDNode) * (m_nodeCount+1)))+1;
00976                 m_indices = new IndexType[m_indexCount];
00977
00978                 /* The following code rewrites all tree nodes with proper relative
00979                    indices. It also computes the final tree cost and some other
00980                    useful heuristics */
00981                 stack.push(boost::make_tuple(prelimRoot, &m_nodes[nodePtr++],
00982                                         &ctx, bbox));
00983                 while (!stack.empty()) {
00984                         const KDNode *node = boost::get<0>(stack.top());
00985                         KDNode *target = boost::get<1>(stack.top());
00986                         const BuildContext *context = boost::get<2>(stack.top());
00987                         BoundingBoxType bbox = boost::get<3>(stack.top());
00988                         stack.pop();
00989                         typename std::map<const KDNode *, IndexType>::const_iterator it
00990                                 = m_interface.threadMap.find(node);
00991                         // Check if we're switching to a subtree built by a worker thread
00992                         if (it != m_interface.threadMap.end())
00993                                 context = &m_builders[(*it).second]->getContext();
00994
00995                         if (node->isLeaf()) {
00996                                 SizeType primStart = node->getPrimStart(),
00997                                                   primEnd = node->getPrimEnd(),
00998                                                   primsInLeaf = primEnd-primStart;
00999                                 target->initLeafNode(indexPtr, primsInLeaf);
01000
01001                                 float quantity = TreeConstructionHeuristic::getQuantity(bbox),
01002                                           weightedQuantity = quantity * primsInLeaf;
01003                                 expLeavesVisited += quantity;
01004                                 expPrimitivesIntersected += weightedQuantity;
01005                                 heuristicCost += weightedQuantity * m_queryCost;
01006                                 if (primsInLeaf < primBucketCount)
01007                                         primBuckets[primsInLeaf]++;
01008                                 if (primsInLeaf > maxPrimsInLeaf)
01009                                         maxPrimsInLeaf = primsInLeaf;
01010
01011                                 const BlockedVector<IndexType, NORI_KD_BLOCKSIZE_IDX> &indices
01012                                         = context->indices;
01013                                 for (SizeType idx = primStart; idx<primEnd; ++idx) {
01014                                         m_indices[indexPtr++] = indices[idx];
01015                                 }
01016                         } else {
01017                                 float quantity = TreeConstructionHeuristic::getQuantity(bbox);
01018                                 expTraversalSteps += quantity;
01019                                 heuristicCost += quantity * m_traversalCost;
01020
01021                                 const KDNode * left;
01022                                 if (EXPECT_TAKEN(!node->isIndirection()))
01023                                         left = node->getLeft();
01024                                 else
01025                                         left = m_indirections[node->getIndirectionIndex()];
01026
01027                                 KDNode * children = &m_nodes[nodePtr];
01028                                 nodePtr += 2;
01029                                 int axis = node->getAxis();
01030                                 float split = node->getSplit();
01031                                 bool result = target->initInnerNode(axis, split, children - target);
01032                                 if (!result)
01033                                         throw NoriException("Cannot represent relative pointer -- "
01034                                                 "too many primitives?");
01035
01036                                 float tmp = bbox.min[axis];
01037                                 bbox.min[axis] = split;
01038                                 stack.push(boost::make_tuple(left+1, children+1, context, bbox));
01039                                 bbox.min[axis] = tmp;
01040                                 bbox.max[axis] = split;
01041                                 stack.push(boost::make_tuple(left, children, context, bbox));
01042                         }
01043                 }
01044
01045                 /* Free some more memory */
01046                 ctx.nodes.clear();
01047                 ctx.indices.clear();
01048                 for (SizeType i=0; i<m_builders.size(); ++i) {
01049                         BuildContext &subCtx = m_builders[i]->getContext();
01050                         subCtx.nodes.clear();
01051                         subCtx.indices.clear();
01052                 }
01053                 std::vector<KDNode *>().swap(m_indirections);
01054
01055                 if (m_builders.size() > 0) {
01056                         for (SizeType i=0; i<m_builders.size(); ++i)
01057                                 delete m_builders[i];
01058                         m_builders.clear();
01059                 }
01060
01061                 float rootQuantity = TreeConstructionHeuristic::getQuantity(bbox);
01062                 expTraversalSteps /= rootQuantity;
01063                 expLeavesVisited /= rootQuantity;
01064                 expPrimitivesIntersected /= rootQuantity;
01065                 heuristicCost /= rootQuantity;
01066
01067                 /* Slightly enlarge the bounding box
01068                    (necessary e.g. when the scene is planar) */
01069                 m_tightBBox = bbox;
01070
01071                 const float eps = NORI_KD_BBOX_EPSILON;
01072
01073                 bbox.min -= (bbox.max-bbox.min) * eps + VectorType(eps);
01074                 bbox.max += (bbox.max-bbox.min) * eps + VectorType(eps);
01075
01076                 #if NORI_KD_VERBOSE == 1
01077                         cout << "Structural kd-tree statistics" << endl
01078                                 << "  Parallel work units         : " << m_interface.threadMap.size() << endl
01079                                 << "  Node storage cost           : " << (nodePtr * sizeof(KDNode)) / 1024 << " KiB" << endl
01080                                 << "  Index storage cost          : " << (indexPtr * sizeof(IndexType)) / 1024 << " KiB" << endl
01081                                 << "  Inner nodes                 : " << ctx.innerNodeCount << endl
01082                                 << "  Leaf nodes                  : " << ctx.leafNodeCount << endl
01083                                 << "  Nonempty leaf nodes         : " << ctx.nonemptyLeafNodeCount << endl << endl;
01084
01085                         cout << "Qualitative kd-tree statistics" << endl
01086                                 << "  Retracted splits            : " << ctx.retractedSplits << endl
01087                                 << "  Pruned primitives           : " << ctx.pruned << endl
01088                                 << "  Largest leaf node           : " << maxPrimsInLeaf << endl
01089                                 << "  Avg. prims / nonempty leaf  : " << ctx.primIndexCount / (float) ctx.nonemptyLeafNodeCount << endl
01090                                 << "  Expected traversals/query   : " << expTraversalSteps << endl
01091                                 << "  Expected leaf visits/query  : " << expLeavesVisited << endl
01092                                 << "  Expected prim. visits/query : " << expPrimitivesIntersected << endl
01093                                 << "  Final cost                  : " << heuristicCost << endl << endl;
01094                 #endif
01095
01096                 cout << "Finished after " << timer.elapsed() << " ms (used "
01097                         << totalUsage/1024 << " KiB of temp. memory)" << endl
01098                         << "The final kd-tree requires " << (nodePtr*sizeof(KDNode) +
01099                         indexPtr * sizeof(IndexType)) / 1024 << " KiB of memory" << endl;
01100         }
01101
01102 protected:
01103         /// Primitive classification during tree-construction
01104         enum EClassificationResult {
01105                 /// Straddling primitive
01106                 EBothSides = 0,
01107                 /// Primitive is entirely on the left side of the split
01108                 ELeftSide = 1,
01109                 /// Primitive is entirely on the right side of the split
01110                 ERightSide = 2,
01111                 /// Edge events have been generated for the straddling primitive
01112                 EBothSidesProcessed = 3
01113         };
01114
01115         /**
01116          * \brief Describes the beginning or end of a primitive
01117          * when projected onto a certain dimension.
01118          */
01119         struct EdgeEvent {
01120                 /// Possible event types
01121                 enum EEventType {
01122                         EEdgeEnd = 0,
01123                         EEdgePlanar = 1,
01124                         EEdgeStart = 2
01125                 };
01126
01127                 /// Dummy constructor
01128                 inline EdgeEvent() { }
01129
01130                 /// Create a new edge event
01131                 inline EdgeEvent(uint16_t type, int axis, float pos, IndexType index)
01132                  : pos(pos), index(index), type(type), axis(axis) { }
01133
01134                 /// Plane position
01135                 float pos;
01136                 /// Primitive index
01137                 IndexType index;
01138                 /// Event type: end/planar/start
01139                 unsigned int type:2;
01140                 /// Event axis
01141                 unsigned int axis:2;
01142         };
01143
01144         BOOST_STATIC_ASSERT(sizeof(EdgeEvent) == 12);
01145
01146         /// Edge event comparison functor
01147         struct EdgeEventOrdering : public std::binary_function<EdgeEvent, EdgeEvent, bool> {
01148                 inline bool operator()(const EdgeEvent &a, const EdgeEvent &b) const {
01149                         if (a.axis != b.axis)
01150                                 return a.axis < b.axis;
01151                         if (a.pos != b.pos)
01152                                 return a.pos < b.pos;
01153                         return a.type < b.type;
01154                 }
01155         };
01156
01157         /**
01158          * \brief Data type for split candidates computed by
01159          * the O(n log n) greedy optimization method.
01160          * */
01161         struct SplitCandidate {
01162                 float cost;
01163                 float pos;
01164                 int axis;
01165                 SizeType numLeft, numRight;
01166                 bool planarLeft;
01167
01168                 inline SplitCandidate() :
01169                         cost(std::numeric_limits<float>::infinity()),
01170                         pos(0), axis(0), numLeft(0), numRight(0), planarLeft(false) {
01171                 }
01172         };
01173
01174         /**
01175          * \brief Per-thread context used to manage memory allocations,
01176          * also records some useful statistics.
01177          */
01178         struct BuildContext {
01179                 OrderedChunkAllocator leftAlloc, rightAlloc;
01180                 BlockedVector<KDNode, NORI_KD_BLOCKSIZE_KD> nodes;
01181                 BlockedVector<IndexType, NORI_KD_BLOCKSIZE_IDX> indices;
01182                 ClassificationStorage classStorage;
01183                 MinMaxBins minMaxBins;
01184
01185                 SizeType leafNodeCount;
01186                 SizeType nonemptyLeafNodeCount;
01187                 SizeType innerNodeCount;
01188                 SizeType primIndexCount;
01189                 SizeType retractedSplits;
01190                 SizeType pruned;
01191
01192                 BuildContext(SizeType primCount, SizeType binCount)
01193                         : minMaxBins(binCount) {
01194                         classStorage.setPrimitiveCount(primCount);
01195                         leafNodeCount = 0;
01196                         nonemptyLeafNodeCount = 0;
01197                         innerNodeCount = 0;
01198                         primIndexCount = 0;
01199                         retractedSplits = 0;
01200                         pruned = 0;
01201                 }
01202
01203                 size_t size() {
01204                         return leftAlloc.size() + rightAlloc.size()
01205                                 + nodes.capacity() * sizeof(KDNode)
01206                                 + indices.capacity() * sizeof(IndexType)
01207                                 + classStorage.size();
01208                 }
01209
01210                 void accumulateStatisticsFrom(const BuildContext &ctx) {
01211                         leafNodeCount += ctx.leafNodeCount;
01212                         nonemptyLeafNodeCount += ctx.nonemptyLeafNodeCount;
01213                         innerNodeCount += ctx.innerNodeCount;
01214                         primIndexCount += ctx.primIndexCount;
01215                         retractedSplits += ctx.retractedSplits;
01216                         pruned += ctx.pruned;
01217                 }
01218         };
01219
01220         /**
01221          * \brief Communication data structure used to pass jobs to
01222          * kd-tree builder threads
01223          */
01224         struct BuildInterface {
01225                 /* Communcation */
01226                 QMutex mutex;
01227                 QWaitCondition cond, condJobTaken;
01228                 std::map<const KDNode *, IndexType> threadMap;
01229                 bool done;
01230
01231                 /* Job description for building a subtree */
01232                 int depth;
01233                 KDNode *node;
01234                 BoundingBoxType nodeBoundingBox;
01235                 EdgeEvent *eventStart, *eventEnd;
01236                 SizeType primCount;
01237                 int badRefines;
01238
01239                 inline BuildInterface() {
01240                         node = NULL;
01241                         done = false;
01242                 }
01243         };
01244
01245         /**
01246          * \brief kd-tree builder thread
01247          */
01248         class TreeBuilder : public QThread {
01249         public:
01250                 TreeBuilder(IndexType id, GenericKDTree *parent)
01251                         : QThread(), m_id(id),
01252                         m_parent(parent),
01253                         m_context(parent->cast()->getPrimitiveCount(),
01254                                           parent->getMinMaxBins()),
01255                         m_interface(parent->m_interface) {
01256                 }
01257
01258                 void run() {
01259                         OrderedChunkAllocator &leftAlloc = m_context.leftAlloc;
01260                         while (true) {
01261                                 m_interface.mutex.lock();
01262                                 while (!m_interface.done && !m_interface.node)
01263                                         m_interface.cond.wait(&m_interface.mutex);
01264                                 if (m_interface.done) {
01265                                         m_interface.mutex.unlock();
01266                                         break;
01267                                 }
01268                                 int depth = m_interface.depth;
01269                                 KDNode *node = m_interface.node;
01270                                 BoundingBoxType nodeBoundingBox = m_interface.nodeBoundingBox;
01271                                 size_t eventCount = m_interface.eventEnd - m_interface.eventStart;
01272                                 SizeType primCount = m_interface.primCount;
01273                                 int badRefines = m_interface.badRefines;
01274                                 EdgeEvent *eventStart = leftAlloc.allocate<EdgeEvent>(eventCount),
01275                                                   *eventEnd = eventStart + eventCount;
01276                                 memcpy(eventStart, m_interface.eventStart,
01277                                                 eventCount * sizeof(EdgeEvent));
01278                                 m_interface.threadMap[node] = m_id;
01279                                 m_interface.node = NULL;
01280                                 m_interface.condJobTaken.wakeOne();
01281                                 m_interface.mutex.unlock();
01282
01283                                 std::sort(eventStart, eventEnd, EdgeEventOrdering());
01284                                 m_parent->buildTree(m_context, depth, node,
01285                                         nodeBoundingBox, eventStart, eventEnd, primCount, true, badRefines);
01286                                 leftAlloc.release(eventStart);
01287                         }
01288                 }
01289
01290                 inline BuildContext &getContext() {
01291                         return m_context;
01292                 }
01293
01294         private:
01295                 IndexType m_id;
01296                 GenericKDTree *m_parent;
01297                 BuildContext m_context;
01298                 BuildInterface &m_interface;
01299         };
01300
01301         /// Cast to the derived class
01302         inline Derived *cast() {
01303                 return static_cast<Derived *>(this);
01304         }
01305
01306         /// Cast to the derived class (const version)
01307         inline const Derived *cast() const {
01308                 return static_cast<const Derived *>(this);
01309         }
01310
01311         /**
01312          * \brief Create an edge event list for a given list of primitives.
01313          *
01314          * This is necessary when passing from Min-Max binning to the more
01315          * accurate O(n log n) optimizier.
01316          */
01317         boost::tuple<EdgeEvent *, EdgeEvent *, SizeType> createEventList(
01318                         OrderedChunkAllocator &alloc, const BoundingBoxType &nodeBoundingBox,
01319                         IndexType *prims, SizeType primCount) {
01320                 SizeType initialSize = primCount * 2 * PointType::Dimension, actualPrimCount = 0;
01321                 EdgeEvent *eventStart = alloc.allocate<EdgeEvent>(initialSize);
01322                 EdgeEvent *eventEnd = eventStart;
01323
01324                 for (SizeType i=0; i<primCount; ++i) {
01325                         IndexType index = prims[i];
01326                         BoundingBoxType bbox;
01327                         if (m_clip) {
01328                                 bbox = cast()->getClippedBoundingBox(index, nodeBoundingBox);
01329                                 if (!bbox.isValid() || bbox.getSurfaceArea() == 0)
01330                                         continue;
01331                         } else {
01332                                 bbox = cast()->getBoundingBox(index);
01333                         }
01334
01335                         for (int axis=0; axis<PointType::Dimension; ++axis) {
01336                                 float min = (float) bbox.min[axis], max = (float) bbox.max[axis];
01337
01338                                 if (min == max) {
01339                                         *eventEnd++ = EdgeEvent(EdgeEvent::EEdgePlanar, axis,
01340                                                         min, index);
01341                                 } else {
01342                                         *eventEnd++ = EdgeEvent(EdgeEvent::EEdgeStart, axis,
01343                                                         min, index);
01344                                         *eventEnd++ = EdgeEvent(EdgeEvent::EEdgeEnd, axis,
01345                                                         max, index);
01346                                 }
01347                         }
01348                         ++actualPrimCount;
01349                 }
01350
01351                 SizeType newSize = (SizeType) (eventEnd - eventStart);
01352                 if (newSize != initialSize)
01353                         alloc.shrinkAllocation<EdgeEvent>(eventStart, newSize);
01354
01355                 return boost::make_tuple(eventStart, eventEnd, actualPrimCount);
01356         }
01357
01358         /**
01359          * \brief Leaf node creation helper function
01360          *
01361          * \param ctx
01362          *     Thread-specific build context containing allocators etc.
01363          * \param node
01364          *     KD-tree node entry to be filled
01365          * \param eventStart
01366          *     Start pointer of an edge event list
01367          * \param eventEnd
01368          *     End pointer of an edge event list
01369          * \param primCount
01370          *     Total primitive count for the current node
01371          */
01372         void createLeaf(BuildContext &ctx, KDNode *node, EdgeEvent *eventStart,
01373                         EdgeEvent *eventEnd, SizeType primCount) {
01374                 node->initLeafNode((SizeType) ctx.indices.size(), primCount);
01375                 if (primCount > 0) {
01376                         SizeType seenPrims = 0;
01377                         ctx.nonemptyLeafNodeCount++;
01378
01379                         for (EdgeEvent *event = eventStart; event != eventEnd
01380                                         && event->axis == 0; ++event) {
01381                                 if (event->type == EdgeEvent::EEdgeStart ||
01382                                         event->type == EdgeEvent::EEdgePlanar) {
01383                                         ctx.indices.push_back(event->index);
01384                                         seenPrims++;
01385                                 }
01386                         }
01387                         ctx.primIndexCount += primCount;
01388                 }
01389                 ctx.leafNodeCount++;
01390         }
01391
01392         /**
01393          * \brief Leaf node creation helper function
01394          *
01395          * \param ctx
01396          *     Thread-specific build context containing allocators etc.
01397          * \param node
01398          *     KD-tree node entry to be filled
01399          * \param indices
01400          *     Start pointer of an index list
01401          * \param primCount
01402          *     Total primitive count for the current node
01403          */
01404         void createLeaf(BuildContext &ctx, KDNode *node, SizeType *indices,
01405                         SizeType primCount) {
01406                 node->initLeafNode((SizeType) ctx.indices.size(), primCount);
01407                 if (primCount > 0) {
01408                         ctx.nonemptyLeafNodeCount++;
01409                         for (SizeType i=0; i<primCount; ++i)
01410                                 ctx.indices.push_back(indices[i]);
01411                         ctx.primIndexCount += primCount;
01412                 }
01413                 ctx.leafNodeCount++;
01414         }
01415
01416         /**
01417          * \brief Leaf node creation helper function.
01418          *
01419          * Creates a unique index list by collapsing
01420          * a subtree with a bad cost.
01421          *
01422          * \param ctx
01423          *     Thread-specific build context containing allocators etc.
01424          * \param node
01425          *     KD-tree node entry to be filled
01426          * \param start
01427          *     Start pointer of the subtree indices
01428          */
01429         void createLeafAfterRetraction(BuildContext &ctx, KDNode *node, SizeType start) {
01430                 SizeType indexCount = (SizeType) (ctx.indices.size() - start);
01431
01432                 OrderedChunkAllocator &alloc = ctx.leftAlloc;
01433
01434                 /* A temporary list is allocated to do the sorting (the indices
01435                    are not guaranteed to be contiguous in memory) */
01436                 IndexType *tempStart = alloc.allocate<IndexType>(indexCount),
01437                                    *tempEnd = tempStart + indexCount,
01438                                    *ptr = tempStart;
01439
01440                 for (SizeType i=start, end = start + indexCount; i<end; ++i)
01441                         *ptr++ = ctx.indices[i];
01442
01443                 /* Generate an index list without duplicate entries */
01444                 std::sort(tempStart, tempEnd, std::less<IndexType>());
01445                 ptr = tempStart;
01446
01447                 int idx = start;
01448                 while (ptr < tempEnd) {
01449                         ctx.indices[idx] = *ptr++;
01450                         while (ptr < tempEnd && *ptr == ctx.indices[idx])
01451                                 ++ptr;
01452                         idx++;
01453                 }
01454
01455                 int nSeen = idx-start;
01456                 ctx.primIndexCount = ctx.primIndexCount - indexCount + nSeen;
01457                 ctx.indices.resize(idx);
01458                 alloc.release(tempStart);
01459                 node->initLeafNode(start, nSeen);
01460                 ctx.nonemptyLeafNodeCount++;
01461                 ctx.leafNodeCount++;
01462         }
01463
01464         /**
01465          * \brief Implements the transition from min-max-binning to the
01466          * O(n log n) optimization.
01467          *
01468          * \param ctx
01469          *     Thread-specific build context containing allocators etc.
01470          * \param depth
01471          *     Current tree depth (1 == root node)
01472          * \param node
01473          *     KD-tree node entry to be filled
01474          * \param nodeBoundingBox
01475          *     Axis-aligned bounding box of the current node
01476          * \param indices
01477          *     Index list of all triangles in the current node (for min-max binning)
01478          * \param primCount
01479          *     Total primitive count for the current node
01480          * \param isLeftChild
01481          *     Is this node the left child of its parent? This is important for
01482          *     memory management using the \ref OrderedChunkAllocator.
01483          * \param badRefines
01484          *     Number of "probable bad refines" further up the tree. This makes
01485          *     it possible to split along an initially bad-looking candidate in
01486          *     the hope that the cost was significantly overestimated. The
01487          *     counter makes sure that only a limited number of such splits can
01488          *     happen in succession.
01489          * \returns
01490          *     Final cost of the node
01491          */
01492         inline float transitionToNLogN(BuildContext &ctx, unsigned int depth, KDNode *node,
01493                         const BoundingBoxType &nodeBoundingBox, IndexType *indices,
01494                         SizeType primCount, bool isLeftChild, SizeType badRefines) {
01495                 OrderedChunkAllocator &alloc = isLeftChild
01496                                 ? ctx.leftAlloc : ctx.rightAlloc;
01497                 boost::tuple<EdgeEvent *, EdgeEvent *, SizeType> events
01498                                 = createEventList(alloc, nodeBoundingBox, indices, primCount);
01499                 float cost;
01500                 if (m_parallelBuild) {
01501                         m_interface.mutex.lock();
01502                         m_interface.depth = depth;
01503                         m_interface.node = node;
01504                         m_interface.nodeBoundingBox = nodeBoundingBox;
01505                         m_interface.eventStart = boost::get<0>(events);
01506                         m_interface.eventEnd = boost::get<1>(events);
01507                         m_interface.primCount = boost::get<2>(events);
01508                         m_interface.badRefines = badRefines;
01509                         m_interface.cond.wakeOne();
01510
01511                         /* Wait for a worker thread to take this job */
01512                         while (m_interface.node)
01513                                 m_interface.condJobTaken.wait(&m_interface.mutex);
01514                         m_interface.mutex.unlock();
01515
01516                         // Never tear down this subtree (return a cost of -infinity)
01517                         cost = -std::numeric_limits<float>::infinity();
01518                 } else {
01519                         std::sort(boost::get<0>(events), boost::get<1>(events),
01520                                         EdgeEventOrdering());
01521
01522                         cost = buildTree(ctx, depth, node, nodeBoundingBox,
01523                                 boost::get<0>(events), boost::get<1>(events),
01524                                 boost::get<2>(events), isLeftChild, badRefines);
01525                 }
01526                 alloc.release(boost::get<0>(events));
01527                 return cost;
01528         }
01529
01530         /**
01531          * \brief Build helper function (min-max binning)
01532          *
01533          * \param ctx
01534          *     Thread-specific build context containing allocators etc.
01535          * \param depth
01536          *     Current tree depth (1 == root node)
01537          * \param node
01538          *     KD-tree node entry to be filled
01539          * \param nodeBoundingBox
01540          *     Axis-aligned bounding box of the current node
01541          * \param tightBBox
01542          *     Tight bounding box of the contained geometry (for min-max binning)
01543          * \param indices
01544          *     Index list of all triangles in the current node (for min-max binning)
01545          * \param primCount
01546          *     Total primitive count for the current node
01547          * \param isLeftChild
01548          *     Is this node the left child of its parent? This is important for
01549          *     memory management using the \ref OrderedChunkAllocator.
01550          * \param badRefines
01551          *     Number of "probable bad refines" further up the tree. This makes
01552          *     it possible to split along an initially bad-looking candidate in
01553          *     the hope that the cost was significantly overestimated. The
01554          *     counter makes sure that only a limited number of such splits can
01555          *     happen in succession.
01556          * \returns
01557          *     Final cost of the node
01558          */
01559         float buildTreeMinMax(BuildContext &ctx, unsigned int depth, KDNode *node,
01560                         const BoundingBoxType &nodeBoundingBox, const BoundingBoxType &tightBBox, IndexType *indices,
01561                         SizeType primCount, bool isLeftChild, SizeType badRefines) {
01562
01563                 float leafCost = primCount * m_queryCost;
01564                 if (primCount <= m_stopPrims || depth >= m_maxDepth) {
01565                         createLeaf(ctx, node, indices, primCount);
01566                         return leafCost;
01567                 }
01568
01569                 if (primCount <= m_exactPrimThreshold)
01570                         return transitionToNLogN(ctx, depth, node, nodeBoundingBox, indices,
01571                                 primCount, isLeftChild, badRefines);
01572
01573                 /* ==================================================================== */
01574             /*                              Binning                                 */
01575             /* ==================================================================== */
01576
01577                 ctx.minMaxBins.setBoundingBox(tightBBox);
01578                 ctx.minMaxBins.bin(cast(), indices, primCount);
01579
01580                 /* ==================================================================== */
01581             /*                        Split candidate search                        */
01582         /* ==================================================================== */
01583                 SplitCandidate bestSplit = ctx.minMaxBins.minimizeCost(m_traversalCost,
01584                                 m_queryCost);
01585
01586                 if (bestSplit.cost == std::numeric_limits<float>::infinity()) {
01587                         /* This is bad: we have either run out of floating point precision to
01588                            accurately represent split planes (e.g. 'tightBBox' is almost collapsed
01589                            along an axis), or the compiler made overly liberal use of floating point
01590                            optimizations, causing the two stages of the min-max binning code to
01591                            become inconsistent. The two ways to proceed at this point are to
01592                            either create a leaf (bad) or switch over to the O(n log n) greedy
01593                            optimization, which is done below */
01594                         cout << "Min-max binning failed. Retrying with the O(n log n) greedy algorithm." << endl;
01595                         return transitionToNLogN(ctx, depth, node, nodeBoundingBox, indices,
01596                                 primCount, isLeftChild, badRefines);
01597                 }
01598
01599                 /* "Bad refines" heuristic from PBRT */
01600                 if (bestSplit.cost >= leafCost) {
01601                         if ((bestSplit.cost > 4 * leafCost && primCount < 16)
01602                                 || badRefines >= m_maxBadRefines) {
01603                                 createLeaf(ctx, node, indices, primCount);
01604                                 return leafCost;
01605                         }
01606                         ++badRefines;
01607                 }
01608
01609                 /* ==================================================================== */
01610             /*                            Partitioning                              */
01611             /* ==================================================================== */
01612
01613                 boost::tuple<BoundingBoxType, IndexType *, BoundingBoxType, IndexType *> partition =
01614                         ctx.minMaxBins.partition(ctx, cast(), indices, bestSplit,
01615                                 isLeftChild, m_traversalCost, m_queryCost);
01616
01617                 /* ==================================================================== */
01618             /*                              Recursion                               */
01619             /* ==================================================================== */
01620
01621                 KDNode * children = ctx.nodes.allocate(2);
01622
01623                 SizeType nodePosBeforeSplit = (SizeType) ctx.nodes.size();
01624                 SizeType indexPosBeforeSplit = (SizeType) ctx.indices.size();
01625                 SizeType leafNodeCountBeforeSplit = ctx.leafNodeCount;
01626                 SizeType nonemptyLeafNodeCountBeforeSplit = ctx.nonemptyLeafNodeCount;
01627                 SizeType innerNodeCountBeforeSplit = ctx.innerNodeCount;
01628
01629                 if (!node->initInnerNode(bestSplit.axis, bestSplit.pos, children-node)) {
01630                         m_indirectionLock.lock();
01631                         SizeType indirectionIdx = (SizeType) m_indirections.size();
01632                         m_indirections.push_back(children);
01633                         /* Unable to store relative offset -- create an indirection
01634                            table entry */
01635                         node->initIndirectionNode(bestSplit.axis, bestSplit.pos,
01636                                         indirectionIdx);
01637                         m_indirectionLock.unlock();
01638                 }
01639                 ctx.innerNodeCount++;
01640
01641                 BoundingBoxType childBoundingBox(nodeBoundingBox);
01642                 childBoundingBox.max[bestSplit.axis] = bestSplit.pos;
01643
01644                 float leftCost = buildTreeMinMax(ctx, depth+1, children,
01645                                 childBoundingBox, boost::get<0>(partition), boost::get<1>(partition),
01646                                 bestSplit.numLeft, true, badRefines);
01647
01648                 childBoundingBox.min[bestSplit.axis] = bestSplit.pos;
01649                 childBoundingBox.max[bestSplit.axis] = nodeBoundingBox.max[bestSplit.axis];
01650
01651                 float rightCost = buildTreeMinMax(ctx, depth+1, children + 1,
01652                                 childBoundingBox, boost::get<2>(partition), boost::get<3>(partition),
01653                                 bestSplit.numRight, false, badRefines);
01654
01655                 TreeConstructionHeuristic tch(nodeBoundingBox);
01656                 std::pair<float, float> prob = tch(bestSplit.axis,
01657                         bestSplit.pos - nodeBoundingBox.min[bestSplit.axis],
01658                         nodeBoundingBox.max[bestSplit.axis] - bestSplit.pos);
01659
01660                 /* Compute the final cost given the updated cost
01661                    values received from the children */
01662                 float finalCost = m_traversalCost +
01663                         (prob.first * leftCost + prob.second * rightCost);
01664
01665                 /* Release the index lists not needed by the children anymore */
01666                 if (isLeftChild)
01667                         ctx.rightAlloc.release(boost::get<3>(partition));
01668                 else
01669                         ctx.leftAlloc.release(boost::get<1>(partition));
01670
01671                 /* ==================================================================== */
01672             /*                           Final decision                             */
01673             /* ==================================================================== */
01674
01675                 if (!m_retract || finalCost < primCount * m_queryCost) {
01676                         return finalCost;
01677                 } else {
01678                         /* In the end, splitting didn't help to reduce the cost.
01679                            Tear up everything below this node and create a leaf */
01680                         ctx.nodes.resize(nodePosBeforeSplit);
01681                         ctx.retractedSplits++;
01682                         ctx.leafNodeCount = leafNodeCountBeforeSplit;
01683                         ctx.nonemptyLeafNodeCount = nonemptyLeafNodeCountBeforeSplit;
01684                         ctx.innerNodeCount = innerNodeCountBeforeSplit;
01685                         createLeafAfterRetraction(ctx, node, indexPosBeforeSplit);
01686                         return leafCost;
01687                 }
01688         }
01689
01690         /*
01691          * \brief Build helper function (greedy O(n log n) optimization)
01692          *
01693          * \param ctx
01694          *     Thread-specific build context containing allocators etc.
01695          * \param depth
01696          *     Current tree depth (1 == root node)
01697          * \param node
01698          *     KD-tree node entry to be filled
01699          * \param nodeBoundingBox
01700          *     Axis-aligned bounding box of the current node
01701          * \param eventStart
01702          *     Pointer to the beginning of a sorted edge event list
01703          * \param eventEnd
01704          *     Pointer to the end of a sorted edge event list
01705          * \param primCount
01706          *     Total primitive count for the current node
01707          * \param isLeftChild
01708          *     Is this node the left child of its parent? This is important for
01709          *     memory management using the \ref OrderedChunkAllocator.
01710          * \param badRefines
01711          *     Number of "probable bad refines" further up the tree. This makes
01712          *     it possible to split along an initially bad-looking candidate in
01713          *     the hope that the cost was significantly overestimated. The
01714          *     counter makes sure that only a limited number of such splits can
01715          *     happen in succession.
01716          * \returns
01717          *     Final cost of the node
01718          */
01719         float buildTree(BuildContext &ctx, unsigned int depth, KDNode *node,
01720                 const BoundingBoxType &nodeBoundingBox, EdgeEvent *eventStart, EdgeEvent *eventEnd,
01721                 SizeType primCount, bool isLeftChild, SizeType badRefines) {
01722
01723                 float leafCost = primCount * m_queryCost;
01724                 if (primCount <= m_stopPrims || depth >= m_maxDepth) {
01725                         createLeaf(ctx, node, eventStart, eventEnd, primCount);
01726                         return leafCost;
01727                 }
01728
01729                 SplitCandidate bestSplit;
01730
01731                 /* ==================================================================== */
01732             /*                        Split candidate search                        */
01733         /* ==================================================================== */
01734
01735                 /* First, find the optimal splitting plane according to the
01736                    tree construction heuristic. To do this in O(n), the search is
01737                    implemented as a sweep over the edge events */
01738
01739                 /* Initially, the split plane is placed left of the scene
01740                    and thus all geometry is on its right side */
01741                 SizeType numLeft[PointType::Dimension],
01742                                   numRight[PointType::Dimension];
01743
01744                 for (int i=0; i<PointType::Dimension; ++i) {
01745                         numLeft[i] = 0;
01746                         numRight[i] = primCount;
01747                 }
01748
01749                 EdgeEvent *eventsByAxis[PointType::Dimension];
01750                 int eventsByAxisCtr = 1;
01751                 eventsByAxis[0] = eventStart;
01752                 TreeConstructionHeuristic tch(nodeBoundingBox);
01753
01754                 /* Iterate over all events on the current axis */
01755                 for (EdgeEvent *event = eventStart; event < eventEnd; ) {
01756                         /* Record the current position and count the number
01757                            and type of remaining events, which are also here.
01758                            Due to the sort ordering, there is no need to worry
01759                            about an axis change in the loops below */
01760                         int axis = event->axis;
01761                         float pos = event->pos;
01762                         SizeType numStart = 0, numEnd = 0, numPlanar = 0;
01763
01764                         /* Count "end" events */
01765                         while (event < eventEnd && event->pos == pos
01766                                 && event->axis == axis
01767                                 && event->type == EdgeEvent::EEdgeEnd) {
01768                                 ++numEnd; ++event;
01769                         }
01770
01771                         /* Count "planar" events */
01772                         while (event < eventEnd && event->pos == pos
01773                                 && event->axis == axis
01774                                 && event->type == EdgeEvent::EEdgePlanar) {
01775                                 ++numPlanar; ++event;
01776                         }
01777
01778                         /* Count "start" events */
01779                         while (event < eventEnd && event->pos == pos
01780                                 && event->axis == axis
01781                                 && event->type == EdgeEvent::EEdgeStart) {
01782                                 ++numStart; ++event;
01783                         }
01784
01785                         /* Keep track of the beginning of dimensions */
01786                         if (event < eventEnd && event->axis != axis) {
01787                                 eventsByAxis[eventsByAxisCtr++] = event;
01788                         }
01789
01790                         /* The split plane can now be moved onto 't'. Accordingly, all planar
01791                            and ending primitives are removed from the right side */
01792                         numRight[axis] -= numPlanar + numEnd;
01793
01794                         /* Calculate a score using the tree construction heuristic */
01795                         if (EXPECT_TAKEN(pos > nodeBoundingBox.min[axis] && pos < nodeBoundingBox.max[axis])) {
01796                                 const SizeType nL = numLeft[axis], nR = numRight[axis];
01797                                 const float nLF = (float) nL, nRF = (float) nR;
01798
01799                                 std::pair<float, float> prob = tch(axis,
01800                                                 pos - nodeBoundingBox.min[axis],
01801                                                 nodeBoundingBox.max[axis] - pos);
01802
01803                                 if (numPlanar == 0) {
01804                                         float cost = m_traversalCost + m_queryCost
01805                                                 * (prob.first * nLF + prob.second * nRF);
01806                                         if (nL == 0 || nR == 0)
01807                                                 cost *= m_emptySpaceBonus;
01808                                         if (cost < bestSplit.cost) {
01809                                                 bestSplit.pos = pos;
01810                                                 bestSplit.axis = axis;
01811                                                 bestSplit.cost = cost;
01812                                                 bestSplit.numLeft = nL;
01813                                                 bestSplit.numRight = nR;
01814                                         }
01815                                 } else {
01816                                         float costPlanarLeft  = m_traversalCost
01817                                                 + m_queryCost * (prob.first * (nL+numPlanar) + prob.second * nRF);
01818                                         float costPlanarRight = m_traversalCost
01819                                                 + m_queryCost * (prob.first * nLF + prob.second * (nR+numPlanar));
01820
01821                                         if (nL + numPlanar == 0 || nR == 0)
01822                                                 costPlanarLeft *= m_emptySpaceBonus;
01823                                         if (nL == 0 || nR + numPlanar == 0)
01824                                                 costPlanarRight *= m_emptySpaceBonus;
01825
01826                                         if (costPlanarLeft < bestSplit.cost ||
01827                                                 costPlanarRight < bestSplit.cost) {
01828                                                 bestSplit.pos = pos;
01829                                                 bestSplit.axis = axis;
01830
01831                                                 if (costPlanarLeft < costPlanarRight) {
01832                                                         bestSplit.cost = costPlanarLeft;
01833                                                         bestSplit.numLeft = nL + numPlanar;
01834                                                         bestSplit.numRight = nR;
01835                                                         bestSplit.planarLeft = true;
01836                                                 } else {
01837                                                         bestSplit.cost = costPlanarRight;
01838                                                         bestSplit.numLeft = nL;
01839                                                         bestSplit.numRight = nR + numPlanar;
01840                                                         bestSplit.planarLeft = false;
01841                                                 }
01842                                         }
01843                                 }
01844                         }
01845
01846                         /* The split plane is moved past 't'. All prims,
01847                                 which were planar on 't', are moved to the left
01848                                 side. Also, starting prims are now also left of
01849                                 the split plane. */
01850                         numLeft[axis] += numStart + numPlanar;
01851                 }
01852
01853                 /* "Bad refines" heuristic from PBRT */
01854                 if (bestSplit.cost >= leafCost) {
01855                         if ((bestSplit.cost > 4 * leafCost && primCount < 16)
01856                                 || badRefines >= m_maxBadRefines
01857                                 || bestSplit.cost == std::numeric_limits<float>::infinity()) {
01858                                 createLeaf(ctx, node, eventStart, eventEnd, primCount);
01859                                 return leafCost;
01860                         }
01861                         ++badRefines;
01862                 }
01863
01864                 /* ==================================================================== */
01865                 /*                      Primitive Classification                        */
01866                 /* ==================================================================== */
01867
01868                 ClassificationStorage &storage = ctx.classStorage;
01869
01870                 /* Initially mark all prims as being located on both sides */
01871                 for (EdgeEvent *event = eventsByAxis[bestSplit.axis];
01872                          event < eventEnd && event->axis == bestSplit.axis; ++event)
01873                         storage.set(event->index, EBothSides);
01874
01875                 SizeType primsLeft = 0, primsRight = 0, primsBoth = primCount;
01876                 /* Sweep over all edge events and classify the primitives wrt. the split */
01877                 for (EdgeEvent *event = eventsByAxis[bestSplit.axis];
01878                          event < eventEnd && event->axis == bestSplit.axis; ++event) {
01879                         if (event->type == EdgeEvent::EEdgeEnd && event->pos <= bestSplit.pos) {
01880                                 /* The primitive's interval ends before or on the split plane
01881                                    -> classify to the left side */
01882                                 storage.set(event->index, ELeftSide);
01883                                 primsBoth--;
01884                                 primsLeft++;
01885                         } else if (event->type == EdgeEvent::EEdgeStart
01886                                         && event->pos >= bestSplit.pos) {
01887                                 /* The primitive's interval starts after or on the split plane
01888                                    -> classify to the right side */
01889                                 storage.set(event->index, ERightSide);
01890                                 primsBoth--;
01891                                 primsRight++;
01892                         } else if (event->type == EdgeEvent::EEdgePlanar) {
01893                                 /* If the planar primitive is not on the split plane, the
01894                                    classification is easy. Otherwise, place it on the side with
01895                                    the lower cost */
01896                                 if (event->pos < bestSplit.pos || (event->pos == bestSplit.pos
01897                                                 && bestSplit.planarLeft)) {
01898                                         storage.set(event->index, ELeftSide);
01899                                         primsBoth--;
01900                                         primsLeft++;
01901                                 } else if (event->pos > bestSplit.pos
01902                                            || (event->pos == bestSplit.pos && !bestSplit.planarLeft)) {
01903                                         storage.set(event->index, ERightSide);
01904                                         primsBoth--;
01905                                         primsRight++;
01906                                 } else {
01907                                 }
01908                         }
01909                 }
01910
01911                 /* Some sanity checks */
01912
01913                 OrderedChunkAllocator &leftAlloc = ctx.leftAlloc,
01914                         &rightAlloc = ctx.rightAlloc;
01915
01916                 EdgeEvent *leftEventsStart, *rightEventsStart;
01917                 if (isLeftChild) {
01918                         leftEventsStart = eventStart;
01919                         rightEventsStart = rightAlloc.allocate<EdgeEvent>(bestSplit.numRight * 2 * PointType::Dimension);
01920                 } else {
01921                         leftEventsStart = leftAlloc.allocate<EdgeEvent>(bestSplit.numLeft * 2 * PointType::Dimension);
01922                         rightEventsStart = eventStart;
01923                 }
01924
01925                 EdgeEvent *leftEventsEnd = leftEventsStart, *rightEventsEnd = rightEventsStart;
01926
01927                 BoundingBoxType leftNodeBoundingBox = nodeBoundingBox, rightNodeBoundingBox = nodeBoundingBox;
01928                 leftNodeBoundingBox.max[bestSplit.axis] = bestSplit.pos;
01929                 rightNodeBoundingBox.min[bestSplit.axis] = bestSplit.pos;
01930
01931                 SizeType prunedLeft = 0, prunedRight = 0;
01932
01933                 /* ==================================================================== */
01934                 /*                            Partitioning                              */
01935                 /* ==================================================================== */
01936
01937                 if (m_clip) {
01938                         EdgeEvent
01939                           *leftEventsTempStart = leftAlloc.allocate<EdgeEvent>(primsLeft * 2 * PointType::Dimension),
01940                           *rightEventsTempStart = rightAlloc.allocate<EdgeEvent>(primsRight * 2 * PointType::Dimension),
01941                           *newEventsLeftStart = leftAlloc.allocate<EdgeEvent>(primsBoth * 2 * PointType::Dimension),
01942                           *newEventsRightStart = rightAlloc.allocate<EdgeEvent>(primsBoth * 2 * PointType::Dimension);
01943
01944                         EdgeEvent *leftEventsTempEnd = leftEventsTempStart,
01945                                         *rightEventsTempEnd = rightEventsTempStart,
01946                                         *newEventsLeftEnd = newEventsLeftStart,
01947                                         *newEventsRightEnd = newEventsRightStart;
01948
01949                         for (EdgeEvent *event = eventStart; event<eventEnd; ++event) {
01950                                 int classification = storage.get(event->index);
01951
01952                                 if (classification == ELeftSide) {
01953                                         /* Left-only primitive. Move to the left list and advance */
01954                                         *leftEventsTempEnd++ = *event;
01955                                 } else if (classification == ERightSide) {
01956                                         /* Right-only primitive. Move to the right list and advance */
01957                                         *rightEventsTempEnd++ = *event;
01958                                 } else if (classification == EBothSides) {
01959                                         /* The primitive overlaps the split plane. Re-clip and
01960                                            generate new events for each side */
01961                                         const IndexType index = event->index;
01962
01963                                         BoundingBoxType clippedLeft = cast()->getClippedBoundingBox(index, leftNodeBoundingBox);
01964                                         BoundingBoxType clippedRight = cast()->getClippedBoundingBox(index, rightNodeBoundingBox);
01965
01966                                         if (clippedLeft.isValid() && clippedLeft.getSurfaceArea() > 0) {
01967                                                 for (int axis=0; axis<PointType::Dimension; ++axis) {
01968                                                         float min = (float) clippedLeft.min[axis],
01969                                                                   max = (float) clippedLeft.max[axis];
01970
01971                                                         if (min == max) {
01972                                                                 *newEventsLeftEnd++ = EdgeEvent(
01973                                                                                 EdgeEvent::EEdgePlanar,
01974                                                                                 axis, min, index);
01975                                                         } else {
01976                                                                 *newEventsLeftEnd++ = EdgeEvent(
01977                                                                                 EdgeEvent::EEdgeStart,
01978                                                                                 axis, min, index);
01979                                                                 *newEventsLeftEnd++ = EdgeEvent(
01980                                                                                 EdgeEvent::EEdgeEnd,
01981                                                                                 axis, max, index);
01982                                                         }
01983                                                 }
01984                                         } else {
01985                                                 prunedLeft++;
01986                                         }
01987
01988                                         if (clippedRight.isValid() && clippedRight.getSurfaceArea() > 0) {
01989                                                 for (int axis=0; axis<PointType::Dimension; ++axis) {
01990                                                         float min = (float) clippedRight.min[axis],
01991                                                                   max = (float) clippedRight.max[axis];
01992
01993                                                         if (min == max) {
01994                                                                 *newEventsRightEnd++ = EdgeEvent(
01995                                                                                 EdgeEvent::EEdgePlanar,
01996                                                                                 axis, min, index);
01997                                                         } else {
01998                                                                 *newEventsRightEnd++ = EdgeEvent(
01999                                                                                 EdgeEvent::EEdgeStart,
02000                                                                                 axis, min, index);
02001                                                                 *newEventsRightEnd++ = EdgeEvent(
02002                                                                                 EdgeEvent::EEdgeEnd,
02003                                                                                 axis, max, index);
02004                                                         }
02005                                                 }
02006                                         } else {
02007                                                 prunedRight++;
02008                                         }
02009
02010                                         /* Mark this primitive as processed so that clipping
02011                                                 is only done once */
02012                                         storage.set(index, EBothSidesProcessed);
02013                                 }
02014                         }
02015
02016                         ctx.pruned += prunedLeft + prunedRight;
02017
02018                         /* Sort the events from overlapping prims */
02019                         std::sort(newEventsLeftStart, newEventsLeftEnd, EdgeEventOrdering());
02020                         std::sort(newEventsRightStart, newEventsRightEnd, EdgeEventOrdering());
02021
02022                         /* Merge the left list */
02023                         leftEventsEnd = std::merge(leftEventsTempStart,
02024                                 leftEventsTempEnd, newEventsLeftStart, newEventsLeftEnd,
02025                                 leftEventsStart, EdgeEventOrdering());
02026
02027                         /* Merge the right list */
02028                         rightEventsEnd = std::merge(rightEventsTempStart,
02029                                 rightEventsTempEnd, newEventsRightStart, newEventsRightEnd,
02030                                 rightEventsStart, EdgeEventOrdering());
02031
02032                         /* Release temporary memory */
02033                         leftAlloc.release(newEventsLeftStart);
02034                         leftAlloc.release(leftEventsTempStart);
02035                         rightAlloc.release(newEventsRightStart);
02036                         rightAlloc.release(rightEventsTempStart);
02037                 } else {
02038                         for (EdgeEvent *event = eventStart; event < eventEnd; ++event) {
02039                                 int classification = storage.get(event->index);
02040
02041                                 if (classification == ELeftSide) {
02042                                         /* Left-only primitive. Move to the left list and advance */
02043                                         *leftEventsEnd++ = *event;
02044                                 } else if (classification == ERightSide) {
02045                                         /* Right-only primitive. Move to the right list and advance */
02046                                         *rightEventsEnd++ = *event;
02047                                 } else if (classification == EBothSides) {
02048                                         /* The primitive overlaps the split plane. Its edge events
02049                                            must be added to both lists. */
02050                                         *leftEventsEnd++ = *event;
02051                                         *rightEventsEnd++ = *event;
02052                                 }
02053                         }
02054                 }
02055
02056                 /* Shrink the edge event storage now that we know exactly how
02057                    many are on each side */
02058                 ctx.leftAlloc.shrinkAllocation(leftEventsStart,
02059                                 leftEventsEnd - leftEventsStart);
02060
02061                 ctx.rightAlloc.shrinkAllocation(rightEventsStart,
02062                                 rightEventsEnd - rightEventsStart);
02063
02064                 /* ==================================================================== */
02065                 /*                              Recursion                               */
02066                 /* ==================================================================== */
02067
02068                 KDNode * children = ctx.nodes.allocate(2);
02069
02070                 SizeType nodePosBeforeSplit = (SizeType) ctx.nodes.size();
02071                 SizeType indexPosBeforeSplit = (SizeType) ctx.indices.size();
02072                 SizeType leafNodeCountBeforeSplit = ctx.leafNodeCount;
02073                 SizeType nonemptyLeafNodeCountBeforeSplit = ctx.nonemptyLeafNodeCount;
02074                 SizeType innerNodeCountBeforeSplit = ctx.innerNodeCount;
02075
02076                 if (!node->initInnerNode(bestSplit.axis, bestSplit.pos, children-node)) {
02077                         m_indirectionLock.lock();
02078                         SizeType indirectionIdx = (SizeType) m_indirections.size();
02079                         m_indirections.push_back(children);
02080                         /* Unable to store relative offset -- create an indirection
02081                            table entry */
02082                         node->initIndirectionNode(bestSplit.axis, bestSplit.pos,
02083                                         indirectionIdx);
02084                         m_indirectionLock.unlock();
02085                 }
02086                 ctx.innerNodeCount++;
02087
02088                 float leftCost = buildTree(ctx, depth+1, children,
02089                                 leftNodeBoundingBox, leftEventsStart, leftEventsEnd,
02090                                 bestSplit.numLeft - prunedLeft, true, badRefines);
02091
02092                 float rightCost = buildTree(ctx, depth+1, children+1,
02093                                 rightNodeBoundingBox, rightEventsStart, rightEventsEnd,
02094                                 bestSplit.numRight - prunedRight, false, badRefines);
02095
02096                 std::pair<float, float> prob = tch(bestSplit.axis,
02097                         bestSplit.pos - nodeBoundingBox.min[bestSplit.axis],
02098                         nodeBoundingBox.max[bestSplit.axis] - bestSplit.pos);
02099
02100                 /* Compute the final cost given the updated cost
02101                    values received from the children */
02102                 float finalCost = m_traversalCost +
02103                         (prob.first * leftCost + prob.second * rightCost);
02104
02105                 /* Release the index lists not needed by the children anymore */
02106                 if (isLeftChild)
02107                         ctx.rightAlloc.release(rightEventsStart);
02108                 else
02109                         ctx.leftAlloc.release(leftEventsStart);
02110
02111                 /* ==================================================================== */
02112             /*                           Final decision                             */
02113             /* ==================================================================== */
02114
02115                 if (!m_retract || finalCost < primCount * m_queryCost) {
02116                         return finalCost;
02117                 } else {
02118                         /* In the end, splitting didn't help to reduce the SAH cost.
02119                            Tear up everything below this node and create a leaf */
02120                         ctx.nodes.resize(nodePosBeforeSplit);
02121                         ctx.retractedSplits++;
02122                         ctx.leafNodeCount = leafNodeCountBeforeSplit;
02123                         ctx.nonemptyLeafNodeCount = nonemptyLeafNodeCountBeforeSplit;
02124                         ctx.innerNodeCount = innerNodeCountBeforeSplit;
02125                         createLeafAfterRetraction(ctx, node, indexPosBeforeSplit);
02126                         return leafCost;
02127                 }
02128
02129                 return bestSplit.cost;
02130         }
02131
02132         /**
02133          * \brief Min-max binning as described in
02134          * "Highly Parallel Fast KD-tree Construction for Interactive
02135          *  Ray Tracing of Dynamic Scenes"
02136          * by M. Shevtsov, A. Soupikov and A. Kapustin
02137          */
02138         struct MinMaxBins {
02139                 MinMaxBins(SizeType nBins) : m_binCount(nBins) {
02140                         m_minBins = new SizeType[m_binCount*PointType::Dimension];
02141                         m_maxBins = new SizeType[m_binCount*PointType::Dimension];
02142                 }
02143
02144                 ~MinMaxBins() {
02145                         delete[] m_minBins;
02146                         delete[] m_maxBins;
02147                 }
02148
02149                 /**
02150                  * \brief Prepare to bin for the specified bounds
02151                  */
02152                 void setBoundingBox(const BoundingBoxType &bbox) {
02153                         m_bbox = bbox;
02154                         m_binSize = m_bbox.getExtents() / (float) m_binCount;
02155                         for (int axis=0; axis<PointType::Dimension; ++axis)
02156                                 m_invBinSize[axis] = 1/m_binSize[axis];
02157                 }
02158
02159                 /**
02160                  * \brief Run min-max binning
02161                  *
02162                  * \param derived Derived class to be used to determine the BoundingBox for
02163                  *     a given list of primitives
02164                  * \param indices Primitive indirection list
02165                  * \param primCount Specifies the length of \a indices
02166                  */
02167                 void bin(const Derived *derived, IndexType *indices,
02168                                 SizeType primCount) {
02169                         m_primCount = primCount;
02170                         memset(m_minBins, 0, sizeof(SizeType) * PointType::Dimension * m_binCount);
02171                         memset(m_maxBins, 0, sizeof(SizeType) * PointType::Dimension * m_binCount);
02172                         const int64_t maxBin = m_binCount-1;
02173
02174                         for (SizeType i=0; i<m_primCount; ++i) {
02175                                 const BoundingBoxType bbox = derived->getBoundingBox(indices[i]);
02176                                 for (int axis=0; axis<PointType::Dimension; ++axis) {
02177                                         int64_t minIdx = (int64_t) ((bbox.min[axis] - m_bbox.min[axis])
02178                                                         * m_invBinSize[axis]);
02179                                         int64_t maxIdx = (int64_t) ((bbox.max[axis] - m_bbox.min[axis])
02180                                                         * m_invBinSize[axis]);
02181                                         m_maxBins[axis * m_binCount
02182                                                 + std::max((int64_t) 0, std::min(maxIdx, maxBin))]++;
02183                                         m_minBins[axis * m_binCount
02184                                                 + std::max((int64_t) 0, std::min(minIdx, maxBin))]++;
02185                                 }
02186                         }
02187                 }
02188
02189                 /**
02190                  * \brief Evaluate the tree construction heuristic at each bin boundary
02191                  * and return the minimizer for the given cost constants. Min-max
02192                  * binning uses no "empty space bonus" since it cannot create such
02193                  * splits.
02194                  */
02195                 SplitCandidate minimizeCost(float traversalCost, float queryCost) {
02196                         SplitCandidate candidate;
02197                         int binIdx = 0, leftBin = 0;
02198                         TreeConstructionHeuristic tch(m_bbox);
02199
02200                         for (int axis=0; axis<PointType::Dimension; ++axis) {
02201                                 VectorType extents = m_bbox.getExtents();
02202                                 SizeType numLeft = 0, numRight = m_primCount;
02203                                 float leftWidth = 0, rightWidth = extents[axis];
02204                                 const float binSize = m_binSize[axis];
02205
02206                                 for (int i=0; i<m_binCount-1; ++i) {
02207                                         numLeft  += m_minBins[binIdx];
02208                                         numRight -= m_maxBins[binIdx];
02209                                         leftWidth += binSize;
02210                                         rightWidth -= binSize;
02211                                         std::pair<float, float> prob =
02212                                                 tch(axis, leftWidth, rightWidth);
02213
02214                                         float cost = traversalCost + queryCost
02215                                                 * (prob.first * numLeft + prob.second * numRight);
02216
02217                                         if (cost < candidate.cost) {
02218                                                 candidate.cost = cost;
02219                                                 candidate.axis = axis;
02220                                                 candidate.numLeft = numLeft;
02221                                                 candidate.numRight = numRight;
02222                                                 leftBin = i;
02223                                         }
02224
02225                                         binIdx++;
02226                                 }
02227                                 binIdx++;
02228                         }
02229
02230                         const int axis = candidate.axis;
02231                         const float min = m_bbox.min[axis];
02232
02233                         /* The following part may seem a bit paranoid. It is ensures that the
02234                          * returned split plane is consistent with the floating point calculations
02235                          * done by the binning code in \ref bin(). Since reciprocals and
02236                          * various floating point roundoff errors are involved, simply setting
02237                          *
02238                          * candidate.pos = m_bbox.min[axis] + (leftBin+1) * m_binSize[axis];
02239                          *
02240                          * can potentially lead to a slightly different number of primitives being
02241                          * classified to the left and right if we were to do check each
02242                          * primitive against this split position. We can't have that, however,
02243                          * since the partitioning code assumes that these numbers are correct.
02244                          * This lets it avoid doing another costly sweep, hence all the
02245                          * floating point madness below.
02246                          */
02247                         float invBinSize = m_invBinSize[axis];
02248                         float split = min + (leftBin + 1) * m_binSize[axis];
02249                         float splitNext = nextafterf(split,
02250                                   std::numeric_limits<float>::max());
02251                         int idx     = (int) ((split - min) * invBinSize);
02252                         int idxNext = (int) ((splitNext - min) * invBinSize);
02253
02254                         /**
02255                          * The split plane should pass through the last discrete floating
02256                          * floating value, which would still be classified into
02257                          * the left bin. If this is not computed correctly, do binary
02258                          * search.
02259                          */
02260                         if (!(idx == leftBin && idxNext == leftBin+1)) {
02261                                 float left = m_bbox.min[axis];
02262                                 float right = m_bbox.max[axis];
02263                                 int it = 0;
02264                                 while (true) {
02265                                         split = left + (right-left)/2;
02266                                         splitNext = nextafterf(split,
02267                                                 std::numeric_limits<float>::max());
02268                                         idx     = (int) ((split - min) * invBinSize);
02269                                         idxNext = (int) ((splitNext - min) * invBinSize);
02270
02271                                         if (idx == leftBin && idxNext == leftBin+1) {
02272                                                 /* Got it! */
02273                                                 break;
02274                                         } else if (std::abs(idx-idxNext) > 1 || ++it > 50) {
02275                                                 /* Insufficient floating point resolution
02276                                                    -> a leaf will be created. */
02277                                                 candidate.cost = std::numeric_limits<float>::infinity();
02278                                                 break;
02279                                         }
02280
02281                                         if (idx <= leftBin)
02282                                                 left = split;
02283                                         else
02284                                                 right = split;
02285                                 }
02286                         }
02287
02288                         if (split <= m_bbox.min[axis] || split >= m_bbox.max[axis]) {
02289                                 /* Insufficient floating point resolution
02290                                    -> a leaf will be created. */
02291                                 candidate.cost = std::numeric_limits<float>::infinity();
02292                         }
02293
02294                         candidate.pos = split;
02295
02296                         return candidate;
02297                 }
02298
02299                 /**
02300                  * \brief Given a suitable split candiate, compute tight bounding
02301                  * boxes for the left and right subtrees and return associated
02302                  * primitive lists.
02303                  */
02304                 boost::tuple<BoundingBoxType, IndexType *, BoundingBoxType, IndexType *> partition(
02305                                 BuildContext &ctx, const Derived *derived, IndexType *primIndices,
02306                                 SplitCandidate &split, bool isLeftChild, float traversalCost,
02307                                 float queryCost) {
02308                         const float splitPos = split.pos;
02309                         const int axis = split.axis;
02310                         SizeType numLeft = 0, numRight = 0;
02311                         BoundingBoxType leftBounds, rightBounds;
02312
02313                         IndexType *leftIndices, *rightIndices;
02314                         if (isLeftChild) {
02315                                 OrderedChunkAllocator &rightAlloc = ctx.rightAlloc;
02316                                 leftIndices = primIndices;
02317                                 rightIndices = rightAlloc.allocate<IndexType>(split.numRight);
02318                         } else {
02319                                 OrderedChunkAllocator &leftAlloc = ctx.leftAlloc;
02320                                 leftIndices = leftAlloc.allocate<IndexType>(split.numLeft);
02321                                 rightIndices = primIndices;
02322                         }
02323
02324                         for (SizeType i=0; i<m_primCount; ++i) {
02325                                 const IndexType primIndex = primIndices[i];
02326                                 const BoundingBoxType bbox = derived->getBoundingBox(primIndex);
02327
02328                                 if (bbox.max[axis] <= splitPos) {
02329                                         leftBounds.expandBy(bbox);
02330                                         leftIndices[numLeft++] = primIndex;
02331                                 } else if (bbox.min[axis] > splitPos) {
02332                                         rightBounds.expandBy(bbox);
02333                                         rightIndices[numRight++] = primIndex;
02334                                 } else {
02335                                         leftBounds.expandBy(bbox);
02336                                         rightBounds.expandBy(bbox);
02337                                         leftIndices[numLeft++] = primIndex;
02338                                         rightIndices[numRight++] = primIndex;
02339                                 }
02340                         }
02341
02342                         leftBounds.clip(m_bbox);
02343                         rightBounds.clip(m_bbox);
02344
02345                         /// Release the unused memory regions
02346                         if (isLeftChild)
02347                                 ctx.leftAlloc.shrinkAllocation(leftIndices, split.numLeft);
02348                         else
02349                                 ctx.rightAlloc.shrinkAllocation(rightIndices, split.numRight);
02350
02351                         leftBounds.max[axis] = std::min(leftBounds.max[axis], (float) splitPos);
02352                         rightBounds.min[axis] = std::max(rightBounds.min[axis], (float) splitPos);
02353
02354                         if (leftBounds.max[axis] != rightBounds.min[axis]) {
02355                                 /* There is some space between the child nodes -- move
02356                                    the split plane onto one of the BoundingBoxs so that the
02357                                    heuristic cost is minimized */
02358                                 TreeConstructionHeuristic tch(m_bbox);
02359
02360                                 std::pair<float, float> prob1 = tch(axis,
02361                                         leftBounds.max[axis] - m_bbox.min[axis],
02362                                         m_bbox.max[axis] - leftBounds.max[axis]);
02363                                 std::pair<float, float> prob2 = tch(axis,
02364                                         rightBounds.min[axis] - m_bbox.min[axis],
02365                                         m_bbox.max[axis] - rightBounds.min[axis]);
02366                                 float cost1 = traversalCost + queryCost
02367                                         * (prob1.first * numLeft + prob1.second * numRight);
02368                                 float cost2 = traversalCost + queryCost
02369                                         * (prob2.first * numLeft + prob2.second * numRight);
02370
02371                                 if (cost1 <= cost2) {
02372                                         split.cost = cost1;
02373                                         split.pos = leftBounds.max[axis];
02374                                 } else {
02375                                         split.cost = cost2;
02376                                         split.pos = rightBounds.min[axis];
02377                                 }
02378
02379                                 leftBounds.max[axis] = std::min(leftBounds.max[axis],
02380                                                 (float) split.pos);
02381                                 rightBounds.min[axis] = std::max(rightBounds.min[axis],
02382                                                 (float) split.pos);
02383                         }
02384
02385                         return boost::make_tuple(leftBounds, leftIndices,
02386                                         rightBounds, rightIndices);
02387                 }
02388         private:
02389                 SizeType *m_minBins;
02390                 SizeType *m_maxBins;
02391                 SizeType m_primCount;
02392                 int m_binCount;
02393                 BoundingBoxType m_bbox;
02394                 VectorType m_binSize;
02395                 VectorType m_invBinSize;
02396         };
02397
02398 protected:
02399         IndexType *m_indices;
02400         float m_traversalCost;
02401         float m_queryCost;
02402         float m_emptySpaceBonus;
02403         bool m_clip, m_retract, m_parallelBuild;
02404         SizeType m_maxDepth;
02405         SizeType m_stopPrims;
02406         SizeType m_maxBadRefines;
02407         SizeType m_exactPrimThreshold;
02408         SizeType m_minMaxBins;
02409         SizeType m_nodeCount;
02410         SizeType m_indexCount;
02411         std::vector<TreeBuilder *> m_builders;
02412         std::vector<KDNode *> m_indirections;
02413         QMutex m_indirectionLock;
02414         BuildInterface m_interface;
02415 };
02416
02417 /**
02418  * \brief Implements the 3D surface area heuristic so that it can be
02419  * used by the \ref GenericKDTree construction algorithm.
02420  */
02421 class SurfaceAreaHeuristic3 {
02422 public:
02423         /**
02424          * \brief Initialize the surface area heuristic with the bounds of
02425          * a parent node
02426          *
02427          * Precomputes some information so that traversal probabilities
02428          * of potential split planes can be evaluated efficiently
02429          */
02430         inline SurfaceAreaHeuristic3(const BoundingBox3f &aabb) {
02431                 const Vector3f extents(aabb.getExtents());
02432                 const float temp = 1.0f / (extents[0] * extents[1]
02433                                 + extents[1]*extents[2] + extents[0]*extents[2]);
02434                 m_temp0 = Vector3f(
02435                         extents[1] * extents[2],
02436                         extents[0] * extents[2],
02437                         extents[0] * extents[1]) * temp;
02438                 m_temp1 = Vector3f(
02439                         extents[1] + extents[2],
02440                         extents[0] + extents[2],
02441                         extents[0] + extents[1]) * temp;
02442         }
02443
02444         /**
02445          * Given a split on axis \a axis that produces children having extents
02446          * \a leftWidth and \a rightWidth along \a axis, compute the probability
02447          * of traversing the left and right child during a typical query
02448          * operation.
02449          */
02450         inline std::pair<float, float> operator()(int axis, float leftWidth, float rightWidth) const {
02451                 return std::pair<float, float>(
02452                         m_temp0[axis] + m_temp1[axis] * leftWidth,
02453                         m_temp0[axis] + m_temp1[axis] * rightWidth);
02454         }
02455
02456         /**
02457          * Compute the underlying quantity used by the tree construction
02458          * heuristic. This is used to compute the final cost of a kd-tree.
02459          */
02460         inline static float getQuantity(const BoundingBox3f &aabb) {
02461                 return aabb.getSurfaceArea();
02462         }
02463 private:
02464         Vector3f m_temp0, m_temp1;
02465 };
02466
02467 #if defined(WIN32)
02468 /* Revert back to fast / non-strict IEEE 754
02469    floating point computations */
02470 NORI_NAMESPACE_END
02471 #pragma float_control(precise, off)
02472 NORI_NAMESPACE_BEGIN
02473 #endif
02474
02475 NORI_NAMESPACE_END
02476
02477 #endif /* __KDTREE_GENERIC_H */