Array Internals (Advanced)
[lite array - A C++ Multidimensional Array Library]

Classes
class	lite::c_iterator< value_type_ >
	This class implements an iterator that returns the same value everywhere. More...
class	lite::u_iterator< base_iter_type_, func_type_, is_static_ >
	This class implements an iterator that returns the result of applying a function object to the return value of another iterator. More...
class	lite::b_iterator< left_base_iterator_type_, right_base_iterator_type_, func_type_, is_static_ >
	This class implements an iterator that returns the result of applying a binary function object to the return value of two other iterators. More...
struct	lite::size_transformer< transform_type_, input_size_type_ >
	This can be used to apply a transform object of type transform_type_ to a size object of type input_size_type_. More...
struct	lite::iterator_transformer< transform_type_, input_iterator_type_, input_size_type_ >
	This can be used to apply a transform object of type transform_type_ to a pair of iterator object and size object respectively of types input_size_type_ and input_size_type_. More...
struct	lite::array_signature_traits< signature_, base_iterator_type_ >
	This can be used to extract information from array signatures and get the appropriate iterator type and size type for a signature. More...
struct	lite::default_array_traits
	This is the default array traits type used by the class lite::array.See Array Traits for more information. More...
class	lite::reference_rep< iterator_type_ >
	This is a tag type that can be used to specify reference representation for an array. More...
class	lite::internal_rep< reversed_ >
	This is a tag type that can be used to specify internal representation for an array. More...
class	lite::hybrid_rep< reversed_, internal_buf_size_ >
	This is a tag type that can be used to specify hybrid representation for an array. More...
class	lite::s_iterator
	An `s_iterator` is used to represent a general multidimensional strided iterator using a stl style iterator and stride lengths for each dimension. More...
Functions
template<class value_type_ >
LITE_INLINE c_iterator < value_type_ >	lite::make_c_iterator (const value_type_ &val)
	Constructs and returns a lite::c_iterator that returns val everywhere.
template<typename func_type_ , typename base_iter_type_ >
LITE_INLINE u_iterator < base_iter_type_, func_type_, true >	lite::make_u_iterator (const base_iter_type_ &it)
	Creates a lite::u_iterator from a base iterator it and a static function object of type func_type_.
template<typename func_type_ , typename base_iter_type_ >
LITE_INLINE u_iterator < base_iter_type_, func_type_, false >	lite::make_u_iterator (const base_iter_type_ &it, const func_type_ &func)
	Creates a lite::u_iterator from a base iterator it and function object func.
template<typename func_type_ , typename left_base_iterator_type_ , typename right_base_iterator_type_ >
LITE_INLINE b_iterator < left_base_iterator_type_, right_base_iterator_type_, func_type_, true >	lite::make_b_iterator (const left_base_iterator_type_ &left_it, const right_base_iterator_type_ &right_it)
	Creates a lite::b_iterator from base iterators left_it and right_it and a static function object of type func_type_.
template<typename func_type_ , typename left_base_iterator_type_ , typename right_base_iterator_type_ >
LITE_INLINE b_iterator < left_base_iterator_type_, right_base_iterator_type_, func_type_, false >	lite::make_b_iterator (const left_base_iterator_type_ &left_it, const right_base_iterator_type_ &right_it, const func_type_ &func)
	Creates a lite::b_iterator from the base iterators left_it and right_it and function object func.
template<typename iterator_type_ , typename size_type_ >
LITE_INLINE void	lite::for_each (const iterator_type_ &iter, const size_type_ &size)
	Evaluates every element of the array pointed by iter and of size size.
template<typename iterator_type_ , typename function_type_ , typename size_type_ >
LITE_INLINE void	lite::for_each (const iterator_type_ &iter, function_type_ &func, const size_type_ &size)
	Applies the function object func to every element of the array pointed by iter and of size size.
template<typename iterator_type_ , typename size_type_ >
LITE_INLINE bool	lite::for_each_c (const iterator_type_ &iter, const size_type_ &size)
	Evaluates every element of the array pointed by iter and of size size while the elements evaluate to true. Returns true if all the elements evaluated to true.
template<typename iterator_type_ , typename function_type_ , typename size_type_ >
LITE_INLINE bool	lite::for_each_c (const iterator_type_ &iter, function_type_ &func, const size_type_ &size)
	Applies the function object func to every element of the array pointed by iter and of size size, while the function object evaluates to true. Returns true if the function object evaluates to true for all the elements.
template<typename signature_ , typename traits_type_ , typename rep_ >
LITE_INLINE void	lite::for_each (const array< signature_, traits_type_, rep_ > &a)
	Evaluates every element of the array a.
template<typename signature_ , typename traits_type_ , typename rep_ >
LITE_INLINE void	lite::for_each (array< signature_, traits_type_, rep_ > &a)
	Evaluates every element of the array a.
template<typename signature_ , typename traits_type_ , typename rep_ , typename function_type_ >
LITE_INLINE void	lite::for_each (const array< signature_, traits_type_, rep_ > &a, function_type_ &func)
	Applies the function object func to every element of the array a.
template<typename signature_ , typename traits_type_ , typename rep_ , typename function_type_ >
LITE_INLINE void	lite::for_each (array< signature_, traits_type_, rep_ > &a, function_type_ &func)
	Applies the function object func to every element of the array a.
template<typename signature_ , typename traits_type_ , typename rep_ >
LITE_INLINE bool	lite::for_each_c (const array< signature_, traits_type_, rep_ > &a)
	Evaluates every element of the array a while the elements evaluate to true. Returns true if all the elements evaluated to true.
template<typename signature_ , typename traits_type_ , typename rep_ >
LITE_INLINE bool	lite::for_each_c (array< signature_, traits_type_, rep_ > &a)
	Evaluates every element of the array a while the elements evaluate to true. Returns true if all the elements evaluated to true.
template<typename signature_ , typename traits_type_ , typename rep_ , typename function_type_ >
LITE_INLINE bool	lite::for_each_c (const array< signature_, traits_type_, rep_ > &a, function_type_ &func)
	Applies the function object func to every element of the array a while the function object evaluates to true. Returns true if the function object evaluates to true for all the elements.
template<typename signature_ , typename traits_type_ , typename rep_ , typename function_type_ >
LITE_INLINE bool	lite::for_each_c (array< signature_, traits_type_, rep_ > &a, function_type_ &func)
	Applies the function object func to every element of the array a while the function object evaluates to true. Returns true if the function object evaluates to true for all the elements.

Detailed Description

This section contains advanced information about the internal representation of arrays, array traits type and defining new transforms among other things.

Array Representation (Advanced)

There are two category of array representation as explained in Array Representations :

The third template parameter of lite::array specifies the representation type of the array:

template<
    typename signature_, 
    typename traits_type_ = default_array_traits, 
    typename rep_ = typename traits_type_::template representation_type<signature_>::type>
class array;

If not specified explicitly, the appropriate representation type is chosen automatically by the traits_type_. This is usually appropriate for primary arrays. However, sometimes it is necessary to supply this argument explicitly (for example to define a reference array). The argument is usually just a tag type. The actual representation is implemented by specializing the class lite::array for that specific tag type. The following 3 representation types are defined in the library:

Other user-defined representations can also be defined by defining new representation tags and further specializing the class lite::array for those tags.

Internal Representation

The internal representation is specified by specifying lite::interna_rep as the representation tag of the lite::array. It has the following declaration:

        template<bool reversed_>
        class internal_rep;

The content of the array is stored as part of the array object itself (thus the name internal_rep). The array must have only fixed size dimensions. The size of the array object will be exactly the same as the size of the storage required for the array as there is no other information stored within the array object (zero overhead). The array objects of with representation are POD object according to C++0x definition (see C++ standard, 3.9:10). These are light weight objects that can be efficiently copied and passed around as value objects. The lite::default_array_traits automatically chooses this representation for small (both single and multidimensional) arrays of fixed size.

if reversed_ is true then the array will be stored in the reverse order of dimensions (see Array Storage).

For example the following, declares a column-major 5 by 5 matrix:

        array<float[5][5], default_array_traits, internal_rep<true> > a;

Hybrid Representation

The hybrid representation is specified by specifying lite::hybrid_rep as the representation tag of the lite::array. It has the following declaration:

        template<bool reversed_, int internal_buf_size_>
        class hybrid_rep;

The array object will contain an internal buffer of size internal_buf_size_. The size is in terms of the number of elements of an array. The content of the array will be stored in the internal buffer if it fits. If the array has more than internal_buf_size_ elements then it will be allocated externally on the heap. The array can have a mix of fixed and variable size dimensions. In addition to the internal buffer, there is an iterator and a size object maintained internally by the array object. The lite::default_array_traits automatically chooses this representation for large arrays and arrays with at least one dimension with non-const size.

A hybrid array which at least one non-constant size dimension is initially empty unless a size with non-zero volume is specified for it in the constructor. The function lite::array::release() can be used to release the memory that is allocated to the array. Even for a fixed size array, a call to lite::array:release() still releases the memory, so you cannot dereference or use the array until you call lite::array::resize() to reallocate it again. A fixed size array is automatically allocated upon construction.

if reversed_ is true then the array will be stored in the reverse order of dimensions (see Array Storage).

For example the following, declares a column-major square matrix of variable size with an internal buffer of 1024 elements:

        int n;
        std::cin >> n;
        array<float[1][1], default_array_traits, hybrid_rep<true, 1024> > a(n ,n);

In the above example, the array will be stored in the internal buffer if (n <= 32).

Reference Representation

The reference representation is specified by specifying lite::reference_rep as the representation tag of the lite::array. It has the following declaration:

        template<typename iterator_type_>
        class reference_rep;

The array object will contain only an iterator of type iterator_type_ and a size object of the type appropriate for the array signature. The iterator_type_ should satisfy the requirements of a muti-dimensional iterator (see Multi-Dimensional Iterators).

Note that the specialization of class lite::array for this representation provides an almost totally different set of constructors (See lite::array for details).

For an array with this representation, all typedef that stat with const_ are exactly the same as those without it. Also all the member functions and operators are declared const. Furthermore, An array with this representation must always be declared as const when used as a return type.

For example the following, declares a reference array to another array:

        typedef array<float[5][5]> A;
        typedef array<float[5][5], default_array_traits, reference_rep<A::iterator> > A_Ref;

        A a;
        A_Ref a_ref = a;

        a_ref(2,3) = 5;     // a_ref can be used as an alias for a
        // the previous line is equivalent to:
        a(2,3) = 5;

Array Storage

There are two storage types that can be specified for internal and hybrid representations:

Forward Storage: This is the default type in which consecutive elements of the last dimension are adjacent in memory and then the dimension before it and so on. This is the generalization of row major storage and is what is used by the C/C++ by default.
Reverse Storage: With this storage, consecutive elements of the first dimension are adjacent in memory and then the second dimension and so on. This is the generalization of column major storage and is what is used in Fortran by default.

For example the following, declares a column-major 5 by 5 matrix:

        array<float[5][5], default_array_traits, internal_rep<true> > a;

Temporary Arrays

There are times that you need to make a copy of another array for temporary use in a function or return a temporary array object from a function. In those cases, it is better to use an array type that is more appropriate for temporary storage to improve performance.

A temporary array type that uses the Hybrid Representation, usually has a much larger internal buffer so as to avoid allocation/deallocation of memory from heap. For any array type, compatible temporary array types are available as member typedefs as shown in the following example. The following example shows a function that computes the inverse of a matrix using Gauss-Jordan elimination:

// compute the inverse of matrix a
template<typename value_type_, int n_, typename traits_type_, typename rep_>
const array<value_type_[n_][n_], traits_type_>::temporary_array
inverse(const array<value_type_[n_][n_], traits_type_, rep_>& a)
{
    using std::abs;

    typedef array<value_type_[n_][n_], traits_type_, rep_> a_type;
    typedef typename a_type::temporary_array temporary_type;

    const int n = a.size().i0;
    temporary_type result(a.size());
    temporary_type aa =a;

    result = value_type_();

    result[diagonal()] = value_type_(1);

    for (int i=0; i<n; i++) {
        const block<0,1> blk(0, n-i);
        int i_max = i;
        value_type_ val_max = abs(aa(i, i));

        // find the strongest row
        for (int i2 = i+1; i2<n; i2++)
            if (abs(aa(i2, i)) > val_max) {
                i_max = i2;
                val_max = abs(aa(i2, i));
            }

        // swap row i_max with row i
        if (i_max != i) {
            typename transform_traits<temporary_type, row>::temporary_array res_tmp;

            res_tmp = result[row(i)];
            result[row(i)] = result[row(i_max)];
            result[row(i_max)] = res_tmp; 

            typename transform_traits<temporary_type, block<0,1> >::temporary_array org_tmp;
                
            org_tmp = aa[blk(i,i)];
            aa[blk(i,i)] = aa[blk(i_max, i)];
            aa[blk(i_max,i)] = org_tmp; 
        }

        // if a singular matrix return a zero matrix
        if (aa(i, i) == value_type_()) {
            result = value_type_();
            break;
        }

        // normalize the row
        value_type_ factor = value_type_(1)/aa(i, i);

        result[row(i)] *= factor;
        aa[blk(i, i)] *= factor;

        value_type_ epsilon = std::numeric_limits<value_type_>::epsilon()*aa(i, i)*n;

        // simplify the column in the lower triangle
        for (int i2 = i+1; i2<n; i2++) 
            if (abs(aa(i2, i)) > epsilon) {
                result[row(i2)] -= aa(i2, i)*result[row(i)];
                aa[blk(i2,i)] -= aa(i2, i)*aa[blk(i,i)];
            }
    }

    // simplify the upper triangle
    for (int i = n-1; i>=0; i--) {
        value_type_ epsilon = std::numeric_limits<value_type_>::epsilon()*aa(i, i)*n;

        for (int i2 = 0; i2<i; i2++) 
            if (abs(aa(i2, i)) > epsilon) 
                // we don't need to update the original here as it won't be used any more!
                result[row(i2)] -= aa(i2, i)*result[row(i)];
    }

    return result;
}

Array Traits

The second template parameter of the class lite::array specifies a traits type:

        template<
            typename signature_, 
            typename traits_type_ = default_array_traits, 
            typename rep_ = typename traits_type_::template representation_type<signature_>::type>
        class array;

The primary purpose of a traits class is to choose the proper array representation based on the array signature. It can be also use to specify other user-defined policies.

An array traits type should provide the following interface:

struct some_array_traits
{
    template<typename signature_>
    struct representation_type
    {
        // This is the default representation to be used for arrays
        typedef ??? type;

        // This is the default representation to be used for temporary arrays
        typedef ??? temporary_type;

        // This is the default representation to be used for temporary arrays with forward storage
        typedef ??? fwd_temporary_type;

        // This is the default representation to be used for temporary arrays with reverse storage
        typedef ??? rev_temporary_type;
    };
};

The lite::default_array_traits is the only pre-defined array traits type.

Multi-Dimensional Iterators

A multi-dimensional iterator is an iterator that can move along more than one dimension. A multidimensional array can be fully represented by a multidimensional iterator pointing to the start of the array and a size object specifying the size of the array along each dimension. the lite:array::begin() returns a multidimensional iterator pointing to the start of the array. All multidimensional iterators should provide the following operations (assume Iter is a multidimensional iterator type):

    // Index through the iterator \a it using the indices \a i0 to \a iN.
    std::iterator_traits<Iter>::reference at(Iter it, int i0 ..., int iM);

    // Increment the iterator \a it along dimension \a dim_ by one position.
    template<int dim_>
    void inc(Iter it);

    // Decrement the iterator \a it along dimension \a dim_ by one position.
    template<int dim_>
    void dec(Iter it);

    // Move the iterator \a it along dimension \a dim_ by \a diff positions.
    template<int dim_>
    void shift(Iter it, int diff);

In addition to the above operations, the class std::iterator_traits should be specialized to provide correct typedefs just like a stl style iterator.

Note that the at() function may take any number of indices from 0 to N where N is the number of dimensions of the iterator it. If M indices are provides such that (M < N) then then the provided indices will be used for the last M dimensions and for the first N-M dimensions an index of 0 will be assumed. For example:

        typedef array<float[5][6][7]> A;
        A a;

        A::iterator it = a.begin();

        at(it, 5) = -1;                     // set a(0,0,5) to -1
        at(it, 0, 5) = -1;                  // set a(0,0,5) to -1
        at(it, 0, 0, 5) = -1;               // set a(0,0,5) to -1
        
        inc<2>(it);
        at(it) = -1;                        // set a(0,0,1) to -1

        shift<1>(it, 3);
        at(it, 1, 0, 0) = -1;               // set a(1,3,1) to -1

        shift<1>(it, -2);                   
        at(it, -1) = -1;                     // set a(0,1,0) to -1

Note that any iterator used by any lite::array object should satisfy all the requirements of a multidimensional iterator. For example:

        typedef array<float[5][6]> A;
        A a, b, c;

        std::cout << at((2*a+b+c).begin(), 0, 1);
        // is equivalent to:
        std::cout << 2*a(0,1)+b(0,1)+c(0,1);

        std::cout << at(a[row(1)].begin(), 0, 1);
        // is equivalent to:
        std::cout << a(1, 1);

The following multidimensional iterator types are provided by the library:

s_iterator see lite::s_iterator
c_iterator see lite::c_iterator
u_iterator see lite::u_iterator
b_iterator see lite::b_iterator

You can define your own iterator type by implementing the requirement of a generalized iterator. Most of the array operations use the above iterator types with appropriate function objects to achieve their functionality.

Defining New Transforms

In order to define a new transform, you need to implement the specialization of the following two types for your transform type:

lite::size_transformer : This is used to apply a transform object to a size object
lite::iterator_transformer : This is used to apply a transform object to an iterator object

Together, the above two transformers are used to apply a transform object to an array object. The transform is applied to the iterator and the size of the original array and then a new reference array is created from the resulting iterator and resulting size. The resulting array uses the Reference Representation.

The following pseudo definitions specify the interface that each of the two transformers should implement:

        template<typename transform_type_, typename input_size_type_>
        struct size_transformer
        {
        public:
            // The type of the resulting size object after applying the transform
            typedef ??? size_type;

            // Apply the transform to the size object org and store the result in res.
            static void transform(const input_size_type_& org, size_type& res);

            // Apply the transform object trans to the size object org and store the result in res.
            static void transform(const transform_type& trans, const input_size_type_& org, size_type& res);
        };

        template<typename transform_type_, typename input_iterator_type_, typename input_size_type_>
        struct iterator_transformer
        {
        public:
            // The type of the resulting iterator object after applying the transform
            typedef ??? iterator_type;

            // Apply the transform to the iterator object org and store the result in res.
            static void transform(
                const input_iterator_type& org, 
                iterator_type& res);

            // Apply the transform object trans to the iterator object org and store the result in res.
            static void transform(
                const transform_type_& trans, 
                const input_iterator_type& org, 
                iterator_type& res);

            // Apply the transform object trans to the iterator object org that is used together with 
            // size object sz and store the result in res.
            static void transform(
                const transform_type_& trans, 
                const input_iterator_type& org, 
                const input_size_type_& sz,
                iterator_type& res);
        };

In order to define a new transform, in addition to defining a type for your transform objects, you need to specialize and implement the lite::size_transformer and lite::iterator_transformer for you new transform type. For lite::c_iterator, lite::u_iterator and lite::b_iterator, there are already specializations in place that do the following by default:

For lite::c_iterator, the default transformer does nothing and returns the same iterator
For lite::u_iterator, the default transformer applies the transform object to the base iterator and creates a new lite::u_iterator using the transformed base iterator and the same function object.
For lite::b_iterator, the default transformer applies the transform object to the both of the left and right base iterators and then creates a new lite::b_iterator using the transformed base iterators and the same function object.

Therefore, if the above default behavior works well for your transform type (which is usually the case), you only need to define an iterator transformer for lite::s_iterator.

The following example shows the details of how a transform object is applied to an array:

        int n;

        std::cin >> n;

        typedef array<float[1][1]> A;

        A a(n, n);

        transform_traits<A, row>::array a_row_ref1 = a[row(1)]; 

        // the previous line is equivalent to the following code:

        typedef size_transformer<row, A::size_type> ST; 
        typedef iterator_transformer<row, A::iterator, A::size_type> IT;

        ST::size_type sz;
        IT:iterator_type it;
        row r(1);

        ST::transform(r, a.size(), sz);
        IT::transform(r, a.begin(), it);

        typedef lite::detail::size_to_signature<ST::size_type, std::iterator_traits<A::iterator>::value_type>::type Sig;

        array<Sig, A::traits_type, reference_rep<IT::iterator_type> > a_row_ref2(it, sz);

The specialization of class lite::array for lite::reference_rep has a special constructor that can be used to simplify the above code:

        int n;

        std::cin >> n;

        typedef array<float[1][1]> A;

        A a(n, n);

        transform_traits<A, row>::array a_row_ref1 = a[row(1)]; 

        // the previous line is equivalent to the following code:

        typedef size_transformer<row, A::size_type> ST; 
        typedef iterator_transformer<row, A::iterator, A::size_type> IT;
        typedef lite::detail::size_to_signature<ST::size_type, std::iterator_traits<IT>::value_type>::type Sig;

        array<Sig, A::traits_type, reference_rep<IT::iterator_type> > a_row_ref2(a.begin(), a.size(), row(1));

Note that you do not need to specialize and implement the lite::transform_traits as it uses a generic definition that depends on lite::size_transformer and lite::iterator_transformer.

Lazy Evaluation

Many of the array operations do not actually compute their result but instead return a reference array that computes the result on demand (lazy evaluation). The resulting reference array uses a combination of lite::c_iterator, lite::u_iterator or lite::b_iterator and a function object.

You can use the lite::apply() (See the example in Array Operations) to create a reference array that applies a function object to one or two other arrays.

Most of the array operations that actually compute their result use the lite::for_each() or lite::for_each_c().

Array Operations and Evaluation

The lite::for_each() and lite::for_each_c() family of functions lie at the heart of the library. Almost all array operations are eventually performed by a call to one of the overloads of these two functions. The for_each functions are heavily specialized/optimized to unroll loops with fixed/variable number of iterations.

For example, consider the following function:

        void foo(int n)
        {
            const int max_m = 2048;
            const float c = 3;

            typedef array<float[3]> Vec;

            Vec v1[max_m], v2[max_m];

            for (int i=0; i<max_m; i++) {
                init(v1[i]);
                init(v2[i]);
            }

            for (int i=0; i<n; i++)
                for (int k=0; k<n; k++)
                    v1[k] = v2[k]+c*v2[k+1];

            std::cout << v1[0]; << std::endl;   // to prevent dead code elimination
        }

The lite::for_each() function is called by the assignment operator to evaluate the array expression and to store the result in v1[k]. The following is the assembly code generated for the the inner loop by MSVC++ 2009:

        00401ECB  cmp         edx,esi 
        00401ECD  jge         code_gen+1A4h (401F14h) 
        00401ECF  lea         eax,[edx+edx*2] 
        00401ED2  add         eax,eax 
        00401ED4  mov         ecx,esi 
        00401ED6  add         eax,eax 
        00401ED8  sub         ecx,edx 
        00401EDA  fld         dword ptr [esp+eax+14h] 
        00401EDE  add         eax,0Ch 
        00401EE1  sub         ecx,1 
        00401EE4  fmul        st,st(1) 
        00401EE6  fadd        dword ptr [esp+eax-4] 
        00401EEA  fstp        dword ptr [esp+eax+5FFCh] 
        00401EF1  fld         dword ptr [esp+eax+0Ch] 
        00401EF5  fmul        st,st(1) 
        00401EF7  fadd        dword ptr [esp+eax] 
        00401EFA  fstp        dword ptr [esp+eax+6000h] 
        00401F01  fld         dword ptr [esp+eax+10h] 
        00401F05  fmul        st,st(1) 
        00401F07  fadd        dword ptr [esp+eax+4] 
        00401F0B  fstp        dword ptr [esp+eax+6004h] 
        00401F12  jne         code_gen+16Ah (401EDAh)

As you can see, the whole expression v1[k] = v2[k]+c*v2[k+1] has been fully inlined and optimized. The above assembly code is almost identical to the assembly code generated for a hand written function that does not use the array library.

A few of the functions like the 2D and 3D version of the lite::det() and lite::inverse() are implemented explicitly to ensure optimal performance.

Todo:

performance and forward/reverse storage

performance notes, code samples, etc.

function objects?

Array Internals (Advanced) [lite array - A C++ Multidimensional Array Library]

Classes

Functions

Detailed Description

Array Internals (Advanced)
[lite array - A C++ Multidimensional Array Library]