]> oss.titaniummirror.com Git - msp430-gcc.git/blobdiff - libstdc++-v3/include/parallel/random_shuffle.h
Imported gcc-4.4.3
[msp430-gcc.git] / libstdc++-v3 / include / parallel / random_shuffle.h
diff --git a/libstdc++-v3/include/parallel/random_shuffle.h b/libstdc++-v3/include/parallel/random_shuffle.h
new file mode 100644 (file)
index 0000000..6e0ebef
--- /dev/null
@@ -0,0 +1,519 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 3, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+/** @file parallel/random_shuffle.h
+ *  @brief Parallel implementation of std::random_shuffle().
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
+#define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
+
+#include <limits>
+#include <bits/stl_numeric.h>
+#include <parallel/parallel.h>
+#include <parallel/random_number.h>
+
+namespace __gnu_parallel
+{
+/** @brief Type to hold the index of a bin.
+  *
+  *  Since many variables of this type are allocated, it should be
+  *  chosen as small as possible.
+  */
+typedef unsigned short bin_index;
+
+/** @brief Data known to every thread participating in
+    __gnu_parallel::parallel_random_shuffle(). */
+template<typename RandomAccessIterator>
+  struct DRandomShufflingGlobalData
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    /** @brief Begin iterator of the source. */
+    RandomAccessIterator& source;
+
+    /** @brief Temporary arrays for each thread. */
+    value_type** temporaries;
+
+    /** @brief Two-dimensional array to hold the thread-bin distribution.
+     *
+     *  Dimensions (num_threads + 1) x (num_bins + 1). */
+    difference_type** dist;
+
+    /** @brief Start indexes of the threads' chunks. */
+    difference_type* starts;
+
+    /** @brief Number of the thread that will further process the
+       corresponding bin. */
+    thread_index_t* bin_proc;
+
+    /** @brief Number of bins to distribute to. */
+    int num_bins;
+
+    /** @brief Number of bits needed to address the bins. */
+    int num_bits;
+
+    /** @brief Constructor. */
+    DRandomShufflingGlobalData(RandomAccessIterator& _source)
+    : source(_source) { }
+  };
+
+/** @brief Local data for a thread participating in
+    __gnu_parallel::parallel_random_shuffle().
+  */
+template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  struct DRSSorterPU
+  {
+    /** @brief Number of threads participating in total. */
+    int num_threads;
+
+    /** @brief Begin index for bins taken care of by this thread. */
+    bin_index bins_begin;
+
+    /** @brief End index for bins taken care of by this thread. */
+    bin_index bins_end;
+
+    /** @brief Random seed for this thread. */
+    uint32 seed;
+
+    /** @brief Pointer to global data. */
+    DRandomShufflingGlobalData<RandomAccessIterator>* sd;
+  };
+
+/** @brief Generate a random number in @c [0,2^logp).
+  *  @param logp Logarithm (basis 2) of the upper range bound.
+  *  @param rng Random number generator to use.
+  */
+template<typename RandomNumberGenerator>
+  inline int
+  random_number_pow2(int logp, RandomNumberGenerator& rng)
+  { return rng.genrand_bits(logp); }
+
+/** @brief Random shuffle code executed by each thread.
+  *  @param pus Array of thread-local data records. */
+template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  void 
+  parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
+                                 RandomNumberGenerator>* pus)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    thread_index_t iam = omp_get_thread_num();
+    DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam];
+    DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
+
+    // Indexing: dist[bin][processor]
+    difference_type length = sd->starts[iam + 1] - sd->starts[iam];
+    bin_index* oracles = new bin_index[length];
+    difference_type* dist = new difference_type[sd->num_bins + 1];
+    bin_index* bin_proc = new bin_index[sd->num_bins];
+    value_type** temporaries = new value_type*[d->num_threads];
+
+    // Compute oracles and count appearances.
+    for (bin_index b = 0; b < sd->num_bins + 1; ++b)
+      dist[b] = 0;
+    int num_bits = sd->num_bits;
+
+    random_number rng(d->seed);
+
+    // First main loop.
+    for (difference_type i = 0; i < length; ++i)
+      {
+        bin_index oracle = random_number_pow2(num_bits, rng);
+        oracles[i] = oracle;
+
+        // To allow prefix (partial) sum.
+        ++(dist[oracle + 1]);
+      }
+
+    for (bin_index b = 0; b < sd->num_bins + 1; ++b)
+      sd->dist[b][iam + 1] = dist[b];
+
+#   pragma omp barrier
+
+#   pragma omp single
+    {
+      // Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
+      // total number of items in bin s
+      for (bin_index s = 0; s < sd->num_bins; ++s)
+        __gnu_sequential::partial_sum(sd->dist[s + 1],
+                                      sd->dist[s + 1] + d->num_threads + 1,
+                                      sd->dist[s + 1]);
+    }
+
+#   pragma omp barrier
+
+    sequence_index_t offset = 0, global_offset = 0;
+    for (bin_index s = 0; s < d->bins_begin; ++s)
+      global_offset += sd->dist[s + 1][d->num_threads];
+
+#   pragma omp barrier
+
+    for (bin_index s = d->bins_begin; s < d->bins_end; ++s)
+      {
+       for (int t = 0; t < d->num_threads + 1; ++t)
+         sd->dist[s + 1][t] += offset;
+       offset = sd->dist[s + 1][d->num_threads];
+      }
+
+    sd->temporaries[iam] = static_cast<value_type*>(
+      ::operator new(sizeof(value_type) * offset));
+
+#   pragma omp barrier
+
+    // Draw local copies to avoid false sharing.
+    for (bin_index b = 0; b < sd->num_bins + 1; ++b)
+      dist[b] = sd->dist[b][iam];
+    for (bin_index b = 0; b < sd->num_bins; ++b)
+      bin_proc[b] = sd->bin_proc[b];
+    for (thread_index_t t = 0; t < d->num_threads; ++t)
+      temporaries[t] = sd->temporaries[t];
+
+    RandomAccessIterator source = sd->source;
+    difference_type start = sd->starts[iam];
+
+    // Distribute according to oracles, second main loop.
+    for (difference_type i = 0; i < length; ++i)
+      {
+        bin_index target_bin = oracles[i];
+        thread_index_t target_p = bin_proc[target_bin];
+
+        // Last column [d->num_threads] stays unchanged.
+        ::new(&(temporaries[target_p][dist[target_bin + 1]++]))
+           value_type(*(source + i + start));
+      }
+
+    delete[] oracles;
+    delete[] dist;
+    delete[] bin_proc;
+    delete[] temporaries;
+
+#   pragma omp barrier
+
+    // Shuffle bins internally.
+    for (bin_index b = d->bins_begin; b < d->bins_end; ++b)
+      {
+        value_type* begin =
+                    sd->temporaries[iam] +
+                    ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
+                  * end =
+                    sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
+        sequential_random_shuffle(begin, end, rng);
+        std::copy(begin, end, sd->source + global_offset +
+            ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
+      }
+
+    ::operator delete(sd->temporaries[iam]);
+  }
+
+/** @brief Round up to the next greater power of 2.
+  *  @param x Integer to round up */
+template<typename T>
+  T 
+  round_up_to_pow2(T x)
+  {
+    if (x <= 1)
+      return 1;
+    else
+      return (T)1 << (__log2(x - 1) + 1);
+  }
+
+/** @brief Main parallel random shuffle step.
+  *  @param begin Begin iterator of sequence.
+  *  @param end End iterator of sequence.
+  *  @param n Length of sequence.
+  *  @param num_threads Number of threads to use.
+  *  @param rng Random number generator to use.
+  */
+template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  void
+  parallel_random_shuffle_drs(RandomAccessIterator begin,
+                             RandomAccessIterator end,
+                             typename std::iterator_traits
+                             <RandomAccessIterator>::difference_type n,
+                             thread_index_t num_threads,
+                             RandomNumberGenerator& rng)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    _GLIBCXX_CALL(n)
+
+    const _Settings& __s = _Settings::get();
+
+    if (num_threads > n)
+      num_threads = static_cast<thread_index_t>(n);
+
+    bin_index num_bins, num_bins_cache;
+
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
+    // Try the L1 cache first.
+
+    // Must fit into L1.
+    num_bins_cache = std::max<difference_type>(
+        1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
+    num_bins_cache = round_up_to_pow2(num_bins_cache);
+
+    // No more buckets than TLB entries, power of 2
+    // Power of 2 and at least one element per bin, at most the TLB size.
+    num_bins = std::min<difference_type>(n, num_bins_cache);
+
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
+    // 2 TLB entries needed per bin.
+    num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins);
+#endif
+    num_bins = round_up_to_pow2(num_bins);
+
+    if (num_bins < num_bins_cache)
+      {
+#endif
+        // Now try the L2 cache
+        // Must fit into L2
+        num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
+            1, n / (__s.L2_cache_size / sizeof(value_type))));
+        num_bins_cache = round_up_to_pow2(num_bins_cache);
+
+        // No more buckets than TLB entries, power of 2.
+        num_bins = static_cast<bin_index>(
+            std::min(n, static_cast<difference_type>(num_bins_cache)));
+        // Power of 2 and at least one element per bin, at most the TLB size.
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
+        // 2 TLB entries needed per bin.
+        num_bins = std::min(
+            static_cast<difference_type>(__s.TLB_size / 2), num_bins);
+#endif
+          num_bins = round_up_to_pow2(num_bins);
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
+      }
+#endif
+
+    num_threads = std::min<bin_index>(num_threads, num_bins);
+
+    if (num_threads <= 1)
+      return sequential_random_shuffle(begin, end, rng);
+
+    DRandomShufflingGlobalData<RandomAccessIterator> sd(begin);
+    DRSSorterPU<RandomAccessIterator, random_number >* pus;
+    difference_type* starts;
+
+#   pragma omp parallel num_threads(num_threads)
+      {
+        thread_index_t num_threads = omp_get_num_threads();
+#       pragma omp single
+          {
+            pus = new DRSSorterPU<RandomAccessIterator, random_number>
+                [num_threads];
+
+            sd.temporaries = new value_type*[num_threads];
+            sd.dist = new difference_type*[num_bins + 1];
+            sd.bin_proc = new thread_index_t[num_bins];
+            for (bin_index b = 0; b < num_bins + 1; ++b)
+              sd.dist[b] = new difference_type[num_threads + 1];
+            for (bin_index b = 0; b < (num_bins + 1); ++b)
+              {
+                sd.dist[0][0] = 0;
+                sd.dist[b][0] = 0;
+              }
+            starts = sd.starts = new difference_type[num_threads + 1];
+            int bin_cursor = 0;
+            sd.num_bins = num_bins;
+            sd.num_bits = __log2(num_bins);
+
+            difference_type chunk_length = n / num_threads,
+                            split = n % num_threads, start = 0;
+            difference_type bin_chunk_length = num_bins / num_threads,
+                            bin_split = num_bins % num_threads;
+            for (thread_index_t i = 0; i < num_threads; ++i)
+              {
+                starts[i] = start;
+                start += (i < split) ? (chunk_length + 1) : chunk_length;
+                int j = pus[i].bins_begin = bin_cursor;
+
+                // Range of bins for this processor.
+                bin_cursor += (i < bin_split) ?
+                    (bin_chunk_length + 1) : bin_chunk_length;
+                pus[i].bins_end = bin_cursor;
+                for (; j < bin_cursor; ++j)
+                  sd.bin_proc[j] = i;
+                pus[i].num_threads = num_threads;
+                pus[i].seed = rng(std::numeric_limits<uint32>::max());
+                pus[i].sd = &sd;
+              }
+            starts[num_threads] = start;
+          } //single
+        // Now shuffle in parallel.
+        parallel_random_shuffle_drs_pu(pus);
+      }  // parallel
+
+    delete[] starts;
+    delete[] sd.bin_proc;
+    for (int s = 0; s < (num_bins + 1); ++s)
+      delete[] sd.dist[s];
+    delete[] sd.dist;
+    delete[] sd.temporaries;
+
+    delete[] pus;
+  }
+
+/** @brief Sequential cache-efficient random shuffle.
+ *  @param begin Begin iterator of sequence.
+ *  @param end End iterator of sequence.
+ *  @param rng Random number generator to use.
+ */
+template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  void
+  sequential_random_shuffle(RandomAccessIterator begin, 
+                            RandomAccessIterator end,
+                            RandomNumberGenerator& rng)
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    difference_type n = end - begin;
+    const _Settings& __s = _Settings::get();
+
+    bin_index num_bins, num_bins_cache;
+
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
+    // Try the L1 cache first, must fit into L1.
+    num_bins_cache =
+        std::max<difference_type>
+            (1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
+    num_bins_cache = round_up_to_pow2(num_bins_cache);
+
+    // No more buckets than TLB entries, power of 2
+    // Power of 2 and at least one element per bin, at most the TLB size
+    num_bins = std::min(n, (difference_type)num_bins_cache);
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
+    // 2 TLB entries needed per bin
+    num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins);
+#endif
+    num_bins = round_up_to_pow2(num_bins);
+
+    if (num_bins < num_bins_cache)
+      {
+#endif
+        // Now try the L2 cache, must fit into L2.
+        num_bins_cache =
+            static_cast<bin_index>(std::max<difference_type>(
+                1, n / (__s.L2_cache_size / sizeof(value_type))));
+        num_bins_cache = round_up_to_pow2(num_bins_cache);
+
+        // No more buckets than TLB entries, power of 2
+        // Power of 2 and at least one element per bin, at most the TLB size.
+        num_bins = static_cast<bin_index>
+            (std::min(n, static_cast<difference_type>(num_bins_cache)));
+
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
+        // 2 TLB entries needed per bin
+        num_bins =
+            std::min<difference_type>(__s.TLB_size / 2, num_bins);
+#endif
+        num_bins = round_up_to_pow2(num_bins);
+#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
+      }
+#endif
+
+    int num_bits = __log2(num_bins);
+
+    if (num_bins > 1)
+      {
+        value_type* target = static_cast<value_type*>(
+          ::operator new(sizeof(value_type) * n));
+        bin_index* oracles = new bin_index[n];
+        difference_type* dist0 = new difference_type[num_bins + 1],
+                       * dist1 = new difference_type[num_bins + 1];
+
+        for (int b = 0; b < num_bins + 1; ++b)
+          dist0[b] = 0;
+
+        random_number bitrng(rng(0xFFFFFFFF));
+
+        for (difference_type i = 0; i < n; ++i)
+          {
+            bin_index oracle = random_number_pow2(num_bits, bitrng);
+            oracles[i] = oracle;
+
+            // To allow prefix (partial) sum.
+            ++(dist0[oracle + 1]);
+          }
+
+        // Sum up bins.
+        __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
+
+        for (int b = 0; b < num_bins + 1; ++b)
+          dist1[b] = dist0[b];
+
+        // Distribute according to oracles.
+        for (difference_type i = 0; i < n; ++i)
+          ::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
+
+        for (int b = 0; b < num_bins; ++b)
+          {
+            sequential_random_shuffle(target + dist1[b],
+                                      target + dist1[b + 1],
+                                      rng);
+          }
+
+        // Copy elements back.
+        std::copy(target, target + n, begin);
+
+        delete[] dist0;
+        delete[] dist1;
+        delete[] oracles;
+        ::operator delete(target);
+      }
+    else
+      __gnu_sequential::random_shuffle(begin, end, rng);
+  }
+
+/** @brief Parallel random public call.
+ *  @param begin Begin iterator of sequence.
+ *  @param end End iterator of sequence.
+ *  @param rng Random number generator to use.
+ */
+template<typename RandomAccessIterator, typename RandomNumberGenerator>
+  inline void
+  parallel_random_shuffle(RandomAccessIterator begin,
+                          RandomAccessIterator end,
+                          RandomNumberGenerator rng = random_number())
+  {
+    typedef std::iterator_traits<RandomAccessIterator> traits_type;
+    typedef typename traits_type::difference_type difference_type;
+    difference_type n = end - begin;
+    parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ;
+  }
+
+}
+
+#endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */