]> oss.titaniummirror.com Git - msp430-gcc.git/blobdiff - libstdc++-v3/include/parallel/partial_sum.h
Imported gcc-4.4.3
[msp430-gcc.git] / libstdc++-v3 / include / parallel / partial_sum.h
diff --git a/libstdc++-v3/include/parallel/partial_sum.h b/libstdc++-v3/include/parallel/partial_sum.h
new file mode 100644 (file)
index 0000000..9263093
--- /dev/null
@@ -0,0 +1,220 @@
+// -*- C++ -*-
+
+// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 3, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+/** @file parallel/partial_sum.h
+ *  @brief Parallel implementation of std::partial_sum(), i. e. prefix
+ *  sums.
+ *  This file is a GNU parallel extension to the Standard C++ Library.
+ */
+
+// Written by Johannes Singler.
+
+#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
+#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
+
+#include <omp.h>
+#include <new>
+#include <bits/stl_algobase.h>
+#include <parallel/parallel.h>
+#include <parallel/numericfwd.h>
+
+namespace __gnu_parallel
+{
+  // Problem: there is no 0-element given.
+
+/** @brief Base case prefix sum routine.
+  *  @param begin Begin iterator of input sequence.
+  *  @param end End iterator of input sequence.
+  *  @param result Begin iterator of output sequence.
+  *  @param bin_op Associative binary function.
+  *  @param value Start value. Must be passed since the neutral
+  *  element is unknown in general.
+  *  @return End iterator of output sequence. */
+template<typename InputIterator,
+        typename OutputIterator,
+        typename BinaryOperation>
+  OutputIterator
+  parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
+                               OutputIterator result, BinaryOperation bin_op,
+                               typename std::iterator_traits
+                               <InputIterator>::value_type value)
+  {
+    if (begin == end)
+      return result;
+
+    while (begin != end)
+      {
+        value = bin_op(value, *begin);
+        *result = value;
+        ++result;
+        ++begin;
+      }
+    return result;
+  }
+
+/** @brief Parallel partial sum implementation, two-phase approach,
+    no recursion.
+    *  @param begin Begin iterator of input sequence.
+    *  @param end End iterator of input sequence.
+    *  @param result Begin iterator of output sequence.
+    *  @param bin_op Associative binary function.
+    *  @param n Length of sequence.
+    *  @param num_threads Number of threads to use.
+    *  @return End iterator of output sequence.
+    */
+template<typename InputIterator,
+        typename OutputIterator,
+        typename BinaryOperation>
+  OutputIterator
+  parallel_partial_sum_linear(InputIterator begin, InputIterator end,
+                             OutputIterator result, BinaryOperation bin_op,
+                             typename std::iterator_traits
+                             <InputIterator>::difference_type n)
+  {
+    typedef std::iterator_traits<InputIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    if (begin == end)
+      return result;
+
+    thread_index_t num_threads =
+        std::min<difference_type>(get_max_threads(), n - 1);
+
+    if (num_threads < 2)
+      {
+        *result = *begin;
+        return parallel_partial_sum_basecase(
+            begin + 1, end, result + 1, bin_op, *begin);
+      }
+
+    difference_type* borders;
+    value_type* sums;
+
+    const _Settings& __s = _Settings::get();
+
+#   pragma omp parallel num_threads(num_threads)
+      {
+#       pragma omp single
+          {
+            num_threads = omp_get_num_threads();
+
+            borders = new difference_type[num_threads + 2];
+
+            if (__s.partial_sum_dilation == 1.0f)
+              equally_split(n, num_threads + 1, borders);
+            else
+              {
+                difference_type chunk_length =
+                    ((double)n
+                    / ((double)num_threads + __s.partial_sum_dilation)),
+                 borderstart = n - num_threads * chunk_length;
+                borders[0] = 0;
+                for (int i = 1; i < (num_threads + 1); ++i)
+                  {
+                    borders[i] = borderstart;
+                    borderstart += chunk_length;
+                  }
+                borders[num_threads + 1] = n;
+              }
+
+            sums = static_cast<value_type*>(::operator new(sizeof(value_type)
+                                                          * num_threads));
+            OutputIterator target_end;
+          } //single
+
+        thread_index_t iam = omp_get_thread_num();
+        if (iam == 0)
+          {
+            *result = *begin;
+            parallel_partial_sum_basecase(begin + 1, begin + borders[1],
+                                         result + 1, bin_op, *begin);
+            ::new(&(sums[iam])) value_type(*(result + borders[1] - 1));
+          }
+        else
+          {
+            ::new(&(sums[iam]))
+             value_type(std::accumulate(begin + borders[iam] + 1,
+                                        begin + borders[iam + 1],
+                                        *(begin + borders[iam]),
+                                        bin_op,
+                                        __gnu_parallel::sequential_tag()));
+          }
+
+#       pragma omp barrier
+
+#       pragma omp single
+          parallel_partial_sum_basecase(
+              sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]);
+
+#       pragma omp barrier
+
+        // Still same team.
+        parallel_partial_sum_basecase(begin + borders[iam + 1],
+                                     begin + borders[iam + 2],
+                                     result + borders[iam + 1], bin_op,
+                                     sums[iam]);
+      } //parallel
+
+    ::operator delete(sums);
+    delete[] borders;
+
+    return result + n;
+  }
+
+/** @brief Parallel partial sum front-end.
+  *  @param begin Begin iterator of input sequence.
+  *  @param end End iterator of input sequence.
+  *  @param result Begin iterator of output sequence.
+  *  @param bin_op Associative binary function.
+  *  @return End iterator of output sequence. */
+template<typename InputIterator,
+        typename OutputIterator,
+        typename BinaryOperation>
+  OutputIterator
+  parallel_partial_sum(InputIterator begin, InputIterator end,
+                       OutputIterator result, BinaryOperation bin_op)
+  {
+    _GLIBCXX_CALL(begin - end)
+
+    typedef std::iterator_traits<InputIterator> traits_type;
+    typedef typename traits_type::value_type value_type;
+    typedef typename traits_type::difference_type difference_type;
+
+    difference_type n = end - begin;
+
+    switch (_Settings::get().partial_sum_algorithm)
+      {
+      case LINEAR:
+        // Need an initial offset.
+        return parallel_partial_sum_linear(begin, end, result, bin_op, n);
+      default:
+    // Partial_sum algorithm not implemented.
+        _GLIBCXX_PARALLEL_ASSERT(0);
+        return result + n;
+      }
+  }
+}
+
+#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */