--- /dev/null
+/* matrix22_mul.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
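+/* mpn_mul requires its first operand to have at least as many limbs as its
+   second; MUL swaps the operands when necessary. */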
+#define MUL(rp, ap, an, bp, bn) do { \
+ if (an >= bn) \
+ mpn_mul (rp, ap, an, bp, bn); \
+ else \
+ mpn_mul (rp, bp, bn, ap, an); \
+} while (0)
+
+/* Computes rp = |ap - bp| for unsigned n-limb inputs; returns 0 if
+   ap >= bp and 1 otherwise (the sign of ap - bp). */
+static int
+abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+ int c;
+ MPN_CMP (c, ap, bp, n);
+ if (c >= 0)
+ {
+ mpn_sub_n (rp, ap, bp, n);
+ return 0;
+ }
+ else
+ {
+ mpn_sub_n (rp, bp, ap, n);
+ return 1;
+ }
+}
+
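+/* Computes the sum of (-1)^as * ap and (-1)^bs * bp, all operands n limbs,
+   with as and bs sign flags (0 for >= 0, 1 for < 0).  Stores the absolute
+   value of the result at rp and returns its sign flag.  When the signs are
+   equal, the sum must fit in n limbs. */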
+static int
+add_signed_n (mp_ptr rp,
+ mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)
+{
+ if (as != bs)
+ return as ^ abs_sub_n (rp, ap, bp, n);
+ else
+ {
+ ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
+ return as;
+ }
+}
+
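+/* Returns the number of scratch limbs needed by mpn_matrix22_mul when the
+   entries of R have rn limbs and the entries of M have mn limbs. */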
+mp_size_t
+mpn_matrix22_mul_itch (mp_size_t rn, mp_size_t mn)
+{
+ if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
+ || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
+ return 3*rn + 2*mn;
+ else
+ return 4*(rn + mn) + 5;
+}
+
+/* Algorithm:
+
+    / s0 \   /  1  0  0  0 \  / r0 \
+    | s1 |   |  0  1  0  0 |  | r1 |
+    | s2 |   |  0  0  1  1 |  | r2 |
+    | s3 | = | -1  0  1  1 |  \ r3 /
+    | s4 |   |  1  0 -1  0 |
+    | s5 |   |  1  1 -1 -1 |
+    \ s6 /   \  0  0  0  1 /
+
+    / t0 \   /  1  0  0  0 \  / m0 \
+    | t1 |   |  0  0  1  0 |  | m1 |
+    | t2 |   | -1  1  0  0 |  | m2 |
+    | t3 | = |  1 -1  0  1 |  \ m3 /
+    | t4 |   |  0 -1  0  1 |
+    | t5 |   |  0  0  0  1 |
+    \ t6 /   \ -1  1  1 -1 /
+
+    / r0 \   / 1 1 0 0 0 0 0 \  / s0 * t0 \
+    | r1 | = | 1 0 1 1 0 1 0 |  | s1 * t1 |
+    | r2 |   | 1 0 0 1 1 0 1 |  | s2 * t2 |
+    \ r3 /   \ 1 0 1 1 1 0 0 /  | s3 * t3 |
+                                | s4 * t4 |
+                                | s5 * t5 |
+                                \ s6 * t6 /
+*/
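+
+/* As a check, the first row of the last matrix gives
+   r0 = s0*t0 + s1*t1 = r0*m0 + r1*m2, the top-left entry of R * M; the
+   other three rows expand in the same way. */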
+
+/* Computes R = R * M.  R = (r0, r1; r2, r3) and M = (m0, m1; m2, m3) are
+ * 2x2 matrices; each entry of R is an rn-limb number and each entry of M
+ * an mn-limb number.
+ *
+ * Resulting elements are of size up to rn + mn + 1.
+ *
+ * Temporary storage: 4 rn + 4 mn + 5. */
+void
+mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
+ mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
+ mp_ptr tp)
+{
+ mp_ptr s2, s3, t2, t3, u0, u1;
+ int r2s, r3s, s3s, t2s, t3s, u0s, u1s;
+ s2 = tp; tp += rn;
+ s3 = tp; tp += rn + 1;
+ t2 = tp; tp += mn;
+ t3 = tp; tp += mn + 1;
+ u0 = tp; tp += rn + mn + 1;
+ u1 = tp; /* rn + mn + 2 */
+
+ MUL (u0, r0, rn, m0, mn); /* 0 */
+ MUL (u1, r1, rn, m2, mn); /* 1 */
+
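+ /* Evaluation, done in place: the s2 buffer saves the original r3 (the
+    algorithm's s6), r3 itself becomes s2 = r2 + r3, s3, t2 and t3 hold
+    |s3|, |t2|, |t3| with sign flags s3s, t2s, t3s, r2 becomes |s4| =
+    |r0 - r2| with sign r2s, and r0 receives its final value s0*t0 + s1*t1. */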
+ MPN_COPY (s2, r3, rn);
+
+ r3[rn] = mpn_add_n (r3, r3, r2, rn);
+ r0[rn] = 0;
+ s3s = abs_sub_n (s3, r3, r0, rn + 1);
+ t2s = abs_sub_n (t2, m1, m0, mn);
+ if (t2s)
+ {
+ t3[mn] = mpn_add_n (t3, m3, t2, mn);
+ t3s = 0;
+ }
+ else
+ {
+ t3s = abs_sub_n (t3, m3, t2, mn);
+ t3[mn] = 0;
+ }
+
+ r2s = abs_sub_n (r2, r0, r2, rn);
+ r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
+
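+ /* Products 3 and 2, folded into the accumulators: afterwards u0 holds
+    p0 + p3 and u1 holds p0 + p2 + p3 (with pi = si * ti), both as
+    sign-and-magnitude values. */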
+ MUL(u1, s3, rn+1, t3, mn+1); /* 3 */
+ u1s = s3s ^ t3s;
+ ASSERT (u1[rn+mn+1] == 0);
+ ASSERT (u1[rn+mn] < 4);
+
+ if (u1s)
+ {
+ u0[rn+mn] = 0;
+ u0s = abs_sub_n (u0, u0, u1, rn + mn + 1);
+ }
+ else
+ {
+ u0[rn+mn] = u1[rn+mn] + mpn_add_n (u0, u0, u1, rn + mn);
+ u0s = 0;
+ }
+ MUL(u1, r3, rn + 1, t2, mn); /* 2 */
+ u1s = t2s;
+ ASSERT (u1[rn+mn] < 2);
+
+ u1s = add_signed_n (u1, u0, u0s, u1, u1s, rn + mn + 1);
+
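+ /* Form t4 = m3 - m1 (reusing the t2 buffer) and s5 = r0 + r1 - r2 - r3
+    = r1 - s3 (reusing the s3 buffer); product 5 then gives r1 its final
+    value p0 + p2 + p3 + p5. */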
+ t2s = abs_sub_n (t2, m3, m1, mn);
+ if (s3s)
+ {
+ s3[rn] += mpn_add_n (s3, s3, r1, rn);
+ s3s = 0;
+ }
+ else if (s3[rn] > 0)
+ {
+ s3[rn] -= mpn_sub_n (s3, s3, r1, rn);
+ s3s = 1;
+ }
+ else
+ {
+ s3s = abs_sub_n (s3, r1, s3, rn);
+ }
+ MUL (r1, s3, rn+1, m3, mn); /* 5 */
+ ASSERT_NOCARRY(add_signed_n (r1, r1, s3s, u1, u1s, rn + mn + 1));
+ ASSERT (r1[rn + mn] < 2);
+
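+ /* Product 4: r3 gets its final value p0 + p2 + p3 + p4, while u0 becomes
+    p0 + p3 + p4, the part of r2 still lacking product 6. */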
+ MUL (r3, r2, rn, t2, mn); /* 4 */
+ r3s = r2s ^ t2s;
+ r3[rn + mn] = 0;
+ u0s = add_signed_n (u0, u0, u0s, r3, r3s, rn + mn + 1);
+ ASSERT_NOCARRY (add_signed_n (r3, r3, r3s, u1, u1s, rn + mn + 1));
+ ASSERT (r3[rn + mn] < 2);
+
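+ /* Form t6 = -m0 + m1 + m2 - m3 = m2 - t3 (reusing the t3 buffer); product 6,
+    taken with the saved copy of the original r3 (= s6) in s2, gives r2 its
+    final value p0 + p3 + p4 + p6. */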
+ if (t3s)
+ {
+ t3[mn] += mpn_add_n (t3, m2, t3, mn);
+ t3s = 0;
+ }
+ else if (t3[mn] > 0)
+ {
+ t3[mn] -= mpn_sub_n (t3, t3, m2, mn);
+ t3s = 1;
+ }
+ else
+ {
+ t3s = abs_sub_n (t3, m2, t3, mn);
+ }
+ MUL (r2, s2, rn, t3, mn + 1); /* 6 */
+
+ ASSERT_NOCARRY (add_signed_n (r2, r2, t3s, u0, u0s, rn + mn + 1));
+ ASSERT (r2[rn + mn] < 2);
+}
+
+void
+mpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
+ mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
+ mp_ptr tp)
+{
+ if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
+ || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
+ {
+ mp_ptr p0, p1;
+ unsigned i;
+
+ /* Temporary storage: 3 rn + 2 mn */
+ p0 = tp + rn;
+ p1 = p0 + rn + mn;
+
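+ /* The first pass computes (r0, r1) <- (r0*m0 + r1*m2, r0*m1 + r1*m3);
+    the second pass repeats the computation for (r2, r3). */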
+ for (i = 0; i < 2; i++)
+ {
+ MPN_COPY (tp, r0, rn);
+
+ if (rn >= mn)
+ {
+ mpn_mul (p0, r0, rn, m0, mn);
+ mpn_mul (p1, r1, rn, m3, mn);
+ mpn_mul (r0, r1, rn, m2, mn);
+ mpn_mul (r1, tp, rn, m1, mn);
+ }
+ else
+ {
+ mpn_mul (p0, m0, mn, r0, rn);
+ mpn_mul (p1, m3, mn, r1, rn);
+ mpn_mul (r0, m2, mn, r1, rn);
+ mpn_mul (r1, m1, mn, tp, rn);
+ }
+ r0[rn+mn] = mpn_add_n (r0, r0, p0, rn + mn);
+ r1[rn+mn] = mpn_add_n (r1, r1, p1, rn + mn);
+
+ r0 = r2; r1 = r3;
+ }
+ }
+ else
+ mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn,
+ m0, m1, m2, m3, mn, tp);
+}
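+
+/* Illustrative use (not part of the library; these functions are internal,
+   see the warning at the top of this file).  Each result area r0..r3 must
+   provide rn + mn + 1 limbs, and tp must point to scratch space of
+   mpn_matrix22_mul_itch (rn, mn) limbs:
+
+     mp_size_t itch = mpn_matrix22_mul_itch (rn, mn);
+     ... allocate itch limbs at tp ...
+     mpn_matrix22_mul (r0, r1, r2, r3, rn, m0, m1, m2, m3, mn, tp);
+*/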