Pagina's

2010/10/17

Factorial a bit faster, floorLog2 of an integer, source

using System;
using System.Numerics;
using System.Diagnostics;
using System.Threading.Tasks;
using Xint = System.Numerics.BigInteger;

class Program
{
    private static Stopwatch sw = new Stopwatch();

    static void Main()
    {
        test_Fact();
        Console.ReadLine();
    }

    private static void test_Fact()
    {
        sw.Start();
        Xint F = Factorial(1000000);
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds + " ms");
        int bitLength_F = bitLength(F);
        Console.WriteLine(bitLength_F);
        if (bitLength_F < 3000) Console.WriteLine(F);
    }

    private static Xint MTP(Xint U, Xint V)
    {
        int n = Xint.Max(U.Sign * U, V.Sign * V).ToByteArray().Length << 3;
        if (n > 40000) return TC2P(U, V, n);
        if (n > 10000) return TC4(U, V, n);
        if (n > 6000) return TC3(U, V, n);
        if (n > 3000) return TC2(U, V, n);
        return U * V;
    }

    private static Xint MTP(Xint U, Xint V, int n)
    {
        if (n > 10000) return TC4(U, V, n);
        if (n > 6000) return TC3(U, V, n);
        if (n > 3000) return TC2(U, V, n);
        return U * V;
    }

    private static Xint TC2P(Xint A, Xint B, int n)
    {
        n >>= 1;
        Xint Mask = (Xint.One << n) - 1;
        Xint[] U = new Xint[3];
        U[0] = A & Mask; U[2] = (A >>= n); U[1] = U[0] + U[2];
        Xint[] V = new Xint[3];
        V[0] = B & Mask; V[2] = (B >>= n); V[1] = V[0] + V[2];
        Xint[] P = new Xint[3];
        Parallel.For(0, 3, (int i) => P[i] = MTP(U[i], V[i], n));
        return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0];
    }

    private static Xint TC4(Xint U3, Xint V3, int n)
    {
        n >>= 2;
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U3 & Mask; U3 >>= n;
        Xint U1 = U3 & Mask; U3 >>= n;
        Xint U2 = U3 & Mask; U3 >>= n;
        Xint V0 = V3 & Mask; V3 >>= n;
        Xint V1 = V3 & Mask; V3 >>= n;
        Xint V2 = V3 & Mask; V3 >>= n;

        Xint W0 = MTP(U0, V0, n);                               //  0
        U0 += U2;
        U1 += U3;
        V0 += V2;
        V1 += V3;
        Xint P1 = MTP(U0 + U1, V0 + V1, n);                     //  1
        Xint P2 = MTP(U0 - U1, V0 - V1, n);                     // -1
        U0 += 3 * U2;
        U1 += 3 * U3;
        V0 += 3 * V2;
        V1 += 3 * V3;
        Xint P3 = MTP(U0 + (U1 << 1), V0 + (V1 << 1), n);       //  2
        Xint P4 = MTP(U0 - (U1 << 1), V0 - (V1 << 1), n);       // -2
        Xint P5 = MTP(U0 + 12 * U2 + ((U1 + 12 * U3) << 2),
                      V0 + 12 * V2 + ((V1 + 12 * V3) << 2), n); //  4
        Xint W6 = MTP(U3, V3, n);                               //  inf

        Xint W1 = P1 + P2;
        Xint W4 = ((((((P3 + P4) >> 1) - (W1 << 1)) / 3) + W0) >> 2) - 5 * W6;
        Xint W2 = (W1 >> 1) - (W6 + W4 + W0);
        P1 = P1 - P2;
        P4 = P4 - P3;
        Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45;
        W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2);
        Xint W3 = (P1 >> 1) - (W1 + W5);
        return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
    }

    private static Xint TC3(Xint U2, Xint V2, int n)
    {
        n = (int)((long)(n) * 0x55555556 >> 32);// n /= 3;
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U2 & Mask; U2 >>= n;
        Xint U1 = U2 & Mask; U2 >>= n;
        Xint V0 = V2 & Mask; V2 >>= n;
        Xint V1 = V2 & Mask; V2 >>= n;
        Xint W0 = MTP(U0, V0, n);
        Xint W4 = MTP(U2, V2, n);
        Xint P3 = MTP((((U2 << 1) + U1) << 1) + U0, (((V2 << 1) + V1 << 1)) + V0, n);
        U2 += U0;
        V2 += V0;
        Xint P2 = MTP(U2 - U1, V2 - V1, n);
        Xint P1 = MTP(U2 + U1, V2 + V1, n);
        Xint W2 = (P1 + P2 >> 1) - (W0 + W4);
        Xint W3 = W0 - P1;
        W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1);
        Xint W1 = P1 - (W4 + W3 + W2 + W0);
        return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
    }

    private static Xint TC2(Xint U1, Xint V1, int n)
    {
        n >>= 1;
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U1 & Mask; U1 >>= n;
        Xint V0 = V1 & Mask; V1 >>= n;
        Xint P0 = MTP(U0, V0, n);
        Xint P2 = MTP(U1, V1, n);
        Xint P1 = MTP(U0 + U1, V0 + V1, n) - (P0 + P2);
        return ((P2 << n) + P1 << n) + P0;
    }

    private static long current_n;

    private static Xint Factorial(int n)
    {
        Xint P = 1;
        Xint R = 1;
        current_n = 1;
        int h = 0, shift = 0, high = 1, len = 0;
        int log2n = floorLog2(n);
        while (h != n)
        {
            shift += h;
            h = n >> log2n;
            log2n--;
            len = high;
            high = (h - 1) | 1;
            len = (high - len) >> 1;
            if (len > 0)
            {
                P = MTP(P, Product(len));
                R = MTP(R, P);
            }
        }
        return R << shift;
    }

    private static Xint Product(int n)
    {
        int m = n >> 1;
        if (m == 0) return (Xint)(current_n += 2);
        if (n == 2) return (Xint)(current_n += 2) * (current_n += 2);
        return MTP(Product(m), Product(n - m));
    }

    private static int floorLog2(int i)    //do you see the binary tree? 5 comparisions
    {
        return
        i < 1 << 15 ? i < 1 << 07 ? i < 1 << 03 ? i < 1 << 01 ? i < 1 << 00 ? -1 : 00 :
                                                                i < 1 << 02 ? 01 : 02 :
                                                  i < 1 << 05 ? i < 1 << 04 ? 03 : 04 :
                                                                i < 1 << 06 ? 05 : 06 :
                                    i < 1 << 11 ? i < 1 << 09 ? i < 1 << 08 ? 07 : 08 :
                                                                i < 1 << 10 ? 09 : 10 :
                                                  i < 1 << 13 ? i < 1 << 12 ? 11 : 12 :
                                                                i < 1 << 14 ? 13 : 14 :
                      i < 1 << 23 ? i < 1 << 19 ? i < 1 << 17 ? i < 1 << 16 ? 15 : 16 :
                                                                i < 1 << 18 ? 17 : 18 :
                                                  i < 1 << 21 ? i < 1 << 20 ? 19 : 20 :
                                                                i < 1 << 22 ? 21 : 22 :
                                    i < 1 << 27 ? i < 1 << 25 ? i < 1 << 24 ? 23 : 24 :
                                                                i < 1 << 26 ? 25 : 26 :
                                                  i < 1 << 29 ? i < 1 << 28 ? 27 : 28 :
                                                                i < 1 << 30 ? 29 : 30;
    }

    private static int seed;

    private static Xint RND(int n)
    {
        if (n < 2) return n;
        if (seed == int.MaxValue) seed = 0; else seed++;
        Random rand = new Random(seed);
        byte[] bytes = new byte[((n - 1) >> 3) + 2];
        rand.NextBytes(bytes);
        int i = bytes.Length - 1;
        bytes[i] = 0;
        n = (i << 3) - n;
        i--;
        bytes[i] >>= n;
        bytes[i] |= (byte)(128 >> n);
        return new Xint(bytes);
    }

    private static int bitLength(Xint U)
    {
        byte[] bytes = (U.Sign * U).ToByteArray();
        int i = bytes.Length - 1;
        return (i << 3) + bitLength(bytes[i]);
    }

    private static int bitLength(byte b)
    {
        return b < 8 ? b < 2 ? b < 1 ? 0 : 1 : b < 4 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7;
    }
}

2010/10/10

Factorial, Toom Cook 4, Parallel Programming, Threads

Threading should be quite easy with C#, it is. An intro:
Parallel Programming in .NET Framework 4: Getting Started

My system has four cores, so I used a parallel version of
Karatsuba, Toom Cook 2, "TC2P", to start up three threads.
The factorial code is from Luschny. Result: 1000000! in 15 seconds

Factorial, Toom Cook 4, three threads, source

using System;
using System.Numerics;
using System.Diagnostics;
using System.Threading.Tasks;
using Xint = System.Numerics.BigInteger;

class Program
{
    private static Stopwatch sw = new Stopwatch();

    static void Main()
    {
        test_Fact();
        Console.ReadLine();
    }

    private static void test_Fact()
    {
        sw.Start();
        Xint F = Factorial(400000);
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds + " ms");
        Console.WriteLine(bitLength(F));
    }

    private static bool b = true;

    private static Xint MTP(Xint U, Xint V)
    {
        int n = Xint.Max(U.Sign * U, V.Sign * V).ToByteArray().Length << 3;
        if (b && (n > 40000)) return TC2P(U, V, n);
        if (n > 10000) return TC4(U, V, n);
        if (n > 6000) return TC3(U, V, n);
        if (n > 3000) return TC2(U, V, n);
        return U * V;
    }

    private static Xint TC2P(Xint A, Xint B, int n)
    {
        n >>= 1;
        Xint Mask = (Xint.One << n) - 1;
        Xint[] U = new Xint[3];
        U[0] = A & Mask; U[2] = (A >>= n); U[1] = U[0] + U[2];
        Xint[] V = new Xint[3];
        V[0] = B & Mask; V[2] = (B >>= n); V[1] = V[0] + V[2];
        Xint[] P = new Xint[3];
        b = false;
        Parallel.For(0, 3, (int i) => P[i] = MTP(U[i], V[i]));
        b = true;
        return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0];
    }

    private static Xint TC4(Xint U3, Xint V3, int n)
    {
        n >>= 2;
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U3 & Mask; U3 >>= n;
        Xint U1 = U3 & Mask; U3 >>= n;
        Xint U2 = U3 & Mask; U3 >>= n;
        Xint V0 = V3 & Mask; V3 >>= n;
        Xint V1 = V3 & Mask; V3 >>= n;
        Xint V2 = V3 & Mask; V3 >>= n;

        Xint W0 = MTP(U0, V0);                               //  0
        U0 += U2;
        U1 += U3;
        V0 += V2;
        V1 += V3;
        Xint P1 = MTP(U0 + U1, V0 + V1);                     //  1
        Xint P2 = MTP(U0 - U1, V0 - V1);                     // -1
        U0 += 3 * U2;
        U1 += 3 * U3;
        V0 += 3 * V2;
        V1 += 3 * V3;
        Xint P3 = MTP(U0 + (U1 << 1), V0 + (V1 << 1));       //  2
        Xint P4 = MTP(U0 - (U1 << 1), V0 - (V1 << 1));       // -2
        Xint P5 = MTP(U0 + 12 * U2 + ((U1 + 12 * U3) << 2),
                      V0 + 12 * V2 + ((V1 + 12 * V3) << 2)); //  4
        Xint W6 = MTP(U3, V3);                               //  inf

        Xint W1 = P1 + P2;
        Xint W4 = ((((((P3 + P4) >> 1) - (W1 << 1)) / 3) + W0) >> 2) - 5 * W6;
        Xint W2 = (W1 >> 1) - (W6 + W4 + W0);
        P1 = P1 - P2;
        P4 = P4 - P3;
        Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45;
        W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2);
        Xint W3 = (P1 >> 1) - (W1 + W5);
        return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
    }

    private static Xint TC3(Xint U2, Xint V2, int n)
    {
        n = (int)((long)(n) * 0x55555556 >> 32);// n /= 3;
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U2 & Mask; U2 >>= n;
        Xint U1 = U2 & Mask; U2 >>= n;
        Xint V0 = V2 & Mask; V2 >>= n;
        Xint V1 = V2 & Mask; V2 >>= n;
        Xint W0 = MTP(U0, V0);
        Xint W4 = MTP(U2, V2);
        Xint P3 = MTP((((U2 << 1) + U1) << 1) + U0, (((V2 << 1) + V1 << 1)) + V0);
        U2 += U0;
        V2 += V0;
        Xint P2 = MTP(U2 - U1, V2 - V1);
        Xint P1 = MTP(U2 + U1, V2 + V1);
        Xint W2 = (P1 + P2 >> 1) - (W0 + W4);
        Xint W3 = W0 - P1;
        W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1);
        Xint W1 = P1 - (W4 + W3 + W2 + W0);
        return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
    }

    private static Xint TC2(Xint U1, Xint V1, int n)
    {
        n >>= 1;
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U1 & Mask; U1 >>= n;
        Xint V0 = V1 & Mask; V1 >>= n;
        Xint P0 = MTP(U0, V0);
        Xint P2 = MTP(U1, V1);
        Xint P1 = MTP(U0 + U1, V0 + V1) - (P0 + P2);
        return ((P2 << n) + P1 << n) + P0;
    }

    private static long current_n;

    private static Xint Factorial(int n)
    {
        Xint P = 1;
        Xint R = 1;
        current_n = 1;
        int h = 0, shift = 0, high = 1, len = 0;
        int log2n = floorLog2(n);
        while (h != n)
        {
            shift += h;
            h = n >> log2n;
            log2n--;
            len = high;
            high = (h - 1) | 1;
            len = (high - len) >> 1;
            if (len > 0)
            {
                P = MTP(P, Product(len));
                R = MTP(R, P);
            }
        }
        return R << shift;
    }

    private static Xint Product(int n)
    {
        int m = n >> 1;
        if (m == 0) return (Xint)(current_n += 2);
        if (n == 2) return (Xint)((current_n += 2) * (current_n += 2));
        return MTP(Product(m), Product(n - m));
    }

    private static int floorLog2(int n)
    {
        int i = 0;
        while (n > 1)
        {
            n >>= 1;
            i++;
        }
        return i;
    }

    private static int seed;

    private static Xint RND(int n)
    {
        if (n == 0) return 0;
        if (seed == int.MaxValue) seed = 0; else seed++;
        Random rand = new Random(seed);
        byte[] bytes = new byte[((n - 1) >> 3) + 2];
        rand.NextBytes(bytes);
        bytes[bytes.Length - 1] = 0;
        n = ((bytes.Length - 1) << 3) - n;
        bytes[bytes.Length - 2] >>= n;
        bytes[bytes.Length - 2] |= (byte)(128 >> n);
        return new Xint(bytes);
    }

    private static int bitLength(Xint U)
    {
        byte[] bytes = (U.Sign * U).ToByteArray();
        int i = bytes.Length - 1;
        return (i << 3) + bitLength(bytes[i]);
    }

    private static int bitLength(byte b)
    {
        return b < 8 ? b < 2 ? b < 1 ? 0 : 1 : b < 4 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7;
    }
}