Pagina's

2013/08/23

Small Cube Roots ( < 2^64 )


/*
The time to take a cube root from a 64 bits ulong was: ~330 ns, new time: < 75 ns
The time to take a cube root from a 32 bits  uint was:  ~25 ns, new time: < 22 ns (mean)
* UPDATE (2013-09-22) new time for 64 bits: < 70 ns, casting float to int is faster than 
                                                     casting float to uint.

The 64 bits version: 
 
    www.hackersdelight.org/hdcodetxt/acbrt.c.txt
 
    Why? 0x2a5137a0

A union data type is used, not available in C#, first work around: "BitConverter", at the
bottom: "cro64old(x)". It takes ~110 ns, "BitConverter" takes ~10 ns, it's used four times,
so 40 ns are gone. A better solution:
 
    Unions (or an equivalent) in C# - Sairama's Tip of the Day

A 63 bits version might be slightly faster, no overflow checks, less correction loops?
 
 
The 32 bits version:
 
A while loop is replaced by a "do while", leading (triple) zero bits are counted to avoid
useless iterations, all (4294967295) roots in 92,7 s, mean time 21,6 ns.
** UPDATE (2013-09-26) new time for 32 bits: < 15 ns, see: Fast Cube Root 
 
  
    |-------------||--------------------------||-------------------------------------------| 
    |  cro12(x)   ||       cr032(x)           ||               cro64(random x)             |
    |------|------||------------|-------------||------|----------------------|-------------|
    |    x |  ns  ||          x |     ns   ** || bits |                    x |     ns   *  |  
    |------|------||------------|------|------||------|----------------------|------|------|  
    |    0 |  3,3 ||          0 |  5,8 |  2,4 ||   64 | 13397031575470575331 | 74,7 | 69,8 |  
    |    1 |  3,7 ||          1 |  6,5 |  2,3 ||   63 |  6512002244030281881 | 70,4 | 66,8 |  
    |    3 |  5,2 ||          3 |  6,5 |  2,3 ||   62 |  4508122487018397160 | 73,6 | 68,5 |  
    |    7 |  5,3 ||          7 |  6,5 |  2,3 ||   61 |  1458795770683441400 | 74,0 | 66,8 |  
    |   15 |  6,3 ||         15 |  7,7 |  3,6 ||   60 |   994257496981222664 | 74,4 | 66,8 |  
    |   31 |  7,3 ||         31 |  7,5 |  3,0 ||   59 |   470915095588726850 | 74,0 | 66,8 |  
    |   63 |  7,3 ||         63 |  7,5 |  3,0 ||   58 |   201745106158251712 | 74,0 | 66,8 |  
    |   64 |  7,3 ||         64 |  9,4 |  4,8 ||   57 |    78804375734046440 | 74,0 | 66,8 |  
    |   65 |  8,3 ||         65 |  9,4 |  4,6 ||   56 |    57442872892622787 | 74,0 | 66,8 |  
    |  100 |  8,3 ||        100 |  9,4 |  4,6 ||   55 |    19109171792964730 | 74,0 | 66,8 |  
    |  127 |  9,3 ||        127 |  9,4 |  4,6 ||   54 |     9099755032911121 | 74,0 | 66,8 |  
    |  128 |  9,3 ||        128 |  9,4 |  4,6 ||   53 |     8170442697918990 | 74,0 | 66,8 |  
    |  255 | 10,3 ||        255 |  9,4 |  5,0 ||   52 |     2913033827815339 | 74,0 | 66,8 |  
    |  256 | 10,3 ||        256 |  9,4 |  5,0 ||   51 |     1395743698554671 | 74,0 | 66,8 |  
    |  511 | 11,3 ||        511 |  9,6 |  5,0 ||   50 |      929340477894354 | 74,0 | 66,8 |  
    | 1023 | 14,3 ||       1023 | 10,7 |  5,0 ||   49 |      351768287627885 | 74,1 | 66,8 |  
    | 2047 | 16,3 ||       2047 | 10,5 |  5,7 ||   48 |      165110734465934 | 70,9 | 66,5 |  
    | 4095 | 23,3 ||       4095 | 11,5 |  5,3 ||   47 |      116364019662847 | 73,6 | 66,8 |  
    |      |      ||      32768 | 13,7 |  6,4 ||   46 |       45894645838798 | 70,9 | 66,5 |  
    |      |      ||    1048576 | 15,7 |  8,7 ||   45 |       25652614759428 | 70,9 | 65,5 |  
    |      |      ||  268435456 | 19,7 | 11,0 ||   44 |       12123210057895 | 71,7 | 65,5 |  
    |      |      ||  536870912 | 20,1 | 12,0 ||   43 |        5902555891371 | 71,7 | 65,5 |  
    |      |      || 1073741824 | 25,8 | 11,3 ||   42 |        3490450430912 | 71,7 | 65,5 |  
    |      |      || 1073741824 | 25,8 | 11,3 ||   41 |        1149630654634 | 71,7 | 65,5 |  
    |      |      || 1073774592 | 25,8 | 11,3 ||   40 |         902265632303 | 71,7 | 65,5 |  
    |      |      || 2147483648 | 21,1 | 12,3 ||   39 |         343393482192 | 71,7 | 65,5 |  
    |      |      || 4294967295 | 21,8 | 12,3 ||   38 |         198281228228 | 71,7 | 65,5 |  
    |      |      ||            |      |      ||   37 |          93252157892 | 71,7 | 65,5 |  
    |      |      ||            |      |      ||   36 |          41910484903 | 71,7 | 65,5 |
    |      |      ||            |      |      ||   35 |          23282355985 | 71,8 | 65,5 |
    |      |      ||            |      |      ||   34 |           8677323163 | 71,7 | 65,5 |
    |      |      ||            |      |      ||   33 |           6714386675 | 71,7 | 65,5 |
    |------|------||------------|------|------||------|----------------------|------|------|
 
*/
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;

[StructLayout(LayoutKind.Explicit)]

public struct fu_32   // float <==> uint
{
    [FieldOffset(0)]
    public float f;
    [FieldOffset(0)]
    public uint u;
}
class cro_64_32
{
    private static uint cro64(ulong x)
    {
        if (x >= 18446724184312856125) return 2642245;
        float fx = (float)x;
        fu_32 fu32 = new fu_32();
        fu32.f = fx;
        uint uy = fu32.u / 4;
        uy += uy / 4;
        uy += uy / 16;
        uy += uy / 256;
        uy += 0x2a5137a0;
        fu32.u = uy;
        float fy = fu32.f;
        fy = 0.33333333f * (fx / (fy * fy) + 2.0f * fy);

        // uint y1 = (uint)
        //    (0.33333333f * (fx / (fy * fy) + 2.0f * fy));

        int y0 = (int)                                      // 2013-09-22
            (0.33333333f * (fx / (fy * fy) + 2.0f * fy));   // 5 ns 
        uint y1 = (uint)y0;                                 // faster

        ulong y2, y3;
        if (y1 >= 2642245)
        {
            y1 = 2642245;
            y2 = 6981458640025;
            y3 = 18446724184312856125;
        }
        else
        {
            y2 = (ulong)y1 * y1;
            y3 = y2 * y1;
        }
        if (y3 > x)
        {
            y1 -= 1;
            y2 -= 2 * y1 + 1;
            y3 -= 3 * y2 + 3 * y1 + 1;
            while (y3 > x)
            {
                y1 -= 1;
                y2 -= 2 * y1 + 1;
                y3 -= 3 * y2 + 3 * y1 + 1;
            }
            return y1;
        }
        do
        {
            y3 += 3 * y2 + 3 * y1 + 1;
            y2 += 2 * y1 + 1;
            y1 += 1;
        }
        while (y3 <= x);
        return y1 - 1;
    }

    private static uint cro32(uint x)
    {
        uint y = 0, z = 0, b = 0;
        int s = x < 1u << 24 ? x < 1u << 12 ? x < 1u << 06 ? x < 1u << 03 ? 00 : 03 :
                                                             x < 1u << 09 ? 06 : 09 :
                                              x < 1u << 18 ? x < 1u << 15 ? 12 : 15 :
                                                             x < 1u << 21 ? 18 : 21 :
                               x >= 1u << 30 ? 30 : x < 1u << 27 ? 24 : 27;
        do
        {
            y *= 2;
            z *= 4;
            b = 3 * y + 3 * z + 1 << s;
            if (x >= b)
            {
                x -= b;
                z += 2 * y + 1;
                y += 1;
            }
            s -= 3;
        }
        while (s >= 0);
        return y;
    }

    private static uint cro12(uint x)
    {
        uint y = 0, a = 0, b = 1, c = 0;
        while (a < x)
        {
            y++;
            b += c;
            a += b;
            c += 6;
        }
        if (a != x) y--;
        return y;
    }

    private static Stopwatch sw = new Stopwatch();
    static void Main()
    {
        cro64(long.MaxValue);
        check_perfect_cubes64();
        check_perfect_cubes32();
        time_cro64();
        time_cro32();
        time_cro12();
        cro32_all();              // ~90 s, sorry, no cro64_all() ;) it would take ~44 millennia.  
        Console.ReadLine();
    }
    private static void check_perfect_cubes64()
    {
        ulong n = 0, a = 0, b = 1, c = 6;
        while (n <= 2642245)
        {
            if (n != cro64(a)) Console.WriteLine("WRONG");
            a += b; b += c; c += 6; n++;
        }
        Console.WriteLine("CHECKED64");
    }
    private static void check_perfect_cubes32()
    {
        uint n = 0, a = 0, b = 1, c = 6;
        while (n <= 1625)
        {
            if (n != cro32(a)) Console.WriteLine("WRONG");
            a += b; b += c; c += 6; n++;
        }
        Console.WriteLine("CHECKED32");
    }

    private static Random random = new Random(0);       // Random(0)/Random(), same/random sequence 
    private static uint rnd32()                         // random uint
    {
        return (uint)(random.Next(1 << 16)) << 16 |
               (uint)(random.Next(1 << 16));
    }
    private static uint rnd32(uint n)                   // random uint of n bits
    {
        if (n < 2) return n;
        if (n > 32) n = 32;
        return (((uint)(random.Next(1 << 16)) << 16 |
                 (uint)(random.Next(1 << 16)))
                >> 32 - (int)n) | (1u << (int)n - 1);
    }
    private static ulong rnd64(uint n)                 // random ulong of n bits
    {
        if (n < 2) return n;
        if (n > 64) n = 64;
        ulong u = rnd32();
        u = u << 32 | rnd32();
        u >>= 64 - (int)n;
        u |= 1uL << (int)n - 1;
        return u;
    }

    private static void time_cro64()
    {
        int i; uint j; ulong x; double t;
        Console.WriteLine("|------|----------------------|------|");
        Console.WriteLine("| bits |                    x |  ns  |");
        Console.WriteLine("|------|----------------------|------|");
        Console.WriteLine("|             PRESS A KEY            |");
        Console.ReadLine();
        for (j = 64; j > 32; j -= 1)
        {
            x = rnd64(j);
            cro64(x);
            sw.Restart();
            for (i = 0; i < 10000000; i++) cro64(x);
            sw.Stop();
            t = sw.ElapsedMilliseconds;
            sw.Restart();
            for (i = 0; i < 10000000; i++) ; // nada
            sw.Stop();
            t -= sw.ElapsedMilliseconds;
            Console.WriteLine("|   {0,2} | {1,20} | {2:00.0} |", j, x, t / 10);
        }
        Console.WriteLine("|------|----------------------|------|");
        Console.WriteLine();
    }
    private static void time_cro32()
    {
        int i, j; uint x; double t;
        uint[] a = { 0, 1, 3, 7, 15, 31, 63, 64, 65,
                     100, 127, 128, 255, 256, 511, 1023, 2047, 4095,
                     1u << 15, 1u << 20, 1u << 28 ,1u << 29 , 1070599167,
                     1u << 30,1u << 30,(1u << 30) + (1u << 15), 1u << 31, ~0u };
        Console.WriteLine("|------------|------|");
        Console.WriteLine("|          x |  ns  |");
        Console.WriteLine("|------------|------|");
        for (j = 0; j < a.Length; j++)
        {
            x = a[j];
            cro32(x);
            sw.Restart();
            for (i = 0; i < 10000000; i++) cro32(x);
            sw.Stop();
            t = sw.ElapsedMilliseconds;
            sw.Restart();
            for (i = 0; i < 10000000; i++) ; // nada
            sw.Stop();
            t -= sw.ElapsedMilliseconds;
            Console.WriteLine("| {0,10} | {1:00.0} |", x, t / 10);
        }
        Console.WriteLine("|------------|------|");
        Console.WriteLine();
    }
    private static void time_cro12()
    {
        int i, j; uint x; double t;
        uint[] a = { 0, 1, 3, 7, 15, 31, 63, 64, 65,
                     100, 127, 128, 255, 256, 511, 1023, 2047, 4095 };
        Console.WriteLine("|------------|------|");
        Console.WriteLine("|          x |  ns  |");
        Console.WriteLine("|------------|------|");
        for (j = 0; j < a.Length; j++)
        {
            x = a[j];
            cro12(x);
            sw.Restart();
            for (i = 0; i < 10000000; i++) cro12(x);
            sw.Stop();
            t = sw.ElapsedMilliseconds;
            sw.Restart();
            for (i = 0; i < 10000000; i++) ; // nada
            sw.Stop();
            t -= sw.ElapsedMilliseconds;
            Console.WriteLine("| {0,10} | {1:00.0} |", x, t / 10);
        }
        Console.WriteLine("|------------|------|");
        Console.WriteLine();
    }
    private static void cro32_all()
    {
        uint x; double t;
        cro32(~0u);
        sw.Restart();
        for (x = 0; x < ~0u; x++) cro32(x);
        sw.Stop();
        t = sw.ElapsedMilliseconds;
        sw.Restart();
        for (x = 0; x < ~0u; x++) ; // nada
        sw.Stop();
        t -= sw.ElapsedMilliseconds;
        Console.Write(x + " roots in " + t + " ms, ");
        Console.WriteLine("mean time {0:.00} ns",
                          (double)(t) * 1000000 / x);
        // 4294967295 roots in 92732 ms, mean time 21,59 ns
    }
    private static uint cro64old(ulong x)
    {
        if (x >= 18446724184312856125) return 2642245;                       // avoid overflow
        float fx = (float)x;           //------------------------------------------------------\
        uint uy = BitConverter.ToUInt32(BitConverter.GetBytes(fx), 0) / 4;   // ~20 ns          \
        uy += uy / 4;                                                        //                  \
        uy += uy / 16;                                                       //                   \
        uy += uy / 256;                                                      //                    \
        uy += 0x2a5137a0;                                                    //                     \
        float fy = BitConverter.ToSingle(BitConverter.GetBytes(uy), 0);      // ~20 ns              32 bits
        fy = 0.33333333f * (fx / (fy * fy) + 2.0f * fy);                     //                     /
        uint y1 = (uint)(0.33333333f * (fx / (fy * fy) + 2.0f * fy));        //                    /
        ulong y2, y3;                                                        //                   /
        if (y1 >= 2642245)                                                   // avoid overflow   /
        {                                                                    //                 /
            y1 = 2642245;              //------------------------------------------------------/                                                  
            y2 = 6981458640025;
            y3 = 18446724184312856125;
        }
        else
        {
            y2 = (ulong)y1 * y1;
            y3 = y2 * y1;
        }
        if (y3 > x)
        {
            y1 -= 1;
            y2 -= 2 * y1 + 1;
            y3 -= 3 * y2 + 3 * y1 + 1;
            while (y3 > x)
            {
                y1 -= 1;
                y2 -= 2 * y1 + 1;
                y3 -= 3 * y2 + 3 * y1 + 1;
            }
            return y1;
        }
        do
        {
            y3 += 3 * y2 + 3 * y1 + 1;
            y2 += 2 * y1 + 1;
            y1 += 1;
        }
        while (y3 <= x);
        return y1 - 1;
    }
}

2013/08/14

Stirling numbers of the second kind

/*

   k                                                                                  1       1      1    1   1 1
 n 0 1     2       3        4         5         6         7         8        9        0       1      2    3   4 5
 0 1 0     0       0        0         0         0         0         0        0        0       0      0    0   0 0
 1 0 1     0       0        0         0         0         0         0        0        0       0      0    0   0 0
 2 0 1     1       0        0         0         0         0         0        0        0       0      0    0   0 0
 3 0 1     3       1        0         0         0         0         0        0        0       0      0    0   0 0
 4 0 1     7       6        1         0         0         0         0        0        0       0      0    0   0 0
 5 0 1    15      25       10         1         0         0         0        0        0       0      0    0   0 0
 6 0 1    31      90       65        15         1         0         0        0        0       0      0    0   0 0
 7 0 1    63     301      350       140        21         1         0        0        0       0      0    0   0 0
 8 0 1   127     966     1701      1050       266        28         1        0        0       0      0    0   0 0
 9 0 1   255    3025     7770      6951      2646       462        36        1        0       0      0    0   0 0
10 0 1   511    9330    34105     42525     22827      5880       750       45        1       0      0    0   0 0
11 0 1  1023   28501   145750    246730    179487     63987     11880     1155       55       1      0    0   0 0
12 0 1  2047   86526   611501   1379400   1323652    627396    159027    22275     1705      66      1    0   0 0
13 0 1  4095  261625  2532530   7508501   9321312   5715424   1899612   359502    39325    2431     78    1   0 0
14 0 1  8191  788970 10391745  40075035  63436373  49329280  20912320  5135130   752752   66066   3367   91   1 0
15 0 1 16383 2375101 42355950 210766920 420693273 408741333 216627840 67128490 12662650 1479478 106470 4550 105 1
 
                              coincidence?
Two formula's:
                       k
    S2(n,k) = 1/k! * SIGMA[(-1)^(k-j) * C(k,j) * j^n ]
                      j=0
  
    S2(n,k) = k * S2(n-1,k) + S2(n-1,k-1) 
  
    Initial value's: S2(j,j) = 1 , S2(j,0) = 0 , S2(0,j) = 0
 
The first formula: * powers, * / factorials, + - (large) numbers .
The second one leads to a recursive solution. It's going top-down,
a larger number depends on two smaller ones. Bottom-up seems to be
an easier, faster, iterative way.
  
Top-down:
 
    S2(5,3) = 3 * S2(4,3) + S2(4,2)
                  S2(4,3) = 3 * S2(3,3) + S2(3,2)
                            S2(4,2) = 2 * S2(3,2) + S2(2,2)
                                          S2(3,2) = 2 * S2(2,2) + S2(2 ...
                                                            ...complicated...
Bottom-up:
 
     Rearrange table, remove empty (zero) items in colums:
      _             _            _             _
     | 1             |          | 1   1   1   1 |
     | 0   1         |  =====>  | 0   1   3   6 |
     | 0   1   1     |          |_0   1   7  25_|
     | .   1   3   1 |          
     | .   .   7   6 |
     |_.   .   .  25_|
     
       1   1   1 + 2 * 0 = 1   1 + 3 * 0 =  1
       0   1   1 + 2 * 1 = 3   3 + 3 * 1 =  6
       0   1   1 + 2 * 3 = 7   7 + 3 * 6 = 25 = S2(5,3)  ...less complicated...


          |---------------------------------------------------------------|
          |                             S2(n,k)                           |
          |------|------|-----------------------------------------|-------|
          |    n |    k |     Time in ms (Athlon X4, XP, 2GB)     |  bits |
          |------|------|---------|---------|----------|----------|-------|
          |  100 |   10 |    0,25 |         |          |          |   311 |
          |  100 |   25 |         |    0,61 |          |          |   381 |
          |  100 |   50 |         |         |     0,81 |          |   338 |
          |  100 |   89 |         |         |          |     0,27 |   108 |
          |      |      |         |         |          |          |       |
          |  200 |   20 |    1,5  |         |          |          |   804 |
          |  200 |   43 |         |    3,1  |          |          |   910 |
          |  200 |   86 |         |         |     4,4  |          |   838 |
          |  200 |  172 |         |         |          |     1,6  |   295 |
          |      |      |         |         |          |          |       |
          |  400 |   40 |    9,4  |         |          |          |  1970 |
          |  400 |   77 |         |   18    |          |          |  2131 |
          |  400 |  154 |         |         |    27    |          |  1986 |
          |  400 |  308 |         |         |          |    14    |   982 |
          |      |      |         |         |          |          |       |
          |  800 |   80 |   75    |         |          |          |  4663 |
          |  800 |  138 |         |  124    |          |          |  4900 |
          |  800 |  276 |         |         |   183    |          |  4617 |
          |  800 |  552 |         |         |          |   119    |  2744 |
          |      |      |         |         |          |          |       |
          | 1600 |  160 |  672    |         |          |          | 10770 |
          | 1600 |  250 |         | 1010    |          |          | 11109 |
          | 1600 |  500 |         |         |  1620    |          | 10546 |
          | 1600 | 1000 |         |         |          |  1120    |  6974 |
          |      |      |         |         |          |          |       |
          | 3200 |  320 | 7230    |         |          |          | 24424 |
          | 3200 |  456 |         | 9930    |          |          | 24889 |
          | 3200 |  912 |         |         | 15300    |          | 23765 |
          | 3200 | 1824 |         |         |          | 11900    | 16881 |
          |------|------|---------|---------|----------|----------|-------|
   
*/

using System;
using System.Diagnostics;
using Xint = System.Numerics.BigInteger;

class Stirling_2nd_kind
{
    private static Xint S2(uint n, uint k)
    {
        if (k >= n) return k == n ? 1 : 0;
        if (k <= 2) return k == 0 ? 0 : k == 1 ?
            1 : (Xint.One << (int)(n - 1)) - 1;
        if (k + 1 == n) return (Xint)(n) * k / 2;

        n -= k - 1;
        uint i = 0, j = 3;
        Xint[] A = new Xint[n];
        for (; i < n; i++)
            A[i] = 1;
        Xint[] B = new Xint[n];
        for (i = 0; i < n; i++) 
            B[i] = (Xint.One << (int)(i + 1)) - 1;

        do
        {
            for (i = 1; i < n; i++)
                A[i] = A[i - 1] * j + B[i];
            j++;

            if (j > k) return A[n - 1];

            for (i = 1; i < n; i++)
                B[i] = B[i - 1] * j + A[i];
            j++;
        }
        while (j <= k);
        return B[n - 1];
    }

    // to do: - use uints/ulongs: - for smaller value's / 
    //                            - as long as possible 
    //        - return row (array)
    //        - return previous/next row from row
    //        - k close to n , go from right to left in table ?
    //        - same/similar trick for: - other sequences ?
    //                                  - sequences using S2(n,k) ?

    private static Stopwatch sw = new Stopwatch();
    static void Main()
    {
        Console.WriteLine("S2(50,17) = " + S2(50, 17));
        Console.WriteLine("     bits : " + bL(S2(50, 17)));
        sw.Restart();
        for (int i = 0; i < 1000; i++) S2(50, 17);
        sw.Stop();
        Console.WriteLine("       ms : 0," + sw.ElapsedMilliseconds);
        Console.ReadLine();
        // S2(50,17) = 37645241791600906804871080818625037726247519045
        //      bits : 155
        //        ms : 0,134
    }

    private static int bL(Xint U)
    {
        byte[] bytes = (U.Sign * U).ToByteArray();
        int i = bytes.Length - 1;
        return i * 8 | bitLengthMostSignificantByte(bytes[i]);
    }
    private static int bitLengthMostSignificantByte(byte b)
    {
        return b < 08 ? b < 02 ? b < 01 ? 0 : 1 :
                                 b < 04 ? 2 : 3 :
                        b < 32 ? b < 16 ? 4 : 5 :
                                 b < 64 ? 6 : 7;
    }
}