C# 奇怪的模数速度行为

标签 c#

为什么第一段代码比第二段慢很多?

public long Gen(int mod)
{
    long a, b = 0x7fffffffffffffff - mod;

    do
    {
        a = (long)(Gen() >> 1);
    } while (a > b);

    return a % mod;
}

public long Gen(int mod)
{
    long a, b = 0x7fffffffffffffff - mod;

    do
    {
        a = (long)(Gen() >> 1);
    } while (a > b);

    return a % 12345;
}

gen 函数是一个 64 位无符号 PRNG(见下文)。

问题是第一段代码慢得多,使用变量计算模数基本上增加了计算随机数所需时间的 3 倍!更神秘的是,当你去掉循环,并使用变量计算模数时,速度与第二段代码相似。

这里发生了一些奇怪的事情,因为你不能告诉我使用变量的模数比这样慢几倍:

public ulong Gen()
{
    counter = (counter + 1) & 3;

    if (counter == 0)
    {
        state[0]++;

        ulong x0 = state[0];
        ulong x1 = state[1];
        ulong x2 = state[2];
        ulong x3 = state[3];

        for (int i = 0; i < 2; i++)
        {
            x0 += x1; x1 ^= ((x0 << 32) | (x0 >> (64 - 32)));
            x1 += x0; x0 ^= ((x1 << 32) | (x1 >> (64 - 32)));
            x2 += x3; x3 ^= ((x2 << 32) | (x2 >> (64 - 32)));
            x3 += x2; x2 ^= ((x3 << 32) | (x3 >> (64 - 32)));

            x0 += x2; x2 ^= ((x0 << 27) | (x0 >> (64 - 27)));
            x2 += x0; x0 ^= ((x2 << 27) | (x2 >> (64 - 27)));
            x1 += x3; x3 ^= ((x1 << 27) | (x1 >> (64 - 27)));
            x3 += x1; x1 ^= ((x3 << 27) | (x3 >> (64 - 27)));

            x0 += x3; x3 ^= ((x0 << 11) | (x0 >> (64 - 11)));
            x3 += x0; x0 ^= ((x3 << 11) | (x3 >> (64 - 11)));
            x1 += x2; x2 ^= ((x1 << 11) | (x1 >> (64 - 11)));
            x2 += x1; x1 ^= ((x2 << 11) | (x2 >> (64 - 11)));
        }

        block[0] = x0;
        block[1] = x1;
        block[2] = x2;
        block[3] = x3;
    }

    return block[counter];
}

要求的最小可复制版本:

using System;
using System.Diagnostics;

class Program
{
    static void Main(string[] args)
    {
        Stopwatch sw = new Stopwatch();
        Arx rng = new Arx();
        long a = 0;

        // constant = fast

        sw.Start();
        for (int i = 0; i < 10000000; i++)
        {
            a += rng.GenConstant(123);
        }
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        Console.WriteLine("{0:x16}", a);
        sw.Reset();

        // no loop = fast

        sw.Start();
        for (int i = 0; i < 10000000; i++)
        {
            a += rng.GenNoLoop(123);
        }
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        Console.WriteLine("{0:x16}", a);
        sw.Reset();

        // modulus variable = slow

        sw.Start();
        for (int i = 0; i < 10000000; i++)
        {
            a += rng.GenVariable(123);
        }
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        Console.WriteLine("{0:x16}", a);
        sw.Reset();
    }
}

class Arx
{
    static public ulong[] state = new ulong[4];
    static public ulong[] outBlock = new ulong[4];

    static int counter = -1;

    public Arx(ulong seed = 0)
    {
        if (seed == 0)
            state[1] = (ulong)DateTime.UtcNow.Ticks;

        else
            state[1] = seed;
    }

    public ulong Gen()
    {
        counter = (counter + 1) & 3;

        if (counter == 0)
        {
            state[0]++;

            ulong x0 = state[0];
            ulong x1 = state[1];
            ulong x2 = state[2];
            ulong x3 = state[3];

            for (int i = 0; i < 2; i++)
            {
                x0 += x1; x1 ^= ((x0 << 32) | (x0 >> (64 - 32)));
                x1 += x0; x0 ^= ((x1 << 32) | (x1 >> (64 - 32)));
                x2 += x3; x3 ^= ((x2 << 32) | (x2 >> (64 - 32)));
                x3 += x2; x2 ^= ((x3 << 32) | (x3 >> (64 - 32)));

                x0 += x2; x2 ^= ((x0 << 27) | (x0 >> (64 - 27)));
                x2 += x0; x0 ^= ((x2 << 27) | (x2 >> (64 - 27)));
                x1 += x3; x3 ^= ((x1 << 27) | (x1 >> (64 - 27)));
                x3 += x1; x1 ^= ((x3 << 27) | (x3 >> (64 - 27)));

                x0 += x3; x3 ^= ((x0 << 11) | (x0 >> (64 - 11)));
                x3 += x0; x0 ^= ((x3 << 11) | (x3 >> (64 - 11)));
                x1 += x2; x2 ^= ((x1 << 11) | (x1 >> (64 - 11)));
                x2 += x1; x1 ^= ((x2 << 11) | (x2 >> (64 - 11)));
            }

            outBlock[0] = x0;
            outBlock[1] = x1;
            outBlock[2] = x2;
            outBlock[3] = x3;
        }

        return outBlock[counter];
    }

    public long GenConstant(int mod)
    {
        long a, b = 0x7fffffffffffffff - mod;

        do
        {
            a = (long)(Gen() >> 1);
        } while (a > b);

        return a % 12345;
    }

    public long GenVariable(int mod)
    {
        long a, b = 0x7fffffffffffffff - mod;

        do
        {
            a = (long)(Gen() >> 1);
        } while (a > b);

        return a % mod;
    }

    public long GenNoLoop(int mod)
    {
        long a = (long)(Gen() >> 1);

        return a % mod;
    }
}

最佳答案

这是一个优化器问题。

首先毫无疑问,使用变量比使用常量慢因为加载变量需要更多时间

但是当你删除循环部分时,方法变得简单并且优化器使它们内联。请注意,当方法内联时,rng.GenNoLoop(123) 中的表达式 a % mod 可以被识别为常量。所以它们现在是相同的。

要恢复未优化的状态,您需要将一个真实变量传递给GenNoLoop

static int mod = 123;

static void Main(string[] args)
{
    rng.GenNoLoop(mod);
}

另一种选择是强制方法不内联

[MethodImpl(MethodImplOptions.NoInlining)]
public long GenNoLoop(int mod)

关于C# 奇怪的模数速度行为,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/56711861/

相关文章:

c# - 从 C# 使用托管 C++ dll

c# - 在 C# 中传递引用类型

c# - MissingFieldException 与 CodeContracts

c# - 如何使用 C# 将 html 文本转换为 utf-8

c# - 如何通过 Web 请求在 C# 中编辑请求主机 header ?

c# - 如何在谷歌标记 map 上使用 C# .net MVC 框架显示 Azure Cosmos DB 数据?

c# - 更改端点地址后如何修复 'Unrecognized Message Version'

c# - 如何使用 Azure 的弹性数据库设置 Multi-Tenancy ?

c# - ASP.NET MVC 单元测试自定义 AuthorizeAttribute

c# - Task.WhenAll 没有按预期抛出异常