c# - 比较两个不同长度的整数数组时提高性能

标签 c# performance algorithm

我有一个瓶颈(或者至少是我认为我可以做得更好的领域)这个比较器,它基本上是一个序数字符串比较器,但对整数(ushort,虽然我认为这不重要)数组有效。

数组可以有不同的长度,但长度只有在元素 [0..n] 匹配时才有意义,其中 n 是最短数组的长度。在这种情况下,较长的数组被认为“更大”。

1 2 3 < 1 2 4

1 2 3 5 < 1 2 4

1 2 5 3 > 1 2 4

1 2 3 4 > 1 2 3

    public int Compare(ushort[] x, ushort[] y)
    {
        int pos = 0;
        int len = Math.Min(x.Length, y.Length);
        while (pos < len && x[pos] == y[pos])
            pos++;

        return pos < len ?
            x[pos].CompareTo(y[pos]) :
            x.Length.CompareTo(y.Length);

    }

关于如何优化它有什么想法吗?

更新

回应一位评论者关于我在这里实际做的事情:我意识到很久以前我问过一个与此相关的问题,它准确地显示了我在上下文中所做的事情。唯一的主要区别是我现在使用 ushort 数组而不是字符串作为键,因为它更紧凑。

使用整个路径作为键让我可以使用部分键从排序集中获取 View ,从而为子集查询提供高性能。我试图在构建索引时提高性能。 Data structure for indexed searches of subsets

顺便说一下,到目前为止,我对这里的回答印象深刻,多年来我就 SO 提出了很多问题,但这无疑是我见过的最周到、最有趣的答案集。我还不确定我的具体问题的正确答案是什么(即 - 短数组的数百万次比较),但他们中的每一个都教会了我一些我不知道的东西。

最佳答案

这是我想出的,我使用您的代码和一些并行代码的组合测试了大约 1600 万 (2^24)。

public int CompareParallel(ushort[]x, ushort[] y, int len, int segLen)
{
    int compareArrLen = ( len / segLen ) + 1;
    int [ ] compareArr = new int [ compareArrLen ];
    Parallel.For ( 0 , compareArrLen , 
                   new Action<int , ParallelLoopState> ( ( i , state ) =>
    {
        if ( state.LowestBreakIteration.HasValue 
                 && state.LowestBreakIteration.Value < i )
            return;
        int segEnd = ( i + 1 ) * segLen;
        int k = len < segEnd ? len : segEnd;
        for ( int j = i * segLen ; j < k ; j++ )
            if ( x [ j ] != y [ j ] )
            {
                compareArr [ i ] = ( x [ j ].CompareTo ( y [ j ] ) );
                state.Break ( );
                return;
            }
    } ) );
    int r = compareArrLen - 1;
    while ( r >= 0 )
    {
        if ( compareArr [ r ] != 0 )
            return compareArr [ r ];
        r--;
    }
    return x.Length.CompareTo ( y.Length );
}

public int CompareSequential ( ushort [ ] x , ushort [ ] y, int len )
{
    int pos = 0;
    while ( pos < len && x [ pos ] == y [ pos ] )
        pos++;

    return pos < len ?
        x [ pos ].CompareTo ( y [ pos ] ) :
        x.Length.CompareTo ( y.Length );

}

public int Compare( ushort [ ] x , ushort [ ] y ) 
{
    //determined through testing to be the best on my machine
    const int cutOff = 4096;
    int len = x.Length < y.Length ? x.Length : y.Length;
    //check if len is above a specific threshold 
    //and if first and a number in the middle are equal
    //chose equal because we know that there is a chance that more
    //then 50% of the list is equal, which would make the overhead
    //worth the effort
    if ( len > cutOff && x [ len - 1 ] == y [ len - 1 ] 
           && x [ len/2 ] == y [ len/2 ] )
    {
        //segment length was determined to be best through testing
        //at around 8% of the size of the array seemed to have the
        //on my machine
        return CompareParallel ( x , y , len , (len / 100)*8 );
    }
    return CompareSequential ( x , y, len );
}

这是我写的测试:

class Program
{
    [Flags]
    private enum InfoLevel:byte
    {
        Detail=0x01, Summary=0x02
    }

    private static InfoLevel logLevel = InfoLevel.Summary;

    private static void LogDetail ( string content ) 
    {
        LogInfo ( InfoLevel.Detail,content );
    }

    private static void LogSummary ( string content ) 
    {
        LogInfo ( InfoLevel.Summary , content );
    }

    private static void LogInfo ( InfoLevel level , string content ) 
    {
        if ( ( level & logLevel ) == level )
            Console.WriteLine ( content );
    }

    private static void LogInfo ( InfoLevel level , string format, 
                                  params object[] arg )
    {
        if ( ( level & logLevel ) == level )
            Console.WriteLine ( format:format, arg:arg  );
    }

    private static void LogDetail ( string format , params object [ ] arg )
    {
        LogInfo ( InfoLevel.Detail , format, arg );
    }

    private static void LogSummary ( string format , params object [ ] arg )
    {
        LogInfo ( InfoLevel.Summary , format, arg );
    }

    const string _randTestResultHeader = "\r\nRandom Array Content\r\n";
    const string _equalArrayResultHeader = "Only Length Different\r\n\r\n";
    const string _summaryTestResultsHeader = 
                                "Size\t\tOrig Elps\tPara Elps\tComp Elps\r\n";
    const string _summaryBodyContent = 
                         "{0}\t\t{1:0.0000}\t\t{2:0.0000}\t\t{3:0.00000}\r\n";

    static void Main ( string [ ] args )
    {
        Console.SetOut(new StreamWriter(File.Create("out.txt")));

        int segLen = 0;
        int segPercent = 7;
        Console.WriteLine ( "Algorithm Test, Time results in milliseconds" );
        for ( ; segPercent < 13; segPercent ++ )
        {
            Console.WriteLine ( 
                      "Test Run with parallel Dynamic segment size at {0}%"
                       +" of Array Size (Comp always at 8%)\r\n" , segPercent);

            StringBuilder _aggrRandResults = new StringBuilder ( );
            StringBuilder _aggrEqualResults = new StringBuilder ( );

            _aggrRandResults.Append ( _randTestResultHeader );
            _aggrEqualResults.Append ( _equalArrayResultHeader );

            _aggrEqualResults.Append ( _summaryTestResultsHeader );
            _aggrRandResults.Append ( _summaryTestResultsHeader );


            for ( int i = 10 ; i < 25 ; i++ )
            {
                int baseLen = ( int ) Math.Pow ( 2 , i );
                segLen = ( baseLen / 100 ) * segPercent;

                var testName = "Equal Length ";
                var equalTestAverage = RandomRunTest ( testName , baseLen , 
                                                       baseLen, segLen );
                testName = "Left Side Larger";
                var lslargerTestAverage=RandomRunTest(testName,baseLen+10, 
                                                      baseLen, segLen );
                testName = "Right Side Larger";
                var rslargerTestAverage = RandomRunTest ( testName , baseLen ,
                                                        baseLen + 10, segLen );

                double [ ] completelyRandomTestAvg = new double [ 3 ];
                for ( int l = 0 ; l < completelyRandomTestAvg.Length ; l++ )
                    completelyRandomTestAvg [ l ] = ( equalTestAverage [ l ] +
                                                 lslargerTestAverage [ l ] +
                                              rslargerTestAverage [ l ] ) / 3;

                LogDetail ( "\r\nRandom Test Results:" );
                LogDetail ("Original Composite Test Average: {0}" ,
                           completelyRandomTestAvg [ 0 ] );
                LogDetail ( "Parallel Composite Test Average: {0}" ,
                            completelyRandomTestAvg [ 1 ]  );

                _aggrRandResults.AppendFormat ( _summaryBodyContent , 
                    baseLen , 
                    completelyRandomTestAvg [ 0 ] , 
                    completelyRandomTestAvg [ 1 ] , 
                    completelyRandomTestAvg [ 2 ]);

                testName = "Equal Len And Values";
                var equalEqualTest = EqualTill ( testName , baseLen , 
                                                 baseLen, segLen );

                testName = "LHS Larger";
                var equalLHSLargerTest = EqualTill ( testName , baseLen + 10 , 
                                                     baseLen, segLen );

                testName = "RHS Larger";
                var equalRHSLargerTest = EqualTill ( testName , baseLen , 
                                                     baseLen + 10, segLen );

                double [ ] mostlyEqualTestAvg = new double [ 3 ];
                for ( int l = 0 ; l < mostlyEqualTestAvg.Length ; l++ )
                    mostlyEqualTestAvg [ l ] = ( ( equalEqualTest [ l ] +
                                            equalLHSLargerTest [ l ] +
                                            equalRHSLargerTest [ l ] ) / 3 );

                LogDetail( "\r\nLength Different Test Results" );
                LogDetail( "Original Composite Test Average: {0}" , 
                           mostlyEqualTestAvg [ 0 ] );
                LogDetail( "Parallel Composite Test Average: {0}" , 
                            mostlyEqualTestAvg [ 1 ] );

                _aggrEqualResults.AppendFormat ( _summaryBodyContent , 
                                                 baseLen , 
                                                 mostlyEqualTestAvg [ 0 ] , 
                                                 mostlyEqualTestAvg [ 1 ] ,
                                                 mostlyEqualTestAvg [ 2 ]);
            }

            LogSummary ( _aggrRandResults.ToString() + "\r\n");
            LogSummary ( _aggrEqualResults.ToString()+ "\r\n");

        }
        Console.Out.Flush ( );
    }


    private const string _testBody = 
                  "\r\n\tOriginal:: Result:{0}, Elapsed:{1}"
                 +"\r\n\tParallel:: Result:{2}, Elapsed:{3}"
                 +"\r\n\tComposite:: Result:{4}, Elapsed:{5}";
    private const string _testHeader = 
                  "\r\nTesting {0}, Array Lengths: {1}, {2}";
    public static double[] RandomRunTest(string testName, int shortArr1Len, 
                                         int shortArr2Len, int parallelSegLen)
    {

        var shortArr1 = new ushort [ shortArr1Len ];
        var shortArr2 = new ushort [ shortArr2Len ];
        double [ ] avgTimes = new double [ 3 ];

        LogDetail ( _testHeader , testName , shortArr1Len , shortArr2Len ) ;
        for ( int i = 0 ; i < 10 ; i++ )
        {
            int arrlen1 = shortArr1.Length , arrlen2 = shortArr2.Length;

            double[] currResults = new double [ 3 ];

            FillCompareArray ( shortArr1 , shortArr1.Length );
            FillCompareArray ( shortArr2 , shortArr2.Length );

            var sw = new Stopwatch ( );

            //Force Garbage Collection 
            //to avoid having it effect 
            //the test results this way 
            //test 2 may have to garbage 
            //collect due to running second
            GC.Collect ( );
            sw.Start ( );
            int origResult = Compare ( shortArr1 , shortArr2 );
            sw.Stop ( );
            currResults[0] = sw.Elapsed.TotalMilliseconds;
            sw.Reset ( );

            GC.Collect ( );
            sw.Start ( );
            int parallelResult = CompareParallelOnly ( shortArr1 , shortArr2, 
                                                       parallelSegLen );
            sw.Stop ( );
            currResults [ 1 ] = sw.Elapsed.TotalMilliseconds;
            sw.Reset ( );

            GC.Collect ( );
            sw.Start ( );
            int compositeResults = CompareComposite ( shortArr1 , shortArr2 );
            sw.Stop ( );                
            currResults [ 2 ] = sw.Elapsed.TotalMilliseconds;

            LogDetail ( _testBody, origResult , currResults[0] , 
                        parallelResult , currResults[1], 
                        compositeResults, currResults[2]);

            for ( int l = 0 ; l < currResults.Length ; l++ )
                avgTimes [ l ] = ( ( avgTimes[l]*i)+currResults[l]) 
                                    / ( i + 1 );
        }
        LogDetail ( "\r\nAverage Run Time Original: {0}" , avgTimes[0]);
        LogDetail ( "Average Run Time Parallel: {0}" , avgTimes[1]);
        LogDetail ( "Average Run Time Composite: {0}" , avgTimes [ 2 ] );

        return avgTimes;
    }

    public static double [ ] EqualTill ( string testName, int shortArr1Len , 
                                       int shortArr2Len, int parallelSegLen)
    {

        const string _testHeader = 
               "\r\nTesting When Array Difference is "
               +"Only Length({0}), Array Lengths: {1}, {2}";

        int baseLen = shortArr1Len > shortArr2Len 
                          ? shortArr2Len : shortArr1Len;

        var shortArr1 = new ushort [ shortArr1Len ];
        var shortArr2 = new ushort [ shortArr2Len ];
        double [ ] avgTimes = new double [ 3 ];

        LogDetail( _testHeader , testName , shortArr1Len , shortArr2Len );
        for ( int i = 0 ; i < 10 ; i++ )
        {

            FillCompareArray ( shortArr1 , shortArr1Len);
            Array.Copy ( shortArr1 , shortArr2, baseLen );
            double [ ] currElapsedTime = new double [ 3 ];
            var sw = new Stopwatch ( );
            //See previous explaination 
            GC.Collect ( );
            sw.Start ( );
            int origResult = Compare ( shortArr1 , shortArr2 );
            sw.Stop ( );
            currElapsedTime[0] = sw.Elapsed.TotalMilliseconds;
            sw.Reset ( );

            GC.Collect ( );
            sw.Start ( );
            int parallelResult = CompareParallelOnly ( shortArr1, shortArr2, 
                                     parallelSegLen );
            sw.Stop ( );
            currElapsedTime[1] = sw.Elapsed.TotalMilliseconds;
            sw.Reset ( );

            GC.Collect ( );
            sw.Start ( );
            var compositeResult = CompareComposite ( shortArr1 , shortArr2 );
            sw.Stop ( );
            currElapsedTime [ 2 ] = sw.Elapsed.TotalMilliseconds;

            LogDetail ( _testBody , origResult , currElapsedTime[0] , 
                parallelResult , currElapsedTime[1], 
                compositeResult,currElapsedTime[2]);

            for ( int l = 0 ; l < currElapsedTime.Length ; l++ )
                avgTimes [ l ] = ( ( avgTimes [ l ] * i ) 
                                   + currElapsedTime[l])/(i + 1);
        }
        LogDetail ( "\r\nAverage Run Time Original: {0}" , avgTimes [ 0 ] );
        LogDetail ( "Average Run Time Parallel: {0}" , avgTimes [ 1 ] );
        LogDetail ( "Average Run Time Composite: {0}" , avgTimes [ 2 ] );
        return avgTimes;
    }


    static Random rand = new Random ( );
    public static void FillCompareArray ( ushort[] compareArray, int length ) 
    {
        var retVals = new byte[length];
        ( rand ).NextBytes ( retVals );
        Array.Copy ( retVals , compareArray , length);
    }

    public static int CompareParallelOnly ( ushort [ ] x , ushort[] y, 
                                            int segLen ) 
    {
       int len = x.Length<y.Length ? x.Length:y.Length;
       int compareArrLen = (len/segLen)+1;
       int[] compareArr = new int [ compareArrLen ];
       Parallel.For ( 0 , compareArrLen , 
           new Action<int , ParallelLoopState> ( ( i , state ) =>
       {
           if ( state.LowestBreakIteration.HasValue 
                    && state.LowestBreakIteration.Value < i )
               return;
           int segEnd = ( i + 1 ) * segLen;
           int k = len<segEnd?len:segEnd;

           for ( int j = i * segLen ; j < k ; j++ )
               if ( x [ j ] != y [ j ] )
               {
                   compareArr [ i ] = ( x [ j ].CompareTo ( y [ j ] ) );
                   state.Break ( );
                   return;
               }
       } ) );
       int r=compareArrLen-1;
       while ( r >= 0 ) 
       {
           if ( compareArr [ r ] != 0 )
               return compareArr [ r ];
           r--;
       }
       return x.Length.CompareTo ( y.Length );
    }

    public static int Compare ( ushort [ ] x , ushort [ ] y )
    {
        int pos = 0;
        int len = Math.Min ( x.Length , y.Length );
        while ( pos < len && x [ pos ] == y [ pos ] )
            pos++;

        return pos < len ?
            x [ pos ].CompareTo ( y [ pos ] ) :
            x.Length.CompareTo ( y.Length );

    }

    public static int CompareParallel ( ushort[] x, ushort[] y, int len, 
                                        int segLen )
    {
        int compareArrLen = ( len / segLen ) + 1;
        int [ ] compareArr = new int [ compareArrLen ];
        Parallel.For ( 0 , compareArrLen , 
            new Action<int , ParallelLoopState> ( ( i , state ) =>
        {
            if ( state.LowestBreakIteration.HasValue 
                 && state.LowestBreakIteration.Value < i )
                return;
            int segEnd = ( i + 1 ) * segLen;
            int k = len < segEnd ? len : segEnd;
            for ( int j = i * segLen ; j < k ; j++ )
                if ( x [ j ] != y [ j ] )
                {
                    compareArr [ i ] = ( x [ j ].CompareTo ( y [ j ] ) );
                    state.Break ( );
                    return;
                }
        } ) );
        int r = compareArrLen - 1;
        while ( r >= 0 )
        {
            if ( compareArr [ r ] != 0 )
                return compareArr [ r ];
            r--;
        }
        return x.Length.CompareTo ( y.Length );
    }

    public static int CompareSequential(ushort [ ] x , ushort [ ] y, int len)
    {
        int pos = 0;
        while ( pos < len && x [ pos ] == y [ pos ] )
            pos++;

        return pos < len ?
            x [ pos ].CompareTo ( y [ pos ] ) :
            x.Length.CompareTo ( y.Length );

    }

    public static int CompareComposite ( ushort [ ] x , ushort [ ] y ) 
    {
        const int cutOff = 4096;
        int len = x.Length < y.Length ? x.Length : y.Length;

        if ( len > cutOff && x [ len - 1 ] == y [ len - 1 ]
                 && x [ len/2 ] == y [ len/2 ] )
            return CompareParallel ( x , y , len , (len / 100)*8 );

        return CompareSequential ( x , y, len );
    }
}

注意:

确保您使用优化的代码进行构建,当我不包括这一步时,结果非常不同,它使并行代码看起来比实际的改进要大得多。

我得到的结果是,对于非常长的相同数字集,执行时间减少了大约 33%。它仍然随着输入的增加而线性增长,但速度较慢。对于小数据集(在我的机器上小于 4092),它的启动速度也较慢,但通常花费的时间足够小(在我的机器上为 .001 毫秒),如果你确实得到了,那么使用它是值得的一个几乎相等的大数组。

关于c# - 比较两个不同长度的整数数组时提高性能,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15117859/

相关文章:

c# - COM异常 : 80040154

c# - 如何在 C#/Visual Studio 中调用包含在文件夹中的类?

c# - 获取可用(语言)resx 文件的列表

c++ - 在 jpeg/bmp/pdf 图像中搜索直线、圆和文本

algorithm - 处理请求百分比的在线算法

c# - 使用 'using' 语句时是否需要关闭连接

php - 缓慢的 vagrant box,如何改进?

iphone - 编写 "Brushes"优质绘图应用程序,需要书籍和资源推荐

java - CollectionUtils.isNotEmpty 和 In Place 检查之间的性能差异

algorithm - 如何正确绘制函数?