performance - 蛮力旅行商: Why is Haskell so much slower than C?

我最初写了一个功能性的蛮力搜索(城市的 ADT 表示，城市的元组作为距离数组的索引，用 Data.List.permutations 懒惰地产生排列并用严格的左折叠来消耗它们)，但这非常慢。我通过以更命令的方式连续重写它并使用 Heap 的算法进行排列，设法将它加速了 3 倍，但是直接将结果转换为(写得不好的)C 会快 10 倍!到底是怎么回事？

{-# LANGUAGE TupleSections, BangPatterns, ForeignFunctionInterface #-}

import Data.Array.Unboxed
import Data.Array.Base (unsafeAt)
import qualified Data.Vector.Unboxed.Mutable as M
import qualified Data.Vector.Unboxed as V
import Data.Word (Word)
import Data.IORef
import GHC.Arr (Ix(..)) 
import Control.Arrow
import Control.Monad

import Data.Array.Storable
import Foreign.Ptr (Ptr)
import Foreign.C (CInt)

import Criterion.Main

distances :: UArray Int Word
distances = listArray (0, 255) [0,629,1023,1470,1276,915,894,1199,760,795,676,903,1394,350,647,922,629,
                                0,556,1312,674,1097,317,946,784,504,1228,276,1254,878,712,1236,1023,556,
                                0,861,408,977,280,480,1340,288,1424,533,822,1346,1267,1198,1470,1312,861,
                                0,1196,751,1125,382,2052,809,1476,1381,78,1817,1957,980,1276,674,408,1196,
                                0,1384,383,837,1370,676,1777,469,1173,1551,1327,1600,915,1097,977,751,1384,
                                0,1103,722,1641,719,730,1303,676,1223,1530,249,894,317,280,1125,383,1103,
                                0,743,1083,392,1409,256,1079,1178,1019,1293,1199,946,480,382,837,722,743,
                                0,1708,454,1366,999,343,1549,1618,972,760,784,1340,2052,1370,1641,1083,1708,
                                0,1253,1392,901,1986,640,113,1679,795,504,288,809,676,719,392,454,1253,
                                0,1138,623,749,1136,1164,925,676,1228,1424,1476,1777,730,1409,1366,1392,1138,
                                0,1502,1399,786,1283,551,903,276,533,1381,469,1303,256,999,901,623,1502,
                                0,1335,1127,857,1469,1394,1254,822,78,1173,676,1079,343,1986,749,1399,1335,
                                0,1741,1889,908,350,878,1346,1817,1551,1223,1178,1549,640,1136,786,1127,1741,
                                0,543,1182,647,712,1267,1957,1327,1530,1019,1618,113,1164,1283,857,1889,543,
                                0,1566,922,1236,1198,980,1600,249,1293,972,1679,925,551,1469,908,1182,1566,0]

tsp_mut :: [Int] -> IO ([Int], Word) 
tsp_mut [] = error "empty list"
tsp_mut cs = do
    route <- V.thaw . V.fromList $ cs
    let  l = length cs -1 
         dis a b= unsafeAt distances $ 16*a + b
         f (prev, !acc) c = (c, acc + dis prev c)  
         len = V.unsafeFreeze route >>=  return . snd . (\v -> V.foldl' f (V.unsafeLast v, 0) v) 
    ref <- newIORef . (cs,) =<< len
    let permut 0 = do
                !l <- len 
                (_, ol) <- readIORef ref
                when (l < ol) (writeIORef ref . (,l) . V.toList =<< V.freeze route)
        permut n = let op = if odd n then const 0 else id
                    in forM_ [0..n] (\ i -> permut (n-1) >> M.unsafeSwap route (op i) n)             
    permut l >> readIORef ref 


foreign import ccall unsafe "tsp_c"
        c_tsp_c :: Int -> Ptr CInt ->  IO Word
tsp_c :: [Int] -> IO ([Int], Word)
tsp_c cs = do
        let l=length cs
        marr <- newListArray (0, l-1) $ map fromIntegral cs
        r <- withStorableArray marr $ c_tsp_c l 
        list <-getElems marr
        return (map fromIntegral list, fromIntegral r)

main = defaultMain [ pertestIO "tsp_mut" tsp_mut,
                      pertestIO "tsp_c" tsp_c ]              
        where 
           pertestIO str f = bgroup str $ map (uncurry bench . (show &&&  nfIO . (f . (`take` [0..15])))) [6..11]

这里的C代码:

#include <stdlib.h> 
#include <string.h>

typedef unsigned int word;

word distances[] = { 0,629,1023,1470,1276,915,894,1199,760,795,676,903,1394,350,647,922,629,
                     0,556,1312,674,1097,317,946,784,504,1228,276,1254,878,712,1236,1023,556,
                     0,861,408,977,280,480,1340,288,1424,533,822,1346,1267,1198,1470,1312,861,
                     0,1196,751,1125,382,2052,809,1476,1381,78,1817,1957,980,1276,674,408,1196,
                     0,1384,383,837,1370,676,1777,469,1173,1551,1327,1600,915,1097,977,751,1384,
                     0,1103,722,1641,719,730,1303,676,1223,1530,249,894,317,280,1125,383,1103,
                     0,743,1083,392,1409,256,1079,1178,1019,1293,1199,946,480,382,837,722,743,
                     0,1708,454,1366,999,343,1549,1618,972,760,784,1340,2052,1370,1641,1083,1708,
                     0,1253,1392,901,1986,640,113,1679,795,504,288,809,676,719,392,454,1253,
                     0,1138,623,749,1136,1164,925,676,1228,1424,1476,1777,730,1409,1366,1392,1138,
                     0,1502,1399,786,1283,551,903,276,533,1381,469,1303,256,999,901,623,1502,
                     0,1335,1127,857,1469,1394,1254,822,78,1173,676,1079,343,1986,749,1399,1335,
                     0,1741,1889,908,350,878,1346,1817,1551,1223,1178,1549,640,1136,786,1127,1741,
                     0,543,1182,647,712,1267,1957,1327,1530,1019,1618,113,1164,1283,857,1889,543,
                     0,1566,922,1236,1198,980,1600,249,1293,972,1679,925,551,1469,908,1182,1566,0}; 

word dist (int a, int b) {
      return distances[16*a+b];
}

int len (int cities[], int length) {
   int l = dist(cities[length-1], cities[0]);
   int i;
   for (i=0; i < length-1; i++ ) {
        l +=dist(cities[i], cities[i+1]);
   }
   return l;
}

int *route, *bestRoute; 
word minL; 
int sz, size, cntr;

void permut (int n){
    if (n==0) { 
        cntr++;
        int l=len(route, sz);
        if (l<minL) {
            memcpy(bestRoute, route,size);
            minL=l;
            }
        return;    
        }
    int i, swap, temp; 
    for (i=0; i<=n; i++) {
        permut(n-1);
        swap=n% 2 ? i : 0;
        temp=route[swap];
        route[swap]=route[n];
        route[n]=temp; 
    }         
}

word tsp_c (int length, int cities[]) {
    size= length * sizeof(int);
    cntr=0;
    sz=length;
    route = malloc(size); 
    bestRoute = malloc(size); 
    memcpy( route, cities, size); 
    memcpy( bestRoute, cities, size); 
    minL = len (cities, length);
    permut(length-1); 
    memcpy(cities, bestRoute, length * sizeof(int)); 
    free(route);
    free(bestRoute); 
    /* printf("permutations: %d", cntr); */
    return minL;
}

我在 Windows 上使用了 64 位版本的 GHC 7.8.3 和 -O2

最佳答案

引用 Haskell 网站:

In applications where performance is required at any cost, or when the goal is detailed tuning of a low-level algorithm, an imperative language like C would probably be a better choice than Haskell, exactly because it provides more intimate control over the exact way in which the computation is carried out ...

另一种看待它的方式:C'指令'更接近硬件，因此它固有地具有优势。当您从“高级”语言直接翻译成 C 时，这一点尤其明显。

另一方面，高级语言的优势在于您专注于更大的图景，您可以(通常)更快地完成更多优化。

关于performance - 蛮力旅行商: Why is Haskell so much slower than C?，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/27121135/

performance - 蛮力旅行商: Why is Haskell so much slower than C?

上一篇：f# - 可能的 F# 交互式 PInvoke 错误

下一篇：javascript - 刷新图像在 mozilla 中不起作用