我有以下 IR:
; ModuleID = 'vec.ir'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin15.3.0"
; @patch: loads five floats from fixed absolute addresses (integer constants
; cast with inttoptr), adds the first loaded value to each of the other four,
; stores the four sums to four other fixed absolute addresses, and calls
; @__tickValue after every store. The single unnamed i64 argument (%0) is only
; forwarded as the second argument of each @__tickValue call.
;
; NOTE(review): none of the load or store addresses below are adjacent in
; memory, and every store is followed by a call to an external function whose
; memory effects are not visible here. Both presumably work against the SLP
; vectorizer combining the four fadds -- confirm with opt's pass remarks.
define void @patch(i64) {
entry:
; %1 is the common left-hand operand of all four fadds.
%1 = load float, float* inttoptr (i64 4388240000 to float*)
; Each of the next four load/fadd pairs computes %1 + <value at a constant address>.
%2 = load float, float* inttoptr (i64 4387644544 to float*)
%3 = fadd float %1, %2
%4 = load float, float* inttoptr (i64 4387729024 to float*)
%5 = fadd float %1, %4
%6 = load float, float* inttoptr (i64 4387730560 to float*)
%7 = fadd float %1, %6
%8 = load float, float* inttoptr (i64 4387513984 to float*)
%9 = fadd float %1, %8
; Write each sum to its own constant address, then call @__tickValue with a
; per-store i64 constant and the function argument %0.
store float %3, float* inttoptr (i64 4371309760 to float*)
call void @__tickValue(i64 105553117467608, i64 %0)
store float %5, float* inttoptr (i64 4371851456 to float*)
call void @__tickValue(i64 105553117465688, i64 %0)
store float %7, float* inttoptr (i64 4371574976 to float*)
call void @__tickValue(i64 105553117465528, i64 %0)
store float %9, float* inttoptr (i64 4371576512 to float*)
call void @__tickValue(i64 105553117466648, i64 %0)
ret void
}
declare void @__tickValue(i64, i64)
当我运行 /usr/local/opt/llvm/bin/opt -S -O3 vec.ir > vec-opt.ir
时,我得到:
; ModuleID = 'vec.ir'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin15.3.0"
; @patch as emitted by `opt -S -O3`. Compared with the input IR, the
; instruction sequence is the same: five scalar loads, four scalar fadds,
; four scalar stores, four calls. The only visible differences are the
; `align` annotations on the loads/stores and the `tail` marker on the calls.
; No vector types or vector instructions appear, i.e. nothing was vectorized.
define void @patch(i64) {
entry:
; Loads now carry `align 128`; %1 is still the common operand of every fadd.
%1 = load float, float* inttoptr (i64 4388240000 to float*), align 128
%2 = load float, float* inttoptr (i64 4387644544 to float*), align 128
%3 = fadd float %1, %2
%4 = load float, float* inttoptr (i64 4387729024 to float*), align 128
%5 = fadd float %1, %4
%6 = load float, float* inttoptr (i64 4387730560 to float*), align 128
%7 = fadd float %1, %6
%8 = load float, float* inttoptr (i64 4387513984 to float*), align 128
%9 = fadd float %1, %8
; Stores now carry `align 64`; each is still followed by its own @__tickValue
; call, which is now marked `tail`.
store float %3, float* inttoptr (i64 4371309760 to float*), align 64
tail call void @__tickValue(i64 105553117467608, i64 %0)
store float %5, float* inttoptr (i64 4371851456 to float*), align 64
tail call void @__tickValue(i64 105553117465688, i64 %0)
store float %7, float* inttoptr (i64 4371574976 to float*), align 64
tail call void @__tickValue(i64 105553117465528, i64 %0)
store float %9, float* inttoptr (i64 4371576512 to float*), align 64
tail call void @__tickValue(i64 105553117466648, i64 %0)
ret void
}
declare void @__tickValue(i64, i64)
根据 http://llvm.org/docs/Vectorizers.html#the-slp-vectorizer ,我原本预期这些 fadd
指令会被合并为矢量指令。
我怎样才能获得更多信息,弄清楚优化器为什么没有进行矢量化?
最佳答案
LLVM 具有内部成本模型,可以自动判断 SIMD 矢量化是否有益。您可以通过在编译命令行中添加一些标志(例如 Clang 的 -Rpass=、-Rpass-missed=、-Rpass-analysis=)来获得诊断信息。
您还可以尝试在代码中添加一些编译指示(directives,例如针对循环的 #pragma clang loop)来“强制”进行矢量化。
如果您是从 LLVM IR 而不是源代码入手,仍然可以使用 opt 的命令行开关(例如 -pass-remarks=、-pass-remarks-missed=、-pass-remarks-analysis=)来获取同类诊断信息。
关于llvm - 为什么 LLVM SIMD 不向量化此代码?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/36253728/