rust - 为什么 rustc 编译这个 250 行的程序需要超过一分钟的时间,而 clang 需要不到一秒钟的时间?

标签 rust compiler-optimization

背景:我为多种编程语言编写了一个简陋的代码生成程序,用来比较各种编译器编译由简单函数组成的、长得离谱的(10 万行以上)程序需要多长时间。但是当给 rustc 传入优化标志时,Rust 版本的程序始终无法完成编译。
我发现很容易构造出一个非常小的 Rust 程序(见下面的示例),在使用 rustc 的 -C opt-level=2 或 -C opt-level=3 标志时,它的编译时间(在我看来)长得过分。我在 Linux 上尝试了 1.16 stable、1.30 stable、1.32.0-nightly,并在 macOS 和 Windows 上尝试了 1.30 stable——编译时间都让我觉得太长。我这样说是因为相比之下,大致等效的 C++ 代码用 clang++ -O3 编译只需要不到一秒钟。这让我想到几个问题:

  • 是否有某种优化是 rustc 正在(缓慢地)对这段代码执行、而 clang 没有执行的?
  • 如果我想自己研究一下,有没有好的资料介绍如何对 rustc 进行性能分析?如果能查出 opt-level=2 中是否有某个特定的优化是罪魁祸首,那就太好了。

  • rustc guide 介绍了如何打开调试日志。使用它可以得到:
    INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::rt::lang_start::<()>)
    INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::rt::lang_start::{{closure}}::<(), i8, extern "rust-call" fn(()) -> i32, fn()>)
    INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::sys::unix::process::process_common::ExitCode::as_i32)
    INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<[closure@DefId(1/1:1916 ~ std[424f]::rt[0]::lang_start[0]::{{closure}}[0]) 0:fn()] as std::ops::FnOnce<()>>::call_once - shim)
    INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<[closure@DefId(1/1:1916 ~ std[424f]::rt[0]::lang_start[0]::{{closure}}[0]) 0:fn()] as std::ops::FnOnce<()>>::call_once - shim(vtable))
    INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::ptr::real_drop_in_place::<[closure@DefId(1/1:1916 ~ std[424f]::rt[0]::lang_start[0]::{{closure}}[0]) 0:fn()]> - shim(None))
    INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<() as std::process::Termination>::report)
    INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(<std::process::ExitCode as std::process::Termination>::report)
    INFO 2018-12-09T19:37:54Z: rustc_codegen_ssa::base: codegen_instance(std::fmt::ArgumentV1::new::<i32>)
    INFO 2018-12-09T19:39:12Z: rustc_codegen_llvm::back::lto: 5 symbols to preserve in this crate
    INFO 2018-12-09T19:39:12Z: rustc_codegen_llvm::back::lto: going for that thin, thin LTO
    
    注意最后一条 rustc_codegen_ssa::base 日志和第一条 rustc_codegen_llvm::back::lto 日志之间的“微小”间隔(19:37:54 到 19:39:12)——我应该如何解读它?
    Rust gist · C++ gist
    //$ rustc -C opt-level=2 test_20.rs
    //  takes over a minute to compile, rustc 1.32.0-nightly
    //  see https://gist.github.com/ajdust/5e92cab52ffab5ea2a52edbd47aa348a
    #![allow(unused_parens)]
    
    /// Generated leaf function: folds `p` through a fixed sequence of integer
    /// arithmetic and bitwise operations and returns the result.
    ///
    /// It calls nothing else. All arithmetic uses the plain `+`, `-` and `*`
    /// operators, so overflow behaviour follows the build's overflow-check setting.
    fn f0(p: i32) -> i32 {
        let left_mask = ((21 | 1) | p) ^ 84;
        // `2 ^ 61` is XOR (== 63), so only the low six bits of `p` take part here.
        let right_mask = (48 ^ (52 | (p & (2 ^ 61)))) - 67;
        let base = p - (left_mask & right_mask);

        let five = 77 & 39; // == 5
        let scaled = base * five;
        let shifted = 88 * (8 + base);
        let offset = 54 + 60;

        let mut acc = (32 * p) & base;
        acc ^= base;
        acc -= offset;
        acc ^= 75;
        acc &= five;
        acc | scaled | shifted
    }
    
    fn f1(p: i32) -> i32 {
        let mut x1: i32 = f0(78);
        x1 = (x1 ^ p);
        let mut x2: i32 = f0(x1);
        x2 = (x2 * 3);
        let x3: i32 = f0(x1);
        let x4: i32 = ((21 & (x3 - ((93 * (x3 - (f0(x3) - (x2 - (f0(x1) | 43))))) | (f0(p) - f0(x1))))) * 41);
        ((((((f0(p) | x2) ^ p) & x1) ^ x2) | x3) - x4)
    }
    
    fn f2(p: i32) -> i32 {
        let mut x1: i32 = f1(50);
        x1 = (x1 * p);
        x1 = (x1 | f0(p));
        let mut x2: i32 = f1(x1);
        x2 = (x2 | f1(x2));
        let mut x3: i32 = (24 * f0(x1));
        x2 = (x2 & f0(p));
        x3 = (x3 ^ x1);
        let x4: i32 = x1;
        (((((x4 ^ p) | x1) * x2) + x3) | x4)
    }
    
    fn f3(p: i32) -> i32 {
        let mut x1: i32 = f2(75);
        let x2: i32 = x1;
        x1 = (x1 & x2);
        let x3: i32 = f0(p);
        let x4: i32 = ((f1(x3) ^ f1(x2)) + 92);
        x1 = (x1 | (x2 ^ 94));
        x1 = (x1 * x2);
        let x5: i32 = (f0(x1) & (3 ^ (f0(x1) * f2(x4))));
        x1 = (x1 + x2);
        (((((((x1 * x5) * p) - x1) | x2) * x3) - x4) - x5)
    }
    
    fn f4(p: i32) -> i32 {
        let mut x1: i32 = f3(14);
        x1 = (x1 + f0(p));
        let mut x2: i32 = f1(x1);
        x1 = (x1 - 41);
        x2 = (x2 ^ 61);
        let x3: i32 = f2(p);
        x2 = (x2 ^ p);
        let x4: i32 = x2;
        x1 = (x1 - p);
        x1 = (x1 * x4);
        ((((((88 & 11) & p) - x1) * x2) ^ x3) | x4)
    }
    
    fn f5(p: i32) -> i32 {
        let mut x1: i32 = f4(50);
        x1 = (x1 ^ 13);
        ((35 + p) | x1)
    }
    
    fn f6(p: i32) -> i32 {
        let mut x1: i32 = f5(51);
        x1 = (x1 + 27);
        let x2: i32 = (p + (p | f1(x1)));
        x1 = (x1 + f0(x2));
        let x3: i32 = f0(x1);
        let mut x4: i32 = 48;
        x1 = (x1 ^ f0(p));
        x1 = (x1 & 26);
        x4 = (x4 * f1(x4));
        (((((99 - p) * x1) ^ x2) & x3) + x4)
    }
    
    fn f7(p: i32) -> i32 {
        let mut x1: i32 = f6(71);
        x1 = (x1 & 66);
        x1 = (x1 & p);
        let x2: i32 = 57;
        x1 = (x1 * 26);
        let x3: i32 = (21 & p);
        let x4: i32 = (f0(x1) & (f3(p) * f2(p)));
        let x5: i32 = f6(x3);
        ((((((x5 + p) | x1) + x2) - x3) & x4) * x5)
    }
    
    fn f8(p: i32) -> i32 {
        let mut x1: i32 = f7(57);
        x1 = (x1 & f5(p));
        x1 = (x1 ^ (x1 & f1(p)));
        let x2: i32 = 25;
        let x3: i32 = f5(x1);
        ((((x1 - p) * x1) & x2) ^ x3)
    }
    
    fn f9(p: i32) -> i32 {
        let mut x1: i32 = f8(23);
        x1 = (x1 | (((26 | f4(x1)) - f0(p)) | f8(p)));
        let x2: i32 = x1;
        let mut x3: i32 = 58;
        x3 = (x3 - p);
        let x4: i32 = f7(x1);
        let x5: i32 = f7(x2);
        let x6: i32 = (f7(x1) & 79);
        (((((((33 | p) - x1) + x2) + x3) * x4) ^ x5) + x6)
    }
    
    fn f10(p: i32) -> i32 {
        let mut x1: i32 = f9(75);
        x1 = (x1 | 37);
        (((f8(x1) + f3(x1)) | p) * x1)
    }
    
    fn f11(p: i32) -> i32 {
        let mut x1: i32 = f10(8);
        x1 = (x1 ^ f6(x1));
        let mut x2: i32 = p;
        x2 = (x2 ^ 84);
        let x3: i32 = (f5(p) ^ f5(p));
        x1 = (x1 * f5(p));
        x1 = (x1 | f1(x2));
        x1 = (x1 * f8(p));
        ((((((f0(x3) | f9(p)) - f4(x1)) + p) & x1) & x2) - x3)
    }
    
    fn f12(p: i32) -> i32 {
        let mut x1: i32 = f11(33);
        x1 = (x1 * 84);
        let mut x2: i32 = (67 - f0(p));
        x2 = (x2 | x1);
        x1 = (x1 - 67);
        x2 = (x2 - f6(p));
        (((p - p) * x1) | x2)
    }
    
    fn f13(p: i32) -> i32 {
        let mut x1: i32 = f12(90);
        x1 = (x1 + (f6(x1) - f4(p)));
        x1 = (x1 - 19);
        let x2: i32 = 92;
        let mut x3: i32 = f9(x1);
        let mut x4: i32 = x3;
        x4 = (x4 - (87 | f5(x3)));
        x3 = (x3 | 49);
        let x5: i32 = 25;
        let x6: i32 = x3;
        (((((((2 & p) - x1) ^ x2) ^ x3) ^ x4) | x5) | x6)
    }
    
    fn f14(p: i32) -> i32 {
        let mut x1: i32 = f13(66);
        let x2: i32 = f2(p);
        x1 = (x1 - 11);
        let mut x3: i32 = 69;
        x3 = (x3 * x2);
        let x4: i32 = 91;
        (((((19 * p) + x1) | x2) ^ x3) & x4)
    }
    
    fn f15(p: i32) -> i32 {
        let mut x1: i32 = f14(79);
        x1 = (x1 + (f8(p) & p));
        let x2: i32 = p;
        x1 = (x1 | ((f5(p) & x2) ^ x2));
        let mut x3: i32 = x1;
        x1 = (x1 - p);
        x3 = (x3 * p);
        ((((40 * p) ^ x1) + x2) + x3)
    }
    
    fn f16(p: i32) -> i32 {
        let x1: i32 = f15(77);
        let mut x2: i32 = 5;
        let mut x3: i32 = x1;
        let x4: i32 = p;
        x2 = (x2 + p);
        let x5: i32 = x4;
        x3 = (x3 | f9(x4));
        let x6: i32 = (68 ^ (61 ^ (24 * f14(x4))));
        (((((((88 + p) - x1) & x2) | x3) & x4) ^ x5) | x6)
    }
    
    fn f17(p: i32) -> i32 {
        let mut x1: i32 = f16(41);
        x1 = (x1 | 4);
        let mut x2: i32 = x1;
        x1 = (x1 | 52);
        x1 = (x1 & 49);
        x2 = (x2 & (f8(x2) ^ p));
        let mut x3: i32 = x2;
        x3 = (x3 ^ ((x1 ^ x2) + f15(x2)));
        let mut x4: i32 = (f13(x2) ^ 73);
        x4 = (x4 - f12(x1));
        (((((x3 - p) + x1) ^ x2) + x3) | x4)
    }
    
    fn f18(p: i32) -> i32 {
        let mut x1: i32 = f17(3);
        x1 = (x1 & (p - ((33 * (95 | 87)) | (9 - f1(x1)))));
        x1 = (x1 & (80 - f16(x1)));
        x1 = (x1 & p);
        x1 = (x1 + p);
        x1 = (x1 | (82 - ((81 ^ p) - 97)));
        ((20 - p) * x1)
    }
    
    fn f19(p: i32) -> i32 {
        let x1: i32 = f18(24);
        let x2: i32 = (p & p);
        let mut x3: i32 = 82;
        let x4: i32 = (4 + x1);
        x3 = (x3 | ((f10(p) + (f16(x3) - 34)) - f10(x1)));
        let x5: i32 = (x4 | (x1 * (((f16(x1) + f4(x4)) - 43) & f7(x3))));
        (((((((f14(x3) | f9(x5)) - p) & x1) * x2) & x3) * x4) + x5)
    }
    
    fn f20(p: i32) -> i32 {
        let x1: i32 = f19(78);
        let x2: i32 = 81;
        let x3: i32 = (x2 + (59 & x1));
        (((((f9(x3) ^ f11(x3)) * p) * x1) - x2) ^ x3)
    }
    
    fn main() {
        let mut x0: i32 = f20(65);
        x0 = (x0 * (53 + 37));
        let mut x1: i32 = (x0 - ((41 | ((f20(x0) * f9(x0)) + ((((f20(x0) + (77 + (f14(x0) ^ 60))) * 27) & 62) + x0))) & f20(x0)));
        let x2: i32 = f15(x1);
        x1 = (x1 | (x0 * (4 ^ 37)));
        let m: i32 = (((x2 | x0) | x1) | x2);
        println!("{}", m);
    
    }
    

    最佳答案

    这个问题实际上已经在评论中得到了解答——感谢 @bluss。向 rustc 传递 -Cinline-threshold=1000 后,编译时间降到了预期的不到一秒左右。
    由于这个问题在 2.5 年后仍然可以复现,我最终按照建议在 rust-lang 仓库中提交了一份错误报告,看看 Rust 是否有可以改进的地方。你可以在这里查看该错误报告:https://github.com/rust-lang/rust/issues/86870
    感谢所有评论的人。

    关于rust - 为什么 rustc 编译这个 250 行的程序需要超过一分钟的时间,而 clang 需要不到一秒钟的时间?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/53696057/

    相关文章:

    c - 是 GCC 的选项 -O2 破坏了这个小程序还是我有未定义的行为

    c++ - 使用优化级别编译 C++ 项目

    rust - 如何实例化近协议(protocol)向量?

    c++ - 现代编译器优化如何将递归转换为返回常量?

    c++ - 当我使用直接初始化与 std::initializer_list 时的不同指令

    对元组向量进行排序需要第二个值的引用?

    haskell - 如何在 GHCI 中加载优化代码?

    rust - 交换两个本地引用会导致生命周期错误

    reference - 为什么我可以返回对局部文字的引用而不是变量?

    Rust(新手): Read and write (mutable) access to the same underlying array from multiple threads for in-memory database?