swapAt()
比 (nums[i], nums[j]) = (nums[j], nums[i])
操作快,我想知道为什么?
手动交换解决方案需要 44 毫秒,但 swapAt
解决方案只需要 40 毫秒。我想知道有什么区别?
手动交换解决方案(44ms)
func moveZeroes(_ nums: inout [Int]) {
var j = 0;
for i in 0..<nums.count {
if (nums[i] != 0) {
(nums[i], nums[j]) = (nums[j], nums[i])
j += 1
}
}
}
swapAt 解决方案(40 毫秒)
func moveZeroes(_ nums: inout [Int]) {
var j = 0;
for i in 0..<nums.count {
if (nums[i] != 0) {
nums.swapAt(i, j)
j += 1
}
}
}
最佳答案
首先,关于基准测试的一些一般观察:
- 使用优化的构建(我知道你正在这样做,但我只是为了完整起见才提到)
- 多次重复测试
- 随机安排您执行基准测试的顺序并比较几次运行。 (我个人使用开启了“随机化执行顺序”功能的单元测试。)
其次,我应该说元组和 swapAt
演绎版之间的区别非常小。我循环了十亿次(包含 1000 个项目的数组,重复了一百万次),差异仍然只有几分之一秒。
第三,我发现在处理 10% 的数据点为零的大型随机数据集时,使用元组进行交换比使用 swapAt
稍微快一些,但在非常低的情况下会慢一点真正需要的交换很少。
后一点是有道理的,因为 swapAt
will skip the swap operation if it’s not necessary :“使用与 i
和 j
相同的索引调用 swapAt(_:_:)
无效。” We can see swapAt
提前退出 i
和 j
是一样的:
@inlinable
public mutating func swapAt(_ i: Index, _ j: Index) {
guard i != j else { return }
let tmp = self[i]
self[i] = self[j]
self[j] = tmp
}
如果您修改元组例程以执行相同的检查,则差异会减小:
func moveZeroesTuple(_ nums: inout [Int]) {
var j = 0
for i in 0..<nums.count where nums[i] != 0 {
if i != j {
(nums[i], nums[j]) = (nums[j], nums[i])
}
j += 1
}
}
坦率地说,我对基于元组的方法如此之快感到有点惊讶,但看看程序集,优化器做得很好,将其提炼为相当流线型的东西:
0x100b5fb70 <+192>: movq 0x20(%rbx,%rdx,8), %rsi ; retrieve nums[i]
0x100b5fb75 <+197>: testq %rsi, %rsi ; is it zero?
0x100b5fb78 <+200>: je 0x100b5fb98 ; <+232> [inlined] protocol witness for Swift.Strideable.advanced(by: A.Stride) -> A in conformance Swift.Int : Swift.Strideable in Swift
0x100b5fb7a <+202>: cmpq %rcx, %rdx ; is i = j?
0x100b5fb7d <+205>: je 0x100b5fb93 ; <+227> [inlined] MyAppTests.MyAppTests.moveZeroesTuple(inout Swift.Array<Swift.Int>) -> () + 66 at MyAppTests.swift:120
0x100b5fb7f <+207>: cmpq (%rax), %rcx ; are we done?
0x100b5fb82 <+210>: jae 0x100b5fbc7 ; <+279> [inlined] generic specialization <Swift.Int> of Swift.Array._getElement(_: Swift.Int, wasNativeTypeChecked: Swift.Bool, matchingSubscriptCheck: Swift._DependenceToken) -> A
0x100b5fb84 <+212>: movq 0x20(%rbx,%rcx,8), %rdi ; retrieve nums[j]
0x100b5fb89 <+217>: movq %rdi, 0x20(%rbx,%rdx,8) ; save it in nums[i]
0x100b5fb8e <+222>: movq %rsi, 0x20(%rbx,%rcx,8) ; save previously retrieved nums[i] in nums[j]
0x100b5fb93 <+227>: incq %rcx ; j += 1
0x100b5fb96 <+230>: jo 0x100b5fbc5 ; <+277> [inlined] MyAppTests.MyAppTests.moveZeroesTuple(inout Swift.Array<Swift.Int>) -> () at MyAppTests.swift:120
0x100b5fb98 <+232>: incq %rdx ; i += 1
0x100b5fb9b <+235>: cmpq %rdx, %r12 ; repeat for loop
0x100b5fb9e <+238>: jne 0x100b5fb70 ; <+192> [inlined] generic specialization <Swift.Int> of Swift.Array._getElement(_: Swift.Int, wasNativeTypeChecked: Swift.Bool, matchingSubscriptCheck: Swift._DependenceToken) -> A + 15
优化后的 swapAt
程序集几乎相同(多亏了内联),但其中只多了一些指令:
0x100b5d920 <+192>: movq 0x20(%rbx,%rdx,8), %rsi
0x100b5d925 <+197>: testq %rsi, %rsi
0x100b5d928 <+200>: je 0x100b5d955 ; <+245> [inlined] protocol witness for Swift.Strideable.advanced(by: A.Stride) -> A in conformance Swift.Int : Swift.Strideable in Swift
0x100b5d92a <+202>: cmpq %rcx, %rdx
0x100b5d92d <+205>: je 0x100b5d950 ; <+240> [inlined] MyAppTests.MyAppTests.moveZeroesSwapAt(inout Swift.Array<Swift.Int>) -> () + 79 at MyAppTests.swift:128
0x100b5d92f <+207>: movq (%rax), %rdi
0x100b5d932 <+210>: cmpq %rdi, %rdx
0x100b5d935 <+213>: jae 0x100b5d984 ; <+292> [inlined] generic specialization <Swift.Int> of Swift.Array._getElement(_: Swift.Int, wasNativeTypeChecked: Swift.Bool, matchingSubscriptCheck: Swift._DependenceToken) -> A
0x100b5d937 <+215>: cmpq %rdi, %rcx
0x100b5d93a <+218>: jae 0x100b5d986 ; <+294> [inlined] generic specialization <Swift.Int> of Swift.Array._getElement(_: Swift.Int, wasNativeTypeChecked: Swift.Bool, matchingSubscriptCheck: Swift._DependenceToken) -> A + 2
0x100b5d93c <+220>: movq 0x20(%rbx,%rcx,8), %rdi
0x100b5d941 <+225>: movq %rdi, 0x20(%rbx,%rdx,8)
0x100b5d946 <+230>: cmpq (%rax), %rcx
0x100b5d949 <+233>: jge 0x100b5d988 ; <+296> [inlined] generic specialization <Swift.Int> of Swift._ArrayBuffer._checkInoutAndNativeTypeCheckedBounds(_: Swift.Int, wasNativeTypeChecked: Swift.Bool) -> ()
0x100b5d94b <+235>: movq %rsi, 0x20(%rbx,%rcx,8)
0x100b5d950 <+240>: incq %rcx
0x100b5d953 <+243>: jo 0x100b5d982 ; <+290> [inlined] MyAppTests.MyAppTests.moveZeroesSwapAt(inout Swift.Array<Swift.Int>) -> () at MyAppTests.swift:128
0x100b5d955 <+245>: incq %rdx
0x100b5d958 <+248>: cmpq %rdx, %r12
0x100b5d95b <+251>: jne 0x100b5d920 ; <+192> [inlined] generic specialization <Swift.Int> of Swift.Array._getElement(_: Swift.Int, wasNativeTypeChecked: Swift.Bool, matchingSubscriptCheck: Swift._DependenceToken) -> A + 15
尽管如此,这几条额外的指令似乎使元组方法在“多次交换”场景中略有优势。
最重要的是,在我的基准测试中,除了最超大的数据集之外,它们之间的差异可以忽略不计,否则不值得担心。性能特征取决于数据集的性质,在一般情况下没有明显的赢家。而且,坦率地说,如果您的数据集实际上大到足以保证从另一个中挑选一个,那么无论如何您真的应该考虑完全不同的算法。
如果您有兴趣,这是我的单元测试,使用“发布”构建进行测试,并打开“随机执行顺序”:
import XCTest
class MyAppTests: XCTestCase {
static var originalArray: [Int]!
let iterations = 1_000_000
// fyi, use static to make sure all tests use the same original array
// eliminating any discrepancies between different tests, within the same
// test run, having different data and therefore different number of swaps.
override static func setUp() {
print("building array")
// scenario 1: no swapping needed
//
// Array with 1000 integers, 10% which are zeros and the rest are non-zero
originalArray = (0..<900).map { _ in Int.random(in: 1..<10) } + [Int](repeating: 0, count: 100)
// scenario 2: some swapping needed
//
// if you don't want them randomized, comment the following line
originalArray.shuffle()
// scenario 3: since all zeros are at the start, the maximum amount of swapping
//
// if you want zeros at start, uncomment the following line
//
// originalArray.sort()
}
func moveZeroesTuple(_ nums: inout [Int]) {
var j = 0
for i in 0..<nums.count where nums[i] != 0 {
if i != j {
(nums[i], nums[j]) = (nums[j], nums[i])
}
j += 1
}
}
func moveZeroesShiftManual(_ nums: inout [Int]) {
var i = 0
let count = nums.count
while i < count, nums[i] != 0 {
i += 1
}
var j = i
while i < count {
if nums[i] != 0 {
nums[j] = nums[i]
j += 1
}
i += 1
}
while j < count {
nums[j] = 0
j += 1
}
}
func moveZeroesSwapManual(_ nums: inout [Int]) {
var j = 0
for i in 0..<nums.count where nums[i] != 0 {
if i != j {
let temp = nums[i]
nums[i] = nums[j]
nums[j] = temp
}
j += 1
}
}
func moveZeroesSwapAt(_ nums: inout [Int]) {
var j = 0
for i in 0 ..< nums.count where nums[i] != 0 {
nums.swapAt(i, j)
j += 1
}
}
// a quick test to make sure all solutions generate the same result
func testLogic() {
var arrayTuple = MyAppTests.originalArray!
moveZeroesTuple(&arrayTuple)
var arraySwapAt = MyAppTests.originalArray!
moveZeroesSwapAt(&arraySwapAt)
var arrayShiftManual = MyAppTests.originalArray!
moveZeroesShiftManual(&arrayShiftManual)
var arraySwapManual = MyAppTests.originalArray!
moveZeroesSwapManual(&arraySwapManual)
XCTAssertEqual(arrayTuple, arrayShiftManual)
XCTAssertEqual(arrayTuple, arraySwapManual)
XCTAssertEqual(arrayTuple, arraySwapAt)
}
// now the performance tests
func testTuple() {
measure {
for _ in 0 ..< iterations {
var array = MyAppTests.originalArray!
moveZeroesTuple(&array)
}
}
}
func testSwapAt() {
measure {
for _ in 0 ..< iterations {
var array = MyAppTests.originalArray!
moveZeroesSwapAt(&array)
}
}
}
func testShiftManual() {
measure {
for _ in 0 ..< iterations {
var array = MyAppTests.originalArray!
moveZeroesShiftManual(&array)
}
}
}
func testSwapManual() {
measure {
for _ in 0 ..< iterations {
var array = MyAppTests.originalArray!
moveZeroesSwapManual(&array)
}
}
}
}
通常,在对发布版本进行测试时,我会对结果做一些处理,以确保代码的某些关键部分没有被编译器完全优化掉。我在这里不需要这样做,只是因为我仔细检查了各种方法的程序集,并且我知道所有内容都已保留。
关于swift - swapAt 比手动交换操作更快,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55939947/