sorting - 打开文件，删除零，排序 - NASM

我目前正在解决一些问题，这就是我遇到的问题。澄清一下，我是初学者，因此非常欢迎任何帮助。

问题:

Sort the content of a binary file in descending order. The name of the file is passed as a command line argument. File content is interpreted as four-byte positive integers, where value 0, when found, is not written into the file. The result must be written in the same file that has been read.

我的理解是我必须有一个二进制文件。打开它。获取其内容。查找所有字符，同时记住这些字符是正的四字节整数，查找零，去掉零，对其余数字进行排序。

我们可以使用 glibc，所以这是我的尝试:

section .data
    warning db 'File does not exist!', 10, 0
    argument db 'Enter your argument.', 10, 0

    mode dd 'r+'
    opened db 'File is open. Time to read.', 10, 0


section .bss
    content resd 10
    counter resb 1

section .text

    extern printf, fopen, fgets, fputc

global main
main:
    push rbp
    mov rbp, rsp
    push rsi
    push rdi
    push rbx

    ;location of argument's address
    push rsi 

    cmp rdi, 2
    je .openfile
    mov rdi, argument
    mov rax, 0
    call printf
    jmp .end

.openfile:
    pop rbx
    ;First real argument of command line
    mov rdi, [rbx + 8]
    mov rsi, mode
    mov rax, 0
    call fopen
    cmp al, 0
    je .end

    push rax

    mov rdi, opened
    mov rax, 0
    call printf

.readfromfile:
    mov rdi, content
    mov rsi, 12 ;I wrote 10 numbers in my file
    pop rdx
    mov rax, 0
    call fgets
    cmp al, 0
    je .end

    push rax


    mov rsi, tekst
    pop rdi
.loop:
    lodsd
    inc byte[counter]
    cmp eax, '0'
    jne .loop

    ;this is the part where I am not sure what to do. 
    ;I am trying to delete the zero with backspace, then use space and 
    ;backspace again - I saw it here somewhere as a solution
    mov esi, 0x08
    call fputc  
    mov esi, 0x20
    call fputc
    mov esi, 0x08
    call fputc
    cmp eax, 0
    je .end
    jmp .loop

.end:
    pop rdi
    pop rsi
    pop rbx
    mov rsp, rbp
    pop rbp
    ret

所以，我的想法是打开文件，找到零，使用退格键和空格键将其删除，然后再次退格键；继续，直到到达文件末尾，然后对其进行排序。可以看出，我没有尝试对内容进行排序，因为我无法让程序为我完成第一部分。我已经尝试了几天了，一切都变得模糊起来。

如果有人能帮助我，我将不胜感激。如果有类似这个问题的，欢迎联系我。任何有帮助的事情，我都准备好阅读和学习。

我也不确定我必须提供多少信息。如果有不清楚的地方请指出。

谢谢

最佳答案

为了我自己的自私乐趣，当检测到双字零值时内存区域被“折叠”的示例:

使用 NASM 在 Linux 中构建目标 ELF64 可执行文件:

nasm -f elf64 so_64b_collapseZeroDword.asm -l so_64b_collapseZeroDword.lst -w+all
ld -b elf64-x86-64 -o so_64b_collapseZeroDword so_64b_collapseZeroDword.o

对于调试器，我使用 edb (从源代码构建)(可执行文件不会执行用户可观察到的任何操作，当它正常工作时，它应该在调试器单步运行中运行指令并通过内存查看 .data 段来查看值如何在内存中移动)。

源文件so_64b_collapseZeroDword.asm

    segment .text

collapseZeroDwords:
; input (custom calling convention, suitable only for calls from assembly):
;   rsi - address of first element
;   rdx - address beyond last element ("vector::end()" pointer)
; return: rdi - new "beyond last element" address
; modifies: rax, rsi, rdi
; the memory after new end() is not cleared (the zeroes are just thrown away)!

; search for first zero (up till that point the memory content will remain same)
    cmp     rsi, rdx
    jae     .noZeroFound    ; if the (rsi >= end()), no zero was in the memory
    lodsd                   ; eax = [rsi], rsi += 4
    test    eax, eax        ; check for zero
    jne     collapseZeroDwords
; first zero found, from here on, the non-zero values will be copied to earlier area
    lea     rdi, [rsi-4]    ; address where the non-zero values should be written
.moveNonZeroValues:
    cmp     rsi, rdx
    jae     .wholeArrayCollapsed    ; if (rsi >= end()), whole array is collapsed
    lodsd                   ; eax = [rsi], rsi += 4
    test    eax, eax        ; check for zero
    jz      .moveNonZeroValues      ; zero detected, skip the "store" value part
    stosd                   ; [rdi] = eax, rdi += 4 (pointing beyond last element)
    jmp     .moveNonZeroValues

.noZeroFound:
    mov     rdi, rdx        ; just return the original "end()" pointer
.wholeArrayCollapsed:       ; or just return when rdi is already set as new end()
    ret

global _start
_start:     ; run some hardcoded simple tests, verify in debugger
    lea     rsi, [test1]
    lea     rdx, [test1+4*4]
    call    collapseZeroDwords
    cmp     rdi, test1+4*4      ; no zero collapsed

    lea     rsi, [test2]
    lea     rdx, [test2+4*4]
    call    collapseZeroDwords
    cmp     rdi, test2+3*4      ; one zero

    lea     rsi, [test3]
    lea     rdx, [test3+4*4]
    call    collapseZeroDwords
    cmp     rdi, test3+3*4      ; one zero

    lea     rsi, [test4]
    lea     rdx, [test4+4*4]
    call    collapseZeroDwords
    cmp     rdi, test4+2*4      ; two zeros

    lea     rsi, [test5]
    lea     rdx, [test5+4*4]
    call    collapseZeroDwords
    cmp     rdi, test5+2*4      ; two zeros

    lea     rsi, [test6]
    lea     rdx, [test6+4*4]
    call    collapseZeroDwords
    cmp     rdi, test6+0*4      ; four zeros

    ; exit back to linux
    mov     eax, 60
    xor     edi, edi
    syscall

    segment .data
    ; all test arrays are 4 elements long for simplicity
        dd 0xCCCCCCCC       ; debug canary value to detect any over-read or over-write
test1   dd 71, 72, 73, 74, 0xCCCCCCCC
test2   dd 71, 72, 73,  0, 0xCCCCCCCC
test3   dd  0, 71, 72, 73, 0xCCCCCCCC
test4   dd  0, 71,  0, 72, 0xCCCCCCCC
test5   dd 71,  0, 72,  0, 0xCCCCCCCC
test6   dd  0,  0,  0,  0, 0xCCCCCCCC

我试图对其进行广泛的评论，以展示它的作用/原因/方式，但请随意询问任何特定部分。该代码在编写时考虑到了简单性，因此它没有使用任何激进的性能优化(例如对第一个零值的矢量化搜索等)。

关于sorting - 打开文件，删除零，排序 - NASM，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/52444642/

sorting - 打开文件，删除零，排序 - NASM

上一篇：Bash:按分隔符分割但保留分隔符

下一篇：spring-boot - WebSocket握手时出错: Incorrect 'Sec-WebSocket-Accept' header value