assembly - APIC多核启动协议(protocol)和ICR启动地址

标签 assembly x86 multiprocessing cpu apic

我正在编写引导加载并尝试测试处理器间中断。我遇到了以下两个问题:

1、哪里可以找到启动AP的流程;

2、发出IPI时,应该加载内存地址到哪里,告诉目标处理器从哪个内存地址开始。

感谢您的回答,请附上汇编代码示例。

最佳答案

我从现已停止运营的 Stack Overflow Documentation 项目中提取了此内容。它最初由 Margaret Bloom 编写,我整理了她的代码。由于这不是我自己的内容,我已将其标记为社区维基。其中可能有一些对您有用的信息。

<hr/>

此示例将唤醒每个应用程序处理器 (AP),并使它们与引导处理器 (BSP) 一起显示其 LAPIC ID。

; Assemble boot sector and insert it into a 1.44MiB floppy image
;
; nasm -f bin boot.asm -o boot.bin
; dd if=/dev/zero of=disk.img bs=512 count=2880
; dd if=boot.bin of=disk.img bs=512 conv=notrunc

BITS 16
; Bootloader starts at segment:offset 07c0h:0000h
section bootloader, vstart=0000h
; Far jump so that CS holds 07c0h and the vstart=0 offsets below are valid
jmp 7c0h:__START__

;-----------------------------------------------------------------------
; __START__ - bootstrap processor (BSP) entry
;   1. point DS/ES/SS at the boot sector segment, SP = 0, DF = 0
;   2. clear the screen
;   3. give FS/GS a flat 4GiB segment and enable the local APIC
;   4. copy the payload to physical address 8000h
;   5. send INIT, SIPI, SIPI to every other processor
;   6. jump to the payload itself
; Does not return.
;-----------------------------------------------------------------------
__START__:
 mov ax, cs
 mov ds, ax
 mov es, ax
 mov ss, ax
 xor sp, sp                   ;First push wraps to offset 0fffeh of the segment
 cld                          ;String instructions must count upwards

 ;Clear screen
 mov ax, 03h
 int 10h

 ;Set limit of 4GiB and base 0 for FS and GS
 ;(unrealmode returns with retf, so it must be called FAR)
 call 7c0h:unrealmode

 ;Enable the APIC
 call enable_lapic

 ;Move the payload to the expected address
 ;DS:SI = payload bytes inside the boot sector, ES:DI = destination
 mov si, payload_start_abs
 mov cx, payload_end - payload ;Exact payload size (no extra trailing byte)
 mov di, 400h                 ;7c0h:400h = 8000h
 rep movsb


 ;Wakeup the other APs with the INIT-SIPI-SIPI sequence

 ;INIT
 call lapic_send_init
 mov cx, WAIT_10_ms
 call us_wait

 ;SIPI
 call lapic_send_sipi
 mov cx, WAIT_200_us
 call us_wait

 ;SIPI
 call lapic_send_sipi

 ;Jump to the payload: the BSP runs the same code as the APs
 jmp 0000h:8000h

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 ;us_wait - busy-wait by issuing CX byte writes to I/O port 80h
 ;In:    CX = number of port writes; the callers pass a delay in microseconds
 ;            (CX = 0 on input gives the maximum of 65536 iterations)
 ;Clobb: CX, DX, SI
 ;NOTE(review): "1us per write" is the original author's assumption about
 ;port 80h; nothing in this code measures or verifies it.
 us_wait:
  mov dx, 80h               ;POST diagnostic port, assumed ~1us per IO
  ;OUTSB takes its source bytes from DS:SI; the values written do not matter,
  ;so simply start at offset 0 of the data segment
  xor si, si
  rep outsb

  ret

  ;Delay arguments for us_wait, expressed in microseconds
  WAIT_10_ms     EQU 10000
  WAIT_200_us    EQU 200

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll


 ;enable_lapic - turn on the local APIC and remember its MMIO base address
 ;In:    DS = bootloader data segment (for APIC_BASE)
 ;       FS = flat segment able to reach the APIC registers (see unrealmode)
 ;Out:   [APIC_BASE] = low 32 bits of the page-aligned APIC base address
 ;Clobb: EAX, ECX, EDX, flags
 enable_lapic:

  ;Enable the APIC globally
  ;On P6 CPUs once this flag is cleared to 0, it cannot be set back to 1
  ;without a HARD RESET
  mov ecx, IA32_APIC_BASE_MSR
  rdmsr
  or ah, 08h        ;bit11: APIC GLOBAL Enable/Disable
  wrmsr

  ;Mask off ALL of the lower 12 bits (flag and reserved bits) so that only
  ;the page-aligned APIC base address remains. Only EAX is kept: the base
  ;is assumed to lie below 4GiB.
  and eax, 0fffff000h
  mov DWORD [APIC_BASE], eax

  ;Newer processors enable the APIC through the Spurious Interrupt Vector register
  mov ecx, DWORD [fs: eax + APIC_REG_SIV]
  or ch, 01h        ;bit8: APIC SOFTWARE enable/disable
  mov DWORD [fs: eax + APIC_REG_SIV], ecx

  ret

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 ;ICR low dword for the Startup IPI sent by lapic_send_sipi:
 ;  bits  7:0  = 08h   vector (target CPUs start executing at 08000h)
 ;  bits 10:8  = 110b  delivery mode: Startup
 ;  bit  11    = 0     destination mode (ignored)
 ;  bit  14    = 1     level (ignored)
 ;  bit  15    = 0     trigger mode (ignored)
 ;  bits 19:18 = 11b   destination shorthand: all excluding self
 SIPI_ALL_BUT_SELF_VEC08    EQU 0c4608h

 ;lapic_send_sipi - send a Startup IPI to every processor except this one
 ;In:    [APIC_BASE] = APIC base address (stored by enable_lapic)
 ;       FS = flat segment able to reach the APIC registers
 ;Clobb: EAX, EBX, flags
 lapic_send_sipi:
  mov eax, DWORD [APIC_BASE]

  ;The destination field stays 0 because a shorthand selects the targets
  xor ebx, ebx
  mov DWORD [fs: eax + APIC_REG_ICR_HIGH], ebx

  ;Writing the low dword is what actually sends the IPI, so it goes last
  mov ebx, SIPI_ALL_BUT_SELF_VEC08
  mov DWORD [fs: eax + APIC_REG_ICR_LOW], ebx

  ret

  ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 ;ICR low dword for the INIT IPI sent by lapic_send_init:
 ;  bits  7:0  = 00h   vector
 ;  bits 10:8  = 101b  delivery mode: INIT (not Startup)
 ;  bit  11    = 0     destination mode (ignored)
 ;  bit  14    = 1     level: assert
 ;  bit  15    = 0     trigger mode: edge
 ;  bits 19:18 = 11b   destination shorthand: all excluding self
 INIT_ALL_BUT_SELF    EQU 0c4500h

 ;lapic_send_init - send an INIT IPI to every processor except this one
 ;In:    [APIC_BASE] = APIC base address (stored by enable_lapic)
 ;       FS = flat segment able to reach the APIC registers
 ;Clobb: EAX, EBX, flags
 lapic_send_init:
  mov eax, DWORD [APIC_BASE]

  ;Destination field is set to 0 as we will use a shorthand
  xor ebx, ebx
  mov DWORD [fs: eax + APIC_REG_ICR_HIGH], ebx

  ;Writing the low DWORD sends the IPI, so it must be written last
  mov ebx, INIT_ALL_BUT_SELF
  mov DWORD [fs: eax + APIC_REG_ICR_LOW], ebx

  ret

 ;MSR number passed in ECX to rdmsr/wrmsr by enable_lapic
 IA32_APIC_BASE_MSR     EQU 1bh

 ;Local APIC register offsets, added to the address held in APIC_BASE
 APIC_REG_ID            EQU 020h    ;ID register (payload reads bits 31:24)
 APIC_REG_SIV           EQU 0f0h    ;Spurious Interrupt Vector register
 APIC_REG_ICR_LOW       EQU 300h    ;Interrupt Command Register, low dword
 APIC_REG_ICR_HIGH      EQU 310h    ;Interrupt Command Register, high dword

 ;-----------------------------------------------------------------------

 ;APIC base address: written by enable_lapic, read by the IPI routines
 ;through DS and by the payload through GS
 APIC_BASE              dd 0

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

;unrealmode - load FS and GS with a base-0, 4GiB-limit data descriptor
;The routine briefly sets CR0.PE, loads FS/GS with selector 08h from GDT and
;clears CR0.PE again; the callers then use FS with 32-bit offsets to reach
;the APIC registers.
;In:    must be entered with a FAR call and CS = 07c0h (GDT is read via CS)
;Out:   FS = GS = selector 08h; interrupts are ENABLED on return (sti)
;Clobb: EAX, BX, GDTR, flags
unrealmode:
 ;CS override: the caller's DS may not point at the bootloader segment
 lgdt [cs:GDT]

 cli

 ;Set CR0 bit 0 (PE) to enter protected mode
 mov eax, cr0
 or ax, 01h
 mov cr0, eax

 ;Selector 08h = second 8-byte slot of GDT (the data descriptor below)
 mov bx, 08h
 mov fs, bx
 mov gs, bx

 ;Clear CR0 bit 0 again to go back to real mode
 and ax, 0fffeh
 mov cr0, eax

 sti

 ;IMPORTANT: This routine returns FAR!
 ;So it can be called from everywhere
 retf

 ;The first 8 bytes double as the operand of lgdt and as GDT slot 0:
 ;  dw limit (0fh = 16 bytes - 1), dd linear base (segment 07c0h -> +7c00h),
 ;  dw padding up to 8 bytes
 GDT:
    dw 0fh
    dd GDT + 7c00h
    dw 00h

    ;Slot 1 (selector 08h): base 0, limit 0fffffh with 4KiB granularity
    ;(4GiB), access byte 92h (present, writable data segment)
    dd 0000ffffh
    dd 00cf9200h

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

payload_start_abs:
; payload_start_abs is the offset of the payload bytes inside the bootloader
; section; __START__ uses it as the source of the copy to 8000h.
; payload starts at segment:offset 0800h:0000h
section payload, vstart=0000h, align=1
 payload:

  ;IMPORTANT NOTE: Here we are on a "new" CPU: none of the state we set before
  ;is present here (except on the BSP, but we handle every processor with
  ;the same code).
  ;The far jump forces CS = 0800h so that the vstart=0 offsets are valid.
 jmp 800h: __RESTART__

 __RESTART__:
  mov ax, cs
  mov ds, ax
  xor sp, sp
  cld

  ;IMPORTANT: We can't use the stack yet. Every CPU is pointing to the same stack!

  ;Get a unique id: atomically fetch-and-increment [counter].
  ;cmpxchg compares AX with [counter]; if they match it stores BX and sets ZF,
  ;otherwise it loads the current value into AX and we retry.
  mov ax, WORD [counter]
  .try:
    mov bx, ax
    inc bx
    lock cmpxchg WORD [counter], bx
   jnz .try

  mov cx, ax            ;Save this unique id

  ;Stack segment = CS + unique id * 1000h (a separate 64KiB segment per CPU)
  shl ax, 12
  mov bx, cs
  add ax, bx
  mov ss, ax

  ;Text buffer
  push 0b800h
  pop es

  ;Set unreal mode again (FS/GS state is per CPU)
  call 7c0h:unrealmode

  ;Use GS for old variables (the bootloader's data at segment 07c0h)
  mov ax, 7c0h
  mov gs, ax

  ;Calculate text row: DI = unique id * 160
  mov ax, cx
  mov bx, 160d           ;80 * 2
  mul bx
  mov di, ax

  ;Get LAPIC id: bits 31:24 of the APIC ID register, moved down into DL
  mov ebx, DWORD [gs:APIC_BASE]
  mov edx, DWORD [fs:ebx + APIC_REG_ID]
  shr edx, 24d
  call itoa8

  ;Done: stop this CPU with interrupts disabled
  cli
  hlt

  ;itoa8 - write DL as two hexadecimal digits into the text buffer
  ;In:    DL = Number
  ;       ES:DI = ptr to text buffer, DS = payload segment (for digits), DF = 0
  ;Out:   DI advanced by 4 (two character + attribute cells)
  ;Clobb: AX, BX, flags
  itoa8:
    ;High nibble: zero-extend DL so that only the documented input is used,
    ;then shift right by 4 bits (one hex digit)
    movzx bx, dl
    shr bx, 4
    mov al, BYTE [bx +  digits]
    mov ah, 09h           ;attribute byte stored next to the character
    stosw

    ;Low nibble
    mov bx, dx
    and bx, 0fh
    mov al, BYTE [bx +  digits]
    mov ah, 09h
    stosw

    ret

  ;Lookup table used by itoa8 to turn a nibble into its hex character
  digits db "0123456789abcdef"
  ;Next free CPU index; every CPU atomically takes one value in __RESTART__
  counter dw 0

 ;End of the bytes that __START__ copies to 8000h
 payload_end:



; Boot signature: the bytes 55h, AAh placed at file offset 01feh, i.e. the
; last two bytes of the 512-byte boot sector
section bootsig, start=01feh
 db 55h, 0aah
<hr/>

需要执行两个主要步骤:

<strong>1. 唤醒 AP</strong>
这是通过向所有 AP 发出 INIT-SIPI-SIPI (ISS) 序列来实现的。

BSP 使用目标简写(destination shorthand)"All excluding self"来发送 ISS 序列,从而以所有 AP 为目标。

SIPI(启动处理器间中断)会被所有在收到它时已经被唤醒的 CPU 忽略,因此如果第一个 SIPI 足以唤醒目标处理器,则第二个 SIPI 将被忽略。出于兼容性原因,英特尔建议发送两次 SIPI。

SIPI 包含一个向量,它与中断向量(又名中断号)的含义相似,但在实际作用上完全不同。
该向量是一个 8 位数字,值为 V(以 16 进制表示为 vv),它使 CPU 从物理地址 0vv000h 处开始执行指令。
我们将0vv000h称为唤醒地址(WA)。
WA 强制位于 4KiB(或页面)边界。

我们将使用 08h 作为V,那么 WA 就是 08000h,即引导加载程序之后的 400h 字节。

这将控制权交给了 AP。

<strong>2. 初始化并区分 AP</strong>
WA 必须有可执行代码。引导加载程序位于7c00h,因此我们需要在页边界重新定位一些代码。

编写有效负载时要记住的第一件事是对共享资源的任何访问都必须受到保护或区分。
常见的共享资源是堆栈,如果我们天真地初始化堆栈,那么每个AP最终都会使用相同的堆栈!

第一步是使用不同的堆栈地址,从而区分堆栈。
我们通过为每个 CPU 分配一个从零开始的唯一编号来实现这一点。这个数字,我们称之为索引,用于区分堆栈和CPU将写入其APIC ID的行。

每个 CPU 的堆栈段为 800h + 索引 * 1000h(SP 初始为 0),即每个 CPU 拥有一个独立的 64KiB 堆栈段。
每个 CPU 的行号是 index,因此指向文本缓冲区的指针是 80 * 2 * index

为了生成索引,使用 lock cmpxchg 指令以原子方式递增并返回一个 WORD。

最终说明
* 对端口 80h 的写入用于生成 1 µs 的延迟。
* unrealmode 是一个远例程,因此也可以在唤醒后调用。
* BSP 也跳转到 WA。

屏幕截图

来自具有 8 个处理器的 Bochs

Screenshot with eight processors

关于assembly - APIC多核启动协议(protocol)和ICR启动地址,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/48763883/

相关文章:

c - 如何查看为 C 函数生成的汇编代码?

linux - 使用指针在x86 Assembly中返回结果

assembly - C 编译器输出的此代码中的 MOVZX、CDQE 指令的含义/用途是什么?

javascript - 如何在 Node.js 中组织多进程应用程序?

python - 使用 python 和多处理压缩输出

python - 如何在 python GUI TKinter 中通过相同图像标签的另一个进程调用替换正在进行的图像捕获过程

assembly - 汇编语言中Rd、Rn、Rm、Ra的完整形式是什么?

c - 一种破解 C 函数调用的快速方法

performance - 为什么jnz不算循环?

multithreading - `xchg` 是否包含 `mfence` 假设没有非时间指令?