xv6Trampoline - ccc-sp/riscv2os GitHub Wiki

xv6: 彈跳床如何切換分頁表?

xv6 是 64 位元的作業系統,分頁表採用 SV39 格式 (而非 32 位元的 SV32 格式),如圖 3.2 所示

其對應的資料結構宣告如下:

kernel/riscv.h


// One page-table entry, stored as a 64-bit integer.
typedef uint64 pte_t;
// A page table, referred to by a pointer to its array of entries.
typedef uint64 *pagetable_t; // 512 PTEs

分頁表存放在記憶體中;把根分頁表的實體頁號寫入 SATP 暫存器之後,硬體就會依照這份分頁表進行位址轉換,並把轉換結果快取在 TLB (Translation Lookaside Buffer) 中:

// Use RISC-V's Sv39 page table scheme.
// The value 8 is placed in the top four bits (63..60) of satp. It is built
// as an unsigned constant because 8L << 60 does not fit in a signed
// 64-bit long, which makes the signed shift undefined behaviour.
#define SATP_SV39 (8ULL << 60)

// Build a satp value from a page-table address: the Sv39 mode bits OR'd
// with the address shifted right by 12.
// The argument is parenthesized so that an expression such as `pt + n`
// is cast and shifted as a whole instead of only its first operand.
#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)(pagetable)) >> 12))

// supervisor address translation and protection;
// holds the address of the page table.
//
// Write x into the satp CSR.
static inline void 
w_satp(uint64 x)
{
  // The "memory" clobber makes this a compiler barrier: changing satp
  // changes how addresses are translated, so the compiler must not move
  // loads or stores across the write.
  asm volatile("csrw satp, %0" : : "r" (x) : "memory");
}

// Read the satp CSR and return its current value.
// Declared with (void) so the compiler rejects calls that pass arguments.
static inline uint64
r_satp(void)
{
  uint64 x;
  asm volatile("csrr %0, satp" : "=r" (x) );
  return x;
}

然後透過 sfence.vma zero, zero 指令清除 TLB (而不是分頁表本身),讓舊的位址轉換快取失效。


// flush the TLB.
static inline void
sfence_vma(void)
{
  // the zero, zero means flush all TLB entries.
  // The "memory" clobber keeps the compiler from moving page-table
  // stores (or any other memory access) across the fence.
  asm volatile("sfence.vma zero, zero" : : : "memory");
}

在 kernel 剛啟動時,還無法使用分頁表,所以在 start.c 裏會用 w_satp(0) 先抑制分頁機制。

等到 kernel 載入後,會呼叫 vm.c 裏的 kvminithart() 函數,啟用核心分頁表:

// Switch h/w page table register to the kernel's page table,
// and enable paging.
void
kvminithart(void)
{
  // Fence first so that earlier stores which built the kernel page table
  // are ordered before the hardware starts walking it.
  sfence_vma();

  // Point satp at the kernel page table; this turns paging on.
  w_satp(MAKE_SATP(kernel_pagetable));

  // Flush the TLB so no translation from before the switch is reused.
  sfence_vma();
}

當 user process (使用者行程) 被中斷或進行系統呼叫時,會跳到 trampoline.S (彈跳床) 中的 uservec 程式,其中的 csrw satp, t1 指令會切換回核心分頁表,然後透過最後的 jr t0 指令跳到 usertrap() 去執行。usertrap() 處理完畢後,會在最後一行呼叫 usertrapret(),再由 usertrapret() 跳到彈跳床中的 userret 程式;userret 用 csrw satp, a1 切回使用者行程的分頁表,最後以 sret 返回使用者行程原本被中斷的位置。

	# Trampoline -- the code that handles the user/kernel transition
	# in both directions (entering the kernel from user space and
	# returning from the kernel to user space).
        # code to switch between user and kernel space.
        #
        # this code is mapped at the same virtual address
        # (TRAMPOLINE) in user and kernel space so that
        # it continues to work when it switches page tables.
	#
	# kernel.ld causes this to be aligned
        # to a page boundary.
        #
	.section trampsec
.globl trampoline
trampoline:
.align 4
.globl uservec
uservec:# entry point for traps coming from user space
	#
        # trap.c sets stvec to point here, so
        # traps from user space start here,
        # in supervisor mode, but with a
        # user page table.
        #
        # sscratch points to where the process's p->trapframe is
        # mapped into user space, at TRAPFRAME.
        #
        # ------------ save the user process's registers ------------------
	# swap a0 and sscratch    # before the swap sscratch = TRAPFRAME
        # so that a0 is TRAPFRAME # afterwards a0 = TRAPFRAME, sscratch = user a0
        csrrw a0, sscratch, a0 

        # save the user registers in TRAPFRAME
        # (every register except a0, which is saved separately below;
        # the offsets match the layout of struct trapframe)
        sd ra, 40(a0)
        sd sp, 48(a0)
        sd gp, 56(a0)
        sd tp, 64(a0)
        sd t0, 72(a0)
        sd t1, 80(a0)
        sd t2, 88(a0)
        sd s0, 96(a0)
        sd s1, 104(a0)
        sd a1, 120(a0)
        sd a2, 128(a0)
        sd a3, 136(a0)
        sd a4, 144(a0)
        sd a5, 152(a0)
        sd a6, 160(a0)
        sd a7, 168(a0)
        sd s2, 176(a0)
        sd s3, 184(a0)
        sd s4, 192(a0)
        sd s5, 200(a0)
        sd s6, 208(a0)
        sd s7, 216(a0)
        sd s8, 224(a0)
        sd s9, 232(a0)
        sd s10, 240(a0)
        sd s11, 248(a0)
        sd t3, 256(a0)
        sd t4, 264(a0)
        sd t5, 272(a0)
        sd t6, 280(a0)

	# save the user a0 in p->trapframe->a0
        csrr t0, sscratch # after the swap above, sscratch holds the user's a0
        sd t0, 112(a0)    # store the user's a0 at offset 112 of the trapframe
        # ------- load the kernel's registers --------------------
        # restore kernel stack pointer from p->trapframe->kernel_sp
        ld sp, 8(a0)      # switch to the kernel stack

        # make tp hold the current hartid, from p->trapframe->kernel_hartid
        ld tp, 32(a0)     # tp = kernel_hartid

        # load the address of usertrap(), p->trapframe->kernel_trap
        ld t0, 16(a0)     # t0 = address of usertrap()

        # restore kernel page table from p->trapframe->kernel_satp
        ld t1, 0(a0)
        csrw satp, t1     # switch to the kernel page table
        sfence.vma zero, zero # flush the TLB so cached translations are dropped

        # a0 is no longer valid, since the kernel page
        # table does not specially map p->trapframe.

        # jump to usertrap(), which does not return
        jr t0             # jump to usertrap()

.globl userret
userret:# return path from the kernel back to user space
        # userret(TRAPFRAME, pagetable)
        # switch from kernel to user.
        # usertrapret() calls here.
        # a0: TRAPFRAME, in user page table.
        # a1: user page table, for satp.

        # switch to the user page table.
        csrw satp, a1              # switch back to the user page table
        sfence.vma zero, zero
        # ------- restore the user process's registers --------------------
        # put the saved user a0 in sscratch, so we
        # can swap it with our a0 (TRAPFRAME) in the last step.
        ld t0, 112(a0)
        csrw sscratch, t0

        # restore all but a0 from TRAPFRAME
        ld ra, 40(a0)
        ld sp, 48(a0)
        ld gp, 56(a0)
        ld tp, 64(a0)
        ld t0, 72(a0)
        ld t1, 80(a0)
        ld t2, 88(a0)
        ld s0, 96(a0)
        ld s1, 104(a0)
        ld a1, 120(a0)
        ld a2, 128(a0)
        ld a3, 136(a0)
        ld a4, 144(a0)
        ld a5, 152(a0)
        ld a6, 160(a0)
        ld a7, 168(a0)
        ld s2, 176(a0)
        ld s3, 184(a0)
        ld s4, 192(a0)
        ld s5, 200(a0)
        ld s6, 208(a0)
        ld s7, 216(a0)
        ld s8, 224(a0)
        ld s9, 232(a0)
        ld s10, 240(a0)
        ld s11, 248(a0)
        ld t3, 256(a0)
        ld t4, 264(a0)
        ld t5, 272(a0)
        ld t6, 280(a0)

	# restore user a0, and save TRAPFRAME in sscratch
        csrrw a0, sscratch, a0
        
        # return to user mode and user pc; sret resumes the user
        # process at the saved user pc.
        # usertrapret() set up sstatus and sepc.
        sret

必須注意的是, trampoline 在 kernel 和 user space 都被映射到同一個虛擬位址,所以切換分頁表之後,程式計數器 (PC) 所指的下一道指令仍然對應到同一段程式碼,不需要另外調整,這樣才能達到《無縫接軌》的效果。

而那些需要儲存與恢復的資料,則是放在 trapframe 當中,每個行程的 struct proc 結構當中,都會有一份 trapframe:

// per-process data for the trap handling code in trampoline.S.
// sits in a page by itself just under the trampoline page in the
// user page table. not specially mapped in the kernel page table.
// the sscratch register points here.
// uservec in trampoline.S saves user registers in the trapframe,
// then initializes registers from the trapframe's
// kernel_sp, kernel_hartid, kernel_satp, and jumps to kernel_trap.
// usertrapret() and userret in trampoline.S set up
// the trapframe's kernel_*, restore user registers from the
// trapframe, switch to the user page table, and enter user space.
// the trapframe includes callee-saved user registers like s0-s11 because the
// return-to-user path via usertrapret() doesn't return through
// the entire kernel call stack.
//
// The number in front of each field is its byte offset. trampoline.S
// uses these offsets as literal constants in its sd/ld instructions, so
// the field order and sizes must stay in sync with that file.
struct trapframe { // trap frame -- lets the kernel and a user process switch back and forth
  /*   0 */ uint64 kernel_satp;   // kernel page table
  /*   8 */ uint64 kernel_sp;     // top of process's kernel stack
  /*  16 */ uint64 kernel_trap;   // usertrap()
  /*  24 */ uint64 epc;           // saved user program counter
  /*  32 */ uint64 kernel_hartid; // saved kernel tp
  /*  40 */ uint64 ra;
  /*  48 */ uint64 sp;
  /*  56 */ uint64 gp;
  /*  64 */ uint64 tp;
  /*  72 */ uint64 t0;
  /*  80 */ uint64 t1;
  /*  88 */ uint64 t2;
  /*  96 */ uint64 s0;
  /* 104 */ uint64 s1;
  /* 112 */ uint64 a0;
  /* 120 */ uint64 a1;
  /* 128 */ uint64 a2;
  /* 136 */ uint64 a3;
  /* 144 */ uint64 a4;
  /* 152 */ uint64 a5;
  /* 160 */ uint64 a6;
  /* 168 */ uint64 a7;
  /* 176 */ uint64 s2;
  /* 184 */ uint64 s3;
  /* 192 */ uint64 s4;
  /* 200 */ uint64 s5;
  /* 208 */ uint64 s6;
  /* 216 */ uint64 s7;
  /* 224 */ uint64 s8;
  /* 232 */ uint64 s9;
  /* 240 */ uint64 s10;
  /* 248 */ uint64 s11;
  /* 256 */ uint64 t3;
  /* 264 */ uint64 t4;
  /* 272 */ uint64 t5;
  /* 280 */ uint64 t6;
};

結論

在每次中斷從 user process 切回 kernel 時,會切換分頁表。

從 kernel 返回 user process 時又會再切換一次分頁表。

這樣的切換代價,應該比 thread 的 Context-Switch 代價大得多了!