15. System Calls API Setup - josehu07/hux-kernel GitHub Wiki

With the user mode execution mechanism we enabled in the last chapter, it is time to implement a framework for the system calls API. Our design is to put user programs in a separate folder user/ apart from kernel src/ and build user programs into independent ELF binaries. The kernel defines a set of syscalls, each syscall having a unique syscall number and expected arguments. The user side implements a set of user library functions that wrap over these syscalls. User programs link with the user library to run on Hux.

I will use a hello() syscall as an example throughout this chapter.

Main References of This Chapter

Scan through them before going forth:

Kernel Side Support

Syscall Arguments Parsing

We pass arguments on the user stack. It is worth making some argument parsing helper functions @ src/interrupt/syscall.c :

/** Helpers for getting something from user memory address. */
/**
 * Read a signed 32-bit integer from user address `addr` into `*ret`.
 *
 * The 4 bytes starting at `addr` must lie at or above the running
 * process's `stack_low` and must not extend past `USER_MAX`. Returns
 * true on success; logs a warning and returns false if the address is
 * rejected, in which case `*ret` is left untouched.
 */
bool
sysarg_addr_int(uint32_t addr, int32_t *ret)
{
    process_t *proc = running_proc();

    /**
     * Compare against the room left below USER_MAX instead of computing
     * `addr + 4`: that sum wraps around to a tiny value when `addr` is
     * within 4 bytes of the top of the 32-bit address space, which
     * would let a bogus address slip through the upper-bound check.
     */
    if (addr < proc->stack_low || addr >= USER_MAX || USER_MAX - addr < 4) {
        warn("sysarg_addr_int: invalid arg addr %p for %s", addr, proc->name);
        return false;
    }

    *ret = *((int32_t *) addr);
    return true;
}

/**
 * Read an unsigned 32-bit integer from user address `addr` into `*ret`.
 *
 * The 4 bytes starting at `addr` must lie at or above the running
 * process's `stack_low` and must not extend past `USER_MAX`. Returns
 * true on success; logs a warning and returns false if the address is
 * rejected, in which case `*ret` is left untouched.
 */
bool
sysarg_addr_uint(uint32_t addr, uint32_t *ret)
{
    process_t *proc = running_proc();

    /**
     * Compare against the room left below USER_MAX instead of computing
     * `addr + 4`: that sum wraps around to a tiny value when `addr` is
     * within 4 bytes of the top of the 32-bit address space, which
     * would let a bogus address slip through the upper-bound check.
     */
    if (addr < proc->stack_low || addr >= USER_MAX || USER_MAX - addr < 4) {
        warn("sysarg_addr_uint: invalid arg addr %p for %s", addr, proc->name);
        return false;
    }

    *ret = *((uint32_t *) addr);
    return true;
}

/**
 * Validate that the `len` bytes starting at user address `addr` form a
 * legal user buffer, and on success store the address in `*mem`.
 *
 * The range is rejected if it falls outside [USER_BASE, USER_MAX), if
 * it starts inside the gap [heap_high, stack_low) of the running
 * process, if it ends inside that gap, or if it starts below
 * `heap_high` and runs past it. Returns true on success; logs a warning
 * and returns false otherwise, leaving `*mem` untouched.
 */
bool
sysarg_addr_mem(uint32_t addr, char **mem, size_t len)
{
    process_t *proc = running_proc();

    /**
     * Check the length against the room left below USER_MAX rather than
     * forming `addr + len` first: both values come from the user, and a
     * huge `len` would make the sum wrap around and pass every
     * comparison made on it.
     */
    bool invalid = addr < USER_BASE || addr >= USER_MAX
                   || len > USER_MAX - addr;

    if (!invalid) {
        /** Cannot wrap: `len` was just bounded by `USER_MAX - addr`. */
        uint32_t end = addr + (uint32_t) len;

        invalid = (addr >= proc->heap_high && addr < proc->stack_low)
                  || (end > proc->heap_high && end <= proc->stack_low)
                  || (addr < proc->heap_high && end > proc->heap_high);
    }

    if (invalid) {
        warn("sysarg_addr_mem: invalid mem %p w/ len %d for %s",
             addr, len, proc->name);
        return false;
    }

    *mem = (char *) addr;
    return true;
}

/**
 * Validate that user address `addr` points to a null-terminated string
 * and, on success, store the address in `*str`.
 *
 * The start address must be inside [USER_BASE, USER_MAX) and outside
 * the gap [heap_high, stack_low) of the running process. The scan for
 * the terminator stops at `heap_high` when the string starts below it,
 * and at `USER_MAX` otherwise.
 *
 * Returns the string length (terminator excluded) on success, and -1 if
 * the address is rejected or no terminator is found before the bound.
 */
int32_t
sysarg_addr_str(uint32_t addr, char **str)
{
    process_t *proc = running_proc();

    if (addr >= USER_MAX || addr < USER_BASE
        || (addr >= proc->heap_high && addr < proc->stack_low)) {
        /** Tag the warning with this function's own name. */
        warn("sysarg_addr_str: invalid str %p for %s",
             addr, proc->name);
        return -1;
    }

    /** Never read past the end of the region the string starts in. */
    char *bound = addr < proc->heap_high ? (char *) proc->heap_high
                                         : (char *) USER_MAX;
    for (char *c = (char *) addr; c < bound; ++c) {
        if (*c == '\0') {
            *str = (char *) addr;
            return c - (char *) addr;
        }
    }
    return -1;
}


/**
 * Get syscall arguments on the user stack.
 *   - state->esp is the current user ESP;
 *   - 0(state->esp) is the return address, so skip;
 *   - starting from 4(state->esp) are the arguments, from stack
 *     bottom -> top are user lib arguments from left -> right.
 */

/**
 * Fetch the n-th (starting from 0-th) 32bit integer. Returns true on
 * success and false if address not in user stack.
 */
bool
sysarg_get_int(int8_t n, int32_t *ret)
{
    process_t *proc = running_proc();
    uint32_t addr = (proc->trap_state->esp) + 4 + (4 * n);
    return sysarg_addr_int(addr, ret);
}

/** Same but for uint32_t. */
bool
sysarg_get_uint(int8_t n, uint32_t *ret)
{
    process_t *proc = running_proc();
    uint32_t addr = (proc->trap_state->esp) + 4 + (4 * n);
    return sysarg_addr_uint(addr, ret);
}

/**
 * Fetch the n-th (starting from 0-th) 32-bit argument and interpret it
 * as a pointer to a bytes array of length `len`. On success stores the
 * pointer in `*mem` and returns true; returns false if the argument
 * slot or the pointed-to range is invalid.
 */
bool
sysarg_get_mem(int8_t n, char **mem, size_t len)
{
    uint32_t ptr;

    /**
     * The raw argument is an address, i.e. an unsigned value, so fetch
     * it with the unsigned helper instead of casting `&ptr` to a signed
     * pointer type.
     */
    if (!sysarg_get_uint(n, &ptr)) {
        warn("sysarg_get_mem: inner sysarg_get_uint failed");
        return false;
    }
    return sysarg_addr_mem(ptr, mem, len);
}

/**
 * Fetch the n-th (starting from 0-th) 32-bit argument and interpret it
 * as a pointer to a string. On success stores the pointer in `*str` and
 * returns the length of the string actually parsed; returns -1 if the
 * argument slot or the address is invalid, or the string is not
 * correctly null-terminated.
 */
int32_t
sysarg_get_str(int8_t n, char **str)
{
    uint32_t ptr;

    /**
     * The raw argument is an address, i.e. an unsigned value, so fetch
     * it with the unsigned helper instead of casting `&ptr` to a signed
     * pointer type.
     */
    if (!sysarg_get_uint(n, &ptr)) {
        warn("sysarg_get_str: inner sysarg_get_uint failed");
        return -1;
    }
    return sysarg_addr_str(ptr, str);
}


// src/interrupt/syscall.h

/**
 * Address-based helpers: validate a user-space address against the
 * running process's memory layout, then hand back what it refers to.
 * The `bool` variants return false on an invalid address; the string
 * variant returns the string length, or -1 on failure.
 */
bool sysarg_addr_int(uint32_t addr, int32_t *ret);
bool sysarg_addr_uint(uint32_t addr, uint32_t *ret);
bool sysarg_addr_mem(uint32_t addr, char **mem, size_t len);
int32_t sysarg_addr_str(uint32_t addr, char **str);

/**
 * Argument-based helpers: locate the n-th (0-based) 32-bit syscall
 * argument on the user stack, then validate it through the matching
 * address-based helper above. Same return conventions.
 */
bool sysarg_get_int(int8_t n, int32_t *ret);
bool sysarg_get_uint(int8_t n, uint32_t *ret);
bool sysarg_get_mem(int8_t n, char **mem, size_t len);
int32_t sysarg_get_str(int8_t n, char **str);

Completing the Syscall Handler

Our central ISR handler stub is not treating syscall traps correctly yet. We will let it invoke a syscall() wrapper function which checks EAX for the syscall number and dispatches the request to the corresponding end-point handler.

Add these definitions @ src/interrupt/syscall.h:

/**
 * Individual syscall handler type: void -> int32_t.
 *
 * Handlers take no C parameters; they fetch their arguments from the
 * user stack with the `sysarg_get_*()` helpers and return the code that
 * `syscall()` places in EAX.
 */
typedef int32_t (*syscall_t)(void);

/** Syscall unsuccessful return code. */
#define SYS_FAIL_RC (-1)

/**
 * Central syscall entry: dispatches on the syscall number found in
 * `state->eax` and writes the return code back to `state->eax`.
 */
void syscall(interrupt_state_t *state);


/**
 * List of known syscall numbers. Numbering starts at 1 because
 * `syscall()` rejects any number <= 0.
 */
#define SYSCALL_HELLO 1

Write a central handler entry wrapper @ src/interrupt/syscall.c:

/**
 * Centralized syscall handler entry.
 *   - The trap state holds the syscall number in register EAX
 *     and the arguments on user stack;
 *   - Returns a return code back to register EAX.
 *
 * An out-of-range number, or a number with no registered handler, logs
 * a warning and yields SYS_FAIL_RC instead of calling anything.
 *
 * User syscall library should do syscalls following this rule.
 */
void
syscall(interrupt_state_t *state)
{
    int32_t syscall_no = state->eax;

    /** Number 0 is never valid; the table is indexed from 1 upwards. */
    if (syscall_no <= 0 || syscall_no >= NUM_SYSCALLS) {
        warn("syscall: unknown syscall number %d", syscall_no);
        state->eax = SYS_FAIL_RC;
    } else if (syscall_handlers[syscall_no] == NULL) {
        /** A slot of the table that was left unfilled. */
        warn("syscall: null handler for syscall # %d", syscall_no);
        state->eax = SYS_FAIL_RC;
    } else {
        syscall_t handler = syscall_handlers[syscall_no];
        state->eax = handler();
    }
}

We will implement the syscalls in separate folders, for example, process operations go in src/process/sysproc.c, user heap memory operations go in src/memory/sysmem.c, etc. A simple hello syscall would look like this @ src/process/sysproc.c:

/**
 * int32_t hello(int32_t num, char *mem, int32_t len, char *str);
 *
 * Demo syscall: validates its four user arguments, then prints the
 * calling process's name, `num`, the first byte of `mem`, and `str`.
 * Returns 0 on success and SYS_FAIL_RC if any argument is invalid or
 * `len` is not positive.
 */
int32_t
syscall_hello(void)
{
    int32_t num, len;
    char *mem, *str;

    if (!sysarg_get_int(0, &num)) {
        return SYS_FAIL_RC;
    }

    /** `len` (arg 2) is fetched before `mem` (arg 1), which needs it. */
    if (!sysarg_get_int(2, &len)) {
        return SYS_FAIL_RC;
    }
    if (len <= 0) {
        return SYS_FAIL_RC;
    }
    if (!sysarg_get_mem(1, &mem, len)) {
        return SYS_FAIL_RC;
    }
    if (sysarg_get_str(3, &str) < 0) {
        return SYS_FAIL_RC;
    }

    process_t *proc = running_proc();
    printf("From syscall_hello handler: Hello, %s!\n", proc->name);
    printf("  num: %d, mem[0]: %c, str: %s\n", num, mem[0], str);

    return 0;
}


// src/process/sysproc.h
/**
 * Handler of the demo `hello` syscall. Returns 0 on success and
 * SYS_FAIL_RC when an argument fails validation.
 */
int32_t syscall_hello(void);

Maintain an array of function pointers of type syscall_t @ src/interrupt/syscall.c:

/**
 * Array of individual handlers: void -> int32_t functions, indexed by
 * syscall number. Slots without a designated initializer are NULL.
 *
 * Uses the standard C99 `[index] = value` designator form rather than
 * the obsolete GNU `[index] value` spelling.
 */
static syscall_t syscall_handlers[] = {
    [SYSCALL_HELLO] = syscall_hello
};

/** One past the highest syscall number present in the table. */
#define NUM_SYSCALLS ((int32_t) (sizeof(syscall_handlers) / sizeof(syscall_t)))

The interrupt handler stub needs to recognize the syscall trap number and invoke syscall(). Modifications to the handler stub @ src/interrupt/isr.c:

/**
 * Dump the running process's identity and the saved interrupt state
 * (interrupt number, error code, and selected registers).
 */
static void
_print_interrupt_state(interrupt_state_t *state)
{
    info("interrupt state:");

    /** Who was running when the interrupt hit. */
    process_t *curr = running_proc();
    printf("  Current process: %d - %s\n", curr->pid, curr->name);

    /** Saved interrupt number, error code and registers. */
    printf("  INT#: %d  ERR_CODE: %#010X\n", state->int_no, state->err_code);
    printf("  EAX: %#010X  EIP: %#010X  ESP: %#010X\n", state->eax,
                                                        state->eip,
                                                        state->esp);
    printf("   DS: %#010X   CS: %#010X   SS: %#010X\n", state->ds,
                                                        state->cs,
                                                        state->ss);
    printf("  EFLAGS: %#010X\n", state->eflags);
}

/**
 * C-level ISR handler.
 *
 * Takes a pointer to the saved interrupt state, services the interrupt
 * according to its number, and returns. The state may be modified
 * through the pointer if necessary.
 */
void
isr_handler(interrupt_state_t *state)
{
    uint8_t int_no = state->int_no;

    /** Numbers 0 - 31: an exception interrupt. */
    if (int_no <= 31) {
        if (isr_table[int_no] != NULL) {
            isr_table[int_no](state);
        } else {
            /** No actual ISR registered: dump state and raise an error. */
            _print_interrupt_state(state);
            error("isr: missing handler for exception (fault) # %#x", int_no);
        }
        return;
    }

    /** Numbers 32 - 47: an IRQ-translated interrupt from a device. */
    if (int_no <= 47) {
        uint8_t irq_no = int_no - 32;

        if (isr_table[int_no] != NULL) {
            isr_table[int_no](state);
        } else {
            _print_interrupt_state(state);
            error("isr: missing handler for device interrupt # %#x", int_no);
        }

        /** Acknowledge to the PIC after the handler has run. */
        _pic_send_eoi(irq_no);
        return;
    }

    /** Syscall trap. */
    if (int_no == INT_NO_SYSCALL) {
        /** Let the process find this trap's state via `trap_state`. */
        running_proc()->trap_state = state;

        /**
         * Hand over to the dispatcher in `syscall.c`: the syscall number
         * comes in through EAX, the arguments live on the user stack,
         * and the integer return code goes back out through EAX.
         */
        syscall(state);
        return;
    }

    /** Anything else is an interrupt number we do not know about. */
    _print_interrupt_state(state);
    error("isr: caught unknown interrupt # %#x", int_no);
}

User Side Library

Better Folder Structure

It is better to keep all user-side code in a separate user/ folder rather than mixing it with the kernel's src/. In the last few chapters, the init process I used as a demonstration was placed under src/process/, which does not make much sense. Reorganize the folder structure into:

src/
  - ...
  - kernel.c
user/
  - lib/
      - syscall.s
      - syscall.h
      - syslist.s
  - init.c (using C code now)
  - ... (other user programs)

This way, the user side is both logically and physically separated from the kernel side in our source tree ✭. Many other toy OS projects tend to mix everything together and have a flat folder structure, which I think is not a good practice.

User Syscall Library

The user syscall library is simply a set of wrappers over all the available syscalls that the system provides.

First, make a list that duplicates the syscall number definitions (instead of directly including kernel headers, again for better user/kernel separation). The list is written in GAS syntax constants @ user/lib/syslist.s:

/**
 * Syscall trap gate number.
 * NOTE(review): presumably must equal the kernel's INT_NO_SYSCALL, whose
 * definition is not part of this list - confirm against the kernel.
 */
INT_NO_SYSCALL = 64


/**
 * List of known syscall. These duplicate the kernel's syscall number
 * definitions (kernel side: `#define SYSCALL_HELLO 1`) and have to be
 * kept in sync with them by hand.
 */
SYSCALL_HELLO = 1

The wrapper implementations will be exactly the same for each syscall (except the syscall number and function name), so we use an assembly macro to do that for us. Code @ user/lib/syscall.s:

/** Pull in INT_NO_SYSCALL and the SYSCALL_* numbers. */
.include "syslist.s"


/**
 * Using an auto-generation macro, since every syscall expects the same
 * thing of putting arguments on stack, setting EAX to the number, etc.
 *
 *   - name: global function symbol to define for the wrapper;
 *   - no:   syscall number loaded into EAX before trapping.
 *
 * The wrapper leaves the stack alone: the kernel skips the return
 * address at 0(%esp) and reads the arguments starting from 4(%esp).
 * The kernel writes the return code to EAX, so a plain `ret` hands it
 * back to the caller.
 */
.macro SYSCALL_LIBGEN name, no
    .global \name
    .type \name, @function
    \name:
        movl $\no, %eax
        int $INT_NO_SYSCALL
        ret
.endm


/** One wrapper per syscall. */
SYSCALL_LIBGEN hello, SYSCALL_HELLO

And the user library header is simply externing all the function declarations:

// user/lib/syscall.h

#ifndef SYSCALL_H
#define SYSCALL_H


#include <stdint.h>


/**
 * Externed from ASM `syscall.s`.
 *
 * Be sure that all arguments & returns values are 32-bit values, since
 * Hux parses syscall arguments by simply getting 32-bit values on stack.
 */

/**
 * Demo syscall. The kernel handler validates the arguments and prints
 * the calling process's name, `num`, the first byte of `mem`, and `str`.
 *
 *   - num: an arbitrary integer to print;
 *   - mem: buffer of `len` bytes in user memory;
 *   - len: length of `mem`; must be greater than 0;
 *   - str: null-terminated string in user memory.
 *
 * Returns 0 on success and -1 if the kernel rejects an argument.
 */
extern int32_t hello(int32_t num, char *mem, int32_t len, char *str);


#endif

Makefile Tweaks

Our Makefile also needs to be changed to build the user programs correctly. There are several things we need to take care of:

  • It is expected that each user program xxx.c under user/ is an independent program and should be compiled & linked into an independent ELF binary xxx.bin (with init.c being the only exception).
  • For a user program, the linker should expect an entry symbol at main (the C main function).
  • For a user program, the linker must be set to relocate the text section to USER_BASE (with -Ttext 0x20000000) - that's where the text section gets loaded in our process virtual address space.

Modifications to the Makefile:

# Kernel C sources under ./src/ and the object file each one builds into.
C_SOURCES=$(shell find ./src/ -name "*.c")
C_OBJECTS=$(patsubst %.c, %.o, $(C_SOURCES))

# Kernel assembly sources under ./src/ and their object files.
S_SOURCES=$(shell find ./src/ -name "*.s")
S_OBJECTS=$(patsubst %.s, %.o, $(S_SOURCES))

# The init program is handled apart from other user programs: besides
# the linked ELF (.bin) it also gets a flat binary with no extension.
INIT_SOURCE=./user/init.c
INIT_OBJECT=./user/init.c.o
INIT_LINKED=./user/init.bin
INIT_BINARY=./user/init

# User library C sources under ./user/lib/ and their object files.
ULIB_C_SOURCES=$(shell find ./user/lib/ -name "*.c")
ULIB_C_OBJECTS=$(patsubst %.c, %.o, $(ULIB_C_SOURCES))

# User library assembly sources and their object files. Every "*.s" file
# in ./user/lib/ matches, including ones only meant to be `.include`d.
ULIB_S_SOURCES=$(shell find ./user/lib/ -name "*.s")
ULIB_S_OBJECTS=$(patsubst %.s, %.o, $(ULIB_S_SOURCES))

# User programs: every "*.c" under ./user/ outside lib/, minus init.c.
# Each program xxx.c builds into object xxx.c.o and linked ELF xxx.bin.
USER_SOURCES_ALL=$(shell find ./user/ -name "*.c" ! -path "./user/lib/*")
USER_SOURCES=$(filter-out $(INIT_SOURCE), $(USER_SOURCES_ALL))
USER_OBJECTS=$(patsubst %.c, %.c.o, $(USER_SOURCES))
USER_LINKEDS=$(patsubst %.c, %.bin, $(USER_SOURCES))


# Address passed to the linker via -Ttext for user programs, i.e. where
# their text section is relocated to.
ADDRSPACE_USER_BASE=0x20000000


# Assembler of the i686-elf cross toolchain.
ASM=i686-elf-as
ASM_FLAGS=

# C compiler. Kernel code is built with the user flags plus
# -fstack-protector.
CC=i686-elf-gcc
C_FLAGS_USER=-c -Wall -Wextra -ffreestanding -O2 -std=gnu99 -Wno-tautological-compare \
             -g -fno-omit-frame-pointer
C_FLAGS=$(C_FLAGS_USER) -fstack-protector

# Linking goes through the gcc driver, without the standard libraries.
LD=i686-elf-gcc
LD_FLAGS=-ffreestanding -O2 -nostdlib

OBJCOPY=i686-elf-objcopy
OBJDUMP=i686-elf-objdump


# Prefix echoed in front of every build step message.
HUX_MSG="[--Hux->]"


#
# Targets for building.
#
# `all` builds, in order: kernel objects, user library objects, every
# user program, the init program, then the kernel image, followed by the
# `verify` and `update` targets (not shown in this excerpt).
#
ALL_DEPS := $(S_OBJECTS) $(C_OBJECTS)
ALL_DEPS += $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS) $(USER_LINKEDS) initproc
ALL_DEPS += kernel verify update
all: $(ALL_DEPS)

# Static pattern rule: assemble each kernel "*.s" into its "*.o".
$(S_OBJECTS): %.o: %.s
    @echo
    @echo $(HUX_MSG) "Compiling kernel assembly '$<'..."
    $(ASM) $(ASM_FLAGS) -o $@ $<

# Static pattern rule: compile each kernel "*.c" into its "*.o" with the
# kernel flags.
$(C_OBJECTS): %.o: %.c
    @echo
    @echo $(HUX_MSG) "Compiling kernel C code '$<'..."
    $(CC) $(C_FLAGS) -o $@ $<

# User programs use more specific rules to build into independent binary.
#
# User library assembly: `-I ./user/lib/` lets `.include "syslist.s"`
# be resolved no matter where make is invoked from.
$(ULIB_S_OBJECTS): %.o: %.s
    @echo
    @echo $(HUX_MSG) "Compiling user lib assembly '$<'..."
    $(ASM) $(ASM_FLAGS) -I ./user/lib/ -o $@ $<

# User library C code is compiled with the user flags (no stack protector).
$(ULIB_C_OBJECTS): %.o: %.c
    @echo
    @echo $(HUX_MSG) "Compiling user lib C code '$<'..."
    $(CC) $(C_FLAGS_USER) -o $@ $<

# Build each user program xxx.c into an independent ELF xxx.bin:
#   1. compile it into the intermediate object xxx.c.o;
#   2. link it with the user library, with `main` as the entry symbol
#      and the text section relocated to ADDRSPACE_USER_BASE;
#   3. strip debug info from the result.
$(USER_LINKEDS): %.bin: %.c $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS)
    @echo
    @echo $(HUX_MSG) "Compiling & linking user program '$<'..."
    $(CC) $(C_FLAGS_USER) -o $<.o $<
    $(LD) $(LD_FLAGS) -e main -Ttext $(ADDRSPACE_USER_BASE) -o $@ \
        $<.o $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS)
    $(OBJCOPY) --strip-debug $@

# Init process goes separately, to allow later embedding into kernel image.
#
# Same compile & link steps as the other user programs, plus a final
# objcopy that flattens the ELF into the raw binary $(INIT_BINARY) which
# the `kernel` target embeds.
#
# NOTE: no file named `initproc` is ever created, so this recipe runs on
# every invocation of make.
initproc: $(INIT_SOURCE) $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS)
    @echo
    @echo $(HUX_MSG) "Compiling & linking user 'init' program..."
    $(CC) $(C_FLAGS_USER) -o $(INIT_OBJECT) $(INIT_SOURCE)
    $(LD) $(LD_FLAGS) -e main -Ttext $(ADDRSPACE_USER_BASE) -o $(INIT_LINKED) \
        $(INIT_OBJECT) $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS)
    $(OBJCOPY) --strip-debug $(INIT_LINKED)
    $(OBJCOPY) --strip-all -O binary $(INIT_LINKED) $(INIT_BINARY)

# Remember to link 'libgcc'. Embeds the init process binary.
#
# Order of the linker inputs matters:
#   - `-lgcc` comes AFTER the kernel objects, because the linker only
#     pulls in archive members that satisfy symbols left undefined by
#     inputs it has already seen;
#   - `-lgcc` comes BEFORE `-Wl,-b,binary`, because that option makes the
#     linker read every following input as raw binary.
# Afterwards the debug symbols are split off into $(TARGET_SYM) and
# stripped from the image itself.
kernel: $(S_OBJECTS) $(C_OBJECTS) initproc
    @echo
    @echo $(HUX_MSG) "Linking kernel image..."
    $(LD) $(LD_FLAGS) -T scripts/kernel.ld -o $(TARGET_BIN)        \
        -Wl,--oformat,elf32-i386 $(S_OBJECTS) $(C_OBJECTS) -lgcc  \
        -Wl,-b,binary,$(INIT_BINARY)
    $(OBJCOPY) --only-keep-debug $(TARGET_BIN) $(TARGET_SYM)
    $(OBJCOPY) --strip-debug $(TARGET_BIN)


# Remove everything the build produces: kernel and user library objects,
# the init program's object / ELF / flat binary, user program objects
# and ELFs, and the final kernel image, ISO and symbol files.
.PHONY: clean
clean:
    @echo
    @echo $(HUX_MSG) "Cleaning the build..."
    rm -f $(S_OBJECTS) $(C_OBJECTS) $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS) \
        $(INIT_OBJECT) $(INIT_LINKED) $(INIT_BINARY)                    \
        $(USER_OBJECTS) $(USER_LINKEDS)                                 \
        $(TARGET_BIN) $(TARGET_ISO) $(TARGET_SYM) 

Progress So Far

Let's make a syscall from user land into the kernel! If we write an init.c that invokes the hello() syscall @ user/init.c:

/**
 * Init program: issues a single hello() syscall with one argument of
 * each kind (integer, sized buffer, string), then stops.
 */
void
main(void)
{
    /** 7 characters plus the terminator; only the first 7 are passed. */
    char mem[8] = "ABCDEFG";
    char *str = "This is init!";
    int32_t num = 7913, len = 7;

    hello(num, mem, len, str);

    /**
     * `hlt` is not allowed in user mode, so this raises a general
     * protection fault once the syscall has returned.
     */
    asm volatile ( "hlt" );
}

After booting up, this should produce a terminal window like the following:

(Notice that after the syscall, executing hlt in user mode causes a general protection fault 0xd. In the next chapter, we will be implementing useful syscalls which include exit(), and by then all user programs must end with an exit() call.)

Current repo structure:

hux-kernel
├── Makefile
├── scripts
│   ├── gdb_init
│   ├── grub.cfg
│   └── kernel.ld
├── src
│   ├── boot
│   │   ├── boot.s
│   │   ├── elf.h
│   │   └── multiboot.h
│   ├── common
│   │   ├── debug.c
│   │   ├── debug.h
│   │   ├── port.c
│   │   ├── port.h
│   │   ├── printf.c
│   │   ├── printf.h
│   │   ├── string.c
│   │   ├── string.h
│   │   ├── types.c
│   │   └── types.h
│   ├── device
│   │   ├── keyboard.c
│   │   ├── keyboard.h
│   │   ├── timer.c
│   │   └── timer.h
│   ├── display
│   │   ├── terminal.c
│   │   ├── terminal.h
│   │   └── vga.h
│   ├── interrupt
│   │   ├── idt-load.s
│   │   ├── idt.c
│   │   ├── idt.h
│   │   ├── isr-stub.s
│   │   ├── isr.c
│   │   ├── isr.h
│   │   ├── syscall.c
│   │   └── syscall.h
│   ├── memory
│   │   ├── gdt-load.s
│   │   ├── gdt.c
│   │   ├── gdt.h
│   │   ├── kheap.c
│   │   ├── kheap.h
│   │   ├── paging.c
│   │   ├── paging.h
│   │   ├── slabs.c
│   │   └── slabs.h
│   ├── process
│   │   ├── layout.h
│   │   ├── process.c
│   │   ├── process.h
│   │   ├── scheduler.c
│   │   ├── scheduler.h
│   │   ├── switch.s
│   │   ├── sysproc.c
│   │   └── sysproc.h
│   └── kernel.c
├── user
│   ├── lib
│   │   ├── syscall.h
│   │   ├── syscall.s
│   │   └── syslist.s
│   └── init.c
⚠️ **GitHub.com Fallback** ⚠️