04. Formatted Printing - josehu07/hux-kernel GitHub Wiki

You might have noticed how tedious it could be to print an integer onto the terminal window with our terminal_write function. A formatted printing interface just like printf() is what we need. Once this is done, we will definitely invoke it hundreds or thousands of times in later development and debugging.

Main References of This Chapter

Scan through them before going forth:

Adding More Helper Libraries

First of all, let us complete our string library (we only have strlen() by now). See "Null-terminated byte strings" page for a collection of string utilities in C standard library. You can pick frequently used ones and duplicate their implementation from glibc source code glibc/string/<funcname>.c.

Code @ src/common/string.h:

/**
 * Raw memory utilities. These four MUST be present, as required by GCC.
 * NOTE(review): memset() is declared here with `unsigned char c`, whereas
 * the ISO C prototype takes `int c` -- confirm this is intentional.
 */
void *memset(void *dst, unsigned char c, size_t count);
void *memcpy(void *dst, const void *src, size_t count);
void *memmove(void *dst, const void *src, size_t count);
int memcmp(const void *ptr1, const void *ptr2, size_t count);

/** Length of a null-terminated string, excluding the terminator. */
size_t strlen(const char *str);

/** Length-bounded string utilities; each one is limited to COUNT chars. */
size_t strnlen(const char *str, size_t count);
int strncmp(const char *str1, const char *str2, size_t count);
char *strncpy(char *dst, const char *src, size_t count);
char *strncat(char *dst, const char *src, size_t count);

It is always recommended to use the length-specified versions of string utilities, so apart from strlen() I'm only implementing those.

Code @ src/common/string.c:

/**
 * Fills the first COUNT bytes of the region starting at DST with the
 * byte value C. A COUNT of zero leaves the region untouched.
 * Returns DST itself.
 */
void *
memset(void *dst, unsigned char c, size_t count)
{
    unsigned char *bytes = (unsigned char *) dst;
    for (size_t i = 0; i < count; ++i)
        bytes[i] = c;
    return dst;
}

/**
 * Duplicates COUNT bytes from the region at SRC into the region at DST,
 * walking both regions from their lowest address upwards.
 * The two regions are assumed not to overlap.
 * Returns DST itself.
 */
void *
memcpy(void *dst, const void *src, size_t count)
{
    unsigned char *out = (unsigned char *) dst;
    const unsigned char *in = (const unsigned char *) src;
    for (size_t i = 0; i < count; ++i)
        out[i] = in[i];
    return dst;
}

/**
 * Copies COUNT bytes from where SRC points to into where DST points to.
 * The result is as if the bytes were relayed through an internal buffer,
 * so it is OK if these two memory regions overlap.
 * Returns a copy of the pointer DST.
 */
void *
memmove(void *dst, const void *src, size_t count)
{
    unsigned long int dstp = (unsigned long int) dst;
    unsigned long int srcp = (unsigned long int) src;
    unsigned char *dst_copy = (unsigned char *) dst;
    const unsigned char *src_copy = (const unsigned char *) src;

    /**
     * Unsigned compare: true when DST is below SRC (the difference wraps
     * around to a huge value) or at least COUNT bytes above it. A forward
     * copy never clobbers unread source bytes in either case. The loop is
     * spelled out instead of calling memcpy(), whose contract excludes
     * overlapping regions.
     */
    if (dstp - srcp >= count) {
        while (count-- > 0)
            *dst_copy++ = *src_copy++;
        return dst;
    }

    /**
     * DST starts inside the SRC region, so copy from the last byte down
     * to the first. Both cursors start one past the end and are
     * decremented BEFORE each access, so exactly bytes [0, COUNT) are
     * touched.
     */
    dst_copy += count;
    src_copy += count;
    while (count-- > 0)
        *--dst_copy = *--src_copy;
    return dst;
}

/**
 * Compare two memory regions byte-wise over COUNT bytes. Returns zero if
 * they are equal (or if COUNT is zero).
 * Returns <0 if the first unequal byte has a lower unsigned value in
 * PTR1, and >0 if higher.
 */
int
memcmp(const void *ptr1, const void *ptr2, size_t count)
{
    /**
     * Bytes are read as unsigned char: plain char may be signed, which
     * would order bytes >= 0x80 below bytes < 0x80.
     */
    const unsigned char *lhs = (const unsigned char *) ptr1;
    const unsigned char *rhs = (const unsigned char *) ptr2;
    for (size_t i = 0; i < count; ++i) {
        if (lhs[i] != rhs[i])
            return ((int) lhs[i]) - ((int) rhs[i]);
    }
    return 0;
}


/**
 * Counts the chars in STR that come before its terminating '\0'.
 * The terminator itself is not counted.
 */
size_t
strlen(const char *str)
{
    const char *end = str;
    while (*end != '\0')
        end++;
    return (size_t) (end - str);
}


/**
 * Length of the string (excluding the terminating '\0').
 * If string STR does not terminate before reaching COUNT chars, returns
 * COUNT. At most the first COUNT chars of STR are ever read.
 */
size_t
strnlen(const char *str, size_t count)
{
    size_t len = 0;
    /**
     * The bound is tested BEFORE the char is read, so that an
     * unterminated buffer of exactly COUNT chars is never read at
     * index COUNT.
     */
    while (len < count && str[len] != '\0')
        len++;
    return len;
}

/**
 * Compare two strings, returning less than, equal to or greater than zero
 * if STR1 is lexicographically less than, equal to or greater than STR2.
 * Limited to up to COUNT chars; a COUNT of zero always compares equal.
 */
int
strncmp(const char *str1, const char *str2, size_t count)
{
    for (; count > 0; count--) {
        /**
         * Chars are compared as unsigned char: plain char may be signed,
         * which would order chars >= 0x80 below ASCII chars.
         */
        unsigned char c1 = (unsigned char) *str1++;
        unsigned char c2 = (unsigned char) *str2++;
        /** Stop at the end of STR1 or at the first differing char. */
        if (c1 == '\0' || c1 != c2)
            return ((int) c1) - ((int) c2);
    }
    return 0;
}

/**
 * Copies at most COUNT chars of string SRC into DST and returns DST.
 * When SRC is shorter than COUNT, the rest of the first COUNT bytes of
 * DST is filled with null bytes. A null terminator is always written
 * right after the copied chars, even when SRC has COUNT or more chars,
 * in which case it lands at DST[COUNT]. DST is therefore assumed to
 * hold at least COUNT + 1 bytes.
 */
char *
strncpy(char *dst, const char *src, size_t count)
{
    size_t copied = strnlen(src, count);
    size_t padding = count - copied;

    if (padding != 0)
        memset(dst + copied, '\0', padding);
    dst[copied] = '\0';
    memcpy(dst, src, copied);
    return dst;
}

/**
 * Appends at most COUNT chars of string SRC to the end of string DST,
 * then null-terminates the result. DST is assumed to be large enough
 * for its current content, the appended chars, and the terminator.
 * Returns DST itself.
 */
char *
strncat(char *dst, const char *src, size_t count)
{
    char *tail = dst + strlen(dst);
    size_t appended = strnlen(src, count);

    tail[appended] = '\0';
    memcpy(tail, src, appended);
    return dst;
}

Another common task is to identify what type a character is. We put them in a separate library @ src/common/types.h:

/** Decimal digit ('0'-'9') and hexadecimal digit tests. */
bool isdigit(char c);
bool isxdigit(char c);

/** Letter tests: either case, lowercase only, uppercase only. */
bool isalpha(char c);
bool islower(char c);
bool isupper(char c);

/** Whitespace character test. */
bool isspace(char c);

Code @ src/common/types.c:

/**
 * If C is a decimal digit ('0' - '9').
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition only
 * produces an external symbol when a non-inline declaration is also
 * visible in the translation unit, so callers in other files could be
 * left without a definition to link against.
 */
bool
isdigit(char c)
{
    return ('0' <= c) && (c <= '9');
}

/**
 * If C is a hexadecimal digit ('0' - '9', 'A' - 'F', or 'a' - 'f').
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition only
 * produces an external symbol when a non-inline declaration is also
 * visible in the translation unit, so callers in other files could be
 * left without a definition to link against.
 */
bool
isxdigit(char c)
{
    return (('0' <= c) && (c <= '9')) ||
           (('A' <= c) && (c <= 'F')) ||
           (('a' <= c) && (c <= 'f'));
}

/**
 * If C is an uppercase letter ('A' - 'Z').
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition only
 * produces an external symbol when a non-inline declaration is also
 * visible in the translation unit, so callers in other files could be
 * left without a definition to link against.
 */
bool
isupper(char c)
{
    return ('A' <= c) && (c <= 'Z');
}

/**
 * If C is a lowercase letter ('a' - 'z').
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition only
 * produces an external symbol when a non-inline declaration is also
 * visible in the translation unit, so callers in other files could be
 * left without a definition to link against.
 */
bool
islower(char c)
{
    return ('a' <= c) && (c <= 'z');
}

/**
 * If C is a letter, i.e., either islower(C) or isupper(C) holds.
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition only
 * produces an external symbol when a non-inline declaration is also
 * visible in the translation unit, so callers in other files could be
 * left without a definition to link against.
 */
bool
isalpha(char c)
{
    return islower(c) || isupper(c);
}

/**
 * If C is a whitespace:
 *   - space (0x20)
 *   - form feed (0x0c)
 *   - line feed (0x0a, \n)
 *   - carriage return (0x0d, \r)
 *   - horizontal tab (0x09, \t)
 *   - vertical tab (0x0b)
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition only
 * produces an external symbol when a non-inline declaration is also
 * visible in the translation unit, so callers in other files could be
 * left without a definition to link against.
 */
bool
isspace(char c)
{
    return (c == 0x20) || (c == 0x0c) || (c == 0x0a) || (c == 0x0d) ||
           (c == 0x09) || (c == 0x0b);
}

Variadic Functions in C

Functions that accept a variable number of arguments are called variadic functions. Standard printf() & scanf() are typical examples of variadic functions. A variadic function is declared with ... as the last argument, and arguments are processed with a va_list struct and va_[start|end|arg] macros imported from <stdarg.h> (this is one of those libraries available in freestanding C coding).

System V ABI-i386 Calling Convention

To understand how a variable-length argument list is processed, we should first look at the System V ABI-i386 calling convention. The following code:

/** Credit to https://www.tenouk.com/Bufferoverflowc/Bufferoverflow2a.html. */

/**
 * Demo callee used to illustrate the calling convention. Takes an int
 * and a char argument, declares two initialized locals, and returns 0.
 * The locals are never read; they are only there so that they show up
 * in the function's stack frame.
 */
int
myfunc(int p1, char p2)
{
    int local1 = 9;
    char local2 = 'Z';
    return 0;
}

/**
 * Demo caller: invokes myfunc() with the int 7 and the char '8', ignores
 * its return value, and returns 0.
 */
int
main(int argc, char *argv[])
{
    myfunc(7, '8');
    return 0;
}

would result in the following memory snapshot right before myfunc() returns (suppose no compiler optimizations such as using spare registers to hold the arguments):

Notice that arguments are pushed onto the stack from right to left (the stack grows downwards ↓).

Using Variable-Length Argument Lists

Variable-length argument list logically works as follows:

  1. va_list va declares a pointer
  2. va_start(va, x) initializes the pointer as right above x in stack memory, telling that "all arguments above x are variadic arguments"
  3. each va_arg(va, type) fetches sizeof(type) bytes off the pointer as a variable of type type, and updates the pointer
  4. va_end(va) destroys the pointer

Notice that variadic functions always take in at least one definite (non-variadic) argument as well. We need definite arguments to guide the variadic function "how many arguments should I process and what is the next one's type to fetch". Think of the formatting string in printf().

Special attention should be paid to which type we should specify in va_arg(va, type): please read "Special Issues with Varargs Functions" ✭. In short, to fetch

  • Unsigned long: va_arg(va, unsigned long)
  • Signed long: va_arg(va, long)
  • Unsigned int/short/char: va_arg(va, unsigned int)
  • Signed int/short/char: va_arg(va, int)
  • Double/float: va_arg(va, double)

Implementing printf()

We will mainly follow Marco's implementation (thank you, Marco!) with only limited features supported. By throwing away some strange and impractical specifications in the C standard library, we are able to write way clearer code. Note that the following implementation will not behave in the same way as the original C standard library. Compatibility is not a major concern of Hux ✭.

A format specifier follows: %[special][width][.precision][length]<type>. Some format specifiers are not supported (e.g., e & g) and some are handled differently from C standard library (e.g., x, o, & b). Do not use my implementation for compatibility-required code!

String - Number Conversion

The next step is to enable string -> unsigned integer, integer -> string, float -> string conversion. We will implement these internally (only exposing to printf.c), because they have some weird interfaces that are only useful for formatted printing.

Code @ src/common/printf.c:

/**
 * This implementation is too long & tedious to fit in the wiki body. Check
 * out the code file @ `src/common/printf.c`.
 */

Note that we use val != val for testing NaN float values. This is definitely correct but might trigger a tautological-compare warning when compiling. Add a -Wno-tautological-compare flag in the Makefile.

Main Formatted Printing Logic

Actual formatted printing logic is in _vprintf & _vsnprintf @ src/common/printf.c:

/**
 * This implementation is too long & tedious to fit in the wiki body. Check
 * the code file @ `src/common/printf.c` directly.
 */

At the very bottom, add the interface we want to expose to callers:

/**
 * Prints the format string FMT, expanded with the variadic arguments
 * that follow it, to the terminal window in the default foreground color.
 */
void
printf(const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    _vprintf(TERMINAL_DEFAULT_COLOR_FG, fmt, args);
    va_end(args);
}

/**
 * Prints the format string FMT, expanded with the variadic arguments
 * that follow it, to the terminal window using foreground color FG.
 */
void
cprintf(vga_color_t fg, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    _vprintf(fg, fmt, args);
    va_end(args);
}

/**
 * Writes the format string FMT, expanded with the variadic arguments
 * that follow it, into the string buffer BUF instead of the terminal.
 * Output is limited to COUNT chars. A trailing null byte is added
 * implicitly even when the COUNT chars limit has been reached, so BUF
 * must be at least COUNT + 1 in length.
 */
void
snprintf(char *buf, size_t count, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    _vsnprintf(buf, count, fmt, args);
    va_end(args);
}

Also, make the corresponding header @ src/common/printf.h:

/**
 * Formatted printing to the terminal window. printf() uses the default
 * foreground color; cprintf() uses the given foreground color FG.
 */
void printf(const char *fmt, ...);
void cprintf(vga_color_t fg, const char *fmt, ...);

/**
 * Formatted printing to the string buffer BUF, limited to COUNT chars
 * plus an implicit trailing null byte.
 */
void snprintf(char *buf, size_t count, const char *fmt, ...);

Progress So Far

Let's test our formatted printing implementation! Write a test suite @ src/kernel.c:

/** File-scope variable; only its address is printed by the tests below. */
static int static_var;


/**
 * The main function that `boot.s` jumps to.
 * Initializes the terminal, then runs a series of formatted printing
 * tests covering integers, floats, chars, strings, pointers, invalid
 * specifiers, and printing into a string buffer.
 */
void
kernel_main(void)
{
    terminal_init();

    /** Never assigned; only its address is used, as a sample stack address. */
    int stack_var;

    /** Integer conversions combined with flags, widths & length modifiers. */
    printf("[%+#010x], [%X], [%-+ #zu], [%0 5li], [%-7d], [%#ho], [%#b]\n",
           &static_var, &stack_var, sizeof(void *), (long) 791, -238, (short) 11, 13);

    /** Floating-point conversions, printed in green. */
    cprintf(VGA_COLOR_GREEN, "[%0+10.4lf], [%.3f], [%-10lf], [% 8F], [%#F]\n",
            37.9, -29086.008446435, 0.27121759, -3.14159, 2.0000718);

    /**
     * Char conversions (no trailing newline), followed on the same line
     * by string conversions in cyan.
     */
    printf("[%3c], [%-5c], [%c] | ", 'H', 'u', 'X');
    cprintf(VGA_COLOR_CYAN, "[%3s], [%-7s], [%s]\n", "hux-kernel", "Hux", "Kernel");

    /** Pointer conversions and the literal percent sign. */
    printf("[%p], [%p], [%%]\n", &static_var, &stack_var);

    /** Deliberately invalid specifiers, passed without any arguments. */
    printf("%-#0t, %123-d, %m ... - These are invalid!\n");

    /** Format into a local buffer first, then print the buffer in blue. */
    char buf[100];

    snprintf(buf, 99, "Stack pointer: %p\n", &stack_var);
    cprintf(VGA_COLOR_BLUE, "Buf contains: %s", buf);
}

This should produce a terminal window like the following after booting up:

Current repo structure:

hux-kernel
├── Makefile  
├── scripts
│   ├── grub.cfg
│   └── kernel.ld
├── src
│   ├── boot
│   │   └── boot.s
│   ├── common
│   │   ├── port.c
│   │   ├── port.h
│   │   ├── printf.c
│   │   ├── printf.h
│   │   ├── string.c
│   │   ├── string.h
│   │   ├── types.c
│   │   └── types.h
│   ├── display
│   │   ├── terminal.c
│   │   ├── terminal.h
│   │   └── vga.h
│   └── kernel.c
⚠️ **GitHub.com Fallback** ⚠️