21. FS Internal Operations - josehu07/hux-kernel GitHub Wiki

With the data structures defined, the next step is to implement the algorithms over those data structures and would help expose a system call API to user programs. This is by far the most coding-heavy chapter. Hux borrows quite a lot of FS code from xv6 - they share many common operations on inodes and open files, even though the overall layout and interfaces are different.

Main References of This Chapter

Scan through them before going forth:

Block-Level Operations

Let's begin by writing some wrappers over IDE disk requests.

Block-Level Reads/Writes

These functions translate a contiguous block-level I/O into proper IDE disk requests.

Code @ src/filesys/block.c:

/**
 * Helper function for reading blocks of data from disk into memory.
 * Uses an internal request buffer, so not zero-copy I/O. DST is the
 * destination buffer, and DISK_ADDR and LEN are both in bytes.
 */
static bool
_block_read(char *dst, uint32_t disk_addr, uint32_t len, bool boot)
{
    block_request_t req;

    uint32_t bytes_read = 0;
    while (len > bytes_read) {
        uint32_t bytes_left = len - bytes_read;

        uint32_t start_addr = disk_addr + bytes_read;
        uint32_t block_no = ADDR_BLOCK_NUMBER(start_addr);
        uint32_t req_offset = ADDR_BLOCK_OFFSET(start_addr);

        uint32_t next_addr = ADDR_BLOCK_ROUND_DN(start_addr) + BLOCK_SIZE;
        uint32_t effective = next_addr - start_addr;
        if (bytes_left < effective)
            effective = bytes_left;

        req.valid = false;
        req.dirty = false;
        req.block_no = block_no;
        bool success = boot ? idedisk_do_req_at_boot(&req)
                            : idedisk_do_req(&req);
        if (!success) {
            warn("block_read: reading IDE disk block %u failed", block_no);
            return false;
        }
        memcpy(dst + bytes_read, req.data + req_offset, effective);

        bytes_read += effective;
    }

    return true;
}

bool
block_read(char *dst, uint32_t disk_addr, uint32_t len)
{
    return _block_read(dst, disk_addr, len, false);
}

bool
block_read_at_boot(char *dst, uint32_t disk_addr, uint32_t len)
{
    return _block_read(dst, disk_addr, len, true);
}

/**
 * Helper function for writing blocks of data from memory into disk.
 * Uses an internal request buffer, so not zero-copy I/O. SRC is the
 * source buffer, and DISK_ADDR and LEN are both in bytes.
 */
bool
block_write(char *src, uint32_t disk_addr, uint32_t len)
{
    block_request_t req;

    uint32_t bytes_written = 0;
    while (len > bytes_written) {
        uint32_t bytes_left = len - bytes_written;

        uint32_t start_addr = disk_addr + bytes_written;
        uint32_t block_no = ADDR_BLOCK_NUMBER(start_addr);
        uint32_t req_offset = ADDR_BLOCK_OFFSET(start_addr);

        uint32_t next_addr = ADDR_BLOCK_ROUND_DN(start_addr) + BLOCK_SIZE;
        uint32_t effective = next_addr - start_addr;
        if (bytes_left < effective)
            effective = bytes_left;

        /**
         * If writing less than a block, first read out the block to
         * avoid corrupting what's already on disk.
         */
        if (effective < BLOCK_SIZE) {
            if (!block_read((char *) req.data, ADDR_BLOCK_ROUND_DN(start_addr),
                            BLOCK_SIZE)) {
                warn("block_write: failed to read out old block %u", block_no);
                return false;
            }
        }

        memcpy(req.data + req_offset, src + bytes_written, effective);
        req.valid = true;
        req.dirty = true;
        req.block_no = block_no;
        if (!idedisk_do_req(&req)) {
            warn("block_write: writing IDE disk block %u failed", block_no);
            return false;
        }

        bytes_written += effective;
    }

    return true;
}


// src/filesys/block.h

bool block_read(char *dst, uint32_t disk_addr, uint32_t len);
bool block_read_at_boot(char *dst, uint32_t disk_addr, uint32_t len);
bool block_write(char *src, uint32_t disk_addr, uint32_t len);

Allocating/Freeing Data Blocks

We will frequently need to find empty data blocks or to free out data blocks.

Code @ src/filesys/block.c:

/**
 * Allocate a free data block and mark it in use. Returns the block
 * disk address allocated, or 0 (which is invalid for a data block)
 * on failures.
 */
uint32_t
block_alloc(void)
{
    /** Get a free block from data bitmap. */
    uint32_t slot = bitmap_alloc(&data_bitmap);
    if (slot == data_bitmap.slots) {
        warn("block_alloc: no free data block left");
        return 0;
    }

    if (!data_bitmap_update(slot)) {
        warn("block_alloc: failed to persist data bitmap");
        return 0;
    }

    uint32_t disk_addr = DISK_ADDR_DATA_BLOCK(slot);

    /** Zero the block out for safety. */
    char buf[BLOCK_SIZE];
    memset(buf, 0, BLOCK_SIZE);
    if (!block_write(buf, disk_addr, BLOCK_SIZE)) {
        warn("block_alloc: failed to zero out block %p", disk_addr);
        bitmap_clear(&data_bitmap, slot);
        data_bitmap_update(slot);   /** Ignores error. */
        return 0;
    }

    return disk_addr;
}

/** Free a disk data block. */
void
block_free(uint32_t disk_addr)
{
    assert(disk_addr >= DISK_ADDR_DATA_BLOCK(0));
    uint32_t slot = (disk_addr / BLOCK_SIZE) - superblock.data_start;

    bitmap_clear(&data_bitmap, slot);
    data_bitmap_update(slot);   /** Ignores error. */

    /** Zero the block out for safety. */
    char buf[BLOCK_SIZE];
    memset(buf, 0, BLOCK_SIZE);
    if (!block_write(buf, ADDR_BLOCK_ROUND_DN(disk_addr), BLOCK_SIZE))
        warn("block_free: failed to zero out block %p", disk_addr);
}


// src/filesys/block.h

uint32_t block_alloc();
void block_free(uint32_t disk_addr);

Inode & Open File Operations

It would be a good idea to implement some common operations on inodes and open files.

Getting/Putting Inodes

We define the action of fetching an inode structure with given inumber (and bringing it from disk into icache if necessary) as get, and the action of dropping a reference to an inode as put ✭.

Code @ src/filesys/file.c:

/** Lock/Unlock a mem_ionde's private fields. */
void
inode_lock(mem_inode_t *m_inode)
{
    parklock_acquire(&(m_inode->lock));
}

void
inode_unlock(mem_inode_t *m_inode)
{
    parklock_release(&(m_inode->lock));
}


/**
 * Get inode for given inode number. If that inode has been in memory,
 * increment its ref count and return. Otherwise, read from the disk into
 * an empty inode cache slot.
 */
static mem_inode_t *
_inode_get(uint32_t inumber, bool boot)
{
    assert(inumber < (superblock.inode_blocks * (BLOCK_SIZE / INODE_SIZE)));

    mem_inode_t *m_inode = NULL;

    spinlock_acquire(&icache_lock);

    /** Search icache to see if it has been in memory. */
    mem_inode_t *empty_slot = NULL;
    for (m_inode = icache; m_inode < &icache[MAX_MEM_INODES]; ++m_inode) {
        if (m_inode->ref_cnt > 0 && m_inode->inumber == inumber) {
            m_inode->ref_cnt++;
            spinlock_release(&icache_lock);
            return m_inode;
        }
        if (empty_slot == NULL && m_inode->ref_cnt == 0)
            empty_slot = m_inode;   /** Remember empty slot seen. */
    }

    if (empty_slot == NULL) {
        warn("inode_get: no empty mem_inode slot");
        spinlock_release(&icache_lock);
        return NULL;
    }

    m_inode = empty_slot;
    m_inode->inumber = inumber;
    m_inode->ref_cnt = 1;
    spinlock_release(&icache_lock);

    /** Lock the inode and read from disk. */
    inode_lock(m_inode);
    bool success = boot ? block_read_at_boot((char *) &(m_inode->d_inode),
                                             DISK_ADDR_INODE(inumber),
                                             sizeof(inode_t))
                        : block_read((char *) &(m_inode->d_inode),
                                     DISK_ADDR_INODE(inumber),
                                     sizeof(inode_t));
    if (!success) {
        warn("inode_get: failed to read inode %u from disk", inumber);
        inode_unlock(m_inode);
        return NULL;
    }
    inode_unlock(m_inode);

    // _print_icache_state();
    return m_inode;
}

mem_inode_t *
inode_get(uint32_t inumber)
{
    return _inode_get(inumber, false);
}

mem_inode_t *
inode_get_at_boot(uint32_t inumber)
{
    return _inode_get(inumber, true);
}

/** Increment reference to an already-got inode. */
void
inode_ref(mem_inode_t *m_inode)
{
    spinlock_acquire(&icache_lock);
    assert(m_inode->ref_cnt > 0);
    m_inode->ref_cnt++;
    spinlock_release(&icache_lock);
}

/**
 * Put down a reference to an inode. If the reference count goes to
 * zero, this icache slot becomes empty.
 */
void
inode_put(mem_inode_t *m_inode)
{
    spinlock_acquire(&icache_lock);
    assert(!parklock_holding(&(m_inode->lock)));
    assert(m_inode->ref_cnt > 0);
    m_inode->ref_cnt--;
    spinlock_release(&icache_lock);
}


// src/filesys/file.h

void inode_lock(mem_inode_t *m_inode);
void inode_unlock(mem_inode_t *m_inode);

mem_inode_t *inode_get(uint32_t inumber);
mem_inode_t *inode_get_at_boot(uint32_t inumber);
void inode_ref(mem_inode_t *m_inode);
void inode_put(mem_inode_t *m_inode);

Allocating/Freeing Inodes

When creating a new file, a new inode needs to be allocated on disk. When removing a file, its inode needs to be erased from disk.

Code @ src/filesys/file.c:

/** Flush an in-memory modified inode to disk. */
static bool
_flush_inode(mem_inode_t *m_inode)
{
    return block_write((char *) &(m_inode->d_inode),
                       DISK_ADDR_INODE(m_inode->inumber),
                       sizeof(inode_t));
}

/** Allocate an inode structure on disk (and gets into memory). */
mem_inode_t *
inode_alloc(uint32_t type)
{
    /** Get a free slot according to bitmap. */
    uint32_t inumber = bitmap_alloc(&inode_bitmap);
    if (inumber == inode_bitmap.slots) {
        warn("inode_alloc: no free inode slot left");
        return NULL;
    }

    inode_t d_inode;
    memset(&d_inode, 0, sizeof(inode_t));
    d_inode.type = type;
    
    /** Persist to disk: bitmap first, then the inode. */
    if (!inode_bitmap_update(inumber)) {
        warn("inode_alloc: failed to persist inode bitmap");
        bitmap_clear(&inode_bitmap, inumber);
        return NULL;
    }

    if (!block_write((char *) &d_inode,
                     DISK_ADDR_INODE(inumber),
                     sizeof(inode_t))) {
        warn("inode_alloc: failed to persist inode %u", inumber);
        bitmap_clear(&inode_bitmap, inumber);
        inode_bitmap_update(inumber);   /** Ignores error. */
        return NULL;
    }

    return inode_get(inumber);
}

/**
 * Free an on-disk inode structure (removing a file). Avoids calling
 * `_walk_inode_index()` repeatedly.
 * Must be called with lock on M_INODE held.
 */
void
inode_free(mem_inode_t *m_inode)
{
    m_inode->d_inode.size = 0;
    m_inode->d_inode.type = 0;

    /** Direct. */
    for (size_t idx0 = 0; idx0 < NUM_DIRECT; ++idx0) {
        if (m_inode->d_inode.data0[idx0] != 0) {
            block_free(m_inode->d_inode.data0[idx0]);
            m_inode->d_inode.data0[idx0] = 0;
        }
    }
    
    /** Singly-indirect. */
    for (size_t idx0 = 0; idx0 < NUM_INDIRECT1; ++idx0) {
        uint32_t ib1_addr = m_inode->d_inode.data1[idx0];
        if (ib1_addr != 0) {
            uint32_t ib1[UINT32_PB];
            if (block_read((char *) ib1, ib1_addr, BLOCK_SIZE)) {
                for (size_t idx1 = 0; idx1 < UINT32_PB; ++idx1) {
                    if (ib1[idx1] != 0)
                        block_free(ib1[idx1]);
                }
            }
            block_free(ib1_addr);
            m_inode->d_inode.data1[idx0] = 0;
        }
    }

    /** Doubly-indirect. */
    for (size_t idx0 = 0; idx0 < NUM_INDIRECT2; ++idx0) {
        uint32_t ib1_addr = m_inode->d_inode.data2[idx0];
        if (ib1_addr != 0) {
            uint32_t ib1[UINT32_PB];
            if (block_read((char *) ib1, ib1_addr, BLOCK_SIZE)) {
                for (size_t idx1 = 0; idx1 < UINT32_PB; ++idx1) {
                    uint32_t ib2_addr = ib1[idx1];
                    if (ib2_addr != 0) {
                        uint32_t ib2[UINT32_PB];
                        if (block_read((char *) ib2, ib2_addr, BLOCK_SIZE)) {
                            for (size_t idx2 = 0; idx2 < UINT32_PB; ++idx2) {
                                if (ib2[idx2] != 0)
                                    block_free(ib2[idx2]);
                            }
                        }
                        block_free(ib1[idx1]);
                    }
                }
            }
            block_free(ib1_addr);
            m_inode->d_inode.data2[idx0] = 0;
        }
    }

    _flush_inode(m_inode);

    bitmap_clear(&inode_bitmap, m_inode->inumber);
    inode_bitmap_update(m_inode->inumber);      /** Ignores error. */
}


// src/filesys/file.h

mem_inode_t *inode_alloc(uint32_t type);
void inode_free(mem_inode_t *m_inode);

Reading/Writing at Offset

Reading or writing an on-disk file at a given offset for a given length is an operation required by almost any file-related syscall, not only read() and write(), because creating a file or removing a file also modifies the data content of its parent directory ✭.

Code @ src/filesys/file.c:

/**
 * Walk the indexing array to get block number for the n-th block.
 * Allocates the block if was not allocated. Returns address 0
 * (which is invalid for a data block) on failures.
 */
static uint32_t
_walk_inode_index(mem_inode_t *m_inode, uint32_t idx)
{
    /** Direct. */
    if (idx < NUM_DIRECT) {
        if (m_inode->d_inode.data0[idx] == 0)
            m_inode->d_inode.data0[idx] = block_alloc();
        return m_inode->d_inode.data0[idx];
    }
    
    /** Singly-indirect. */
    idx -= NUM_DIRECT;
    if (idx < NUM_INDIRECT1 * UINT32_PB) {
        size_t idx0 = idx / UINT32_PB;
        size_t idx1 = idx % UINT32_PB;

        /** Load indirect1 block. */
        uint32_t ib1_addr = m_inode->d_inode.data1[idx0];
        if (ib1_addr == 0) {
            ib1_addr = block_alloc();
            if (ib1_addr == 0)
                return 0;
            m_inode->d_inode.data1[idx0] = ib1_addr;
        }
        uint32_t ib1[UINT32_PB];
        if (!block_read((char *) ib1, ib1_addr, BLOCK_SIZE))
            return 0;

        /** Index in the indirect1 block. */
        if (ib1[idx1] == 0) {
            ib1[idx1] = block_alloc();
            if (ib1[idx1] == 0)
                return 0;
            if (!block_write((char *) ib1, ib1_addr, BLOCK_SIZE))
                return 0;
        }

        return ib1[idx1];
    }

    /** Doubly indirect. */
    idx -= NUM_INDIRECT1 * UINT32_PB;
    if (idx < NUM_INDIRECT2 * UINT32_PB*UINT32_PB) {
        size_t idx0 = idx / (UINT32_PB*UINT32_PB);
        size_t idx1 = (idx % (UINT32_PB*UINT32_PB)) / UINT32_PB;
        size_t idx2 = idx % UINT32_PB;

        /** Load indirect1 block. */
        uint32_t ib1_addr = m_inode->d_inode.data2[idx0];
        if (ib1_addr == 0) {
            ib1_addr = block_alloc();
            if (ib1_addr == 0)
                return 0;
            m_inode->d_inode.data2[idx0] = ib1_addr;
        }
        uint32_t ib1[UINT32_PB];
        if (!block_read((char *) ib1, ib1_addr, BLOCK_SIZE))
            return 0;

        /** Load indirect2 block. */
        uint32_t ib2_addr = ib1[idx1];
        if (ib2_addr == 0) {
            ib2_addr = block_alloc();
            if (ib2_addr == 0)
                return 0;
            ib1[idx1] = ib2_addr;
            if (!block_write((char *) ib1, ib1_addr, BLOCK_SIZE))
                return 0;
        }
        uint32_t ib2[UINT32_PB];
        if (!block_read((char *) ib2, ib2_addr, BLOCK_SIZE))
            return 0;

        /** Index in the indirect2 block. */
        if (ib2[idx2] == 0) {
            ib2[idx2] = block_alloc();
            if (ib2[idx2] == 0)
                return 0;
            if (!block_write((char *) ib2, ib2_addr, BLOCK_SIZE))
                return 0;
        }

        return ib2[idx2];
    }

    warn("walk_inode_index: index %u is out of range", idx);
    return 0;
}

/**
 * Read data at logical offset from inode. Returns the number of bytes
 * actually read.
 * Must with lock on M_INODE held.
 */
size_t
inode_read(mem_inode_t *m_inode, char *dst, uint32_t offset, size_t len)
{
    if (offset > m_inode->d_inode.size)
        return 0;
    if (offset + len > m_inode->d_inode.size)
        len = m_inode->d_inode.size - offset;

    uint32_t bytes_read = 0;
    while (len > bytes_read) {
        uint32_t bytes_left = len - bytes_read;

        uint32_t start_offset = offset + bytes_read;
        uint32_t req_offset = ADDR_BLOCK_OFFSET(start_offset);

        uint32_t next_offset = ADDR_BLOCK_ROUND_DN(start_offset) + BLOCK_SIZE;
        uint32_t effective = next_offset - start_offset;
        if (bytes_left < effective)
            effective = bytes_left;

        uint32_t block_addr = _walk_inode_index(m_inode, start_offset / BLOCK_SIZE);
        if (block_addr == 0) {
            warn("inode_read: failed to walk inode index on offset %u", start_offset);
            return bytes_read;
        }

        if (!block_read(dst + bytes_read, block_addr + req_offset, effective)) {
            warn("inode_read: failed to read disk address %p", block_addr);
            return bytes_read;
        }

        bytes_read += effective;
    }

    return bytes_read;
}

/**
 * Write data at logical offset of inode. Returns the number of bytes
 * actually written. Will extend the inode if the write exceeds current
 * file size.
 * Must with lock on M_INODE held.
 */
size_t
inode_write(mem_inode_t *m_inode, char *src, uint32_t offset, size_t len)
{
    if (offset > m_inode->d_inode.size)
        return 0;

    uint32_t bytes_written = 0;
    while (len > bytes_written) {
        uint32_t bytes_left = len - bytes_written;

        uint32_t start_offset = offset + bytes_written;
        uint32_t req_offset = ADDR_BLOCK_OFFSET(start_offset);

        uint32_t next_offset = ADDR_BLOCK_ROUND_DN(start_offset) + BLOCK_SIZE;
        uint32_t effective = next_offset - start_offset;
        if (bytes_left < effective)
            effective = bytes_left;

        uint32_t block_addr = _walk_inode_index(m_inode, start_offset / BLOCK_SIZE);
        if (block_addr == 0) {
            warn("inode_write: failed to walk inode index on offset %u", start_offset);
            return bytes_written;
        }

        if (!block_write(src + bytes_written, block_addr + req_offset, effective)) {
            warn("inode_write: failed to write block address %p", block_addr);
            return bytes_written;
        }

        bytes_written += effective;
    }

    /** Update inode size if extended. */
    if (offset + len > m_inode->d_inode.size) {
        m_inode->d_inode.size = offset + len;
        _flush_inode(m_inode);
    }

    return bytes_written;
}


// src/filesys/file.h

size_t inode_read(mem_inode_t *m_inode, char *dst, uint32_t offset, size_t len);
size_t inode_write(mem_inode_t *m_inode, char *src, uint32_t offset, size_t len);

Getting/Putting Open Files

For open file structures in the ftable, there will be a similar get/put pair like inodes.

Code @ src/filesys/file.c:

/** Allocate a slot in the opne file table. Returns NULL on failure. */
file_t *
file_get(void)
{
    spinlock_acquire(&ftable_lock);

    for (file_t *file = ftable; file < &ftable[MAX_OPEN_FILES]; ++file) {
        if (file->ref_cnt == 0) {
            file->ref_cnt = 1;

            spinlock_release(&ftable_lock);
            return file;
        }
    }

    spinlock_release(&ftable_lock);
    return NULL;
}

/** Increment reference to an already-got file. */
void
file_ref(file_t *file)
{
    spinlock_acquire(&ftable_lock);
    assert(file->ref_cnt > 0);
    file->ref_cnt++;
    spinlock_release(&ftable_lock);
}

/**
 * Put down a reference to an open file. If the reference count goes to
 * zero, actually closes this file and this ftable slot becomes empty.
 */
void
file_put(file_t *file)
{
    mem_inode_t *inode;

    /** Decrement reference count. */
    spinlock_acquire(&ftable_lock);
    assert(file->ref_cnt > 0);
    
    file->ref_cnt--;
    if (file->ref_cnt > 0) {    /** Do nothing if still referenced. */
        spinlock_release(&ftable_lock);
        return;
    }

    inode = file->inode;        /** Remember inode for putting. */
    spinlock_release(&ftable_lock);

    /** Actually closing, put inode. */
    inode_put(inode);
}


// src/filesys/file.h

file_t *file_get();
void file_ref(file_t *file);
void file_put(file_t *file);

Directory Operations

Many file-related syscalls play with directories. It would also be nice to have some helper functions on directory operations.

Adding/Finding Entries

On a given directory, we should be able to search for an entry with given filename or to add a new entry.

Code @ src/filesys/vsfs.c:

/**
 * Look for a filename in a directory. Returns a got inode on success, or
 * NULL if not found. If found, sets *ENTRY_OFFSET to byte offset of the
 * entry.
 * Must be called with lock on DIR_INODE held.
 */
static mem_inode_t *
_dir_find(mem_inode_t *dir_inode, char *filename, uint32_t *entry_offset)
{
    assert(dir_inode->d_inode.type == INODE_TYPE_DIR);

    /** Search for the filename in this directory. */
    dentry_t dentry;
    for (size_t offset = 0;
         offset < dir_inode->d_inode.size;
         offset += sizeof(dentry_t)) {
        if (inode_read(dir_inode, (char *) &dentry, offset,
                       sizeof(dentry_t)) != sizeof(dentry_t)) {
            warn("dir_find: failed to read at offset %u", offset);
            return NULL;
        }
        if (dentry.valid == 0)
            continue;

        /** If matches, get the inode. */
        if (strncmp(dentry.filename, filename, MAX_FILENAME) == 0) {
            if (entry_offset != NULL)
                *entry_offset = offset;
            return inode_get(dentry.inumber);
        }
    }

    return NULL;
}

/** 
 * Add a new directory entry.
 * Must be called with lock on DIR_INODE held.
 */
static bool
_dir_add(mem_inode_t *dir_inode, char *filename, uint32_t inumber)
{
    /** The name must not be present. */
    mem_inode_t *file_inode = _dir_find(dir_inode, filename, NULL);
    if (file_inode != NULL) {
        warn("dir_add: file '%s' already exists", filename);
        inode_put(file_inode);
        return false;
    }

    /** Look for an emtpy directory entry. */
    dentry_t dentry;
    uint32_t offset;
    for (offset = 0;
         offset < dir_inode->d_inode.size;
         offset += sizeof(dentry_t)) {
        if (inode_read(dir_inode, (char *) &dentry, offset,
                       sizeof(dentry_t)) != sizeof(dentry_t)) {
            warn("dir_add: failed to read at offset %u", offset);
            return false;
        }
        if (dentry.valid == 0)
            break;
    }

    /** Add into this empty slot. */
    memset(&dentry, 0, sizeof(dentry_t));
    strncpy(dentry.filename, filename, MAX_FILENAME);
    dentry.inumber = inumber;
    dentry.valid = 1;
    if (inode_write(dir_inode, (char *) &dentry, offset,
                    sizeof(dentry_t)) != sizeof(dentry_t)) {
        warn("dir_add: failed to write at offset %u", offset);
        return false;
    }

    return true;
}

/**
 * Returns true if the directory is empty. Since directory size grows
 * in Hux and never gets recycled until removed, need to loop over all
 * allocated dentry slots to check if all are now unused.
 * Must be called with lock on DIR_INODE held.
 */
static bool
_dir_empty(mem_inode_t *dir_inode)
{
    dentry_t dentry;
    for (size_t offset = 2 * sizeof(dentry_t);      /** Skip '.' and '..' */
         offset < dir_inode->d_inode.size;
         offset += sizeof(dentry_t)) {
        if (inode_read(dir_inode, (char *) &dentry, offset,
                       sizeof(dentry_t)) != sizeof(dentry_t)) {
            warn("dir_empty: failed to read at offset %u", offset);
            return false;
        }
        if (dentry.valid != 0)
            return false;
    }

    return true;
}

Parsing Path Strings

We will also need to be able to parse a path string into a stream of directory inodes ✭.

Code @ src/filesys/vsfs.c:

/**
 * Copy the next path element into ELEM, and returns the rest path with
 * leading slashes removed.
 * E.g., _parse_elem("aaa///bb/c") sets ELEM to "aaa" and returns a pointer
 * to "bb/c". If there are no elements, return NULL.
 */
static char *
_parse_elem(char *path, char *elem)
{
    while (*path == '/')
        path++;
    if (*path == '\0')
        return NULL;

    char *elem_start = path;
    while (*path != '/' && *path != '\0')
        path++;
    size_t elem_len = path - elem_start;
    if (elem_len > MAX_FILENAME - 1)
        elem_len = MAX_FILENAME - 1;
    memcpy(elem, elem_start, elem_len);
    elem[elem_len] = '\0';

    while (*path == '/')
        path++;
    return path;
}

/**
 * Search the directory tree and get the inode for a path name.
 * Returns NULL if not found.
 *
 * If STOP_AT_PARENT is set, returns the inode for the parent
 * directory and writes the final path element into FILENAME,
 * which must be at least MAX_FILENAME in size.
 */
static mem_inode_t *
_path_to_inode(char *path, bool stop_at_parent, char *filename)
{
    mem_inode_t *inode;

    /** Starting point. */
    if (*path == '/')
        inode = inode_get(ROOT_INUMBER);
    else {
        inode = running_proc()->cwd;
        inode_ref(inode);
    }
    if (inode == NULL) {
        warn("path_lookup: failed to get starting point of %s", path);
        return NULL;
    }

    /** For each path element, go into that directory. */
    do {
        path = _parse_elem(path, filename);
        if (path == NULL)
            break;

        inode_lock(inode);

        if (inode->d_inode.type != INODE_TYPE_DIR) {
            inode_unlock(inode);
            inode_put(inode);
            return NULL;
        }

        if (stop_at_parent && *path == '\0') {
            inode_unlock(inode);
            return inode;     /** Stopping one-level early. */
        }

        mem_inode_t *next = _dir_find(inode, filename, NULL);
        if (next == NULL) {
            inode_unlock(inode);
            inode_put(inode);
            return NULL;
        }

        inode_unlock(inode);
        inode_put(inode);
        inode = next;
    } while (path != NULL);

    if (stop_at_parent) {
        inode_put(inode);   /** Path does not contain parent. */
        return NULL;
    }

    return inode; 
}

/** Wrappers for path lookup. */
static mem_inode_t *
_path_lookup(char *path)
{
    char filename[MAX_FILENAME];
    return _path_to_inode(path, false, filename);
}

static mem_inode_t *
_path_lookup_parent(char *path, char *filename)
{
    return _path_to_inode(path, true, filename);
}

Progress So Far

Since these internal operations on FS data structures are already a lot of code, I will move the file-related system calls implementation to the next chapter.

Current repo structure:

hux-kernel
├── Makefile
├── scripts
│   ├── gdb_init
│   ├── grub.cfg
│   ├── kernel.ld
│   └── mkfs.py
├── src
│   ├── boot
│   │   ├── boot.s
│   │   ├── elf.h
│   │   └── multiboot.h
│   ├── common
│   │   ├── bitmap.c
│   │   ├── bitmap.h
│   │   ├── debug.c
│   │   ├── debug.h
│   │   ├── intstate.c
│   │   ├── intstate.h
│   │   ├── parklock.c
│   │   ├── parklock.h
│   │   ├── port.c
│   │   ├── port.h
│   │   ├── printf.c
│   │   ├── printf.h
│   │   ├── spinlock.c
│   │   ├── spinlock.h
│   │   ├── string.c
│   │   ├── string.h
│   │   ├── types.c
│   │   └── types.h
│   ├── device
│   │   ├── idedisk.c
│   │   ├── idedisk.h
│   │   ├── keyboard.c
│   │   ├── keyboard.h
│   │   ├── sysdev.c
│   │   ├── sysdev.h
│   │   ├── timer.c
│   │   └── timer.h
│   ├── display
│   │   ├── sysdisp.c
│   │   ├── sysdisp.h
│   │   ├── terminal.c
│   │   ├── terminal.h
│   │   └── vga.h
│   ├── filesys
│   │   ├── block.c
│   │   ├── block.h
│   │   ├── file.c
│   │   ├── file.h
│   │   ├── vsfs.c
│   │   └── vsfs.h
│   ├── interrupt
│   │   ├── idt-load.s
│   │   ├── idt.c
│   │   ├── idt.h
│   │   ├── isr-stub.s
│   │   ├── isr.c
│   │   ├── isr.h
│   │   ├── syscall.c
│   │   └── syscall.h
│   ├── memory
│   │   ├── gdt-load.s
│   │   ├── gdt.c
│   │   ├── gdt.h
│   │   ├── kheap.c
│   │   ├── kheap.h
│   │   ├── paging.c
│   │   ├── paging.h
│   │   ├── slabs.c
│   │   ├── slabs.h
│   │   ├── sysmem.c
│   │   └── sysmem.h
│   ├── process
│   │   ├── layout.h
│   │   ├── process.c
│   │   ├── process.h
│   │   ├── scheduler.c
│   │   ├── scheduler.h
│   │   ├── switch.s
│   │   ├── sysproc.c
│   │   └── sysproc.h
│   └── kernel.c
├── user
│   ├── lib
│   │   ├── debug.h
│   │   ├── malloc.c
│   │   ├── malloc.h
│   │   ├── printf.c
│   │   ├── printf.h
│   │   ├── string.c
│   │   ├── string.h
│   │   ├── syscall.h
│   │   ├── syscall.s
│   │   ├── syslist.s
│   │   ├── types.c
│   │   └── types.h
│   └── init.c