Patching WebAssembly thread implementation - chung-leong/zigar GitHub Wiki

As of Zig 0.14.0, thread support for WebAssembly is still a work in progress. When multithreaded is set to true, the Zig compiler will fail with a message informing you that multithread support for WasmAllocator is still a TODO item.

In order to use multithreading in your app, you will have to hot-patch the Zig standard library.

First, you need to find the standard library. It's stored in the lib sub-directory alongside the zig executable. If you've installed the compiler using zvm, you'll find it in ~/.zvm/0.14.0/lib.

Open lib/std/heap/WasmAllocator.zig and locate the following lines:

    if (!builtin.single_threaded) {
        @compileError("TODO implement support for multi-threaded wasm");
    }

Delete them. Scroll down to the part where variables are declared and add the following:

// Lock guarding the allocator's state variables.
// In multi-threaded builds this is a std.Thread.Mutex; in single-threaded
// builds it is an empty struct whose lock()/unlock() do nothing.
var mutex: switch (builtin.single_threaded) {
    false => std.Thread.Mutex,
    true => struct {
        // No-op stand-ins, so callers can lock/unlock unconditionally.
        inline fn lock(_: *@This()) void {}
        inline fn unlock(_: *@This()) void {}
    },
} = .{};

This mutex will protect the allocator's state variables from concurrent changes by different threads.

Scroll down to alloc() and ensure that it locks the mutex at the start and releases it before exit:

fn alloc(ctx: *anyopaque, len: usize, alignment: mem.Alignment, return_address: usize) ?[*]u8 {
    _ = ctx;
    _ = return_address;
    mutex.lock();
    defer mutex.unlock();

Then do the same to free():

fn free(
    ctx: *anyopaque,
    buf: []u8,
    alignment: mem.Alignment,
    return_address: usize,
) void {
    _ = ctx;
    _ = return_address;
    mutex.lock();
    defer mutex.unlock();

Now, because the web browser's main thread does not allow synchronous wait, we cannot use a regular mutex. We need a special mutex that would spin in the main thread instead of sleeping when another thread has locked it.

Open lib/std/Thread/Mutex.zig and look for the definition of ReleaseImpl. Add a new if clause for WasiImpl:

else if (builtin.os.tag.isDarwin())
    DarwinImpl
else if (builtin.os.tag == .wasi)
    WasiImpl
else
    FutexImpl;

Scroll down past the other implementations and add WasiImpl:

/// Mutex implementation for WASI.
///
/// Worker threads block on a futex while the lock is unavailable. The main
/// thread never blocks: it marks the lock as `seized` and spins until the
/// current owner hands it over. Setting `std_options.wasi_main_thread_wait`
/// to true makes the main thread behave like a worker.
const WasiImpl = struct {
    /// Current lock state; always one of `free`, `owned`, `seized`, `forfeited`.
    status: std.atomic.Value(u32) = .{ .raw = free },
    /// Number of worker threads that are in (or about to enter) Futex.wait().
    wait_count: std.atomic.Value(u32) = .{ .raw = 0 },

    const free: u32 = 0; // no one owns the lock
    const owned: u32 = 1; // a worker thread has the lock
    const seized: u32 = 2; // the main thread either has the lock already or is about to get it
    const forfeited: u32 = 3; // the main thread has received the lock from the previous owner

    /// Acquires the lock. Spins when called from the main thread, sleeps on
    /// the futex when called from a worker thread.
    pub fn lock(self: *@This()) void {
        if (inMainThread()) {
            // announce that the lock will be taken by the main thread
            switch (self.status.swap(seized, .acquire)) {
                // seizing a free lock
                free => {},
                // keep spinning until the current owner surrenders it;
                // the load must be .acquire so that it pairs with the .release
                // store of `forfeited` in unlock() and the previous owner's
                // writes are visible once the loop exits
                owned => while (self.status.load(.acquire) != forfeited) {},
                // the main thread cannot already hold the lock here
                else => unreachable,
            }
        } else {
            while (true) {
                // try to get the lock
                if (self.status.cmpxchgWeak(free, owned, .acquire, .monotonic)) |status| {
                    // pause the worker when the lock is not free
                    // (a spurious failure reports `free`; just retry in that case)
                    if (status != free) {
                        _ = self.wait_count.fetchAdd(1, .monotonic);
                        // returns immediately if status no longer equals the value observed
                        Thread.Futex.wait(&self.status, status);
                        _ = self.wait_count.fetchSub(1, .monotonic);
                    }
                } else break;
            }
        }
    }

    /// Releases the lock. A worker that finds the lock `seized` hands it
    /// directly to the spinning main thread instead of freeing it.
    pub fn unlock(self: *@This()) void {
        if (inMainThread()) {
            // just release the lock
            self.status.store(free, .release);
        } else {
            // release the lock if the worker thread still owns it
            if (self.status.cmpxchgStrong(owned, free, .release, .monotonic)) |status| {
                switch (status) {
                    seized => {
                        // let the spinning main thread take the lock
                        self.status.store(forfeited, .release);
                        // the lock is not free, so no waiting worker is woken
                        return;
                    },
                    else => unreachable,
                }
            }
        }
        if (self.wait_count.load(.monotonic) > 0) {
            // awaken a waiting worker thread
            Thread.Futex.wake(&self.status, 1);
        }
    }

    /// Attempts to take the lock without waiting.
    /// Returns true if the lock was free and is now held by the caller.
    pub fn tryLock(self: *@This()) bool {
        const new_status: u32 = if (inMainThread()) seized else owned;
        return self.status.cmpxchgStrong(free, new_status, .acquire, .monotonic) == null;
    }

    /// Reports whether the caller must use the spinning (main-thread) path.
    /// Always false when the root module sets `std_options.wasi_main_thread_wait`;
    /// otherwise true when the current thread id is 0.
    fn inMainThread() bool {
        const root = @import("root");
        if (@hasDecl(root, "std_options") and root.std_options.wasi_main_thread_wait) {
            return false;
        }
        // NOTE(review): assumes the main thread's id is 0 on WASI; confirm against Thread.getCurrentId()
        return Thread.getCurrentId() == 0;
    }
};

As Node.js permits synchronous wait in the main thread, WasiImpl provides an option for disabling the use of a spinlock. You need to add this option to std_options.

Open lib/std/std.zig and look for the definition of Options. Add the following field:

    /// Availability of synchronous wait in the main thread
    wasi_main_thread_wait: bool = false,

The changes above remove the main impediment to multithreading in WebAssembly. Now let us fix one final bug that causes stack corruption when debugging is enabled.

Open lib/std/Thread.zig and find the comptime block that exports wasi_thread_start:

    comptime {
        if (!builtin.single_threaded) {
            @export(&wasi_thread_start, .{ .name = "wasi_thread_start" });
        }
    }

And replace it with the following:

    // Decide which function is exported under the name "wasi_thread_start".
    // Nothing is exported in single-threaded builds.
    comptime {
        if (!builtin.single_threaded) {
            switch (builtin.mode) {
                .Debug => {
                    // Debug: the naked wrapper becomes the entry point...
                    @export(&wasi_thread_start_debug, .{ .name = "wasi_thread_start" });
                    // ...and the regular function is exported under a hidden name
                    // so that the wrapper's inline assembly can call it.
                    @export(&wasi_thread_start, .{ .name = "wasi_thread_start_cont", .visibility = .hidden });
                },
                // Other modes: export the regular function directly.
                else => @export(&wasi_thread_start, .{ .name = "wasi_thread_start" }),
            }
        }
    }

    /// Debug-mode thread entry point: set the stack pointer, then call
    /// wasi_thread_start (exported as "wasi_thread_start_cont").
    /// Uses the naked calling convention; the intent is that the stack frame
    /// of wasi_thread_start is created only after the stack pointer has been
    /// moved to the thread's own stack.
    fn wasi_thread_start_debug(_: i32, arg: *Instance) callconv(.naked) void {
        __set_stack_pointer(arg.thread.memory.ptr + arg.stack_offset);
        // Push both parameters (locals 0 and 1) unchanged and call the real entry point.
        asm volatile (
            \\ local.get 0
            \\ local.get 1
            \\ call wasi_thread_start_cont
            \\ return
        );
    }

The "naked" function sets the thread's stack pointer before calling wasi_thread_start() (exported as wasi_thread_start_cont), ensuring that the stack frame will be created in the right place.

Since the stack pointer would be set already, we need to make sure it doesn't get set again in wasi_thread_start():

    fn wasi_thread_start(tid: i32, arg: *Instance) callconv(.c) void {
        comptime assert(!builtin.single_threaded);
        if (builtin.mode != .Debug) {
            __set_stack_pointer(arg.thread.memory.ptr + arg.stack_offset);
        }

Scroll down further and replace the code in the .detached case with the following:

                // use free in the vtable so the stack doesn't get set to undefined when optimize = Debug
                const free = arg.thread.allocator.vtable.free;
                const ptr = arg.thread.allocator.ptr;
                free(ptr, arg.thread.memory, std.mem.Alignment.@"1", 0);

That's it! Hopefully by Zig 0.15.0, we will have all these teething issues fully ironed out.