Cross language memory access - chung-leong/zigar GitHub Wiki

In a hybrid JavaScript-Zig application, each language has its own pool of memory. One is structured to meet the needs of a dynamic language; the other employs a scheme common to low-level programming languages:

JavaScript vs Zig memory

At times one language will need to access data stored in the memory of the other language. For instance, when a Zig function is called from JavaScript, it has to read data from variables passed as arguments. While Zigar tries to make interoperation seamless, there are potential pitfalls. Understanding what happens under the hood can help you diagnose problems when things don't work as you'd expect.

Passing arguments to Zig from JavaScript

Zig code is incapable of interacting with basic JavaScript types like number, string, and object. It can't read them, and it can't modify them. These are too high-level. Zig works with lower-level data structures. Zig data objects are just bytes in contiguous memory. In JavaScript they're backed by instances of ArrayBuffer.

Regular JavaScript types are automatically converted to Zig data objects. Consider the following example:

const std = @import("std");

pub const Actor = struct {
    name: []u8,
    age: u32,
};

pub fn print(actor: Actor) void {
    std.debug.print("{s} is {d} years' old\n", .{ actor.name, actor.age });
}

/// Subtracts `years` from the age of the Actor that `actor` points to.
/// The struct is modified in place through the pointer; nothing is returned.
pub fn deage(actor: *Actor, years: u32) void {
    actor.age -= years;
}
import { print, deage } from './memory-example-1.zig';

const actor = {
    name: 'Arnold Schwarzenegger',
    age: 77,
};

print(actor);
deage(actor, 40);
print(actor);
Arnold Schwarzenegger is 77 years' old
Arnold Schwarzenegger is 77 years' old

Here, actor is a regular JavaScript object. When we call print(), a new Zig data object of the type Actor gets created using the content from the JavaScript object. string becomes []u8 and number becomes u32. print() had no problem receiving a copy of actor because that's what the function is expecting. deage() on the other hand did not work as expected because it's supposed to modify the incoming object. It only modified the temporary copy, leaving the original object unchanged.

The above example would work properly if we'd started out with a Zig data object:

import { Actor, print, deage } from './memory-example-1.zig';

const actor = new Actor({
    name: 'Arnold Schwarzenegger',
    age: 77,
});

print(actor);
deage(actor, 40);
print(actor);
Arnold Schwarzenegger is 77 years' old
Arnold Schwarzenegger is 37 years' old

Native code is capable of directly modifying the bytes in the ArrayBuffer. On the other hand, WebAssembly code has no access to JavaScript memory. When a function call occurs, arguments are copied into WebAssembly memory and copied back afterward (when modifications are possible).

Copying can also occur during native code execution when a misaligned pointer is encountered. The example above still works when the object’s position in memory is off by one byte thanks to this correction mechanism:

import { __zigar, Actor, print, deage } from './memory-example-1.zig';

console.log(`alignment = ${__zigar.alignOf(Actor)}`);
const size = __zigar.sizeOf(Actor);
const buffer = new ArrayBuffer(size + 1);
const dv = new DataView(buffer, 1);
const actor = Actor(dv);
actor.$ = {
    name: 'Arnold Schwarzenegger',
    age: 77,
};

print(actor);
deage(actor, 40);
print(actor);
alignment = 8
Arnold Schwarzenegger is 77 years' old
Arnold Schwarzenegger is 37 years' old

Pointers passed to a function can be assumed to be valid only during the call. Afterward, the memory they referred to could be freed by JavaScript’s garbage collector. If the function accepts a Promise or Generator as an argument, then the lifetime of pointers is extended until the promise is resolved or the generator is stopped. Consider the following:

const std = @import("std");
const zigar = @import("zigar");

/// Spawns a detached thread running `output` with `text` and `promise`,
/// then returns without waiting for that thread to finish.
/// Returns an error if `zigar.thread.use()` or the thread spawn fails.
pub fn print(text: []const u8, promise: zigar.function.Promise(void)) !void {
    try zigar.thread.use();
    const thread = try std.Thread.spawn(.{}, output, .{ text, promise });
    thread.detach();
    // text will survive beyond this point, since the promise has not been resolved yet
}

/// Thread entry point: prints `text` followed by a newline, then resolves `promise`.
fn output(text: []const u8, promise: zigar.function.Promise(void)) void {
    std.debug.print("{s}\n", .{text});
    // resolving the promise is what ends the extended lifetime of text
    promise.resolve({});
    // text can now be gc'ed
    // NOTE(review): presumably pairs with the zigar.thread.use() call made in print() -- confirm
    zigar.thread.end();
}
import { print } from './memory-example-2.zig';

await print('Hello world');
Hello world

If promise were not present, there would be a non-zero chance that text could be garbage-collected before the new thread gets around to it.

Returning values to JavaScript from Zig

When a Zig function returns a primitive like bool or i32, it becomes the corresponding primitive on the JavaScript side:

pub fn returnBool() bool {
    return true;
}

pub fn returnInt() i32 {
    return 123;
}
import { returnBool, returnInt } from './memory-example-3.zig';

console.log(typeof returnBool());
console.log(typeof returnInt());
boolean
number

When the function returns a struct, it’s a Zig data object:

pub fn returnStruct() struct {
    int: i32,
    float: f64,
} {
    return .{ .int = 123, .float = 3.14 };
}
import { returnStruct } from './memory-example-4.zig';

console.log(returnStruct());
console.log(returnStruct().valueOf());
S0 {
  [Symbol(memory)]: DataView {
    byteLength: 16,
    byteOffset: 0,
    buffer: ArrayBuffer {
      [Uint8Contents]: <1f 85 eb 51 b8 1e 09 40 7b 00 00 00 00 00 00 00>,
      byteLength: 16
    }
  }
}
{ int: 123, float: 3.14 }

This object sits in JavaScript memory.

What happens when a function returns a pointer?

pub fn returnString() []const u8 {
    return "Hello world";
}
import { returnString } from './memory-example-5.zig';

console.log(returnString().string);
console.log(returnString().dataView);
Hello world
DataView {
  byteLength: 11,
  byteOffset: 0,
  buffer: ArrayBuffer {
    [Uint8Contents]: <48 65 6c 6c 6f 20 77 6f 72 6c 64>,
    byteLength: 11,
    [Symbol(zig)]: { address: 140352294253853n, len: 11 }
  },
  [Symbol(zig)]: { address: 140352294253853n, len: 11 }
}

The slice pointer itself is in JavaScript memory. But what memory is it pointing to? Where is "Hello world" stored? The answer is the .rodata section of our shared library, a memory region carved out by the compiler to store constants. We can confirm this by attempting to modify this memory:

import { returnString } from './memory-example-5.zig';

const { typedArray } = returnString();
typedArray[0] = 123;
Segmentation fault (core dumped)

Yup, it’s read-only.

A pointer to a var would point to memory in the .data section instead:

const std = @import("std");

var string = [_]u8{ 'H', 'e', 'l', 'l', 'o' };

pub fn returnString() []const u8 {
    return &string;
}

pub fn print() void {
    std.debug.print("{s}\n", .{&string});
}
import { print, returnString } from './memory-example-6.zig';

print();
const { typedArray } = returnString();
typedArray[0] = 123;
print();
Hello
{ello

Heap memory can be allocated from an Allocator and returned to the JavaScript side:

const std = @import("std");

var gpa = std.heap.DebugAllocator(.{}).init;
const allocator = gpa.allocator();

pub fn returnString() ![]const u8 {
    return allocator.dupe(u8, "Hello world");
}

pub fn freeString(str: []const u8) void {
    allocator.free(str);
}
import { returnString, freeString } from './memory-example-7.zig';

const s = returnString();
console.log(s.string);
console.log(s.dataView);
freeString(s);
Hello world
DataView {
  byteLength: 11,
  byteOffset: 0,
  buffer: ArrayBuffer {
    [Uint8Contents]: <48 65 6c 6c 6f 20 77 6f 72 6c 64>,
    byteLength: 11,
    [Symbol(zig)]: { address: 140390405111808n, len: 11 }
  },
  [Symbol(zig)]: { address: 140390405111808n, len: 11 }
}

The Symbol(zig) field attached to both the DataView and ArrayBuffer objects tells you that they're referencing Zig memory. This memory needs to be manually freed. Afterward, an attempt to use the pointer again would lead to a crash:

import { returnString, freeString } from './memory-example-7.zig';

const s = returnString();
freeString(s);
console.log(s.string);
Segmentation fault (core dumped)

An Allocator received from JavaScript allocates JavaScript memory:

const std = @import("std");

pub fn returnString(allocator: std.mem.Allocator) ![]const u8 {
    return allocator.dupe(u8, "Hello world!");
}
import { returnString } from './memory-example-8.zig';

const s = returnString();
console.log(s.string);
console.log(s.dataView);
Hello world!
DataView {
  byteLength: 12,
  byteOffset: 0,
  buffer: ArrayBuffer {
    [Uint8Contents]: <48 65 6c 6c 6f 20 77 6f 72 6c 64 21>,
    byteLength: 12
  }
}

Note the absence of a Symbol(zig) field here. This is JavaScript memory and it'll automatically get garbage collected.

Generally, you should return JavaScript memory to JavaScript when it's meant to be consumed there. Only return pointers to Zig memory when they're expected to be used later by Zig code.

Passing arguments to JavaScript from Zig

When Zig passes primitives like bool or i32 to a JavaScript function, they become the corresponding primitives on the JavaScript side:

pub fn call(cb: *const fn (bool, i32) void) void {
    cb(false, 1234);
}
import { call } from './memory-example-9.zig';

call((arg1, arg2) => {
    console.log(typeof arg1);
    console.log(typeof arg2);
});
boolean
number

When an argument is a struct, it becomes a Zig data object in JavaScript memory:

const Point = struct {
    x: f32,
    y: f32,
};

pub fn call(cb: *const fn (Point) void) void {
    cb(.{ .x = 1.5, .y = 2.5 });
}
import { call } from './memory-example-10.zig';

call((point) => {
  console.log(point.valueOf());
  console.log(point.dataView);
});
{ x: 1.5, y: 2.5 }
DataView {
  byteLength: 8,
  byteOffset: 0,
  buffer: ArrayBuffer {
    [Uint8Contents]: <00 00 c0 3f 00 00 20 40>,
    byteLength: 8
  }
}

Pointer arguments can potentially point to anything. Special care should be taken with pointers that reference stack variables, as in the following example:

const Point = struct {
    x: f32,
    y: f32,
};

pub fn call(cb: *const fn (*Point) void) void {
    var point: Point = .{ .x = 1.5, .y = 2.5 };
    cb(&point);
}
import { call } from './memory-example-11.zig';

call((ptr) => {
    const point = ptr['*'];
    console.log(point.valueOf());
    console.log(point.dataView);
});
{ x: 1.5, y: 2.5 }
DataView {
  byteLength: 8,
  byteOffset: 0,
  buffer: ArrayBuffer {
    [Uint8Contents]: <00 00 c0 3f 00 00 20 40>,
    byteLength: 8,
    [Symbol(zig)]: { address: 140736111398768n, len: 8 }
  },
  [Symbol(zig)]: { address: 140736111398768n, len: 8 }
}

Stack variables are only valid within the function that contains them. This is not a problem in the above example, as the JavaScript function runs within the scope of the Zig function. It becomes a problem when a Promise is used:

const zigar = @import("zigar");

const Point = struct {
    x: f32,
    y: f32,
};
const Promise = zigar.function.Promise(void);

/// Invokes `cb` with a pointer to a local Point and a Promise built from `callback`.
pub fn call(cb: *const fn (*Point, Promise) void) void {
    // point is a stack variable: the pointer handed to cb is only valid
    // until call() returns
    var point: Point = .{ .x = 1.5, .y = 2.5 };
    cb(&point, Promise.init(null, callback));
}

pub fn callback(_: *anyopaque, _: void) void {}
import { call } from './memory-example-12.zig';

call(async (ptr) => {
    const point = ptr['*'];
    console.log(point.valueOf());
    await Promise.resolve();
    console.log(point.valueOf());
});
{ x: 1.5, y: 2.5 }
{ x: -3.067365169704512e+29, y: 4.591354418360263e-41 }

The first await statement is where the synchronous portion of the JavaScript function ends. That's when the caller receives a Promise object. On the Zig side, call() will exit at this point, causing its point variable to be overwritten. When the JavaScript function looks at that memory again, it'll find garbage data.

The same problem affects Generator as well.

Returning values to Zig from JavaScript

As usual, primitives like bool and i32 behave as you'd expect:

const std = @import("std");

pub fn getBool(cb: *const fn () bool) void {
    std.debug.print("bool = {}\n", .{cb()});
}

pub fn getInt(cb: *const fn () i32) void {
    std.debug.print("int = {}\n", .{cb()});
}
import { getBool, getInt } from './memory-example-13.zig';

getBool(_ => false);
getInt(_ => 1234);
bool = false
int = 1234

Simple structs also do not require special consideration:

const std = @import("std");

const Point = struct {
    x: f32,
    y: f32,
};

pub fn getPoint(cb: *const fn () Point) void {
    std.debug.print("point = {}\n", .{cb()});
}
import { getPoint } from './memory-example-14.zig';

getPoint(_ => ({ x: 1.5, y: 2.0 }));
point = memory-example-14.Point{ .x = 1.5e0, .y = 2e0 }

When you return variable-length values like strings that require new memory, that's when you need to think about where that memory is coming from. Per Zig's convention, new memory needed by a function comes from a caller-supplied allocator:

const std = @import("std");

var gpa = std.heap.DebugAllocator(.{}).init;
const allocator = gpa.allocator();

const Player = struct {
    first_name: []const u8,
    last_name: []const u8,
};

/// Calls `cb` with the module-level allocator, prints the two name fields of
/// the returned Player, and frees both strings before returning.
pub fn getPlayer(cb: *const fn (std.mem.Allocator) Player) void {
    // the callback is handed the allocator that the new memory for the
    // returned strings is expected to come from
    const player = cb(allocator);
    // both slices are released through the same allocator when this function exits
    defer allocator.free(player.first_name);
    defer allocator.free(player.last_name);
    std.debug.print("first_name = {s}\n", .{player.first_name});
    std.debug.print("last_name = {s}\n", .{player.last_name});
}
import { getPlayer } from './memory-example-15.zig';

getPlayer(_ => ({ 
    first_name: 'José',
    last_name: 'Canseco'
}));
first_name = José
last_name = Canseco

Here, heap memory is returned. With the help of a StackFallbackAllocator you can make JavaScript write into the stack instead:

const std = @import("std");

var gpa = std.heap.DebugAllocator(.{}).init;

const Player = struct {
    first_name: []const u8,
    last_name: []const u8,
};

/// Same as the heap-based version, except that the allocator passed to `cb`
/// is a stack-fallback allocator local to this call. Also prints the first
/// 12 bytes of the allocator's buffer.
pub fn getPlayer(cb: *const fn (std.mem.Allocator) Player) void {
    // allocator with a 1024-byte buffer held inside sfb (a local, so on this
    // function's stack); gpa is supplied as the fallback allocator
    var sfb = std.heap.stackFallback(1024, gpa.allocator());
    const allocator = sfb.get();
    const player = cb(allocator);
    defer allocator.free(player.first_name);
    defer allocator.free(player.last_name);
    std.debug.print("first_name = {s}\n", .{player.first_name});
    std.debug.print("last_name = {s}\n", .{player.last_name});
    // dump the start of the buffer to show where the returned strings were written
    std.debug.print("buffer = {s}\n", .{sfb.buffer[0..12]});
}
import { getPlayer } from './memory-example-16.zig';

getPlayer(_ => ({ 
    first_name: 'Rickey',
    last_name: 'Henderson'
}));
first_name = Rickey
last_name = Henderson
buffer = RickeyHender