Cross language memory access - chung-leong/zigar GitHub Wiki
In a hybrid JavaScript-Zig application, each language has its own pool of memory. One is structured to meet the needs of a dynamic language; the other meanwhile employs a scheme common to low-level programming languages:
At times one language will need to access data stored in the memory of the other language. For instance, when a Zig function is called from JavaScript, it has to read data from variables passed as arguments. While Zigar tries to make interoperation seamless, there are potential pitfalls. Understanding what happens under the hood can help you diagnose problems when things don't work as you'd expect.
Passing arguments to Zig from JavaScript
Zig code is incapable of interacting with basic JavaScript types like number
, string
, and
object
. It can't read them, it can't modify them. These are too high-level. Zig works with
lower-level data structures. Zig data objects are just bytes
in contiguous memory. In JavaScript they're backed by instances of
ArrayBuffer
.
Regular JavaScript types are automatically converted to Zig data objects. Consider the following example:
const std = @import("std");
pub const Actor = struct {
name: []u8,
age: u32,
};
pub fn print(actor: Actor) void {
std.debug.print("{s} is {d} years' old\n", .{ actor.name, actor.age });
}
pub fn deage(actor: *Actor, years: u32) void {
actor.age -= years;
}
import { print, deage } from './memory-example-1.zig';
const actor = {
name: 'Arnold Schwarzenegger',
age: 77,
};
print(actor);
deage(actor, 40);
print(actor);
Arnold Schwarzenegger is 77 years' old
Arnold Schwarzenegger is 77 years' old
Here, actor
is a regular JavaScript object. When we call print()
, a new Zig data object of the
type Actor
gets created using the content from the JavaScript object. string
becomes []u8
and
number
becomes u32
. print()
had no problem receiving a copy of actor
because that's what
the function is expecting. deage()
on the other hand did not work as expected because it's
supposed to modify the incoming object. It only modified the temporary copy, leaving the original
object unchanged.
The above example would work properly if we'd started out with a Zig data object:
import { Actor, print, deage } from './memory-example-1.zig';
const actor = new Actor({
name: 'Arnold Schwarzenegger',
age: 77,
});
print(actor);
deage(actor, 40);
print(actor);
Arnold Schwarzenegger is 77 years' old
Arnold Schwarzenegger is 37 years' old
Native code is capable of directly modifying the bytes in the ArrayBuffer
. On the other hand
WebAssembly code has no access to JavaScript memory. When a function call occurs, arguments are
copied into WebAssembly memory and copied back afterward (when modifications are possible).
Copying can also occur during native code execution when a misaligned pointer is encountered. The example above still works when the object’s position in memory is off by one byte thanks to this correction mechanism:
import { __zigar, Actor, print, deage } from './memory-example-1.zig';
console.log(`alignment = ${__zigar.alignOf(Actor)}`);
const size = __zigar.sizeOf(Actor);
const buffer = new ArrayBuffer(size + 1);
const dv = new DataView(buffer, 1);
const actor = Actor(dv);
actor.$ = {
name: 'Arnold Schwarzenegger',
age: 77,
};
print(actor);
deage(actor, 40);
print(actor);
alignment = 8
Arnold Schwarzenegger is 77 years' old
Arnold Schwarzenegger is 37 years' old
Pointers passed to a function can be assumed to be valid only during the call. Afterward, the
memory they referred to could be freed by JavaScript’s garbage collector. If the function
accepts a Promise
or Generator
as an argument, then the lifetime of
pointers is extended until the promise is resolved or the generator is stopped. Consider the
following:
const std = @import("std");
const zigar = @import("zigar");
pub fn print(text: []const u8, promise: zigar.function.Promise(void)) !void {
try zigar.thread.use();
const thread = try std.Thread.spawn(.{}, output, .{ text, promise });
thread.detach();
// text will survive beyond this point
}
fn output(text: []const u8, promise: zigar.function.Promise(void)) void {
std.debug.print("{s}\n", .{text});
promise.resolve({});
// text can now be gc'ed
zigar.thread.end();
}
import { print } from './memory-example-2.zig';
await print('Hello world');
Hello world
If promise
were not present, there would be a non-zero chance that text
could be
garbage-collected before the new thread gets around to it.
Returning values to JavaScript from Zig
When a Zig function returns a primitive like bool
or i32
, it becomes the corresponding
primitive on the JavaScript side:
pub fn returnBool() bool {
return true;
}
pub fn returnInt() i32 {
return 123;
}
import { returnBool, returnInt } from './memory-example-3.zig';
console.log(typeof returnBool());
console.log(typeof returnInt());
boolean
number
When the function returns a struct
, it’s a Zig data object:
pub fn returnStruct() struct {
int: i32,
float: f64,
} {
return .{ .int = 123, .float = 3.14 };
}
import { returnStruct } from './memory-example-4.zig';
console.log(returnStruct());
console.log(returnStruct().valueOf());
S0 {
[Symbol(memory)]: DataView {
byteLength: 16,
byteOffset: 0,
buffer: ArrayBuffer {
[Uint8Contents]: <1f 85 eb 51 b8 1e 09 40 7b 00 00 00 00 00 00 00>,
byteLength: 16
}
}
}
{ int: 123, float: 3.14 }
This object sits in JavaScript memory.
What happens when a function returns a pointer?
pub fn returnString() []const u8 {
return "Hello world";
}
import { returnString } from './memory-example-5.zig';
console.log(returnString().string);
console.log(returnString().dataView);
Hello world
DataView {
byteLength: 11,
byteOffset: 0,
buffer: ArrayBuffer {
[Uint8Contents]: <48 65 6c 6c 6f 20 77 6f 72 6c 64>,
byteLength: 11,
[Symbol(zig)]: { address: 140352294253853n, len: 11 }
},
[Symbol(zig)]: { address: 140352294253853n, len: 11 }
}
The slice pointer itself is in JavaScript memory. But what memory is it pointing to? Where is
"Hello world" stored? The answer is the .rodata
section of our shared library, a memory region
carved out by the compiler to store constants. We can confirm this by attempting to modify this
memory:
import { returnString } from './memory-example-5.zig';
const { typedArray } = returnString();
typedArray[0] = 123;
Segmentation fault (core dumped)
Yup, it’s read-only.
A pointer to a var
would point to memory in the .data
section instead:
const std = @import("std");
var string = [_]u8{ 'H', 'e', 'l', 'l', 'o' };
pub fn returnString() []const u8 {
return &string;
}
pub fn print() void {
std.debug.print("{s}\n", .{&string});
}
import { print, returnString } from './memory-example-6.zig';
print();
const { typedArray } = returnString();
typedArray[0] = 123;
print();
Hello
{ello
Heap memory can be allocated from an Allocator
and returned to the
JavaScript side:
const std = @import("std");
var gpa = std.heap.DebugAllocator(.{}).init;
const allocator = gpa.allocator();
pub fn returnString() ![]const u8 {
return allocator.dupe(u8, "Hello world");
}
pub fn freeString(str: []const u8) void {
allocator.free(str);
}
import { returnString, freeString } from './memory-example-7.zig';
const s = returnString();
console.log(s.string);
console.log(s.dataView);
freeString(s);
Hello world
DataView {
byteLength: 11,
byteOffset: 0,
buffer: ArrayBuffer {
[Uint8Contents]: <48 65 6c 6c 6f 20 77 6f 72 6c 64>,
byteLength: 11,
[Symbol(zig)]: { address: 140390405111808n, len: 11 }
},
[Symbol(zig)]: { address: 140390405111808n, len: 11 }
}
The Symbol(zig)
field attached to both the DataView
and ArrayBuffer
object tells you that
they're referencing Zig memory. This memory needs to be manually freed. Afterward, an attempt to
use the pointer again would lead to a crash:
import { returnString, freeString } from './memory-example-7.zig';
const s = returnString();
freeString(s);
console.log(s.string);
Segmentation fault (core dumped)
An Allocator
received from JavaScript allocates JavaScript memory:
const std = @import("std");
pub fn returnString(allocator: std.mem.Allocator) ![]const u8 {
return allocator.dupe(u8, "Hello world!");
}
import { returnString } from './memory-example-8.zig';
const s = returnString();
console.log(s.string);
console.log(s.dataView);
Hello world!
DataView {
byteLength: 12,
byteOffset: 0,
buffer: ArrayBuffer {
[Uint8Contents]: <48 65 6c 6c 6f 20 77 6f 72 6c 64 21>,
byteLength: 12
}
}
Note the absence of a Symbol(zig)
field here. This is JavaScript memory and it'll automatically
get garbage collected.
Generally, you should return JavaScript memory to JavaScript when it's meant to be consumed there. Only return pointers to Zig memory when they're expected to be used later by Zig code.
Passing arguments to JavaScript from Zig
When Zig passes primitives like bool
or i32
to a JavaScript function, they become the
corresponding primitives on the JavaScript side:
pub fn call(cb: *const fn (bool, i32) void) void {
cb(false, 1234);
}
import { call } from './memory-example-9.zig';
call((arg1, arg2) => {
console.log(typeof arg1);
console.log(typeof arg2);
});
boolean
number
When an argument is a struct
, it becomes a Zig data object in JavaScript memory:
const Point = struct {
x: f32,
y: f32,
};
pub fn call(cb: *const fn (Point) void) void {
cb(.{ .x = 1.5, .y = 2.5 });
}
import { call } from './memory-example-10.zig';
call((point) => {
console.log(point.valueOf());
console.log(point.dataView);
});
{ x: 1.5, y: 2.5 }
DataView {
byteLength: 8,
byteOffset: 0,
buffer: ArrayBuffer {
[Uint8Contents]: <00 00 c0 3f 00 00 20 40>,
byteLength: 8
}
}
Pointer arguments can potentially point to anything. Special care should be taken with pointers that reference stack variables, as in the following example:
const Point = struct {
x: f32,
y: f32,
};
pub fn call(cb: *const fn (*Point) void) void {
var point: Point = .{ .x = 1.5, .y = 2.5 };
cb(&point);
}
import { call } from './memory-example-11.zig';
call((ptr) => {
const point = ptr['*'];
console.log(point.valueOf());
console.log(point.dataView);
});
{ x: 1.5, y: 2.5 }
DataView {
byteLength: 8,
byteOffset: 0,
buffer: ArrayBuffer {
[Uint8Contents]: <00 00 c0 3f 00 00 20 40>,
byteLength: 8,
[Symbol(zig)]: { address: 140736111398768n, len: 8 }
},
[Symbol(zig)]: { address: 140736111398768n, len: 8 }
}
Stack variables are only valid within the function that contains them. This is not a problem in
the above example, as the JavaScript function runs within the scope of the Zig function. It becomes
a problem when a Promise
is used:
const zigar = @import("zigar");
const Point = struct {
x: f32,
y: f32,
};
const Promise = zigar.function.Promise(void);
pub fn call(cb: *const fn (*Point, Promise) void) void {
var point: Point = .{ .x = 1.5, .y = 2.5 };
cb(&point, Promise.init(null, callback));
}
pub fn callback(_: *anyopaque, _: void) void {}
import { call } from './memory-example-12.zig';
call(async (ptr) => {
const point = ptr['*'];
console.log(point.valueOf());
await Promise.resolve();
console.log(point.valueOf());
});
{ x: 1.5, y: 2.5 }
{ x: -3.067365169704512e+29, y: 4.591354418360263e-41 }
The first await
statement is where the synchronous portion of the JavaScript function ends. That's
when the caller receives a Promise
object. On the Zig side, call()
will exit at this point,
causing its point
variable to be overwritten. When the JavaScript function looks at that
memory again, it'll find garbage data.
The same problem affects Generator
as well.
Returning values to Zig from JavaScript
As usual, primitives like bool
and i32
behave as you'd expect:
const std = @import("std");
pub fn getBool(cb: *const fn () bool) void {
std.debug.print("bool = {}\n", .{cb()});
}
pub fn getInt(cb: *const fn () i32) void {
std.debug.print("int = {}\n", .{cb()});
}
import { getBool, getInt } from './memory-example-13.zig';
getBool(_ => false);
getInt(_ => 1234);
bool = false
int = 1234
Simple struct
s also do not require special consideration:
const std = @import("std");
const Point = struct {
x: f32,
y: f32,
};
pub fn getPoint(cb: *const fn () Point) void {
std.debug.print("point = {}\n", .{cb()});
}
import { getPoint } from './memory-example-14.zig';
getPoint(_ => ({ x: 1.5, y: 2.0 }));
point = memory-example-14.Point{ .x = 1.5e0, .y = 2e0 }
When you return variable-length values like strings that require new memory, that's when you need to think about where that memory is coming from. Per Zig's convention, new memory needed by a function comes from a caller-supplied allocator:
const std = @import("std");
var gpa = std.heap.DebugAllocator(.{}).init;
const allocator = gpa.allocator();
const Player = struct {
first_name: []const u8,
last_name: []const u8,
};
pub fn getPlayer(cb: *const fn (std.mem.Allocator) Player) void {
const player = cb(allocator);
defer allocator.free(player.first_name);
defer allocator.free(player.last_name);
std.debug.print("first_name = {s}\n", .{player.first_name});
std.debug.print("last_name = {s}\n", .{player.last_name});
}
import { getPlayer } from './memory-example-15.zig';
getPlayer(_ => ({
first_name: 'José',
last_name: 'Canseco'
}));
first_name = José
last_name = Canseco
Here, heap memory is returned. With the help of a
StackFallbackAllocator
you can make JavaScript write into the stack instead:
const std = @import("std");
var gpa = std.heap.DebugAllocator(.{}).init;
const Player = struct {
first_name: []const u8,
last_name: []const u8,
};
pub fn getPlayer(cb: *const fn (std.mem.Allocator) Player) void {
var sfb = std.heap.stackFallback(1024, gpa.allocator());
const allocator = sfb.get();
const player = cb(allocator);
defer allocator.free(player.first_name);
defer allocator.free(player.last_name);
std.debug.print("first_name = {s}\n", .{player.first_name});
std.debug.print("last_name = {s}\n", .{player.last_name});
std.debug.print("buffer = {s}\n", .{sfb.buffer[0..12]});
}
import { getPlayer } from './memory-example-16.zig';
getPlayer(_ => ({
first_name: 'Rickey',
last_name: 'Henderson'
}));
first_name = Rickey
last_name = Henderson
buffer = RickeyHender