Union - chung-leong/zigar GitHub Wiki

A union in the Zig language is a data type that can hold one of multiple variables, typically of different types. In JavaScript, it behaves like an object whose properties are all inactive, save for one:

const std = @import("std");

pub const Number = union(enum) {
    integer: i32,
    big_integer: i64,
    decimal: f64,
    complex: std.math.complex.Complex(f64),
};

pub const a: Number = .{ .integer = 123 };
pub const b: Number = .{ .big_integer = 1234567890 };
pub const c: Number = .{ .decimal = 0.12345 };
pub const d: Number = .{ .complex = .{ .re = 1, .im = 2 } };
pub var v: Number = .{ .big_integer = 8888 };
import { a, b, c, d } from './tagged-union-example-1.zig';

console.log(a.integer);
console.log(b.big_integer);
console.log(c.decimal);
console.log(d.complex.valueOf());
123
1234567890n
0.12345
{ re: 1, im: 2 }

In the example above, the only active property for a is integer. For b, it's big_integer. What happens when you access an inactive property depends on the kind of union involved. There are three kinds: tagged union, bare union, and extern union.

Tagged union

A tagged union uses an enum to keep track of which of its fields is active. You can obtain the current tag by casting the union to its tag type:

import { a, Number } from './tagged-union-example-1.zig';

console.log(Number.tag(a) === Number.tag.integer);
console.log(`${Number.tag(a)}`);
true
integer

When you read an inactive field/property of a tagged union, you get null:

import { b } from './tagged-union-example-1.zig';

console.log(b.big_integer);
console.log(b.integer);
1234567890n
null

This behavior is different from that in Zig itself, where such access would trigger a panic when runtime safety is active. This deviation was decided upon because it allows the use of JavaScript's nullish coalescing operator:

import { c } from './tagged-union-example-1.zig';

console.log(c.integer ?? c.big_integer ?? c.decimal);
0.12345

Assignment to an inactive field would trigger an error:

import module from './tagged-union-example-1.zig';

console.log(module.v.big_integer);
console.log(module.v.integer);
try {
    module.v.integer = 1234n;
} catch (err) {
    console.log(err.message);
}
8888n
null
Accessing property integer when big_integer is active

In order to switch to a different field, you need to assign to the union itself:

import module from './tagged-union-example-1.zig';

console.log(module.v.integer);
module.v = { integer: 1234 };
console.log(module.v.integer);
null
1234

Like a struct, a union provides an iterator for its properties. This iterator will always yield a single entry:

import { c } from './tagged-union-example-1.zig';

for (const [ tag, value ] of c) {
    console.log(`${tag} => ${value}`);
}
decimal => 0.12345

You can also obtain the name of the active field and the value through a double destructuring assignment:

import { c } from './tagged-union-example-1.zig';

const [[ tag, value ]] = c;
console.log(`${tag} => ${value}`);
decimal => 0.12345

When using a switch statement to determine the course of action, remember that the tag you get from the iterator is a string and not an enum object:

import { a, b, c, d, Number } from './tagged-union-example-1.zig';

for (const number of [a, b, c, d ]) {
    for (const [ tag, value ] of number) {        
        switch (tag) {
            case Number.tag.integer:
                console.log('This is not reachable');
                break;
            case 'integer':
                console.log('Do something with integer');
                break
            case 'big_integer':
                console.log('Do something with big integer');
                break;
            case 'decimal':
                console.log('Do something with decimal number');
                break;
            case 'complex': 
                console.log('Do something with complex number');
                break;
        }
    }   
}
Do something with integer
Do something with big integer
Do something with decimal number
Do something with complex number

Another way of obtaining the active tag is by casting the union into a string:

import { a, b, c, d } from './tagged-union-example-1.zig';

for (const number of [a, b, c, d ]) {
    switch (String(number)) {
        case 'integer':
            console.log('Do something with integer');
            break
        case 'big_integer':
            console.log('Do something with big integer');
            break;
        case 'decimal':
            console.log('Do something with decimal number');
            break;
        case 'complex': 
            console.log('Do something with complex number');
            break;
    }
}
Do something with integer
Do something with big integer
Do something with decimal number
Do something with complex number

You can also use the == operator to implicitly cast a tagged union into a string:

import { a, b, c, d } from './tagged-union-example-1.zig';

for (const number of [a, b, c, d ]) {
    if (number == 'integer') {
        console.log(number.integer);
    } else if (number == 'big_integer') {
        console.log(number.big_integer);
    } else if (number == 'decimal') {
        console.log(number.decimal);
    } else if (number == 'complex') {
        console.log(number.complex.valueOf());
    }
}
123
1234567890n
0.12345
{ re: 1, im: 2 }

Bare union

A bare union does not have a tag indicating which of its fields is active. You must rely on some external means.

const std = @import("std");

pub const Number = union {
    integer: i32,
    big_integer: i64,
    decimal: f64,
    complex: std.math.complex.Complex(f64),
};

pub const a: Number = .{ .integer = 123 };
pub const b: Number = .{ .big_integer = 1234567890 };
pub const c: Number = .{ .decimal = 0.12345 };
pub const d: Number = .{ .complex = .{ .re = 1, .im = 2 } };
import { a, b, c, d } from './tagged-union-example-2.zig';

console.log(a.integer);
console.log(b.big_integer);
console.log(c.decimal);
console.log(d.complex.valueOf());

In the example above, we know which field is active for each union only by looking at the source code. In an actual program, this information would need to be stored in some variables, somewhere.

When optimization level is Debug or ReleaseSafe, the Zig compiler would add a hidden tag to a bare union to enable runtime safety checks. Zigar also uses this information to warn you when your code accesses invalid data:

import { b } from './tagged-union-example-2.zig';

try {
    console.log(b.big_integer);
    console.log(b.integer);   
} catch (err) {
    console.log(err.message);
}
1234567890n
Accessing property integer when big_integer is active

This check is turned off when optimize is set to ReleaseFast or ReleaseSmall:

// using a query variable to set the optimization level ---v
import { b } from './tagged-union-example-2.zig?optimize=ReleaseSmall';

try {
    console.log(b.big_integer);
    console.log(b.integer);
    console.log(b.decimal);
} catch (err) {
    console.log(err.message);
}
1234567890n
1234567890
6.09957582e-315

The absence of a tag makes the iterator of a bare union practically useless, since it always returns entries of all fields. Calls to a bare union's valueOf method and to JSON.stringify will always yield invalid data:

import { b } from './tagged-union-example-2.zig';

for (const [ tag, value ] of b) {
    console.log(`${tag} => ${value}`);
}
console.log(b.valueOf());
console.log(JSON.stringify(b, undefined, 4));

Another major shortcoming of bare unions is that pointers within them are not accessible:

const std = @import("std");

const IntegerOrTextT = union(enum) {
    number: i32,
    text: []const u8,
};

const IntegerOrTextB = union {
    number: i32,
    text: []const u8,
};

pub fn getT(allocator: std.mem.Allocator, text: bool) !IntegerOrTextT {
    return if (text)
        .{ .text = try allocator.dupe(u8, "Hello") }
    else
        .{ .number = 1234 };
}

pub fn getB(allocator: std.mem.Allocator, text: bool) !IntegerOrTextB {
    return if (text)
        .{ .text = try allocator.dupe(u8, "Hello") }
    else
        .{ .number = 1234 };
}
import { getB, getT } from './bare-union-example-2.zig';

try {
    console.log(getT(false).number);
    console.log(getT(true).text.string);
    console.log(getB(false).number);
    console.log(getB(true).text.string);
} catch (err) {
    console.log(err.message);
}
1234
Hello
1234
Pointers within an untagged union are not accessible

In the example above, Zigar is able to tell whether IntegerOrTextT.text is a valid pointer thanks to the presence of a tag. The status of IntegerOrTextB.text is unknown, on the other hand. It might be a valid address--or it might be 1234. This uncertainty means that throwing an error is the only reasonable action to take.

Extern union

An extern union is like a bare union, except there is no check even when optimize is Debug:

const std = @import("std");

pub const Number = extern union {
    integer: i32,
    big_integer: i64,
    decimal: f64,
};

pub const a: Number = .{ .integer = 123 };
pub const b: Number = .{ .big_integer = 1234567890 };
pub const c: Number = .{ .decimal = 0.12345 };
import { b } from './extern-union-example-1.zig';

try {
    console.log(b.big_integer);
    console.log(b.integer);   
} catch (err) {
    console.log(err.message);
}
1234567890n
1234567890

Note the absence of the complex field in the example above. We're forced to remove it because an extern union cannot contain a non-extern struct.