Tarball server - chung-leong/zigar GitHub Wiki

In this exercise we're going to create a multithreaded server-side app that creates tarballs on the fly. It demonstrates how Zig code can write to a JavaScript stream.

Creating the app

First, we initialize the project and add in the necessary modules:

mkdir filter
cd filter
npm init -y
npm install fastify 
npm install --save-dev node-zigar
mkdir src zig

We're going to deploy our app in such a way that it's not dependent on node-zigar. That's why we're adding the module as a dev dependency.

In the zig sub-directory, create tar.zig:

const std = @import("std");

var gpa: std.heap.DebugAllocator(.{}) = .init;
const allocator = gpa.allocator();

/// Stream a gzipped tarball of `src_paths` (absolute paths to files or
/// directories) into `writer`, placing all entries under `root_path`
/// inside the archive. Errors from the filesystem, the compressor, or
/// the destination stream are propagated to the caller.
pub fn tar(
    writer: std.io.AnyWriter,
    root_path: []const u8,
    src_paths: []const []const u8,
) !void {
    // buffer output to destination stream
    var buffer = std.io.bufferedWriter(writer);
    // create gzip compressor
    var compressor = try std.compress.gzip.compressor(buffer.writer(), .{ .level = .best });
    // create tar writer
    var tar_writer = std.tar.writer(compressor.writer().any());
    try tar_writer.setRoot(root_path);
    for (src_paths) |src_path| {
        const sub_path = std.fs.path.basename(src_path);
        if (std.fs.openDirAbsolute(src_path, .{ .iterate = true })) |d| {
            // close the directory handle when done -- the original
            // leaked one descriptor per directory argument
            var dir = d;
            defer dir.close();
            // add the directory
            try tar_writer.writeDir(sub_path, .{});
            // then its content
            var iter = try dir.walk(allocator);
            defer iter.deinit();
            while (try iter.next()) |entry| {
                try tar_writer.writeEntry(entry);
            }
        } else |dir_err| {
            if (dir_err != error.NotDir) return dir_err;
            // if not a directory, then it's a file
            if (std.fs.openFileAbsolute(src_path, .{})) |file| {
                // close the file handle as well (was leaked before)
                defer file.close();
                try tar_writer.writeFile(sub_path, file);
            } else |file_err| return file_err;
        }
    }
    // flush in reverse order of construction: tar trailer, gzip
    // trailer, then the buffered bytes themselves
    try tar_writer.finish();
    try compressor.finish();
    try compressor.flush();
    try buffer.flush();
}

/// Quick manual test of tar(): archives this project's node_modules
/// directory into a gzipped tarball. Adjust the two hardcoded absolute
/// paths before running on another machine.
pub fn main() !void {
    // report any heap allocations that were never freed
    defer _ = gpa.detectLeaks();
    var file = try std.fs.createFileAbsolute("/home/cleong/Desktop/test.tar.gz", .{});
    defer file.close();
    const writer = file.writer().any();
    try tar(writer, "node_modules", &.{
        "/home/cleong/zigar/node-zigar/demos/node/tarball-server/node_modules",
    });
}

The tar() function first chains together three writers: one that performs buffering, one that compresses data, and one that constructs the archive. Then it adds the specified files or directories to the archive.

We use main() to verify that our function is working properly. Before running it, be sure to modify the hardcoded paths. As test data we'll use the contents of our project's node_modules directory.

Once we have a working function, we'll modify our code so that we can use tar() in JavaScript:

const std = @import("std");
const allocator = std.heap.c_allocator;

const zigar = @import("zigar");

var work_queue: zigar.thread.WorkQueue(thread_ns) = .{};

/// Initialize the worker thread pool used by tar(). `thread_count`
/// caps the number of archiving jobs that can run concurrently.
pub fn startup(thread_count: usize) !void {
    try work_queue.init(.{
        .allocator = allocator,
        .n_jobs = thread_count,
    });
}

/// Tear down the work queue asynchronously; `promise` settles on the
/// JavaScript side once all worker threads have exited.
pub fn shutdown(promise: zigar.function.Promise(void)) void {
    work_queue.deinitAsync(promise);
}

/// Queue an archiving job on the thread pool and return immediately.
/// The promise resolves (or rejects) when thread_ns.tar() finishes
/// running on a worker thread.
pub fn tar(
    writer: std.io.AnyWriter,
    root_path: []const u8,
    src_paths: []const []const u8,
    promise: zigar.function.PromiseOf(thread_ns.tar),
) !void {
    try work_queue.push(thread_ns.tar, .{ writer, root_path, src_paths }, promise);
}

const thread_ns = struct {
    /// Worker-thread implementation: stream a gzipped tarball of
    /// `src_paths` into `writer`, rooting all entries at `root_path`.
    /// Runs on a pool thread; errors propagate to the JS promise.
    pub fn tar(
        writer: std.io.AnyWriter,
        root_path: []const u8,
        src_paths: []const []const u8,
    ) !void {
        // buffer output to destination stream
        var buffer = std.io.bufferedWriter(writer);
        // create gzip compressor
        var compressor = try std.compress.gzip.compressor(buffer.writer(), .{ .level = .best });
        // create tar writer
        var tar_writer = std.tar.writer(compressor.writer().any());
        try tar_writer.setRoot(root_path);
        for (src_paths) |src_path| {
            const sub_path = std.fs.path.basename(src_path);
            if (std.fs.openDirAbsolute(src_path, .{ .iterate = true })) |d| {
                // close the handle when done -- leaking one descriptor
                // per request would exhaust fds on a busy server
                var dir = d;
                defer dir.close();
                // add the directory
                try tar_writer.writeDir(sub_path, .{});
                // then its content
                var iter = try dir.walk(allocator);
                defer iter.deinit();
                while (try iter.next()) |entry| {
                    try tar_writer.writeEntry(entry);
                }
            } else |dir_err| {
                if (dir_err != error.NotDir) return dir_err;
                // if not a directory, then it's a file
                if (std.fs.openFileAbsolute(src_path, .{})) |file| {
                    // close the file handle too (was leaked before)
                    defer file.close();
                    try tar_writer.writeFile(sub_path, file);
                } else |file_err| return file_err;
            }
        }
        // finalize in reverse order of construction
        try tar_writer.finish();
        try compressor.finish();
        try compressor.flush();
        try buffer.flush();
    }
};

At this point the Zig part is done. Time to move on to the JavaScript side. In the src directory, create index.js:

import Fastify from 'fastify';
import { dirname, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { shutdown, startup, tar } from '../zig/tar.zig';

const srcDir = dirname(fileURLToPath(import.meta.url));
const fastify = Fastify({});
fastify.get('/', async (request, reply) => {
  reply.type('html');
  return `<a href="download">Download</a>`;
});
fastify.get('/download', async (request, reply) => {
  const tarballSrcDir = resolve(srcDir, '../node_modules');
  const transform = new TransformStream(undefined, { highWaterMark: 1024 * 16 });
  const writer = transform.writable.getWriter();
  tar(writer, 'modules', [ tarballSrcDir ]).then(() => writer.close());
  reply.header('Content-Disposition', 'attachment; filename=modules.tar.gz');
  return transform.readable;
});
startup(50);
fastify.addHook('onClose', async () => await shutdown());
fastify.listen({ port: 8080 });

The download handler creates an identity TransformStream, which functions basically as a passthrough stream: what goes into its writable side comes out its readable side. We obtain a writer for transform.writable and pass it to our Zig function. transform.readable, meanwhile, is returned to Fastify as the HTTP response body. And that's it. That's all the plumbing we need.

A call to startup() creates 50 threads, the number of simultaneous downloads our server can handle.

Before we can start the app, we need to open package.json and set the module type and add a launch command:

  "main": "index.js",
  "type": "module",
  "scripts": {
    "start": "node --loader=node-zigar --no-warnings src/index.js",
  },

Start up the app. Open http://localhost:8080 in your browser and click the download link. Transfer should complete after a few seconds.

Preparing for deployment

So far, we have used node-zigar in a development configuration where we import from the Zig source file in JavaScript. In a production environment, this does not make sense, since neither the .zig file nor the Zig compiler will be present. In previous examples, we changed the imports so that they reference the .zigar module directory. Here we'll use an alternate strategy.

First create node-zigar.config.json by running the following command:

npx node-zigar init

Open the file and add the module as well as additional build targets:

{
  "optimize": "ReleaseSmall",
  "modules": {
    "lib/tar.zigar": {
      "source": "zig/tar.zig",
      "loader": "src/tar.js"
    }
  },
  "targets": [
    {
      "platform": "linux",
      "arch": "x64"
    },
    {
      "platform": "linux",
      "arch": "arm64"
    },
    {
      "platform": "darwin",
      "arch": "arm64"
    },
    {
      "platform": "win32",
      "arch": "x64"
    }
  ]
}

Presence of the loader setting instructs node-zigar to create a standalone module loader. We choose to keep it in the src sub-directory.

Open package.json again and add a build command:

  "scripts": {
    "start": "node src/index.js",
    "build": "node-zigar build"
  },

We can also remove the --loader=node-zigar --no-warnings flags at this point from the start command, as they're no longer needed.

Run the build command to create the shared libraries:

npm run build
✓ Built module "tar" (linux/arm64)
✓ Built module "tar" (darwin/arm64)
✓ Built module "tar" (win32/x64)
✓ Built module "tar" (linux/x64)
✓ Built Node.js addon (linux/arm64)
✓ Built Node.js addon (darwin/arm64)
✓ Built Node.js addon (win32/x64)
✓ Built Node.js addon (linux/x64)

Afterward, the directory tree should look like this:

📁 lib
  📁 node-zigar-addon
    📑 darwin.arm64.node
    📑 linux.x64.node
    📑 linux.arm64.node
    📑 win32.x64.node
  📁 tar.zigar
    📑 darwin.arm64.dylib
    📑 linux.x64.so
    📑 linux.arm64.so
    📑 win32.x64.dll
📁 src
  📑 index.js
  📑 tar.js

If you open tar.js, you can see the code that selects the right shared library for the current platform and CPU architecture:

const platform = getPlatform();
const arch = getArch();
const ext = getLibraryExt(platform);
const moduleName = `${platform}.${arch}.${ext}`;
const addonName = `${platform}.${arch}.node`;
const { createEnvironment } = require(resolve(__dirname, "../lib/node-zigar-addon", addonName));

Further down is the method call that loads the module:

env.loadModule(resolve(__dirname, "../lib/tar.zigar", moduleName));

Open src/index.js and change the import statement to use the standalone loader:

import { shutdown, startup, tar } from './tar.js';

Then verify that the server still works. The data transfer should be much faster now, since the module is optimized for release.

The main advantage of using a standalone loader is that it removes the dependency on node-zigar, allowing your app to run in JavaScript environments other than Node.js. If you have Deno installed on your computer, you can start the app using the following command:

deno --allow-read --allow-ffi --allow-net src/index.js

Our app works in Bun as well:

bun src/index.js

The downside is that the module isn't recompiled automatically when you change the source code.