Tarball viewer - chung-leong/zigar GitHub Wiki

In this exercise we're going to create a web app that allows the user to view contents stored in a GitHub tarball. It demonstrates how Zig code can read a web stream obtained through the Fetch API.

Starting out

First, we'll create a boilerplate Vite app. In a terminal window, run the following command:

npm create vite@latest

Enter code-viewer as name, then select React and JavaScript + SWC:

│
◇  Project name:
│  code-viewer
│
◇  Select a framework:
│  React
│
◇  Select a variant:
│  JavaScript + SWC
│
◇  Use rolldown-vite (Experimental)?:
│  No
│
◇  Install with npm and start now?
│  No
│
◇  Scaffolding project in /home/rwiggum/code-viewer...
│
└  Done. Now run:

  cd code-viewer
  npm install
  npm run dev

Once the project is created, go into its directory and install the necessary files:

cd code-viewer
npm install

Next, install the Zigar plugin:

npm install --save-dev rollup-plugin-zigar

Create a sub-directory for Zig code:

mkdir zig

For demo purposes we're going to use the Ziglang repo, going with the very first release to keep the file size small. Download https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz and note the file path.

To get a feel for the standard library functions involved, we're going to first write a simple Zig program that lists the contents of this tarball. Create decompress.zig in the zig sub-directory and insert the following code:

const std = @import("std");

/// Prints the name of every entry in a gzip-compressed tarball.
pub fn main() !void {
    const path = "/home/cleong/Downloads/zig-0.1.1.tar.gz";
    var file = try std.fs.openFileAbsolute(path, .{});
    defer file.close();
    // reader for the compressed file
    var read_buffer: [4096]u8 = undefined;
    var reader = file.reader(&read_buffer);
    // gzip decompressor pulling from the file reader
    var decompress_buffer: [std.compress.flate.max_window_len]u8 = undefined;
    var decompressor: std.compress.flate.Decompress = .init(&reader.interface, .gzip, &decompress_buffer);
    // tar iterator pulling from the decompressor; it is given the buffers
    // used for file names and link names
    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
    var iter: std.tar.Iterator = .init(&decompressor.reader, .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
    });
    // print the name of each entry
    while (try iter.next()) |f| {
        std.debug.print("{s}\n", .{f.name});
    }
}

Be sure to update the hardcoded file path with your own. Then run the program using the following command (or by clicking the "run" link in VS Code):

zig run decompress.zig

You should see a long list of files:

zig-0.1.1/
zig-0.1.1/.gitignore
zig-0.1.1/.travis.yml
zig-0.1.1/CMakeLists.txt
zig-0.1.1/LICENSE
zig-0.1.1/README.md
zig-0.1.1/build.zig
zig-0.1.1/c_headers/
zig-0.1.1/c_headers/__clang_cuda_builtin_vars.h
zig-0.1.1/c_headers/__clang_cuda_cmath.h
zig-0.1.1/c_headers/__clang_cuda_complex_builtins.h
zig-0.1.1/c_headers/__clang_cuda_intrinsics.h
zig-0.1.1/c_headers/__clang_cuda_math_forward_declares.h
zig-0.1.1/c_headers/__clang_cuda_runtime_wrapper.h
...

As you can see, extracting files from a tarball is fairly straightforward in Zig. We create a reader for the file and pass that to the flate decompressor. We then give the decompressor's reader to std.tar.Iterator. The iterator then gives us each file.

Next, we're going to rework the code in preparation for using it in JavaScript. We want a function that accepts a std.fs.File as an argument and returns an iterator of structs containing both file info and content:

const std = @import("std");

/// Wraps `file` in an Iterator. Only the file is stored here; the readers are
/// set up by the iterator's next() function.
fn extract(file: std.fs.File) !Iterator {
    const iter: Iterator = .{ .file = file };
    return iter;
}

/// Iterator over the entries of a gzip-compressed tarball read from `file`.
/// The file reader, decompressor and tar iterator are created on the first
/// call to next(), not when the struct is constructed.
const Iterator = struct {
    file: std.fs.File,
    // becomes true once next() has initialized the fields below
    started: bool = false,
    reader: std.fs.File.Reader = undefined,
    read_buffer: [4096]u8 = undefined,
    decompressor: std.compress.flate.Decompress = undefined,
    decompress_buffer: [std.compress.flate.max_window_len]u8 = undefined,
    file_name_buffer: [std.fs.max_path_bytes]u8 = undefined,
    link_name_buffer: [std.fs.max_path_bytes]u8 = undefined,
    tar_iter: std.tar.Iterator = undefined,

    /// Returns the next entry of the tarball along with its content, or null
    /// when the tar iterator has no more entries. `name`, `link_name` and
    /// `data` of the returned File are allocated with `allocator`; on success
    /// they are not freed here, so the caller has to free them.
    pub fn next(self: *@This(), allocator: std.mem.Allocator) !?File {
        if (!self.started) {
            // NOTE(review): the setup presumably lives here rather than in
            // extract() because each object below holds pointers into `self`,
            // which needs to be at its final address first - confirm
            // create file reader
            self.reader = self.file.reader(&self.read_buffer);
            // create decompressor
            self.decompressor = .init(&self.reader.interface, .gzip, &self.decompress_buffer);
            // obtain the tar iterator
            self.tar_iter = .init(&self.decompressor.reader, .{
                .file_name_buffer = &self.file_name_buffer,
                .link_name_buffer = &self.link_name_buffer,
            });
            self.started = true;
        }
        // get next item
        const f = try self.tar_iter.next() orelse return null;
        // copy the names; the originals are slices handed out by the tar iterator
        const name = try allocator.dupe(u8, f.name);
        errdefer allocator.free(name);
        const link_name = try allocator.dupe(u8, f.link_name);
        errdefer allocator.free(link_name);
        // read file content
        const data = try allocator.alloc(u8, f.size);
        errdefer allocator.free(data);
        // stream the entry's content into the buffer just allocated
        var data_writer: std.Io.Writer = .fixed(data);
        try self.tar_iter.streamRemaining(f, &data_writer);
        return .{
            .name = name,
            .link_name = link_name,
            .size = f.size,
            .mode = f.mode,
            .kind = f.kind,
            .data = data,
        };
    }
};

/// A single tarball entry, including its content, as returned by Iterator.next().
const File = struct {
    // copy of the entry's name as reported by the tar iterator
    name: []const u8,
    // copy of the entry's link name as reported by the tar iterator
    link_name: []const u8,
    // size reported by the tar iterator; also the length allocated for `data`
    size: u64,
    mode: u32,
    kind: std.tar.FileKind,
    // content of the entry
    data: []const u8,
};

/// Test driver: prints the name of every entry in the tarball, freeing the
/// memory of each entry after use and checking for leaks on exit.
pub fn main() !void {
    var gpa: std.heap.DebugAllocator(.{}) = .init;
    defer _ = gpa.detectLeaks();
    const allocator = gpa.allocator();
    const path = "/home/cleong/Downloads/zig-0.1.1.tar.gz";
    var file = try std.fs.openFileAbsolute(path, .{});
    defer file.close();
    var iter = try extract(file);
    while (try iter.next(allocator)) |f| {
        // everything next() allocated for this entry is released at the end
        // of the loop iteration
        defer {
            allocator.free(f.name);
            allocator.free(f.link_name);
            allocator.free(f.data);
        }
        std.debug.print("{s}\n", .{f.name});
    }
}

Most of the work is done in the iterator's next function. The iterator will get hooked up to an async generator on the JavaScript side. The allocator that next() receives will allocate JavaScript memory.

Enabling use in JavaScript

Now we'll modify the code so it works in Node.js. First, move extract() along with the structs into the namespace worker:

// Namespace holding the functions that get called through the work queue
const worker = struct {
    // same signature as the stand-alone extract() shown earlier; it needs to
    // be public so that it can be referenced from outside the namespace
    pub fn extract(file: std.fs.File) !Iterator {
        // ...
    }

    const Iterator = struct {
        // ...
    };

    const File = struct {
        // ...
    };
};

Be sure extract() is public. Then add code that sets up a work queue:

const std = @import("std");
const zigar = @import("zigar");

// Work queue for the functions in the `worker` namespace
var work_queue: zigar.thread.WorkQueue(worker) = .{};

// Functions exported to JavaScript
pub const shutdown = work_queue.promisify(.shutdown);
pub const extract = work_queue.asyncify(worker.extract);

In the src sub-directory, create test.js:

import { open } from 'fs/promises';
import { extract, shutdown } from '../zig/decompress.zig';

// Open the tarball and obtain a web stream for its contents
const file = await open('/home/cleong/Downloads/zig-0.1.1.tar.gz');
const stream = file.readableWebStream();
try {
  // Print the name of each item coming out of extract(); note that the loop
  // variable `file` shadows the file handle declared above
  for await (const file of extract(stream)) {
    console.log(file.name.string);
  }
} finally {
  // NOTE(review): the standard ReadableStream interface defines cancel() but
  // no close() - confirm that this call behaves as intended
  stream.close();
  // shut down the work queue
  await shutdown();
}

Install node-zigar before running the script:

npm install --save-dev node-zigar

Then run it:

node --loader=node-zigar --no-warnings src/test.js

Now we can test whether it works correctly with a stream from the Fetch API:

import { extract, shutdown } from '../zig/decompress.zig';

// Use the body of the HTTP response as the stream to decompress
const { body: stream } = await fetch('https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz');
try {
  // Print the name of each item coming out of extract()
  for await (const file of extract(stream)) {
    console.log(file.name.string);
  }
} finally {
  // NOTE(review): the standard ReadableStream interface defines cancel() but
  // no close() - confirm that this call behaves as intended
  stream.close();
  // shut down the work queue
  await shutdown();
}

And it should. The file paths should fly by nearly as quickly as before if you have a decent Internet connection.

Before continuing, let me briefly explain what the Zig code actually does. This line here:

pub const shutdown = work_queue.promisify(.shutdown);

is a shortcut that defines a function for shutting down the work queue. It's the equivalent of the following:

// Hand-written equivalent of work_queue.promisify(.shutdown): `promise` is
// handed to the work queue's deinitAsync()
pub fn shutdown(promise: zigar.function.Promise(void)) void {
    return work_queue.deinitAsync(promise);
}

You can also define a start-up function in the same way (using .startup). We're omitting that since the work queue will automatically initialize itself with a single thread when extract() gets called. That's defined by the following:

pub const extract = work_queue.asyncify(worker.extract);

It creates a function that pushes a call request into the work queue. It's equivalent to doing this:

// Hand-written equivalent of work_queue.asyncify(worker.extract): takes the
// same argument as worker.extract() plus a generator, and pushes a call
// request into the work queue
pub fn extract(
    file: std.fs.File,
    generator: zigar.function.GeneratorOf(worker.extract),
) !void {
    try work_queue.push(worker.extract, .{file}, generator);
}

We use asyncify() here, because extract() returns an iterator. It becomes an async generator on the JavaScript side. Other value types become promises, so we would use promisify() instead.

Running in web browser

After all that prelim, we've reached the moment of truth. Will our code work in the browser?

We need to first set up rollup-plugin-zigar. Open vite.config.js and add the plugin:

import react from '@vitejs/plugin-react-swc';
import zigar from 'rollup-plugin-zigar';
import { defineConfig } from 'vite';

// https://vitejs.dev/config/
export default defineConfig({
  // Zigar plugin with multithreading enabled, using the ReleaseSmall optimization mode
  plugins: [react(), zigar({ multithreaded: true, optimize: 'ReleaseSmall' })],
  server: {
    host: true,
    // these two headers make the app cross-origin isolated, which is a
    // prerequisite of using WebAssembly threads
    headers: {
      'Cross-Origin-Opener-Policy': 'same-origin',
      'Cross-Origin-Embedder-Policy': 'require-corp',
    }
  },
})

We need to configure Vite's dev server to add two HTTP headers to its responses to make our app cross-origin isolated. This is a prerequisite for using WebAssembly threads.

After changing the configuration, open src/App.jsx and add test.js as an import:

import './App.css';
import './test.js';

We can then start Vite in dev mode:

npm run dev

We'll run straight into a Zig compilation error:

/home/cleong/zigar/rollup-plugin-zigar/demos/vite/code-viewer/zig/decompress.zig:59:51: 
error: expected type 'usize', found 'u64'

We had neglected to cast u64 to usize. On a 64-bit OS usize is 64-bit so there was no problem. WebAssembly is 32-bit, so the compiler balked. We can fix this easily enough:

            // read file content
            const len: usize = @intCast(f.size);
            const data = try allocator.alloc(u8, len);
            errdefer allocator.free(data);

After restarting Vite, our code will compile correctly, but immediately we encounter a more serious issue:

Dev console

GitHub's servers don't allow cross-origin access. Our web app simply cannot retrieve the tarball, at least not directly. Luckily, CORS proxy services like https://corsproxy.io/ do exist. As soon as we insert the proxy's URL into our URL:

const { body: stream } = await fetch('https://corsproxy.io/?url=https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz');

Bingo! We see file paths racing by in the dev console:

Dev console

Now it's just a matter of creating a user interface. First open src/main.jsx and remove the <StrictMode> tags:

import { createRoot } from 'react-dom/client'
import App from './App.jsx'
import './index.css'

// <App /> is rendered without <StrictMode> so that React does not initialize
// the app twice in dev mode
createRoot(document.getElementById('root')).render(
  <App />
)

This stops React from initializing the app twice in dev mode. After that, install a file tree component and another for syntax highlighting:

npm install @sinm/react-file-tree react-syntax-highlighter

Since this isn't a React tutorial, I'm not going to go into the details of how the code works. I'll simply provide the updated files, starting with App.jsx:

import { FileTree, utils } from '@sinm/react-file-tree';
import '@sinm/react-file-tree/icons.css';
import FileItemWithFileIcon from '@sinm/react-file-tree/lib/FileItemWithFileIcon';
import '@sinm/react-file-tree/styles.css';
import { startTransition, useCallback, useEffect, useState } from 'react';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import { dark } from 'react-syntax-highlighter/dist/esm/styles/prism';
import { extract, shutdown } from '../zig/decompress.zig';
import './App.css';

// Decodes the bytes of a file into text for display
const decoder = new TextDecoder;
// Maps file extensions to highlighter language names; an extension not listed
// here is used as the language name as is
const ext2lang = {
  h: 'c',
  c: 'c',
  hpp: 'cpp',
  cpp: 'cpp',
  js: 'javascript',
};

function App() {
  const [ tree, setTree ] = useState(null);
  const [ codeString, setCodeString ] = useState('');
  const [ language, setLanguage ] = useState('');
  const treeProps = {
    tree,
    itemRenderer: (treeNode) => <FileItemWithFileIcon treeNode={treeNode} />,
    onItemClick: useCallback(({ uri, type, data, expanded }) => {
      startTransition(() => {
        if (type === 'directory') {
          setTree(tree => utils.assignTreeNode(tree, uri, { expanded: !expanded }));
        } else {
          const code = decoder.decode(data);
          const slashIndex = uri.lastIndexOf('/');
          const dotIndex = uri.lastIndexOf('.');
          const ext = (dotIndex > slashIndex) ? uri.slice(dotIndex + 1) : '';
          setCodeString(code);
          setLanguage(ext2lang[ext] ?? ext);
        }
      });
    }, []),
  };
  const highlightProps = {
    language,
    showLineNumbers: true,
    style: dark,
    customStyle: {
      backgroundColor: null,
      border: null,
      boxShadow: null,
      padding: null,
      margin: 0,
    },
  };
  useEffect(() => {
    let unmounted = false;    
    async function load() {
      const { body: stream } = await fetch('https://corsproxy.io/?url=https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz');
      try {
        for await (const file of extract(stream)) {
          if (unmounted) break;
          startTransition(() => {
            setTree((tree) => {
              const uri = file.name.string.replace(/\/$/, '');
              const slashIndex = uri.lastIndexOf('/');
              if (slashIndex === -1) {
                // root
                return { uri, expanded: true };
              } else {
                const parentUri = uri.slice(0, slashIndex);
                const node = (file.kind == 'directory') 
                ? { uri, type: 'directory', expanded: false }
                : { uri, type: 'file', data: file.data.typedArray };
                return utils.appendTreeNode(tree, parentUri, node);
              }
            });
          });
        }
      } finally {
        stream.close();
        await shutdown();
      }
    }
    load();
    return () => unmounted = true;
  }, []);
  return (
    <>
      <div id="left-pane">
        <FileTree {...treeProps} />
      </div>
      <div id="right-pane">
        <SyntaxHighlighter {...highlightProps}>
          {codeString}
        </SyntaxHighlighter>
      </div>
    </>
  )
}

export default App

Then App.css:

/* horizontal flex container that is as tall as the viewport */
#root {
  margin: 0 auto;
  display: flex;
  flex-direction: row;
  height: 100vh;
}

/* left pane, separated from the right pane by a border */
#left-pane {
  flex: 1 0 16em;
  display: flex;
  border-right: 1px solid #333333;
  padding-left: 0.5em;
}

/* pointer cursor over spans inside the left pane */
#left-pane span {
  cursor: pointer;
}

/* right pane; scrolls when its content overflows */
#right-pane {
  flex: 10 1 auto;
  display: flex;
  overflow: auto;
}

/* pre elements in the right pane grow to fill the pane */
#right-pane pre {
  flex: 1 1 100%;
}

And finally index.css:

/* base typography and colors: light text on a dark background */
:root {
  font-family: system-ui, Avenir, Helvetica, Arial, sans-serif;
  line-height: 1.5;
  font-weight: 400;

  color-scheme: light dark;
  color: rgba(255, 255, 255, 0.87);
  background-color: #242424;

  font-synthesis: none;
  text-rendering: optimizeLegibility;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

/* no margin around the page; body is at least as tall as the viewport */
body {
  margin: 0;
  min-width: 320px;
  min-height: 100vh;
}

Here's how our app looks with the UI in place:

User interface

Source code

You can find the complete source code for this example here.

You can see the code in action here. As corsproxy.io's free tier only works at localhost, this live demo does not actually download the tarball from GitHub, relying instead on a copy hosted at Cloudflare.

⚠️ **GitHub.com Fallback** ⚠️