Tarball viewer - chung-leong/zigar GitHub Wiki
In this exercise we're going to create a web app that allows the user to view contents stored in a GitHub tarball. It demonstrates how Zig code can read a web stream obtained through the Fetch API.
First, we'll create a boilerplate Vite app. In a terminal window, run the following command:
npm create vite@latest
Enter code-viewer as the name, then select React and JavaScript + SWC:
│
◇ Project name:
│ code-viewer
│
◇ Select a framework:
│ React
│
◇ Select a variant:
│ JavaScript + SWC
│
◇ Use rolldown-vite (Experimental)?:
│ No
│
◇ Install with npm and start now?
│ No
│
◇ Scaffolding project in /home/rwiggum/code-viewer...
│
└ Done. Now run:
cd code-viewer
npm install
npm run dev
Once the project is created, go into its directory and install the necessary files:
cd code-viewer
npm install
Next, install the Zigar plugin:
npm install --save-dev rollup-plugin-zigar
Create a sub-directory for Zig code:
mkdir zig
For demo purposes we're going to use the Ziglang repo, going with the very first release, to keep the file size small. Download https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz and note the file path.
To get a feel for the standard library functions involved, we're going to first write a simple
Zig program that lists the contents of this tarball. Create decompress.zig in the zig
sub-directory and insert the following code:
const std = @import("std");
// List the contents of a gzipped tarball by printing each entry's path.
pub fn main() !void {
// Hardcoded path to the downloaded tarball -- adjust to your own location.
const path = "/home/cleong/Downloads/zig-0.1.1.tar.gz";
var file = try std.fs.openFileAbsolute(path, .{});
defer file.close();
// Buffered reader over the file.
var read_buffer: [4096]u8 = undefined;
var reader = file.reader(&read_buffer);
// Gzip decompressor layered on top of the file reader.
var decompress_buffer: [std.compress.flate.max_window_len]u8 = undefined;
var decompressor: std.compress.flate.Decompress = .init(&reader.interface, .gzip, &decompress_buffer);
// Tar iterator layered on top of the decompressor's reader; entry names and
// link names are written into these caller-provided buffers.
var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var iter: std.tar.Iterator = .init(&decompressor.reader, .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
});
// Print the path of every entry in the archive.
while (try iter.next()) |f| {
std.debug.print("{s}\n", .{f.name});
}
}Be sure to update the hardcoded file path with your own. Then run the program using the following command (or by clicking the "run" link in VS Code):
zig run decompress.zig
You should see a long list of files:
zig-0.1.1/
zig-0.1.1/.gitignore
zig-0.1.1/.travis.yml
zig-0.1.1/CMakeLists.txt
zig-0.1.1/LICENSE
zig-0.1.1/README.md
zig-0.1.1/build.zig
zig-0.1.1/c_headers/
zig-0.1.1/c_headers/__clang_cuda_builtin_vars.h
zig-0.1.1/c_headers/__clang_cuda_cmath.h
zig-0.1.1/c_headers/__clang_cuda_complex_builtins.h
zig-0.1.1/c_headers/__clang_cuda_intrinsics.h
zig-0.1.1/c_headers/__clang_cuda_math_forward_declares.h
zig-0.1.1/c_headers/__clang_cuda_runtime_wrapper.h
...
As you can see, extracting files from a tarball is fairly straightforward in Zig. We create a reader
for the file and pass that to the flate decompressor. We then give the decompressor's reader to
std.tar.Iterator. The iterator then gives us each file.
Next, we're going to rework the code in preparation for using it in JavaScript. We want a function
that accepts a std.fs.File as an argument and returns an iterator of structs containing
both file info and content:
const std = @import("std");
// Wrap the given file in a tarball iterator; the reader chain is set up
// lazily on the first call to next().
fn extract(file: std.fs.File) !Iterator {
return .{ .file = file };
}
// Streaming iterator over a gzipped tarball. All buffers and sub-readers are
// stored inline in the struct and wired together on the first next() call.
// NOTE(review): once started, the struct holds pointers into itself (the
// reader/decompressor interfaces), so it should not be moved or copied
// afterwards -- confirm against zigar's usage.
const Iterator = struct {
file: std.fs.File,
started: bool = false,
reader: std.fs.File.Reader = undefined,
read_buffer: [4096]u8 = undefined,
decompressor: std.compress.flate.Decompress = undefined,
decompress_buffer: [std.compress.flate.max_window_len]u8 = undefined,
file_name_buffer: [std.fs.max_path_bytes]u8 = undefined,
link_name_buffer: [std.fs.max_path_bytes]u8 = undefined,
tar_iter: std.tar.Iterator = undefined,
// Return the next archive entry, with name, link name, and content copied
// into memory from the given allocator (freed by the caller), or null at
// the end of the archive.
pub fn next(self: *@This(), allocator: std.mem.Allocator) !?File {
if (!self.started) {
// create file reader
self.reader = self.file.reader(&self.read_buffer);
// create decompressor
self.decompressor = .init(&self.reader.interface, .gzip, &self.decompress_buffer);
// obtain the tar iterator
self.tar_iter = .init(&self.decompressor.reader, .{
.file_name_buffer = &self.file_name_buffer,
.link_name_buffer = &self.link_name_buffer,
});
self.started = true;
}
// get next item
const f = try self.tar_iter.next() orelse return null;
// copy the name and link name, as the iterator's internal buffers are
// reused for the next entry
const name = try allocator.dupe(u8, f.name);
errdefer allocator.free(name);
const link_name = try allocator.dupe(u8, f.link_name);
errdefer allocator.free(link_name);
// read file content
const data = try allocator.alloc(u8, f.size);
errdefer allocator.free(data);
var data_writer: std.Io.Writer = .fixed(data);
try self.tar_iter.streamRemaining(f, &data_writer);
return .{
.name = name,
.link_name = link_name,
.size = f.size,
.mode = f.mode,
.kind = f.kind,
.data = data,
};
}
};
// One extracted archive entry; the slices are allocated by next() and owned
// by the caller.
const File = struct {
name: []const u8, // path of the entry within the archive
link_name: []const u8, // link target from the tar header
size: u64, // content size in bytes
mode: u32, // permission bits from the tar header
kind: std.tar.FileKind,
data: []const u8, // the entry's content
};
pub fn main() !void {
// Debug allocator so leaks are reported on exit.
var gpa: std.heap.DebugAllocator(.{}) = .init;
defer _ = gpa.detectLeaks();
const allocator = gpa.allocator();
// Hardcoded path to the downloaded tarball -- adjust to your own location.
const path = "/home/cleong/Downloads/zig-0.1.1.tar.gz";
var file = try std.fs.openFileAbsolute(path, .{});
defer file.close();
var iter = try extract(file);
while (try iter.next(allocator)) |f| {
// next() allocated these for us; free them when done with the entry
defer {
allocator.free(f.name);
allocator.free(f.link_name);
allocator.free(f.data);
}
std.debug.print("{s}\n", .{f.name});
}
}Most of the work is done in the iterator's next function. The iterator will get hooked up to an
async generator
on the JavaScript side. The allocator that next() receives will allocate JavaScript memory.
Now we'll modify the code so it works in Node.js. First, move extract() along with the structs
into the namespace worker:
const worker = struct {
pub fn extract(reader: std.io.AnyReader) !Iterator {
// ...
}
const Iterator = struct {
// ...
};
const File = struct {
// ...
};
};
Be sure extract() is public. Then add code that sets up a
work queue:
const std = @import("std");
const zigar = @import("zigar");
// Work queue that runs functions from our `worker` namespace in a separate
// thread. It must be instantiated with the namespace defined above, which is
// called `worker` here (the original listing referenced an undefined
// `thread_ns`).
var work_queue: zigar.thread.WorkQueue(worker) = .{};
// Exposed to JavaScript as a promise-returning function that shuts the queue down.
pub const shutdown = work_queue.promisify(.shutdown);
pub const extract = work_queue.asyncify(worker.extract);
In the src sub-directory, create test.js:
// Exercise extract() in Node.js with a web stream read from a local file.
import { open } from 'fs/promises';
import { extract, shutdown } from '../zig/decompress.zig';
const file = await open('/home/cleong/Downloads/zig-0.1.1.tar.gz');
const stream = file.readableWebStream();
try {
// extract() surfaces as an async generator; each item is one archive entry
for await (const file of extract(stream)) {
console.log(file.name.string);
}
} finally {
// NOTE(review): web ReadableStream exposes cancel(), not close() -- confirm
stream.close();
await shutdown();
}Install node-zigar before running the script:
npm install --save-dev node-zigar
Then run it:
node --loader=node-zigar --no-warnings src/test.js
Now we can test whether it works correctly with a stream from the Fetch API:
// Same test, but with the stream coming from the Fetch API instead of a file.
import { extract, shutdown } from '../zig/decompress.zig';
const { body: stream } = await fetch('https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz');
try {
for await (const file of extract(stream)) {
console.log(file.name.string);
}
} finally {
// NOTE(review): web ReadableStream exposes cancel(), not close() -- confirm
stream.close();
await shutdown();
}And it should. The file paths should fly by nearly as quickly as before if you have a decent Internet connection.
Before continuing, let me briefly explain what the Zig code actually does. This line here:
pub const shutdown = work_queue.promisify(.shutdown);
This is a shortcut that defines a function for shutting down the work queue. It's the equivalent of the following:
pub fn shutdown(promise: Promise(void)) void {
return work_queue.deinitAsync(promise);
}
You can also define a start-up function in the same way (using .startup). We're omitting that since
the work queue will automatically initialize itself with a single thread when extract() gets
called. That's defined by the following:
pub const extract = work_queue.asyncify(worker.extract);
It creates a function that pushes a call request into the work queue. It's equivalent to doing this:
// Equivalent of work_queue.asyncify(worker.extract): queues a call to
// worker.extract and routes its yielded values to a JavaScript async generator.
pub fn extract(
reader: std.io.AnyReader,
generator: zigar.function.GeneratorOf(worker.extract),
) !void {
try work_queue.push(worker.extract, .{reader}, generator);
}We use asyncify() here, because extract() returns an iterator. It becomes an async generator on
the JavaScript side. Other value types become promises, so we would use promisify() instead.
After all the preliminaries, we've reached the moment of truth. Will our code work in the browser?
We need to first set up rollup-plugin-zigar. Open vite.config.js and add the plugin:
import react from '@vitejs/plugin-react-swc';
import zigar from 'rollup-plugin-zigar';
import { defineConfig } from 'vite';
// https://vitejs.dev/config/
export default defineConfig({
// Compile the Zig code to WebAssembly with thread support, optimized for size.
plugins: [react(), zigar({ multithreaded: true, optimize: 'ReleaseSmall' })],
server: {
host: true,
// Cross-origin-isolation headers needed for WebAssembly threads.
headers: {
'Cross-Origin-Opener-Policy': 'same-origin',
'Cross-Origin-Embedder-Policy': 'require-corp',
}
},
})We need to configure Vite's dev server to add two HTTP headers to responses to make our app cross origin isolated. This is a prerequisite of using WebAssembly threads.
After changing the configuration, open src/App.jsx and add test.js as an import:
import './App.css';
import './test.js';
We can then start Vite in dev mode:
npm run dev
We'll run straight into a Zig compilation error:
/home/cleong/zigar/rollup-plugin-zigar/demos/vite/code-viewer/zig/decompress.zig:59:51:
error: expected type 'usize', found 'u64'
We had neglected to cast u64 to usize. On a 64-bit OS usize is 64-bit so there was no
problem. WebAssembly is 32-bit, so the compiler balked. We can fix this easily enough:
// read file content
const len: usize = @intCast(f.size);
const data = try allocator.alloc(u8, len);
errdefer allocator.free(data);
After restarting Vite, our code will compile correctly, but immediately we encounter a more serious issue:

GitHub's servers don't allow cross-origin access. Our web app simply cannot retrieve the tarball. Not directly. CORS proxy services like https://corsproxy.io/ do exist luckily. As soon as we insert the proxy's URL into our URL:
const { body: stream } = await fetch('https://corsproxy.io/?url=https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz');
Bingo! We see file paths racing by in the dev console:

Now it's just a matter of creating a user interface. First open src/main.jsx and remove the
<StrictMode> tags:
import { createRoot } from 'react-dom/client'
import App from './App.jsx'
import './index.css'
// Render without <StrictMode> so effects don't run twice in dev mode.
createRoot(document.getElementById('root')).render(
<App />
)This stops React from initializing the app twice in dev mode. After that, install a file tree component and another for syntax highlighting:
npm install @sinm/react-file-tree react-syntax-highlighter
Since this isn't a React tutorial, I'm not going to get into the details of how the code works. I'll
simply provide the updated files, starting with App.jsx:
import { FileTree, utils } from '@sinm/react-file-tree';
import '@sinm/react-file-tree/icons.css';
import FileItemWithFileIcon from '@sinm/react-file-tree/lib/FileItemWithFileIcon';
import '@sinm/react-file-tree/styles.css';
import { startTransition, useCallback, useEffect, useState } from 'react';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import { dark } from 'react-syntax-highlighter/dist/esm/styles/prism';
import { extract, shutdown } from '../zig/decompress.zig';
import './App.css';
const decoder = new TextDecoder;
// Map file extensions to syntax-highlighter language names; extensions not
// listed here are passed through unchanged.
const ext2lang = {
h: 'c',
c: 'c',
hpp: 'cpp',
cpp: 'cpp',
js: 'javascript',
};
// Two-pane viewer: file tree of the tarball on the left, highlighted source
// of the currently selected file on the right.
function App() {
const [ tree, setTree ] = useState(null);
const [ codeString, setCodeString ] = useState('');
const [ language, setLanguage ] = useState('');
const treeProps = {
tree,
itemRenderer: (treeNode) => <FileItemWithFileIcon treeNode={treeNode} />,
// Clicking a directory toggles its expansion; clicking a file decodes its
// bytes and shows it with a language derived from the file extension.
onItemClick: useCallback(({ uri, type, data, expanded }) => {
startTransition(() => {
if (type === 'directory') {
setTree(tree => utils.assignTreeNode(tree, uri, { expanded: !expanded }));
} else {
const code = decoder.decode(data);
const slashIndex = uri.lastIndexOf('/');
const dotIndex = uri.lastIndexOf('.');
// the dot only counts as an extension if it comes after the last slash
const ext = (dotIndex > slashIndex) ? uri.slice(dotIndex + 1) : '';
setCodeString(code);
setLanguage(ext2lang[ext] ?? ext);
}
});
}, []),
};
const highlightProps = {
language,
showLineNumbers: true,
style: dark,
// null out the highlighter's default chrome so the page styling shows through
customStyle: {
backgroundColor: null,
border: null,
boxShadow: null,
padding: null,
margin: 0,
},
};
// Download the tarball once on mount, building the tree incrementally as
// entries arrive from the Zig-side async generator.
useEffect(() => {
let unmounted = false;
async function load() {
const { body: stream } = await fetch('https://corsproxy.io/?url=https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz');
try {
for await (const file of extract(stream)) {
if (unmounted) break;
startTransition(() => {
setTree((tree) => {
// directory entries in a tar end with '/'; strip it for the tree
const uri = file.name.string.replace(/\/$/, '');
const slashIndex = uri.lastIndexOf('/');
if (slashIndex === -1) {
// root
return { uri, expanded: true };
} else {
const parentUri = uri.slice(0, slashIndex);
const node = (file.kind == 'directory')
? { uri, type: 'directory', expanded: false }
: { uri, type: 'file', data: file.data.typedArray };
return utils.appendTreeNode(tree, parentUri, node);
}
});
});
}
} finally {
// NOTE(review): web ReadableStream exposes cancel(), not close() -- confirm
stream.close();
await shutdown();
}
}
load();
return () => unmounted = true;
}, []);
return (
<>
<div id="left-pane">
<FileTree {...treeProps} />
</div>
<div id="right-pane">
<SyntaxHighlighter {...highlightProps}>
{codeString}
</SyntaxHighlighter>
</div>
</>
)
}
export default App
Then App.css:
/* Two side-by-side panes filling the viewport height. */
#root {
margin: 0 auto;
display: flex;
flex-direction: row;
height: 100vh;
}
/* File tree: preferred width of 16em; can grow but not shrink. */
#left-pane {
flex: 1 0 16em;
display: flex;
border-right: 1px solid #333333;
padding-left: 0.5em;
}
#left-pane span {
cursor: pointer;
}
/* Code view: takes the remaining space and scrolls on overflow. */
#right-pane {
flex: 10 1 auto;
display: flex;
overflow: auto;
}
#right-pane pre {
flex: 1 1 100%;
}And finally index.css:
/* Dark color scheme and font defaults for the whole app. */
:root {
font-family: system-ui, Avenir, Helvetica, Arial, sans-serif;
line-height: 1.5;
font-weight: 400;
color-scheme: light dark;
color: rgba(255, 255, 255, 0.87);
background-color: #242424;
font-synthesis: none;
text-rendering: optimizeLegibility;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
/* Remove the default margin so the panes reach the window edges. */
body {
margin: 0;
min-width: 320px;
min-height: 100vh;
}Here's how our app looks with the UI in place:

You can find the complete source code for this example here.
You can see the code in action here. As corsproxy.io's free tier only works at localhost, this live demo does not actually download the tarball from GitHub, relying instead on a copy hosted at Cloudflare.