Tarball viewer - chung-leong/zigar GitHub Wiki
In this exercise we're going to create a web app that allows the user to view contents stored in a GitHub tarball. It demonstrates how Zig code can read a web stream obtained through the Fetch API.
First, we'll create a boilerplate Vite app. In a terminal window, run the following command:
npm create vite@latest
Enter code-viewer
as name, then select React
and JavaScript + SWC
:
Need to install the following packages:
[email protected]
Ok to proceed? (y) y
✔ Project name: … code-viewer
✔ Select a framework: › React
✔ Select a variant: › JavaScript + SWC
Once the project is created, go into its directory and install the necessary files:
cd code-viewer
npm install
Next, install the Zigar plugin:
npm install --save-dev rollup-plugin-zigar
Create a sub-directory for Zig code:
mkdir zig
For demo purposes we're going to use the Zig repository itself, going with the very first release to keep the file size small. Download https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz and note the file path.
To get a feel for the standard library functions involved, we're going to first write a simple
Zig program that lists the contents of this tarball. Create decompress.zig
in the zig
sub-directory and insert the following code:
const std = @import("std");
// List the contents of a gzip-compressed tarball by printing each entry's path.
pub fn main() !void {
// NOTE(review): hardcoded absolute path — update this to wherever you saved the tarball
const path = "/home/cleong/Downloads/zig-0.1.1.tar.gz";
var file = try std.fs.openFileAbsolute(path, .{});
defer file.close();
// wrap the file's reader in a gzip decompressor
var decompressor = std.compress.gzip.decompressor(file.reader());
// fixed-size buffers the tar iterator uses to hold entry names (no allocator needed)
var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
// feed the decompressed byte stream to the tar iterator
var iter = std.tar.iterator(decompressor.reader(), .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
});
// print the path of each entry in the archive
while (try iter.next()) |f| {
std.debug.print("{s}\n", .{f.name});
}
}
Be sure to update the hardcoded file path with your own. Then run the program using the following command (or by clicking the "run" link in VS Code):
zig run decompress.zig
You should see a long list of files:
zig-0.1.1/
zig-0.1.1/.gitignore
zig-0.1.1/.travis.yml
zig-0.1.1/CMakeLists.txt
zig-0.1.1/LICENSE
zig-0.1.1/README.md
zig-0.1.1/build.zig
zig-0.1.1/c_headers/
zig-0.1.1/c_headers/__clang_cuda_builtin_vars.h
zig-0.1.1/c_headers/__clang_cuda_cmath.h
zig-0.1.1/c_headers/__clang_cuda_complex_builtins.h
zig-0.1.1/c_headers/__clang_cuda_intrinsics.h
zig-0.1.1/c_headers/__clang_cuda_math_forward_declares.h
zig-0.1.1/c_headers/__clang_cuda_runtime_wrapper.h
...
As you can see, extracting files from a tarball is fairly straightforward in Zig. We pass the
file's reader to the gzip decompressor, then pass the decompressor's reader to
std.tar.iterator()
. The iterator then gives us each file.
Next, we're going to rework the code in preparation for using it in JavaScript. We want a function
that accepts a std.io.AnyReader
as an argument and returns an iterator of structs containing
both file info and content:
const std = @import("std");
// Create an Iterator over the gzip-compressed tarball supplied through `reader`.
// Decompression is deferred until the first call to next().
fn extract(reader: std.io.AnyReader) !Iterator {
return .{
.reader = reader,
};
}
// Pull-style iterator over the entries of a gzip-compressed tarball.
// The gzip decompressor and tar iterator are created lazily on the first
// call to next(), so constructing an Iterator performs no I/O.
const Iterator = struct {
const Decompressor = std.compress.gzip.Decompressor(std.io.AnyReader);
const TarIterator = std.tar.Iterator(Decompressor.Reader);
reader: std.io.AnyReader,
// set to true once the decompressor and tar iterator have been initialized
started: bool = false,
decompressor: Decompressor = undefined,
// name buffers must live inside this struct so the tar iterator's slices stay valid
file_name_buffer: [std.fs.max_path_bytes]u8 = undefined,
link_name_buffer: [std.fs.max_path_bytes]u8 = undefined,
tar_iter: TarIterator = undefined,
// Return the next file in the archive, with its name, link name, and full
// contents duplicated into memory owned by `allocator` (caller frees them),
// or null when the archive is exhausted.
pub fn next(self: *@This(), allocator: std.mem.Allocator) !?File {
if (!self.started) {
// create decompressor
self.decompressor = std.compress.gzip.decompressor(self.reader);
// obtain the tar iterator
self.tar_iter = std.tar.iterator(self.decompressor.reader(), .{
.file_name_buffer = &self.file_name_buffer,
.link_name_buffer = &self.link_name_buffer,
});
self.started = true;
}
// get next item
const f = try self.tar_iter.next() orelse return null;
const reader = f.reader();
// read data
// NOTE(review): f.size is u64 — on a 32-bit target (e.g. WebAssembly) this
// call needs an @intCast to usize; the tutorial addresses this later
const data = try allocator.alloc(u8, f.size);
errdefer allocator.free(data);
const len = try reader.readAll(data);
if (len != f.size) return error.SizeMismatch;
// duplicate the names, since the underlying buffers are reused by the next entry
const name = try allocator.dupe(u8, f.name);
errdefer allocator.free(name);
const link_name = try allocator.dupe(u8, f.link_name);
errdefer allocator.free(link_name);
return .{
.name = name,
.link_name = link_name,
.size = f.size,
.mode = f.mode,
.kind = f.kind,
.data = data,
};
}
};
// A fully-materialized tar entry; all slices are owned by the allocator
// passed to Iterator.next() and must be freed by the caller.
const File = struct {
name: []const u8,
link_name: []const u8,
size: u64,
mode: u32,
kind: std.tar.FileKind,
data: []const u8,
};
// Exercise extract()/Iterator locally: open the tarball, iterate every
// entry, and print its name, freeing each entry's allocations as we go.
pub fn main() !void {
// debug allocator so leaked allocations are reported at exit
var gpa: std.heap.DebugAllocator(.{}) = .init;
defer _ = gpa.detectLeaks();
const allocator = gpa.allocator();
// NOTE(review): hardcoded absolute path — update to your own download location
const path = "/home/cleong/Downloads/zig-0.1.1.tar.gz";
var file = try std.fs.openFileAbsolute(path, .{});
defer file.close();
var iter = try extract(file.reader().any());
while (try iter.next(allocator)) |f| {
// every slice in File is owned by us; free before moving to the next entry
defer {
allocator.free(f.name);
allocator.free(f.link_name);
allocator.free(f.data);
}
std.debug.print("{s}\n", .{f.name});
}
}
Most of the work is done in the iterator's next
function. The iterator will get hooked up to an
async generator
on the JavaScript side. The allocator that next()
receives will allocate JavaScript memory.
Now we'll modify the code so it works in Node.js. First, move extract()
along with the structs
into the namespace thread_ns
:
// Namespace holding the functions that will run on the work-queue thread.
// Only extract() is public; Iterator and File remain implementation details.
const thread_ns = struct {
pub fn extract(reader: std.io.AnyReader) !Iterator {
// ...
}
const Iterator = struct {
// ...
};
const File = struct {
// ...
};
};
Be sure extract()
is public. Then add code that sets up a
work queue:
const std = @import("std");
const zigar = @import("zigar");
var gpa: std.heap.DebugAllocator(.{}) = .init;
// work queue that runs thread_ns functions on a separate thread
var work_queue: zigar.thread.WorkQueue(thread_ns) = .{};
// Start the work-queue thread; call once from JavaScript before extract().
pub fn startup() !void {
try work_queue.init(.{ .allocator = gpa.allocator() });
}
// Shut the work queue down asynchronously, resolving `promise` when done.
pub fn shutdown(promise: zigar.function.Promise(void)) void {
work_queue.deinitAsync(promise);
}
// Queue thread_ns.extract() for execution; results are delivered through
// `generator`, which surfaces as an async generator on the JavaScript side.
pub fn extract(
reader: std.io.AnyReader,
generator: zigar.function.GeneratorOf(thread_ns.extract),
) !void {
try work_queue.push(thread_ns.extract, .{reader}, generator);
}
In the src
sub-directory, create test.js
:
import { open } from 'fs/promises';
import { extract, startup, shutdown } from '../zig/decompress.zig';
// start the Zig work-queue thread before any calls into the module
startup();
try {
const file = await open('/home/cleong/Downloads/zig-0.1.1.tar.gz');
// obtain a web ReadableStream for the file and hand its reader to Zig,
// which zigar adapts into a std.io.AnyReader
const stream = file.readableWebStream();
const reader = stream.getReader()
// extract() yields an async generator; each item mirrors the Zig File struct
for await (const file of extract(reader)) {
console.log(file.name.string);
}
} finally {
// NOTE(review): shutdown() returns a promise — consider awaiting it,
// as the App.jsx version later does
shutdown();
}
Install node-zigar before running the script:
npm install --save-dev node-zigar
Then run it:
node --loader=node-zigar --no-warnings src/test.js
After compilation completes, the following error will pop up immediately:
Error: Inefficient reader access. Each call is only reading 5.91 bytes. Please use std.io.BufferedReader.
As the message indicates, std.compress.gzip.Decompressor
only reads a handful of bytes at a time
from the given reader. This results in agonizingly low throughput, since the JavaScript event loop
introduces substantial latency. The iterator needs to use std.io.BufferedReader
to perform read
operations in larger chunks:
const Decompressor = std.compress.gzip.Decompressor(Buffer.Reader); // <- changed
const TarIterator = std.tar.Iterator(Decompressor.Reader);
const Buffer = std.io.BufferedReader(1024 * 16, std.io.AnyReader); // <- added
reader: std.io.AnyReader,
started: bool = false,
decompressor: Decompressor = undefined,
buffer: Buffer = undefined, // <- added
file_name_buffer: [std.fs.max_path_bytes]u8 = undefined,
link_name_buffer: [std.fs.max_path_bytes]u8 = undefined,
tar_iter: TarIterator = undefined,
pub fn next(self: *@This(), allocator: std.mem.Allocator) !?File {
if (!self.started) {
// create buffered reader
self.buffer = .{ .unbuffered_reader = self.reader }; // <- added
// create decompressor
self.decompressor = std.compress.gzip.decompressor(self.buffer.reader()); // <- changed
After that the script will run with no issue. Now we can test whether it works correctly with a stream from the Fetch API:
import { extract, startup, shutdown } from '../zig/decompress.zig';
// start the Zig work-queue thread before any calls into the module
startup();
try {
// stream the tarball straight from GitHub instead of the local filesystem
const response = await fetch('https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz');
const reader = response.body.getReader()
// extract() yields an async generator; each item mirrors the Zig File struct
for await (const file of extract(reader)) {
console.log(file.name.string);
}
} finally {
// NOTE(review): shutdown() returns a promise — consider awaiting it
shutdown();
}
And it should. The file paths should fly by nearly as quickly as before if you have a decent Internet connection.
After all that prelim, we've reached the moment of truth. Will our code work in the browser?
We need to first set up rollup-plugin-zigar. Open vite.config.js
and add the plugin:
import react from '@vitejs/plugin-react-swc';
import zigar from 'rollup-plugin-zigar';
import { defineConfig } from 'vite';
// https://vitejs.dev/config/
export default defineConfig({
// topLevelAwait is disabled for older-browser support, so calls into the
// module may return a promise until WebAssembly compilation finishes
plugins: [react(), zigar({ topLevelAwait: false, multithreaded: true, optimize: 'ReleaseSmall' })],
server: {
host: true,
// COOP/COEP headers make the dev server cross-origin isolated,
// a prerequisite for WebAssembly threads (SharedArrayBuffer)
headers: {
'Cross-Origin-Opener-Policy': 'same-origin',
'Cross-Origin-Embedder-Policy': 'require-corp',
}
},
})
We need to configure Vite's dev server to add two HTTP headers to responses to make our app cross origin isolated. This is a prerequisite of using WebAssembly threads.
After changing the configuration, open src/App.jsx
and add test.js
as an import:
import './App.css';
import './test.js';
We can then start Vite in dev mode:
npm run dev
We'll run straight into a Zig compilation error:
/home/cleong/zigar/rollup-plugin-zigar/demos/vite/code-viewer/zig/decompress.zig:59:51:
error: expected type 'usize', found 'u64'
We had neglected to cast u64
to usize
. On a 64-bit OS usize
is 64-bit so there was no
problem. WebAssembly is 32-bit, so the compiler balked. We can fix this easily enough:
// read data
const correct_len: usize = @intCast(f.size);
const data = try allocator.alloc(u8, correct_len);
errdefer allocator.free(data);
const len = try reader.readAll(data);
if (len != correct_len) return error.SizeMismatch;
After restarting Vite, our code will compile correctly, but immediately we encounter a more serious issue:
GitHub's servers don't allow cross-origin access, so our web app simply cannot retrieve the tarball directly. Luckily, CORS proxy services such as https://corsproxy.io/ do exist. As soon as we insert the proxy's URL into our URL:
const response = await fetch('https://corsproxy.io/?url=https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz');
Bingo! We have file paths racing by in the dev console:
Now it's just a matter of creating a user interface. First open src/main.jsx
and remove the
<StrictMode>
tags:
import { createRoot } from 'react-dom/client'
import App from './App.jsx'
import './index.css'
// render without <StrictMode> so effects (and the tarball download) run once in dev
createRoot(document.getElementById('root')).render(
<App />
)
This stops React from initializing the app twice in dev mode. After that, install a file tree component and another for syntax highlighting:
npm install @sinm/react-file-tree react-syntax-highlighter
Since this isn't a React tutorial, I'm not going to go into the details of how the code works. I'll
simply provide the updated files, starting with App.jsx
:
import { FileTree, utils } from '@sinm/react-file-tree';
import '@sinm/react-file-tree/icons.css';
import FileItemWithFileIcon from '@sinm/react-file-tree/lib/FileItemWithFileIcon';
import '@sinm/react-file-tree/styles.css';
import { startTransition, useCallback, useEffect, useState } from 'react';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import { dark } from 'react-syntax-highlighter/dist/esm/styles/prism';
import { extract, shutdown, startup } from '../zig/decompress.zig';
import './App.css';
// decodes file contents (a Uint8Array from Zig) into a string for display
const decoder = new TextDecoder;
// maps file extensions to the language names the syntax highlighter expects;
// extensions not listed here are passed through unchanged
const ext2lang = {
h: 'c',
c: 'c',
hpp: 'cpp',
cpp: 'cpp',
js: 'javascript',
};
// Two-pane viewer: a file tree on the left, syntax-highlighted file
// contents on the right. The tree is populated by streaming the tarball
// through the Zig extract() generator.
function App() {
const [ tree, setTree ] = useState(null);
const [ codeString, setCodeString ] = useState('');
const [ language, setLanguage ] = useState('');
const treeProps = {
tree,
itemRenderer: (treeNode) => <FileItemWithFileIcon treeNode={treeNode} />,
// directories toggle open/closed; files are decoded and shown in the right pane
onItemClick: useCallback(({ uri, type, data, expanded }) => {
startTransition(() => {
if (type === 'directory') {
setTree(tree => utils.assignTreeNode(tree, uri, { expanded: !expanded }));
} else {
const code = decoder.decode(data);
// derive the extension from the last dot after the last slash
const slashIndex = uri.lastIndexOf('/');
const dotIndex = uri.lastIndexOf('.');
const ext = (dotIndex > slashIndex) ? uri.slice(dotIndex + 1) : '';
setCodeString(code);
setLanguage(ext2lang[ext] ?? ext);
}
});
}, []),
};
const highlightProps = {
language,
showLineNumbers: true,
style: dark,
// null out the theme's box styling so the pane's own layout applies
customStyle: {
backgroundColor: null,
border: null,
boxShadow: null,
padding: null,
margin: 0,
},
};
useEffect(() => {
// flag checked inside the loop so we stop consuming after unmount
let unmounted = false;
async function load() {
startup();
// fetch through a CORS proxy, since GitHub doesn't allow cross-origin access
const response = await fetch('https://corsproxy.io/?url=https://github.com/ziglang/zig/archive/refs/tags/0.1.1.tar.gz');
const reader = response.body.getReader()
try {
// the extra await handles the case where WebAssembly compilation has not
// finished yet (topLevelAwait is disabled) and extract() returns a promise
for await (const file of await extract(reader)) {
if (unmounted) break;
startTransition(() => {
setTree((tree) => {
// strip the trailing slash that tar uses on directory entries
const uri = file.name.string.replace(/\/$/, '');
const slashIndex = uri.lastIndexOf('/');
if (slashIndex === -1) {
// archive root becomes the tree root, shown expanded
return { uri, expanded: true };
} else {
const parentUri = uri.slice(0, slashIndex);
const node = (file.kind == 'directory')
? { uri, type: 'directory', expanded: false }
: { uri, type: 'file', data: file.data.typedArray };
return utils.appendTreeNode(tree, parentUri, node);
}
});
});
}
} finally {
await shutdown();
}
}
load();
return () => unmounted = true;
}, []);
return (
<>
<div id="left-pane">
<FileTree {...treeProps} />
</div>
<div id="right-pane">
<SyntaxHighlighter {...highlightProps}>
{codeString}
</SyntaxHighlighter>
</div>
</>
)
}
export default App
Then App.css
:
/* full-height horizontal layout: file tree on the left, code on the right */
#root {
margin: 0 auto;
display: flex;
flex-direction: row;
height: 100vh;
}
/* file-tree pane: fixed preferred width, may grow but not shrink */
#left-pane {
flex: 1 0 16em;
display: flex;
border-right: 1px solid #333333;
padding-left: 0.5em;
}
/* make tree item labels look clickable */
#left-pane span {
cursor: pointer;
}
/* code pane takes the remaining space and scrolls its own content */
#right-pane {
flex: 10 1 auto;
display: flex;
overflow: auto;
}
/* let the highlighter's <pre> fill the pane */
#right-pane pre {
flex: 1 1 100%;
}
And finally index.css
:
/* base typography and dark color scheme for the whole app */
:root {
font-family: system-ui, Avenir, Helvetica, Arial, sans-serif;
line-height: 1.5;
font-weight: 400;
color-scheme: light dark;
color: rgba(255, 255, 255, 0.87);
background-color: #242424;
font-synthesis: none;
text-rendering: optimizeLegibility;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
/* remove default margin so the panes span the full viewport */
body {
margin: 0;
min-width: 320px;
min-height: 100vh;
}
One thing worth mentioning is why an await
has been added in front of the call to extract()
.
This is necessary because we've disabled the use of top-level await for the sake of older browsers.
As a result, it's possible for WebAssembly compilation to be unfinished by the time of the call.
In that case extract()
would return a promise (instead of an async generator).
Here's how our app looks with the UI in place:
You can find the complete source code for this example here.
You can see the code in action here. As corsproxy.io's free tier only works at localhost, this live demo does not actually download the tarball from GitHub, relying instead on a copy hosted at Cloudflare.