Notes on finding a set of files in a classpath to traverse a directory - krickert/search-api GitHub Wiki
Below is a simplified solution where the resource finder returns a collection of lightweight resource references instead of raw Paths. Each reference stores the resource’s URI (which is valid even if the underlying FileSystem is closed) plus some metadata (like filename and extension) and provides a helper method to open a fresh InputStream on demand. This lets you process each resource (for example, converting it into a Protocol Buffers ByteString) one at a time.
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
/**
 * Lightweight, immutable reference to a resource identified by a URI.
 *
 * <p>Only the URI and two pieces of derived metadata (filename and extension)
 * are kept in memory; the content is read on demand through
 * {@link #openInputStream()} or {@link #asByteStream()}.
 */
public class ResourceReference {
    private final URI resourceUri;
    private final String filename;
    private final String extension;

    /**
     * Creates a reference and derives the filename and extension from the URI.
     *
     * @param resourceUri the resource location, e.g. {@code file:/dir/a.txt} or
     *                    {@code jar:file:/lib/x.jar!/dir/a.txt}; must not be null
     */
    public ResourceReference(URI resourceUri) {
        this.resourceUri = resourceUri;
        this.filename = extractFilename(resourceUri);
        this.extension = extractExtension(this.filename);
    }

    /** Returns the last path segment of the URI, or a fallback when there is none. */
    private static String extractFilename(URI uri) {
        String path = uri.getPath();
        if (path == null) {
            // "jar:" URIs are opaque, so getPath() is null for them. The entry path
            // lives in the scheme-specific part ("file:/lib/x.jar!/dir/a.txt");
            // without this fallback the filename would be the entire URI string.
            path = uri.getSchemeSpecificPart();
        }
        if (path == null || path.isEmpty()) {
            // Nothing path-like available: use the entire URI string.
            return uri.toString();
        }
        int lastSlash = path.lastIndexOf('/');
        if (lastSlash == -1 || lastSlash == path.length() - 1) {
            // No slash, or a trailing slash: keep the whole path.
            return path;
        }
        return path.substring(lastSlash + 1);
    }

    /** Returns the text after the last dot, or "" for names with no usable extension. */
    private static String extractExtension(String name) {
        int dotIndex = name.lastIndexOf('.');
        // dotIndex > 0 excludes dot-files such as ".gitignore";
        // the upper bound excludes names that end with a dot.
        if (dotIndex > 0 && dotIndex < name.length() - 1) {
            return name.substring(dotIndex + 1);
        }
        return "";
    }

    /** @return the URI this reference was created with */
    public URI getResourceUri() {
        return resourceUri;
    }

    /** @return the last path segment of the URI (the entry name for jar resources) */
    public String getFilename() {
        return filename;
    }

    /** @return the extension without the leading dot, or an empty string if there is none */
    public String getExtension() {
        return extension;
    }

    /**
     * Opens a fresh InputStream for this resource by delegating to
     * {@code ResourceUtil.openFreshInputStream}. The caller must close the stream.
     *
     * @return an InputStream to the resource.
     * @throws IOException if an I/O error occurs.
     */
    public InputStream openInputStream() throws IOException {
        return ResourceUtil.openFreshInputStream(resourceUri);
    }

    /**
     * Convenience helper that reads the whole resource into a Protocol Buffers
     * ByteString (despite the method name, the result is a ByteString, not a stream).
     * The entire content is held in memory.
     *
     * @return a ByteString representing the resource's content.
     * @throws IOException if an I/O error occurs.
     */
    public ByteString asByteStream() throws IOException {
        try (InputStream is = openInputStream()) {
            return ByteString.readFrom(is);
        }
    }
}
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.*;
import java.util.Collections;
import java.util.Map;
/**
 * Helpers for opening resources identified by "file:" or "jar:" URIs.
 */
public class ResourceUtil {
    /**
     * Opens a fresh InputStream for the given resource URI.
     * Supports both "file:" and "jar:" URIs.
     *
     * <p>In the jar case a zip FileSystem is opened for the archive. If this call
     * opened it, closing the returned stream also closes the FileSystem. If a
     * FileSystem for the archive was already open, it is reused and left open
     * when the stream is closed, to avoid interfering with other users.
     *
     * @param uri the resource URI (e.g., "file:/path/to/file" or "jar:file:/path/to/jar.jar!/internal/path")
     * @return an InputStream for the resource; the caller must close it.
     * @throws IOException if the jar URI has no "!/" separator or an I/O error occurs.
     * @throws UnsupportedOperationException if the scheme is neither "file" nor "jar".
     */
    public static InputStream openFreshInputStream(URI uri) throws IOException {
        String scheme = uri.getScheme();
        if ("jar".equals(scheme)) {
            return openJarEntry(uri);
        } else if ("file".equals(scheme)) {
            return Files.newInputStream(Paths.get(uri));
        } else {
            throw new UnsupportedOperationException("Unsupported URI scheme: " + scheme);
        }
    }

    /** Opens the entry addressed by a "jar:...!/entry" URI, managing the zip FileSystem lifetime. */
    private static InputStream openJarEntry(URI uri) throws IOException {
        // Example jar URI: jar:file:/path/to/jar.jar!/internal/path
        String uriStr = uri.toString();
        int separatorIndex = uriStr.indexOf("!/");
        if (separatorIndex == -1) {
            throw new IOException("Invalid jar URI: " + uriStr);
        }
        // The FileSystem is keyed by the archive part of the URI (ending with "!/").
        URI jarUri = URI.create(uriStr.substring(0, separatorIndex + 2));
        // Take the entry path from the DECODED scheme-specific part so that
        // percent-escapes (e.g. "%20") match the real entry name in the archive.
        String decoded = uri.getSchemeSpecificPart();
        String entryPath = decoded.substring(decoded.indexOf("!/") + 2);

        Map<String, String> env = Collections.singletonMap("create", "false");
        FileSystem resolvedFs;
        boolean openedHere; // true if we created a new FS, false if it already existed.
        try {
            resolvedFs = FileSystems.newFileSystem(jarUri, env);
            openedHere = true;
        } catch (FileSystemAlreadyExistsException e) {
            resolvedFs = FileSystems.getFileSystem(jarUri);
            openedHere = false;
        }
        // Copies that are final, so the anonymous stream class below may capture them.
        final FileSystem fs = resolvedFs;
        final boolean closeFsWithStream = openedHere;

        final InputStream is;
        try {
            is = Files.newInputStream(fs.getPath(entryPath));
        } catch (IOException | RuntimeException e) {
            // Opening the entry failed: do not leak a FileSystem that we opened.
            if (closeFsWithStream) {
                try {
                    fs.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
            }
            throw e;
        }

        // Wrap the InputStream so that if we opened a new FS, closing the stream closes it.
        return new FilterInputStream(is) {
            @Override
            public void close() throws IOException {
                try {
                    super.close();
                } finally {
                    // Runs even if closing the entry stream failed.
                    if (closeFsWithStream) {
                        fs.close();
                    }
                }
            }
        };
    }
}
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.*;
import java.util.*;
import java.util.stream.Stream;
/**
 * Locates classpath resources below a directory, whether the directory lives
 * on the plain file system or inside a jar.
 */
public class ResourceFinder {
    /**
     * Finds all resources under the given directory prefix whose file names
     * end with one of the given extensions (compared case-insensitively).
     *
     * <p>Only the first classpath location returned by
     * {@link ClassLoader#getResource(String)} for the prefix is searched.
     *
     * @param directoryPrefix the resource directory to search (e.g., "testdata")
     * @param extensions a list of file extensions (e.g., ".txt", ".json")
     * @return a collection of ResourceReference objects for matching resources;
     *         empty if the directory is not on the classpath
     * @throws IOException if walking the directory or opening the jar fails
     * @throws URISyntaxException if the resource URL cannot be converted to a URI
     * @throws IllegalArgumentException if a jar URL has no "!/" separator
     * @throws UnsupportedOperationException if the URL scheme is neither "file" nor "jar"
     */
    public static Collection<ResourceReference> findResources(String directoryPrefix, List<String> extensions)
            throws IOException, URISyntaxException {
        List<ResourceReference> foundRefs = new ArrayList<>();
        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
        if (classLoader == null) {
            // The context class loader may legitimately be null; fall back to our own loader.
            classLoader = ResourceFinder.class.getClassLoader();
        }
        URL url = classLoader.getResource(directoryPrefix);
        if (url == null) {
            return foundRefs; // nothing found, return empty list
        }
        URI uri = url.toURI();
        String protocol = uri.getScheme();
        if ("file".equals(protocol)) {
            // Resource is on the file system.
            collectMatches(Paths.get(uri), extensions, foundRefs);
        } else if ("jar".equals(protocol)) {
            /*
             * The URI for a jar resource looks like:
             *   jar:file:/path/to/jarfile.jar!/directoryPrefix
             * The part up to and including "!/" identifies the jar FileSystem.
             */
            String uriStr = uri.toString();
            int separatorIndex = uriStr.indexOf("!/");
            if (separatorIndex == -1) {
                throw new IllegalArgumentException("Invalid jar URL: " + uriStr);
            }
            URI jarUri = URI.create(uriStr.substring(0, separatorIndex + 2));
            // Use the DECODED scheme-specific part so that percent-escapes in the
            // URL (e.g. "%20") match the real directory name inside the archive.
            String decoded = uri.getSchemeSpecificPart();
            String entryPath = decoded.substring(decoded.indexOf("!/") + 2);

            FileSystem fs;
            boolean openedHere;
            try {
                fs = FileSystems.newFileSystem(jarUri, Collections.emptyMap());
                openedHere = true;
            } catch (FileSystemAlreadyExistsException e) {
                // Someone else (e.g. a stream still open on this jar) already opened it:
                // share that FileSystem and leave closing it to its owner.
                fs = FileSystems.getFileSystem(jarUri);
                openedHere = false;
            }
            try {
                // References are built from URIs, so they stay valid after fs is closed.
                collectMatches(fs.getPath(entryPath), extensions, foundRefs);
            } finally {
                if (openedHere) {
                    fs.close();
                }
            }
        } else {
            throw new UnsupportedOperationException("Protocol not supported: " + protocol);
        }
        return foundRefs;
    }

    /** Walks {@code root} and appends a reference for every regular file with a supported extension. */
    private static void collectMatches(Path root, List<String> extensions, List<ResourceReference> sink)
            throws IOException {
        try (Stream<Path> stream = Files.walk(root)) {
            stream.filter(Files::isRegularFile)
                  .filter(p -> hasSupportedExtension(p, extensions))
                  .map(p -> new ResourceReference(p.toUri()))
                  .forEachOrdered(sink::add);
        }
    }

    // Helper method to check if the file has one of the supported extensions.
    private static boolean hasSupportedExtension(Path p, List<String> extensions) {
        // Locale.ROOT keeps the comparison stable regardless of the default locale
        // (e.g. the Turkish dotless-i mapping would otherwise break ".INI" vs ".ini").
        String fileName = p.getFileName().toString().toLowerCase(Locale.ROOT);
        return extensions.stream().anyMatch(ext -> fileName.endsWith(ext.toLowerCase(Locale.ROOT)));
    }
}
Now you can obtain a collection of ResourceReference
objects from your resource finder and process them one at a time (for example, converting them into Protocol Buffer ByteString objects when needed):
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.List;
/**
 * Example driver: finds resources on the classpath and reads them one at a time.
 */
public class ResourceProcessor {
    /**
     * Looks up every .txt and .json resource under "testdata" and prints its size.
     *
     * @param args unused
     * @throws IOException if a resource cannot be located or read
     * @throws URISyntaxException if a resource URL cannot be converted to a URI
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        // Find resources in the "testdata" directory with .txt or .json extensions.
        Collection<ResourceReference> resources =
                ResourceFinder.findResources("testdata", List.of(".txt", ".json"));
        // Process each resource one at a time, so only one payload is in memory at once.
        for (ResourceReference ref : resources) {
            // asByteStream() opens a fresh InputStream (even if the resource is inside
            // a jar), reads it fully and closes it again.
            ByteString bs = ref.asByteStream();
            System.out.println("Read " + bs.size() + " bytes from " + ref.getFilename());
            // Process the ByteString with your protocol buffers logic...
        }
    }
}
- ResourceReference class: Encapsulates the resource’s URI along with simple metadata (filename, extension) and provides a method to open a fresh InputStream.
- ResourceUtil: Provides a helper method to open a fresh InputStream from a given URI (handling both file and jar cases).
- ResourceFinder: Searches for matching resources under a given directory (whether in a file system or inside a jar) and returns a collection of ResourceReference objects instead of raw Paths.
This design keeps your in‑memory data lightweight (just references and metadata) while letting you open and process the full contents one resource at a time when needed for Protocol Buffers.