Troubleshooting - ruvnet/ruv-FANN GitHub Wiki
This comprehensive troubleshooting guide covers common issues encountered when working with ruv-FANN and its ecosystem components.
# Quick health check
npx ruv-swarm@latest test --self-check --verbose
# Reset everything
rm -rf ~/.claude-flow/
rm -rf .swarm/
cargo clean && cargo build
# Nuclear option - full reset
npx ruv-swarm@latest reset --purge-all --confirm
cargo clean
rustup update
cargo build --release
Problem: `cargo` command not found
# Solution: Install or update Rust
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
source ~/.cargo/env
# Verify installation
cargo --version
rustc --version
Problem: Compilation fails with "linker not found"
# Linux (Ubuntu/Debian)
sudo apt update
sudo apt install build-essential pkg-config libssl-dev
# macOS
xcode-select --install
# Windows
# Install Visual Studio Build Tools
# https://visualstudio.microsoft.com/visual-cpp-build-tools/
Problem: SIMD compilation errors
# Check CPU capabilities
lscpu | grep -i flags
cat /proc/cpuinfo | grep flags
# Compile with native CPU features
RUSTFLAGS="-C target-cpu=native" cargo build --release --features simd
# Fallback without SIMD
cargo build --release --no-default-features --features std
Problem: `wasm-pack` not found
# Install wasm-pack
curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
# Or via cargo
cargo install wasm-pack
Problem: WASM build fails with memory errors
# Increase WASM memory limit
export WASM_BUILD_MEMORY=4096
# Use optimized build
wasm-pack build --release --target web --features wasm,simd
wasm-opt -Oz -o pkg/ruv_fann_bg.wasm pkg/ruv_fann_bg.wasm
Problem: WASM SIMD not working in browser
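// Check browser SIMD support. There is no `WebAssembly.SIMD` property, so probe instead by
// validating a tiny module that uses a v128 instruction (the same probe wasm-feature-detect uses).
const simdProbe = new Uint8Array([0, 97, 115, 109, 1, 0, 0, 0, 1, 5, 1, 96, 0, 1, 123, 3, 2, 1, 0, 10, 10, 1, 8, 0, 65, 0, 253, 15, 253, 98, 11]);
if (!WebAssembly.validate(simdProbe)) {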
console.warn('WebAssembly SIMD not supported, falling back to scalar operations');
// Load non-SIMD version
} else {
console.log('WebAssembly SIMD supported');
}
Problem: `npx ruv-swarm` fails with module not found
# Clear npm cache
npm cache clean --force
# Update Node.js (requires Node 18+)
nvm install 20
nvm use 20
# Install globally
npm install -g ruv-swarm@latest
Problem: Permission errors on Linux/macOS
# Fix npm permissions
mkdir ~/.npm-global
npm config set prefix '~/.npm-global'
echo 'export PATH=~/.npm-global/bin:$PATH' >> ~/.bashrc
source ~/.bashrc
# Alternative: skip the global install and run on demand with npx
# (npm has no --user install flag; that option belongs to pip)
npx ruv-swarm@latest test --self-check
Problem: Network not converging
// Solution: Check your learning parameters
let mut trainer = BackpropTrainer::new()
.learning_rate(0.01) // Try lower values: 0.001, 0.0001
.momentum(0.9) // Add momentum
.max_epochs(10000) // Increase training time
.desired_error(0.001); // Relax error tolerance
// Add regularization
trainer.set_l2_regularization(0.0001);
// Use adaptive learning rate
trainer.set_adaptive_learning_rate(true);
Problem: Training is too slow
// Solution: Enable optimizations
let trainer = BackpropTrainer::new()
.learning_rate(0.1) // Higher learning rate
.batch_size(32) // Use batch training
.parallel(true) // Enable parallel processing
.simd(true); // Enable SIMD acceleration
// Use GPU acceleration if available
#[cfg(feature = "gpu")]
let trainer = trainer.gpu_accelerated(true);
Problem: Memory issues during training
// Solution: Reduce memory usage
let trainer = BackpropTrainer::new()
.batch_size(16) // Smaller batches
.memory_pool_size(1024) // Limit memory pool
.gradient_clipping(1.0); // Prevent gradient explosion
// Use streaming data loading
let data_loader = StreamingDataLoader::new("training_data.fann")
.buffer_size(1000)
.prefetch(2);
Problem: Network accuracy is poor
// Solution: Optimize network architecture
let mut net = NeuralNetwork::builder()
.input_size(input_dim)
.hidden_layers(&[64, 32, 16]) // Try different architectures
.output_size(output_dim)
.activation_function(ActivationFunction::ReLU) // Try different activations
.dropout(0.2) // Add regularization
.batch_normalization(true) // Normalize layers
.build()?;
// Add skip connections for deep networks
net.add_skip_connections(&[(0, 2), (1, 3)])?;
Problem: Overfitting issues
// Solution: Add regularization techniques
let trainer = BackpropTrainer::new()
.l2_regularization(0.001) // Weight decay
.dropout_rate(0.3) // Dropout
.early_stopping_patience(10) // Stop early
.validation_split(0.2); // Use validation data
// Implement cross-validation
let cv_results = cross_validate(&net, &data, 5)?;
Problem: Agents not communicating
# Check swarm status
npx ruv-swarm status --detailed
# Verify connectivity
npx ruv-swarm test --connectivity
# Reset coordination
npx ruv-swarm reset --agents-only
Problem: Swarm performance degradation
// Solution: Optimize swarm configuration
let swarm = SwarmBuilder::new()
.topology(Topology::Mesh) // Try different topologies
.max_agents(8) // Optimal agent count
.message_buffer_size(1000) // Increase buffer
.consensus_timeout(Duration::from_secs(5))
.load_balancing(LoadBalancing::RoundRobin)
.fault_tolerance(true)
.build()?;
// Monitor performance
let metrics = swarm.get_performance_metrics();
println!("Throughput: {} tasks/sec", metrics.throughput);
Problem: Task scheduling failures
// Solution: Improve task management
let scheduler = TaskScheduler::new()
.priority_queue(true) // Enable priority scheduling
.max_concurrent_tasks(16) // Limit concurrency
.retry_policy(RetryPolicy::ExponentialBackoff)
.timeout(Duration::from_secs(30))
.heartbeat_interval(Duration::from_secs(1));
// Add health checking
scheduler.enable_health_checks(Duration::from_secs(5));
Problem: High memory usage in swarms
// Solution: Optimize memory management
let memory_manager = MemoryManager::new()
.max_memory_per_agent(512 * 1024 * 1024) // 512MB per agent
.garbage_collection_interval(Duration::from_secs(60))
.memory_pool_size(1024)
.enable_compression(true);
// Monitor memory usage
let usage = memory_manager.get_usage_stats();
if usage.used_percentage > 80.0 {
memory_manager.force_cleanup();
}
Problem: Agent crashes or deadlocks
// Solution: Implement robust error handling
let agent = Agent::builder()
.timeout(Duration::from_secs(30)) // Prevent hangs
.max_retries(3) // Retry on failure
.circuit_breaker(true) // Break on repeated failures
.watchdog_timeout(Duration::from_secs(60)) // Kill unresponsive agents
.isolation_level(IsolationLevel::Sandboxed) // Isolate agents
.build()?;
// Add monitoring
agent.on_error(|error| {
log::error!("Agent error: {}", error);
// Implement recovery logic
});
Problem: SIMD not being used
# Check SIMD compilation
RUSTFLAGS="-C target-cpu=native -C target_feature=+avx2" cargo build --release
# Verify SIMD usage
objdump -d target/release/ruv-fann | grep -i "vmul\|vadd\|vfma"
# Runtime SIMD detection
RUST_LOG=debug cargo run
# Look for "SIMD features detected" messages
Problem: Poor vectorization performance
// Solution: Optimize data layout
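#[repr(align(32))] // Align for AVX2
struct AlignedVector {
// Fixed-size array: repr(align) aligns the struct's own bytes, so a Vec<f32> field
// would only align the Vec header (pointer/length/capacity), not its heap buffer.
data: [f32; 8],
}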
// Use SIMD-friendly operations
use ruv_fann::simd::*;
let a = SimdF32::from_slice(&vec_a);
let b = SimdF32::from_slice(&vec_b);
let result = a * b + c; // Vectorized operation
Problem: GPU not detected
# Check GPU availability
nvidia-smi # For NVIDIA GPUs
clinfo # For OpenCL devices
# Verify GPU feature compilation
cargo build --features gpu,cuda --release
# Runtime GPU detection
RUST_LOG=debug cargo run --features gpu
Problem: GPU memory errors
// Solution: Manage GPU memory carefully
let gpu_manager = GpuManager::new()?
.memory_pool_size(1024 * 1024 * 1024) // 1GB pool
.enable_memory_mapping(true)
.batch_transfer_size(64 * 1024 * 1024); // 64MB batches
// Monitor GPU memory
let gpu_stats = gpu_manager.get_memory_stats();
if gpu_stats.free_memory < 100 * 1024 * 1024 { // Less than 100MB
gpu_manager.cleanup_unused_buffers();
}
Problem: Poor parallel performance
// Solution: Optimize threading
use rayon::prelude::*;
// Configure thread pool
rayon::ThreadPoolBuilder::new()
.num_threads(num_cpus::get())
.thread_name(|i| format!("ruv-fann-{}", i))
.build_global()
.unwrap();
// Use parallel iterators efficiently
let results: Vec<_> = data
.par_chunks(1000) // Optimal chunk size
.map(|chunk| process_chunk(chunk))
.collect();
Problem: MCP server not connecting
# Check MCP server status
npx claude-flow@alpha status
# Restart MCP server
npx claude-flow@alpha restart
# Check configuration
cat ~/.claude/claude_desktop_config.json
# Verify MCP tools are registered
npx claude-flow@alpha mcp list
Problem: Swarm commands not working in Claude Code
# Verify installation
which claude-flow
which ruv-swarm
# Check PATH configuration
echo $PATH
# Reinstall if necessary
npm install -g claude-flow@alpha ruv-swarm@latest
Problem: WASM module loading fails
// Solution: Proper WASM initialization
import init, { NeuralNetwork } from './pkg/ruv_fann.js';
async function initializeWasm() {
try {
await init();
console.log('WASM module loaded successfully');
// Test basic functionality
const net = new NeuralNetwork(2, [3], 1);
console.log('Neural network created');
} catch (error) {
console.error('WASM initialization failed:', error);
// Fallback to non-WASM implementation
}
}
Problem: WASM performance is slow
// Solution: Optimize WASM usage
// Pre-allocate memory
const wasmMemory = new WebAssembly.Memory({
initial: 256, // 16MB initial
maximum: 1024 // 64MB maximum
});
// Use SharedArrayBuffer for better performance
if (typeof SharedArrayBuffer !== 'undefined') {
// Use shared memory for multi-threaded WASM
}
// Batch operations
const batchSize = 1000;
for (let i = 0; i < data.length; i += batchSize) {
const batch = data.slice(i, i + batchSize);
await processBatch(batch);
}
Enable comprehensive logging:
# Rust logging
export RUST_LOG=debug
export RUST_BACKTRACE=1
# Node.js debugging
export DEBUG=ruv-swarm:*
export NODE_ENV=development
# Run with detailed output
cargo run 2>&1 | tee debug.log
Network debugging:
// Add debug logging to networks
let mut net = NeuralNetwork::builder()
.debug_mode(true)
.log_level(LogLevel::Debug)
.profiling(true)
.build()?;
// Log training progress
net.on_epoch(|epoch, error| {
println!("Epoch {}: error = {:.6}", epoch, error);
});
Swarm debugging:
// Enable swarm diagnostics
let swarm = SwarmBuilder::new()
.debug_mode(true)
.trace_messages(true)
.performance_monitoring(true)
.build()?;
// Monitor agent health
swarm.agents().iter().for_each(|agent| {
println!("Agent {} status: {:?}", agent.id(), agent.status());
});
Detect memory leaks:
# Use Valgrind on Linux
valgrind --tool=memcheck --leak-check=full ./target/debug/ruv-fann
# Use AddressSanitizer (the -Z flag is unstable and requires a nightly toolchain)
RUSTFLAGS="-Z sanitizer=address" cargo +nightly run
# Monitor memory usage
watch -n 1 'ps aux | grep ruv-fann'
Profile memory usage:
#[cfg(feature = "profiling")]
use pprof::ProfilerGuard;
fn profile_memory_usage() {
let guard = pprof::ProfilerGuard::new(100).unwrap();
// Your code here
if let Ok(report) = guard.report().build() {
let file = std::fs::File::create("profile.pb").unwrap();
report.pprof().unwrap().write_to(file).unwrap();
}
}
// Implement checkpointing
let checkpoint = NetworkCheckpoint::new(&network)?;
checkpoint.save("checkpoint.bin")?;
// Recovery from checkpoint
if let Ok(checkpoint) = NetworkCheckpoint::load("checkpoint.bin") {
network = checkpoint.restore()?;
} else {
// Fallback: reinitialize network
network = create_default_network()?;
}
// Implement swarm resilience
let recovery_config = RecoveryConfig::new()
.max_retries(3)
.backoff_strategy(BackoffStrategy::Exponential)
.checkpoint_interval(Duration::from_secs(60));
swarm.set_recovery_config(recovery_config);
// Automatic recovery on failure
swarm.on_failure(|error| {
log::warn!("Swarm failure: {}", error);
swarm.attempt_recovery();
});
-
Enable all relevant CPU features:
RUSTFLAGS="-C target-cpu=native" cargo build --release
-
Use appropriate data types:
// Use f32 instead of f64 for better SIMD performance
type Float = f32;
// Align data structures
#[repr(align(32))]
struct AlignedData { /* ... */ }
-
Optimize memory allocation:
// Pre-allocate vectors
let mut vec = Vec::with_capacity(expected_size);
// Use memory pools
let pool = MemoryPool::new(1024 * 1024);
-
Profile and benchmark:
cargo bench
cargo flamegraph --bin ruv-fann
-
Complete reinstallation:
# Remove everything
rustup self uninstall
rm -rf ~/.cargo
rm -rf ~/.claude-flow
rm -rf node_modules
# Fresh installation
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
npm install -g ruv-swarm@latest claude-flow@alpha
-
Docker deployment:
FROM rust:1.75-slim
RUN apt-get update && apt-get install -y build-essential pkg-config libssl-dev
COPY . /app
WORKDIR /app
RUN cargo build --release
CMD ["./target/release/ruv-fann"]
-
Seek community help:
- Check this troubleshooting guide
- Search existing GitHub issues
- Verify your environment meets minimum requirements
- Try the minimal reproduction case
Include this information:
# System information
uname -a
rustc --version
cargo --version
node --version
npm --version
# ruv-FANN version
cargo tree | grep ruv-fann
npm list -g | grep ruv-swarm
# Error logs
RUST_LOG=debug cargo run 2>&1 | head -100
#!/bin/bash
echo "=== ruv-FANN Debug Information ===" > debug_info.txt
echo "Date: $(date)" >> debug_info.txt
echo "System: $(uname -a)" >> debug_info.txt
echo "Rust: $(rustc --version)" >> debug_info.txt
echo "Cargo: $(cargo --version)" >> debug_info.txt
echo "Node: $(node --version)" >> debug_info.txt
echo "NPM: $(npm --version)" >> debug_info.txt
echo "CPU: $(lscpu | grep 'Model name')" >> debug_info.txt
echo "Memory: $(free -h)" >> debug_info.txt
echo "SIMD: $(lscpu | grep -i flags)" >> debug_info.txt
echo "=== End Debug Information ===" >> debug_info.txt
cat debug_info.txt
Run this script and include the output with your issue report.
Remember: Most issues can be resolved by following this guide systematically. Start with the most common solutions before moving to advanced debugging techniques.