Local (User Space)
- sudo pacman -S podman
- systemctl --user enable podman.socket --now
- vim ~/.config/containers/storage.conf
# Podman storage settings, saved to ~/.config/containers/storage.conf (the file opened in the step above).
[storage]
# Storage driver used for image and container layers.
driver = "overlay"
# Persistent location for images and containers; the directory is created by the mkdir step that follows.
graphroot = "/home/user/path/to/podman-storage"
# Location for runtime state.
# NOTE(review): the "1000" suffix looks like a UID — confirm it matches `id -u` for the account running podman.
runroot = "/tmp/podman-run-1000"
- mkdir -p /home/user/path/to/podman-storage
- mkdir -p /home/user/path/to/ollama
- vim /home/user/path/to/run-ollama.sh
#!/bin/sh
# Start (or create) the Ollama container, make sure the required models are
# present, then open an interactive chat session with the chat model.
#
# NOTE: the RAG API (app.py) is configured for qwen3.5:4b, which this script
# does not pull.

APP_DIR="/home/user/path/to/ollama"
CHAT_MODEL="qwen3.5:9b"
EMBED_MODEL="nomic-embed-text"

mkdir -p "$APP_DIR"

if podman container exists ollama; then
  podman start ollama
else
  # Models are stored on the host in $APP_DIR; the API is published on 11434.
  podman run -d \
    --name ollama \
    --restart=always \
    --device nvidia.com/gpu=all \
    -v "$APP_DIR:/root/.ollama" \
    -p 11434:11434 \
    docker.io/ollama/ollama
fi

# The server inside the container needs a moment before it answers; querying
# it too early would make every model look missing. Poll for up to ~30s and
# give up at once if the container does not exist at all.
attempts=0
until podman exec ollama ollama list >/dev/null 2>&1; do
  attempts=$((attempts + 1))
  if [ "$attempts" -ge 30 ] || ! podman container exists ollama; then
    echo "ollama did not become ready" >&2
    exit 1
  fi
  sleep 1
done

# Pull each model only when it is not listed yet. -F matches the name
# literally (the "." in the model tag is not a regex wildcard).
for model in "$CHAT_MODEL" "$EMBED_MODEL"; do
  if ! podman exec ollama ollama list | grep -qF -- "$model"; then
    podman exec -it ollama ollama pull "$model"
  fi
done

# Interactive chat; needs a terminal.
podman exec -it ollama ollama run "$CHAT_MODEL"
- chmod +x /home/user/path/to/run-ollama.sh
- Now execute run-ollama.sh to start Ollama and run the model
RAG (AI that answers using your own documents)
- mkdir -p /home/user/path/to/rag-api
- mkdir -p /home/user/path/to/documents
# Write the rag-api Containerfile. The heredoc delimiter is quoted so the
# shell copies the text literally: the backslash line continuations are kept
# and nothing is expanded.
cat > /home/user/path/to/rag-api/Containerfile <<'EOF'
FROM python:3.12
WORKDIR /app
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir fastapi uvicorn \
    llama-index \
    llama-index-llms-ollama \
    llama-index-embeddings-ollama \
    llama-index-vector-stores-chroma \
    chromadb \
    pypdf
COPY app.py .
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
EOF
# Write the RAG API source. The delimiter is quoted so the Python text is
# copied literally, and the heredoc is explicitly terminated with EOF.
cat > /home/user/path/to/rag-api/app.py <<'EOF'
"""Small RAG service: answers questions with an Ollama LLM over documents
indexed in a ChromaDB collection."""
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

app = FastAPI()

# Ollama is reached through the port the ollama container publishes (11434).
OLLAMA_URL = "http://host.containers.internal:11434"

embed_model = OllamaEmbedding(model_name="nomic-embed-text", base_url=OLLAMA_URL, request_timeout=300.0)
llm = Ollama(model="qwen3.5:4b", base_url=OLLAMA_URL, request_timeout=300.0, context_window=4096)

# ChromaDB is reached on port 8001, the host port the chromadb container
# is published on. These clients are created at import time.
chroma_client = chromadb.HttpClient(host="host.containers.internal", port=8001)
collection = chroma_client.get_or_create_collection("documents")
vector_store = ChromaVectorStore(chroma_collection=collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

# Two engines over the same index: one returns the whole answer, the other
# yields it token by token.
query_engine = index.as_query_engine(llm=llm, similarity_top_k=2)
streaming_engine = index.as_query_engine(llm=llm, similarity_top_k=2, streaming=True)


class AskRequest(BaseModel):
    """Request body for the /ask endpoints."""
    question: str


@app.post("/ask")
def ask(req: AskRequest):
    """Answer the question and return the full text as JSON."""
    response = query_engine.query(req.question)
    return {"answer": str(response)}


@app.post("/ask/stream")
def ask_stream(req: AskRequest):
    """Answer the question, streaming the generated tokens as plain text."""
    response = streaming_engine.query(req.question)

    def generate():
        for token in response.response_gen:
            yield token

    return StreamingResponse(generate(), media_type="text/plain")


@app.post("/ingest")
def ingest():
    """Load every file under /app/documents and insert it into the index.

    NOTE(review): every call re-reads and re-inserts all documents, so calling
    it repeatedly likely stores duplicates — confirm.
    """
    docs = SimpleDirectoryReader("/app/documents").load_data()
    for doc in docs:
        index.insert(doc)
    return {"ingested": len(docs)}


@app.get("/health")
def health():
    """Liveness probe."""
    return {"status": "ok"}
EOF
- vim /home/user/path/to/run-chromadb.sh
#!/bin/sh
# Start (or create) the ChromaDB container, print its recent logs, then open
# a shell inside it.

APP_DIR="/home/user/path/to/chromadb"
mkdir -p "$APP_DIR"

if podman container exists chromadb; then
  podman start chromadb
else
  # Host port 8001 maps to container port 8000; app.py connects on 8001.
  # NOTE(review): confirm /chroma/chroma is the data directory used by the
  # image version being pulled (the image is untagged, so it tracks latest).
  podman run -d \
    --name chromadb \
    --restart=always \
    -v "$APP_DIR:/chroma/chroma" \
    -p 8001:8000 \
    docker.io/chromadb/chroma
fi

# The interactive shell is opened only when the logs could be read.
podman logs --tail 999 chromadb && podman exec -it chromadb sh
- vim /home/user/path/to/run-rag-api.sh
#!/bin/sh
# Build the rag-api image when it is missing, start (or create) the rag-api
# container, print its recent logs, then open a shell inside it.
#
# NOTE: the image is only built when it does not exist; remove it
# (podman rmi localhost/rag-api) to rebuild after editing app.py.

IMAGE_NAME="localhost/rag-api"
APP_DIR="/home/user/path/to/rag-api"
DOCS_DIR="/home/user/path/to/documents"

# The documents directory is bind-mounted below, so it has to exist.
mkdir -p "$APP_DIR" "$DOCS_DIR"

if ! podman image exists "$IMAGE_NAME"; then
  echo "Building rag-api..."
  # Stop here on a failed build instead of trying to run a missing image.
  podman build -t "$IMAGE_NAME" "$APP_DIR" || exit 1
fi

if podman container exists rag-api; then
  podman start rag-api
else
  podman run -d \
    --name rag-api \
    --restart=always \
    -v "$DOCS_DIR:/app/documents" \
    -p 8000:8000 \
    "$IMAGE_NAME"
fi

# The interactive shell is opened only when the logs could be read.
podman logs --tail 999 rag-api && podman exec -it rag-api sh
- Script to run everything at once (run the individual scripts above once before using this one):
#!/bin/sh
# Start the whole stack (ChromaDB, Ollama, RAG API) in one go.

# Work from the script's own directory so ./stop.sh resolves next to it;
# abort if that directory cannot be entered rather than run from elsewhere.
cd "$(dirname "$0")" || exit 1

# stop.sh is expected to sit beside this script; presumably it stops the
# running containers before they are started again — verify.
./stop.sh
#######################################
# Start the named container, creating it first when it does not exist.
# Arguments:
#   $1   - container name to look up and start
#   $2.. - arguments handed to `podman run -d` when the container must be
#          created (should include --name and the image)
# Outputs:
#   podman's own output, then "[name] started" on stdout on success, or an
#   error message on stderr on failure
# Returns:
#   0 on success, podman's non-zero exit status otherwise
#######################################
start_container() {
  NAME=$1
  shift
  if podman container exists "$NAME"; then
    podman start "$NAME"
  else
    podman run -d "$@"
  fi
  # Status of whichever podman command ran in the branch above.
  _start_rc=$?
  if [ "$_start_rc" -ne 0 ]; then
    echo "[$NAME] failed to start (exit $_start_rc)" >&2
    return "$_start_rc"
  fi
  echo "[$NAME] started"
}
# Root directory holding the data of every service. Override by exporting
# BASE_DIR before running the script.
BASE_DIR="${BASE_DIR:-/home/leans/System/AI}"

# ChromaDB (host port 8001 -> container port 8000)
APP_DIR="$BASE_DIR/chromadb"
mkdir -p "$APP_DIR"
start_container chromadb \
  --name chromadb \
  --restart=always \
  -v "$APP_DIR:/chroma/chroma" \
  -p 8001:8000 \
  docker.io/chromadb/chroma

# Ollama
APP_DIR="$BASE_DIR/ollama"
mkdir -p "$APP_DIR"
start_container ollama \
  --name ollama \
  --restart=always \
  --device nvidia.com/gpu=all \
  -v "$APP_DIR:/root/.ollama" \
  -p 11434:11434 \
  docker.io/ollama/ollama

# The Ollama server needs a moment before it answers; querying it too early
# would make every model look missing. Poll for up to ~30s and stop at once
# if the container does not exist at all.
ollama_ready=0
attempts=0
while [ "$attempts" -lt 30 ]; do
  if podman exec ollama ollama list >/dev/null 2>&1; then
    ollama_ready=1
    break
  fi
  podman container exists ollama || break
  attempts=$((attempts + 1))
  sleep 1
done

# Pull models if necessary. -F matches the model name literally.
if [ "$ollama_ready" -eq 1 ]; then
  for MODEL in qwen3.5:4b nomic-embed-text; do
    if ! podman exec ollama ollama list | grep -qF -- "$MODEL"; then
      echo "[ollama] pulling $MODEL..."
      podman exec ollama ollama pull "$MODEL"
    fi
  done
else
  echo "[ollama] server not reachable; skipping model pulls" >&2
fi

# RAG API
IMAGE_NAME="localhost/rag-api"
APP_DIR="$BASE_DIR/rag-api"
DOCS_DIR="$BASE_DIR/documents"
# The documents directory is bind-mounted below, so it has to exist.
mkdir -p "$APP_DIR" "$DOCS_DIR"
if ! podman image exists "$IMAGE_NAME"; then
  echo "[rag-api] building..."
  # Stop here on a failed build instead of trying to run a missing image.
  if ! podman build -t "$IMAGE_NAME" -f "$APP_DIR/Containerfile" "$APP_DIR"; then
    echo "[rag-api] build failed" >&2
    exit 1
  fi
fi
start_container rag-api \
  --name rag-api \
  --restart=always \
  -v "$DOCS_DIR:/app/documents" \
  -p 8000:8000 \
  "$IMAGE_NAME"

# Give the services a moment to settle, then show what is running.
echo ""
echo "Waiting services..."
sleep 3
echo ""
podman ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"