AI Ollama Installation - LeandroTheDev/arch_linux GitHub Wiki

Local (User Space)

  • sudo pacman -S podman
  • systemctl --user enable podman.socket --now
  • vim ~/.config/containers/storage.conf
# Per-user Podman storage settings (this file is ~/.config/containers/storage.conf).
[storage]
driver = "overlay"
# Must be the same directory that is created with `mkdir -p` in the next step.
graphroot = "/home/user/path/to/podman-storage"
# NOTE(review): the "1000" suffix looks like a user ID — confirm it matches `id -u`.
runroot = "/tmp/podman-run-1000"
  • mkdir -p /home/user/path/to/podman-storage
  • mkdir -p /home/user/path/to/ollama
  • vim /home/user/path/to/run-ollama.sh
#!/bin/sh
# Start (or create) the "ollama" container, make sure the chat and embedding
# models are present, then open an interactive chat session with the chat model.

# Host directory mounted at /root/.ollama inside the container.
APP_DIR="/home/user/path/to/ollama"
CHAT_MODEL="qwen3.5:9b"
EMBED_MODEL="nomic-embed-text"

mkdir -p "$APP_DIR"

# Reuse the existing container when there is one; otherwise create it.
if podman container exists ollama; then
    podman start ollama
else
    podman run -d \
      --name ollama \
      --restart=always \
      --device nvidia.com/gpu=all \
      -v "$APP_DIR:/root/.ollama" \
      -p 11434:11434 \
      docker.io/ollama/ollama
fi

# Pull each model only when `ollama list` does not already show it.
# grep -F matches the name literally, so the "." in the tag is not a wildcard.
for MODEL in "$CHAT_MODEL" "$EMBED_MODEL"; do
    if ! podman exec ollama ollama list | grep -qF "$MODEL"; then
        podman exec -it ollama ollama pull "$MODEL"
    fi
done

podman exec -it ollama ollama run "$CHAT_MODEL"
  • chmod +x /home/user/path/to/run-ollama.sh
  • Now execute run-ollama.sh to start Ollama and run the model

RAG (AI that reads your documentation)

  • mkdir -p /home/user/path/to/rag-api
  • mkdir -p /home/user/path/to/documents
# The delimiter is quoted so the shell writes the text literally; with a bare
# EOF the backslash-newline continuations below would be swallowed by the shell.
cat > /home/user/path/to/rag-api/Containerfile <<'EOF'
FROM python:3.12

WORKDIR /app

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir fastapi uvicorn \
    llama-index \
    llama-index-llms-ollama \
    llama-index-embeddings-ollama \
    llama-index-vector-stores-chroma \
    chromadb \
    pypdf

COPY app.py .

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
EOF
# Quoted delimiter: the Python source is written verbatim, with no shell expansion.
cat > /home/user/path/to/rag-api/app.py <<'EOF'
"""FastAPI service that answers questions over ingested documents (RAG).

Embeddings and generation go through Ollama; vectors are stored in ChromaDB.
"""
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

app = FastAPI()
# Ollama is reached through the port the ollama container publishes on the host.
OLLAMA_URL = "http://host.containers.internal:11434"

embed_model = OllamaEmbedding(model_name="nomic-embed-text", base_url=OLLAMA_URL, request_timeout=300.0)
# NOTE(review): run-ollama.sh pulls qwen3.5:9b while this uses qwen3.5:4b —
# confirm the 4b model has been pulled into Ollama before querying.
llm = Ollama(model="qwen3.5:4b", base_url=OLLAMA_URL, request_timeout=300.0, context_window=4096)

# ChromaDB is published on host port 8001 by the chromadb container.
chroma_client = chromadb.HttpClient(host="host.containers.internal", port=8001)
collection = chroma_client.get_or_create_collection("documents")
vector_store = ChromaVectorStore(chroma_collection=collection)
# NOTE(review): storage_context is not referenced anywhere below.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
# Two engines over the same index: one returns the full answer, one streams tokens.
query_engine = index.as_query_engine(llm=llm, similarity_top_k=2)
streaming_engine = index.as_query_engine(llm=llm, similarity_top_k=2, streaming=True)

class AskRequest(BaseModel):
    """Request body for /ask and /ask/stream."""
    question: str

@app.post("/ask")
def ask(req: AskRequest):
    """Answer the question and return the whole answer as JSON."""
    response = query_engine.query(req.question)
    return {"answer": str(response)}

@app.post("/ask/stream")
def ask_stream(req: AskRequest):
    """Answer the question, streaming the answer as plain text."""
    response = streaming_engine.query(req.question)
    def generate():
        for token in response.response_gen:
            yield token
    return StreamingResponse(generate(), media_type="text/plain")

@app.post("/ingest")
def ingest():
    """Load every file under /app/documents and insert it into the index."""
    # NOTE(review): every call re-reads and re-inserts all documents — confirm
    # whether repeated calls create duplicate entries in the collection.
    docs = SimpleDirectoryReader("/app/documents").load_data()
    for doc in docs:
        index.insert(doc)
    return {"ingested": len(docs)}

@app.get("/health")
def health():
    """Liveness probe."""
    return {"status": "ok"}
EOF
  • vim /home/user/path/to/run-chromadb.sh
#!/bin/sh
# Start (or create) the "chromadb" container, then print its recent logs and
# open a shell inside it.

# Host directory mounted into the container for ChromaDB's data.
APP_DIR="/home/user/path/to/chromadb"
mkdir -p "$APP_DIR"

# Reuse the existing container when there is one; otherwise create it.
# Container port 8000 is published on host port 8001 (8000 is used by rag-api).
# NOTE(review): the image is untagged, and the data directory inside the
# container may differ between Chroma releases — confirm /chroma/chroma is the
# path the pulled image actually persists to.
if podman container exists chromadb; then
    podman start chromadb
else
    podman run -d \
      --name chromadb \
      --restart=always \
      -v "$APP_DIR:/chroma/chroma" \
      -p 8001:8000 \
      docker.io/chromadb/chroma
fi

podman logs --tail 999 chromadb && podman exec -it chromadb sh
  • vim /home/user/path/to/run-rag-api.sh
#!/bin/sh
# Build the rag-api image when it is missing, start (or create) the "rag-api"
# container, then print its recent logs and open a shell inside it.

IMAGE_NAME="localhost/rag-api"
# Build context: holds the Containerfile and app.py.
APP_DIR="/home/user/path/to/rag-api"
# Host directory mounted at /app/documents, which the /ingest endpoint reads.
DOCS_DIR="/home/user/path/to/documents"
mkdir -p "$APP_DIR"

# The image is built only once; remove it manually to force a rebuild.
if ! podman image exists "$IMAGE_NAME"; then
    echo "Building rag-api..."
    podman build -t "$IMAGE_NAME" "$APP_DIR"
fi

# Reuse the existing container when there is one; otherwise create it.
if podman container exists rag-api; then
    podman start rag-api
else
    podman run -d \
      --name rag-api \
      --restart=always \
      -v "$DOCS_DIR:/app/documents" \
      -p 8000:8000 \
      "$IMAGE_NAME"
fi

podman logs --tail 999 rag-api && podman exec -it rag-api sh
  • Script to run everything at once (run each of the individual scripts above once before using this):
#!/bin/sh
# Work from the script's own directory so ./stop.sh resolves no matter where
# the script is invoked from; abort if that directory cannot be entered.
cd "$(dirname "$0")" || exit 1

# stop.sh is expected to sit next to this script (it is not shown here).
./stop.sh

# start_container NAME [podman-run-args...]
#
# Starts the container NAME when it already exists; otherwise creates it with
# `podman run -d` using the remaining arguments (which must include
# `--name NAME` and the image).
# Outputs: "[NAME] started" on stdout on success, a failure line on stderr.
# Returns: 0 on success, podman's exit status on failure.
start_container() {
    NAME=$1
    shift
    if podman container exists "$NAME"; then
        podman start "$NAME"
    else
        podman run -d "$@"
    fi
    # Status of whichever podman command ran in the branch above.
    rc=$?
    if [ "$rc" -ne 0 ]; then
        echo "[$NAME] failed to start (exit $rc)" >&2
        return "$rc"
    fi
    echo "[$NAME] started"
}

# Root directory holding every service's data. Export BASE_DIR before running
# the script to use a different location.
BASE_DIR="${BASE_DIR:-/home/leans/System/AI}"

# ChromaDB (container port 8000 published on host port 8001)
APP_DIR="$BASE_DIR/chromadb"
mkdir -p "$APP_DIR"
start_container chromadb \
    --name chromadb \
    --restart=always \
    -v "$APP_DIR:/chroma/chroma" \
    -p 8001:8000 \
    docker.io/chromadb/chroma

# Ollama
APP_DIR="$BASE_DIR/ollama"
mkdir -p "$APP_DIR"
start_container ollama \
    --name ollama \
    --restart=always \
    --device nvidia.com/gpu=all \
    -v "$APP_DIR:/root/.ollama" \
    -p 11434:11434 \
    docker.io/ollama/ollama

# Pull models if needed.
# grep -F matches the name literally, so the "." in the tag is not a wildcard.
for MODEL in qwen3.5:4b nomic-embed-text; do
    if ! podman exec ollama ollama list | grep -qF "$MODEL"; then
        echo "[ollama] pulling $MODEL..."
        podman exec ollama ollama pull "$MODEL"
    fi
done

# RAG API
IMAGE_NAME="localhost/rag-api"
APP_DIR="$BASE_DIR/rag-api"
DOCS_DIR="$BASE_DIR/documents"
# The documents directory is bind-mounted below, so it has to exist too.
mkdir -p "$APP_DIR" "$DOCS_DIR"
# The image is built only when it is missing.
if ! podman image exists "$IMAGE_NAME"; then
    echo "[rag-api] building..."
    podman build -t "$IMAGE_NAME" -f "$APP_DIR/Containerfile" "$APP_DIR"
fi
start_container rag-api \
    --name rag-api \
    --restart=always \
    -v "$DOCS_DIR:/app/documents" \
    -p 8000:8000 \
    "$IMAGE_NAME"

echo ""
echo "Waiting services..."
# Short fixed pause before listing container status.
sleep 3

echo ""
podman ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"