Sample controller with REST endpoints for embeddings - krickert/search-api GitHub Wiki

package com.example.controller;

import io.micronaut.core.annotation.Introspected;
import io.micronaut.http.MediaType;
import io.micronaut.http.annotation.*;
import io.micronaut.serde.annotation.Serdeable;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.media.*;
import io.swagger.v3.oas.annotations.parameters.Parameter;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import jakarta.inject.Inject;

import java.util.List;

/**
 * REST endpoints under {@code /api/embeddings}.
 *
 * <p>Every endpoint forwards its input to the injected {@code EmbeddingService} and wraps
 * whatever the service returns in a response record. Single and batch lookups are each
 * exposed twice: once as a GET taking {@code text} query parameters and once as a POST
 * taking a JSON body.
 */
@Controller("/api/embeddings")
public class EmbeddingController {

    /** Collaborator that produces the embeddings; supplied through constructor injection. */
    private final EmbeddingService embeddingService;

    /**
     * Creates the controller.
     *
     * @param embeddingService service every endpoint delegates to
     */
    @Inject
    public EmbeddingController(EmbeddingService embeddingService) {
        this.embeddingService = embeddingService;
    }

    /**
     * Handles {@code GET /api/embeddings/predict}.
     *
     * @param text value of the {@code text} query parameter
     * @return the result of {@code embeddingService.getEmbedding(text)}, wrapped for serialization
     */
    @Operation(
        summary = "Get single embedding via GET",
        description = "Provide a single 'text' query parameter to receive one embedding.")
    @Get(uri = "/predict", produces = MediaType.APPLICATION_JSON)
    public SingleEmbeddingResponse predict(@QueryValue("text") String text) {
        return new SingleEmbeddingResponse(embeddingService.getEmbedding(text));
    }

    /**
     * Handles {@code GET /api/embeddings/batchPredict}.
     *
     * @param texts values of the (repeatable) {@code text} query parameter
     * @return the result of {@code embeddingService.getEmbeddings(texts)}, wrapped for serialization
     */
    @Operation(
        summary = "Get batch embeddings via GET",
        description = "Provide multiple 'text' query parameters to receive multiple embeddings.")
    @Get(uri = "/batchPredict", produces = MediaType.APPLICATION_JSON)
    public MultiEmbeddingResponse batchPredict(@QueryValue("text") List<String> texts) {
        return new MultiEmbeddingResponse(embeddingService.getEmbeddings(texts));
    }

    /**
     * Handles {@code POST /api/embeddings/predict} with a JSON body.
     *
     * @param request body carrying the single {@code text} to embed
     * @return the result of {@code embeddingService.getEmbedding(...)}, wrapped for serialization
     */
    @Operation(
        summary = "Get single embedding via JSON POST",
        description = "Submit JSON with a single 'text' to get one embedding.")
    @Post(
        uri = "/predict",
        consumes = MediaType.APPLICATION_JSON,
        produces = MediaType.APPLICATION_JSON)
    public SingleEmbeddingResponse predictJson(@Body SingleTextRequest request) {
        String text = request.text();
        return new SingleEmbeddingResponse(embeddingService.getEmbedding(text));
    }

    /**
     * Handles {@code POST /api/embeddings/batchPredict} with a JSON body.
     *
     * @param request body carrying the {@code texts} to embed
     * @return the result of {@code embeddingService.getEmbeddings(...)}, wrapped for serialization
     */
    @Operation(
        summary = "Get batch embeddings via JSON POST",
        description = "Submit JSON with 'texts' (array of strings) to get multiple embeddings.")
    @Post(
        uri = "/batchPredict",
        consumes = MediaType.APPLICATION_JSON,
        produces = MediaType.APPLICATION_JSON)
    public MultiEmbeddingResponse batchPredictJson(@Body MultiTextRequest request) {
        List<String> texts = request.texts();
        return new MultiEmbeddingResponse(embeddingService.getEmbeddings(texts));
    }

    /** JSON request body holding one text. */
    @Introspected
    @Serdeable.Deserializable
    public record SingleTextRequest(String text) {}

    /** JSON request body holding a list of texts. */
    @Introspected
    @Serdeable.Deserializable
    public record MultiTextRequest(List<String> texts) {}

    /** JSON response holding one embedding per entry returned by the service. */
    @Introspected
    @Serdeable.Serializable
    public record MultiEmbeddingResponse(List<float[]> embeddings) {}

    /** JSON response holding a single embedding. */
    @Introspected
    @Serdeable.Serializable
    public record SingleEmbeddingResponse(float[] embedding) {}
}

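Example controller that offloads chunking work to worker threads (Reactor's bounded-elastic scheduler, at most 5 concurrent chunking operations per batch request):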

package com.example.controller;

import io.micronaut.core.annotation.Introspected;
import io.micronaut.http.HttpStatus;
import io.micronaut.http.MediaType;
import io.micronaut.http.exceptions.HttpStatusException;
import io.micronaut.http.annotation.*;
import io.micronaut.serde.annotation.Serdeable;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import jakarta.inject.Inject;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;

import java.util.List;
import java.util.Optional;

/**
 * REST endpoints under {@code /api/chunker} that split text into chunks by delegating to
 * the injected {@code ChunkerService}.
 *
 * <p>Single-document endpoints call the service directly on the request thread. Batch
 * endpoints run each {@code chunkerService.chunk(...)} call on Reactor's bounded-elastic
 * scheduler with at most {@value #MAX_CONCURRENT_CHUNKS} calls in flight, and emit the
 * responses in the same order as the input texts.
 *
 * <p>{@code chunkSize} and {@code overlap} may be given as query parameters and, for POST
 * endpoints, in the JSON body; the query parameter wins when both are present.
 *
 * <p>NOTE(review): {@code overlap} is not checked against {@code chunkSize}. Whether
 * {@code overlap >= chunkSize} is acceptable depends on {@code ChunkerService}, which is
 * not visible here; confirm and add validation if the service cannot handle it.
 */
@Controller("/api/chunker")
public class ChunkerController {

    /** Chunk size used when neither the query string nor the body supplies one. */
    private static final int DEFAULT_CHUNK_SIZE = 500;

    /** Smallest accepted chunk size; smaller values are rejected with 400. */
    private static final int MIN_CHUNK_SIZE = 11;

    /** Overlap used when neither the query string nor the body supplies one. */
    private static final int DEFAULT_OVERLAP = 100;

    /** Smallest accepted overlap; smaller values are rejected with 400. */
    private static final int MIN_OVERLAP = 0;

    /** Maximum number of chunking calls a single batch request runs at the same time. */
    private static final int MAX_CONCURRENT_CHUNKS = 5;

    private final ChunkerService chunkerService;

    /**
     * Creates the controller.
     *
     * @param chunkerService service that performs the actual chunking
     */
    @Inject
    public ChunkerController(ChunkerService chunkerService) {
        this.chunkerService = chunkerService;
    }

    /**
     * Handles {@code POST /api/chunker/}: chunks the single text in the JSON body.
     *
     * @param request   body with the text and optional {@code chunkSize}/{@code overlap}
     * @param chunkSize optional query override for the chunk size
     * @param overlap   optional query override for the overlap
     * @return the chunks returned by the service
     * @throws HttpStatusException 400 if a resolved parameter is below its minimum
     */
    @Operation(summary = "Chunk a single document",
        description = "Provide a single text to chunk into smaller segments.")
    @ApiResponse(description = "Returns chunked segments of the input text.")
    @Post(uri = "/", consumes = MediaType.APPLICATION_JSON, produces = MediaType.APPLICATION_JSON)
    public ChunkResponse chunk(@Body SingleTextRequest request,
                               @QueryValue Optional<Integer> chunkSize,
                               @QueryValue Optional<Integer> overlap) {
        int finalChunkSize = resolveChunkSize(chunkSize, request.chunkSize());
        int finalOverlap = resolveOverlap(overlap, request.overlap());
        return new ChunkResponse(chunkerService.chunk(request.text(), finalChunkSize, finalOverlap));
    }

    /**
     * Handles {@code POST /api/chunker/batch}: chunks every text in the JSON body.
     *
     * @param request   body with the texts and optional {@code chunkSize}/{@code overlap}
     * @param chunkSize optional query override for the chunk size
     * @param overlap   optional query override for the overlap
     * @return one response per input text, in input order
     * @throws HttpStatusException 400 if {@code texts} is missing or a resolved parameter
     *                             is below its minimum
     */
    @Operation(summary = "Chunk multiple documents with reactive backpressure",
        description = "Chunk multiple texts concurrently with a maximum of 5 concurrent chunking operations.")
    @ApiResponse(description = "Returns chunked segments for each input text.")
    @Post(uri = "/batch", consumes = MediaType.APPLICATION_JSON, produces = MediaType.APPLICATION_JSON)
    public Flux<ChunkResponse> batchChunk(@Body MultiTextRequest request,
                                          @QueryValue Optional<Integer> chunkSize,
                                          @QueryValue Optional<Integer> overlap) {
        int finalChunkSize = resolveChunkSize(chunkSize, request.chunkSize());
        int finalOverlap = resolveOverlap(overlap, request.overlap());
        return chunkAll(request.texts(), finalChunkSize, finalOverlap);
    }

    /**
     * Handles {@code GET /api/chunker/}: chunks the text given as a query parameter.
     *
     * @param text      text to chunk
     * @param chunkSize optional chunk size
     * @param overlap   optional overlap
     * @return the chunks returned by the service
     * @throws HttpStatusException 400 if a resolved parameter is below its minimum
     */
    @Operation(summary = "Chunk a single document via GET",
        description = "Provide a single text to chunk into smaller segments.")
    @ApiResponse(description = "Returns chunked segments of the input text.")
    @Get(uri = "/", produces = MediaType.APPLICATION_JSON)
    public ChunkResponse chunkGet(@QueryValue String text,
                                  @QueryValue Optional<Integer> chunkSize,
                                  @QueryValue Optional<Integer> overlap) {
        int finalChunkSize = resolveChunkSize(chunkSize, Optional.empty());
        int finalOverlap = resolveOverlap(overlap, Optional.empty());
        return new ChunkResponse(chunkerService.chunk(text, finalChunkSize, finalOverlap));
    }

    /**
     * Handles {@code GET /api/chunker/batch}: chunks every text given as a query parameter.
     *
     * @param texts     texts to chunk
     * @param chunkSize optional chunk size
     * @param overlap   optional overlap
     * @return one response per input text, in input order
     * @throws HttpStatusException 400 if {@code texts} is missing or a resolved parameter
     *                             is below its minimum
     */
    @Operation(summary = "Chunk multiple documents via GET with reactive backpressure",
        description = "Chunk multiple texts concurrently with a maximum of 5 concurrent chunking operations.")
    @ApiResponse(description = "Returns chunked segments for each input text.")
    @Get(uri = "/batch", produces = MediaType.APPLICATION_JSON)
    public Flux<ChunkResponse> batchChunkGet(@QueryValue List<String> texts,
                                             @QueryValue Optional<Integer> chunkSize,
                                             @QueryValue Optional<Integer> overlap) {
        int finalChunkSize = resolveChunkSize(chunkSize, Optional.empty());
        int finalOverlap = resolveOverlap(overlap, Optional.empty());
        return chunkAll(texts, finalChunkSize, finalOverlap);
    }

    /**
     * Chunks each text on the bounded-elastic scheduler, at most
     * {@value #MAX_CONCURRENT_CHUNKS} at a time.
     *
     * <p>Uses {@code flatMapSequential} rather than {@code flatMap}: a response carries no
     * reference to its input, so the only way a caller can match responses to texts is by
     * position, and {@code flatMap} would emit them in completion order.
     *
     * @param texts     texts to chunk; must not be {@code null}
     * @param chunkSize validated chunk size
     * @param overlap   validated overlap
     * @return one response per text, in the order of {@code texts}
     * @throws HttpStatusException 400 if {@code texts} is {@code null}
     */
    private Flux<ChunkResponse> chunkAll(List<String> texts, int chunkSize, int overlap) {
        if (texts == null) {
            // Without this guard Flux.fromIterable(null) throws a NullPointerException,
            // which would surface as a 500 for what is a client error.
            throw new HttpStatusException(HttpStatus.BAD_REQUEST, "Missing required parameter 'texts'");
        }
        return Flux.fromIterable(texts)
            .flatMapSequential(text -> Mono.fromCallable(() -> chunkerService.chunk(text, chunkSize, overlap))
                    .subscribeOn(Schedulers.boundedElastic())
                    .map(ChunkResponse::new),
                MAX_CONCURRENT_CHUNKS
            );
    }

    /** Resolves and validates the effective chunk size (query first, then body, then default). */
    private int resolveChunkSize(Optional<Integer> queryParam, Optional<Integer> bodyParam) {
        return resolveParameter("chunkSize", queryParam, bodyParam, DEFAULT_CHUNK_SIZE, MIN_CHUNK_SIZE);
    }

    /** Resolves and validates the effective overlap (query first, then body, then default). */
    private int resolveOverlap(Optional<Integer> queryParam, Optional<Integer> bodyParam) {
        return resolveParameter("overlap", queryParam, bodyParam, DEFAULT_OVERLAP, MIN_OVERLAP);
    }

    /**
     * Picks the effective value of a numeric parameter and enforces its lower bound.
     *
     * @param paramName    name used in the error message
     * @param queryParam   value from the query string; takes precedence when present
     * @param bodyParam    value from the request body; used when the query value is absent
     * @param defaultValue value used when neither source supplies one
     * @param minValue     smallest accepted value (inclusive)
     * @return the resolved value, guaranteed to be at least {@code minValue}
     * @throws HttpStatusException 400 if the resolved value is below {@code minValue}
     */
    private int resolveParameter(String paramName, Optional<Integer> queryParam, Optional<Integer> bodyParam, int defaultValue, int minValue) {
        int paramValue = queryParam.orElseGet(() -> bodyParam.orElse(defaultValue));
        if (paramValue < minValue) {
            throw new HttpStatusException(HttpStatus.BAD_REQUEST, "Invalid parameter '" + paramName + "', minimum allowed is " + minValue);
        }
        return paramValue;
    }

    /** JSON request body for single-document chunking. */
    @Introspected
    @Serdeable.Deserializable
    public record SingleTextRequest(String text, Optional<Integer> chunkSize, Optional<Integer> overlap) {}

    /** JSON request body for batch chunking. */
    @Introspected
    @Serdeable.Deserializable
    public record MultiTextRequest(List<String> texts, Optional<Integer> chunkSize, Optional<Integer> overlap) {}

    /** JSON response holding the chunks produced for one input text. */
    @Introspected
    @Serdeable.Serializable
    public record ChunkResponse(List<String> chunks) {}
}
⚠️ **GitHub.com Fallback** ⚠️