Sample controller with REST endpoints for embeddings - krickert/search-api GitHub Wiki
package com.example.controller;
import io.micronaut.core.annotation.Introspected;
import io.micronaut.http.HttpStatus;
import io.micronaut.http.MediaType;
import io.micronaut.http.annotation.*;
import io.micronaut.http.exceptions.HttpStatusException;
import io.micronaut.serde.annotation.Serdeable;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.media.*;
// NOTE(review): swagger-annotations appears to declare Parameter in io.swagger.v3.oas.annotations,
// not in the .parameters sub-package - confirm this import resolves against the project's version.
import io.swagger.v3.oas.annotations.parameters.Parameter;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import jakarta.inject.Inject;
import java.util.List;
/**
 * REST endpoints under {@code /api/embeddings} that return embedding vectors for text.
 *
 * <p>Each endpoint validates that the required text input is present and then delegates to
 * the injected {@link EmbeddingService}. Single-text endpoints return one {@code float[]};
 * batch endpoints return one {@code float[]} per entry of the list handed to the service.
 */
@Controller("/api/embeddings")
public class EmbeddingController {

    private final EmbeddingService embeddingService;

    /**
     * @param embeddingService service that computes the embeddings
     */
    @Inject
    public EmbeddingController(EmbeddingService embeddingService) {
        this.embeddingService = embeddingService;
    }

    /**
     * Returns the embedding for a single text passed as the {@code text} query parameter.
     *
     * @param text the text to embed
     * @return the embedding wrapped in a {@link SingleEmbeddingResponse}
     * @throws HttpStatusException with status 400 when {@code text} is absent
     */
    @Operation(summary = "Get single embedding via GET",
            description = "Provide a single 'text' query parameter to receive one embedding.")
    @Get(uri = "/predict", produces = MediaType.APPLICATION_JSON)
    public SingleEmbeddingResponse predict(@QueryValue("text") String text) {
        return new SingleEmbeddingResponse(embeddingService.getEmbedding(requireText(text)));
    }

    /**
     * Returns embeddings for every {@code text} query parameter supplied.
     *
     * @param texts the texts to embed, bound from repeated {@code text} query parameters
     * @return the embeddings wrapped in a {@link MultiEmbeddingResponse}
     * @throws HttpStatusException with status 400 when the list or one of its entries is absent
     */
    @Operation(summary = "Get batch embeddings via GET",
            description = "Provide multiple 'text' query parameters to receive multiple embeddings.")
    @Get(uri = "/batchPredict", produces = MediaType.APPLICATION_JSON)
    public MultiEmbeddingResponse batchPredict(@QueryValue("text") List<String> texts) {
        return new MultiEmbeddingResponse(embeddingService.getEmbeddings(requireTexts(texts, "text")));
    }

    /**
     * Returns the embedding for the {@code text} field of a JSON request body.
     *
     * @param request the deserialized JSON body
     * @return the embedding wrapped in a {@link SingleEmbeddingResponse}
     * @throws HttpStatusException with status 400 when the body or its {@code text} is absent
     */
    @Operation(summary = "Get single embedding via JSON POST",
            description = "Submit JSON with a single 'text' to get one embedding.")
    @Post(uri = "/predict", consumes = MediaType.APPLICATION_JSON, produces = MediaType.APPLICATION_JSON)
    public SingleEmbeddingResponse predictJson(@Body SingleTextRequest request) {
        // A body such as {} deserializes with text == null; reject it here instead of
        // handing null to the service.
        String text = requireText(request == null ? null : request.text());
        return new SingleEmbeddingResponse(embeddingService.getEmbedding(text));
    }

    /**
     * Returns embeddings for the {@code texts} array of a JSON request body.
     *
     * @param request the deserialized JSON body
     * @return the embeddings wrapped in a {@link MultiEmbeddingResponse}
     * @throws HttpStatusException with status 400 when the body, {@code texts}, or one of its
     *         entries is absent
     */
    @Operation(summary = "Get batch embeddings via JSON POST",
            description = "Submit JSON with 'texts' (array of strings) to get multiple embeddings.")
    @Post(uri = "/batchPredict", consumes = MediaType.APPLICATION_JSON, produces = MediaType.APPLICATION_JSON)
    public MultiEmbeddingResponse batchPredictJson(@Body MultiTextRequest request) {
        List<String> texts = requireTexts(request == null ? null : request.texts(), "texts");
        return new MultiEmbeddingResponse(embeddingService.getEmbeddings(texts));
    }

    /** Returns {@code text} unchanged, or fails the request with 400 when it is {@code null}. */
    private static String requireText(String text) {
        if (text == null) {
            throw new HttpStatusException(HttpStatus.BAD_REQUEST, "Missing required 'text'");
        }
        return text;
    }

    /**
     * Returns {@code texts} unchanged, or fails the request with 400 when the list itself or
     * any entry is {@code null}. An empty list is allowed and passed through.
     */
    private static List<String> requireTexts(List<String> texts, String name) {
        if (texts == null) {
            throw new HttpStatusException(HttpStatus.BAD_REQUEST, "Missing required '" + name + "'");
        }
        for (String text : texts) {
            if (text == null) {
                throw new HttpStatusException(HttpStatus.BAD_REQUEST,
                        "Parameter '" + name + "' must not contain null entries");
            }
        }
        return texts;
    }

    /** JSON request body carrying a single text to embed. */
    @Introspected
    @Serdeable.Deserializable
    public record SingleTextRequest(String text) {}

    /** JSON request body carrying several texts to embed. */
    @Introspected
    @Serdeable.Deserializable
    public record MultiTextRequest(List<String> texts) {}

    /** Response holding the embeddings returned by the service for a batch request. */
    @Introspected
    @Serdeable.Serializable
    public record MultiEmbeddingResponse(List<float[]> embeddings) {}

    /** Response holding the embedding returned by the service for a single text. */
    @Introspected
    @Serdeable.Serializable
    public record SingleEmbeddingResponse(float[] embedding) {}
}
Example controller that offloads chunking work to worker threads (reactive batch endpoints with bounded concurrency):
package com.example.controller;
import io.micronaut.core.annotation.Introspected;
import io.micronaut.http.HttpStatus;
import io.micronaut.http.MediaType;
import io.micronaut.http.exceptions.HttpStatusException;
import io.micronaut.http.annotation.*;
import io.micronaut.serde.annotation.Serdeable;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import jakarta.inject.Inject;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;
import java.util.List;
import java.util.Optional;
/**
 * REST endpoints under {@code /api/chunker} that split text into chunks via the injected
 * {@link ChunkerService}.
 *
 * <p>{@code chunkSize} and {@code overlap} may be supplied as query parameters and, for the
 * POST endpoints, in the JSON body. A query parameter takes precedence over the body value,
 * and a built-in default applies when neither is given. Values below the allowed minimum
 * are rejected with HTTP 400.
 *
 * <p>The batch endpoints run the chunking calls on Reactor's bounded-elastic scheduler with
 * at most {@value #MAX_CONCURRENT_CHUNKS} calls in flight, and emit one response per input
 * text in the same order as the input list.
 */
@Controller("/api/chunker")
public class ChunkerController {

    /** Chunk size used when neither the query string nor the request body supplies one. */
    private static final int DEFAULT_CHUNK_SIZE = 500;
    /** Smallest accepted chunk size. */
    private static final int MIN_CHUNK_SIZE = 11;
    /** Overlap used when neither the query string nor the request body supplies one. */
    private static final int DEFAULT_OVERLAP = 100;
    /** Smallest accepted overlap. */
    private static final int MIN_OVERLAP = 0;
    /** Maximum number of chunking calls a batch request runs at once (see the @Operation text). */
    private static final int MAX_CONCURRENT_CHUNKS = 5;

    private final ChunkerService chunkerService;

    /**
     * @param chunkerService service that performs the actual chunking
     */
    @Inject
    public ChunkerController(ChunkerService chunkerService) {
        this.chunkerService = chunkerService;
    }

    /**
     * Chunks the {@code text} of a JSON request body.
     *
     * @param request   the deserialized JSON body
     * @param chunkSize optional query override for the chunk size
     * @param overlap   optional query override for the overlap
     * @return the chunks produced by the service
     * @throws HttpStatusException with status 400 when the body or its text is absent, or a
     *         size parameter is below its minimum
     */
    @Operation(summary = "Chunk a single document",
            description = "Provide a single text to chunk into smaller segments.")
    @ApiResponse(description = "Returns chunked segments of the input text.")
    @Post(uri = "/", consumes = MediaType.APPLICATION_JSON, produces = MediaType.APPLICATION_JSON)
    public ChunkResponse chunk(@Body SingleTextRequest request,
                               @QueryValue Optional<Integer> chunkSize,
                               @QueryValue Optional<Integer> overlap) {
        requirePresent(request, "request body");
        String text = requirePresent(request.text(), "text");
        int finalChunkSize = resolveParameter("chunkSize", chunkSize, request.chunkSize(),
                DEFAULT_CHUNK_SIZE, MIN_CHUNK_SIZE);
        int finalOverlap = resolveParameter("overlap", overlap, request.overlap(),
                DEFAULT_OVERLAP, MIN_OVERLAP);
        return new ChunkResponse(chunkerService.chunk(text, finalChunkSize, finalOverlap));
    }

    /**
     * Chunks every entry of the {@code texts} array of a JSON request body.
     *
     * @param request   the deserialized JSON body
     * @param chunkSize optional query override for the chunk size
     * @param overlap   optional query override for the overlap
     * @return one {@link ChunkResponse} per input text, in input order
     * @throws HttpStatusException with status 400 when the body, {@code texts}, or one of its
     *         entries is absent, or a size parameter is below its minimum
     */
    @Operation(summary = "Chunk multiple documents with reactive backpressure",
            description = "Chunk multiple texts concurrently with a maximum of 5 concurrent chunking operations.")
    @ApiResponse(description = "Returns chunked segments for each input text.")
    @Post(uri = "/batch", consumes = MediaType.APPLICATION_JSON, produces = MediaType.APPLICATION_JSON)
    public Flux<ChunkResponse> batchChunk(@Body MultiTextRequest request,
                                          @QueryValue Optional<Integer> chunkSize,
                                          @QueryValue Optional<Integer> overlap) {
        requirePresent(request, "request body");
        List<String> texts = requireTexts(request.texts());
        int finalChunkSize = resolveParameter("chunkSize", chunkSize, request.chunkSize(),
                DEFAULT_CHUNK_SIZE, MIN_CHUNK_SIZE);
        int finalOverlap = resolveParameter("overlap", overlap, request.overlap(),
                DEFAULT_OVERLAP, MIN_OVERLAP);
        return chunkConcurrently(texts, finalChunkSize, finalOverlap);
    }

    /**
     * Chunks a single text passed as the {@code text} query parameter.
     *
     * @param text      the text to chunk
     * @param chunkSize optional chunk size
     * @param overlap   optional overlap
     * @return the chunks produced by the service
     * @throws HttpStatusException with status 400 when {@code text} is absent or a size
     *         parameter is below its minimum
     */
    @Operation(summary = "Chunk a single document via GET",
            description = "Provide a single text to chunk into smaller segments.")
    @ApiResponse(description = "Returns chunked segments of the input text.")
    @Get(uri = "/", produces = MediaType.APPLICATION_JSON)
    public ChunkResponse chunkGet(@QueryValue String text,
                                  @QueryValue Optional<Integer> chunkSize,
                                  @QueryValue Optional<Integer> overlap) {
        String requiredText = requirePresent(text, "text");
        int finalChunkSize = resolveParameter("chunkSize", chunkSize, Optional.empty(),
                DEFAULT_CHUNK_SIZE, MIN_CHUNK_SIZE);
        int finalOverlap = resolveParameter("overlap", overlap, Optional.empty(),
                DEFAULT_OVERLAP, MIN_OVERLAP);
        return new ChunkResponse(chunkerService.chunk(requiredText, finalChunkSize, finalOverlap));
    }

    /**
     * Chunks every text passed via the {@code texts} query parameter.
     *
     * @param texts     the texts to chunk
     * @param chunkSize optional chunk size
     * @param overlap   optional overlap
     * @return one {@link ChunkResponse} per input text, in input order
     * @throws HttpStatusException with status 400 when {@code texts} or one of its entries is
     *         absent, or a size parameter is below its minimum
     */
    @Operation(summary = "Chunk multiple documents via GET with reactive backpressure",
            description = "Chunk multiple texts concurrently with a maximum of 5 concurrent chunking operations.")
    @ApiResponse(description = "Returns chunked segments for each input text.")
    @Get(uri = "/batch", produces = MediaType.APPLICATION_JSON)
    public Flux<ChunkResponse> batchChunkGet(@QueryValue List<String> texts,
                                             @QueryValue Optional<Integer> chunkSize,
                                             @QueryValue Optional<Integer> overlap) {
        List<String> requiredTexts = requireTexts(texts);
        int finalChunkSize = resolveParameter("chunkSize", chunkSize, Optional.empty(),
                DEFAULT_CHUNK_SIZE, MIN_CHUNK_SIZE);
        int finalOverlap = resolveParameter("overlap", overlap, Optional.empty(),
                DEFAULT_OVERLAP, MIN_OVERLAP);
        return chunkConcurrently(requiredTexts, finalChunkSize, finalOverlap);
    }

    /**
     * Chunks each text on the bounded-elastic scheduler, running at most
     * {@link #MAX_CONCURRENT_CHUNKS} calls at once.
     *
     * <p>{@code flatMapSequential} is used instead of {@code flatMap} so results are emitted in
     * the order of {@code texts}. {@link ChunkResponse} carries no reference to its source text,
     * so callers can only match responses to inputs by position.
     */
    private Flux<ChunkResponse> chunkConcurrently(List<String> texts, int chunkSize, int overlap) {
        return Flux.fromIterable(texts)
                .flatMapSequential(
                        text -> Mono.fromCallable(() -> chunkerService.chunk(text, chunkSize, overlap))
                                .subscribeOn(Schedulers.boundedElastic())
                                .map(ChunkResponse::new),
                        MAX_CONCURRENT_CHUNKS);
    }

    /**
     * Picks the effective value of a numeric parameter: the query value if present, otherwise
     * the body value, otherwise {@code defaultValue}.
     *
     * @throws HttpStatusException with status 400 when the chosen value is below {@code minValue}
     */
    private static int resolveParameter(String paramName, Optional<Integer> queryParam, Optional<Integer> bodyParam, int defaultValue, int minValue) {
        // Treat a null Optional like an empty one, in case a binder or deserializer leaves an
        // omitted value as null rather than Optional.empty().
        Optional<Integer> fromQuery = queryParam == null ? Optional.empty() : queryParam;
        Optional<Integer> fromBody = bodyParam == null ? Optional.empty() : bodyParam;
        int value = fromQuery.or(() -> fromBody).orElse(defaultValue);
        // NOTE(review): overlap is never compared against chunkSize, so e.g. chunkSize=50 with
        // the default overlap of 100 is accepted - confirm ChunkerService tolerates overlap >= chunkSize.
        if (value < minValue) {
            throw new HttpStatusException(HttpStatus.BAD_REQUEST, "Invalid parameter '" + paramName + "', minimum allowed is " + minValue);
        }
        return value;
    }

    /** Returns {@code value} unchanged, or fails the request with 400 when it is {@code null}. */
    private static <T> T requirePresent(T value, String name) {
        if (value == null) {
            throw new HttpStatusException(HttpStatus.BAD_REQUEST, "Missing required '" + name + "'");
        }
        return value;
    }

    /**
     * Returns {@code texts} unchanged, or fails the request with 400 when the list itself or
     * any entry is {@code null}. An empty list is allowed and yields an empty response stream.
     */
    private static List<String> requireTexts(List<String> texts) {
        requirePresent(texts, "texts");
        for (String text : texts) {
            if (text == null) {
                throw new HttpStatusException(HttpStatus.BAD_REQUEST,
                        "Parameter 'texts' must not contain null entries");
            }
        }
        return texts;
    }

    /** JSON body for single-text chunking; {@code chunkSize} and {@code overlap} are optional. */
    @Introspected
    @Serdeable.Deserializable
    public record SingleTextRequest(String text, Optional<Integer> chunkSize, Optional<Integer> overlap) {}

    /** JSON body for batch chunking; {@code chunkSize} and {@code overlap} are optional. */
    @Introspected
    @Serdeable.Deserializable
    public record MultiTextRequest(List<String> texts, Optional<Integer> chunkSize, Optional<Integer> overlap) {}

    /** Response holding the chunks produced for one input text. */
    @Introspected
    @Serdeable.Serializable
    public record ChunkResponse(List<String> chunks) {}
}