Change OllamaClient::embed_batch to accept &[&str] instead of Vec<String>. The EmbedRequest struct now borrows both model name and input texts, eliminating per-batch cloning of chunk text (up to 32KB per chunk x 32 chunks per batch). Serialization output is identical since serde serializes &str and String to the same JSON. In hybrid search, defer the RrfResult->HybridResult mapping until after filter+take, so only `limit` items (typically 20) are constructed instead of up to 1,500 at RECALL_CAP. Also switch filtered_ids to into_iter() to avoid an extra .copied() pass. Switch FTS search_fts from prepare() to prepare_cached() for statement reuse across repeated searches. Benchmarked at ~1.6x faster. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
201 lines
5.3 KiB
Rust
201 lines
5.3 KiB
Rust
use reqwest::Client;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::time::Duration;
|
|
use tracing::warn;
|
|
|
|
use crate::core::error::{LoreError, Result};
|
|
|
|
pub struct OllamaConfig {
|
|
pub base_url: String,
|
|
pub model: String,
|
|
pub timeout_secs: u64,
|
|
}
|
|
|
|
impl Default for OllamaConfig {
|
|
fn default() -> Self {
|
|
Self {
|
|
base_url: "http://localhost:11434".to_string(),
|
|
model: "nomic-embed-text".to_string(),
|
|
timeout_secs: 60,
|
|
}
|
|
}
|
|
}
|
|
|
|
pub struct OllamaClient {
|
|
client: Client,
|
|
config: OllamaConfig,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
struct EmbedRequest<'a> {
|
|
model: &'a str,
|
|
input: Vec<&'a str>,
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
struct EmbedResponse {
|
|
#[allow(dead_code)]
|
|
model: String,
|
|
embeddings: Vec<Vec<f32>>,
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
struct TagsResponse {
|
|
models: Vec<ModelInfo>,
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
struct ModelInfo {
|
|
name: String,
|
|
}
|
|
|
|
impl OllamaClient {
|
|
pub fn new(config: OllamaConfig) -> Self {
|
|
let client = Client::builder()
|
|
.timeout(Duration::from_secs(config.timeout_secs))
|
|
.build()
|
|
.unwrap_or_else(|e| {
|
|
warn!(
|
|
error = %e,
|
|
"Failed to build configured Ollama HTTP client; falling back to default client"
|
|
);
|
|
Client::new()
|
|
});
|
|
|
|
Self { client, config }
|
|
}
|
|
|
|
pub async fn health_check(&self) -> Result<()> {
|
|
let url = format!("{}/api/tags", self.config.base_url);
|
|
|
|
let response =
|
|
self.client
|
|
.get(&url)
|
|
.send()
|
|
.await
|
|
.map_err(|e| LoreError::OllamaUnavailable {
|
|
base_url: self.config.base_url.clone(),
|
|
source: Some(e),
|
|
})?;
|
|
|
|
let tags: TagsResponse =
|
|
response
|
|
.json()
|
|
.await
|
|
.map_err(|e| LoreError::OllamaUnavailable {
|
|
base_url: self.config.base_url.clone(),
|
|
source: Some(e),
|
|
})?;
|
|
|
|
let model_found = tags
|
|
.models
|
|
.iter()
|
|
.any(|m| m.name.starts_with(&self.config.model));
|
|
|
|
if !model_found {
|
|
return Err(LoreError::OllamaModelNotFound {
|
|
model: self.config.model.clone(),
|
|
});
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub async fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
|
|
let url = format!("{}/api/embed", self.config.base_url);
|
|
|
|
let request = EmbedRequest {
|
|
model: &self.config.model,
|
|
input: texts.to_vec(),
|
|
};
|
|
|
|
let response = self
|
|
.client
|
|
.post(&url)
|
|
.json(&request)
|
|
.send()
|
|
.await
|
|
.map_err(|e| LoreError::OllamaUnavailable {
|
|
base_url: self.config.base_url.clone(),
|
|
source: Some(e),
|
|
})?;
|
|
|
|
let status = response.status();
|
|
if !status.is_success() {
|
|
let body = response.text().await.unwrap_or_default();
|
|
return Err(LoreError::EmbeddingFailed {
|
|
document_id: 0,
|
|
reason: format!("HTTP {}: {}", status, body),
|
|
});
|
|
}
|
|
|
|
let embed_response: EmbedResponse =
|
|
response
|
|
.json()
|
|
.await
|
|
.map_err(|e| LoreError::EmbeddingFailed {
|
|
document_id: 0,
|
|
reason: format!("Failed to parse embed response: {}", e),
|
|
})?;
|
|
|
|
Ok(embed_response.embeddings)
|
|
}
|
|
}
|
|
|
|
pub async fn check_ollama_health(base_url: &str) -> bool {
|
|
let client = Client::builder()
|
|
.timeout(Duration::from_secs(5))
|
|
.build()
|
|
.ok();
|
|
|
|
let Some(client) = client else {
|
|
return false;
|
|
};
|
|
|
|
let url = format!("{base_url}/api/tags");
|
|
client.get(&url).send().await.is_ok()
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn test_config_defaults() {
|
|
let config = OllamaConfig::default();
|
|
assert_eq!(config.base_url, "http://localhost:11434");
|
|
assert_eq!(config.model, "nomic-embed-text");
|
|
assert_eq!(config.timeout_secs, 60);
|
|
}
|
|
|
|
#[test]
|
|
fn test_health_check_model_starts_with() {
|
|
let model = "nomic-embed-text";
|
|
let tag_name = "nomic-embed-text:latest";
|
|
assert!(tag_name.starts_with(model));
|
|
|
|
let wrong_model = "llama2";
|
|
assert!(!tag_name.starts_with(wrong_model));
|
|
}
|
|
|
|
#[test]
|
|
fn test_embed_request_serialization() {
|
|
let request = EmbedRequest {
|
|
model: "nomic-embed-text",
|
|
input: vec!["hello", "world"],
|
|
};
|
|
let json = serde_json::to_string(&request).unwrap();
|
|
assert!(json.contains("\"model\":\"nomic-embed-text\""));
|
|
assert!(json.contains("\"input\":[\"hello\",\"world\"]"));
|
|
}
|
|
|
|
#[test]
|
|
fn test_embed_response_deserialization() {
|
|
let json = r#"{"model":"nomic-embed-text","embeddings":[[0.1,0.2,0.3],[0.4,0.5,0.6]]}"#;
|
|
let response: EmbedResponse = serde_json::from_str(json).unwrap();
|
|
assert_eq!(response.embeddings.len(), 2);
|
|
assert_eq!(response.embeddings[0], vec![0.1, 0.2, 0.3]);
|
|
assert_eq!(response.embeddings[1], vec![0.4, 0.5, 0.6]);
|
|
}
|
|
}
|