Removes module-level doc comments (//! lines) and excessive inline doc comments
that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the "what" is clear from types)
- Implementation context (the "how" is clear from code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain "why"
(business decisions, non-obvious constraints), not "what" (which the code
itself shows). This change reduces noise and maintenance burden while keeping
the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
194 lines
5.1 KiB
Rust
194 lines
5.1 KiB
Rust
use reqwest::Client;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::time::Duration;
|
|
|
|
use crate::core::error::{LoreError, Result};
|
|
|
|
/// Connection settings for an Ollama embedding server.
#[derive(Debug, Clone)]
pub struct OllamaConfig {
    /// Server root, e.g. `http://localhost:11434` (no trailing slash).
    pub base_url: String,
    /// Embedding model name as known to Ollama (tag suffix optional).
    pub model: String,
    /// Per-request timeout applied to the underlying HTTP client.
    pub timeout_secs: u64,
}
|
|
|
|
impl Default for OllamaConfig {
|
|
fn default() -> Self {
|
|
Self {
|
|
base_url: "http://localhost:11434".to_string(),
|
|
model: "nomic-embed-text".to_string(),
|
|
timeout_secs: 60,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// HTTP client for Ollama's embedding endpoints.
pub struct OllamaClient {
    // Shared reqwest client, built once in `new` with the configured timeout.
    client: Client,
    // Connection settings (base URL, model, timeout).
    config: OllamaConfig,
}
|
|
|
|
// Request body for `POST /api/embed`. Field names are the wire format —
// they must match Ollama's API schema, so do not rename them.
#[derive(Serialize)]
struct EmbedRequest {
    model: String,
    // Batch of texts to embed in a single request.
    input: Vec<String>,
}
|
|
|
|
// Response body for `POST /api/embed`. Field names mirror Ollama's schema.
#[derive(Deserialize)]
struct EmbedResponse {
    // Echoed model name; kept for schema completeness but unused here.
    #[allow(dead_code)]
    model: String,
    // One embedding vector per input text, in request order
    // (presumed from Ollama's API contract — not verifiable from this file).
    embeddings: Vec<Vec<f32>>,
}
|
|
|
|
// Response body for `GET /api/tags` (list of installed models).
#[derive(Deserialize)]
struct TagsResponse {
    models: Vec<ModelInfo>,
}
|
|
|
|
// Single entry in the `/api/tags` listing. Names include the tag,
// e.g. "nomic-embed-text:latest".
#[derive(Deserialize)]
struct ModelInfo {
    name: String,
}
|
|
|
|
impl OllamaClient {
|
|
pub fn new(config: OllamaConfig) -> Self {
|
|
let client = Client::builder()
|
|
.timeout(Duration::from_secs(config.timeout_secs))
|
|
.build()
|
|
.expect("Failed to create HTTP client");
|
|
|
|
Self { client, config }
|
|
}
|
|
|
|
pub async fn health_check(&self) -> Result<()> {
|
|
let url = format!("{}/api/tags", self.config.base_url);
|
|
|
|
let response =
|
|
self.client
|
|
.get(&url)
|
|
.send()
|
|
.await
|
|
.map_err(|e| LoreError::OllamaUnavailable {
|
|
base_url: self.config.base_url.clone(),
|
|
source: Some(e),
|
|
})?;
|
|
|
|
let tags: TagsResponse =
|
|
response
|
|
.json()
|
|
.await
|
|
.map_err(|e| LoreError::OllamaUnavailable {
|
|
base_url: self.config.base_url.clone(),
|
|
source: Some(e),
|
|
})?;
|
|
|
|
let model_found = tags
|
|
.models
|
|
.iter()
|
|
.any(|m| m.name.starts_with(&self.config.model));
|
|
|
|
if !model_found {
|
|
return Err(LoreError::OllamaModelNotFound {
|
|
model: self.config.model.clone(),
|
|
});
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub async fn embed_batch(&self, texts: Vec<String>) -> Result<Vec<Vec<f32>>> {
|
|
let url = format!("{}/api/embed", self.config.base_url);
|
|
|
|
let request = EmbedRequest {
|
|
model: self.config.model.clone(),
|
|
input: texts,
|
|
};
|
|
|
|
let response = self
|
|
.client
|
|
.post(&url)
|
|
.json(&request)
|
|
.send()
|
|
.await
|
|
.map_err(|e| LoreError::OllamaUnavailable {
|
|
base_url: self.config.base_url.clone(),
|
|
source: Some(e),
|
|
})?;
|
|
|
|
let status = response.status();
|
|
if !status.is_success() {
|
|
let body = response.text().await.unwrap_or_default();
|
|
return Err(LoreError::EmbeddingFailed {
|
|
document_id: 0,
|
|
reason: format!("HTTP {}: {}", status, body),
|
|
});
|
|
}
|
|
|
|
let embed_response: EmbedResponse =
|
|
response
|
|
.json()
|
|
.await
|
|
.map_err(|e| LoreError::EmbeddingFailed {
|
|
document_id: 0,
|
|
reason: format!("Failed to parse embed response: {}", e),
|
|
})?;
|
|
|
|
Ok(embed_response.embeddings)
|
|
}
|
|
}
|
|
|
|
pub async fn check_ollama_health(base_url: &str) -> bool {
|
|
let client = Client::builder()
|
|
.timeout(Duration::from_secs(5))
|
|
.build()
|
|
.ok();
|
|
|
|
let Some(client) = client else {
|
|
return false;
|
|
};
|
|
|
|
let url = format!("{base_url}/api/tags");
|
|
client.get(&url).send().await.is_ok()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_config_defaults() {
        let OllamaConfig {
            base_url,
            model,
            timeout_secs,
        } = OllamaConfig::default();
        assert_eq!(base_url, "http://localhost:11434");
        assert_eq!(model, "nomic-embed-text");
        assert_eq!(timeout_secs, 60);
    }

    #[test]
    fn test_health_check_model_starts_with() {
        // Mirrors the prefix match in `health_check`: a configured model
        // name must match any "model:tag" variant reported by Ollama.
        let tag_name = "nomic-embed-text:latest";
        assert!(tag_name.starts_with("nomic-embed-text"));
        assert!(!tag_name.starts_with("llama2"));
    }

    #[test]
    fn test_embed_request_serialization() {
        let json = serde_json::to_string(&EmbedRequest {
            model: "nomic-embed-text".to_string(),
            input: vec!["hello".to_string(), "world".to_string()],
        })
        .unwrap();
        assert!(json.contains("\"model\":\"nomic-embed-text\""));
        assert!(json.contains("\"input\":[\"hello\",\"world\"]"));
    }

    #[test]
    fn test_embed_response_deserialization() {
        let json = r#"{"model":"nomic-embed-text","embeddings":[[0.1,0.2,0.3],[0.4,0.5,0.6]]}"#;
        let parsed: EmbedResponse = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.embeddings.len(), 2);
        assert_eq!(parsed.embeddings[0], vec![0.1, 0.2, 0.3]);
        assert_eq!(parsed.embeddings[1], vec![0.4, 0.5, 0.6]);
    }
}
|