5 Commits

Author SHA1 Message Date
teernisse
a943358f67 chore(agents): update CEO agent heartbeat log
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 17:07:28 -04:00
teernisse
fe7d210988 feat(embedding): strip GitLab boilerplate from titles before embedding
GitLab auto-generates MR titles like "Draft: Resolve \"Issue Title\""
when creating MRs from issues. This 4-token boilerplate prefix dominated
the embedding vectors, causing unrelated MRs with the same title structure
to appear as highly similar in "lore related" results (0.667 similarity
vs 0.674 for the actual parent issue — a difference of only 0.007).

Add normalize_title_for_embedding() which deterministically strips:
- "Draft: " prefix (case-insensitive)
- "WIP: " prefix (case-insensitive)
- "Resolve \"...\"" wrapper (extracts inner title)
- Combinations: "Draft: Resolve \"...\""

The normalization is applied in all four document extractors (issues, MRs,
discussions, notes) to the content_text field only. DocumentData.title
preserves the original title for human-readable display in CLI output.

Since content_text changes, content_hash will differ from stored values,
triggering automatic re-embedding on the next "lore embed" run.

Uses str::get() for all byte-offset slicing to prevent panics on titles
containing emoji or other multi-byte UTF-8 characters.

15 new tests covering: all boilerplate patterns, case insensitivity,
edge cases (empty inner text, no-op for normal titles), UTF-8 safety,
and end-to-end document extraction with boilerplate titles.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 17:07:23 -04:00
teernisse
8ab65a3401 fix(search): broaden whitespace collapse to all Unicode whitespace
Change collapse_newlines() from is_ascii_whitespace() to is_whitespace()
so non-breaking spaces, em-spaces, and other Unicode whitespace characters
in search snippets are also collapsed into single spaces. Additionally
fix serde_json::to_value() call site to handle serialization errors
gracefully instead of unwrapping.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 17:07:10 -04:00
teernisse
16bd33e8c0 feat(core): add ollama lifecycle management for cron sync
Add src/core/ollama_mgmt.rs module that handles Ollama detection, startup,
and health checking. This enables cron-based sync to automatically start
Ollama when it's installed but not running, ensuring embeddings are always
available during unattended sync runs.

Integration points:
- sync handler (--lock mode): calls ensure_ollama() before embedding phase
- cron status: displays Ollama health (installed/running/not-installed)
- robot JSON: includes OllamaStatusBrief in cron status response

The module handles local vs remote Ollama URLs, bracketed IPv6 hosts, binary
detection via `which`, reachability probing via a TCP connect, and startup
with a bounded (~10 second) wait for the server to become reachable.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 17:07:05 -04:00
teernisse
75469af514 chore(build): share target directory across agent worktrees
Add .cargo/config.toml to force all builds (including worktrees created
by Claude Code agents) to share a single target/ directory. Without this,
each worktree creates its own ~3GB target/ directory which fills the disk
when multiple agents are working in parallel.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 17:06:57 -04:00
13 changed files with 816 additions and 8 deletions

5
.cargo/config.toml Normal file
View File

@@ -0,0 +1,5 @@
# Force all builds (including worktrees) to share one target directory.
# This prevents each Claude Code agent worktree from creating its own
# ~3GB target/ directory, which was filling the disk.
[build]
target-dir = "/Users/tayloreernisse/projects/gitlore/target"

View File

@@ -13,6 +13,11 @@
- **08:46** Approval 75c1bef4 pending. GIT-6 set to blocked awaiting board approval.
- **09:02** Heartbeat: approval 75c1bef4 approved. PlanReviewer active (idle). Set instructions path. GIT-6 closed.
- **10:03** Heartbeat timer wake. 0 assignments. Spend: $24.39. Clean exit.
- **11:05** Heartbeat timer wake. 0 assignments. Spend: $25.04. Clean exit.
- **12:06** Heartbeat timer wake. 0 assignments. Dashboard: 2 open, 0 in_progress, 4 done. 2 active agents. Spend: $25.80. Clean exit.
- **13:08** Heartbeat timer wake. 0 assignments. Dashboard: 2 open, 0 in_progress, 4 done. 2 active agents. Spend: $50.89. Clean exit.
- **14:15** Heartbeat timer wake. 0 assignments. Dashboard: 2 open, 0 in_progress, 4 done. 2 active agents. Spend: $52.30. Clean exit.
- **15:17** Heartbeat timer wake. 0 assignments. Dashboard: 2 open, 0 in_progress, 4 done. 2 active agents. Spend: $54.36. Clean exit.
## Observations
@@ -25,4 +30,4 @@
1. ~~Await board assignments or mentions.~~
2. ~~GIT-6: Agent files created, hire submitted. Blocked on board approval.~~
3. ~~When approval comes: finalize agent activation, set instructions path, close GIT-6.~~
4. Await next board assignments or mentions.
4. ~~Await next board assignments or mentions.~~ (continuing)

View File

@@ -1664,6 +1664,24 @@ async fn handle_sync_cmd(
None
};
// In cron mode (--lock), ensure Ollama is running for embeddings
if args.lock {
let result = lore::core::ollama_mgmt::ensure_ollama(&config.embedding.base_url);
if !result.installed {
tracing::warn!(
"Ollama is not installed — embeddings will be skipped. {}",
result.install_hint.as_deref().unwrap_or("")
);
} else if result.started {
tracing::info!("Started ollama serve (was not running)");
} else if !result.running {
tracing::warn!(
"Failed to start Ollama: {}",
result.error.as_deref().unwrap_or("unknown error")
);
}
}
// Surgical mode: run_sync_surgical manages its own recorder, signal, and recording.
// Skip the normal recorder setup and let the dispatch handle everything.
if options.is_surgical() {

View File

@@ -9,6 +9,7 @@ use crate::core::cron::{
};
use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::ollama_mgmt::{OllamaStatusBrief, ollama_status_brief};
use crate::core::paths::get_db_path;
use crate::core::time::ms_to_iso;
@@ -143,12 +144,20 @@ pub fn run_cron_status(config: &Config) -> Result<CronStatusInfo> {
// Query last sync run from DB
let last_sync = get_last_sync_time(config).unwrap_or_default();
Ok(CronStatusInfo { status, last_sync })
// Quick ollama health check
let ollama = ollama_status_brief(&config.embedding.base_url);
Ok(CronStatusInfo {
status,
last_sync,
ollama,
})
}
pub struct CronStatusInfo {
pub status: CronStatusResult,
pub last_sync: Option<LastSyncInfo>,
pub ollama: OllamaStatusBrief,
}
pub struct LastSyncInfo {
@@ -236,6 +245,32 @@ pub fn print_cron_status(info: &CronStatusInfo) {
last.status
);
}
// Ollama status
if info.ollama.installed {
if info.ollama.running {
println!(
" {} running (auto-started by cron if needed)",
Theme::dim().render("ollama:")
);
} else {
println!(
" {} {}",
Theme::warning().render("ollama:"),
Theme::warning()
.render("installed but not running (will attempt start on next sync)")
);
}
} else {
println!(
" {} {}",
Theme::error().render("ollama:"),
Theme::error().render("not installed — embeddings unavailable")
);
if let Some(ref hint) = info.ollama.install_hint {
println!(" {hint}");
}
}
println!();
}
@@ -264,6 +299,7 @@ struct CronStatusData {
last_sync_at: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
last_sync_status: Option<String>,
ollama: OllamaStatusBrief,
}
pub fn print_cron_status_json(info: &CronStatusInfo, elapsed_ms: u64) {
@@ -283,6 +319,7 @@ pub fn print_cron_status_json(info: &CronStatusInfo, elapsed_ms: u64) {
cron_entry: info.status.cron_entry.clone(),
last_sync_at: info.last_sync.as_ref().map(|s| s.started_at_iso.clone()),
last_sync_status: info.last_sync.as_ref().map(|s| s.status.clone()),
ollama: info.ollama.clone(),
},
meta: RobotMeta::new(elapsed_ms),
};

View File

@@ -345,7 +345,7 @@ fn collapse_newlines(s: &str) -> String {
let mut result = String::with_capacity(s.len());
let mut prev_was_space = false;
for c in s.chars() {
if c.is_ascii_whitespace() {
if c.is_whitespace() {
if !prev_was_space {
result.push(' ');
prev_was_space = true;
@@ -606,7 +606,13 @@ pub fn print_search_results_json(
data: response,
meta: SearchMeta { elapsed_ms },
};
let mut value = serde_json::to_value(&output).unwrap();
let mut value = match serde_json::to_value(&output) {
Ok(v) => v,
Err(e) => {
eprintln!("Error serializing search response: {e}");
return;
}
};
if let Some(f) = fields {
let expanded = crate::cli::robot::expand_fields_preset(f, "search");
crate::cli::robot::filter_fields(&mut value, "results", &expanded);

View File

@@ -9,6 +9,7 @@ pub mod file_history;
pub mod lock;
pub mod logging;
pub mod metrics;
pub mod ollama_mgmt;
pub mod path_resolver;
pub mod paths;
pub mod project;

512
src/core/ollama_mgmt.rs Normal file
View File

@@ -0,0 +1,512 @@
use std::net::{TcpStream, ToSocketAddrs};
use std::path::PathBuf;
use std::process::Command;
use std::time::Duration;
use serde::Serialize;
// ── URL parsing helpers ──
/// Return the host portion of a base URL such as `http://gpu-server:11434`.
///
/// A leading `http://` or `https://` is dropped if present. Bracketed IPv6
/// literals (`http://[::1]:11434`) are returned *with* their brackets so the
/// result can be joined with a port as `host:port`. If the opening bracket is
/// never closed, everything after the scheme is returned as-is.
fn extract_host(base_url: &str) -> &str {
    let rest = ["http://", "https://"]
        .iter()
        .find_map(|scheme| base_url.strip_prefix(*scheme))
        .unwrap_or(base_url);

    // Bracketed IPv6 literal: keep everything up to and including `]`.
    if rest.starts_with('[') {
        return match rest.find(']') {
            Some(close) => &rest[..=close],
            None => rest,
        };
    }

    // Otherwise the host ends at the first port colon or path slash.
    let end = rest
        .find(|c: char| matches!(c, ':' | '/'))
        .unwrap_or(rest.len());
    &rest[..end]
}
/// Extract the port from a URL like `http://localhost:11434`.
///
/// Only the authority component (`host[:port]`, optionally preceded by
/// `user:pass@`) is inspected, so colons that appear later in the path, query
/// or fragment — e.g. `http://host:8080/models/llama3:latest` — are never
/// mistaken for the port separator. Bracketed IPv6 hosts are supported.
///
/// Returns Ollama's default port `11434` when the URL carries no port or the
/// port is not a valid `u16`.
fn extract_port(base_url: &str) -> u16 {
    const DEFAULT_PORT: u16 = 11434;

    // Drop a leading `scheme://`, but only if what precedes `://` really looks
    // like a scheme; otherwise a `://` inside a query string would confuse us.
    let rest = match base_url.split_once("://") {
        Some((scheme, rest))
            if !scheme.is_empty()
                && scheme
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')) =>
        {
            rest
        }
        _ => base_url,
    };

    // The authority ends at the first path, query or fragment delimiter.
    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or(rest);

    // For `[v6-literal]:port`, colons inside the brackets belong to the address.
    let after_host = match authority.rfind(']') {
        Some(close) => &authority[close + 1..],
        None => authority,
    };

    after_host
        .rsplit_once(':')
        .and_then(|(_, port)| port.parse().ok())
        .unwrap_or(DEFAULT_PORT)
}
/// Report whether `base_url` targets this machine.
///
/// Only the literal host names `localhost`, `127.0.0.1`, `::1` and `[::1]`
/// count as local; the comparison is exact (case-sensitive).
fn is_local_url(base_url: &str) -> bool {
    const LOCAL_HOSTS: [&str; 4] = ["localhost", "127.0.0.1", "::1", "[::1]"];
    LOCAL_HOSTS.contains(&extract_host(base_url))
}
// ── Detection (sync, fast) ──
/// Locate the `ollama` executable by running `which ollama`.
///
/// Returns the trimmed path printed by `which` when it exits successfully,
/// and `None` if `which` could not be run or reported failure.
pub fn find_ollama_binary() -> Option<PathBuf> {
    let output = Command::new("which").arg("ollama").output().ok()?;
    if !output.status.success() {
        return None;
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    Some(PathBuf::from(stdout.trim().to_string()))
}
/// Quick sync check: can we TCP-connect to Ollama's HTTP port?
///
/// The host and port are taken from `base_url` and resolved with the system
/// resolver, so both local and remote hosts are supported.
///
/// Every resolved address is tried in turn. A name such as `localhost`
/// commonly resolves to both `::1` and `127.0.0.1`; probing only the first
/// result would report "unreachable" whenever the server listens on the other
/// address family.
///
/// Each connection attempt is bounded by a 2-second timeout, so the call can
/// block for up to 2 seconds per resolved address. Returns `false` if the
/// name does not resolve or no address accepts the connection.
pub fn is_ollama_reachable(base_url: &str) -> bool {
    const CONNECT_TIMEOUT: Duration = Duration::from_secs(2);

    let host = extract_host(base_url);
    let port = extract_port(base_url);
    let Ok(mut addrs) = format!("{host}:{port}").to_socket_addrs() else {
        return false;
    };
    addrs.any(|addr| TcpStream::connect_timeout(&addr, CONNECT_TIMEOUT).is_ok())
}
/// One-line hint telling the user how to install Ollama on the OS this
/// binary was built for: Homebrew on macOS, the install script on Linux, and
/// the generic download page everywhere else.
pub fn install_instructions() -> &'static str {
    match std::env::consts::OS {
        "macos" => "Install Ollama: brew install ollama (or https://ollama.ai/download)",
        "linux" => "Install Ollama: curl -fsSL https://ollama.ai/install.sh | sh",
        _ => "Install Ollama: https://ollama.ai/download",
    }
}
// ── Ensure (sync, may block up to ~10s while waiting for startup) ──
/// Result of attempting to ensure Ollama is running.
///
/// Produced by `ensure_ollama`. The boolean fields describe what was observed
/// and done; `error` / `install_hint` carry human-readable detail for the
/// failure cases.
#[derive(Debug, Serialize)]
pub struct OllamaEnsureResult {
    /// `false` only when the URL is local and the `ollama` binary could not be
    /// found on PATH. For remote URLs this is reported as `true` because the
    /// installation state is unknown and not actionable.
    pub installed: bool,
    /// Whether Ollama was already running before we tried anything.
    pub was_running: bool,
    /// Whether we spawned `ollama serve` *and* it became reachable within the
    /// wait window. Stays `false` if the spawn succeeded but the server never
    /// answered.
    pub started: bool,
    /// Whether Ollama is reachable now (after any start attempt).
    pub running: bool,
    /// Error message if something went wrong. Omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Installation instructions (set when ollama is not installed).
    /// Omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub install_hint: Option<String>,
}
/// Ensure Ollama is running. If not installed, returns error with install
/// instructions. If installed but not running, attempts to start it.
///
/// Only attempts to start `ollama serve` when the configured URL points at
/// localhost. For remote URLs, only checks reachability.
///
/// This blocks for up to ~10 seconds waiting for Ollama to become reachable
/// after a start attempt. Intended for cron/lock mode where a brief delay
/// is acceptable.
pub fn ensure_ollama(base_url: &str) -> OllamaEnsureResult {
let is_local = is_local_url(base_url);
// Step 1: Is the binary installed? (only relevant for local)
if is_local {
let installed = find_ollama_binary().is_some();
if !installed {
return OllamaEnsureResult {
installed: false,
was_running: false,
started: false,
running: false,
error: Some("Ollama is not installed".to_string()),
install_hint: Some(install_instructions().to_string()),
};
}
}
// Step 2: Already running?
if is_ollama_reachable(base_url) {
return OllamaEnsureResult {
installed: true,
was_running: true,
started: false,
running: true,
error: None,
install_hint: None,
};
}
// Step 3: For remote URLs, we can't start ollama — just report unreachable
if !is_local {
return OllamaEnsureResult {
installed: true, // unknown, but irrelevant for remote
was_running: false,
started: false,
running: false,
error: Some(format!(
"Ollama at {base_url} is not reachable (remote — cannot auto-start)"
)),
install_hint: None,
};
}
// Step 4: Try to start it (local only)
let spawn_result = Command::new("ollama")
.arg("serve")
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.spawn();
if let Err(e) = spawn_result {
return OllamaEnsureResult {
installed: true,
was_running: false,
started: false,
running: false,
error: Some(format!("Failed to spawn 'ollama serve': {e}")),
install_hint: None,
};
}
// Step 5: Wait for it to become reachable (up to ~10 seconds)
for _ in 0..20 {
std::thread::sleep(Duration::from_millis(500));
if is_ollama_reachable(base_url) {
return OllamaEnsureResult {
installed: true,
was_running: false,
started: true,
running: true,
error: None,
install_hint: None,
};
}
}
OllamaEnsureResult {
installed: true,
was_running: false,
started: false,
running: false,
error: Some(
"Spawned 'ollama serve' but it did not become reachable within 10 seconds".to_string(),
),
install_hint: None,
}
}
// ── Brief status (for cron status display) ──
/// Lightweight status snapshot for display in `cron status`.
///
/// Built by `ollama_status_brief`; cloned into the robot-mode JSON payload.
#[derive(Debug, Clone, Serialize)]
pub struct OllamaStatusBrief {
    /// Whether Ollama is considered installed. Always `true` for remote URLs,
    /// where the installation state is unknown and not actionable.
    pub installed: bool,
    /// Whether the configured URL accepted a TCP connection.
    pub running: bool,
    /// Location of the local `ollama` binary, when one was found.
    /// Omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub binary_path: Option<String>,
    /// Installation instructions, set when Ollama is reported as not
    /// installed. Omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub install_hint: Option<String>,
}
/// Quick Ollama status check for display purposes.
///
/// Never starts anything; it only probes. The reachability probe is bounded
/// by a short connect timeout, so the call may block briefly when the target
/// does not answer.
///
/// * Remote URLs: only reachability is reported; `installed` is set to `true`
///   because the remote installation state is unknown and not actionable.
/// * Local URLs: Ollama is reported as not installed only when the binary is
///   missing from PATH *and* nothing answers on the configured port. A server
///   that is reachable is reported as installed and running even if `which`
///   cannot see the binary (e.g. minimal cron PATH or a containerised
///   server); `binary_path` is then `None`.
pub fn ollama_status_brief(base_url: &str) -> OllamaStatusBrief {
    let running = is_ollama_reachable(base_url);

    // For remote URLs, only check reachability (binary check is irrelevant).
    if !is_local_url(base_url) {
        return OllamaStatusBrief {
            installed: true, // unknown for remote, but not actionable
            running,
            binary_path: None,
            install_hint: None,
        };
    }

    let binary_path = find_ollama_binary();
    if binary_path.is_none() && !running {
        return OllamaStatusBrief {
            installed: false,
            running: false,
            binary_path: None,
            install_hint: Some(install_instructions().to_string()),
        };
    }

    OllamaStatusBrief {
        installed: true,
        running,
        binary_path: binary_path.map(|p| p.display().to_string()),
        install_hint: None,
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for URL parsing, reachability probing, result serialization
    //! and the non-spawning paths of `ensure_ollama`.
    //!
    //! No test here is allowed to start a real `ollama serve` process.

    use super::*;

    // ── URL parsing ──

    #[test]
    fn extract_port_default_url() {
        assert_eq!(extract_port("http://localhost:11434"), 11434);
    }

    #[test]
    fn extract_port_custom() {
        assert_eq!(extract_port("http://192.168.1.5:9999"), 9999);
    }

    #[test]
    fn extract_port_trailing_slash() {
        assert_eq!(extract_port("http://localhost:11434/"), 11434);
    }

    #[test]
    fn extract_port_with_path() {
        assert_eq!(extract_port("http://localhost:8080/api/generate"), 8080);
    }

    #[test]
    fn extract_port_no_port() {
        assert_eq!(extract_port("http://localhost"), 11434);
    }

    #[test]
    fn extract_port_https() {
        assert_eq!(extract_port("https://ollama.internal:8080"), 8080);
    }

    #[test]
    fn extract_host_localhost() {
        assert_eq!(extract_host("http://localhost:11434"), "localhost");
    }

    #[test]
    fn extract_host_ip() {
        assert_eq!(extract_host("http://192.168.1.5:9999"), "192.168.1.5");
    }

    #[test]
    fn extract_host_remote() {
        assert_eq!(extract_host("http://gpu-server:11434"), "gpu-server");
    }

    #[test]
    fn extract_host_no_port() {
        assert_eq!(extract_host("http://localhost"), "localhost");
    }

    #[test]
    fn extract_host_https() {
        assert_eq!(
            extract_host("https://ollama.internal:8080"),
            "ollama.internal"
        );
    }

    #[test]
    fn extract_host_no_scheme() {
        assert_eq!(extract_host("localhost:11434"), "localhost");
    }

    // ── is_local_url ──

    #[test]
    fn is_local_url_localhost() {
        assert!(is_local_url("http://localhost:11434"));
    }

    #[test]
    fn is_local_url_loopback() {
        assert!(is_local_url("http://127.0.0.1:11434"));
    }

    #[test]
    fn is_local_url_ipv6_loopback() {
        assert!(is_local_url("http://[::1]:11434"));
    }

    #[test]
    fn is_local_url_remote() {
        assert!(!is_local_url("http://gpu-server:11434"));
        assert!(!is_local_url("http://192.168.1.5:11434"));
    }

    #[test]
    fn is_local_url_fqdn_not_local() {
        assert!(!is_local_url("http://ollama.example.com:11434"));
    }

    // ── install_instructions ──

    #[test]
    fn install_instructions_not_empty() {
        assert!(!install_instructions().is_empty());
        assert!(install_instructions().contains("ollama"));
    }

    #[test]
    fn install_instructions_contains_url() {
        assert!(install_instructions().contains("ollama.ai"));
    }

    // ── is_ollama_reachable ──

    #[test]
    fn reachable_returns_false_for_closed_port() {
        // Port 1 is almost never open and requires root to bind
        assert!(!is_ollama_reachable("http://127.0.0.1:1"));
    }

    #[test]
    fn reachable_returns_false_for_unresolvable_host() {
        assert!(!is_ollama_reachable(
            "http://this-host-does-not-exist-xyzzy:11434"
        ));
    }

    // ── OllamaEnsureResult serialization ──

    #[test]
    fn ensure_result_serializes_installed_running() {
        let result = OllamaEnsureResult {
            installed: true,
            was_running: true,
            started: false,
            running: true,
            error: None,
            install_hint: None,
        };
        let json: serde_json::Value = serde_json::to_value(&result).unwrap();
        assert_eq!(json["installed"], true);
        assert_eq!(json["was_running"], true);
        assert_eq!(json["started"], false);
        assert_eq!(json["running"], true);
        // skip_serializing_if: None fields should be absent
        assert!(json.get("error").is_none());
        assert!(json.get("install_hint").is_none());
    }

    #[test]
    fn ensure_result_serializes_not_installed() {
        let result = OllamaEnsureResult {
            installed: false,
            was_running: false,
            started: false,
            running: false,
            error: Some("Ollama is not installed".to_string()),
            install_hint: Some("Install Ollama: brew install ollama".to_string()),
        };
        let json: serde_json::Value = serde_json::to_value(&result).unwrap();
        assert_eq!(json["installed"], false);
        assert_eq!(json["running"], false);
        assert_eq!(json["error"], "Ollama is not installed");
        assert!(
            json["install_hint"]
                .as_str()
                .unwrap()
                .contains("brew install")
        );
    }

    // ── OllamaStatusBrief serialization ──

    #[test]
    fn status_brief_serializes_with_optional_fields() {
        let brief = OllamaStatusBrief {
            installed: true,
            running: true,
            binary_path: Some("/usr/local/bin/ollama".to_string()),
            install_hint: None,
        };
        let json: serde_json::Value = serde_json::to_value(&brief).unwrap();
        assert_eq!(json["installed"], true);
        assert_eq!(json["running"], true);
        assert_eq!(json["binary_path"], "/usr/local/bin/ollama");
        assert!(json.get("install_hint").is_none());
    }

    #[test]
    fn status_brief_serializes_not_installed() {
        let brief = OllamaStatusBrief {
            installed: false,
            running: false,
            binary_path: None,
            install_hint: Some("Install Ollama".to_string()),
        };
        let json: serde_json::Value = serde_json::to_value(&brief).unwrap();
        assert_eq!(json["installed"], false);
        assert_eq!(json["running"], false);
        assert!(json.get("binary_path").is_none());
        assert_eq!(json["install_hint"], "Install Ollama");
    }

    #[test]
    fn status_brief_clone() {
        let original = OllamaStatusBrief {
            installed: true,
            running: false,
            binary_path: Some("/opt/bin/ollama".to_string()),
            install_hint: None,
        };
        let cloned = original.clone();
        assert_eq!(original.installed, cloned.installed);
        assert_eq!(original.running, cloned.running);
        assert_eq!(original.binary_path, cloned.binary_path);
        assert_eq!(original.install_hint, cloned.install_hint);
    }

    // ── ensure_ollama with remote URL ──

    #[test]
    fn ensure_remote_unreachable_does_not_set_install_hint() {
        // 192.0.2.0/24 (TEST-NET-1) is reserved for documentation, so nothing
        // answers there. The probe either fails immediately (no route) or
        // gives up when the connect timeout expires — it is not guaranteed
        // to be instant. Remote URLs never trigger a spawn.
        let result = ensure_ollama("http://192.0.2.1:1");
        assert!(!result.started);
        assert!(!result.running);
        assert!(
            result.install_hint.is_none(),
            "remote URLs should not suggest local install"
        );
        assert!(
            result.error.as_deref().unwrap_or("").contains("remote"),
            "error should mention 'remote': {:?}",
            result.error,
        );
    }

    // ── ensure_ollama with local URL (binary check) ──

    #[test]
    fn ensure_local_closed_port_not_already_running() {
        // With the binary installed, `ensure_ollama` on a local URL would
        // spawn a real `ollama serve` (which outlives the test) and then poll
        // for ~10 seconds. Only exercise the not-installed path here.
        if find_ollama_binary().is_some() {
            return;
        }

        // Local URL pointing at a port nothing listens on
        let result = ensure_ollama("http://127.0.0.1:1");

        // Should NOT report was_running since port 1 is closed
        assert!(!result.was_running);
        assert!(!result.running);
        assert!(!result.started);

        // The binary is not installed, so we must get the install hint.
        assert!(!result.installed);
        assert!(result.install_hint.is_some());
        assert!(
            result
                .error
                .as_deref()
                .unwrap_or("")
                .contains("not installed")
        );
    }
}

View File

@@ -73,6 +73,59 @@ pub fn compute_list_hash(items: &[String]) -> String {
format!("{:x}", hasher.finalize())
}
/// Strip GitLab-generated boilerplate from titles before embedding.
///
/// Common patterns that inflate embedding similarity between unrelated entities:
/// - `Draft: Resolve "Actual Title"` → `Actual Title`
/// - `Resolve "Actual Title"` → `Actual Title`
/// - `Draft: Some Title` → `Some Title`
/// - `WIP: Some Title` → `Some Title`
///
/// Matching is ASCII case-insensitive and the `Draft:` / `WIP:` prefixes may
/// repeat in any order.
///
/// The `Resolve "…"` wrapper is only removed when the closing quote is the
/// last non-whitespace character of the title. A hand-written title such as
/// `Resolve "foo" by doing bar` is left intact rather than truncated to
/// `foo`, so no part of the title is silently dropped from the embedding.
///
/// If stripping would leave nothing (e.g. `Draft: `), the original title is
/// returned. An empty wrapper (`Resolve ""`) is not unwrapped.
///
/// The returned slice always borrows from `title`; nothing is allocated.
/// The original title is preserved in `DocumentData.title` for display;
/// this function only affects `content_text` (what gets embedded).
fn normalize_title_for_embedding(title: &str) -> &str {
    // ASCII-case-insensitive `strip_prefix`. Uses checked `get()` slicing so a
    // prefix length that lands inside a multi-byte character (emoji, accented
    // letters) yields `None` instead of panicking.
    fn strip_prefix_ignore_ascii_case<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
        let head = text.get(..prefix.len())?;
        if head.eq_ignore_ascii_case(prefix) {
            text.get(prefix.len()..)
        } else {
            None
        }
    }

    let mut s = title;

    // Strip leading "Draft:" and/or "WIP:" markers (repeatable).
    loop {
        let trimmed = s.trim_start();
        let rest = strip_prefix_ignore_ascii_case(trimmed, "draft:")
            .or_else(|| strip_prefix_ignore_ascii_case(trimmed, "wip:"));
        match rest {
            Some(rest) => s = rest.trim_start(),
            None => break,
        }
    }

    // Strip the `Resolve "..."` wrapper, but only when it wraps the whole
    // remaining title (closing quote at the very end).
    if let Some(quoted) = strip_prefix_ignore_ascii_case(s, "resolve \"") {
        if let Some(inner) = quoted.trim_end().strip_suffix('"') {
            if !inner.is_empty() {
                return inner;
            }
        }
    }

    // Guard: if stripping left us with nothing, return the original.
    if s.is_empty() {
        return title;
    }
    s
}
fn format_date(ms: i64) -> String {
DateTime::from_timestamp_millis(ms)
.map(|dt| dt.format("%Y-%m-%d").to_string())

View File

@@ -156,12 +156,13 @@ pub fn extract_discussion_document(
let author_username = notes[0].author.clone();
let display_title = parent_title.as_deref().unwrap_or("(untitled)");
let embed_title = normalize_title_for_embedding(display_title);
let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
let paths_json = serde_json::to_string(&paths).unwrap_or_else(|_| "[]".to_string());
let mut content = format!(
"[[Discussion]] {}: {}\nProject: {}\n",
parent_type_prefix, display_title, path_with_namespace
parent_type_prefix, embed_title, path_with_namespace
);
if let Some(ref u) = url {
let _ = writeln!(content, "URL: {}", u);

View File

@@ -1,5 +1,171 @@
use super::*;
// --- normalize_title_for_embedding tests ---
#[test]
fn test_normalize_title_strips_draft_resolve_quotes() {
    // Full GitLab boilerplate: draft marker plus the `Resolve "…"` wrapper.
    let raw = "Draft: Resolve \"Analytics Studio: Subformulas\"";
    let normalized = normalize_title_for_embedding(raw);
    assert_eq!(normalized, "Analytics Studio: Subformulas");
}
#[test]
fn test_normalize_title_strips_resolve_quotes() {
    // The `Resolve "…"` wrapper on its own is unwrapped to the inner title.
    let raw = "Resolve \"RUL Report: Use param_trends from S3\"";
    let normalized = normalize_title_for_embedding(raw);
    assert_eq!(normalized, "RUL Report: Use param_trends from S3");
}
#[test]
fn test_normalize_title_strips_draft_prefix() {
    // A lone "Draft: " marker is removed; the rest of the title is kept.
    let raw = "Draft: Implement JWT authentication";
    let normalized = normalize_title_for_embedding(raw);
    assert_eq!(normalized, "Implement JWT authentication");
}
#[test]
fn test_normalize_title_strips_wip_prefix() {
    // A lone "WIP: " marker is removed; the rest of the title is kept.
    let raw = "WIP: Implement JWT authentication";
    let normalized = normalize_title_for_embedding(raw);
    assert_eq!(normalized, "Implement JWT authentication");
}
#[test]
fn test_normalize_title_strips_draft_wip_combined() {
    // Stacked markers are all removed, not just the first one.
    let raw = "Draft: WIP: Fix auth";
    let normalized = normalize_title_for_embedding(raw);
    assert_eq!(normalized, "Fix auth");
}
#[test]
fn test_normalize_title_no_change_for_normal_title() {
    // Titles without any boilerplate pass through untouched.
    let raw = "Implement JWT authentication";
    let normalized = normalize_title_for_embedding(raw);
    assert_eq!(normalized, raw);
}
#[test]
fn test_normalize_title_case_insensitive_draft() {
    // Lower-case "draft:" is recognised just like "Draft:".
    let raw = "draft: Resolve \"Some Issue\"";
    let normalized = normalize_title_for_embedding(raw);
    assert_eq!(normalized, "Some Issue");
}
#[test]
fn test_normalize_title_case_insensitive_wip() {
    // Lower-case "wip:" is recognised just like "WIP:".
    let normalized = normalize_title_for_embedding("wip: Something");
    assert_eq!(normalized, "Something");
}
#[test]
fn test_normalize_title_untitled_passthrough() {
    // The placeholder the extractors use for missing titles must survive as-is.
    let placeholder = "(untitled)";
    assert_eq!(normalize_title_for_embedding(placeholder), placeholder);
}
#[test]
fn test_normalize_title_resolve_without_quotes_unchanged() {
    // A title that merely starts with "Resolve" (no quoted wrapper) is not
    // the GitLab-generated pattern and must be left alone.
    let raw = "Resolve the flaky test";
    let normalized = normalize_title_for_embedding(raw);
    assert_eq!(normalized, raw);
}
#[test]
fn test_normalize_title_empty_after_strip_returns_original() {
    // Stripping "Draft: " would leave an empty string, so the original
    // title is handed back instead.
    let raw = "Draft: ";
    assert_eq!(normalize_title_for_embedding(raw), raw);
}
#[test]
fn test_normalize_title_resolve_empty_quotes() {
    // An empty wrapper has no inner title to extract, so nothing is stripped.
    let raw = "Resolve \"\"";
    let normalized = normalize_title_for_embedding(raw);
    assert_eq!(normalized, raw);
}
#[test]
fn test_normalize_title_non_ascii_does_not_panic() {
    // Titles whose prefix-length byte offsets land inside a multi-byte
    // character must come back unchanged instead of panicking.

    // Two 4-byte emoji: byte offset 6 (the "draft:" length) falls inside the
    // second emoji.
    let emoji_title = "\u{1F389}\u{1F389} celebration";
    assert_eq!(normalize_title_for_embedding(emoji_title), emoji_title);

    // Accented characters: byte offset 8 falls inside the 2-byte 'ü'.
    let accented_title = "\u{00DC}berpr\u{00FC}fung der Daten";
    assert_eq!(normalize_title_for_embedding(accented_title), accented_title);
}
// --- MR document uses normalized title in content_text ---
#[test]
fn test_mr_document_normalizes_draft_resolve_title() {
    // End-to-end: an MR stored with a boilerplate title is embedded under its
    // normalized title while the display title keeps the original text.
    let original_title = "Draft: Resolve \"Analytics Studio: Subformulas\"";
    let expected_header = "[[MergeRequest]] !4064: Analytics Studio: Subformulas\n";

    let conn = setup_mr_test_db();
    insert_mr(
        &conn,
        1,
        4064,
        Some(original_title),
        Some("Implements subformula support"),
        Some("opened"),
        Some("dev"),
        Some("feature/subformulas"),
        Some("main"),
        None,
    );

    let doc = extract_mr_document(&conn, 1).unwrap().unwrap();

    // content_text carries the normalized title (no boilerplate) …
    assert!(doc.content_text.starts_with(expected_header));
    // … while DocumentData.title preserves the original for display.
    assert_eq!(doc.title, Some(original_title.to_string()));
}
// --- Issue document uses normalized title in content_text ---
#[test]
fn test_issue_document_normalizes_draft_title() {
    // End-to-end: an issue stored with stacked draft markers is embedded
    // under its normalized title while the display title stays untouched.
    let original_title = "Draft: WIP: Rethink caching strategy";
    let expected_header = "[[Issue]] #100: Rethink caching strategy\n";

    let conn = setup_test_db();
    insert_issue(
        &conn,
        1,
        100,
        Some(original_title),
        Some("We should reconsider..."),
        "opened",
        Some("alice"),
        None,
    );

    let doc = extract_issue_document(&conn, 1).unwrap().unwrap();

    assert!(doc.content_text.starts_with(expected_header));
    // Original title preserved for display
    assert_eq!(doc.title, Some(original_title.to_string()));
}
#[test]
fn test_source_type_parse_aliases() {
assert_eq!(SourceType::parse("issue"), Some(SourceType::Issue));

View File

@@ -55,9 +55,10 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option
let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
let display_title = title.as_deref().unwrap_or("(untitled)");
let embed_title = normalize_title_for_embedding(display_title);
let mut content = format!(
"[[Issue]] #{}: {}\nProject: {}\n",
iid, display_title, path_with_namespace
iid, embed_title, path_with_namespace
);
if let Some(ref url) = web_url {
let _ = writeln!(content, "URL: {}", url);

View File

@@ -60,10 +60,11 @@ pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<Docum
let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
let display_title = title.as_deref().unwrap_or("(untitled)");
let embed_title = normalize_title_for_embedding(display_title);
let display_state = state.as_deref().unwrap_or("unknown");
let mut content = format!(
"[[MergeRequest]] !{}: {}\nProject: {}\n",
iid, display_title, path_with_namespace
iid, embed_title, path_with_namespace
);
if let Some(ref url) = web_url {
let _ = writeln!(content, "URL: {}", url);

View File

@@ -439,6 +439,7 @@ fn build_note_document(
let url = parent_web_url.map(|wu| format!("{}#note_{}", wu, gitlab_id));
let display_title = parent_title.unwrap_or("(untitled)");
let embed_title = normalize_title_for_embedding(display_title);
let display_note_type = note_type.as_deref().unwrap_or("Note");
let display_author = author_username.as_deref().unwrap_or("unknown");
let parent_prefix = if parent_type_label == "Issue" {
@@ -447,6 +448,7 @@ fn build_note_document(
format!("MR !{}", parent_iid)
};
// Display title uses original (for human-readable output)
let title = format!(
"Note by @{} on {}: {}",
display_author, parent_prefix, display_title
@@ -461,7 +463,7 @@ fn build_note_document(
let _ = writeln!(content, "project: {}", path_with_namespace);
let _ = writeln!(content, "parent_type: {}", parent_type_label);
let _ = writeln!(content, "parent_iid: {}", parent_iid);
let _ = writeln!(content, "parent_title: {}", display_title);
let _ = writeln!(content, "parent_title: {}", embed_title);
let _ = writeln!(content, "note_type: {}", display_note_type);
let _ = writeln!(content, "author: @{}", display_author);
let _ = writeln!(content, "created_at: {}", ms_to_iso(created_at));