2026-02-03 12:51:49 -05:00
parent 5c521491b7
commit 128008578a
47 changed files with 1981 additions and 653 deletions
						
						
						
						
@@ -1 +1 @@
bd-1m8
bd-1ep
						
						
						
						
 
						
						
							
						
						
						
@@ -3,12 +3,12 @@
use console::style;
use console::style;
use serde::Serialize;
use serde::Serialize;
use crate::Config;
use crate::core::db::create_connection;
use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::error::Result;
use crate::core::paths::get_db_path;
use crate::core::paths::get_db_path;
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
use crate::embedding::pipeline::embed_documents;
use crate::embedding::pipeline::embed_documents;
use crate::Config;
/// Result of the embed command.
/// Result of the embed command.
#[derive(Debug, Default, Serialize)]
#[derive(Debug, Default, Serialize)]
						
							
						
						
							
						
						
						
@@ -69,10 +69,7 @@ pub async fn run_embed(
/// Print human-readable output.
/// Print human-readable output.
pub fn print_embed(result: &EmbedCommandResult) {
pub fn print_embed(result: &EmbedCommandResult) {
    println!(
    println!("{} Embedding complete", style("done").green().bold(),);
        "{} Embedding complete",
        style("done").green().bold(),
    );
    println!("  Embedded: {}", result.embedded);
    println!("  Embedded: {}", result.embedded);
    if result.failed > 0 {
    if result.failed > 0 {
        println!("  Failed:   {}", style(result.failed).red());
        println!("  Failed:   {}", style(result.failed).red());
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -5,12 +5,12 @@ use rusqlite::Connection;
use serde::Serialize;
use serde::Serialize;
use tracing::info;
use tracing::info;
use crate::Config;
use crate::core::db::create_connection;
use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::error::Result;
use crate::core::paths::get_db_path;
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::project::resolve_project;
use crate::documents::{SourceType, regenerate_dirty_documents};
use crate::documents::{SourceType, regenerate_dirty_documents};
use crate::Config;
const FULL_MODE_CHUNK_SIZE: i64 = 2000;
const FULL_MODE_CHUNK_SIZE: i64 = 2000;
						
							
						
						
							
						
						
						
@@ -134,7 +134,11 @@ fn seed_dirty(
/// Print human-readable output.
/// Print human-readable output.
pub fn print_generate_docs(result: &GenerateDocsResult) {
pub fn print_generate_docs(result: &GenerateDocsResult) {
    let mode = if result.full_mode { "full" } else { "incremental" };
    let mode = if result.full_mode {
        "full"
    } else {
        "incremental"
    };
    println!(
    println!(
        "{} Document generation complete ({})",
        "{} Document generation complete ({})",
        style("done").green().bold(),
        style("done").green().bold(),
						
						
						
							
						
						
@@ -147,10 +151,7 @@ pub fn print_generate_docs(result: &GenerateDocsResult) {
    println!("  Regenerated: {}", result.regenerated);
    println!("  Regenerated: {}", result.regenerated);
    println!("  Unchanged:   {}", result.unchanged);
    println!("  Unchanged:   {}", result.unchanged);
    if result.errored > 0 {
    if result.errored > 0 {
        println!(
        println!("  Errored:     {}", style(result.errored).red());
            "  Errored:     {}",
            style(result.errored).red()
        );
    }
    }
}
}
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -39,6 +39,9 @@ pub struct IngestResult {
    pub labels_created: usize,
    pub labels_created: usize,
    pub discussions_fetched: usize,
    pub discussions_fetched: usize,
    pub notes_upserted: usize,
    pub notes_upserted: usize,
    // Resource events
    pub resource_events_fetched: usize,
    pub resource_events_failed: usize,
}
}
/// Controls what interactive UI elements `run_ingest` displays.
/// Controls what interactive UI elements `run_ingest` displays.
						
						
						
							
						
						
@@ -57,17 +60,26 @@ pub struct IngestDisplay {
impl IngestDisplay {
impl IngestDisplay {
    /// Interactive mode: everything visible.
    /// Interactive mode: everything visible.
    pub fn interactive() -> Self {
    pub fn interactive() -> Self {
        Self { show_progress: true, show_text: true }
        Self {
            show_progress: true,
            show_text: true,
        }
    }
    }
    /// Robot/JSON mode: everything hidden.
    /// Robot/JSON mode: everything hidden.
    pub fn silent() -> Self {
    pub fn silent() -> Self {
        Self { show_progress: false, show_text: false }
        Self {
            show_progress: false,
            show_text: false,
        }
    }
    }
    /// Progress only (used by sync in interactive mode).
    /// Progress only (used by sync in interactive mode).
    pub fn progress_only() -> Self {
    pub fn progress_only() -> Self {
        Self { show_progress: true, show_text: false }
        Self {
            show_progress: true,
            show_text: false,
        }
    }
    }
}
}
						
							
						
						
							
						
						
						
@@ -105,7 +117,8 @@ pub async fn run_ingest(
    lock.acquire(force)?;
    lock.acquire(force)?;
    // Get token from environment
    // Get token from environment
    let token = std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet {
    let token =
        std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet {
            env_var: config.gitlab.token_env_var.clone(),
            env_var: config.gitlab.token_env_var.clone(),
        })?;
        })?;
						
							
						
						
							
						
						
						
@@ -199,7 +212,9 @@ pub async fn run_ingest(
            let b = ProgressBar::new(0);
            let b = ProgressBar::new(0);
            b.set_style(
            b.set_style(
                ProgressStyle::default_bar()
                ProgressStyle::default_bar()
                    .template("    {spinner:.blue} Syncing discussions [{bar:30.cyan/dim}] {pos}/{len}")
                    .template(
                        "    {spinner:.blue} Syncing discussions [{bar:30.cyan/dim}] {pos}/{len}",
                    )
                    .unwrap()
                    .unwrap()
                    .progress_chars("=> "),
                    .progress_chars("=> "),
            );
            );
						
							
						
						
							
						
						
						
@@ -237,6 +252,23 @@ pub async fn run_ingest(
                ProgressEvent::MrDiscussionSyncComplete => {
                ProgressEvent::MrDiscussionSyncComplete => {
                    disc_bar_clone.finish_and_clear();
                    disc_bar_clone.finish_and_clear();
                }
                }
                ProgressEvent::ResourceEventsFetchStarted { total } => {
                    disc_bar_clone.set_length(total as u64);
                    disc_bar_clone.set_position(0);
                    disc_bar_clone.set_style(
                        ProgressStyle::default_bar()
                            .template("    {spinner:.blue} Fetching resource events [{bar:30.cyan/dim}] {pos}/{len}")
                            .unwrap()
                            .progress_chars("=> "),
                    );
                    disc_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100));
                }
                ProgressEvent::ResourceEventFetched { current, total: _ } => {
                    disc_bar_clone.set_position(current as u64);
                }
                ProgressEvent::ResourceEventsFetchComplete { .. } => {
                    disc_bar_clone.finish_and_clear();
                }
                _ => {}
                _ => {}
            })
            })
        };
        };
						
							
						
						
							
						
						
						
@@ -269,6 +301,8 @@ pub async fn run_ingest(
            total.notes_upserted += result.notes_upserted;
            total.notes_upserted += result.notes_upserted;
            total.issues_synced_discussions += result.issues_synced_discussions;
            total.issues_synced_discussions += result.issues_synced_discussions;
            total.issues_skipped_discussion_sync += result.issues_skipped_discussion_sync;
            total.issues_skipped_discussion_sync += result.issues_skipped_discussion_sync;
            total.resource_events_fetched += result.resource_events_fetched;
            total.resource_events_failed += result.resource_events_failed;
        } else {
        } else {
            let result = ingest_project_merge_requests_with_progress(
            let result = ingest_project_merge_requests_with_progress(
                &conn,
                &conn,
						
							
						
						
							
						
						
						
@@ -301,6 +335,8 @@ pub async fn run_ingest(
            total.diffnotes_count += result.diffnotes_count;
            total.diffnotes_count += result.diffnotes_count;
            total.mrs_synced_discussions += result.mrs_synced_discussions;
            total.mrs_synced_discussions += result.mrs_synced_discussions;
            total.mrs_skipped_discussion_sync += result.mrs_skipped_discussion_sync;
            total.mrs_skipped_discussion_sync += result.mrs_skipped_discussion_sync;
            total.resource_events_fetched += result.resource_events_fetched;
            total.resource_events_failed += result.resource_events_failed;
        }
        }
    }
    }
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -22,19 +22,19 @@ pub use count::{
pub use doctor::{print_doctor_results, run_doctor};
pub use doctor::{print_doctor_results, run_doctor};
pub use embed::{print_embed, print_embed_json, run_embed};
pub use embed::{print_embed, print_embed_json, run_embed};
pub use generate_docs::{print_generate_docs, print_generate_docs_json, run_generate_docs};
pub use generate_docs::{print_generate_docs, print_generate_docs_json, run_generate_docs};
pub use stats::{print_stats, print_stats_json, run_stats};
pub use search::{
    print_search_results, print_search_results_json, run_search, SearchCliFilters, SearchResponse,
};
pub use ingest::{IngestDisplay, print_ingest_summary, print_ingest_summary_json, run_ingest};
pub use ingest::{IngestDisplay, print_ingest_summary, print_ingest_summary_json, run_ingest};
pub use init::{InitInputs, InitOptions, InitResult, run_init};
pub use init::{InitInputs, InitOptions, InitResult, run_init};
pub use list::{
pub use list::{
    ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues,
    ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues,
    print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs,
    print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs,
};
};
pub use sync::{print_sync, print_sync_json, run_sync, SyncOptions, SyncResult};
pub use search::{
    SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search,
};
pub use show::{
pub use show::{
    print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, run_show_issue,
    print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, run_show_issue,
    run_show_mr,
    run_show_mr,
};
};
pub use stats::{print_stats, print_stats_json, run_stats};
pub use sync::{SyncOptions, SyncResult, print_sync, print_sync_json, run_sync};
pub use sync_status::{print_sync_status, print_sync_status_json, run_sync_status};
pub use sync_status::{print_sync_status, print_sync_status_json, run_sync_status};
						
						
						
						
 
						
						
							
						
						
						
@@ -3,6 +3,7 @@
use console::style;
use console::style;
use serde::Serialize;
use serde::Serialize;
use crate::Config;
use crate::core::db::create_connection;
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path;
use crate::core::paths::get_db_path;
						
						
						
							
						
						
@@ -10,10 +11,9 @@ use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, parse_since};
use crate::core::time::{ms_to_iso, parse_since};
use crate::documents::SourceType;
use crate::documents::SourceType;
use crate::search::{
use crate::search::{
    apply_filters, get_result_snippet, rank_rrf, search_fts, FtsQueryMode, PathFilter,
    FtsQueryMode, PathFilter, SearchFilters, apply_filters, get_result_snippet, rank_rrf,
    SearchFilters,
    search_fts,
};
};
use crate::Config;
/// Display-ready search result with all fields hydrated.
/// Display-ready search result with all fields hydrated.
#[derive(Debug, Serialize)]
#[derive(Debug, Serialize)]
						
							
						
						
							
						
						
						
@@ -86,9 +86,7 @@ pub fn run_search(
            mode: "lexical".to_string(),
            mode: "lexical".to_string(),
            total_results: 0,
            total_results: 0,
            results: vec![],
            results: vec![],
            warnings: vec![
            warnings: vec!["No documents indexed. Run 'lore generate-docs' first.".to_string()],
                "No documents indexed. Run 'lore generate-docs' first.".to_string()
            ],
        });
        });
    }
    }
						
							
						
						
							
						
						
						
@@ -151,9 +149,9 @@ pub fn run_search(
    // Adaptive recall: wider initial fetch when filters applied
    // Adaptive recall: wider initial fetch when filters applied
    let requested = filters.clamp_limit();
    let requested = filters.clamp_limit();
    let top_k = if filters.has_any_filter() {
    let top_k = if filters.has_any_filter() {
        (requested * 50).max(200).min(1500)
        (requested * 50).clamp(200, 1500)
    } else {
    } else {
        (requested * 10).max(50).min(1500)
        (requested * 10).clamp(50, 1500)
    };
    };
    // FTS search
    // FTS search
						
							
						
						
							
						
						
						
@@ -190,10 +188,8 @@ pub fn run_search(
    let hydrated = hydrate_results(&conn, &filtered_ids)?;
    let hydrated = hydrate_results(&conn, &filtered_ids)?;
    // Build display results preserving filter order
    // Build display results preserving filter order
    let rrf_map: std::collections::HashMap<i64, &crate::search::RrfResult> = ranked
    let rrf_map: std::collections::HashMap<i64, &crate::search::RrfResult> =
        .iter()
        ranked.iter().map(|r| (r.document_id, r)).collect();
        .map(|r| (r.document_id, r))
        .collect();
    let mut results: Vec<SearchResultDisplay> = Vec::with_capacity(hydrated.len());
    let mut results: Vec<SearchResultDisplay> = Vec::with_capacity(hydrated.len());
    for row in &hydrated {
    for row in &hydrated {
						
							
						
						
							
						
						
						
@@ -256,16 +252,13 @@ struct HydratedRow {
///
///
/// Uses json_each() to pass ranked IDs and preserve ordering via ORDER BY j.key.
/// Uses json_each() to pass ranked IDs and preserve ordering via ORDER BY j.key.
/// Labels and paths fetched via correlated json_group_array subqueries.
/// Labels and paths fetched via correlated json_group_array subqueries.
fn hydrate_results(
fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result<Vec<HydratedRow>> {
    conn: &rusqlite::Connection,
    document_ids: &[i64],
) -> Result<Vec<HydratedRow>> {
    if document_ids.is_empty() {
    if document_ids.is_empty() {
        return Ok(Vec::new());
        return Ok(Vec::new());
    }
    }
    let ids_json = serde_json::to_string(document_ids)
    let ids_json =
        .map_err(|e| LoreError::Other(e.to_string()))?;
        serde_json::to_string(document_ids).map_err(|e| LoreError::Other(e.to_string()))?;
    let sql = r#"
    let sql = r#"
        SELECT d.id, d.source_type, d.title, d.url, d.author_username,
        SELECT d.id, d.source_type, d.title, d.url, d.author_username,
						
							
						
						
							
						
						
						
@@ -325,10 +318,7 @@ pub fn print_search_results(response: &SearchResponse) {
    }
    }
    if response.results.is_empty() {
    if response.results.is_empty() {
        println!(
        println!("No results found for '{}'", style(&response.query).bold());
            "No results found for '{}'",
            style(&response.query).bold()
        );
        return;
        return;
    }
    }
						
							
						
						
							
						
						
						
@@ -371,17 +361,11 @@ pub fn print_search_results(response: &SearchResponse) {
        );
        );
        if !result.labels.is_empty() {
        if !result.labels.is_empty() {
            println!(
            println!("    Labels: {}", result.labels.join(", "));
                "    Labels: {}",
                result.labels.join(", ")
            );
        }
        }
        // Strip HTML tags from snippet for terminal display
        // Strip HTML tags from snippet for terminal display
        let clean_snippet = result
        let clean_snippet = result.snippet.replace("<mark>", "").replace("</mark>", "");
            .snippet
            .replace("<mark>", "")
            .replace("</mark>", "");
        println!("    {}", style(clean_snippet).dim());
        println!("    {}", style(clean_snippet).dim());
        if let Some(ref explain) = result.explain {
        if let Some(ref explain) = result.explain {
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -154,10 +154,7 @@ fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Resu
                 FROM issues i
                 FROM issues i
                 JOIN projects p ON i.project_id = p.id
                 JOIN projects p ON i.project_id = p.id
                 WHERE i.iid = ? AND i.project_id = ?",
                 WHERE i.iid = ? AND i.project_id = ?",
                vec![
                vec![Box::new(iid), Box::new(project_id)],
                    Box::new(iid),
                    Box::new(project_id),
                ],
            )
            )
        }
        }
        None => (
        None => (
						
							
						
						
							
						
						
						
@@ -346,10 +343,7 @@ fn find_mr(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Result<
                 FROM merge_requests m
                 FROM merge_requests m
                 JOIN projects p ON m.project_id = p.id
                 JOIN projects p ON m.project_id = p.id
                 WHERE m.iid = ? AND m.project_id = ?",
                 WHERE m.iid = ? AND m.project_id = ?",
                vec![
                vec![Box::new(iid), Box::new(project_id)],
                    Box::new(iid),
                    Box::new(project_id),
                ],
            )
            )
        }
        }
        None => (
        None => (
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -4,10 +4,10 @@ use console::style;
use rusqlite::Connection;
use rusqlite::Connection;
use serde::Serialize;
use serde::Serialize;
use crate::Config;
use crate::core::db::create_connection;
use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::error::Result;
use crate::core::paths::get_db_path;
use crate::core::paths::get_db_path;
use crate::Config;
/// Result of the stats command.
/// Result of the stats command.
#[derive(Debug, Default, Serialize)]
#[derive(Debug, Default, Serialize)]
						
							
						
						
							
						
						
						
@@ -75,11 +75,7 @@ pub struct RepairResult {
}
}
/// Run the stats command.
/// Run the stats command.
pub fn run_stats(
pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResult> {
    config: &Config,
    check: bool,
    repair: bool,
) -> Result<StatsResult> {
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;
    let conn = create_connection(&db_path)?;
						
						
						
							
						
						
@@ -87,14 +83,22 @@ pub fn run_stats(
    // Document counts
    // Document counts
    result.documents.total = count_query(&conn, "SELECT COUNT(*) FROM documents")?;
    result.documents.total = count_query(&conn, "SELECT COUNT(*) FROM documents")?;
    result.documents.issues =
    result.documents.issues = count_query(
        count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'issue'")?;
        &conn,
    result.documents.merge_requests =
        "SELECT COUNT(*) FROM documents WHERE source_type = 'issue'",
        count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'merge_request'")?;
    )?;
    result.documents.discussions =
    result.documents.merge_requests = count_query(
        count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'discussion'")?;
        &conn,
    result.documents.truncated =
        "SELECT COUNT(*) FROM documents WHERE source_type = 'merge_request'",
        count_query(&conn, "SELECT COUNT(*) FROM documents WHERE is_truncated = 1")?;
    )?;
    result.documents.discussions = count_query(
        &conn,
        "SELECT COUNT(*) FROM documents WHERE source_type = 'discussion'",
    )?;
    result.documents.truncated = count_query(
        &conn,
        "SELECT COUNT(*) FROM documents WHERE is_truncated = 1",
    )?;
    // Embedding stats — skip gracefully if table doesn't exist (Gate A only)
    // Embedding stats — skip gracefully if table doesn't exist (Gate A only)
    if table_exists(&conn, "embedding_metadata") {
    if table_exists(&conn, "embedding_metadata") {
						
						
						
							
						
						
@@ -119,10 +123,14 @@ pub fn run_stats(
    result.fts.indexed = count_query(&conn, "SELECT COUNT(*) FROM documents_fts")?;
    result.fts.indexed = count_query(&conn, "SELECT COUNT(*) FROM documents_fts")?;
    // Queue stats
    // Queue stats
    result.queues.dirty_sources =
    result.queues.dirty_sources = count_query(
        count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL")?;
        &conn,
    result.queues.dirty_sources_failed =
        "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL",
        count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NOT NULL")?;
    )?;
    result.queues.dirty_sources_failed = count_query(
        &conn,
        "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NOT NULL",
    )?;
    if table_exists(&conn, "pending_discussion_fetches") {
    if table_exists(&conn, "pending_discussion_fetches") {
        result.queues.pending_discussion_fetches = count_query(
        result.queues.pending_discussion_fetches = count_query(
						
							
						
						
							
						
						
						
@@ -151,6 +159,7 @@ pub fn run_stats(
    }
    }
    // Integrity check
    // Integrity check
    #[allow(clippy::field_reassign_with_default)]
    if check {
    if check {
        let mut integrity = IntegrityResult::default();
        let mut integrity = IntegrityResult::default();
						
							
						
						
							
						
						
						
@@ -276,9 +285,7 @@ pub fn run_stats(
}
}
fn count_query(conn: &Connection, sql: &str) -> Result<i64> {
fn count_query(conn: &Connection, sql: &str) -> Result<i64> {
    let count: i64 = conn
    let count: i64 = conn.query_row(sql, [], |row| row.get(0)).unwrap_or(0);
        .query_row(sql, [], |row| row.get(0))
        .unwrap_or(0);
    Ok(count)
    Ok(count)
}
}
						
						
						
							
						
						
@@ -300,7 +307,10 @@ pub fn print_stats(result: &StatsResult) {
    println!("  Merge Requests:  {}", result.documents.merge_requests);
    println!("  Merge Requests:  {}", result.documents.merge_requests);
    println!("  Discussions:     {}", result.documents.discussions);
    println!("  Discussions:     {}", result.documents.discussions);
    if result.documents.truncated > 0 {
    if result.documents.truncated > 0 {
        println!("  Truncated:       {}", style(result.documents.truncated).yellow());
        println!(
            "  Truncated:       {}",
            style(result.documents.truncated).yellow()
        );
    }
    }
    println!();
    println!();
						
						
						
							
						
						
@@ -318,13 +328,13 @@ pub fn print_stats(result: &StatsResult) {
    println!();
    println!();
    println!("{}", style("Queues").cyan().bold());
    println!("{}", style("Queues").cyan().bold());
    println!("  Dirty sources:   {} pending, {} failed",
    println!(
        result.queues.dirty_sources,
        "  Dirty sources:   {} pending, {} failed",
        result.queues.dirty_sources_failed
        result.queues.dirty_sources, result.queues.dirty_sources_failed
    );
    );
    println!("  Discussion fetch: {} pending, {} failed",
    println!(
        result.queues.pending_discussion_fetches,
        "  Discussion fetch: {} pending, {} failed",
        result.queues.pending_discussion_fetches_failed
        result.queues.pending_discussion_fetches, result.queues.pending_discussion_fetches_failed
    );
    );
    if result.queues.pending_dependent_fetches > 0
    if result.queues.pending_dependent_fetches > 0
        || result.queues.pending_dependent_fetches_failed > 0
        || result.queues.pending_dependent_fetches_failed > 0
						
							
						
						
							
						
						
						
@@ -431,10 +441,12 @@ pub fn print_stats_json(result: &StatsResult) {
    let output = StatsJsonOutput {
    let output = StatsJsonOutput {
        ok: true,
        ok: true,
        data: StatsResult {
        data: StatsResult {
            documents: DocumentStats { ..*&result.documents },
            documents: DocumentStats { ..result.documents },
            embeddings: EmbeddingStats { ..*&result.embeddings },
            embeddings: EmbeddingStats {
            fts: FtsStats { ..*&result.fts },
                ..result.embeddings
            queues: QueueStats { ..*&result.queues },
            },
            fts: FtsStats { ..result.fts },
            queues: QueueStats { ..result.queues },
            integrity: result.integrity.as_ref().map(|i| IntegrityResult {
            integrity: result.integrity.as_ref().map(|i| IntegrityResult {
                ok: i.ok,
                ok: i.ok,
                fts_doc_mismatch: i.fts_doc_mismatch,
                fts_doc_mismatch: i.fts_doc_mismatch,
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -29,6 +29,8 @@ pub struct SyncResult {
    pub issues_updated: usize,
    pub issues_updated: usize,
    pub mrs_updated: usize,
    pub mrs_updated: usize,
    pub discussions_fetched: usize,
    pub discussions_fetched: usize,
    pub resource_events_fetched: usize,
    pub resource_events_failed: usize,
    pub documents_regenerated: usize,
    pub documents_regenerated: usize,
    pub documents_embedded: usize,
    pub documents_embedded: usize,
}
}
						
							
						
						
							
						
						
						
@@ -70,26 +72,61 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
    // Stage 1: Ingest issues
    // Stage 1: Ingest issues
    current_stage += 1;
    current_stage += 1;
    let spinner = stage_spinner(current_stage, total_stages, "Fetching issues from GitLab...", options.robot_mode);
    let spinner = stage_spinner(
        current_stage,
        total_stages,
        "Fetching issues from GitLab...",
        options.robot_mode,
    );
    info!("Sync stage {current_stage}/{total_stages}: ingesting issues");
    info!("Sync stage {current_stage}/{total_stages}: ingesting issues");
    let issues_result = run_ingest(config, "issues", None, options.force, options.full, ingest_display).await?;
    let issues_result = run_ingest(
        config,
        "issues",
        None,
        options.force,
        options.full,
        ingest_display,
    )
    .await?;
    result.issues_updated = issues_result.issues_upserted;
    result.issues_updated = issues_result.issues_upserted;
    result.discussions_fetched += issues_result.discussions_fetched;
    result.discussions_fetched += issues_result.discussions_fetched;
    result.resource_events_fetched += issues_result.resource_events_fetched;
    result.resource_events_failed += issues_result.resource_events_failed;
    spinner.finish_and_clear();
    spinner.finish_and_clear();
    // Stage 2: Ingest MRs
    // Stage 2: Ingest MRs
    current_stage += 1;
    current_stage += 1;
    let spinner = stage_spinner(current_stage, total_stages, "Fetching merge requests from GitLab...", options.robot_mode);
    let spinner = stage_spinner(
        current_stage,
        total_stages,
        "Fetching merge requests from GitLab...",
        options.robot_mode,
    );
    info!("Sync stage {current_stage}/{total_stages}: ingesting merge requests");
    info!("Sync stage {current_stage}/{total_stages}: ingesting merge requests");
    let mrs_result = run_ingest(config, "mrs", None, options.force, options.full, ingest_display).await?;
    let mrs_result = run_ingest(
        config,
        "mrs",
        None,
        options.force,
        options.full,
        ingest_display,
    )
    .await?;
    result.mrs_updated = mrs_result.mrs_upserted;
    result.mrs_updated = mrs_result.mrs_upserted;
    result.discussions_fetched += mrs_result.discussions_fetched;
    result.discussions_fetched += mrs_result.discussions_fetched;
    result.resource_events_fetched += mrs_result.resource_events_fetched;
    result.resource_events_failed += mrs_result.resource_events_failed;
    spinner.finish_and_clear();
    spinner.finish_and_clear();
    // Stage 3: Generate documents (unless --no-docs)
    // Stage 3: Generate documents (unless --no-docs)
    if !options.no_docs {
    if !options.no_docs {
        current_stage += 1;
        current_stage += 1;
        let spinner = stage_spinner(current_stage, total_stages, "Processing documents...", options.robot_mode);
        let spinner = stage_spinner(
            current_stage,
            total_stages,
            "Processing documents...",
            options.robot_mode,
        );
        info!("Sync stage {current_stage}/{total_stages}: generating documents");
        info!("Sync stage {current_stage}/{total_stages}: generating documents");
        let docs_result = run_generate_docs(config, false, None)?;
        let docs_result = run_generate_docs(config, false, None)?;
        result.documents_regenerated = docs_result.regenerated;
        result.documents_regenerated = docs_result.regenerated;
						
						
						
							
						
						
@@ -101,7 +138,12 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
    // Stage 4: Embed documents (unless --no-embed)
    // Stage 4: Embed documents (unless --no-embed)
    if !options.no_embed {
    if !options.no_embed {
        current_stage += 1;
        current_stage += 1;
        let spinner = stage_spinner(current_stage, total_stages, "Generating embeddings...", options.robot_mode);
        let spinner = stage_spinner(
            current_stage,
            total_stages,
            "Generating embeddings...",
            options.robot_mode,
        );
        info!("Sync stage {current_stage}/{total_stages}: embedding documents");
        info!("Sync stage {current_stage}/{total_stages}: embedding documents");
        match run_embed(config, options.full, false).await {
        match run_embed(config, options.full, false).await {
            Ok(embed_result) => {
            Ok(embed_result) => {
						
						
						
							
						
						
@@ -112,11 +154,7 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
                // Graceful degradation: Ollama down is a warning, not an error
                // Graceful degradation: Ollama down is a warning, not an error
                spinner.finish_and_clear();
                spinner.finish_and_clear();
                if !options.robot_mode {
                if !options.robot_mode {
                    eprintln!(
                    eprintln!("  {} Embedding skipped ({})", style("warn").yellow(), e);
                        "  {} Embedding skipped ({})",
                        style("warn").yellow(),
                        e
                    );
                }
                }
                warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing");
                warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing");
            }
            }
						
						
						
							
						
						
@@ -129,6 +167,8 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
        issues = result.issues_updated,
        issues = result.issues_updated,
        mrs = result.mrs_updated,
        mrs = result.mrs_updated,
        discussions = result.discussions_fetched,
        discussions = result.discussions_fetched,
        resource_events = result.resource_events_fetched,
        resource_events_failed = result.resource_events_failed,
        docs = result.documents_regenerated,
        docs = result.documents_regenerated,
        embedded = result.documents_embedded,
        embedded = result.documents_embedded,
        "Sync pipeline complete"
        "Sync pipeline complete"
						
						
						
							
						
						
@@ -139,19 +179,31 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
/// Print human-readable sync summary.
/// Print human-readable sync summary.
pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) {
pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) {
    println!(
    println!("{} Sync complete:", style("done").green().bold(),);
        "{} Sync complete:",
        style("done").green().bold(),
    );
    println!("  Issues updated:           {}", result.issues_updated);
    println!("  Issues updated:           {}", result.issues_updated);
    println!("  MRs updated:              {}", result.mrs_updated);
    println!("  MRs updated:              {}", result.mrs_updated);
    println!("  Discussions fetched:       {}", result.discussions_fetched);
    println!("  Documents regenerated:     {}", result.documents_regenerated);
    println!("  Documents embedded:        {}", result.documents_embedded);
    println!(
    println!(
        "  Elapsed: {:.1}s",
        "  Discussions fetched:       {}",
        elapsed.as_secs_f64()
        result.discussions_fetched
    );
    );
    if result.resource_events_fetched > 0 || result.resource_events_failed > 0 {
        println!(
            "  Resource events fetched:   {}",
            result.resource_events_fetched
        );
        if result.resource_events_failed > 0 {
            println!(
                "  Resource events failed:    {}",
                result.resource_events_failed
            );
        }
    }
    println!(
        "  Documents regenerated:     {}",
        result.documents_regenerated
    );
    println!("  Documents embedded:        {}", result.documents_embedded);
    println!("  Elapsed: {:.1}s", elapsed.as_secs_f64());
}
}
/// JSON output for sync.
/// JSON output for sync.
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -213,7 +213,12 @@ pub struct IssuesArgs {
    pub iid: Option<i64>,
    pub iid: Option<i64>,
    /// Maximum results
    /// Maximum results
    #[arg(short = 'n', long = "limit", default_value = "50", help_heading = "Output")]
    #[arg(
        short = 'n',
        long = "limit",
        default_value = "50",
        help_heading = "Output"
    )]
    pub limit: usize,
    pub limit: usize,
    /// Filter by state (opened, closed, all)
    /// Filter by state (opened, closed, all)
						
							
						
						
							
						
						
						
@@ -249,7 +254,11 @@ pub struct IssuesArgs {
    pub due_before: Option<String>,
    pub due_before: Option<String>,
    /// Show only issues with a due date
    /// Show only issues with a due date
    #[arg(long = "has-due", help_heading = "Filters", overrides_with = "no_has_due")]
    #[arg(
        long = "has-due",
        help_heading = "Filters",
        overrides_with = "no_has_due"
    )]
    pub has_due: bool,
    pub has_due: bool,
    #[arg(long = "no-has-due", hide = true, overrides_with = "has_due")]
    #[arg(long = "no-has-due", hide = true, overrides_with = "has_due")]
						
						
						
							
						
						
@@ -267,7 +276,12 @@ pub struct IssuesArgs {
    pub no_asc: bool,
    pub no_asc: bool,
    /// Open first matching item in browser
    /// Open first matching item in browser
    #[arg(short = 'o', long, help_heading = "Actions", overrides_with = "no_open")]
    #[arg(
        short = 'o',
        long,
        help_heading = "Actions",
        overrides_with = "no_open"
    )]
    pub open: bool,
    pub open: bool,
    #[arg(long = "no-open", hide = true, overrides_with = "open")]
    #[arg(long = "no-open", hide = true, overrides_with = "open")]
						
						
						
							
						
						
@@ -281,7 +295,12 @@ pub struct MrsArgs {
    pub iid: Option<i64>,
    pub iid: Option<i64>,
    /// Maximum results
    /// Maximum results
    #[arg(short = 'n', long = "limit", default_value = "50", help_heading = "Output")]
    #[arg(
        short = 'n',
        long = "limit",
        default_value = "50",
        help_heading = "Output"
    )]
    pub limit: usize,
    pub limit: usize,
    /// Filter by state (opened, merged, closed, locked, all)
    /// Filter by state (opened, merged, closed, locked, all)
						
							
						
						
							
						
						
						
@@ -313,11 +332,21 @@ pub struct MrsArgs {
    pub since: Option<String>,
    pub since: Option<String>,
    /// Show only draft MRs
    /// Show only draft MRs
    #[arg(short = 'd', long, conflicts_with = "no_draft", help_heading = "Filters")]
    #[arg(
        short = 'd',
        long,
        conflicts_with = "no_draft",
        help_heading = "Filters"
    )]
    pub draft: bool,
    pub draft: bool,
    /// Exclude draft MRs
    /// Exclude draft MRs
    #[arg(short = 'D', long = "no-draft", conflicts_with = "draft", help_heading = "Filters")]
    #[arg(
        short = 'D',
        long = "no-draft",
        conflicts_with = "draft",
        help_heading = "Filters"
    )]
    pub no_draft: bool,
    pub no_draft: bool,
    /// Filter by target branch
    /// Filter by target branch
						
						
						
							
						
						
@@ -340,7 +369,12 @@ pub struct MrsArgs {
    pub no_asc: bool,
    pub no_asc: bool,
    /// Open first matching item in browser
    /// Open first matching item in browser
    #[arg(short = 'o', long, help_heading = "Actions", overrides_with = "no_open")]
    #[arg(
        short = 'o',
        long,
        help_heading = "Actions",
        overrides_with = "no_open"
    )]
    pub open: bool,
    pub open: bool,
    #[arg(long = "no-open", hide = true, overrides_with = "open")]
    #[arg(long = "no-open", hide = true, overrides_with = "open")]
						
							
						
						
							
						
						
						
@@ -427,7 +461,12 @@ pub struct SearchArgs {
    pub updated_after: Option<String>,
    pub updated_after: Option<String>,
    /// Maximum results (default 20, max 100)
    /// Maximum results (default 20, max 100)
    #[arg(short = 'n', long = "limit", default_value = "20", help_heading = "Output")]
    #[arg(
        short = 'n',
        long = "limit",
        default_value = "20",
        help_heading = "Output"
    )]
    pub limit: usize,
    pub limit: usize,
    /// Show ranking explanation per result
    /// Show ranking explanation per result
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -86,7 +86,10 @@ mod tests {
        let result = compute_next_attempt_at(now, 1);
        let result = compute_next_attempt_at(now, 1);
        let delay = result - now;
        let delay = result - now;
        // attempt 1: base = 2000ms, with jitter: 1800-2200ms
        // attempt 1: base = 2000ms, with jitter: 1800-2200ms
        assert!(delay >= 1800 && delay <= 2200, "first retry delay: {delay}ms");
        assert!(
            (1800..=2200).contains(&delay),
            "first retry delay: {delay}ms"
        );
    }
    }
    #[test]
    #[test]
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -31,22 +31,10 @@ const MIGRATIONS: &[(&str, &str)] = &[
        "006",
        "006",
        include_str!("../../migrations/006_merge_requests.sql"),
        include_str!("../../migrations/006_merge_requests.sql"),
    ),
    ),
    (
    ("007", include_str!("../../migrations/007_documents.sql")),
        "007",
    ("008", include_str!("../../migrations/008_fts5.sql")),
        include_str!("../../migrations/007_documents.sql"),
    ("009", include_str!("../../migrations/009_embeddings.sql")),
    ),
    ("010", include_str!("../../migrations/010_chunk_config.sql")),
    (
        "008",
        include_str!("../../migrations/008_fts5.sql"),
    ),
    (
        "009",
        include_str!("../../migrations/009_embeddings.sql"),
    ),
    (
        "010",
        include_str!("../../migrations/010_chunk_config.sql"),
    ),
    (
    (
        "011",
        "011",
        include_str!("../../migrations/011_resource_events.sql"),
        include_str!("../../migrations/011_resource_events.sql"),
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -40,7 +40,15 @@ pub fn enqueue_job(
        "INSERT OR IGNORE INTO pending_dependent_fetches
        "INSERT OR IGNORE INTO pending_dependent_fetches
         (project_id, entity_type, entity_iid, entity_local_id, job_type, payload_json, enqueued_at)
         (project_id, entity_type, entity_iid, entity_local_id, job_type, payload_json, enqueued_at)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
        rusqlite::params![project_id, entity_type, entity_iid, entity_local_id, job_type, payload_json, now],
        rusqlite::params![
            project_id,
            entity_type,
            entity_iid,
            entity_local_id,
            job_type,
            payload_json,
            now
        ],
    )?;
    )?;
    Ok(changes > 0)
    Ok(changes > 0)
						
							
						
						
							
						
						
						
@@ -69,9 +77,7 @@ pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Resul
    )?;
    )?;
    let jobs: Vec<PendingJob> = select_stmt
    let jobs: Vec<PendingJob> = select_stmt
        .query_map(
        .query_map(rusqlite::params![job_type, now, batch_size as i64], |row| {
            rusqlite::params![job_type, now, batch_size as i64],
            |row| {
            Ok(PendingJob {
            Ok(PendingJob {
                id: row.get(0)?,
                id: row.get(0)?,
                project_id: row.get(1)?,
                project_id: row.get(1)?,
						
						
						
							
						
						
@@ -82,8 +88,7 @@ pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Resul
                payload_json: row.get(6)?,
                payload_json: row.get(6)?,
                attempts: row.get(7)?,
                attempts: row.get(7)?,
            })
            })
            },
        })?
        )?
        .collect::<std::result::Result<Vec<_>, _>>()?;
        .collect::<std::result::Result<Vec<_>, _>>()?;
    // Lock the claimed jobs
    // Lock the claimed jobs
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -222,9 +222,9 @@ impl LoreError {
                "Check database file permissions or reset with 'lore reset'.\n\n  Example:\n    lore doctor\n    lore reset --yes",
                "Check database file permissions or reset with 'lore reset'.\n\n  Example:\n    lore doctor\n    lore reset --yes",
            ),
            ),
            Self::Http(_) => Some("Check network connection"),
            Self::Http(_) => Some("Check network connection"),
            Self::NotFound(_) => Some(
            Self::NotFound(_) => {
                "Verify the entity exists.\n\n  Example:\n    lore issues\n    lore mrs",
                Some("Verify the entity exists.\n\n  Example:\n    lore issues\n    lore mrs")
            ),
            }
            Self::Ambiguous(_) => Some(
            Self::Ambiguous(_) => Some(
                "Use -p to choose a specific project.\n\n  Example:\n    lore issues 42 -p group/project-a\n    lore mrs 99 -p group/project-b",
                "Use -p to choose a specific project.\n\n  Example:\n    lore issues 42 -p group/project-a\n    lore mrs 99 -p group/project-b",
            ),
            ),
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -150,7 +150,10 @@ pub fn upsert_milestone_events(
/// Resolve entity type string to (issue_id, merge_request_id) pair.
/// Resolve entity type string to (issue_id, merge_request_id) pair.
/// Exactly one is Some, the other is None.
/// Exactly one is Some, the other is None.
fn resolve_entity_ids(entity_type: &str, entity_local_id: i64) -> Result<(Option<i64>, Option<i64>)> {
fn resolve_entity_ids(
    entity_type: &str,
    entity_local_id: i64,
) -> Result<(Option<i64>, Option<i64>)> {
    match entity_type {
    match entity_type {
        "issue" => Ok((Some(entity_local_id), None)),
        "issue" => Ok((Some(entity_local_id), None)),
        "merge_request" => Ok((None, Some(entity_local_id))),
        "merge_request" => Ok((None, Some(entity_local_id))),
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -33,7 +33,7 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
    let mut suffix_stmt = conn.prepare(
    let mut suffix_stmt = conn.prepare(
        "SELECT id, path_with_namespace FROM projects
        "SELECT id, path_with_namespace FROM projects
         WHERE path_with_namespace LIKE '%/' || ?1
         WHERE path_with_namespace LIKE '%/' || ?1
            OR path_with_namespace = ?1"
            OR path_with_namespace = ?1",
    )?;
    )?;
    let suffix_matches: Vec<(i64, String)> = suffix_stmt
    let suffix_matches: Vec<(i64, String)> = suffix_stmt
        .query_map(rusqlite::params![project_str], |row| {
        .query_map(rusqlite::params![project_str], |row| {
						
						
						
							
						
						
@@ -48,7 +48,11 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
            return Err(LoreError::Ambiguous(format!(
            return Err(LoreError::Ambiguous(format!(
                "Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}",
                "Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}",
                project_str,
                project_str,
                matching.iter().map(|p| format!("  {}", p)).collect::<Vec<_>>().join("\n"),
                matching
                    .iter()
                    .map(|p| format!("  {}", p))
                    .collect::<Vec<_>>()
                    .join("\n"),
                matching[0]
                matching[0]
            )));
            )));
        }
        }
						
						
						
							
						
						
@@ -58,7 +62,7 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
    // Step 4: Case-insensitive substring match (unambiguous)
    // Step 4: Case-insensitive substring match (unambiguous)
    let mut substr_stmt = conn.prepare(
    let mut substr_stmt = conn.prepare(
        "SELECT id, path_with_namespace FROM projects
        "SELECT id, path_with_namespace FROM projects
         WHERE LOWER(path_with_namespace) LIKE '%' || LOWER(?1) || '%'"
         WHERE LOWER(path_with_namespace) LIKE '%' || LOWER(?1) || '%'",
    )?;
    )?;
    let substr_matches: Vec<(i64, String)> = substr_stmt
    let substr_matches: Vec<(i64, String)> = substr_stmt
        .query_map(rusqlite::params![project_str], |row| {
        .query_map(rusqlite::params![project_str], |row| {
						
						
						
							
						
						
@@ -73,7 +77,11 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
            return Err(LoreError::Ambiguous(format!(
            return Err(LoreError::Ambiguous(format!(
                "Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}",
                "Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}",
                project_str,
                project_str,
                matching.iter().map(|p| format!("  {}", p)).collect::<Vec<_>>().join("\n"),
                matching
                    .iter()
                    .map(|p| format!("  {}", p))
                    .collect::<Vec<_>>()
                    .join("\n"),
                matching[0]
                matching[0]
            )));
            )));
        }
        }
						
						
						
							
						
						
@@ -81,9 +89,8 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
    }
    }
    // Step 5: No match — list available projects
    // Step 5: No match — list available projects
    let mut all_stmt = conn.prepare(
    let mut all_stmt =
        "SELECT path_with_namespace FROM projects ORDER BY path_with_namespace"
        conn.prepare("SELECT path_with_namespace FROM projects ORDER BY path_with_namespace")?;
    )?;
    let all_projects: Vec<String> = all_stmt
    let all_projects: Vec<String> = all_stmt
        .query_map([], |row| row.get(0))?
        .query_map([], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;
        .collect::<std::result::Result<Vec<_>, _>>()?;
						
						
						
							
						
						
@@ -98,7 +105,11 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
    Err(LoreError::Other(format!(
    Err(LoreError::Other(format!(
        "Project '{}' not found.\n\nAvailable projects:\n{}\n\nHint: Use the full path, e.g., --project={}",
        "Project '{}' not found.\n\nAvailable projects:\n{}\n\nHint: Use the full path, e.g., --project={}",
        project_str,
        project_str,
        all_projects.iter().map(|p| format!("  {}", p)).collect::<Vec<_>>().join("\n"),
        all_projects
            .iter()
            .map(|p| format!("  {}", p))
            .collect::<Vec<_>>()
            .join("\n"),
        all_projects[0]
        all_projects[0]
    )))
    )))
}
}
						
						
						
							
						
						
@@ -109,7 +120,8 @@ mod tests {
    fn setup_db() -> Connection {
    fn setup_db() -> Connection {
        let conn = Connection::open_in_memory().unwrap();
        let conn = Connection::open_in_memory().unwrap();
        conn.execute_batch("
        conn.execute_batch(
            "
            CREATE TABLE projects (
            CREATE TABLE projects (
                id INTEGER PRIMARY KEY,
                id INTEGER PRIMARY KEY,
                gitlab_project_id INTEGER UNIQUE NOT NULL,
                gitlab_project_id INTEGER UNIQUE NOT NULL,
						
						
						
							
						
						
@@ -121,7 +133,9 @@ mod tests {
                raw_payload_id INTEGER
                raw_payload_id INTEGER
            );
            );
            CREATE INDEX idx_projects_path ON projects(path_with_namespace);
            CREATE INDEX idx_projects_path ON projects(path_with_namespace);
        ").unwrap();
        ",
        )
        .unwrap();
        conn
        conn
    }
    }
						
						
						
							
						
						
@@ -129,7 +143,8 @@ mod tests {
        conn.execute(
        conn.execute(
            "INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (?1, ?2, ?3)",
            "INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (?1, ?2, ?3)",
            rusqlite::params![id, id * 100, path],
            rusqlite::params![id, id * 100, path],
        ).unwrap();
        )
        .unwrap();
    }
    }
    #[test]
    #[test]
						
							
						
						
							
						
						
						
@@ -164,7 +179,11 @@ mod tests {
        insert_project(&conn, 2, "frontend/auth-service");
        insert_project(&conn, 2, "frontend/auth-service");
        let err = resolve_project(&conn, "auth-service").unwrap_err();
        let err = resolve_project(&conn, "auth-service").unwrap_err();
        let msg = err.to_string();
        let msg = err.to_string();
        assert!(msg.contains("ambiguous"), "Expected ambiguous error, got: {}", msg);
        assert!(
            msg.contains("ambiguous"),
            "Expected ambiguous error, got: {}",
            msg
        );
        assert!(msg.contains("backend/auth-service"));
        assert!(msg.contains("backend/auth-service"));
        assert!(msg.contains("frontend/auth-service"));
        assert!(msg.contains("frontend/auth-service"));
    }
    }
						
							
						
						
							
						
						
						
@@ -195,7 +214,11 @@ mod tests {
        // "code" matches both projects
        // "code" matches both projects
        let err = resolve_project(&conn, "code").unwrap_err();
        let err = resolve_project(&conn, "code").unwrap_err();
        let msg = err.to_string();
        let msg = err.to_string();
        assert!(msg.contains("ambiguous"), "Expected ambiguous error, got: {}", msg);
        assert!(
            msg.contains("ambiguous"),
            "Expected ambiguous error, got: {}",
            msg
        );
        assert!(msg.contains("vs/python-code"));
        assert!(msg.contains("vs/python-code"));
        assert!(msg.contains("vs/typescript-code"));
        assert!(msg.contains("vs/typescript-code"));
    }
    }
						
						
						
							
						
						
@@ -217,7 +240,11 @@ mod tests {
        insert_project(&conn, 1, "backend/auth-service");
        insert_project(&conn, 1, "backend/auth-service");
        let err = resolve_project(&conn, "nonexistent").unwrap_err();
        let err = resolve_project(&conn, "nonexistent").unwrap_err();
        let msg = err.to_string();
        let msg = err.to_string();
        assert!(msg.contains("not found"), "Expected not found error, got: {}", msg);
        assert!(
            msg.contains("not found"),
            "Expected not found error, got: {}",
            msg
        );
        assert!(msg.contains("backend/auth-service"));
        assert!(msg.contains("backend/auth-service"));
    }
    }
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -4,10 +4,10 @@ use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use sha2::{Digest, Sha256};
use std::collections::BTreeSet;
use std::collections::BTreeSet;
use crate::core::error::Result;
use super::truncation::{
use super::truncation::{
    truncate_discussion, truncate_hard_cap, NoteContent, MAX_DISCUSSION_BYTES,
    MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap,
};
};
use crate::core::error::Result;
/// Source type for documents.
/// Source type for documents.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
						
							
						
						
							
						
						
						
@@ -113,7 +113,19 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option
        },
        },
    );
    );
    let (id, iid, title, description, state, author_username, created_at, updated_at, web_url, path_with_namespace, project_id) = match row {
    let (
        id,
        iid,
        title,
        description,
        state,
        author_username,
        created_at,
        updated_at,
        web_url,
        path_with_namespace,
        project_id,
    ) = match row {
        Ok(r) => r,
        Ok(r) => r,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
        Err(e) => return Err(e.into()),
						
						
						
							
						
						
@@ -124,15 +136,14 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option
        "SELECT l.name FROM issue_labels il
        "SELECT l.name FROM issue_labels il
         JOIN labels l ON l.id = il.label_id
         JOIN labels l ON l.id = il.label_id
         WHERE il.issue_id = ?1
         WHERE il.issue_id = ?1
         ORDER BY l.name"
         ORDER BY l.name",
    )?;
    )?;
    let labels: Vec<String> = label_stmt
    let labels: Vec<String> = label_stmt
        .query_map(rusqlite::params![id], |row| row.get(0))?
        .query_map(rusqlite::params![id], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;
        .collect::<std::result::Result<Vec<_>, _>>()?;
    // Build labels JSON array string
    // Build labels JSON array string
    let labels_json = serde_json::to_string(&labels)
    let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
        .unwrap_or_else(|_| "[]".to_string());
    // Format content_text per PRD template
    // Format content_text per PRD template
    let display_title = title.as_deref().unwrap_or("(untitled)");
    let display_title = title.as_deref().unwrap_or("(untitled)");
						
							
						
						
							
						
						
						
@@ -213,7 +224,21 @@ pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<Docum
        },
        },
    );
    );
    let (id, iid, title, description, state, author_username, source_branch, target_branch, created_at, updated_at, web_url, path_with_namespace, project_id) = match row {
    let (
        id,
        iid,
        title,
        description,
        state,
        author_username,
        source_branch,
        target_branch,
        created_at,
        updated_at,
        web_url,
        path_with_namespace,
        project_id,
    ) = match row {
        Ok(r) => r,
        Ok(r) => r,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
        Err(e) => return Err(e.into()),
						
						
						
							
						
						
@@ -224,14 +249,13 @@ pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<Docum
        "SELECT l.name FROM mr_labels ml
        "SELECT l.name FROM mr_labels ml
         JOIN labels l ON l.id = ml.label_id
         JOIN labels l ON l.id = ml.label_id
         WHERE ml.merge_request_id = ?1
         WHERE ml.merge_request_id = ?1
         ORDER BY l.name"
         ORDER BY l.name",
    )?;
    )?;
    let labels: Vec<String> = label_stmt
    let labels: Vec<String> = label_stmt
        .query_map(rusqlite::params![id], |row| row.get(0))?
        .query_map(rusqlite::params![id], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;
        .collect::<std::result::Result<Vec<_>, _>>()?;
    let labels_json = serde_json::to_string(&labels)
    let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
        .unwrap_or_else(|_| "[]".to_string());
    let display_title = title.as_deref().unwrap_or("(untitled)");
    let display_title = title.as_deref().unwrap_or("(untitled)");
    let display_state = state.as_deref().unwrap_or("unknown");
    let display_state = state.as_deref().unwrap_or("unknown");
						
							
						
						
							
						
						
						
@@ -359,13 +383,7 @@ pub fn extract_discussion_document(
                    .query_map(rusqlite::params![parent_id], |row| row.get(0))?
                    .query_map(rusqlite::params![parent_id], |row| row.get(0))?
                    .collect::<std::result::Result<Vec<_>, _>>()?;
                    .collect::<std::result::Result<Vec<_>, _>>()?;
                (
                (iid, title, web_url, format!("Issue #{}", iid), labels)
                    iid,
                    title,
                    web_url,
                    format!("Issue #{}", iid),
                    labels,
                )
            }
            }
            "MergeRequest" => {
            "MergeRequest" => {
                let parent_id = match merge_request_id {
                let parent_id = match merge_request_id {
						
							
						
						
							
						
						
						
@@ -399,13 +417,7 @@ pub fn extract_discussion_document(
                    .query_map(rusqlite::params![parent_id], |row| row.get(0))?
                    .query_map(rusqlite::params![parent_id], |row| row.get(0))?
                    .collect::<std::result::Result<Vec<_>, _>>()?;
                    .collect::<std::result::Result<Vec<_>, _>>()?;
                (
                (iid, title, web_url, format!("MR !{}", iid), labels)
                    iid,
                    title,
                    web_url,
                    format!("MR !{}", iid),
                    labels,
                )
            }
            }
            _ => return Ok(None),
            _ => return Ok(None),
        };
        };
						
							
						
						
							
						
						
						
@@ -449,17 +461,17 @@ pub fn extract_discussion_document(
    // Extract DiffNote paths (deduplicated, sorted)
    // Extract DiffNote paths (deduplicated, sorted)
    let mut path_set = BTreeSet::new();
    let mut path_set = BTreeSet::new();
    for note in &notes {
    for note in &notes {
        if let Some(ref p) = note.old_path {
        if let Some(ref p) = note.old_path
            if !p.is_empty() {
            && !p.is_empty()
        {
            path_set.insert(p.clone());
            path_set.insert(p.clone());
        }
        }
        }
        if let Some(ref p) = note.new_path
        if let Some(ref p) = note.new_path {
            && !p.is_empty()
            if !p.is_empty() {
        {
            path_set.insert(p.clone());
            path_set.insert(p.clone());
        }
        }
    }
    }
    }
    let paths: Vec<String> = path_set.into_iter().collect();
    let paths: Vec<String> = path_set.into_iter().collect();
    // Construct URL: parent_web_url#note_{first_note_gitlab_id}
    // Construct URL: parent_web_url#note_{first_note_gitlab_id}
						
							
						
						
							
						
						
						
@@ -620,7 +632,8 @@ mod tests {
    // Helper to create an in-memory DB with the required tables for extraction tests
    // Helper to create an in-memory DB with the required tables for extraction tests
    fn setup_test_db() -> Connection {
    fn setup_test_db() -> Connection {
        let conn = Connection::open_in_memory().unwrap();
        let conn = Connection::open_in_memory().unwrap();
        conn.execute_batch("
        conn.execute_batch(
            "
            CREATE TABLE projects (
            CREATE TABLE projects (
                id INTEGER PRIMARY KEY,
                id INTEGER PRIMARY KEY,
                gitlab_project_id INTEGER UNIQUE NOT NULL,
                gitlab_project_id INTEGER UNIQUE NOT NULL,
						
							
						
						
							
						
						
						
@@ -660,7 +673,9 @@ mod tests {
                label_id INTEGER NOT NULL REFERENCES labels(id),
                label_id INTEGER NOT NULL REFERENCES labels(id),
                PRIMARY KEY(issue_id, label_id)
                PRIMARY KEY(issue_id, label_id)
            );
            );
        ").unwrap();
        ",
        )
        .unwrap();
        // Insert a test project
        // Insert a test project
        conn.execute(
        conn.execute(
						
						
						
							
						
						
@@ -671,7 +686,17 @@ mod tests {
        conn
        conn
    }
    }
    fn insert_issue(conn: &Connection, id: i64, iid: i64, title: Option<&str>, description: Option<&str>, state: &str, author: Option<&str>, web_url: Option<&str>) {
    #[allow(clippy::too_many_arguments)]
    fn insert_issue(
        conn: &Connection,
        id: i64,
        iid: i64,
        title: Option<&str>,
        description: Option<&str>,
        state: &str,
        author: Option<&str>,
        web_url: Option<&str>,
    ) {
        conn.execute(
        conn.execute(
            "INSERT INTO issues (id, gitlab_id, project_id, iid, title, description, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, 1000, 2000, 3000, ?8)",
            "INSERT INTO issues (id, gitlab_id, project_id, iid, title, description, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, 1000, 2000, 3000, ?8)",
            rusqlite::params![id, id * 10, iid, title, description, state, author, web_url],
            rusqlite::params![id, id * 10, iid, title, description, state, author, web_url],
						
						
						
							
						
						
@@ -682,20 +707,31 @@ mod tests {
        conn.execute(
        conn.execute(
            "INSERT INTO labels (id, project_id, name) VALUES (?1, 1, ?2)",
            "INSERT INTO labels (id, project_id, name) VALUES (?1, 1, ?2)",
            rusqlite::params![id, name],
            rusqlite::params![id, name],
        ).unwrap();
        )
        .unwrap();
    }
    }
    fn link_issue_label(conn: &Connection, issue_id: i64, label_id: i64) {
    fn link_issue_label(conn: &Connection, issue_id: i64, label_id: i64) {
        conn.execute(
        conn.execute(
            "INSERT INTO issue_labels (issue_id, label_id) VALUES (?1, ?2)",
            "INSERT INTO issue_labels (issue_id, label_id) VALUES (?1, ?2)",
            rusqlite::params![issue_id, label_id],
            rusqlite::params![issue_id, label_id],
        ).unwrap();
        )
        .unwrap();
    }
    }
    #[test]
    #[test]
    fn test_issue_document_format() {
    fn test_issue_document_format() {
        let conn = setup_test_db();
        let conn = setup_test_db();
        insert_issue(&conn, 1, 234, Some("Authentication redesign"), Some("We need to modernize our authentication system..."), "opened", Some("johndoe"), Some("https://gitlab.example.com/group/project-one/-/issues/234"));
        insert_issue(
            &conn,
            1,
            234,
            Some("Authentication redesign"),
            Some("We need to modernize our authentication system..."),
            "opened",
            Some("johndoe"),
            Some("https://gitlab.example.com/group/project-one/-/issues/234"),
        );
        insert_label(&conn, 1, "auth");
        insert_label(&conn, 1, "auth");
        insert_label(&conn, 2, "bug");
        insert_label(&conn, 2, "bug");
        link_issue_label(&conn, 1, 1);
        link_issue_label(&conn, 1, 1);
						
						
						
							
						
						
@@ -706,13 +742,23 @@ mod tests {
        assert_eq!(doc.source_id, 1);
        assert_eq!(doc.source_id, 1);
        assert_eq!(doc.project_id, 1);
        assert_eq!(doc.project_id, 1);
        assert_eq!(doc.author_username, Some("johndoe".to_string()));
        assert_eq!(doc.author_username, Some("johndoe".to_string()));
        assert!(doc.content_text.starts_with("[[Issue]] #234: Authentication redesign\n"));
        assert!(
            doc.content_text
                .starts_with("[[Issue]] #234: Authentication redesign\n")
        );
        assert!(doc.content_text.contains("Project: group/project-one\n"));
        assert!(doc.content_text.contains("Project: group/project-one\n"));
        assert!(doc.content_text.contains("URL: https://gitlab.example.com/group/project-one/-/issues/234\n"));
        assert!(
            doc.content_text
                .contains("URL: https://gitlab.example.com/group/project-one/-/issues/234\n")
        );
        assert!(doc.content_text.contains("Labels: [\"auth\",\"bug\"]\n"));
        assert!(doc.content_text.contains("Labels: [\"auth\",\"bug\"]\n"));
        assert!(doc.content_text.contains("State: opened\n"));
        assert!(doc.content_text.contains("State: opened\n"));
        assert!(doc.content_text.contains("Author: @johndoe\n"));
        assert!(doc.content_text.contains("Author: @johndoe\n"));
        assert!(doc.content_text.contains("--- Description ---\n\nWe need to modernize our authentication system..."));
        assert!(
            doc.content_text.contains(
                "--- Description ---\n\nWe need to modernize our authentication system..."
            )
        );
        assert!(!doc.is_truncated);
        assert!(!doc.is_truncated);
        assert!(doc.paths.is_empty());
        assert!(doc.paths.is_empty());
    }
    }
						
						
						
							
						
						
@@ -727,7 +773,16 @@ mod tests {
    #[test]
    #[test]
    fn test_issue_no_description() {
    fn test_issue_no_description() {
        let conn = setup_test_db();
        let conn = setup_test_db();
        insert_issue(&conn, 1, 10, Some("Quick fix"), None, "opened", Some("alice"), None);
        insert_issue(
            &conn,
            1,
            10,
            Some("Quick fix"),
            None,
            "opened",
            Some("alice"),
            None,
        );
        let doc = extract_issue_document(&conn, 1).unwrap().unwrap();
        let doc = extract_issue_document(&conn, 1).unwrap().unwrap();
        assert!(!doc.content_text.contains("--- Description ---"));
        assert!(!doc.content_text.contains("--- Description ---"));
						
						
						
							
						
						
@@ -737,7 +792,16 @@ mod tests {
    #[test]
    #[test]
    fn test_issue_labels_sorted() {
    fn test_issue_labels_sorted() {
        let conn = setup_test_db();
        let conn = setup_test_db();
        insert_issue(&conn, 1, 10, Some("Test"), Some("Body"), "opened", Some("bob"), None);
        insert_issue(
            &conn,
            1,
            10,
            Some("Test"),
            Some("Body"),
            "opened",
            Some("bob"),
            None,
        );
        insert_label(&conn, 1, "zeta");
        insert_label(&conn, 1, "zeta");
        insert_label(&conn, 2, "alpha");
        insert_label(&conn, 2, "alpha");
        insert_label(&conn, 3, "middle");
        insert_label(&conn, 3, "middle");
						
						
						
							
						
						
@@ -747,13 +811,25 @@ mod tests {
        let doc = extract_issue_document(&conn, 1).unwrap().unwrap();
        let doc = extract_issue_document(&conn, 1).unwrap().unwrap();
        assert_eq!(doc.labels, vec!["alpha", "middle", "zeta"]);
        assert_eq!(doc.labels, vec!["alpha", "middle", "zeta"]);
        assert!(doc.content_text.contains("Labels: [\"alpha\",\"middle\",\"zeta\"]"));
        assert!(
            doc.content_text
                .contains("Labels: [\"alpha\",\"middle\",\"zeta\"]")
        );
    }
    }
    #[test]
    #[test]
    fn test_issue_no_labels() {
    fn test_issue_no_labels() {
        let conn = setup_test_db();
        let conn = setup_test_db();
        insert_issue(&conn, 1, 10, Some("Test"), Some("Body"), "opened", None, None);
        insert_issue(
            &conn,
            1,
            10,
            Some("Test"),
            Some("Body"),
            "opened",
            None,
            None,
        );
        let doc = extract_issue_document(&conn, 1).unwrap().unwrap();
        let doc = extract_issue_document(&conn, 1).unwrap().unwrap();
        assert!(doc.labels.is_empty());
        assert!(doc.labels.is_empty());
						
						
						
							
						
						
@@ -763,7 +839,16 @@ mod tests {
    #[test]
    #[test]
    fn test_issue_hash_deterministic() {
    fn test_issue_hash_deterministic() {
        let conn = setup_test_db();
        let conn = setup_test_db();
        insert_issue(&conn, 1, 10, Some("Test"), Some("Body"), "opened", Some("alice"), None);
        insert_issue(
            &conn,
            1,
            10,
            Some("Test"),
            Some("Body"),
            "opened",
            Some("alice"),
            None,
        );
        let doc1 = extract_issue_document(&conn, 1).unwrap().unwrap();
        let doc1 = extract_issue_document(&conn, 1).unwrap().unwrap();
        let doc2 = extract_issue_document(&conn, 1).unwrap().unwrap();
        let doc2 = extract_issue_document(&conn, 1).unwrap().unwrap();
						
						
						
							
						
						
@@ -786,7 +871,8 @@ mod tests {
    fn setup_mr_test_db() -> Connection {
    fn setup_mr_test_db() -> Connection {
        let conn = setup_test_db();
        let conn = setup_test_db();
        conn.execute_batch("
        conn.execute_batch(
            "
            CREATE TABLE merge_requests (
            CREATE TABLE merge_requests (
                id INTEGER PRIMARY KEY,
                id INTEGER PRIMARY KEY,
                gitlab_id INTEGER UNIQUE NOT NULL,
                gitlab_id INTEGER UNIQUE NOT NULL,
						
							
						
						
							
						
						
						
@@ -821,11 +907,25 @@ mod tests {
                label_id INTEGER REFERENCES labels(id),
                label_id INTEGER REFERENCES labels(id),
                PRIMARY KEY(merge_request_id, label_id)
                PRIMARY KEY(merge_request_id, label_id)
            );
            );
        ").unwrap();
        ",
        )
        .unwrap();
        conn
        conn
    }
    }
    fn insert_mr(conn: &Connection, id: i64, iid: i64, title: Option<&str>, description: Option<&str>, state: Option<&str>, author: Option<&str>, source_branch: Option<&str>, target_branch: Option<&str>, web_url: Option<&str>) {
    #[allow(clippy::too_many_arguments)]
    fn insert_mr(
        conn: &Connection,
        id: i64,
        iid: i64,
        title: Option<&str>,
        description: Option<&str>,
        state: Option<&str>,
        author: Option<&str>,
        source_branch: Option<&str>,
        target_branch: Option<&str>,
        web_url: Option<&str>,
    ) {
        conn.execute(
        conn.execute(
            "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, description, state, author_username, source_branch, target_branch, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, ?8, ?9, 1000, 2000, 3000, ?10)",
            "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, description, state, author_username, source_branch, target_branch, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, ?8, ?9, 1000, 2000, 3000, ?10)",
            rusqlite::params![id, id * 10, iid, title, description, state, author, source_branch, target_branch, web_url],
            rusqlite::params![id, id * 10, iid, title, description, state, author, source_branch, target_branch, web_url],
						
						
						
							
						
						
@@ -836,13 +936,25 @@ mod tests {
        conn.execute(
        conn.execute(
            "INSERT INTO mr_labels (merge_request_id, label_id) VALUES (?1, ?2)",
            "INSERT INTO mr_labels (merge_request_id, label_id) VALUES (?1, ?2)",
            rusqlite::params![mr_id, label_id],
            rusqlite::params![mr_id, label_id],
        ).unwrap();
        )
        .unwrap();
    }
    }
    #[test]
    #[test]
    fn test_mr_document_format() {
    fn test_mr_document_format() {
        let conn = setup_mr_test_db();
        let conn = setup_mr_test_db();
        insert_mr(&conn, 1, 456, Some("Implement JWT authentication"), Some("This MR implements JWT-based authentication..."), Some("opened"), Some("johndoe"), Some("feature/jwt-auth"), Some("main"), Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"));
        insert_mr(
            &conn,
            1,
            456,
            Some("Implement JWT authentication"),
            Some("This MR implements JWT-based authentication..."),
            Some("opened"),
            Some("johndoe"),
            Some("feature/jwt-auth"),
            Some("main"),
            Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"),
        );
        insert_label(&conn, 1, "auth");
        insert_label(&conn, 1, "auth");
        insert_label(&conn, 2, "feature");
        insert_label(&conn, 2, "feature");
        link_mr_label(&conn, 1, 1);
        link_mr_label(&conn, 1, 1);
						
						
						
							
						
						
@@ -851,13 +963,25 @@ mod tests {
        let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
        let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
        assert_eq!(doc.source_type, SourceType::MergeRequest);
        assert_eq!(doc.source_type, SourceType::MergeRequest);
        assert_eq!(doc.source_id, 1);
        assert_eq!(doc.source_id, 1);
        assert!(doc.content_text.starts_with("[[MergeRequest]] !456: Implement JWT authentication\n"));
        assert!(
            doc.content_text
                .starts_with("[[MergeRequest]] !456: Implement JWT authentication\n")
        );
        assert!(doc.content_text.contains("Project: group/project-one\n"));
        assert!(doc.content_text.contains("Project: group/project-one\n"));
        assert!(doc.content_text.contains("Labels: [\"auth\",\"feature\"]\n"));
        assert!(
            doc.content_text
                .contains("Labels: [\"auth\",\"feature\"]\n")
        );
        assert!(doc.content_text.contains("State: opened\n"));
        assert!(doc.content_text.contains("State: opened\n"));
        assert!(doc.content_text.contains("Author: @johndoe\n"));
        assert!(doc.content_text.contains("Author: @johndoe\n"));
        assert!(doc.content_text.contains("Source: feature/jwt-auth -> main\n"));
        assert!(
        assert!(doc.content_text.contains("--- Description ---\n\nThis MR implements JWT-based authentication..."));
            doc.content_text
                .contains("Source: feature/jwt-auth -> main\n")
        );
        assert!(
            doc.content_text
                .contains("--- Description ---\n\nThis MR implements JWT-based authentication...")
        );
    }
    }
    #[test]
    #[test]
						
						
						
							
						
						
@@ -870,26 +994,65 @@ mod tests {
    #[test]
    #[test]
    fn test_mr_no_description() {
    fn test_mr_no_description() {
        let conn = setup_mr_test_db();
        let conn = setup_mr_test_db();
        insert_mr(&conn, 1, 10, Some("Quick fix"), None, Some("merged"), Some("alice"), Some("fix/bug"), Some("main"), None);
        insert_mr(
            &conn,
            1,
            10,
            Some("Quick fix"),
            None,
            Some("merged"),
            Some("alice"),
            Some("fix/bug"),
            Some("main"),
            None,
        );
        let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
        let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
        assert!(!doc.content_text.contains("--- Description ---"));
        assert!(!doc.content_text.contains("--- Description ---"));
        assert!(doc.content_text.contains("[[MergeRequest]] !10: Quick fix\n"));
        assert!(
            doc.content_text
                .contains("[[MergeRequest]] !10: Quick fix\n")
        );
    }
    }
    #[test]
    #[test]
    fn test_mr_branch_info() {
    fn test_mr_branch_info() {
        let conn = setup_mr_test_db();
        let conn = setup_mr_test_db();
        insert_mr(&conn, 1, 10, Some("Test"), Some("Body"), Some("opened"), None, Some("feature/foo"), Some("develop"), None);
        insert_mr(
            &conn,
            1,
            10,
            Some("Test"),
            Some("Body"),
            Some("opened"),
            None,
            Some("feature/foo"),
            Some("develop"),
            None,
        );
        let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
        let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
        assert!(doc.content_text.contains("Source: feature/foo -> develop\n"));
        assert!(
            doc.content_text
                .contains("Source: feature/foo -> develop\n")
        );
    }
    }
    #[test]
    #[test]
    fn test_mr_no_branches() {
    fn test_mr_no_branches() {
        let conn = setup_mr_test_db();
        let conn = setup_mr_test_db();
        insert_mr(&conn, 1, 10, Some("Test"), None, Some("opened"), None, None, None, None);
        insert_mr(
            &conn,
            1,
            10,
            Some("Test"),
            None,
            Some("opened"),
            None,
            None,
            None,
            None,
        );
        let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
        let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
        assert!(!doc.content_text.contains("Source:"));
        assert!(!doc.content_text.contains("Source:"));
						
						
						
							
						
						
@@ -899,7 +1062,8 @@ mod tests {
    fn setup_discussion_test_db() -> Connection {
    fn setup_discussion_test_db() -> Connection {
        let conn = setup_mr_test_db(); // includes projects, issues schema, labels, mr tables
        let conn = setup_mr_test_db(); // includes projects, issues schema, labels, mr tables
        conn.execute_batch("
        conn.execute_batch(
            "
            CREATE TABLE discussions (
            CREATE TABLE discussions (
                id INTEGER PRIMARY KEY,
                id INTEGER PRIMARY KEY,
                gitlab_discussion_id TEXT NOT NULL,
                gitlab_discussion_id TEXT NOT NULL,
						
							
						
						
							
						
						
						
@@ -937,18 +1101,38 @@ mod tests {
                position_new_line INTEGER,
                position_new_line INTEGER,
                raw_payload_id INTEGER
                raw_payload_id INTEGER
            );
            );
        ").unwrap();
        ",
        )
        .unwrap();
        conn
        conn
    }
    }
    fn insert_discussion(conn: &Connection, id: i64, noteable_type: &str, issue_id: Option<i64>, mr_id: Option<i64>) {
    fn insert_discussion(
        conn: &Connection,
        id: i64,
        noteable_type: &str,
        issue_id: Option<i64>,
        mr_id: Option<i64>,
    ) {
        conn.execute(
        conn.execute(
            "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, 1, ?3, ?4, ?5, 3000)",
            "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, 1, ?3, ?4, ?5, 3000)",
            rusqlite::params![id, format!("disc_{}", id), issue_id, mr_id, noteable_type],
            rusqlite::params![id, format!("disc_{}", id), issue_id, mr_id, noteable_type],
        ).unwrap();
        ).unwrap();
    }
    }
    fn insert_note(conn: &Connection, id: i64, gitlab_id: i64, discussion_id: i64, author: Option<&str>, body: Option<&str>, created_at: i64, is_system: bool, old_path: Option<&str>, new_path: Option<&str>) {
    #[allow(clippy::too_many_arguments)]
    fn insert_note(
        conn: &Connection,
        id: i64,
        gitlab_id: i64,
        discussion_id: i64,
        author: Option<&str>,
        body: Option<&str>,
        created_at: i64,
        is_system: bool,
        old_path: Option<&str>,
        new_path: Option<&str>,
    ) {
        conn.execute(
        conn.execute(
            "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position_old_path, position_new_path) VALUES (?1, ?2, ?3, 1, ?4, ?5, ?6, ?6, ?6, ?7, ?8, ?9)",
            "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position_old_path, position_new_path) VALUES (?1, ?2, ?3, 1, ?4, ?5, ?6, ?6, ?6, ?7, ?8, ?9)",
            rusqlite::params![id, gitlab_id, discussion_id, author, body, created_at, is_system as i32, old_path, new_path],
            rusqlite::params![id, gitlab_id, discussion_id, author, body, created_at, is_system as i32, old_path, new_path],
						
						
						
							
						
						
@@ -958,25 +1142,67 @@ mod tests {
    #[test]
    #[test]
    fn test_discussion_document_format() {
    fn test_discussion_document_format() {
        let conn = setup_discussion_test_db();
        let conn = setup_discussion_test_db();
        insert_issue(&conn, 1, 234, Some("Authentication redesign"), Some("desc"), "opened", Some("johndoe"), Some("https://gitlab.example.com/group/project-one/-/issues/234"));
        insert_issue(
            &conn,
            1,
            234,
            Some("Authentication redesign"),
            Some("desc"),
            "opened",
            Some("johndoe"),
            Some("https://gitlab.example.com/group/project-one/-/issues/234"),
        );
        insert_label(&conn, 1, "auth");
        insert_label(&conn, 1, "auth");
        insert_label(&conn, 2, "bug");
        insert_label(&conn, 2, "bug");
        link_issue_label(&conn, 1, 1);
        link_issue_label(&conn, 1, 1);
        link_issue_label(&conn, 1, 2);
        link_issue_label(&conn, 1, 2);
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        // 1710460800000 = 2024-03-15T00:00:00Z
        // 1710460800000 = 2024-03-15T00:00:00Z
        insert_note(&conn, 1, 12345, 1, Some("johndoe"), Some("I think we should move to JWT-based auth..."), 1710460800000, false, None, None);
        insert_note(
        insert_note(&conn, 2, 12346, 1, Some("janedoe"), Some("Agreed. What about refresh token strategy?"), 1710460800000, false, None, None);
            &conn,
            1,
            12345,
            1,
            Some("johndoe"),
            Some("I think we should move to JWT-based auth..."),
            1710460800000,
            false,
            None,
            None,
        );
        insert_note(
            &conn,
            2,
            12346,
            1,
            Some("janedoe"),
            Some("Agreed. What about refresh token strategy?"),
            1710460800000,
            false,
            None,
            None,
        );
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        assert_eq!(doc.source_type, SourceType::Discussion);
        assert_eq!(doc.source_type, SourceType::Discussion);
        assert!(doc.content_text.starts_with("[[Discussion]] Issue #234: Authentication redesign\n"));
        assert!(
            doc.content_text
                .starts_with("[[Discussion]] Issue #234: Authentication redesign\n")
        );
        assert!(doc.content_text.contains("Project: group/project-one\n"));
        assert!(doc.content_text.contains("Project: group/project-one\n"));
        assert!(doc.content_text.contains("URL: https://gitlab.example.com/group/project-one/-/issues/234#note_12345\n"));
        assert!(doc.content_text.contains(
            "URL: https://gitlab.example.com/group/project-one/-/issues/234#note_12345\n"
        ));
        assert!(doc.content_text.contains("Labels: [\"auth\",\"bug\"]\n"));
        assert!(doc.content_text.contains("Labels: [\"auth\",\"bug\"]\n"));
        assert!(doc.content_text.contains("--- Thread ---"));
        assert!(doc.content_text.contains("--- Thread ---"));
        assert!(doc.content_text.contains("@johndoe (2024-03-15):\nI think we should move to JWT-based auth..."));
        assert!(
        assert!(doc.content_text.contains("@janedoe (2024-03-15):\nAgreed. What about refresh token strategy?"));
            doc.content_text
                .contains("@johndoe (2024-03-15):\nI think we should move to JWT-based auth...")
        );
        assert!(
            doc.content_text
                .contains("@janedoe (2024-03-15):\nAgreed. What about refresh token strategy?")
        );
        assert_eq!(doc.author_username, Some("johndoe".to_string()));
        assert_eq!(doc.author_username, Some("johndoe".to_string()));
        assert!(doc.title.is_none()); // Discussions don't have their own title
        assert!(doc.title.is_none()); // Discussions don't have their own title
    }
    }
						
						
						
							
						
						
@@ -992,13 +1218,34 @@ mod tests {
    fn test_discussion_parent_deleted() {
    fn test_discussion_parent_deleted() {
        let conn = setup_discussion_test_db();
        let conn = setup_discussion_test_db();
        // Insert issue, create discussion, then delete the issue
        // Insert issue, create discussion, then delete the issue
        insert_issue(&conn, 99, 10, Some("To be deleted"), None, "opened", None, None);
        insert_issue(
            &conn,
            99,
            10,
            Some("To be deleted"),
            None,
            "opened",
            None,
            None,
        );
        insert_discussion(&conn, 1, "Issue", Some(99), None);
        insert_discussion(&conn, 1, "Issue", Some(99), None);
        insert_note(&conn, 1, 100, 1, Some("alice"), Some("Hello"), 1000, false, None, None);
        insert_note(
            &conn,
            1,
            100,
            1,
            Some("alice"),
            Some("Hello"),
            1000,
            false,
            None,
            None,
        );
        // Delete the parent issue — FK cascade won't delete discussion in test since
        // Delete the parent issue — FK cascade won't delete discussion in test since
        // we used REFERENCES without ON DELETE CASCADE in test schema, so just delete from issues
        // we used REFERENCES without ON DELETE CASCADE in test schema, so just delete from issues
        conn.execute("PRAGMA foreign_keys = OFF", []).unwrap();
        conn.execute("PRAGMA foreign_keys = OFF", []).unwrap();
        conn.execute("DELETE FROM issues WHERE id = 99", []).unwrap();
        conn.execute("DELETE FROM issues WHERE id = 99", [])
            .unwrap();
        conn.execute("PRAGMA foreign_keys = ON", []).unwrap();
        conn.execute("PRAGMA foreign_keys = ON", []).unwrap();
        let result = extract_discussion_document(&conn, 1).unwrap();
        let result = extract_discussion_document(&conn, 1).unwrap();
						
						
						
							
						
						
@@ -1008,11 +1255,53 @@ mod tests {
    #[test]
    #[test]
    fn test_discussion_system_notes_excluded() {
    fn test_discussion_system_notes_excluded() {
        let conn = setup_discussion_test_db();
        let conn = setup_discussion_test_db();
        insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", Some("alice"), None);
        insert_issue(
            &conn,
            1,
            10,
            Some("Test"),
            Some("desc"),
            "opened",
            Some("alice"),
            None,
        );
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_note(&conn, 1, 100, 1, Some("alice"), Some("Real comment"), 1000, false, None, None);
        insert_note(
        insert_note(&conn, 2, 101, 1, Some("bot"), Some("assigned to @alice"), 2000, true, None, None);
            &conn,
        insert_note(&conn, 3, 102, 1, Some("bob"), Some("Follow-up"), 3000, false, None, None);
            1,
            100,
            1,
            Some("alice"),
            Some("Real comment"),
            1000,
            false,
            None,
            None,
        );
        insert_note(
            &conn,
            2,
            101,
            1,
            Some("bot"),
            Some("assigned to @alice"),
            2000,
            true,
            None,
            None,
        );
        insert_note(
            &conn,
            3,
            102,
            1,
            Some("bob"),
            Some("Follow-up"),
            3000,
            false,
            None,
            None,
        );
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        assert!(doc.content_text.contains("@alice"));
        assert!(doc.content_text.contains("@alice"));
						
						
						
							
						
						
@@ -1023,38 +1312,115 @@ mod tests {
    #[test]
    #[test]
    fn test_discussion_diffnote_paths() {
    fn test_discussion_diffnote_paths() {
        let conn = setup_discussion_test_db();
        let conn = setup_discussion_test_db();
        insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None);
        insert_issue(
            &conn,
            1,
            10,
            Some("Test"),
            Some("desc"),
            "opened",
            None,
            None,
        );
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_note(&conn, 1, 100, 1, Some("alice"), Some("Comment on code"), 1000, false, Some("src/old.rs"), Some("src/new.rs"));
        insert_note(
        insert_note(&conn, 2, 101, 1, Some("bob"), Some("Reply"), 2000, false, Some("src/old.rs"), Some("src/new.rs"));
            &conn,
            1,
            100,
            1,
            Some("alice"),
            Some("Comment on code"),
            1000,
            false,
            Some("src/old.rs"),
            Some("src/new.rs"),
        );
        insert_note(
            &conn,
            2,
            101,
            1,
            Some("bob"),
            Some("Reply"),
            2000,
            false,
            Some("src/old.rs"),
            Some("src/new.rs"),
        );
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        // Paths should be deduplicated and sorted
        // Paths should be deduplicated and sorted
        assert_eq!(doc.paths, vec!["src/new.rs", "src/old.rs"]);
        assert_eq!(doc.paths, vec!["src/new.rs", "src/old.rs"]);
        assert!(doc.content_text.contains("Files: [\"src/new.rs\",\"src/old.rs\"]"));
        assert!(
            doc.content_text
                .contains("Files: [\"src/new.rs\",\"src/old.rs\"]")
        );
    }
    }
    #[test]
    #[test]
    fn test_discussion_url_construction() {
    fn test_discussion_url_construction() {
        let conn = setup_discussion_test_db();
        let conn = setup_discussion_test_db();
        insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, Some("https://gitlab.example.com/group/project-one/-/issues/10"));
        insert_issue(
            &conn,
            1,
            10,
            Some("Test"),
            Some("desc"),
            "opened",
            None,
            Some("https://gitlab.example.com/group/project-one/-/issues/10"),
        );
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_note(&conn, 1, 54321, 1, Some("alice"), Some("Hello"), 1000, false, None, None);
        insert_note(
            &conn,
            1,
            54321,
            1,
            Some("alice"),
            Some("Hello"),
            1000,
            false,
            None,
            None,
        );
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        assert_eq!(doc.url, Some("https://gitlab.example.com/group/project-one/-/issues/10#note_54321".to_string()));
        assert_eq!(
            doc.url,
            Some("https://gitlab.example.com/group/project-one/-/issues/10#note_54321".to_string())
        );
    }
    }
    #[test]
    #[test]
    fn test_discussion_uses_parent_labels() {
    fn test_discussion_uses_parent_labels() {
        let conn = setup_discussion_test_db();
        let conn = setup_discussion_test_db();
        insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None);
        insert_issue(
            &conn,
            1,
            10,
            Some("Test"),
            Some("desc"),
            "opened",
            None,
            None,
        );
        insert_label(&conn, 1, "backend");
        insert_label(&conn, 1, "backend");
        insert_label(&conn, 2, "api");
        insert_label(&conn, 2, "api");
        link_issue_label(&conn, 1, 1);
        link_issue_label(&conn, 1, 1);
        link_issue_label(&conn, 1, 2);
        link_issue_label(&conn, 1, 2);
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_note(&conn, 1, 100, 1, Some("alice"), Some("Comment"), 1000, false, None, None);
        insert_note(
            &conn,
            1,
            100,
            1,
            Some("alice"),
            Some("Comment"),
            1000,
            false,
            None,
            None,
        );
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        assert_eq!(doc.labels, vec!["api", "backend"]);
        assert_eq!(doc.labels, vec!["api", "backend"]);
						
						
						
							
						
						
@@ -1063,20 +1429,65 @@ mod tests {
    #[test]
    #[test]
    fn test_discussion_on_mr() {
    fn test_discussion_on_mr() {
        let conn = setup_discussion_test_db();
        let conn = setup_discussion_test_db();
        insert_mr(&conn, 1, 456, Some("JWT Auth"), Some("desc"), Some("opened"), Some("johndoe"), Some("feature/jwt"), Some("main"), Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"));
        insert_mr(
            &conn,
            1,
            456,
            Some("JWT Auth"),
            Some("desc"),
            Some("opened"),
            Some("johndoe"),
            Some("feature/jwt"),
            Some("main"),
            Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"),
        );
        insert_discussion(&conn, 1, "MergeRequest", None, Some(1));
        insert_discussion(&conn, 1, "MergeRequest", None, Some(1));
        insert_note(&conn, 1, 100, 1, Some("alice"), Some("LGTM"), 1000, false, None, None);
        insert_note(
            &conn,
            1,
            100,
            1,
            Some("alice"),
            Some("LGTM"),
            1000,
            false,
            None,
            None,
        );
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
        assert!(doc.content_text.contains("[[Discussion]] MR !456: JWT Auth\n"));
        assert!(
            doc.content_text
                .contains("[[Discussion]] MR !456: JWT Auth\n")
        );
    }
    }
    #[test]
    #[test]
    fn test_discussion_all_system_notes() {
    fn test_discussion_all_system_notes() {
        let conn = setup_discussion_test_db();
        let conn = setup_discussion_test_db();
        insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None);
        insert_issue(
            &conn,
            1,
            10,
            Some("Test"),
            Some("desc"),
            "opened",
            None,
            None,
        );
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_discussion(&conn, 1, "Issue", Some(1), None);
        insert_note(&conn, 1, 100, 1, Some("bot"), Some("assigned to @alice"), 1000, true, None, None);
        insert_note(
            &conn,
            1,
            100,
            1,
            Some("bot"),
            Some("assigned to @alice"),
            1000,
            true,
            None,
            None,
        );
        // All notes are system notes -> no content -> returns None
        // All notes are system notes -> no content -> returns None
        let result = extract_discussion_document(&conn, 1).unwrap();
        let result = extract_discussion_document(&conn, 1).unwrap();
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -7,11 +7,11 @@ mod regenerator;
mod truncation;
mod truncation;
pub use extractor::{
pub use extractor::{
    compute_content_hash, compute_list_hash, extract_discussion_document,
    DocumentData, SourceType, compute_content_hash, compute_list_hash, extract_discussion_document,
    extract_issue_document, extract_mr_document, DocumentData, SourceType,
    extract_issue_document, extract_mr_document,
};
};
pub use regenerator::{regenerate_dirty_documents, RegenerateResult};
pub use regenerator::{RegenerateResult, regenerate_dirty_documents};
pub use truncation::{
pub use truncation::{
    truncate_discussion, truncate_hard_cap, truncate_utf8, NoteContent, TruncationReason,
    MAX_DISCUSSION_BYTES, MAX_DOCUMENT_BYTES_HARD, NoteContent, TruncationReason, TruncationResult,
    TruncationResult, MAX_DISCUSSION_BYTES, MAX_DOCUMENT_BYTES_HARD,
    truncate_discussion, truncate_hard_cap, truncate_utf8,
};
};
						
						
						
						
 
						
						
							
						
						
						
@@ -4,8 +4,8 @@ use tracing::{debug, warn};
use crate::core::error::Result;
use crate::core::error::Result;
use crate::documents::{
use crate::documents::{
    extract_discussion_document, extract_issue_document, extract_mr_document, DocumentData,
    DocumentData, SourceType, extract_discussion_document, extract_issue_document,
    SourceType,
    extract_mr_document,
};
};
use crate::ingestion::dirty_tracker::{clear_dirty, get_dirty_sources, record_dirty_error};
use crate::ingestion::dirty_tracker::{clear_dirty, get_dirty_sources, record_dirty_error};
						
							
						
						
							
						
						
						
@@ -65,11 +65,7 @@ pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult>
}
}
/// Regenerate a single document. Returns true if content_hash changed.
/// Regenerate a single document. Returns true if content_hash changed.
fn regenerate_one(
fn regenerate_one(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<bool> {
    conn: &Connection,
    source_type: SourceType,
    source_id: i64,
) -> Result<bool> {
    let doc = match source_type {
    let doc = match source_type {
        SourceType::Issue => extract_issue_document(conn, source_id)?,
        SourceType::Issue => extract_issue_document(conn, source_id)?,
        SourceType::MergeRequest => extract_mr_document(conn, source_id)?,
        SourceType::MergeRequest => extract_mr_document(conn, source_id)?,
						
							
						
						
							
						
						
						
@@ -97,8 +93,8 @@ fn get_existing_hash(
    source_type: SourceType,
    source_type: SourceType,
    source_id: i64,
    source_id: i64,
) -> Result<Option<String>> {
) -> Result<Option<String>> {
    let mut stmt =
    let mut stmt = conn
        conn.prepare("SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2")?;
        .prepare("SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2")?;
    let hash: Option<String> = stmt
    let hash: Option<String> = stmt
        .query_row(rusqlite::params![source_type.as_str(), source_id], |row| {
        .query_row(rusqlite::params![source_type.as_str(), source_id], |row| {
						
							
						
						
							
						
						
						
@@ -140,17 +136,15 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> {
        .optional()?;
        .optional()?;
    // Fast path: skip ALL writes when nothing changed (prevents WAL churn)
    // Fast path: skip ALL writes when nothing changed (prevents WAL churn)
    if let Some((_, ref old_content_hash, ref old_labels_hash, ref old_paths_hash)) = existing {
    if let Some((_, ref old_content_hash, ref old_labels_hash, ref old_paths_hash)) = existing
        if old_content_hash == &doc.content_hash
        && old_content_hash == &doc.content_hash
        && old_labels_hash == &doc.labels_hash
        && old_labels_hash == &doc.labels_hash
        && old_paths_hash == &doc.paths_hash
        && old_paths_hash == &doc.paths_hash
    {
    {
        return Ok(());
        return Ok(());
    }
    }
    }
    let labels_json =
    let labels_json = serde_json::to_string(&doc.labels).unwrap_or_else(|_| "[]".to_string());
        serde_json::to_string(&doc.labels).unwrap_or_else(|_| "[]".to_string());
    // Upsert document row
    // Upsert document row
    conn.execute(
    conn.execute(
						
							
						
						
							
						
						
						
@@ -237,11 +231,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> {
}
}
/// Delete a document by source identity.
/// Delete a document by source identity.
fn delete_document(
fn delete_document(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
    conn: &Connection,
    source_type: SourceType,
    source_id: i64,
) -> Result<()> {
    conn.execute(
    conn.execute(
        "DELETE FROM documents WHERE source_type = ?1 AND source_id = ?2",
        "DELETE FROM documents WHERE source_type = ?1 AND source_id = ?2",
        rusqlite::params![source_type.as_str(), source_id],
        rusqlite::params![source_type.as_str(), source_id],
						
						
						
							
						
						
@@ -250,11 +240,7 @@ fn delete_document(
}
}
/// Get document ID by source type and source ID.
/// Get document ID by source type and source ID.
fn get_document_id(
fn get_document_id(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<i64> {
    conn: &Connection,
    source_type: SourceType,
    source_id: i64,
) -> Result<i64> {
    let id: i64 = conn.query_row(
    let id: i64 = conn.query_row(
        "SELECT id FROM documents WHERE source_type = ?1 AND source_id = ?2",
        "SELECT id FROM documents WHERE source_type = ?1 AND source_id = ?2",
        rusqlite::params![source_type.as_str(), source_id],
        rusqlite::params![source_type.as_str(), source_id],
						
							
						
						
							
						
						
						
@@ -372,10 +358,14 @@ mod tests {
        assert_eq!(result.errored, 0);
        assert_eq!(result.errored, 0);
        // Verify document was created
        // Verify document was created
        let count: i64 = conn.query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0)).unwrap();
        let count: i64 = conn
            .query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))
            .unwrap();
        assert_eq!(count, 1);
        assert_eq!(count, 1);
        let content: String = conn.query_row("SELECT content_text FROM documents", [], |r| r.get(0)).unwrap();
        let content: String = conn
            .query_row("SELECT content_text FROM documents", [], |r| r.get(0))
            .unwrap();
        assert!(content.contains("[[Issue]] #42: Test Issue"));
        assert!(content.contains("[[Issue]] #42: Test Issue"));
    }
    }
						
							
						
						
							
						
						
						
@@ -418,7 +408,9 @@ mod tests {
        let result = regenerate_dirty_documents(&conn).unwrap();
        let result = regenerate_dirty_documents(&conn).unwrap();
        assert_eq!(result.regenerated, 1); // Deletion counts as "changed"
        assert_eq!(result.regenerated, 1); // Deletion counts as "changed"
        let count: i64 = conn.query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0)).unwrap();
        let count: i64 = conn
            .query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))
            .unwrap();
        assert_eq!(count, 0);
        assert_eq!(count, 0);
    }
    }
						
							
						
						
							
						
						
						
@@ -451,11 +443,13 @@ mod tests {
        conn.execute(
        conn.execute(
            "INSERT INTO labels (id, project_id, name) VALUES (1, 1, 'bug')",
            "INSERT INTO labels (id, project_id, name) VALUES (1, 1, 'bug')",
            [],
            [],
        ).unwrap();
        )
        .unwrap();
        conn.execute(
        conn.execute(
            "INSERT INTO issue_labels (issue_id, label_id) VALUES (1, 1)",
            "INSERT INTO issue_labels (issue_id, label_id) VALUES (1, 1)",
            [],
            [],
        ).unwrap();
        )
        .unwrap();
        // First run creates document
        // First run creates document
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
						
						
						
							
						
						
@@ -467,9 +461,9 @@ mod tests {
        assert_eq!(result.unchanged, 1);
        assert_eq!(result.unchanged, 1);
        // Labels should still be present (not deleted and re-inserted)
        // Labels should still be present (not deleted and re-inserted)
        let label_count: i64 = conn.query_row(
        let label_count: i64 = conn
            "SELECT COUNT(*) FROM document_labels", [], |r| r.get(0),
            .query_row("SELECT COUNT(*) FROM document_labels", [], |r| r.get(0))
        ).unwrap();
            .unwrap();
        assert_eq!(label_count, 1);
        assert_eq!(label_count, 1);
    }
    }
}
}
						
						
						
						
 
						
						
							
						
						
						
@@ -231,10 +231,7 @@ mod tests {
    #[test]
    #[test]
    fn test_first_last_oversized() {
    fn test_first_last_oversized() {
        let big_body = "x".repeat(20_000);
        let big_body = "x".repeat(20_000);
        let notes = vec![
        let notes = vec![make_note("alice", &big_body), make_note("bob", &big_body)];
            make_note("alice", &big_body),
            make_note("bob", &big_body),
        ];
        let result = truncate_discussion(&notes, 10_000);
        let result = truncate_discussion(&notes, 10_000);
        assert!(result.is_truncated);
        assert!(result.is_truncated);
        assert_eq!(result.reason, Some(TruncationReason::FirstLastOversized));
        assert_eq!(result.reason, Some(TruncationReason::FirstLastOversized));
						
							
						
						
							
						
						
						
@@ -304,7 +301,11 @@ mod tests {
            .collect();
            .collect();
        let result = truncate_discussion(&notes, 12_000);
        let result = truncate_discussion(&notes, 12_000);
        assert!(result.is_truncated);
        assert!(result.is_truncated);
        assert!(result.content.contains("[... 5 notes omitted for length ...]"));
        assert!(
            result
                .content
                .contains("[... 5 notes omitted for length ...]")
        );
    }
    }
    #[test]
    #[test]
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -78,7 +78,9 @@ pub fn split_into_chunks(content: &str) -> Vec<(usize, String)> {
fn find_paragraph_break(window: &str) -> Option<usize> {
fn find_paragraph_break(window: &str) -> Option<usize> {
    // Search backward from 2/3 of the way through to find a good split
    // Search backward from 2/3 of the way through to find a good split
    let search_start = window.len() * 2 / 3;
    let search_start = window.len() * 2 / 3;
    window[search_start..].rfind("\n\n").map(|pos| search_start + pos + 2)
    window[search_start..]
        .rfind("\n\n")
        .map(|pos| search_start + pos + 2)
        .or_else(|| window[..search_start].rfind("\n\n").map(|pos| pos + 2))
        .or_else(|| window[..search_start].rfind("\n\n").map(|pos| pos + 2))
}
}
						
						
						
							
						
						
@@ -102,7 +104,9 @@ fn find_sentence_break(window: &str) -> Option<usize> {
/// Find the last word boundary (space) in the window.
/// Find the last word boundary (space) in the window.
fn find_word_break(window: &str) -> Option<usize> {
fn find_word_break(window: &str) -> Option<usize> {
    let search_start = window.len() / 2;
    let search_start = window.len() / 2;
    window[search_start..].rfind(' ').map(|pos| search_start + pos + 1)
    window[search_start..]
        .rfind(' ')
        .map(|pos| search_start + pos + 1)
        .or_else(|| window[..search_start].rfind(' ').map(|pos| pos + 1))
        .or_else(|| window[..search_start].rfind(' ').map(|pos| pos + 1))
}
}
						
							
						
						
							
						
						
						
@@ -155,7 +159,11 @@ mod tests {
        }
        }
        let chunks = split_into_chunks(&content);
        let chunks = split_into_chunks(&content);
        assert!(chunks.len() >= 2, "Expected multiple chunks, got {}", chunks.len());
        assert!(
            chunks.len() >= 2,
            "Expected multiple chunks, got {}",
            chunks.len()
        );
        // Verify indices are sequential
        // Verify indices are sequential
        for (i, (idx, _)) in chunks.iter().enumerate() {
        for (i, (idx, _)) in chunks.iter().enumerate() {
						
							
						
						
							
						
						
						
@@ -183,7 +191,8 @@ mod tests {
            let end_of_first = &chunks[0].1;
            let end_of_first = &chunks[0].1;
            let start_of_second = &chunks[1].1;
            let start_of_second = &chunks[1].1;
            // The end of first chunk should overlap with start of second
            // The end of first chunk should overlap with start of second
            let overlap_region = &end_of_first[end_of_first.len().saturating_sub(CHUNK_OVERLAP_CHARS)..];
            let overlap_region =
                &end_of_first[end_of_first.len().saturating_sub(CHUNK_OVERLAP_CHARS)..];
            assert!(
            assert!(
                start_of_second.starts_with(overlap_region)
                start_of_second.starts_with(overlap_region)
                    || overlap_region.contains(&start_of_second[..100.min(start_of_second.len())]),
                    || overlap_region.contains(&start_of_second[..100.min(start_of_second.len())]),
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -4,6 +4,6 @@ pub mod chunking;
pub mod ollama;
pub mod ollama;
pub mod pipeline;
pub mod pipeline;
pub use change_detector::{count_pending_documents, find_pending_documents, PendingDocument};
pub use change_detector::{PendingDocument, count_pending_documents, find_pending_documents};
pub use chunking::{split_into_chunks, CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS};
pub use chunking::{CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS, split_into_chunks};
pub use pipeline::{embed_documents, EmbedResult};
pub use pipeline::{EmbedResult, embed_documents};
						
						
						
						
 
						
						
							
						
						
						
@@ -67,8 +67,8 @@ impl OllamaClient {
    pub async fn health_check(&self) -> Result<()> {
    pub async fn health_check(&self) -> Result<()> {
        let url = format!("{}/api/tags", self.config.base_url);
        let url = format!("{}/api/tags", self.config.base_url);
        let response = self
        let response =
            .client
            self.client
                .get(&url)
                .get(&url)
                .send()
                .send()
                .await
                .await
						
							
						
						
							
						
						
						
@@ -111,12 +111,16 @@ impl OllamaClient {
            input: texts,
            input: texts,
        };
        };
        let response = self.client.post(&url).json(&request).send().await.map_err(
        let response = self
            |e| LoreError::OllamaUnavailable {
            .client
            .post(&url)
            .json(&request)
            .send()
            .await
            .map_err(|e| LoreError::OllamaUnavailable {
                base_url: self.config.base_url.clone(),
                base_url: self.config.base_url.clone(),
                source: Some(e),
                source: Some(e),
            },
            })?;
        )?;
        let status = response.status();
        let status = response.status();
        if !status.is_success() {
        if !status.is_success() {
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -8,8 +8,8 @@ use tracing::{info, warn};
use crate::core::error::Result;
use crate::core::error::Result;
use crate::embedding::change_detector::{count_pending_documents, find_pending_documents};
use crate::embedding::change_detector::{count_pending_documents, find_pending_documents};
use crate::embedding::chunk_ids::{encode_rowid, CHUNK_ROWID_MULTIPLIER};
use crate::embedding::chunk_ids::{CHUNK_ROWID_MULTIPLIER, encode_rowid};
use crate::embedding::chunking::{split_into_chunks, CHUNK_MAX_BYTES, EXPECTED_DIMS};
use crate::embedding::chunking::{CHUNK_MAX_BYTES, EXPECTED_DIMS, split_into_chunks};
use crate::embedding::ollama::OllamaClient;
use crate::embedding::ollama::OllamaClient;
const BATCH_SIZE: usize = 32;
const BATCH_SIZE: usize = 32;
						
							
						
						
							
						
						
						
@@ -211,10 +211,13 @@ pub async fn embed_documents(
                        || (err_lower.contains("413") && err_lower.contains("http"));
                        || (err_lower.contains("413") && err_lower.contains("http"));
                    if is_context_error && batch.len() > 1 {
                    if is_context_error && batch.len() > 1 {
                        warn!("Batch failed with context length error, retrying chunks individually");
                        warn!(
                            "Batch failed with context length error, retrying chunks individually"
                        );
                        for chunk in batch {
                        for chunk in batch {
                            match client.embed_batch(vec![chunk.text.clone()]).await {
                            match client.embed_batch(vec![chunk.text.clone()]).await {
                                Ok(embeddings) if !embeddings.is_empty()
                                Ok(embeddings)
                                    if !embeddings.is_empty()
                                        && embeddings[0].len() == EXPECTED_DIMS =>
                                        && embeddings[0].len() == EXPECTED_DIMS =>
                                {
                                {
                                    // Clear old embeddings on first successful chunk
                                    // Clear old embeddings on first successful chunk
						
							
						
						
							
						
						
						
@@ -272,7 +275,6 @@ pub async fn embed_documents(
                    }
                    }
                }
                }
            }
            }
        }
        }
        // Fire progress for all normal documents after embedding completes.
        // Fire progress for all normal documents after embedding completes.
						
							
						
						
							
						
						
						
@@ -314,6 +316,7 @@ fn clear_document_embeddings(conn: &Connection, document_id: i64) -> Result<()>
}
}
/// Store an embedding vector and its metadata.
/// Store an embedding vector and its metadata.
#[allow(clippy::too_many_arguments)]
fn store_embedding(
fn store_embedding(
    conn: &Connection,
    conn: &Connection,
    doc_id: i64,
    doc_id: i64,
						
							
						
						
							
						
						
						
@@ -347,8 +350,15 @@ fn store_embedding(
          created_at, attempt_count, last_error, chunk_max_bytes, chunk_count)
          created_at, attempt_count, last_error, chunk_max_bytes, chunk_count)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1, NULL, ?8, ?9)",
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1, NULL, ?8, ?9)",
        rusqlite::params![
        rusqlite::params![
            doc_id, chunk_index as i64, model_name, EXPECTED_DIMS as i64,
            doc_id,
            doc_hash, chunk_hash, now, CHUNK_MAX_BYTES as i64, chunk_count
            chunk_index as i64,
            model_name,
            EXPECTED_DIMS as i64,
            doc_hash,
            chunk_hash,
            now,
            CHUNK_MAX_BYTES as i64,
            chunk_count
        ],
        ],
    )?;
    )?;
						
							
						
						
							
						
						
						
@@ -377,8 +387,15 @@ fn record_embedding_error(
           last_attempt_at = ?7,
           last_attempt_at = ?7,
           chunk_max_bytes = ?9",
           chunk_max_bytes = ?9",
        rusqlite::params![
        rusqlite::params![
            doc_id, chunk_index as i64, model_name, EXPECTED_DIMS as i64,
            doc_id,
            doc_hash, chunk_hash, now, error, CHUNK_MAX_BYTES as i64
            chunk_index as i64,
            model_name,
            EXPECTED_DIMS as i64,
            doc_hash,
            chunk_hash,
            now,
            error,
            CHUNK_MAX_BYTES as i64
        ],
        ],
    )?;
    )?;
    Ok(())
    Ok(())
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -557,10 +557,7 @@ impl GitLabClient {
/// all pages into a Vec rather than using streaming.
/// all pages into a Vec rather than using streaming.
impl GitLabClient {
impl GitLabClient {
    /// Fetch all pages from a paginated endpoint, returning collected results.
    /// Fetch all pages from a paginated endpoint, returning collected results.
    async fn fetch_all_pages<T: serde::de::DeserializeOwned>(
    async fn fetch_all_pages<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<Vec<T>> {
        &self,
        path: &str,
    ) -> Result<Vec<T>> {
        let mut results = Vec::new();
        let mut results = Vec::new();
        let mut page = 1u32;
        let mut page = 1u32;
        let per_page = 100u32;
        let per_page = 100u32;
						
						
						
							
						
						
@@ -571,9 +568,7 @@ impl GitLabClient {
                ("page", page.to_string()),
                ("page", page.to_string()),
            ];
            ];
            let (items, headers) = self
            let (items, headers) = self.request_with_headers::<Vec<T>>(path, &params).await?;
                .request_with_headers::<Vec<T>>(path, &params)
                .await?;
            let is_empty = items.is_empty();
            let is_empty = items.is_empty();
            let full_page = items.len() as u32 == per_page;
            let full_page = items.len() as u32 == per_page;
						
							
						
						
							
						
						
						
@@ -604,9 +599,8 @@ impl GitLabClient {
        gitlab_project_id: i64,
        gitlab_project_id: i64,
        iid: i64,
        iid: i64,
    ) -> Result<Vec<GitLabStateEvent>> {
    ) -> Result<Vec<GitLabStateEvent>> {
        let path = format!(
        let path =
            "/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_state_events"
            format!("/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_state_events");
        );
        self.fetch_all_pages(&path).await
        self.fetch_all_pages(&path).await
    }
    }
						
						
						
							
						
						
@@ -616,9 +610,8 @@ impl GitLabClient {
        gitlab_project_id: i64,
        gitlab_project_id: i64,
        iid: i64,
        iid: i64,
    ) -> Result<Vec<GitLabLabelEvent>> {
    ) -> Result<Vec<GitLabLabelEvent>> {
        let path = format!(
        let path =
            "/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_label_events"
            format!("/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_label_events");
        );
        self.fetch_all_pages(&path).await
        self.fetch_all_pages(&path).await
    }
    }
						
						
						
							
						
						
@@ -628,9 +621,8 @@ impl GitLabClient {
        gitlab_project_id: i64,
        gitlab_project_id: i64,
        iid: i64,
        iid: i64,
    ) -> Result<Vec<GitLabMilestoneEvent>> {
    ) -> Result<Vec<GitLabMilestoneEvent>> {
        let path = format!(
        let path =
            "/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_milestone_events"
            format!("/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_milestone_events");
        );
        self.fetch_all_pages(&path).await
        self.fetch_all_pages(&path).await
    }
    }
						
							
						
						
							
						
						
						
@@ -676,18 +668,30 @@ impl GitLabClient {
        gitlab_project_id: i64,
        gitlab_project_id: i64,
        entity_type: &str,
        entity_type: &str,
        iid: i64,
        iid: i64,
    ) -> Result<(Vec<GitLabStateEvent>, Vec<GitLabLabelEvent>, Vec<GitLabMilestoneEvent>)> {
    ) -> Result<(
        Vec<GitLabStateEvent>,
        Vec<GitLabLabelEvent>,
        Vec<GitLabMilestoneEvent>,
    )> {
        match entity_type {
        match entity_type {
            "issue" => {
            "issue" => {
                let state = self.fetch_issue_state_events(gitlab_project_id, iid).await?;
                let state = self
                let label = self.fetch_issue_label_events(gitlab_project_id, iid).await?;
                    .fetch_issue_state_events(gitlab_project_id, iid)
                let milestone = self.fetch_issue_milestone_events(gitlab_project_id, iid).await?;
                    .await?;
                let label = self
                    .fetch_issue_label_events(gitlab_project_id, iid)
                    .await?;
                let milestone = self
                    .fetch_issue_milestone_events(gitlab_project_id, iid)
                    .await?;
                Ok((state, label, milestone))
                Ok((state, label, milestone))
            }
            }
            "merge_request" => {
            "merge_request" => {
                let state = self.fetch_mr_state_events(gitlab_project_id, iid).await?;
                let state = self.fetch_mr_state_events(gitlab_project_id, iid).await?;
                let label = self.fetch_mr_label_events(gitlab_project_id, iid).await?;
                let label = self.fetch_mr_label_events(gitlab_project_id, iid).await?;
                let milestone = self.fetch_mr_milestone_events(gitlab_project_id, iid).await?;
                let milestone = self
                    .fetch_mr_milestone_events(gitlab_project_id, iid)
                    .await?;
                Ok((state, label, milestone))
                Ok((state, label, milestone))
            }
            }
            _ => Err(LoreError::Other(format!(
            _ => Err(LoreError::Other(format!(
						
							
						
						
							
						
						
						
@@ -750,23 +754,23 @@ mod tests {
    #[test]
    #[test]
    fn cursor_rewind_clamps_to_zero() {
    fn cursor_rewind_clamps_to_zero() {
        let updated_after = Some(1000i64); // 1 second
        let updated_after = 1000i64; // 1 second
        let cursor_rewind_seconds = 10u32; // 10 seconds
        let cursor_rewind_seconds = 10u32; // 10 seconds
        // Rewind would be negative, should clamp to 0
        // Rewind would be negative, should clamp to 0
        let rewind_ms = (cursor_rewind_seconds as i64) * 1000;
        let rewind_ms = i64::from(cursor_rewind_seconds) * 1000;
        let rewound = (updated_after.unwrap() - rewind_ms).max(0);
        let rewound = (updated_after - rewind_ms).max(0);
        assert_eq!(rewound, 0);
        assert_eq!(rewound, 0);
    }
    }
    #[test]
    #[test]
    fn cursor_rewind_applies_correctly() {
    fn cursor_rewind_applies_correctly() {
        let updated_after = Some(1705312800000i64); // 2024-01-15T10:00:00.000Z
        let updated_after = 1705312800000i64; // 2024-01-15T10:00:00.000Z
        let cursor_rewind_seconds = 60u32; // 1 minute
        let cursor_rewind_seconds = 60u32; // 1 minute
        let rewind_ms = (cursor_rewind_seconds as i64) * 1000;
        let rewind_ms = i64::from(cursor_rewind_seconds) * 1000;
        let rewound = (updated_after.unwrap() - rewind_ms).max(0);
        let rewound = (updated_after - rewind_ms).max(0);
        // Should be 1 minute earlier
        // Should be 1 minute earlier
        assert_eq!(rewound, 1705312740000);
        assert_eq!(rewound, 1705312740000);
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -179,10 +179,7 @@ fn transform_single_note(
        resolvable: note.resolvable,
        resolvable: note.resolvable,
        resolved: note.resolved,
        resolved: note.resolved,
        resolved_by: note.resolved_by.as_ref().map(|a| a.username.clone()),
        resolved_by: note.resolved_by.as_ref().map(|a| a.username.clone()),
        resolved_at: note
        resolved_at: note.resolved_at.as_ref().and_then(|ts| iso_to_ms(ts)),
            .resolved_at
            .as_ref()
            .and_then(|ts| iso_to_ms(ts)),
        position_old_path,
        position_old_path,
        position_new_path,
        position_new_path,
        position_old_line,
        position_old_line,
						
							
						
						
							
						
						
						
@@ -235,7 +232,6 @@ fn extract_position_fields(
    }
    }
}
}
/// Transform notes from a GitLab discussion with strict timestamp parsing.
/// Transform notes from a GitLab discussion with strict timestamp parsing.
/// Returns Err if any timestamp is invalid - no silent fallback to 0.
/// Returns Err if any timestamp is invalid - no silent fallback to 0.
pub fn transform_notes_with_diff_position(
pub fn transform_notes_with_diff_position(
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -53,14 +53,17 @@ pub fn get_dirty_sources(conn: &Connection) -> Result<Vec<(SourceType, i64)>> {
        "SELECT source_type, source_id FROM dirty_sources
        "SELECT source_type, source_id FROM dirty_sources
         WHERE next_attempt_at IS NULL OR next_attempt_at <= ?1
         WHERE next_attempt_at IS NULL OR next_attempt_at <= ?1
         ORDER BY attempt_count ASC, queued_at ASC
         ORDER BY attempt_count ASC, queued_at ASC
         LIMIT ?2"
         LIMIT ?2",
    )?;
    )?;
    let rows = stmt
    let rows = stmt
        .query_map(rusqlite::params![now, DIRTY_SOURCES_BATCH_SIZE as i64], |row| {
        .query_map(
            rusqlite::params![now, DIRTY_SOURCES_BATCH_SIZE as i64],
            |row| {
                let st_str: String = row.get(0)?;
                let st_str: String = row.get(0)?;
                let source_id: i64 = row.get(1)?;
                let source_id: i64 = row.get(1)?;
                Ok((st_str, source_id))
                Ok((st_str, source_id))
        })?
            },
        )?
        .collect::<std::result::Result<Vec<_>, _>>()?;
        .collect::<std::result::Result<Vec<_>, _>>()?;
    let mut results = Vec::with_capacity(rows.len());
    let mut results = Vec::with_capacity(rows.len());
						
							
						
						
							
						
						
						
@@ -110,7 +113,14 @@ pub fn record_dirty_error(
           last_error = ?3,
           last_error = ?3,
           next_attempt_at = ?4
           next_attempt_at = ?4
         WHERE source_type = ?5 AND source_id = ?6",
         WHERE source_type = ?5 AND source_id = ?6",
        rusqlite::params![new_attempt, now, error, next_at, source_type.as_str(), source_id],
        rusqlite::params![
            new_attempt,
            now,
            error,
            next_at,
            source_type.as_str(),
            source_id
        ],
    )?;
    )?;
    Ok(())
    Ok(())
}
}
						
							
						
						
							
						
						
						
@@ -142,7 +152,9 @@ mod tests {
        let conn = setup_db();
        let conn = setup_db();
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        let count: i64 = conn.query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0)).unwrap();
        let count: i64 = conn
            .query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0))
            .unwrap();
        assert_eq!(count, 1);
        assert_eq!(count, 1);
    }
    }
						
						
						
							
						
						
@@ -154,7 +166,9 @@ mod tests {
            mark_dirty_tx(&tx, SourceType::Issue, 1).unwrap();
            mark_dirty_tx(&tx, SourceType::Issue, 1).unwrap();
            tx.commit().unwrap();
            tx.commit().unwrap();
        }
        }
        let count: i64 = conn.query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0)).unwrap();
        let count: i64 = conn
            .query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0))
            .unwrap();
        assert_eq!(count, 1);
        assert_eq!(count, 1);
    }
    }
						
						
						
							
						
						
@@ -165,21 +179,33 @@ mod tests {
        // Simulate error state
        // Simulate error state
        record_dirty_error(&conn, SourceType::Issue, 1, "test error").unwrap();
        record_dirty_error(&conn, SourceType::Issue, 1, "test error").unwrap();
        let attempt: i64 = conn.query_row(
        let attempt: i64 = conn
            "SELECT attempt_count FROM dirty_sources WHERE source_id = 1", [], |r| r.get(0)
            .query_row(
        ).unwrap();
                "SELECT attempt_count FROM dirty_sources WHERE source_id = 1",
                [],
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(attempt, 1);
        assert_eq!(attempt, 1);
        // Re-mark should reset
        // Re-mark should reset
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        let attempt: i64 = conn.query_row(
        let attempt: i64 = conn
            "SELECT attempt_count FROM dirty_sources WHERE source_id = 1", [], |r| r.get(0)
            .query_row(
        ).unwrap();
                "SELECT attempt_count FROM dirty_sources WHERE source_id = 1",
                [],
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(attempt, 0);
        assert_eq!(attempt, 0);
        let next_at: Option<i64> = conn.query_row(
        let next_at: Option<i64> = conn
            "SELECT next_attempt_at FROM dirty_sources WHERE source_id = 1", [], |r| r.get(0)
            .query_row(
        ).unwrap();
                "SELECT next_attempt_at FROM dirty_sources WHERE source_id = 1",
                [],
                |r| r.get(0),
            )
            .unwrap();
        assert!(next_at.is_none());
        assert!(next_at.is_none());
    }
    }
						
						
						
							
						
						
@@ -191,7 +217,8 @@ mod tests {
        conn.execute(
        conn.execute(
            "UPDATE dirty_sources SET next_attempt_at = 9999999999999 WHERE source_id = 1",
            "UPDATE dirty_sources SET next_attempt_at = 9999999999999 WHERE source_id = 1",
            [],
            [],
        ).unwrap();
        )
        .unwrap();
        let results = get_dirty_sources(&conn).unwrap();
        let results = get_dirty_sources(&conn).unwrap();
        assert!(results.is_empty());
        assert!(results.is_empty());
						
						
						
							
						
						
@@ -205,7 +232,8 @@ mod tests {
        conn.execute(
        conn.execute(
            "UPDATE dirty_sources SET attempt_count = 2 WHERE source_id = 1",
            "UPDATE dirty_sources SET attempt_count = 2 WHERE source_id = 1",
            [],
            [],
        ).unwrap();
        )
        .unwrap();
        // Insert issue 2 (fresh, attempt_count=0)
        // Insert issue 2 (fresh, attempt_count=0)
        mark_dirty(&conn, SourceType::Issue, 2).unwrap();
        mark_dirty(&conn, SourceType::Issue, 2).unwrap();
						
						
						
							
						
						
@@ -231,7 +259,9 @@ mod tests {
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        clear_dirty(&conn, SourceType::Issue, 1).unwrap();
        clear_dirty(&conn, SourceType::Issue, 1).unwrap();
        let count: i64 = conn.query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0)).unwrap();
        let count: i64 = conn
            .query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0))
            .unwrap();
        assert_eq!(count, 0);
        assert_eq!(count, 0);
    }
    }
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -65,7 +65,7 @@ pub fn get_pending_fetches(conn: &Connection, limit: usize) -> Result<Vec<Pendin
         FROM pending_discussion_fetches
         FROM pending_discussion_fetches
         WHERE next_attempt_at IS NULL OR next_attempt_at <= ?1
         WHERE next_attempt_at IS NULL OR next_attempt_at <= ?1
         ORDER BY queued_at ASC
         ORDER BY queued_at ASC
         LIMIT ?2"
         LIMIT ?2",
    )?;
    )?;
    let rows = stmt
    let rows = stmt
        .query_map(rusqlite::params![now, limit as i64], |row| {
        .query_map(rusqlite::params![now, limit as i64], |row| {
						
							
						
						
							
						
						
						
@@ -137,7 +137,15 @@ pub fn record_fetch_error(
           last_error = ?3,
           last_error = ?3,
           next_attempt_at = ?4
           next_attempt_at = ?4
         WHERE project_id = ?5 AND noteable_type = ?6 AND noteable_iid = ?7",
         WHERE project_id = ?5 AND noteable_type = ?6 AND noteable_iid = ?7",
        rusqlite::params![new_attempt, now, error, next_at, project_id, noteable_type.as_str(), noteable_iid],
        rusqlite::params![
            new_attempt,
            now,
            error,
            next_at,
            project_id,
            noteable_type.as_str(),
            noteable_iid
        ],
    )?;
    )?;
    Ok(())
    Ok(())
}
}
						
							
						
						
							
						
						
						
@@ -196,18 +204,24 @@ mod tests {
        queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
        queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
        record_fetch_error(&conn, 1, NoteableType::Issue, 42, "network error").unwrap();
        record_fetch_error(&conn, 1, NoteableType::Issue, 42, "network error").unwrap();
        let attempt: i32 = conn.query_row(
        let attempt: i32 = conn
            .query_row(
                "SELECT attempt_count FROM pending_discussion_fetches WHERE noteable_iid = 42",
                "SELECT attempt_count FROM pending_discussion_fetches WHERE noteable_iid = 42",
            [], |r| r.get(0),
                [],
        ).unwrap();
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(attempt, 1);
        assert_eq!(attempt, 1);
        // Re-queue should reset
        // Re-queue should reset
        queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
        queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
        let attempt: i32 = conn.query_row(
        let attempt: i32 = conn
            .query_row(
                "SELECT attempt_count FROM pending_discussion_fetches WHERE noteable_iid = 42",
                "SELECT attempt_count FROM pending_discussion_fetches WHERE noteable_iid = 42",
            [], |r| r.get(0),
                [],
        ).unwrap();
                |r| r.get(0),
            )
            .unwrap();
        assert_eq!(attempt, 0);
        assert_eq!(attempt, 0);
    }
    }
						
						
						
							
						
						
@@ -230,9 +244,11 @@ mod tests {
        queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
        queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
        complete_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
        complete_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
        let count: i64 = conn.query_row(
        let count: i64 = conn
            "SELECT COUNT(*) FROM pending_discussion_fetches", [], |r| r.get(0),
            .query_row("SELECT COUNT(*) FROM pending_discussion_fetches", [], |r| {
        ).unwrap();
                r.get(0)
            })
            .unwrap();
        assert_eq!(count, 0);
        assert_eq!(count, 0);
    }
    }
						
						
						
							
						
						
@@ -249,17 +265,23 @@ mod tests {
        assert_eq!(attempt, 1);
        assert_eq!(attempt, 1);
        assert_eq!(error, Some("timeout".to_string()));
        assert_eq!(error, Some("timeout".to_string()));
        let next_at: Option<i64> = conn.query_row(
        let next_at: Option<i64> = conn
            .query_row(
                "SELECT next_attempt_at FROM pending_discussion_fetches WHERE noteable_iid = 10",
                "SELECT next_attempt_at FROM pending_discussion_fetches WHERE noteable_iid = 10",
            [], |r| r.get(0),
                [],
        ).unwrap();
                |r| r.get(0),
            )
            .unwrap();
        assert!(next_at.is_some());
        assert!(next_at.is_some());
    }
    }
    #[test]
    #[test]
    fn test_noteable_type_parse() {
    fn test_noteable_type_parse() {
        assert_eq!(NoteableType::parse("Issue"), Some(NoteableType::Issue));
        assert_eq!(NoteableType::parse("Issue"), Some(NoteableType::Issue));
        assert_eq!(NoteableType::parse("MergeRequest"), Some(NoteableType::MergeRequest));
        assert_eq!(
            NoteableType::parse("MergeRequest"),
            Some(NoteableType::MergeRequest)
        );
        assert_eq!(NoteableType::parse("invalid"), None);
        assert_eq!(NoteableType::parse("invalid"), None);
    }
    }
}
}
						
						
						
						
 
						
						
							
						
						
						
@@ -14,9 +14,9 @@ use crate::Config;
use crate::core::error::Result;
use crate::core::error::Result;
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::documents::SourceType;
use crate::documents::SourceType;
use crate::ingestion::dirty_tracker;
use crate::gitlab::GitLabClient;
use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::{NoteableRef, transform_discussion, transform_notes};
use crate::gitlab::transformers::{NoteableRef, transform_discussion, transform_notes};
use crate::ingestion::dirty_tracker;
use super::issues::IssueForDiscussionSync;
use super::issues::IssueForDiscussionSync;
						
							
						
						
							
						
						
						
@@ -95,7 +95,6 @@ async fn ingest_discussions_for_issue(
    let mut pagination_error: Option<crate::core::error::LoreError> = None;
    let mut pagination_error: Option<crate::core::error::LoreError> = None;
    while let Some(disc_result) = discussions_stream.next().await {
    while let Some(disc_result) = discussions_stream.next().await {
        // Handle errors - record but don't delete stale data
        // Handle errors - record but don't delete stale data
        let gitlab_discussion = match disc_result {
        let gitlab_discussion = match disc_result {
            Ok(d) => d,
            Ok(d) => d,
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -18,10 +18,10 @@ use crate::core::error::{LoreError, Result};
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::time::now_ms;
use crate::core::time::now_ms;
use crate::documents::SourceType;
use crate::documents::SourceType;
use crate::ingestion::dirty_tracker;
use crate::gitlab::GitLabClient;
use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::{MilestoneRow, transform_issue};
use crate::gitlab::transformers::{MilestoneRow, transform_issue};
use crate::gitlab::types::GitLabIssue;
use crate::gitlab::types::GitLabIssue;
use crate::ingestion::dirty_tracker;
/// Result of issue ingestion.
/// Result of issue ingestion.
#[derive(Debug, Default)]
#[derive(Debug, Default)]
						
							
						
						
							
						
						
						
@@ -174,13 +174,13 @@ fn passes_cursor_filter(issue: &GitLabIssue, cursor: &SyncCursor) -> Result<bool
        return Ok(false);
        return Ok(false);
    }
    }
    if issue_ts == cursor_ts {
    if issue_ts == cursor_ts
        if let Some(cursor_id) = cursor.tie_breaker_id {
        && cursor
            if issue.id <= cursor_id {
            .tie_breaker_id
            .is_some_and(|cursor_id| issue.id <= cursor_id)
    {
        return Ok(false);
        return Ok(false);
    }
    }
        }
    }
    Ok(true)
    Ok(true)
}
}
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -17,10 +17,10 @@ use crate::core::error::{LoreError, Result};
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::time::now_ms;
use crate::core::time::now_ms;
use crate::documents::SourceType;
use crate::documents::SourceType;
use crate::ingestion::dirty_tracker;
use crate::gitlab::GitLabClient;
use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::merge_request::transform_merge_request;
use crate::gitlab::transformers::merge_request::transform_merge_request;
use crate::gitlab::types::GitLabMergeRequest;
use crate::gitlab::types::GitLabMergeRequest;
use crate::ingestion::dirty_tracker;
/// Result of merge request ingestion.
/// Result of merge request ingestion.
#[derive(Debug, Default)]
#[derive(Debug, Default)]
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -19,7 +19,7 @@ pub use merge_requests::{
};
};
pub use mr_discussions::{IngestMrDiscussionsResult, ingest_mr_discussions};
pub use mr_discussions::{IngestMrDiscussionsResult, ingest_mr_discussions};
pub use orchestrator::{
pub use orchestrator::{
    IngestMrProjectResult, IngestProjectResult, ProgressCallback, ProgressEvent,
    DrainResult, IngestMrProjectResult, IngestProjectResult, ProgressCallback, ProgressEvent,
    ingest_project_issues, ingest_project_issues_with_progress, ingest_project_merge_requests,
    ingest_project_issues, ingest_project_issues_with_progress, ingest_project_merge_requests,
    ingest_project_merge_requests_with_progress,
    ingest_project_merge_requests_with_progress,
};
};
						
						
						
						
 
						
						
							
						
						
						
@@ -19,13 +19,13 @@ use crate::core::error::Result;
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::time::now_ms;
use crate::core::time::now_ms;
use crate::documents::SourceType;
use crate::documents::SourceType;
use crate::ingestion::dirty_tracker;
use crate::gitlab::GitLabClient;
use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::{
use crate::gitlab::transformers::{
    NormalizedDiscussion, NormalizedNote, transform_mr_discussion,
    NormalizedDiscussion, NormalizedNote, transform_mr_discussion,
    transform_notes_with_diff_position,
    transform_notes_with_diff_position,
};
};
use crate::gitlab::types::GitLabDiscussion;
use crate::gitlab::types::GitLabDiscussion;
use crate::ingestion::dirty_tracker;
use super::merge_requests::MrForDiscussionSync;
use super::merge_requests::MrForDiscussionSync;
						
							
						
						
							
						
						
						
@@ -72,7 +72,10 @@ pub async fn prefetch_mr_discussions(
    debug!(mr_iid = mr.iid, "Prefetching discussions for MR");
    debug!(mr_iid = mr.iid, "Prefetching discussions for MR");
    // Fetch all discussions from GitLab
    // Fetch all discussions from GitLab
    let raw_discussions = match client.fetch_all_mr_discussions(gitlab_project_id, mr.iid).await {
    let raw_discussions = match client
        .fetch_all_mr_discussions(gitlab_project_id, mr.iid)
        .await
    {
        Ok(d) => d,
        Ok(d) => d,
        Err(e) => {
        Err(e) => {
            return PrefetchedMrDiscussions {
            return PrefetchedMrDiscussions {
						
							
						
						
							
						
						
						
@@ -241,7 +244,10 @@ pub fn write_prefetched_mr_discussions(
        mark_discussions_synced(conn, mr.local_mr_id, mr.updated_at)?;
        mark_discussions_synced(conn, mr.local_mr_id, mr.updated_at)?;
        clear_sync_health_error(conn, mr.local_mr_id)?;
        clear_sync_health_error(conn, mr.local_mr_id)?;
        debug!(mr_iid = mr.iid, "MR discussion sync complete, watermark advanced");
        debug!(
            mr_iid = mr.iid,
            "MR discussion sync complete, watermark advanced"
        );
    } else if prefetched.had_transform_errors {
    } else if prefetched.had_transform_errors {
        warn!(
        warn!(
            mr_iid = mr.iid,
            mr_iid = mr.iid,
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -7,9 +7,12 @@
use futures::future::join_all;
use futures::future::join_all;
use rusqlite::Connection;
use rusqlite::Connection;
use tracing::info;
use tracing::{debug, info, warn};
use crate::Config;
use crate::Config;
use crate::core::dependent_queue::{
    claim_jobs, complete_job, count_pending_jobs, enqueue_job, fail_job, reclaim_stale_locks,
};
use crate::core::error::Result;
use crate::core::error::Result;
use crate::gitlab::GitLabClient;
use crate::gitlab::GitLabClient;
						
							
						
						
							
						
						
						
@@ -50,6 +53,12 @@ pub enum ProgressEvent {
    MrDiscussionSynced { current: usize, total: usize },
    MrDiscussionSynced { current: usize, total: usize },
    /// MR discussion sync complete
    /// MR discussion sync complete
    MrDiscussionSyncComplete,
    MrDiscussionSyncComplete,
    /// Resource event fetching started (total jobs)
    ResourceEventsFetchStarted { total: usize },
    /// Resource event fetched for an entity (current/total)
    ResourceEventFetched { current: usize, total: usize },
    /// Resource event fetching complete
    ResourceEventsFetchComplete { fetched: usize, failed: usize },
}
}
/// Result of full project ingestion (issues).
/// Result of full project ingestion (issues).
						
						
						
							
						
						
@@ -63,6 +72,8 @@ pub struct IngestProjectResult {
    pub notes_upserted: usize,
    pub notes_upserted: usize,
    pub issues_synced_discussions: usize,
    pub issues_synced_discussions: usize,
    pub issues_skipped_discussion_sync: usize,
    pub issues_skipped_discussion_sync: usize,
    pub resource_events_fetched: usize,
    pub resource_events_failed: usize,
}
}
/// Result of MR ingestion for a project.
/// Result of MR ingestion for a project.
						
						
						
							
						
						
@@ -80,6 +91,8 @@ pub struct IngestMrProjectResult {
    pub diffnotes_count: usize,
    pub diffnotes_count: usize,
    pub mrs_synced_discussions: usize,
    pub mrs_synced_discussions: usize,
    pub mrs_skipped_discussion_sync: usize,
    pub mrs_skipped_discussion_sync: usize,
    pub resource_events_fetched: usize,
    pub resource_events_failed: usize,
}
}
/// Ingest all issues and their discussions for a project.
/// Ingest all issues and their discussions for a project.
						
							
						
						
							
						
						
						
@@ -167,6 +180,21 @@ pub async fn ingest_project_issues_with_progress(
        result.issues_synced_discussions += 1;
        result.issues_synced_discussions += 1;
    }
    }
    // Step 4: Enqueue and drain resource events (if enabled)
    if config.sync.fetch_resource_events {
        // Enqueue resource_events jobs for all issues in this project
        let enqueued = enqueue_resource_events_for_entity_type(conn, project_id, "issue")?;
        if enqueued > 0 {
            debug!(enqueued, "Enqueued resource events jobs for issues");
        }
        // Drain the queue
        let drain_result =
            drain_resource_events(conn, client, config, gitlab_project_id, &progress).await?;
        result.resource_events_fetched = drain_result.fetched;
        result.resource_events_failed = drain_result.failed;
    }
    info!(
    info!(
        issues_fetched = result.issues_fetched,
        issues_fetched = result.issues_fetched,
        issues_upserted = result.issues_upserted,
        issues_upserted = result.issues_upserted,
						
						
						
							
						
						
@@ -175,6 +203,8 @@ pub async fn ingest_project_issues_with_progress(
        notes_upserted = result.notes_upserted,
        notes_upserted = result.notes_upserted,
        issues_synced = result.issues_synced_discussions,
        issues_synced = result.issues_synced_discussions,
        issues_skipped = result.issues_skipped_discussion_sync,
        issues_skipped = result.issues_skipped_discussion_sync,
        resource_events_fetched = result.resource_events_fetched,
        resource_events_failed = result.resource_events_failed,
        "Project ingestion complete"
        "Project ingestion complete"
    );
    );
						
							
						
						
							
						
						
						
@@ -343,6 +373,19 @@ pub async fn ingest_project_merge_requests_with_progress(
        }
        }
    }
    }
    // Step 4: Enqueue and drain resource events (if enabled)
    if config.sync.fetch_resource_events {
        let enqueued = enqueue_resource_events_for_entity_type(conn, project_id, "merge_request")?;
        if enqueued > 0 {
            debug!(enqueued, "Enqueued resource events jobs for MRs");
        }
        let drain_result =
            drain_resource_events(conn, client, config, gitlab_project_id, &progress).await?;
        result.resource_events_fetched = drain_result.fetched;
        result.resource_events_failed = drain_result.failed;
    }
    info!(
    info!(
        mrs_fetched = result.mrs_fetched,
        mrs_fetched = result.mrs_fetched,
        mrs_upserted = result.mrs_upserted,
        mrs_upserted = result.mrs_upserted,
						
						
						
							
						
						
@@ -352,6 +395,8 @@ pub async fn ingest_project_merge_requests_with_progress(
        diffnotes = result.diffnotes_count,
        diffnotes = result.diffnotes_count,
        mrs_synced = result.mrs_synced_discussions,
        mrs_synced = result.mrs_synced_discussions,
        mrs_skipped = result.mrs_skipped_discussion_sync,
        mrs_skipped = result.mrs_skipped_discussion_sync,
        resource_events_fetched = result.resource_events_fetched,
        resource_events_failed = result.resource_events_failed,
        "MR project ingestion complete"
        "MR project ingestion complete"
    );
    );
						
							
						
						
							
						
						
						
@@ -405,6 +450,368 @@ async fn sync_mr_discussions_sequential(
    Ok(results)
    Ok(results)
}
}
/// Result of draining the resource events queue.
///
/// Both counters are per-job tallies accumulated by `drain_resource_events`.
#[derive(Debug, Default)]
pub struct DrainResult {
    /// Jobs whose events were fetched from GitLab, stored, and then marked
    /// complete in the queue.
    pub fetched: usize,
    /// Jobs that were handed to `fail_job`, either because the GitLab fetch
    /// failed or because storing the fetched events failed.
    pub failed: usize,
}
/// Queue a `resource_events` job for every issue or merge request that belongs
/// to `project_id`.
///
/// `entity_type` selects the source table: `"issue"` reads from `issues`,
/// `"merge_request"` reads from `merge_requests`. Any other value is treated as
/// "nothing to do" and yields `Ok(0)`.
///
/// Returns how many calls to `enqueue_job` reported `true`. Per the original
/// notes, the queue table carries a UNIQUE constraint so re-enqueueing an
/// entity that is already queued is a no-op and is not counted.
fn enqueue_resource_events_for_entity_type(
    conn: &Connection,
    project_id: i64,
    entity_type: &str,
) -> Result<usize> {
    // Only two tables are ever queried, so the SQL is spelled out per entity
    // type rather than assembled at runtime.
    let select_sql = match entity_type {
        "issue" => "SELECT id, iid FROM issues WHERE project_id = ?1",
        "merge_request" => "SELECT id, iid FROM merge_requests WHERE project_id = ?1",
        _ => return Ok(0),
    };

    // Read every (local id, iid) pair up front; enqueueing starts only after
    // the whole result set has been collected successfully.
    let mut select = conn.prepare_cached(select_sql)?;
    let id_pairs: Vec<(i64, i64)> = select
        .query_map([project_id], |row| {
            let local_id: i64 = row.get(0)?;
            let iid: i64 = row.get(1)?;
            Ok((local_id, iid))
        })?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    let mut newly_queued = 0;
    for (local_id, iid) in id_pairs {
        let inserted = enqueue_job(
            conn,
            project_id,
            entity_type,
            iid,
            local_id,
            "resource_events",
            None,
        )?;
        if inserted {
            newly_queued += 1;
        }
    }

    Ok(newly_queued)
}
/// Work through the pending `resource_events` queue: claim a batch of jobs,
/// fetch each entity's events from GitLab, persist them, and mark the job as
/// completed or failed.
///
/// Jobs are handled one at a time (the original notes attribute this to
/// `rusqlite::Connection` not being `Send`). A failed fetch or store is logged,
/// handed to `fail_job` (which, per the original notes, applies exponential
/// backoff) and counted in `DrainResult::failed`; it does not abort the drain.
/// Errors from the queue bookkeeping itself (`claim_jobs`, `complete_job`,
/// `fail_job`, ...) are propagated to the caller.
///
/// Progress events are sent to `progress` when a callback is present. Nothing
/// is emitted if there are no pending `resource_events` jobs.
///
/// NOTE(review): every claimed job is fetched against `gitlab_project_id`,
/// while `job.project_id` is only used when storing. Confirm that `claim_jobs`
/// cannot hand back jobs that belong to a different project.
async fn drain_resource_events(
    conn: &Connection,
    client: &GitLabClient,
    config: &Config,
    gitlab_project_id: i64,
    progress: &Option<ProgressCallback>,
) -> Result<DrainResult> {
    let mut summary = DrainResult::default();
    let claim_limit = config.sync.dependent_concurrency as usize;

    // Release locks that an earlier run left behind before counting work.
    let reclaimed = reclaim_stale_locks(conn, config.sync.stale_lock_minutes)?;
    if reclaimed > 0 {
        info!(reclaimed, "Reclaimed stale resource event locks");
    }

    // The pending count is taken once and used as the progress denominator.
    let total_pending = count_pending_jobs(conn)?
        .get("resource_events")
        .copied()
        .unwrap_or(0);
    if total_pending == 0 {
        return Ok(summary);
    }

    let notify = |event: ProgressEvent| {
        if let Some(callback) = progress {
            callback(event);
        }
    };

    notify(ProgressEvent::ResourceEventsFetchStarted {
        total: total_pending,
    });

    // Safety valve: jobs that fail and become claimable again within this run
    // must not keep the drain spinning forever, so the number of processed
    // jobs is capped at twice the initial pending count. The cap is only
    // checked between batches.
    let max_iterations = total_pending * 2;
    let mut iterations = 0;

    while iterations < max_iterations {
        let jobs = claim_jobs(conn, "resource_events", claim_limit)?;
        if jobs.is_empty() {
            break;
        }

        for job in &jobs {
            iterations += 1;

            let fetch_outcome = client
                .fetch_all_resource_events(gitlab_project_id, &job.entity_type, job.entity_iid)
                .await;

            // Reduce the fetch + store steps to "why did it fail, if at all",
            // logging the specific stage that went wrong.
            let failure_reason = match fetch_outcome {
                Ok((state_events, label_events, milestone_events)) => {
                    let stored = store_resource_events(
                        conn,
                        job.project_id,
                        &job.entity_type,
                        job.entity_local_id,
                        &state_events,
                        &label_events,
                        &milestone_events,
                    );
                    match stored {
                        Ok(()) => None,
                        Err(err) => {
                            warn!(
                                entity_type = %job.entity_type,
                                entity_iid = job.entity_iid,
                                error = %err,
                                "Failed to store resource events"
                            );
                            Some(err.to_string())
                        }
                    }
                }
                Err(err) => {
                    warn!(
                        entity_type = %job.entity_type,
                        entity_iid = job.entity_iid,
                        error = %err,
                        "Failed to fetch resource events from GitLab"
                    );
                    Some(err.to_string())
                }
            };

            // Record the outcome in the queue and in the running tallies.
            match failure_reason {
                None => {
                    complete_job(conn, job.id)?;
                    summary.fetched += 1;
                }
                Some(reason) => {
                    fail_job(conn, job.id, &reason)?;
                    summary.failed += 1;
                }
            }

            // `iterations` counts every job handled so far, success or not.
            notify(ProgressEvent::ResourceEventFetched {
                current: iterations,
                total: total_pending,
            });
        }
    }

    // Breaking out on an empty claim always happens with budget to spare, so
    // reaching the cap here means the guard is what stopped the loop.
    if iterations >= max_iterations {
        warn!(
            iterations,
            total_pending, "Resource events drain hit max iterations guard, stopping"
        );
    }

    notify(ProgressEvent::ResourceEventsFetchComplete {
        fetched: summary.fetched,
        failed: summary.failed,
    });

    if summary.fetched > 0 || summary.failed > 0 {
        info!(
            fetched = summary.fetched,
            failed = summary.failed,
            "Resource events drain complete"
        );
    }

    Ok(summary)
}
/// Persist the state, label and milestone events fetched for one entity.
///
/// All writes happen inside a single transaction opened with
/// `unchecked_transaction`, which only needs `&Connection`. Each event kind is
/// written by its own `store_*_events_tx` helper, and a kind with no events is
/// skipped entirely. If any helper fails, the error is returned before the
/// commit, so the transaction is dropped uncommitted (rusqlite's default drop
/// behavior is to roll back).
///
/// NOTE(review): the original comments justify `unchecked_transaction` by the
/// caller being single-threaded and the database using WAL mode — confirm this
/// still holds for all callers.
fn store_resource_events(
    conn: &Connection,
    project_id: i64,
    entity_type: &str,
    entity_local_id: i64,
    state_events: &[crate::gitlab::types::GitLabStateEvent],
    label_events: &[crate::gitlab::types::GitLabLabelEvent],
    milestone_events: &[crate::gitlab::types::GitLabMilestoneEvent],
) -> Result<()> {
    let has_state_events = !state_events.is_empty();
    let has_label_events = !label_events.is_empty();
    let has_milestone_events = !milestone_events.is_empty();

    // One transaction covers all three event kinds so they land together.
    let txn = conn.unchecked_transaction()?;

    if has_state_events {
        store_state_events_tx(&txn, project_id, entity_type, entity_local_id, state_events)?;
    }

    if has_label_events {
        store_label_events_tx(&txn, project_id, entity_type, entity_local_id, label_events)?;
    }

    if has_milestone_events {
        store_milestone_events_tx(
            &txn,
            project_id,
            entity_type,
            entity_local_id,
            milestone_events,
        )?;
    }

    txn.commit()?;
    Ok(())
}
/// Persist GitLab state events for one issue or merge request inside `tx`.
///
/// `entity_type` selects the owning column: `"issue"` binds `entity_local_id`
/// to `issue_id`, `"merge_request"` binds it to `merge_request_id`, and the
/// other column is bound as NULL. Any other value makes this a no-op that
/// returns `Ok(())` without touching the database.
///
/// Rows are written with `INSERT OR REPLACE` into `resource_state_events`.
///
/// # Errors
///
/// Returns an error when the statement cannot be prepared or executed, or
/// when an event's `created_at` is rejected by `iso_to_ms_strict`.
fn store_state_events_tx(
    tx: &rusqlite::Transaction<'_>,
    project_id: i64,
    entity_type: &str,
    entity_local_id: i64,
    events: &[crate::gitlab::types::GitLabStateEvent],
) -> Result<()> {
    // Unknown entity kinds are silently ignored.
    let owner_is_issue = match entity_type {
        "issue" => true,
        "merge_request" => false,
        _ => return Ok(()),
    };
    let issue_id = owner_is_issue.then_some(entity_local_id);
    let merge_request_id = (!owner_is_issue).then_some(entity_local_id);

    let mut insert = tx.prepare_cached(
        "INSERT OR REPLACE INTO resource_state_events
         (gitlab_id, project_id, issue_id, merge_request_id, state,
          actor_gitlab_id, actor_username, created_at,
          source_commit, source_merge_request_iid)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
    )?;

    events.iter().try_for_each(|event| -> Result<()> {
        let created_at = crate::core::time::iso_to_ms_strict(&event.created_at)
            .map_err(crate::core::error::LoreError::Other)?;
        // The acting user is optional; both actor columns are NULL without one.
        let (actor_id, actor_username) = match &event.user {
            Some(user) => (Some(user.id), Some(user.username.as_str())),
            None => (None, None),
        };
        let source_mr_iid = event.source_merge_request.as_ref().map(|mr| mr.iid);

        insert.execute(rusqlite::params![
            event.id,
            project_id,
            issue_id,
            merge_request_id,
            event.state,
            actor_id,
            actor_username,
            created_at,
            event.source_commit,
            source_mr_iid,
        ])?;
        Ok(())
    })
}
/// Persist GitLab label events for one issue or merge request inside `tx`.
///
/// `entity_type` selects the owning column: `"issue"` binds `entity_local_id`
/// to `issue_id`, `"merge_request"` binds it to `merge_request_id`, and the
/// other column is bound as NULL. Any other value makes this a no-op that
/// returns `Ok(())` without touching the database.
///
/// Rows are written with `INSERT OR REPLACE` into `resource_label_events`.
///
/// # Errors
///
/// Returns an error when the statement cannot be prepared or executed, or
/// when an event's `created_at` is rejected by `iso_to_ms_strict`.
fn store_label_events_tx(
    tx: &rusqlite::Transaction<'_>,
    project_id: i64,
    entity_type: &str,
    entity_local_id: i64,
    events: &[crate::gitlab::types::GitLabLabelEvent],
) -> Result<()> {
    // Unknown entity kinds are silently ignored.
    let owner_is_issue = match entity_type {
        "issue" => true,
        "merge_request" => false,
        _ => return Ok(()),
    };
    let issue_id = owner_is_issue.then_some(entity_local_id);
    let merge_request_id = (!owner_is_issue).then_some(entity_local_id);

    let mut insert = tx.prepare_cached(
        "INSERT OR REPLACE INTO resource_label_events
         (gitlab_id, project_id, issue_id, merge_request_id, action,
          label_name, actor_gitlab_id, actor_username, created_at)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
    )?;

    events.iter().try_for_each(|event| -> Result<()> {
        let created_at = crate::core::time::iso_to_ms_strict(&event.created_at)
            .map_err(crate::core::error::LoreError::Other)?;
        // The acting user is optional; both actor columns are NULL without one.
        let (actor_id, actor_username) = match &event.user {
            Some(user) => (Some(user.id), Some(user.username.as_str())),
            None => (None, None),
        };

        insert.execute(rusqlite::params![
            event.id,
            project_id,
            issue_id,
            merge_request_id,
            event.action,
            event.label.name,
            actor_id,
            actor_username,
            created_at,
        ])?;
        Ok(())
    })
}
/// Persist GitLab milestone events for one issue or merge request inside `tx`.
///
/// `entity_type` selects the owning column: `"issue"` binds `entity_local_id`
/// to `issue_id`, `"merge_request"` binds it to `merge_request_id`, and the
/// other column is bound as NULL. Any other value makes this a no-op that
/// returns `Ok(())` without touching the database.
///
/// Rows are written with `INSERT OR REPLACE` into `resource_milestone_events`.
///
/// # Errors
///
/// Returns an error when the statement cannot be prepared or executed, or
/// when an event's `created_at` is rejected by `iso_to_ms_strict`.
fn store_milestone_events_tx(
    tx: &rusqlite::Transaction<'_>,
    project_id: i64,
    entity_type: &str,
    entity_local_id: i64,
    events: &[crate::gitlab::types::GitLabMilestoneEvent],
) -> Result<()> {
    // Unknown entity kinds are silently ignored.
    let owner_is_issue = match entity_type {
        "issue" => true,
        "merge_request" => false,
        _ => return Ok(()),
    };
    let issue_id = owner_is_issue.then_some(entity_local_id);
    let merge_request_id = (!owner_is_issue).then_some(entity_local_id);

    let mut insert = tx.prepare_cached(
        "INSERT OR REPLACE INTO resource_milestone_events
         (gitlab_id, project_id, issue_id, merge_request_id, action,
          milestone_title, milestone_id, actor_gitlab_id, actor_username, created_at)
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
    )?;

    events.iter().try_for_each(|event| -> Result<()> {
        let created_at = crate::core::time::iso_to_ms_strict(&event.created_at)
            .map_err(crate::core::error::LoreError::Other)?;
        // The acting user is optional; both actor columns are NULL without one.
        let (actor_id, actor_username) = match &event.user {
            Some(user) => (Some(user.id), Some(user.username.as_str())),
            None => (None, None),
        };

        insert.execute(rusqlite::params![
            event.id,
            project_id,
            issue_id,
            merge_request_id,
            event.action,
            event.milestone.title,
            event.milestone.id,
            actor_id,
            actor_username,
            created_at,
        ])?;
        Ok(())
    })
}
#[cfg(test)]
#[cfg(test)]
mod tests {
mod tests {
    use super::*;
    use super::*;
						
						
						
							
						
						
@@ -419,6 +826,8 @@ mod tests {
        assert_eq!(result.notes_upserted, 0);
        assert_eq!(result.notes_upserted, 0);
        assert_eq!(result.issues_synced_discussions, 0);
        assert_eq!(result.issues_synced_discussions, 0);
        assert_eq!(result.issues_skipped_discussion_sync, 0);
        assert_eq!(result.issues_skipped_discussion_sync, 0);
        assert_eq!(result.resource_events_fetched, 0);
        assert_eq!(result.resource_events_failed, 0);
    }
    }
    #[test]
    #[test]
						
						
						
							
						
						
@@ -436,5 +845,28 @@ mod tests {
        assert_eq!(result.diffnotes_count, 0);
        assert_eq!(result.diffnotes_count, 0);
        assert_eq!(result.mrs_synced_discussions, 0);
        assert_eq!(result.mrs_synced_discussions, 0);
        assert_eq!(result.mrs_skipped_discussion_sync, 0);
        assert_eq!(result.mrs_skipped_discussion_sync, 0);
        assert_eq!(result.resource_events_fetched, 0);
        assert_eq!(result.resource_events_failed, 0);
    }
    /// A defaulted `DrainResult` reports zero fetched and zero failed jobs.
    #[test]
    fn drain_result_default_has_zero_counts() {
        let fresh = DrainResult::default();
        assert_eq!((fresh.fetched, fresh.failed), (0, 0));
    }
    /// Compile-time check: the resource-event progress variants can be built
    /// with the field names used here. Nothing is asserted at runtime.
    #[test]
    fn progress_event_resource_variants_exist() {
        let _variants = [
            ProgressEvent::ResourceEventsFetchStarted { total: 10 },
            ProgressEvent::ResourceEventFetched {
                current: 5,
                total: 10,
            },
            ProgressEvent::ResourceEventsFetchComplete {
                fetched: 8,
                failed: 2,
            },
        ];
    }
    }
}
}
						
						
						
						
 
						
						
							
						
						
						
@@ -10,23 +10,25 @@ use tracing_subscriber::util::SubscriberInitExt;
use lore::Config;
use lore::Config;
use lore::cli::commands::{
use lore::cli::commands::{
    InitInputs, InitOptions, InitResult, ListFilters, MrListFilters, SearchCliFilters, open_issue_in_browser,
    IngestDisplay, InitInputs, InitOptions, InitResult, ListFilters, MrListFilters,
    open_mr_in_browser, print_count, print_count_json, print_event_count, print_event_count_json, print_doctor_results, print_generate_docs,
    SearchCliFilters, SyncOptions, open_issue_in_browser, open_mr_in_browser, print_count,
    print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues,
    print_count_json, print_doctor_results, print_embed, print_embed_json, print_event_count,
    print_list_issues_json, print_list_mrs, print_list_mrs_json, print_search_results,
    print_event_count_json, print_generate_docs, print_generate_docs_json, print_ingest_summary,
    print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr, print_stats,
    print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs,
    print_stats_json,
    print_list_mrs_json, print_search_results, print_search_results_json, print_show_issue,
    print_embed, print_embed_json, print_sync, print_sync_json,
    print_show_issue_json, print_show_mr, print_show_mr_json, print_stats, print_stats_json,
    print_show_mr_json, print_sync_status, print_sync_status_json, run_auth_test, run_count,
    print_sync, print_sync_json, print_sync_status, print_sync_status_json, run_auth_test,
    run_count_events, run_doctor, run_embed, run_generate_docs, run_ingest, run_init, run_list_issues, run_list_mrs,
    run_count, run_count_events, run_doctor, run_embed, run_generate_docs, run_ingest, run_init,
    run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, SyncOptions,
    run_list_issues, run_list_mrs, run_search, run_show_issue, run_show_mr, run_stats, run_sync,
    IngestDisplay,
    run_sync_status,
};
};
use lore::cli::{
use lore::cli::{
    Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs,
    Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs,
    SearchArgs, StatsArgs, SyncArgs,
    SearchArgs, StatsArgs, SyncArgs,
};
};
use lore::core::db::{create_connection, get_schema_version, run_migrations, LATEST_SCHEMA_VERSION};
use lore::core::db::{
    LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations,
};
use lore::core::error::{LoreError, RobotErrorOutput};
use lore::core::error::{LoreError, RobotErrorOutput};
use lore::core::paths::get_config_path;
use lore::core::paths::get_config_path;
use lore::core::paths::get_db_path;
use lore::core::paths::get_db_path;
						
							
						
						
							
						
						
						
@@ -76,10 +78,10 @@ async fn main() {
        Commands::Stats(args) => handle_stats(cli.config.as_deref(), args, robot_mode).await,
        Commands::Stats(args) => handle_stats(cli.config.as_deref(), args, robot_mode).await,
        Commands::Embed(args) => handle_embed(cli.config.as_deref(), args, robot_mode).await,
        Commands::Embed(args) => handle_embed(cli.config.as_deref(), args, robot_mode).await,
        Commands::Sync(args) => handle_sync_cmd(cli.config.as_deref(), args, robot_mode).await,
        Commands::Sync(args) => handle_sync_cmd(cli.config.as_deref(), args, robot_mode).await,
        Commands::Ingest(args) => handle_ingest(cli.config.as_deref(), args, robot_mode, quiet).await,
        Commands::Ingest(args) => {
        Commands::Count(args) => {
            handle_ingest(cli.config.as_deref(), args, robot_mode, quiet).await
            handle_count(cli.config.as_deref(), args, robot_mode).await
        }
        }
        Commands::Count(args) => handle_count(cli.config.as_deref(), args, robot_mode).await,
        Commands::Status => handle_sync_status_cmd(cli.config.as_deref(), robot_mode).await,
        Commands::Status => handle_sync_status_cmd(cli.config.as_deref(), robot_mode).await,
        Commands::Auth => handle_auth_test(cli.config.as_deref(), robot_mode).await,
        Commands::Auth => handle_auth_test(cli.config.as_deref(), robot_mode).await,
        Commands::Doctor => handle_doctor(cli.config.as_deref(), robot_mode).await,
        Commands::Doctor => handle_doctor(cli.config.as_deref(), robot_mode).await,
						
							
						
						
							
						
						
						
@@ -137,7 +139,8 @@ async fn main() {
            if !robot_mode {
            if !robot_mode {
                eprintln!(
                eprintln!(
                    "{}",
                    "{}",
                    style("warning: 'lore list' is deprecated, use 'lore issues' or 'lore mrs'").yellow()
                    style("warning: 'lore list' is deprecated, use 'lore issues' or 'lore mrs'")
                        .yellow()
                );
                );
            }
            }
            handle_list_compat(
            handle_list_compat(
						
							
						
						
							
						
						
						
@@ -266,8 +269,10 @@ fn handle_error(e: Box<dyn std::error::Error>, robot_mode: bool) -> ! {
        };
        };
        eprintln!(
        eprintln!(
            "{}",
            "{}",
            serde_json::to_string(&output)
            serde_json::to_string(&output).unwrap_or_else(|_| {
                .unwrap_or_else(|_| r#"{"error":{"code":"INTERNAL_ERROR","message":"Serialization failed"}}"#.to_string())
                r#"{"error":{"code":"INTERNAL_ERROR","message":"Serialization failed"}}"#
                    .to_string()
            })
        );
        );
    } else {
    } else {
        eprintln!("{} {}", style("Error:").red(), e);
        eprintln!("{} {}", style("Error:").red(), e);
						
							
						
						
							
						
						
						
@@ -929,7 +934,10 @@ fn handle_backup(robot_mode: bool) -> Result<(), Box<dyn std::error::Error>> {
        };
        };
        eprintln!("{}", serde_json::to_string(&output)?);
        eprintln!("{}", serde_json::to_string(&output)?);
    } else {
    } else {
        eprintln!("{} The 'backup' command is not yet implemented.", style("Error:").red());
        eprintln!(
            "{} The 'backup' command is not yet implemented.",
            style("Error:").red()
        );
    }
    }
    std::process::exit(1);
    std::process::exit(1);
}
}
						
						
						
							
						
						
@@ -940,12 +948,16 @@ fn handle_reset(robot_mode: bool) -> Result<(), Box<dyn std::error::Error>> {
            error: RobotErrorSuggestionData {
            error: RobotErrorSuggestionData {
                code: "NOT_IMPLEMENTED".to_string(),
                code: "NOT_IMPLEMENTED".to_string(),
                message: "The 'reset' command is not yet implemented.".to_string(),
                message: "The 'reset' command is not yet implemented.".to_string(),
                suggestion: "Manually delete the database: rm ~/.local/share/lore/lore.db".to_string(),
                suggestion: "Manually delete the database: rm ~/.local/share/lore/lore.db"
                    .to_string(),
            },
            },
        };
        };
        eprintln!("{}", serde_json::to_string(&output)?);
        eprintln!("{}", serde_json::to_string(&output)?);
    } else {
    } else {
        eprintln!("{} The 'reset' command is not yet implemented.", style("Error:").red());
        eprintln!(
            "{} The 'reset' command is not yet implemented.",
            style("Error:").red()
        );
    }
    }
    std::process::exit(1);
    std::process::exit(1);
}
}
						
							
						
						
							
						
						
						
@@ -1234,18 +1246,23 @@ async fn handle_health(
                style("FAIL").red()
                style("FAIL").red()
            }
            }
        };
        };
        println!("Config:  {} ({})", status(config_found), config_path.display());
        println!("DB:      {}", status(db_found));
        println!(
        println!(
            "Schema:  {} (v{})",
            "Config:  {} ({})",
            status(schema_current),
            status(config_found),
            schema_version
            config_path.display()
        );
        );
        println!("DB:      {}", status(db_found));
        println!("Schema:  {} (v{})", status(schema_current), schema_version);
        println!();
        println!();
        if healthy {
        if healthy {
            println!("{}", style("Healthy").green().bold());
            println!("{}", style("Healthy").green().bold());
        } else {
        } else {
            println!("{}", style("Unhealthy - run 'lore doctor' for details").red().bold());
            println!(
                "{}",
                style("Unhealthy - run 'lore doctor' for details")
                    .red()
                    .bold()
            );
        }
        }
    }
    }
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -138,10 +138,7 @@ pub fn apply_filters(
    }
    }
    let limit = filters.clamp_limit();
    let limit = filters.clamp_limit();
    sql.push_str(&format!(
    sql.push_str(&format!(" ORDER BY j.key LIMIT ?{}", param_idx));
        " ORDER BY j.key LIMIT ?{}",
        param_idx
    ));
    params.push(Box::new(limit as i64));
    params.push(Box::new(limit as i64));
    let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect();
    let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect();
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -39,14 +39,14 @@ pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
                .split_whitespace()
                .split_whitespace()
                .map(|token| {
                .map(|token| {
                    // Check if token ends with * and the rest is alphanumeric
                    // Check if token ends with * and the rest is alphanumeric
                    if token.ends_with('*') {
                    if let Some(stem) = token.strip_suffix('*')
                        let stem = &token[..token.len() - 1];
                        && !stem.is_empty()
                        if !stem.is_empty() && stem.chars().all(|c| c.is_alphanumeric() || c == '_') {
                        && stem.chars().all(|c| c.is_alphanumeric() || c == '_')
                    {
                        // Preserve prefix search: "stem"*
                        // Preserve prefix search: "stem"*
                        let escaped = stem.replace('"', "\"\"");
                        let escaped = stem.replace('"', "\"\"");
                        return format!("\"{}\"*", escaped);
                        return format!("\"{}\"*", escaped);
                    }
                    }
                    }
                    // Default: wrap in quotes, escape internal quotes
                    // Default: wrap in quotes, escape internal quotes
                    let escaped = token.replace('"', "\"\"");
                    let escaped = token.replace('"', "\"\"");
                    format!("\"{}\"", escaped)
                    format!("\"{}\"", escaped)
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -4,8 +4,8 @@ use rusqlite::Connection;
use crate::core::error::Result;
use crate::core::error::Result;
use crate::embedding::ollama::OllamaClient;
use crate::embedding::ollama::OllamaClient;
use crate::search::{rank_rrf, search_fts, search_vector, FtsQueryMode};
use crate::search::filters::{SearchFilters, apply_filters};
use crate::search::filters::{apply_filters, SearchFilters};
use crate::search::{FtsQueryMode, rank_rrf, search_fts, search_vector};
const BASE_RECALL_MIN: usize = 50;
const BASE_RECALL_MIN: usize = 50;
const FILTERED_RECALL_MIN: usize = 200;
const FILTERED_RECALL_MIN: usize = 200;
						
							
						
						
							
						
						
						
@@ -65,9 +65,9 @@ pub async fn search_hybrid(
    // Adaptive recall
    // Adaptive recall
    let requested = filters.clamp_limit();
    let requested = filters.clamp_limit();
    let top_k = if filters.has_any_filter() {
    let top_k = if filters.has_any_filter() {
        (requested * 50).max(FILTERED_RECALL_MIN).min(RECALL_CAP)
        (requested * 50).clamp(FILTERED_RECALL_MIN, RECALL_CAP)
    } else {
    } else {
        (requested * 10).max(BASE_RECALL_MIN).min(RECALL_CAP)
        (requested * 10).clamp(BASE_RECALL_MIN, RECALL_CAP)
    };
    };
    let (fts_tuples, vec_tuples) = match mode {
    let (fts_tuples, vec_tuples) = match mode {
						
						
						
							
						
						
@@ -88,10 +88,7 @@ pub async fn search_hybrid(
            };
            };
            let query_embedding = client.embed_batch(vec![query.to_string()]).await?;
            let query_embedding = client.embed_batch(vec![query.to_string()]).await?;
            let embedding = query_embedding
            let embedding = query_embedding.into_iter().next().unwrap_or_default();
                .into_iter()
                .next()
                .unwrap_or_default();
            if embedding.is_empty() {
            if embedding.is_empty() {
                return Err(crate::core::error::LoreError::Other(
                return Err(crate::core::error::LoreError::Other(
						
						
						
							
						
						
@@ -115,18 +112,13 @@ pub async fn search_hybrid(
                .collect();
                .collect();
            match client {
            match client {
                Some(client) => {
                Some(client) => match client.embed_batch(vec![query.to_string()]).await {
                    match client.embed_batch(vec![query.to_string()]).await {
                    Ok(query_embedding) => {
                    Ok(query_embedding) => {
                            let embedding = query_embedding
                        let embedding = query_embedding.into_iter().next().unwrap_or_default();
                                .into_iter()
                                .next()
                                .unwrap_or_default();
                        let vec_tuples = if embedding.is_empty() {
                        let vec_tuples = if embedding.is_empty() {
                                warnings.push(
                            warnings
                                    "Ollama returned empty embedding, using FTS only.".into(),
                                .push("Ollama returned empty embedding, using FTS only.".into());
                                );
                            Vec::new()
                            Vec::new()
                        } else {
                        } else {
                            let vec_results = search_vector(conn, &embedding, top_k)?;
                            let vec_results = search_vector(conn, &embedding, top_k)?;
						
						
						
							
						
						
@@ -139,17 +131,15 @@ pub async fn search_hybrid(
                        (fts_tuples, vec_tuples)
                        (fts_tuples, vec_tuples)
                    }
                    }
                    Err(e) => {
                    Err(e) => {
                            warnings.push(
                        warnings.push(format!(
                                format!("Embedding failed ({}), falling back to lexical search.", e),
                            "Embedding failed ({}), falling back to lexical search.",
                            );
                            e
                        ));
                        (fts_tuples, Vec::new())
                        (fts_tuples, Vec::new())
                    }
                    }
                    }
                },
                }
                None => {
                None => {
                    warnings.push(
                    warnings.push("Ollama unavailable, falling back to lexical search.".into());
                        "Ollama unavailable, falling back to lexical search.".into(),
                    );
                    (fts_tuples, Vec::new())
                    (fts_tuples, Vec::new())
                }
                }
            }
            }
						
							
						
						
							
						
						
						
@@ -217,7 +207,7 @@ mod tests {
            ..Default::default()
            ..Default::default()
        };
        };
        let requested = filters.clamp_limit();
        let requested = filters.clamp_limit();
        let top_k = (requested * 10).max(BASE_RECALL_MIN).min(RECALL_CAP);
        let top_k = (requested * 10).clamp(BASE_RECALL_MIN, RECALL_CAP);
        assert_eq!(top_k, 200);
        assert_eq!(top_k, 200);
    }
    }
						
						
						
							
						
						
@@ -229,7 +219,7 @@ mod tests {
            ..Default::default()
            ..Default::default()
        };
        };
        let requested = filters.clamp_limit();
        let requested = filters.clamp_limit();
        let top_k = (requested * 50).max(FILTERED_RECALL_MIN).min(RECALL_CAP);
        let top_k = (requested * 50).clamp(FILTERED_RECALL_MIN, RECALL_CAP);
        assert_eq!(top_k, 1000);
        assert_eq!(top_k, 1000);
    }
    }
						
						
						
							
						
						
@@ -241,7 +231,7 @@ mod tests {
            ..Default::default()
            ..Default::default()
        };
        };
        let requested = filters.clamp_limit();
        let requested = filters.clamp_limit();
        let top_k = (requested * 50).max(FILTERED_RECALL_MIN).min(RECALL_CAP);
        let top_k = (requested * 50).clamp(FILTERED_RECALL_MIN, RECALL_CAP);
        assert_eq!(top_k, RECALL_CAP); // 5000 capped to 1500
        assert_eq!(top_k, RECALL_CAP); // 5000 capped to 1500
    }
    }
						
						
						
							
						
						
@@ -252,7 +242,7 @@ mod tests {
            ..Default::default()
            ..Default::default()
        };
        };
        let requested = filters.clamp_limit();
        let requested = filters.clamp_limit();
        let top_k = (requested * 10).max(BASE_RECALL_MIN).min(RECALL_CAP);
        let top_k = (requested * 10).clamp(BASE_RECALL_MIN, RECALL_CAP);
        assert_eq!(top_k, BASE_RECALL_MIN); // 10 -> 50
        assert_eq!(top_k, BASE_RECALL_MIN); // 10 -> 50
    }
    }
}
}
						
						
						
						
 
						
						
							
						
						
						
@@ -4,11 +4,11 @@ mod hybrid;
mod rrf;
mod rrf;
mod vector;
mod vector;
pub use filters::{PathFilter, SearchFilters, apply_filters};
pub use fts::{
pub use fts::{
    generate_fallback_snippet, get_result_snippet, search_fts, to_fts_query, FtsQueryMode,
    FtsQueryMode, FtsResult, generate_fallback_snippet, get_result_snippet, search_fts,
    FtsResult,
    to_fts_query,
};
};
pub use filters::{apply_filters, PathFilter, SearchFilters};
pub use hybrid::{HybridResult, SearchMode, search_hybrid};
pub use rrf::{rank_rrf, RrfResult};
pub use rrf::{RrfResult, rank_rrf};
pub use vector::{search_vector, VectorResult};
pub use vector::{VectorResult, search_vector};
pub use hybrid::{search_hybrid, HybridResult, SearchMode};
						
						
						
						
 
						
						
							
						
						
						
@@ -22,10 +22,7 @@ pub struct RrfResult {
/// Ranks are 1-indexed (first result = rank 1).
/// Ranks are 1-indexed (first result = rank 1).
///
///
/// Score = sum of 1/(k + rank) for each list containing the document.
/// Score = sum of 1/(k + rank) for each list containing the document.
pub fn rank_rrf(
pub fn rank_rrf(vector_results: &[(i64, f64)], fts_results: &[(i64, f64)]) -> Vec<RrfResult> {
    vector_results: &[(i64, f64)],
    fts_results: &[(i64, f64)],
) -> Vec<RrfResult> {
    if vector_results.is_empty() && fts_results.is_empty() {
    if vector_results.is_empty() && fts_results.is_empty() {
        return Vec::new();
        return Vec::new();
    }
    }
						
							
						
						
							
						
						
						
@@ -63,16 +60,18 @@ pub fn rank_rrf(
        .collect();
        .collect();
    // Sort descending by rrf_score
    // Sort descending by rrf_score
    results.sort_by(|a, b| b.rrf_score.partial_cmp(&a.rrf_score).unwrap_or(std::cmp::Ordering::Equal));
    results.sort_by(|a, b| {
        b.rrf_score
            .partial_cmp(&a.rrf_score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    // Normalize: best = 1.0
    // Normalize: best = 1.0
    if let Some(max_score) = results.first().map(|r| r.rrf_score) {
    if let Some(max_score) = results.first().map(|r| r.rrf_score).filter(|&s| s > 0.0) {
        if max_score > 0.0 {
        for result in &mut results {
        for result in &mut results {
            result.normalized_score = result.rrf_score / max_score;
            result.normalized_score = result.rrf_score / max_score;
        }
        }
    }
    }
    }
    results
    results
}
}
						
						
						
							
						
						
@@ -92,8 +91,16 @@ mod tests {
        // Doc 1 score should be higher than doc 2 and doc 3
        // Doc 1 score should be higher than doc 2 and doc 3
        let doc1 = &results[0];
        let doc1 = &results[0];
        let doc2_score = results.iter().find(|r| r.document_id == 2).unwrap().rrf_score;
        let doc2_score = results
        let doc3_score = results.iter().find(|r| r.document_id == 3).unwrap().rrf_score;
            .iter()
            .find(|r| r.document_id == 2)
            .unwrap()
            .rrf_score;
        let doc3_score = results
            .iter()
            .find(|r| r.document_id == 3)
            .unwrap()
            .rrf_score;
        assert!(doc1.rrf_score > doc2_score);
        assert!(doc1.rrf_score > doc2_score);
        assert!(doc1.rrf_score > doc3_score);
        assert!(doc1.rrf_score > doc3_score);
    }
    }
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -70,7 +70,7 @@ pub fn search_vector(
         FROM embeddings
         FROM embeddings
         WHERE embedding MATCH ?1
         WHERE embedding MATCH ?1
           AND k = ?2
           AND k = ?2
         ORDER BY distance"
         ORDER BY distance",
    )?;
    )?;
    let rows: Vec<(i64, f64)> = stmt
    let rows: Vec<(i64, f64)> = stmt
						
							
						
						
							
						
						
						
@@ -137,11 +137,7 @@ mod tests {
    #[test]
    #[test]
    fn test_dedup_respects_limit() {
    fn test_dedup_respects_limit() {
        let rows = vec![
        let rows = vec![(1000_i64, 0.1_f64), (2000, 0.2), (3000, 0.3)];
            (1000_i64, 0.1_f64),
            (2000, 0.2),
            (3000, 0.3),
        ];
        let results = search_vector_dedup(rows, 2);
        let results = search_vector_dedup(rows, 2);
        assert_eq!(results.len(), 2);
        assert_eq!(results.len(), 2);
    }
    }
						
						
						
							
						
						
@@ -161,7 +157,10 @@ mod tests {
        }
        }
        let mut results: Vec<VectorResult> = best
        let mut results: Vec<VectorResult> = best
            .into_iter()
            .into_iter()
            .map(|(document_id, distance)| VectorResult { document_id, distance })
            .map(|(document_id, distance)| VectorResult {
                document_id,
                distance,
            })
            .collect();
            .collect();
        results.sort_by(|a, b| a.distance.total_cmp(&b.distance));
        results.sort_by(|a, b| a.distance.total_cmp(&b.distance));
        results.truncate(limit);
        results.truncate(limit);
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -102,7 +102,10 @@ fn knn_search_returns_nearest_neighbors() {
    let results = lore::search::search_vector(&conn, &query, 10).unwrap();
    let results = lore::search::search_vector(&conn, &query, 10).unwrap();
    assert!(!results.is_empty(), "Should return at least one result");
    assert!(!results.is_empty(), "Should return at least one result");
    assert_eq!(results[0].document_id, 1, "Nearest neighbor should be doc 1");
    assert_eq!(
        results[0].document_id, 1,
        "Nearest neighbor should be doc 1"
    );
}
}
#[test]
#[test]
						
						
						
							
						
						
@@ -122,7 +125,12 @@ fn knn_search_respects_limit() {
fn knn_search_deduplicates_chunks() {
fn knn_search_deduplicates_chunks() {
    let (_tmp, conn) = create_test_db();
    let (_tmp, conn) = create_test_db();
    insert_document(&conn, 1, "Multi-chunk doc", "Very long content that was chunked.");
    insert_document(
        &conn,
        1,
        "Multi-chunk doc",
        "Very long content that was chunked.",
    );
    // Same document, two chunks, both similar to query
    // Same document, two chunks, both similar to query
    let mut v1 = vec![0.0f32; 768];
    let mut v1 = vec![0.0f32; 768];
						
						
						
							
						
						
@@ -137,7 +145,8 @@ fn knn_search_deduplicates_chunks() {
    let results = lore::search::search_vector(&conn, &axis_vector(0), 10).unwrap();
    let results = lore::search::search_vector(&conn, &axis_vector(0), 10).unwrap();
    // Should deduplicate: same document_id appears at most once
    // Should deduplicate: same document_id appears at most once
    let unique_docs: std::collections::HashSet<i64> = results.iter().map(|r| r.document_id).collect();
    let unique_docs: std::collections::HashSet<i64> =
        results.iter().map(|r| r.document_id).collect();
    assert_eq!(
    assert_eq!(
        unique_docs.len(),
        unique_docs.len(),
        results.len(),
        results.len(),
						
						
						
							
						
						
@@ -154,22 +163,38 @@ fn orphan_trigger_deletes_embeddings_on_document_delete() {
    // Verify embedding exists
    // Verify embedding exists
    let count: i64 = conn
    let count: i64 = conn
        .query_row("SELECT COUNT(*) FROM embeddings WHERE rowid = 1000", [], |r| r.get(0))
        .query_row(
            "SELECT COUNT(*) FROM embeddings WHERE rowid = 1000",
            [],
            |r| r.get(0),
        )
        .unwrap();
        .unwrap();
    assert_eq!(count, 1, "Embedding should exist before delete");
    assert_eq!(count, 1, "Embedding should exist before delete");
    // Delete the document
    // Delete the document
    conn.execute("DELETE FROM documents WHERE id = 1", []).unwrap();
    conn.execute("DELETE FROM documents WHERE id = 1", [])
        .unwrap();
    // Verify embedding was cascade-deleted via trigger
    // Verify embedding was cascade-deleted via trigger
    let count: i64 = conn
    let count: i64 = conn
        .query_row("SELECT COUNT(*) FROM embeddings WHERE rowid = 1000", [], |r| r.get(0))
        .query_row(
            "SELECT COUNT(*) FROM embeddings WHERE rowid = 1000",
            [],
            |r| r.get(0),
        )
        .unwrap();
        .unwrap();
    assert_eq!(count, 0, "Trigger should delete embeddings when document is deleted");
    assert_eq!(
        count, 0,
        "Trigger should delete embeddings when document is deleted"
    );
    // Verify metadata was cascade-deleted via FK
    // Verify metadata was cascade-deleted via FK
    let meta_count: i64 = conn
    let meta_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM embedding_metadata WHERE document_id = 1", [], |r| r.get(0))
        .query_row(
            "SELECT COUNT(*) FROM embedding_metadata WHERE document_id = 1",
            [],
            |r| r.get(0),
        )
        .unwrap();
        .unwrap();
    assert_eq!(meta_count, 0, "Metadata should be cascade-deleted");
    assert_eq!(meta_count, 0, "Metadata should be cascade-deleted");
}
}
						
							
						
						
							
						
						
						
@@ -206,7 +231,8 @@ fn overflow_doc_with_error_sentinel_not_re_detected_as_pending() {
    .unwrap();
    .unwrap();
    // Now find_pending_documents should NOT return this document
    // Now find_pending_documents should NOT return this document
    let pending = lore::embedding::find_pending_documents(&conn, 100, 0, "nomic-embed-text").unwrap();
    let pending =
        lore::embedding::find_pending_documents(&conn, 100, 0, "nomic-embed-text").unwrap();
    assert!(
    assert!(
        pending.is_empty(),
        pending.is_empty(),
        "Document with overflow error sentinel should not be re-detected as pending, got {} pending",
        "Document with overflow error sentinel should not be re-detected as pending, got {} pending",
						
						
						
							
						
						
@@ -215,7 +241,10 @@ fn overflow_doc_with_error_sentinel_not_re_detected_as_pending() {
    // count_pending_documents should also return 0
    // count_pending_documents should also return 0
    let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
    let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
    assert_eq!(count, 0, "Count should be 0 for document with overflow sentinel");
    assert_eq!(
        count, 0,
        "Count should be 0 for document with overflow sentinel"
    );
}
}
#[test]
#[test]
						
						
						
							
						
						
@@ -226,14 +255,24 @@ fn count_and_find_pending_agree() {
    // Case 1: No documents at all
    // Case 1: No documents at all
    let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
    let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
    let found = lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
    let found =
    assert_eq!(count as usize, found.len(), "Empty DB: count and find should agree");
        lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
    assert_eq!(
        count as usize,
        found.len(),
        "Empty DB: count and find should agree"
    );
    // Case 2: New document (no metadata)
    // Case 2: New document (no metadata)
    insert_document(&conn, 1, "New doc", "Content");
    insert_document(&conn, 1, "New doc", "Content");
    let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
    let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
    let found = lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
    let found =
    assert_eq!(count as usize, found.len(), "New doc: count and find should agree");
        lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
    assert_eq!(
        count as usize,
        found.len(),
        "New doc: count and find should agree"
    );
    assert_eq!(count, 1);
    assert_eq!(count, 1);
    // Case 3: Document with matching metadata (not pending)
    // Case 3: Document with matching metadata (not pending)
						
						
						
							
						
						
@@ -247,8 +286,13 @@ fn count_and_find_pending_agree() {
    )
    )
    .unwrap();
    .unwrap();
    let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
    let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
    let found = lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
    let found =
    assert_eq!(count as usize, found.len(), "Complete doc: count and find should agree");
        lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
    assert_eq!(
        count as usize,
        found.len(),
        "Complete doc: count and find should agree"
    );
    assert_eq!(count, 0);
    assert_eq!(count, 0);
    // Case 4: Config drift (chunk_max_bytes mismatch)
    // Case 4: Config drift (chunk_max_bytes mismatch)
						
						
						
							
						
						
@@ -258,8 +302,13 @@ fn count_and_find_pending_agree() {
    )
    )
    .unwrap();
    .unwrap();
    let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
    let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
    let found = lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
    let found =
    assert_eq!(count as usize, found.len(), "Config drift: count and find should agree");
        lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
    assert_eq!(
        count as usize,
        found.len(),
        "Config drift: count and find should agree"
    );
    assert_eq!(count, 1);
    assert_eq!(count, 1);
}
}
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -51,26 +51,72 @@ fn insert_document(conn: &Connection, id: i64, source_type: &str, title: &str, c
fn fts_basic_search() {
fn fts_basic_search() {
    let conn = create_test_db();
    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "Authentication bug", "Users cannot login when using OAuth tokens. The JWT refresh fails silently.");
    insert_document(
    insert_document(&conn, 2, "merge_request", "Add user profile page", "This MR adds a new user profile page with avatar upload support.");
        &conn,
    insert_document(&conn, 3, "issue", "Database migration failing", "The migration script crashes on PostgreSQL 14 due to deprecated syntax.");
        1,
        "issue",
        "Authentication bug",
        "Users cannot login when using OAuth tokens. The JWT refresh fails silently.",
    );
    insert_document(
        &conn,
        2,
        "merge_request",
        "Add user profile page",
        "This MR adds a new user profile page with avatar upload support.",
    );
    insert_document(
        &conn,
        3,
        "issue",
        "Database migration failing",
        "The migration script crashes on PostgreSQL 14 due to deprecated syntax.",
    );
    let results = lore::search::search_fts(&conn, "authentication login", 10, lore::search::FtsQueryMode::Safe).unwrap();
    let results = lore::search::search_fts(
        &conn,
        "authentication login",
        10,
        lore::search::FtsQueryMode::Safe,
    )
    .unwrap();
    assert!(!results.is_empty(), "Expected at least one result for 'authentication login'");
    assert!(
    assert_eq!(results[0].document_id, 1, "Authentication issue should be top result");
        !results.is_empty(),
        "Expected at least one result for 'authentication login'"
    );
    assert_eq!(
        results[0].document_id, 1,
        "Authentication issue should be top result"
    );
}
}
#[test]
#[test]
fn fts_stemming_matches() {
fn fts_stemming_matches() {
    let conn = create_test_db();
    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "Running tests", "The test runner is executing integration tests.");
    insert_document(
    insert_document(&conn, 2, "issue", "Deployment config", "Deployment configuration for production servers.");
        &conn,
        1,
        "issue",
        "Running tests",
        "The test runner is executing integration tests.",
    );
    insert_document(
        &conn,
        2,
        "issue",
        "Deployment config",
        "Deployment configuration for production servers.",
    );
    // "running" should match "runner" and "executing" via porter stemmer
    // "running" should match "runner" and "executing" via porter stemmer
    let results = lore::search::search_fts(&conn, "running", 10, lore::search::FtsQueryMode::Safe).unwrap();
    let results =
    assert!(!results.is_empty(), "Stemming should match 'running' to 'runner'");
        lore::search::search_fts(&conn, "running", 10, lore::search::FtsQueryMode::Safe).unwrap();
    assert!(
        !results.is_empty(),
        "Stemming should match 'running' to 'runner'"
    );
    assert_eq!(results[0].document_id, 1);
    assert_eq!(results[0].document_id, 1);
}
}
						
						
						
							
						
						
@@ -78,20 +124,43 @@ fn fts_stemming_matches() {
fn fts_empty_results() {
fn fts_empty_results() {
    let conn = create_test_db();
    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "Bug fix", "Fixed a null pointer dereference in the parser.");
    insert_document(
        &conn,
        1,
        "issue",
        "Bug fix",
        "Fixed a null pointer dereference in the parser.",
    );
    let results = lore::search::search_fts(&conn, "kubernetes deployment helm", 10, lore::search::FtsQueryMode::Safe).unwrap();
    let results = lore::search::search_fts(
    assert!(results.is_empty(), "No documents should match unrelated query");
        &conn,
        "kubernetes deployment helm",
        10,
        lore::search::FtsQueryMode::Safe,
    )
    .unwrap();
    assert!(
        results.is_empty(),
        "No documents should match unrelated query"
    );
}
}
#[test]
#[test]
fn fts_special_characters_handled() {
fn fts_special_characters_handled() {
    let conn = create_test_db();
    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "C++ compiler", "The C++ compiler segfaults on template metaprogramming.");
    insert_document(
        &conn,
        1,
        "issue",
        "C++ compiler",
        "The C++ compiler segfaults on template metaprogramming.",
    );
    // Special characters should not crash the search
    // Special characters should not crash the search
    let results = lore::search::search_fts(&conn, "C++ compiler", 10, lore::search::FtsQueryMode::Safe).unwrap();
    let results =
        lore::search::search_fts(&conn, "C++ compiler", 10, lore::search::FtsQueryMode::Safe)
            .unwrap();
    // Safe mode sanitizes the query — it should still return results or at least not crash
    // Safe mode sanitizes the query — it should still return results or at least not crash
    assert!(results.len() <= 1);
    assert!(results.len() <= 1);
}
}
						
						
						
							
						
						
@@ -101,17 +170,44 @@ fn fts_result_ordering_by_relevance() {
    let conn = create_test_db();
    let conn = create_test_db();
    // Doc 1: "authentication" in title and content
    // Doc 1: "authentication" in title and content
    insert_document(&conn, 1, "issue", "Authentication system redesign", "The authentication system needs a complete redesign. Authentication flows are broken.");
    insert_document(
        &conn,
        1,
        "issue",
        "Authentication system redesign",
        "The authentication system needs a complete redesign. Authentication flows are broken.",
    );
    // Doc 2: "authentication" only in content, once
    // Doc 2: "authentication" only in content, once
    insert_document(&conn, 2, "issue", "Login page update", "Updated the login page with better authentication error messages.");
    insert_document(
        &conn,
        2,
        "issue",
        "Login page update",
        "Updated the login page with better authentication error messages.",
    );
    // Doc 3: unrelated
    // Doc 3: unrelated
    insert_document(&conn, 3, "issue", "Database optimization", "Optimize database queries for faster response times.");
    insert_document(
        &conn,
        3,
        "issue",
        "Database optimization",
        "Optimize database queries for faster response times.",
    );
    let results = lore::search::search_fts(&conn, "authentication", 10, lore::search::FtsQueryMode::Safe).unwrap();
    let results = lore::search::search_fts(
        &conn,
        "authentication",
        10,
        lore::search::FtsQueryMode::Safe,
    )
    .unwrap();
    assert!(results.len() >= 2, "Should match at least 2 documents");
    assert!(results.len() >= 2, "Should match at least 2 documents");
    // Doc 1 should rank higher (more occurrences of the term)
    // Doc 1 should rank higher (more occurrences of the term)
    assert_eq!(results[0].document_id, 1, "Document with more term occurrences should rank first");
    assert_eq!(
        results[0].document_id, 1,
        "Document with more term occurrences should rank first"
    );
}
}
#[test]
#[test]
						
						
						
							
						
						
@@ -128,7 +224,8 @@ fn fts_respects_limit() {
        );
        );
    }
    }
    let results = lore::search::search_fts(&conn, "bug login", 5, lore::search::FtsQueryMode::Safe).unwrap();
    let results =
        lore::search::search_fts(&conn, "bug login", 5, lore::search::FtsQueryMode::Safe).unwrap();
    assert!(results.len() <= 5, "Results should be capped at limit");
    assert!(results.len() <= 5, "Results should be capped at limit");
}
}
						
						
						
							
						
						
@@ -136,24 +233,45 @@ fn fts_respects_limit() {
fn fts_snippet_generated() {
fn fts_snippet_generated() {
    let conn = create_test_db();
    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "Performance issue", "The application performance degrades significantly when more than 100 users are connected simultaneously. Memory usage spikes to 4GB.");
    insert_document(
        &conn,
        1,
        "issue",
        "Performance issue",
        "The application performance degrades significantly when more than 100 users are connected simultaneously. Memory usage spikes to 4GB.",
    );
    let results = lore::search::search_fts(&conn, "performance", 10, lore::search::FtsQueryMode::Safe).unwrap();
    let results =
        lore::search::search_fts(&conn, "performance", 10, lore::search::FtsQueryMode::Safe)
            .unwrap();
    assert!(!results.is_empty());
    assert!(!results.is_empty());
    // Snippet should contain some text (may have FTS5 highlight markers)
    // Snippet should contain some text (may have FTS5 highlight markers)
    assert!(!results[0].snippet.is_empty(), "Snippet should be generated");
    assert!(
        !results[0].snippet.is_empty(),
        "Snippet should be generated"
    );
}
}
#[test]
#[test]
fn fts_triggers_sync_on_insert() {
fn fts_triggers_sync_on_insert() {
    let conn = create_test_db();
    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "Test document", "This is test content for FTS trigger verification.");
    insert_document(
        &conn,
        1,
        "issue",
        "Test document",
        "This is test content for FTS trigger verification.",
    );
    // Verify FTS table has an entry via direct query
    // Verify FTS table has an entry via direct query
    let fts_count: i64 = conn
    let fts_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'test'", [], |r| r.get(0))
        .query_row(
            "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'test'",
            [],
            |r| r.get(0),
        )
        .unwrap();
        .unwrap();
    assert_eq!(fts_count, 1, "FTS trigger should auto-index on INSERT");
    assert_eq!(fts_count, 1, "FTS trigger should auto-index on INSERT");
						
						
						
							
						
						
@@ -163,20 +281,35 @@ fn fts_triggers_sync_on_insert() {
fn fts_triggers_sync_on_delete() {
fn fts_triggers_sync_on_delete() {
    let conn = create_test_db();
    let conn = create_test_db();
    insert_document(&conn, 1, "issue", "Deletable document", "This content will be deleted from the index.");
    insert_document(
        &conn,
        1,
        "issue",
        "Deletable document",
        "This content will be deleted from the index.",
    );
    // Verify it's indexed
    // Verify it's indexed
    let before: i64 = conn
    let before: i64 = conn
        .query_row("SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'deletable'", [], |r| r.get(0))
        .query_row(
            "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'deletable'",
            [],
            |r| r.get(0),
        )
        .unwrap();
        .unwrap();
    assert_eq!(before, 1);
    assert_eq!(before, 1);
    // Delete the document
    // Delete the document
    conn.execute("DELETE FROM documents WHERE id = 1", []).unwrap();
    conn.execute("DELETE FROM documents WHERE id = 1", [])
        .unwrap();
    // Verify it's removed from FTS
    // Verify it's removed from FTS
    let after: i64 = conn
    let after: i64 = conn
        .query_row("SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'deletable'", [], |r| r.get(0))
        .query_row(
            "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'deletable'",
            [],
            |r| r.get(0),
        )
        .unwrap();
        .unwrap();
    assert_eq!(after, 0, "FTS trigger should remove entry on DELETE");
    assert_eq!(after, 0, "FTS trigger should remove entry on DELETE");
}
}
						
						
						
							
						
						
@@ -193,6 +326,8 @@ fn fts_null_title_handled() {
    )
    )
    .unwrap();
    .unwrap();
    let results = lore::search::search_fts(&conn, "rate limiting", 10, lore::search::FtsQueryMode::Safe).unwrap();
    let results =
        lore::search::search_fts(&conn, "rate limiting", 10, lore::search::FtsQueryMode::Safe)
            .unwrap();
    assert!(!results.is_empty(), "Should find documents with NULL title");
    assert!(!results.is_empty(), "Should find documents with NULL title");
}
}
						
						
						
						
 
						
						
							
						
						
						
@@ -10,7 +10,7 @@ use rusqlite::Connection;
use serde::Deserialize;
use serde::Deserialize;
use std::path::PathBuf;
use std::path::PathBuf;
use lore::search::{self, FtsQueryMode, SearchFilters, SearchMode, search_fts, apply_filters};
use lore::search::{FtsQueryMode, SearchFilters, SearchMode, apply_filters, search_fts};
/// A golden query test case.
/// A golden query test case.
#[derive(Debug, Deserialize)]
#[derive(Debug, Deserialize)]
						
						
						
							
						
						
@@ -35,8 +35,7 @@ struct GoldenFilters {
}
}
fn load_golden_queries() -> Vec<GoldenQuery> {
fn load_golden_queries() -> Vec<GoldenQuery> {
    let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
    let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/golden_queries.json");
        .join("tests/fixtures/golden_queries.json");
    let content = std::fs::read_to_string(&path)
    let content = std::fs::read_to_string(&path)
        .unwrap_or_else(|_| panic!("Failed to read golden queries fixture"));
        .unwrap_or_else(|_| panic!("Failed to read golden queries fixture"));
    serde_json::from_str(&content)
    serde_json::from_str(&content)
						
							
						
						
							
						
						
						
@@ -77,63 +76,88 @@ fn create_seeded_db() -> Connection {
    // Seed deterministic documents
    // Seed deterministic documents
    let documents = vec![
    let documents = vec![
        // id=1: Auth issue (matches: authentication, login, OAuth, JWT, token, refresh)
        // id=1: Auth issue (matches: authentication, login, OAuth, JWT, token, refresh)
        (1, "issue", "Authentication and login broken with OAuth",
        (
            1,
            "issue",
            "Authentication and login broken with OAuth",
            "Users cannot login when using OAuth tokens. The JWT token refresh fails silently, \
            "Users cannot login when using OAuth tokens. The JWT token refresh fails silently, \
          causing authentication errors. When the access token expires, the refresh flow returns \
          causing authentication errors. When the access token expires, the refresh flow returns \
          a 401 instead of fetching new credentials. Login page shows a generic error. \
          a 401 instead of fetching new credentials. Login page shows a generic error. \
          Multiple users reported authentication failures across all OAuth providers.",
          Multiple users reported authentication failures across all OAuth providers.",
         "testuser"),
            "testuser",
        ),
        // id=2: User profile MR (matches: user, profile, avatar, upload)
        // id=2: User profile MR (matches: user, profile, avatar, upload)
        (2, "merge_request", "Add user profile page with avatar upload",
        (
            2,
            "merge_request",
            "Add user profile page with avatar upload",
            "This merge request adds a new user profile page. Users can now upload their avatar, \
            "This merge request adds a new user profile page. Users can now upload their avatar, \
          edit their display name, and manage notification preferences. The profile page includes \
          edit their display name, and manage notification preferences. The profile page includes \
          responsive design for mobile and desktop viewports.",
          responsive design for mobile and desktop viewports.",
         "developer1"),
            "developer1",
        ),
        // id=3: Database migration issue (matches: database, migration, PostgreSQL, schema)
        // id=3: Database migration issue (matches: database, migration, PostgreSQL, schema)
        (3, "issue", "Database migration failing on PostgreSQL 14",
        (
            3,
            "issue",
            "Database migration failing on PostgreSQL 14",
            "The database migration script crashes on PostgreSQL 14 due to deprecated syntax. \
            "The database migration script crashes on PostgreSQL 14 due to deprecated syntax. \
          The ALTER TABLE command uses a syntax removed in PG14. Migration 042 needs to be \
          The ALTER TABLE command uses a syntax removed in PG14. Migration 042 needs to be \
          rewritten to use the new schema modification syntax. All staging environments affected.",
          rewritten to use the new schema modification syntax. All staging environments affected.",
         "dba_admin"),
            "dba_admin",
        ),
        // id=4: Performance MR (matches: performance, optimization, caching, query)
        // id=4: Performance MR (matches: performance, optimization, caching, query)
        (4, "merge_request", "Performance optimization for dashboard queries",
        (
            4,
            "merge_request",
            "Performance optimization for dashboard queries",
            "Optimized the dashboard query performance by adding database indexes and implementing \
            "Optimized the dashboard query performance by adding database indexes and implementing \
          Redis caching for frequently accessed reports. Query execution time reduced from 3.2s \
          Redis caching for frequently accessed reports. Query execution time reduced from 3.2s \
          to 180ms. Added connection pooling and prepared statement caching.",
          to 180ms. Added connection pooling and prepared statement caching.",
         "senior_dev"),
            "senior_dev",
        ),
        // id=5: API rate limiting discussion (matches: API, rate, limiting, throttle)
        // id=5: API rate limiting discussion (matches: API, rate, limiting, throttle)
        (5, "discussion", "API rate limiting strategies for public endpoints",
        (
            5,
            "discussion",
            "API rate limiting strategies for public endpoints",
            "Discussion about implementing API rate limiting on public-facing endpoints. \
            "Discussion about implementing API rate limiting on public-facing endpoints. \
          Proposed approaches: token bucket with sliding window, fixed window counters, \
          Proposed approaches: token bucket with sliding window, fixed window counters, \
          or leaky bucket algorithm. Rate limits should be configurable per API key tier. \
          or leaky bucket algorithm. Rate limits should be configurable per API key tier. \
          Need to handle burst traffic during peak hours without throttling legitimate users.",
          Need to handle burst traffic during peak hours without throttling legitimate users.",
         "architect"),
            "architect",
        ),
        // id=6: UI/CSS issue (matches: CSS, styling, frontend, responsive, UI)
        // id=6: UI/CSS issue (matches: CSS, styling, frontend, responsive, UI)
        (6, "issue", "CSS styling issues on mobile frontend",
        (
            6,
            "issue",
            "CSS styling issues on mobile frontend",
            "Multiple CSS styling problems on the mobile frontend. The navigation menu overlaps \
            "Multiple CSS styling problems on the mobile frontend. The navigation menu overlaps \
          content on screens smaller than 768px. Button text truncates on compact viewports. \
          content on screens smaller than 768px. Button text truncates on compact viewports. \
          Frontend responsive breakpoints need adjustment. The UI components library has \
          Frontend responsive breakpoints need adjustment. The UI components library has \
          conflicting CSS specificity with the theme system.",
          conflicting CSS specificity with the theme system.",
         "frontend_dev"),
            "frontend_dev",
        ),
        // id=7: CI/CD MR (matches: CI, CD, pipeline, deployment, Docker)
        // id=7: CI/CD MR (matches: CI, CD, pipeline, deployment, Docker)
        (7, "merge_request", "Revamp CI/CD pipeline with Docker caching",
        (
            7,
            "merge_request",
            "Revamp CI/CD pipeline with Docker caching",
            "Complete overhaul of the CI/CD pipeline. Added Docker layer caching to speed up \
            "Complete overhaul of the CI/CD pipeline. Added Docker layer caching to speed up \
          builds. Deployment stages now run in parallel where possible. Added rollback \
          builds. Deployment stages now run in parallel where possible. Added rollback \
          support for failed deployments. Pipeline runtime reduced from 45min to 12min.",
          support for failed deployments. Pipeline runtime reduced from 45min to 12min.",
         "devops_lead"),
            "devops_lead",
        ),
        // id=8: Security issue (matches: security, vulnerability, XSS, injection)
        // id=8: Security issue (matches: security, vulnerability, XSS, injection)
        (8, "issue", "Security vulnerability in form submission",
        (
            8,
            "issue",
            "Security vulnerability in form submission",
            "A cross-site scripting (XSS) vulnerability was found in the comment submission form. \
            "A cross-site scripting (XSS) vulnerability was found in the comment submission form. \
          User input is not properly sanitized before rendering. The security scanner also flagged \
          User input is not properly sanitized before rendering. The security scanner also flagged \
          potential SQL injection in the search endpoint. Both vulnerabilities need immediate patching.",
          potential SQL injection in the search endpoint. Both vulnerabilities need immediate patching.",
         "security_team"),
            "security_team",
        ),
    ];
    ];
    for (id, source_type, title, content, author) in &documents {
    for (id, source_type, title, content, author) in &documents {
						
							
						
						
							
						
						
						
@@ -213,7 +237,11 @@ fn golden_queries_all_pass() {
        if filtered_ids.len() < gq.min_results {
        if filtered_ids.len() < gq.min_results {
            failures.push(format!(
            failures.push(format!(
                "FAIL [{}] \"{}\": expected >= {} results, got {} (description: {})",
                "FAIL [{}] \"{}\": expected >= {} results, got {} (description: {})",
                i, gq.query, gq.min_results, filtered_ids.len(), gq.description
                i,
                gq.query,
                gq.min_results,
                filtered_ids.len(),
                gq.description
            ));
            ));
            continue;
            continue;
        }
        }
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -51,13 +51,24 @@ fn insert_document(conn: &Connection, id: i64, source_type: &str, title: &str, c
    .unwrap();
    .unwrap();
}
}
#[test]
#[test]
fn lexical_mode_uses_fts_only() {
fn lexical_mode_uses_fts_only() {
    let (_tmp, conn) = create_test_db();
    let (_tmp, conn) = create_test_db();
    insert_document(&conn, 1, "issue", "Authentication bug", "OAuth token refresh fails silently.");
    insert_document(
    insert_document(&conn, 2, "issue", "Database migration", "Migration script crashes on PostgreSQL.");
        &conn,
        1,
        "issue",
        "Authentication bug",
        "OAuth token refresh fails silently.",
    );
    insert_document(
        &conn,
        2,
        "issue",
        "Database migration",
        "Migration script crashes on PostgreSQL.",
    );
    let filters = SearchFilters {
    let filters = SearchFilters {
        limit: 10,
        limit: 10,
						
							
						
						
							
						
						
						
@@ -121,14 +132,23 @@ fn lexical_mode_no_embeddings_required() {
    .unwrap();
    .unwrap();
    let results = search_fts(&conn, "testing", 10, FtsQueryMode::Safe).unwrap();
    let results = search_fts(&conn, "testing", 10, FtsQueryMode::Safe).unwrap();
    assert!(!results.is_empty(), "FTS should work without embeddings tables");
    assert!(
        !results.is_empty(),
        "FTS should work without embeddings tables"
    );
}
}
#[test]
#[test]
fn hybrid_mode_degrades_to_fts_without_client() {
fn hybrid_mode_degrades_to_fts_without_client() {
    let (_tmp, conn) = create_test_db();
    let (_tmp, conn) = create_test_db();
    insert_document(&conn, 1, "issue", "Performance issue", "Application is slow under load.");
    insert_document(
        &conn,
        1,
        "issue",
        "Performance issue",
        "Application is slow under load.",
    );
    let filters = SearchFilters {
    let filters = SearchFilters {
        limit: 10,
        limit: 10,
						
						
						
							
						
						
@@ -150,7 +170,11 @@ fn hybrid_mode_degrades_to_fts_without_client() {
    assert!(!results.is_empty(), "Should fall back to FTS results");
    assert!(!results.is_empty(), "Should fall back to FTS results");
    // Should warn about missing Ollama client
    // Should warn about missing Ollama client
    assert!(
    assert!(
        warnings.iter().any(|w| w.to_lowercase().contains("vector") || w.to_lowercase().contains("ollama") || w.to_lowercase().contains("client") || w.to_lowercase().contains("fallback") || w.to_lowercase().contains("fts")),
        warnings.iter().any(|w| w.to_lowercase().contains("vector")
            || w.to_lowercase().contains("ollama")
            || w.to_lowercase().contains("client")
            || w.to_lowercase().contains("fallback")
            || w.to_lowercase().contains("fts")),
        "Should produce a degradation warning, got: {:?}",
        "Should produce a degradation warning, got: {:?}",
        warnings
        warnings
    );
    );
						
							
						
						
							
						
						
						
@@ -177,8 +201,20 @@ fn rrf_ranking_combines_signals() {
fn filters_by_source_type() {
fn filters_by_source_type() {
    let (_tmp, conn) = create_test_db();
    let (_tmp, conn) = create_test_db();
    insert_document(&conn, 1, "issue", "Bug report", "Authentication bug in login flow.");
    insert_document(
    insert_document(&conn, 2, "merge_request", "Fix auth", "Fixed authentication issue.");
        &conn,
        1,
        "issue",
        "Bug report",
        "Authentication bug in login flow.",
    );
    insert_document(
        &conn,
        2,
        "merge_request",
        "Fix auth",
        "Fixed authentication issue.",
    );
    let filters = SearchFilters {
    let filters = SearchFilters {
        source_type: Some(lore::documents::SourceType::Issue),
        source_type: Some(lore::documents::SourceType::Issue),
						
						
						
							
						
						
@@ -189,7 +225,11 @@ fn filters_by_source_type() {
    let all_ids = vec![1, 2];
    let all_ids = vec![1, 2];
    let filtered = lore::search::apply_filters(&conn, &all_ids, &filters).unwrap();
    let filtered = lore::search::apply_filters(&conn, &all_ids, &filters).unwrap();
    assert_eq!(filtered.len(), 1, "Filter should remove non-issue documents");
    assert_eq!(
        filtered.len(),
        1,
        "Filter should remove non-issue documents"
    );
    assert_eq!(filtered[0], 1, "Only issue document should remain");
    assert_eq!(filtered[0], 1, "Only issue document should remain");
}
}
						
							
						
						
						
						
 
						
						
							
						
						
						
@@ -26,7 +26,7 @@ fn apply_migrations(conn: &Connection, through_version: i32) {
        let sql = std::fs::read_to_string(entries[0].path()).unwrap();
        let sql = std::fs::read_to_string(entries[0].path()).unwrap();
        conn.execute_batch(&sql)
        conn.execute_batch(&sql)
            .expect(&format!("Migration {} failed", version));
            .unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e));
    }
    }
}
}