feat(events): Wire resource event fetching into sync pipeline (bd-1ep)

Enqueue resource_events jobs for all issues/MRs after discussion sync,
then drain the queue by fetching state/label/milestone events from the GitLab
API and storing them via transaction-based wrappers. Add progress events and
count tracking through the orchestrator->ingest->sync result chain, and
respect the fetch_resource_events config flag. Include clippy fixes across
the codebase from parallel agent work.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Taylor Eernisse
2026-02-03 12:51:49 -05:00
parent 5c521491b7
commit 128008578a
47 changed files with 1981 additions and 653 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1 +1 @@
bd-1m8 bd-1ep

View File

@@ -3,12 +3,12 @@
use console::style; use console::style;
use serde::Serialize; use serde::Serialize;
use crate::Config;
use crate::core::db::create_connection; use crate::core::db::create_connection;
use crate::core::error::Result; use crate::core::error::Result;
use crate::core::paths::get_db_path; use crate::core::paths::get_db_path;
use crate::embedding::ollama::{OllamaClient, OllamaConfig}; use crate::embedding::ollama::{OllamaClient, OllamaConfig};
use crate::embedding::pipeline::embed_documents; use crate::embedding::pipeline::embed_documents;
use crate::Config;
/// Result of the embed command. /// Result of the embed command.
#[derive(Debug, Default, Serialize)] #[derive(Debug, Default, Serialize)]
@@ -69,10 +69,7 @@ pub async fn run_embed(
/// Print human-readable output. /// Print human-readable output.
pub fn print_embed(result: &EmbedCommandResult) { pub fn print_embed(result: &EmbedCommandResult) {
println!( println!("{} Embedding complete", style("done").green().bold(),);
"{} Embedding complete",
style("done").green().bold(),
);
println!(" Embedded: {}", result.embedded); println!(" Embedded: {}", result.embedded);
if result.failed > 0 { if result.failed > 0 {
println!(" Failed: {}", style(result.failed).red()); println!(" Failed: {}", style(result.failed).red());

View File

@@ -5,12 +5,12 @@ use rusqlite::Connection;
use serde::Serialize; use serde::Serialize;
use tracing::info; use tracing::info;
use crate::Config;
use crate::core::db::create_connection; use crate::core::db::create_connection;
use crate::core::error::Result; use crate::core::error::Result;
use crate::core::paths::get_db_path; use crate::core::paths::get_db_path;
use crate::core::project::resolve_project; use crate::core::project::resolve_project;
use crate::documents::{SourceType, regenerate_dirty_documents}; use crate::documents::{SourceType, regenerate_dirty_documents};
use crate::Config;
const FULL_MODE_CHUNK_SIZE: i64 = 2000; const FULL_MODE_CHUNK_SIZE: i64 = 2000;
@@ -134,7 +134,11 @@ fn seed_dirty(
/// Print human-readable output. /// Print human-readable output.
pub fn print_generate_docs(result: &GenerateDocsResult) { pub fn print_generate_docs(result: &GenerateDocsResult) {
let mode = if result.full_mode { "full" } else { "incremental" }; let mode = if result.full_mode {
"full"
} else {
"incremental"
};
println!( println!(
"{} Document generation complete ({})", "{} Document generation complete ({})",
style("done").green().bold(), style("done").green().bold(),
@@ -147,10 +151,7 @@ pub fn print_generate_docs(result: &GenerateDocsResult) {
println!(" Regenerated: {}", result.regenerated); println!(" Regenerated: {}", result.regenerated);
println!(" Unchanged: {}", result.unchanged); println!(" Unchanged: {}", result.unchanged);
if result.errored > 0 { if result.errored > 0 {
println!( println!(" Errored: {}", style(result.errored).red());
" Errored: {}",
style(result.errored).red()
);
} }
} }

View File

@@ -39,6 +39,9 @@ pub struct IngestResult {
pub labels_created: usize, pub labels_created: usize,
pub discussions_fetched: usize, pub discussions_fetched: usize,
pub notes_upserted: usize, pub notes_upserted: usize,
// Resource events
pub resource_events_fetched: usize,
pub resource_events_failed: usize,
} }
/// Controls what interactive UI elements `run_ingest` displays. /// Controls what interactive UI elements `run_ingest` displays.
@@ -57,17 +60,26 @@ pub struct IngestDisplay {
impl IngestDisplay { impl IngestDisplay {
/// Interactive mode: everything visible. /// Interactive mode: everything visible.
pub fn interactive() -> Self { pub fn interactive() -> Self {
Self { show_progress: true, show_text: true } Self {
show_progress: true,
show_text: true,
}
} }
/// Robot/JSON mode: everything hidden. /// Robot/JSON mode: everything hidden.
pub fn silent() -> Self { pub fn silent() -> Self {
Self { show_progress: false, show_text: false } Self {
show_progress: false,
show_text: false,
}
} }
/// Progress only (used by sync in interactive mode). /// Progress only (used by sync in interactive mode).
pub fn progress_only() -> Self { pub fn progress_only() -> Self {
Self { show_progress: true, show_text: false } Self {
show_progress: true,
show_text: false,
}
} }
} }
@@ -105,7 +117,8 @@ pub async fn run_ingest(
lock.acquire(force)?; lock.acquire(force)?;
// Get token from environment // Get token from environment
let token = std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet { let token =
std::env::var(&config.gitlab.token_env_var).map_err(|_| LoreError::TokenNotSet {
env_var: config.gitlab.token_env_var.clone(), env_var: config.gitlab.token_env_var.clone(),
})?; })?;
@@ -199,7 +212,9 @@ pub async fn run_ingest(
let b = ProgressBar::new(0); let b = ProgressBar::new(0);
b.set_style( b.set_style(
ProgressStyle::default_bar() ProgressStyle::default_bar()
.template(" {spinner:.blue} Syncing discussions [{bar:30.cyan/dim}] {pos}/{len}") .template(
" {spinner:.blue} Syncing discussions [{bar:30.cyan/dim}] {pos}/{len}",
)
.unwrap() .unwrap()
.progress_chars("=> "), .progress_chars("=> "),
); );
@@ -237,6 +252,23 @@ pub async fn run_ingest(
ProgressEvent::MrDiscussionSyncComplete => { ProgressEvent::MrDiscussionSyncComplete => {
disc_bar_clone.finish_and_clear(); disc_bar_clone.finish_and_clear();
} }
ProgressEvent::ResourceEventsFetchStarted { total } => {
disc_bar_clone.set_length(total as u64);
disc_bar_clone.set_position(0);
disc_bar_clone.set_style(
ProgressStyle::default_bar()
.template(" {spinner:.blue} Fetching resource events [{bar:30.cyan/dim}] {pos}/{len}")
.unwrap()
.progress_chars("=> "),
);
disc_bar_clone.enable_steady_tick(std::time::Duration::from_millis(100));
}
ProgressEvent::ResourceEventFetched { current, total: _ } => {
disc_bar_clone.set_position(current as u64);
}
ProgressEvent::ResourceEventsFetchComplete { .. } => {
disc_bar_clone.finish_and_clear();
}
_ => {} _ => {}
}) })
}; };
@@ -269,6 +301,8 @@ pub async fn run_ingest(
total.notes_upserted += result.notes_upserted; total.notes_upserted += result.notes_upserted;
total.issues_synced_discussions += result.issues_synced_discussions; total.issues_synced_discussions += result.issues_synced_discussions;
total.issues_skipped_discussion_sync += result.issues_skipped_discussion_sync; total.issues_skipped_discussion_sync += result.issues_skipped_discussion_sync;
total.resource_events_fetched += result.resource_events_fetched;
total.resource_events_failed += result.resource_events_failed;
} else { } else {
let result = ingest_project_merge_requests_with_progress( let result = ingest_project_merge_requests_with_progress(
&conn, &conn,
@@ -301,6 +335,8 @@ pub async fn run_ingest(
total.diffnotes_count += result.diffnotes_count; total.diffnotes_count += result.diffnotes_count;
total.mrs_synced_discussions += result.mrs_synced_discussions; total.mrs_synced_discussions += result.mrs_synced_discussions;
total.mrs_skipped_discussion_sync += result.mrs_skipped_discussion_sync; total.mrs_skipped_discussion_sync += result.mrs_skipped_discussion_sync;
total.resource_events_fetched += result.resource_events_fetched;
total.resource_events_failed += result.resource_events_failed;
} }
} }

View File

@@ -22,19 +22,19 @@ pub use count::{
pub use doctor::{print_doctor_results, run_doctor}; pub use doctor::{print_doctor_results, run_doctor};
pub use embed::{print_embed, print_embed_json, run_embed}; pub use embed::{print_embed, print_embed_json, run_embed};
pub use generate_docs::{print_generate_docs, print_generate_docs_json, run_generate_docs}; pub use generate_docs::{print_generate_docs, print_generate_docs_json, run_generate_docs};
pub use stats::{print_stats, print_stats_json, run_stats};
pub use search::{
print_search_results, print_search_results_json, run_search, SearchCliFilters, SearchResponse,
};
pub use ingest::{IngestDisplay, print_ingest_summary, print_ingest_summary_json, run_ingest}; pub use ingest::{IngestDisplay, print_ingest_summary, print_ingest_summary_json, run_ingest};
pub use init::{InitInputs, InitOptions, InitResult, run_init}; pub use init::{InitInputs, InitOptions, InitResult, run_init};
pub use list::{ pub use list::{
ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues, ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues,
print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs, print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs,
}; };
pub use sync::{print_sync, print_sync_json, run_sync, SyncOptions, SyncResult}; pub use search::{
SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search,
};
pub use show::{ pub use show::{
print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, run_show_issue, print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, run_show_issue,
run_show_mr, run_show_mr,
}; };
pub use stats::{print_stats, print_stats_json, run_stats};
pub use sync::{SyncOptions, SyncResult, print_sync, print_sync_json, run_sync};
pub use sync_status::{print_sync_status, print_sync_status_json, run_sync_status}; pub use sync_status::{print_sync_status, print_sync_status_json, run_sync_status};

View File

@@ -3,6 +3,7 @@
use console::style; use console::style;
use serde::Serialize; use serde::Serialize;
use crate::Config;
use crate::core::db::create_connection; use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result}; use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path; use crate::core::paths::get_db_path;
@@ -10,10 +11,9 @@ use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, parse_since}; use crate::core::time::{ms_to_iso, parse_since};
use crate::documents::SourceType; use crate::documents::SourceType;
use crate::search::{ use crate::search::{
apply_filters, get_result_snippet, rank_rrf, search_fts, FtsQueryMode, PathFilter, FtsQueryMode, PathFilter, SearchFilters, apply_filters, get_result_snippet, rank_rrf,
SearchFilters, search_fts,
}; };
use crate::Config;
/// Display-ready search result with all fields hydrated. /// Display-ready search result with all fields hydrated.
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
@@ -86,9 +86,7 @@ pub fn run_search(
mode: "lexical".to_string(), mode: "lexical".to_string(),
total_results: 0, total_results: 0,
results: vec![], results: vec![],
warnings: vec![ warnings: vec!["No documents indexed. Run 'lore generate-docs' first.".to_string()],
"No documents indexed. Run 'lore generate-docs' first.".to_string()
],
}); });
} }
@@ -151,9 +149,9 @@ pub fn run_search(
// Adaptive recall: wider initial fetch when filters applied // Adaptive recall: wider initial fetch when filters applied
let requested = filters.clamp_limit(); let requested = filters.clamp_limit();
let top_k = if filters.has_any_filter() { let top_k = if filters.has_any_filter() {
(requested * 50).max(200).min(1500) (requested * 50).clamp(200, 1500)
} else { } else {
(requested * 10).max(50).min(1500) (requested * 10).clamp(50, 1500)
}; };
// FTS search // FTS search
@@ -190,10 +188,8 @@ pub fn run_search(
let hydrated = hydrate_results(&conn, &filtered_ids)?; let hydrated = hydrate_results(&conn, &filtered_ids)?;
// Build display results preserving filter order // Build display results preserving filter order
let rrf_map: std::collections::HashMap<i64, &crate::search::RrfResult> = ranked let rrf_map: std::collections::HashMap<i64, &crate::search::RrfResult> =
.iter() ranked.iter().map(|r| (r.document_id, r)).collect();
.map(|r| (r.document_id, r))
.collect();
let mut results: Vec<SearchResultDisplay> = Vec::with_capacity(hydrated.len()); let mut results: Vec<SearchResultDisplay> = Vec::with_capacity(hydrated.len());
for row in &hydrated { for row in &hydrated {
@@ -256,16 +252,13 @@ struct HydratedRow {
/// ///
/// Uses json_each() to pass ranked IDs and preserve ordering via ORDER BY j.key. /// Uses json_each() to pass ranked IDs and preserve ordering via ORDER BY j.key.
/// Labels and paths fetched via correlated json_group_array subqueries. /// Labels and paths fetched via correlated json_group_array subqueries.
fn hydrate_results( fn hydrate_results(conn: &rusqlite::Connection, document_ids: &[i64]) -> Result<Vec<HydratedRow>> {
conn: &rusqlite::Connection,
document_ids: &[i64],
) -> Result<Vec<HydratedRow>> {
if document_ids.is_empty() { if document_ids.is_empty() {
return Ok(Vec::new()); return Ok(Vec::new());
} }
let ids_json = serde_json::to_string(document_ids) let ids_json =
.map_err(|e| LoreError::Other(e.to_string()))?; serde_json::to_string(document_ids).map_err(|e| LoreError::Other(e.to_string()))?;
let sql = r#" let sql = r#"
SELECT d.id, d.source_type, d.title, d.url, d.author_username, SELECT d.id, d.source_type, d.title, d.url, d.author_username,
@@ -325,10 +318,7 @@ pub fn print_search_results(response: &SearchResponse) {
} }
if response.results.is_empty() { if response.results.is_empty() {
println!( println!("No results found for '{}'", style(&response.query).bold());
"No results found for '{}'",
style(&response.query).bold()
);
return; return;
} }
@@ -371,17 +361,11 @@ pub fn print_search_results(response: &SearchResponse) {
); );
if !result.labels.is_empty() { if !result.labels.is_empty() {
println!( println!(" Labels: {}", result.labels.join(", "));
" Labels: {}",
result.labels.join(", ")
);
} }
// Strip HTML tags from snippet for terminal display // Strip HTML tags from snippet for terminal display
let clean_snippet = result let clean_snippet = result.snippet.replace("<mark>", "").replace("</mark>", "");
.snippet
.replace("<mark>", "")
.replace("</mark>", "");
println!(" {}", style(clean_snippet).dim()); println!(" {}", style(clean_snippet).dim());
if let Some(ref explain) = result.explain { if let Some(ref explain) = result.explain {

View File

@@ -154,10 +154,7 @@ fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Resu
FROM issues i FROM issues i
JOIN projects p ON i.project_id = p.id JOIN projects p ON i.project_id = p.id
WHERE i.iid = ? AND i.project_id = ?", WHERE i.iid = ? AND i.project_id = ?",
vec![ vec![Box::new(iid), Box::new(project_id)],
Box::new(iid),
Box::new(project_id),
],
) )
} }
None => ( None => (
@@ -346,10 +343,7 @@ fn find_mr(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Result<
FROM merge_requests m FROM merge_requests m
JOIN projects p ON m.project_id = p.id JOIN projects p ON m.project_id = p.id
WHERE m.iid = ? AND m.project_id = ?", WHERE m.iid = ? AND m.project_id = ?",
vec![ vec![Box::new(iid), Box::new(project_id)],
Box::new(iid),
Box::new(project_id),
],
) )
} }
None => ( None => (

View File

@@ -4,10 +4,10 @@ use console::style;
use rusqlite::Connection; use rusqlite::Connection;
use serde::Serialize; use serde::Serialize;
use crate::Config;
use crate::core::db::create_connection; use crate::core::db::create_connection;
use crate::core::error::Result; use crate::core::error::Result;
use crate::core::paths::get_db_path; use crate::core::paths::get_db_path;
use crate::Config;
/// Result of the stats command. /// Result of the stats command.
#[derive(Debug, Default, Serialize)] #[derive(Debug, Default, Serialize)]
@@ -75,11 +75,7 @@ pub struct RepairResult {
} }
/// Run the stats command. /// Run the stats command.
pub fn run_stats( pub fn run_stats(config: &Config, check: bool, repair: bool) -> Result<StatsResult> {
config: &Config,
check: bool,
repair: bool,
) -> Result<StatsResult> {
let db_path = get_db_path(config.storage.db_path.as_deref()); let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?; let conn = create_connection(&db_path)?;
@@ -87,14 +83,22 @@ pub fn run_stats(
// Document counts // Document counts
result.documents.total = count_query(&conn, "SELECT COUNT(*) FROM documents")?; result.documents.total = count_query(&conn, "SELECT COUNT(*) FROM documents")?;
result.documents.issues = result.documents.issues = count_query(
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'issue'")?; &conn,
result.documents.merge_requests = "SELECT COUNT(*) FROM documents WHERE source_type = 'issue'",
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'merge_request'")?; )?;
result.documents.discussions = result.documents.merge_requests = count_query(
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE source_type = 'discussion'")?; &conn,
result.documents.truncated = "SELECT COUNT(*) FROM documents WHERE source_type = 'merge_request'",
count_query(&conn, "SELECT COUNT(*) FROM documents WHERE is_truncated = 1")?; )?;
result.documents.discussions = count_query(
&conn,
"SELECT COUNT(*) FROM documents WHERE source_type = 'discussion'",
)?;
result.documents.truncated = count_query(
&conn,
"SELECT COUNT(*) FROM documents WHERE is_truncated = 1",
)?;
// Embedding stats — skip gracefully if table doesn't exist (Gate A only) // Embedding stats — skip gracefully if table doesn't exist (Gate A only)
if table_exists(&conn, "embedding_metadata") { if table_exists(&conn, "embedding_metadata") {
@@ -119,10 +123,14 @@ pub fn run_stats(
result.fts.indexed = count_query(&conn, "SELECT COUNT(*) FROM documents_fts")?; result.fts.indexed = count_query(&conn, "SELECT COUNT(*) FROM documents_fts")?;
// Queue stats // Queue stats
result.queues.dirty_sources = result.queues.dirty_sources = count_query(
count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL")?; &conn,
result.queues.dirty_sources_failed = "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NULL",
count_query(&conn, "SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NOT NULL")?; )?;
result.queues.dirty_sources_failed = count_query(
&conn,
"SELECT COUNT(*) FROM dirty_sources WHERE last_error IS NOT NULL",
)?;
if table_exists(&conn, "pending_discussion_fetches") { if table_exists(&conn, "pending_discussion_fetches") {
result.queues.pending_discussion_fetches = count_query( result.queues.pending_discussion_fetches = count_query(
@@ -151,6 +159,7 @@ pub fn run_stats(
} }
// Integrity check // Integrity check
#[allow(clippy::field_reassign_with_default)]
if check { if check {
let mut integrity = IntegrityResult::default(); let mut integrity = IntegrityResult::default();
@@ -276,9 +285,7 @@ pub fn run_stats(
} }
fn count_query(conn: &Connection, sql: &str) -> Result<i64> { fn count_query(conn: &Connection, sql: &str) -> Result<i64> {
let count: i64 = conn let count: i64 = conn.query_row(sql, [], |row| row.get(0)).unwrap_or(0);
.query_row(sql, [], |row| row.get(0))
.unwrap_or(0);
Ok(count) Ok(count)
} }
@@ -300,7 +307,10 @@ pub fn print_stats(result: &StatsResult) {
println!(" Merge Requests: {}", result.documents.merge_requests); println!(" Merge Requests: {}", result.documents.merge_requests);
println!(" Discussions: {}", result.documents.discussions); println!(" Discussions: {}", result.documents.discussions);
if result.documents.truncated > 0 { if result.documents.truncated > 0 {
println!(" Truncated: {}", style(result.documents.truncated).yellow()); println!(
" Truncated: {}",
style(result.documents.truncated).yellow()
);
} }
println!(); println!();
@@ -318,13 +328,13 @@ pub fn print_stats(result: &StatsResult) {
println!(); println!();
println!("{}", style("Queues").cyan().bold()); println!("{}", style("Queues").cyan().bold());
println!(" Dirty sources: {} pending, {} failed", println!(
result.queues.dirty_sources, " Dirty sources: {} pending, {} failed",
result.queues.dirty_sources_failed result.queues.dirty_sources, result.queues.dirty_sources_failed
); );
println!(" Discussion fetch: {} pending, {} failed", println!(
result.queues.pending_discussion_fetches, " Discussion fetch: {} pending, {} failed",
result.queues.pending_discussion_fetches_failed result.queues.pending_discussion_fetches, result.queues.pending_discussion_fetches_failed
); );
if result.queues.pending_dependent_fetches > 0 if result.queues.pending_dependent_fetches > 0
|| result.queues.pending_dependent_fetches_failed > 0 || result.queues.pending_dependent_fetches_failed > 0
@@ -431,10 +441,12 @@ pub fn print_stats_json(result: &StatsResult) {
let output = StatsJsonOutput { let output = StatsJsonOutput {
ok: true, ok: true,
data: StatsResult { data: StatsResult {
documents: DocumentStats { ..*&result.documents }, documents: DocumentStats { ..result.documents },
embeddings: EmbeddingStats { ..*&result.embeddings }, embeddings: EmbeddingStats {
fts: FtsStats { ..*&result.fts }, ..result.embeddings
queues: QueueStats { ..*&result.queues }, },
fts: FtsStats { ..result.fts },
queues: QueueStats { ..result.queues },
integrity: result.integrity.as_ref().map(|i| IntegrityResult { integrity: result.integrity.as_ref().map(|i| IntegrityResult {
ok: i.ok, ok: i.ok,
fts_doc_mismatch: i.fts_doc_mismatch, fts_doc_mismatch: i.fts_doc_mismatch,

View File

@@ -29,6 +29,8 @@ pub struct SyncResult {
pub issues_updated: usize, pub issues_updated: usize,
pub mrs_updated: usize, pub mrs_updated: usize,
pub discussions_fetched: usize, pub discussions_fetched: usize,
pub resource_events_fetched: usize,
pub resource_events_failed: usize,
pub documents_regenerated: usize, pub documents_regenerated: usize,
pub documents_embedded: usize, pub documents_embedded: usize,
} }
@@ -70,26 +72,61 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
// Stage 1: Ingest issues // Stage 1: Ingest issues
current_stage += 1; current_stage += 1;
let spinner = stage_spinner(current_stage, total_stages, "Fetching issues from GitLab...", options.robot_mode); let spinner = stage_spinner(
current_stage,
total_stages,
"Fetching issues from GitLab...",
options.robot_mode,
);
info!("Sync stage {current_stage}/{total_stages}: ingesting issues"); info!("Sync stage {current_stage}/{total_stages}: ingesting issues");
let issues_result = run_ingest(config, "issues", None, options.force, options.full, ingest_display).await?; let issues_result = run_ingest(
config,
"issues",
None,
options.force,
options.full,
ingest_display,
)
.await?;
result.issues_updated = issues_result.issues_upserted; result.issues_updated = issues_result.issues_upserted;
result.discussions_fetched += issues_result.discussions_fetched; result.discussions_fetched += issues_result.discussions_fetched;
result.resource_events_fetched += issues_result.resource_events_fetched;
result.resource_events_failed += issues_result.resource_events_failed;
spinner.finish_and_clear(); spinner.finish_and_clear();
// Stage 2: Ingest MRs // Stage 2: Ingest MRs
current_stage += 1; current_stage += 1;
let spinner = stage_spinner(current_stage, total_stages, "Fetching merge requests from GitLab...", options.robot_mode); let spinner = stage_spinner(
current_stage,
total_stages,
"Fetching merge requests from GitLab...",
options.robot_mode,
);
info!("Sync stage {current_stage}/{total_stages}: ingesting merge requests"); info!("Sync stage {current_stage}/{total_stages}: ingesting merge requests");
let mrs_result = run_ingest(config, "mrs", None, options.force, options.full, ingest_display).await?; let mrs_result = run_ingest(
config,
"mrs",
None,
options.force,
options.full,
ingest_display,
)
.await?;
result.mrs_updated = mrs_result.mrs_upserted; result.mrs_updated = mrs_result.mrs_upserted;
result.discussions_fetched += mrs_result.discussions_fetched; result.discussions_fetched += mrs_result.discussions_fetched;
result.resource_events_fetched += mrs_result.resource_events_fetched;
result.resource_events_failed += mrs_result.resource_events_failed;
spinner.finish_and_clear(); spinner.finish_and_clear();
// Stage 3: Generate documents (unless --no-docs) // Stage 3: Generate documents (unless --no-docs)
if !options.no_docs { if !options.no_docs {
current_stage += 1; current_stage += 1;
let spinner = stage_spinner(current_stage, total_stages, "Processing documents...", options.robot_mode); let spinner = stage_spinner(
current_stage,
total_stages,
"Processing documents...",
options.robot_mode,
);
info!("Sync stage {current_stage}/{total_stages}: generating documents"); info!("Sync stage {current_stage}/{total_stages}: generating documents");
let docs_result = run_generate_docs(config, false, None)?; let docs_result = run_generate_docs(config, false, None)?;
result.documents_regenerated = docs_result.regenerated; result.documents_regenerated = docs_result.regenerated;
@@ -101,7 +138,12 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
// Stage 4: Embed documents (unless --no-embed) // Stage 4: Embed documents (unless --no-embed)
if !options.no_embed { if !options.no_embed {
current_stage += 1; current_stage += 1;
let spinner = stage_spinner(current_stage, total_stages, "Generating embeddings...", options.robot_mode); let spinner = stage_spinner(
current_stage,
total_stages,
"Generating embeddings...",
options.robot_mode,
);
info!("Sync stage {current_stage}/{total_stages}: embedding documents"); info!("Sync stage {current_stage}/{total_stages}: embedding documents");
match run_embed(config, options.full, false).await { match run_embed(config, options.full, false).await {
Ok(embed_result) => { Ok(embed_result) => {
@@ -112,11 +154,7 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
// Graceful degradation: Ollama down is a warning, not an error // Graceful degradation: Ollama down is a warning, not an error
spinner.finish_and_clear(); spinner.finish_and_clear();
if !options.robot_mode { if !options.robot_mode {
eprintln!( eprintln!(" {} Embedding skipped ({})", style("warn").yellow(), e);
" {} Embedding skipped ({})",
style("warn").yellow(),
e
);
} }
warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing"); warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing");
} }
@@ -129,6 +167,8 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
issues = result.issues_updated, issues = result.issues_updated,
mrs = result.mrs_updated, mrs = result.mrs_updated,
discussions = result.discussions_fetched, discussions = result.discussions_fetched,
resource_events = result.resource_events_fetched,
resource_events_failed = result.resource_events_failed,
docs = result.documents_regenerated, docs = result.documents_regenerated,
embedded = result.documents_embedded, embedded = result.documents_embedded,
"Sync pipeline complete" "Sync pipeline complete"
@@ -139,19 +179,31 @@ pub async fn run_sync(config: &Config, options: SyncOptions) -> Result<SyncResul
/// Print human-readable sync summary. /// Print human-readable sync summary.
pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) { pub fn print_sync(result: &SyncResult, elapsed: std::time::Duration) {
println!( println!("{} Sync complete:", style("done").green().bold(),);
"{} Sync complete:",
style("done").green().bold(),
);
println!(" Issues updated: {}", result.issues_updated); println!(" Issues updated: {}", result.issues_updated);
println!(" MRs updated: {}", result.mrs_updated); println!(" MRs updated: {}", result.mrs_updated);
println!(" Discussions fetched: {}", result.discussions_fetched);
println!(" Documents regenerated: {}", result.documents_regenerated);
println!(" Documents embedded: {}", result.documents_embedded);
println!( println!(
" Elapsed: {:.1}s", " Discussions fetched: {}",
elapsed.as_secs_f64() result.discussions_fetched
); );
if result.resource_events_fetched > 0 || result.resource_events_failed > 0 {
println!(
" Resource events fetched: {}",
result.resource_events_fetched
);
if result.resource_events_failed > 0 {
println!(
" Resource events failed: {}",
result.resource_events_failed
);
}
}
println!(
" Documents regenerated: {}",
result.documents_regenerated
);
println!(" Documents embedded: {}", result.documents_embedded);
println!(" Elapsed: {:.1}s", elapsed.as_secs_f64());
} }
/// JSON output for sync. /// JSON output for sync.

View File

@@ -213,7 +213,12 @@ pub struct IssuesArgs {
pub iid: Option<i64>, pub iid: Option<i64>,
/// Maximum results /// Maximum results
#[arg(short = 'n', long = "limit", default_value = "50", help_heading = "Output")] #[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize, pub limit: usize,
/// Filter by state (opened, closed, all) /// Filter by state (opened, closed, all)
@@ -249,7 +254,11 @@ pub struct IssuesArgs {
pub due_before: Option<String>, pub due_before: Option<String>,
/// Show only issues with a due date /// Show only issues with a due date
#[arg(long = "has-due", help_heading = "Filters", overrides_with = "no_has_due")] #[arg(
long = "has-due",
help_heading = "Filters",
overrides_with = "no_has_due"
)]
pub has_due: bool, pub has_due: bool,
#[arg(long = "no-has-due", hide = true, overrides_with = "has_due")] #[arg(long = "no-has-due", hide = true, overrides_with = "has_due")]
@@ -267,7 +276,12 @@ pub struct IssuesArgs {
pub no_asc: bool, pub no_asc: bool,
/// Open first matching item in browser /// Open first matching item in browser
#[arg(short = 'o', long, help_heading = "Actions", overrides_with = "no_open")] #[arg(
short = 'o',
long,
help_heading = "Actions",
overrides_with = "no_open"
)]
pub open: bool, pub open: bool,
#[arg(long = "no-open", hide = true, overrides_with = "open")] #[arg(long = "no-open", hide = true, overrides_with = "open")]
@@ -281,7 +295,12 @@ pub struct MrsArgs {
pub iid: Option<i64>, pub iid: Option<i64>,
/// Maximum results /// Maximum results
#[arg(short = 'n', long = "limit", default_value = "50", help_heading = "Output")] #[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize, pub limit: usize,
/// Filter by state (opened, merged, closed, locked, all) /// Filter by state (opened, merged, closed, locked, all)
@@ -313,11 +332,21 @@ pub struct MrsArgs {
pub since: Option<String>, pub since: Option<String>,
/// Show only draft MRs /// Show only draft MRs
#[arg(short = 'd', long, conflicts_with = "no_draft", help_heading = "Filters")] #[arg(
short = 'd',
long,
conflicts_with = "no_draft",
help_heading = "Filters"
)]
pub draft: bool, pub draft: bool,
/// Exclude draft MRs /// Exclude draft MRs
#[arg(short = 'D', long = "no-draft", conflicts_with = "draft", help_heading = "Filters")] #[arg(
short = 'D',
long = "no-draft",
conflicts_with = "draft",
help_heading = "Filters"
)]
pub no_draft: bool, pub no_draft: bool,
/// Filter by target branch /// Filter by target branch
@@ -340,7 +369,12 @@ pub struct MrsArgs {
pub no_asc: bool, pub no_asc: bool,
/// Open first matching item in browser /// Open first matching item in browser
#[arg(short = 'o', long, help_heading = "Actions", overrides_with = "no_open")] #[arg(
short = 'o',
long,
help_heading = "Actions",
overrides_with = "no_open"
)]
pub open: bool, pub open: bool,
#[arg(long = "no-open", hide = true, overrides_with = "open")] #[arg(long = "no-open", hide = true, overrides_with = "open")]
@@ -427,7 +461,12 @@ pub struct SearchArgs {
pub updated_after: Option<String>, pub updated_after: Option<String>,
/// Maximum results (default 20, max 100) /// Maximum results (default 20, max 100)
#[arg(short = 'n', long = "limit", default_value = "20", help_heading = "Output")] #[arg(
short = 'n',
long = "limit",
default_value = "20",
help_heading = "Output"
)]
pub limit: usize, pub limit: usize,
/// Show ranking explanation per result /// Show ranking explanation per result

View File

@@ -86,7 +86,10 @@ mod tests {
let result = compute_next_attempt_at(now, 1); let result = compute_next_attempt_at(now, 1);
let delay = result - now; let delay = result - now;
// attempt 1: base = 2000ms, with jitter: 1800-2200ms // attempt 1: base = 2000ms, with jitter: 1800-2200ms
assert!(delay >= 1800 && delay <= 2200, "first retry delay: {delay}ms"); assert!(
(1800..=2200).contains(&delay),
"first retry delay: {delay}ms"
);
} }
#[test] #[test]

View File

@@ -31,22 +31,10 @@ const MIGRATIONS: &[(&str, &str)] = &[
"006", "006",
include_str!("../../migrations/006_merge_requests.sql"), include_str!("../../migrations/006_merge_requests.sql"),
), ),
( ("007", include_str!("../../migrations/007_documents.sql")),
"007", ("008", include_str!("../../migrations/008_fts5.sql")),
include_str!("../../migrations/007_documents.sql"), ("009", include_str!("../../migrations/009_embeddings.sql")),
), ("010", include_str!("../../migrations/010_chunk_config.sql")),
(
"008",
include_str!("../../migrations/008_fts5.sql"),
),
(
"009",
include_str!("../../migrations/009_embeddings.sql"),
),
(
"010",
include_str!("../../migrations/010_chunk_config.sql"),
),
( (
"011", "011",
include_str!("../../migrations/011_resource_events.sql"), include_str!("../../migrations/011_resource_events.sql"),

View File

@@ -40,7 +40,15 @@ pub fn enqueue_job(
"INSERT OR IGNORE INTO pending_dependent_fetches "INSERT OR IGNORE INTO pending_dependent_fetches
(project_id, entity_type, entity_iid, entity_local_id, job_type, payload_json, enqueued_at) (project_id, entity_type, entity_iid, entity_local_id, job_type, payload_json, enqueued_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
rusqlite::params![project_id, entity_type, entity_iid, entity_local_id, job_type, payload_json, now], rusqlite::params![
project_id,
entity_type,
entity_iid,
entity_local_id,
job_type,
payload_json,
now
],
)?; )?;
Ok(changes > 0) Ok(changes > 0)
@@ -69,9 +77,7 @@ pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Resul
)?; )?;
let jobs: Vec<PendingJob> = select_stmt let jobs: Vec<PendingJob> = select_stmt
.query_map( .query_map(rusqlite::params![job_type, now, batch_size as i64], |row| {
rusqlite::params![job_type, now, batch_size as i64],
|row| {
Ok(PendingJob { Ok(PendingJob {
id: row.get(0)?, id: row.get(0)?,
project_id: row.get(1)?, project_id: row.get(1)?,
@@ -82,8 +88,7 @@ pub fn claim_jobs(conn: &Connection, job_type: &str, batch_size: usize) -> Resul
payload_json: row.get(6)?, payload_json: row.get(6)?,
attempts: row.get(7)?, attempts: row.get(7)?,
}) })
}, })?
)?
.collect::<std::result::Result<Vec<_>, _>>()?; .collect::<std::result::Result<Vec<_>, _>>()?;
// Lock the claimed jobs // Lock the claimed jobs

View File

@@ -222,9 +222,9 @@ impl LoreError {
"Check database file permissions or reset with 'lore reset'.\n\n Example:\n lore doctor\n lore reset --yes", "Check database file permissions or reset with 'lore reset'.\n\n Example:\n lore doctor\n lore reset --yes",
), ),
Self::Http(_) => Some("Check network connection"), Self::Http(_) => Some("Check network connection"),
Self::NotFound(_) => Some( Self::NotFound(_) => {
"Verify the entity exists.\n\n Example:\n lore issues\n lore mrs", Some("Verify the entity exists.\n\n Example:\n lore issues\n lore mrs")
), }
Self::Ambiguous(_) => Some( Self::Ambiguous(_) => Some(
"Use -p to choose a specific project.\n\n Example:\n lore issues 42 -p group/project-a\n lore mrs 99 -p group/project-b", "Use -p to choose a specific project.\n\n Example:\n lore issues 42 -p group/project-a\n lore mrs 99 -p group/project-b",
), ),

View File

@@ -150,7 +150,10 @@ pub fn upsert_milestone_events(
/// Resolve entity type string to (issue_id, merge_request_id) pair. /// Resolve entity type string to (issue_id, merge_request_id) pair.
/// Exactly one is Some, the other is None. /// Exactly one is Some, the other is None.
fn resolve_entity_ids(entity_type: &str, entity_local_id: i64) -> Result<(Option<i64>, Option<i64>)> { fn resolve_entity_ids(
entity_type: &str,
entity_local_id: i64,
) -> Result<(Option<i64>, Option<i64>)> {
match entity_type { match entity_type {
"issue" => Ok((Some(entity_local_id), None)), "issue" => Ok((Some(entity_local_id), None)),
"merge_request" => Ok((None, Some(entity_local_id))), "merge_request" => Ok((None, Some(entity_local_id))),

View File

@@ -33,7 +33,7 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
let mut suffix_stmt = conn.prepare( let mut suffix_stmt = conn.prepare(
"SELECT id, path_with_namespace FROM projects "SELECT id, path_with_namespace FROM projects
WHERE path_with_namespace LIKE '%/' || ?1 WHERE path_with_namespace LIKE '%/' || ?1
OR path_with_namespace = ?1" OR path_with_namespace = ?1",
)?; )?;
let suffix_matches: Vec<(i64, String)> = suffix_stmt let suffix_matches: Vec<(i64, String)> = suffix_stmt
.query_map(rusqlite::params![project_str], |row| { .query_map(rusqlite::params![project_str], |row| {
@@ -48,7 +48,11 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
return Err(LoreError::Ambiguous(format!( return Err(LoreError::Ambiguous(format!(
"Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}", "Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}",
project_str, project_str,
matching.iter().map(|p| format!(" {}", p)).collect::<Vec<_>>().join("\n"), matching
.iter()
.map(|p| format!(" {}", p))
.collect::<Vec<_>>()
.join("\n"),
matching[0] matching[0]
))); )));
} }
@@ -58,7 +62,7 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
// Step 4: Case-insensitive substring match (unambiguous) // Step 4: Case-insensitive substring match (unambiguous)
let mut substr_stmt = conn.prepare( let mut substr_stmt = conn.prepare(
"SELECT id, path_with_namespace FROM projects "SELECT id, path_with_namespace FROM projects
WHERE LOWER(path_with_namespace) LIKE '%' || LOWER(?1) || '%'" WHERE LOWER(path_with_namespace) LIKE '%' || LOWER(?1) || '%'",
)?; )?;
let substr_matches: Vec<(i64, String)> = substr_stmt let substr_matches: Vec<(i64, String)> = substr_stmt
.query_map(rusqlite::params![project_str], |row| { .query_map(rusqlite::params![project_str], |row| {
@@ -73,7 +77,11 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
return Err(LoreError::Ambiguous(format!( return Err(LoreError::Ambiguous(format!(
"Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}", "Project '{}' is ambiguous. Matching projects:\n{}\n\nHint: Use the full path, e.g., --project={}",
project_str, project_str,
matching.iter().map(|p| format!(" {}", p)).collect::<Vec<_>>().join("\n"), matching
.iter()
.map(|p| format!(" {}", p))
.collect::<Vec<_>>()
.join("\n"),
matching[0] matching[0]
))); )));
} }
@@ -81,9 +89,8 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
} }
// Step 5: No match — list available projects // Step 5: No match — list available projects
let mut all_stmt = conn.prepare( let mut all_stmt =
"SELECT path_with_namespace FROM projects ORDER BY path_with_namespace" conn.prepare("SELECT path_with_namespace FROM projects ORDER BY path_with_namespace")?;
)?;
let all_projects: Vec<String> = all_stmt let all_projects: Vec<String> = all_stmt
.query_map([], |row| row.get(0))? .query_map([], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?; .collect::<std::result::Result<Vec<_>, _>>()?;
@@ -98,7 +105,11 @@ pub fn resolve_project(conn: &Connection, project_str: &str) -> Result<i64> {
Err(LoreError::Other(format!( Err(LoreError::Other(format!(
"Project '{}' not found.\n\nAvailable projects:\n{}\n\nHint: Use the full path, e.g., --project={}", "Project '{}' not found.\n\nAvailable projects:\n{}\n\nHint: Use the full path, e.g., --project={}",
project_str, project_str,
all_projects.iter().map(|p| format!(" {}", p)).collect::<Vec<_>>().join("\n"), all_projects
.iter()
.map(|p| format!(" {}", p))
.collect::<Vec<_>>()
.join("\n"),
all_projects[0] all_projects[0]
))) )))
} }
@@ -109,7 +120,8 @@ mod tests {
fn setup_db() -> Connection { fn setup_db() -> Connection {
let conn = Connection::open_in_memory().unwrap(); let conn = Connection::open_in_memory().unwrap();
conn.execute_batch(" conn.execute_batch(
"
CREATE TABLE projects ( CREATE TABLE projects (
id INTEGER PRIMARY KEY, id INTEGER PRIMARY KEY,
gitlab_project_id INTEGER UNIQUE NOT NULL, gitlab_project_id INTEGER UNIQUE NOT NULL,
@@ -121,7 +133,9 @@ mod tests {
raw_payload_id INTEGER raw_payload_id INTEGER
); );
CREATE INDEX idx_projects_path ON projects(path_with_namespace); CREATE INDEX idx_projects_path ON projects(path_with_namespace);
").unwrap(); ",
)
.unwrap();
conn conn
} }
@@ -129,7 +143,8 @@ mod tests {
conn.execute( conn.execute(
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (?1, ?2, ?3)", "INSERT INTO projects (id, gitlab_project_id, path_with_namespace) VALUES (?1, ?2, ?3)",
rusqlite::params![id, id * 100, path], rusqlite::params![id, id * 100, path],
).unwrap(); )
.unwrap();
} }
#[test] #[test]
@@ -164,7 +179,11 @@ mod tests {
insert_project(&conn, 2, "frontend/auth-service"); insert_project(&conn, 2, "frontend/auth-service");
let err = resolve_project(&conn, "auth-service").unwrap_err(); let err = resolve_project(&conn, "auth-service").unwrap_err();
let msg = err.to_string(); let msg = err.to_string();
assert!(msg.contains("ambiguous"), "Expected ambiguous error, got: {}", msg); assert!(
msg.contains("ambiguous"),
"Expected ambiguous error, got: {}",
msg
);
assert!(msg.contains("backend/auth-service")); assert!(msg.contains("backend/auth-service"));
assert!(msg.contains("frontend/auth-service")); assert!(msg.contains("frontend/auth-service"));
} }
@@ -195,7 +214,11 @@ mod tests {
// "code" matches both projects // "code" matches both projects
let err = resolve_project(&conn, "code").unwrap_err(); let err = resolve_project(&conn, "code").unwrap_err();
let msg = err.to_string(); let msg = err.to_string();
assert!(msg.contains("ambiguous"), "Expected ambiguous error, got: {}", msg); assert!(
msg.contains("ambiguous"),
"Expected ambiguous error, got: {}",
msg
);
assert!(msg.contains("vs/python-code")); assert!(msg.contains("vs/python-code"));
assert!(msg.contains("vs/typescript-code")); assert!(msg.contains("vs/typescript-code"));
} }
@@ -217,7 +240,11 @@ mod tests {
insert_project(&conn, 1, "backend/auth-service"); insert_project(&conn, 1, "backend/auth-service");
let err = resolve_project(&conn, "nonexistent").unwrap_err(); let err = resolve_project(&conn, "nonexistent").unwrap_err();
let msg = err.to_string(); let msg = err.to_string();
assert!(msg.contains("not found"), "Expected not found error, got: {}", msg); assert!(
msg.contains("not found"),
"Expected not found error, got: {}",
msg
);
assert!(msg.contains("backend/auth-service")); assert!(msg.contains("backend/auth-service"));
} }

View File

@@ -4,10 +4,10 @@ use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use std::collections::BTreeSet; use std::collections::BTreeSet;
use crate::core::error::Result;
use super::truncation::{ use super::truncation::{
truncate_discussion, truncate_hard_cap, NoteContent, MAX_DISCUSSION_BYTES, MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap,
}; };
use crate::core::error::Result;
/// Source type for documents. /// Source type for documents.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -113,7 +113,19 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option
}, },
); );
let (id, iid, title, description, state, author_username, created_at, updated_at, web_url, path_with_namespace, project_id) = match row { let (
id,
iid,
title,
description,
state,
author_username,
created_at,
updated_at,
web_url,
path_with_namespace,
project_id,
) = match row {
Ok(r) => r, Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()), Err(e) => return Err(e.into()),
@@ -124,15 +136,14 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option
"SELECT l.name FROM issue_labels il "SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1 WHERE il.issue_id = ?1
ORDER BY l.name" ORDER BY l.name",
)?; )?;
let labels: Vec<String> = label_stmt let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![id], |row| row.get(0))? .query_map(rusqlite::params![id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?; .collect::<std::result::Result<Vec<_>, _>>()?;
// Build labels JSON array string // Build labels JSON array string
let labels_json = serde_json::to_string(&labels) let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
.unwrap_or_else(|_| "[]".to_string());
// Format content_text per PRD template // Format content_text per PRD template
let display_title = title.as_deref().unwrap_or("(untitled)"); let display_title = title.as_deref().unwrap_or("(untitled)");
@@ -213,7 +224,21 @@ pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<Docum
}, },
); );
let (id, iid, title, description, state, author_username, source_branch, target_branch, created_at, updated_at, web_url, path_with_namespace, project_id) = match row { let (
id,
iid,
title,
description,
state,
author_username,
source_branch,
target_branch,
created_at,
updated_at,
web_url,
path_with_namespace,
project_id,
) = match row {
Ok(r) => r, Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()), Err(e) => return Err(e.into()),
@@ -224,14 +249,13 @@ pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<Docum
"SELECT l.name FROM mr_labels ml "SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1 WHERE ml.merge_request_id = ?1
ORDER BY l.name" ORDER BY l.name",
)?; )?;
let labels: Vec<String> = label_stmt let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![id], |row| row.get(0))? .query_map(rusqlite::params![id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?; .collect::<std::result::Result<Vec<_>, _>>()?;
let labels_json = serde_json::to_string(&labels) let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
.unwrap_or_else(|_| "[]".to_string());
let display_title = title.as_deref().unwrap_or("(untitled)"); let display_title = title.as_deref().unwrap_or("(untitled)");
let display_state = state.as_deref().unwrap_or("unknown"); let display_state = state.as_deref().unwrap_or("unknown");
@@ -359,13 +383,7 @@ pub fn extract_discussion_document(
.query_map(rusqlite::params![parent_id], |row| row.get(0))? .query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?; .collect::<std::result::Result<Vec<_>, _>>()?;
( (iid, title, web_url, format!("Issue #{}", iid), labels)
iid,
title,
web_url,
format!("Issue #{}", iid),
labels,
)
} }
"MergeRequest" => { "MergeRequest" => {
let parent_id = match merge_request_id { let parent_id = match merge_request_id {
@@ -399,13 +417,7 @@ pub fn extract_discussion_document(
.query_map(rusqlite::params![parent_id], |row| row.get(0))? .query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?; .collect::<std::result::Result<Vec<_>, _>>()?;
( (iid, title, web_url, format!("MR !{}", iid), labels)
iid,
title,
web_url,
format!("MR !{}", iid),
labels,
)
} }
_ => return Ok(None), _ => return Ok(None),
}; };
@@ -449,17 +461,17 @@ pub fn extract_discussion_document(
// Extract DiffNote paths (deduplicated, sorted) // Extract DiffNote paths (deduplicated, sorted)
let mut path_set = BTreeSet::new(); let mut path_set = BTreeSet::new();
for note in &notes { for note in &notes {
if let Some(ref p) = note.old_path { if let Some(ref p) = note.old_path
if !p.is_empty() { && !p.is_empty()
{
path_set.insert(p.clone()); path_set.insert(p.clone());
} }
} if let Some(ref p) = note.new_path
if let Some(ref p) = note.new_path { && !p.is_empty()
if !p.is_empty() { {
path_set.insert(p.clone()); path_set.insert(p.clone());
} }
} }
}
let paths: Vec<String> = path_set.into_iter().collect(); let paths: Vec<String> = path_set.into_iter().collect();
// Construct URL: parent_web_url#note_{first_note_gitlab_id} // Construct URL: parent_web_url#note_{first_note_gitlab_id}
@@ -620,7 +632,8 @@ mod tests {
// Helper to create an in-memory DB with the required tables for extraction tests // Helper to create an in-memory DB with the required tables for extraction tests
fn setup_test_db() -> Connection { fn setup_test_db() -> Connection {
let conn = Connection::open_in_memory().unwrap(); let conn = Connection::open_in_memory().unwrap();
conn.execute_batch(" conn.execute_batch(
"
CREATE TABLE projects ( CREATE TABLE projects (
id INTEGER PRIMARY KEY, id INTEGER PRIMARY KEY,
gitlab_project_id INTEGER UNIQUE NOT NULL, gitlab_project_id INTEGER UNIQUE NOT NULL,
@@ -660,7 +673,9 @@ mod tests {
label_id INTEGER NOT NULL REFERENCES labels(id), label_id INTEGER NOT NULL REFERENCES labels(id),
PRIMARY KEY(issue_id, label_id) PRIMARY KEY(issue_id, label_id)
); );
").unwrap(); ",
)
.unwrap();
// Insert a test project // Insert a test project
conn.execute( conn.execute(
@@ -671,7 +686,17 @@ mod tests {
conn conn
} }
fn insert_issue(conn: &Connection, id: i64, iid: i64, title: Option<&str>, description: Option<&str>, state: &str, author: Option<&str>, web_url: Option<&str>) { #[allow(clippy::too_many_arguments)]
fn insert_issue(
conn: &Connection,
id: i64,
iid: i64,
title: Option<&str>,
description: Option<&str>,
state: &str,
author: Option<&str>,
web_url: Option<&str>,
) {
conn.execute( conn.execute(
"INSERT INTO issues (id, gitlab_id, project_id, iid, title, description, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, 1000, 2000, 3000, ?8)", "INSERT INTO issues (id, gitlab_id, project_id, iid, title, description, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, 1000, 2000, 3000, ?8)",
rusqlite::params![id, id * 10, iid, title, description, state, author, web_url], rusqlite::params![id, id * 10, iid, title, description, state, author, web_url],
@@ -682,20 +707,31 @@ mod tests {
conn.execute( conn.execute(
"INSERT INTO labels (id, project_id, name) VALUES (?1, 1, ?2)", "INSERT INTO labels (id, project_id, name) VALUES (?1, 1, ?2)",
rusqlite::params![id, name], rusqlite::params![id, name],
).unwrap(); )
.unwrap();
} }
fn link_issue_label(conn: &Connection, issue_id: i64, label_id: i64) { fn link_issue_label(conn: &Connection, issue_id: i64, label_id: i64) {
conn.execute( conn.execute(
"INSERT INTO issue_labels (issue_id, label_id) VALUES (?1, ?2)", "INSERT INTO issue_labels (issue_id, label_id) VALUES (?1, ?2)",
rusqlite::params![issue_id, label_id], rusqlite::params![issue_id, label_id],
).unwrap(); )
.unwrap();
} }
#[test] #[test]
fn test_issue_document_format() { fn test_issue_document_format() {
let conn = setup_test_db(); let conn = setup_test_db();
insert_issue(&conn, 1, 234, Some("Authentication redesign"), Some("We need to modernize our authentication system..."), "opened", Some("johndoe"), Some("https://gitlab.example.com/group/project-one/-/issues/234")); insert_issue(
&conn,
1,
234,
Some("Authentication redesign"),
Some("We need to modernize our authentication system..."),
"opened",
Some("johndoe"),
Some("https://gitlab.example.com/group/project-one/-/issues/234"),
);
insert_label(&conn, 1, "auth"); insert_label(&conn, 1, "auth");
insert_label(&conn, 2, "bug"); insert_label(&conn, 2, "bug");
link_issue_label(&conn, 1, 1); link_issue_label(&conn, 1, 1);
@@ -706,13 +742,23 @@ mod tests {
assert_eq!(doc.source_id, 1); assert_eq!(doc.source_id, 1);
assert_eq!(doc.project_id, 1); assert_eq!(doc.project_id, 1);
assert_eq!(doc.author_username, Some("johndoe".to_string())); assert_eq!(doc.author_username, Some("johndoe".to_string()));
assert!(doc.content_text.starts_with("[[Issue]] #234: Authentication redesign\n")); assert!(
doc.content_text
.starts_with("[[Issue]] #234: Authentication redesign\n")
);
assert!(doc.content_text.contains("Project: group/project-one\n")); assert!(doc.content_text.contains("Project: group/project-one\n"));
assert!(doc.content_text.contains("URL: https://gitlab.example.com/group/project-one/-/issues/234\n")); assert!(
doc.content_text
.contains("URL: https://gitlab.example.com/group/project-one/-/issues/234\n")
);
assert!(doc.content_text.contains("Labels: [\"auth\",\"bug\"]\n")); assert!(doc.content_text.contains("Labels: [\"auth\",\"bug\"]\n"));
assert!(doc.content_text.contains("State: opened\n")); assert!(doc.content_text.contains("State: opened\n"));
assert!(doc.content_text.contains("Author: @johndoe\n")); assert!(doc.content_text.contains("Author: @johndoe\n"));
assert!(doc.content_text.contains("--- Description ---\n\nWe need to modernize our authentication system...")); assert!(
doc.content_text.contains(
"--- Description ---\n\nWe need to modernize our authentication system..."
)
);
assert!(!doc.is_truncated); assert!(!doc.is_truncated);
assert!(doc.paths.is_empty()); assert!(doc.paths.is_empty());
} }
@@ -727,7 +773,16 @@ mod tests {
#[test] #[test]
fn test_issue_no_description() { fn test_issue_no_description() {
let conn = setup_test_db(); let conn = setup_test_db();
insert_issue(&conn, 1, 10, Some("Quick fix"), None, "opened", Some("alice"), None); insert_issue(
&conn,
1,
10,
Some("Quick fix"),
None,
"opened",
Some("alice"),
None,
);
let doc = extract_issue_document(&conn, 1).unwrap().unwrap(); let doc = extract_issue_document(&conn, 1).unwrap().unwrap();
assert!(!doc.content_text.contains("--- Description ---")); assert!(!doc.content_text.contains("--- Description ---"));
@@ -737,7 +792,16 @@ mod tests {
#[test] #[test]
fn test_issue_labels_sorted() { fn test_issue_labels_sorted() {
let conn = setup_test_db(); let conn = setup_test_db();
insert_issue(&conn, 1, 10, Some("Test"), Some("Body"), "opened", Some("bob"), None); insert_issue(
&conn,
1,
10,
Some("Test"),
Some("Body"),
"opened",
Some("bob"),
None,
);
insert_label(&conn, 1, "zeta"); insert_label(&conn, 1, "zeta");
insert_label(&conn, 2, "alpha"); insert_label(&conn, 2, "alpha");
insert_label(&conn, 3, "middle"); insert_label(&conn, 3, "middle");
@@ -747,13 +811,25 @@ mod tests {
let doc = extract_issue_document(&conn, 1).unwrap().unwrap(); let doc = extract_issue_document(&conn, 1).unwrap().unwrap();
assert_eq!(doc.labels, vec!["alpha", "middle", "zeta"]); assert_eq!(doc.labels, vec!["alpha", "middle", "zeta"]);
assert!(doc.content_text.contains("Labels: [\"alpha\",\"middle\",\"zeta\"]")); assert!(
doc.content_text
.contains("Labels: [\"alpha\",\"middle\",\"zeta\"]")
);
} }
#[test] #[test]
fn test_issue_no_labels() { fn test_issue_no_labels() {
let conn = setup_test_db(); let conn = setup_test_db();
insert_issue(&conn, 1, 10, Some("Test"), Some("Body"), "opened", None, None); insert_issue(
&conn,
1,
10,
Some("Test"),
Some("Body"),
"opened",
None,
None,
);
let doc = extract_issue_document(&conn, 1).unwrap().unwrap(); let doc = extract_issue_document(&conn, 1).unwrap().unwrap();
assert!(doc.labels.is_empty()); assert!(doc.labels.is_empty());
@@ -763,7 +839,16 @@ mod tests {
#[test] #[test]
fn test_issue_hash_deterministic() { fn test_issue_hash_deterministic() {
let conn = setup_test_db(); let conn = setup_test_db();
insert_issue(&conn, 1, 10, Some("Test"), Some("Body"), "opened", Some("alice"), None); insert_issue(
&conn,
1,
10,
Some("Test"),
Some("Body"),
"opened",
Some("alice"),
None,
);
let doc1 = extract_issue_document(&conn, 1).unwrap().unwrap(); let doc1 = extract_issue_document(&conn, 1).unwrap().unwrap();
let doc2 = extract_issue_document(&conn, 1).unwrap().unwrap(); let doc2 = extract_issue_document(&conn, 1).unwrap().unwrap();
@@ -786,7 +871,8 @@ mod tests {
fn setup_mr_test_db() -> Connection { fn setup_mr_test_db() -> Connection {
let conn = setup_test_db(); let conn = setup_test_db();
conn.execute_batch(" conn.execute_batch(
"
CREATE TABLE merge_requests ( CREATE TABLE merge_requests (
id INTEGER PRIMARY KEY, id INTEGER PRIMARY KEY,
gitlab_id INTEGER UNIQUE NOT NULL, gitlab_id INTEGER UNIQUE NOT NULL,
@@ -821,11 +907,25 @@ mod tests {
label_id INTEGER REFERENCES labels(id), label_id INTEGER REFERENCES labels(id),
PRIMARY KEY(merge_request_id, label_id) PRIMARY KEY(merge_request_id, label_id)
); );
").unwrap(); ",
)
.unwrap();
conn conn
} }
fn insert_mr(conn: &Connection, id: i64, iid: i64, title: Option<&str>, description: Option<&str>, state: Option<&str>, author: Option<&str>, source_branch: Option<&str>, target_branch: Option<&str>, web_url: Option<&str>) { #[allow(clippy::too_many_arguments)]
fn insert_mr(
conn: &Connection,
id: i64,
iid: i64,
title: Option<&str>,
description: Option<&str>,
state: Option<&str>,
author: Option<&str>,
source_branch: Option<&str>,
target_branch: Option<&str>,
web_url: Option<&str>,
) {
conn.execute( conn.execute(
"INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, description, state, author_username, source_branch, target_branch, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, ?8, ?9, 1000, 2000, 3000, ?10)", "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, description, state, author_username, source_branch, target_branch, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, ?8, ?9, 1000, 2000, 3000, ?10)",
rusqlite::params![id, id * 10, iid, title, description, state, author, source_branch, target_branch, web_url], rusqlite::params![id, id * 10, iid, title, description, state, author, source_branch, target_branch, web_url],
@@ -836,13 +936,25 @@ mod tests {
conn.execute( conn.execute(
"INSERT INTO mr_labels (merge_request_id, label_id) VALUES (?1, ?2)", "INSERT INTO mr_labels (merge_request_id, label_id) VALUES (?1, ?2)",
rusqlite::params![mr_id, label_id], rusqlite::params![mr_id, label_id],
).unwrap(); )
.unwrap();
} }
#[test] #[test]
fn test_mr_document_format() { fn test_mr_document_format() {
let conn = setup_mr_test_db(); let conn = setup_mr_test_db();
insert_mr(&conn, 1, 456, Some("Implement JWT authentication"), Some("This MR implements JWT-based authentication..."), Some("opened"), Some("johndoe"), Some("feature/jwt-auth"), Some("main"), Some("https://gitlab.example.com/group/project-one/-/merge_requests/456")); insert_mr(
&conn,
1,
456,
Some("Implement JWT authentication"),
Some("This MR implements JWT-based authentication..."),
Some("opened"),
Some("johndoe"),
Some("feature/jwt-auth"),
Some("main"),
Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"),
);
insert_label(&conn, 1, "auth"); insert_label(&conn, 1, "auth");
insert_label(&conn, 2, "feature"); insert_label(&conn, 2, "feature");
link_mr_label(&conn, 1, 1); link_mr_label(&conn, 1, 1);
@@ -851,13 +963,25 @@ mod tests {
let doc = extract_mr_document(&conn, 1).unwrap().unwrap(); let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
assert_eq!(doc.source_type, SourceType::MergeRequest); assert_eq!(doc.source_type, SourceType::MergeRequest);
assert_eq!(doc.source_id, 1); assert_eq!(doc.source_id, 1);
assert!(doc.content_text.starts_with("[[MergeRequest]] !456: Implement JWT authentication\n")); assert!(
doc.content_text
.starts_with("[[MergeRequest]] !456: Implement JWT authentication\n")
);
assert!(doc.content_text.contains("Project: group/project-one\n")); assert!(doc.content_text.contains("Project: group/project-one\n"));
assert!(doc.content_text.contains("Labels: [\"auth\",\"feature\"]\n")); assert!(
doc.content_text
.contains("Labels: [\"auth\",\"feature\"]\n")
);
assert!(doc.content_text.contains("State: opened\n")); assert!(doc.content_text.contains("State: opened\n"));
assert!(doc.content_text.contains("Author: @johndoe\n")); assert!(doc.content_text.contains("Author: @johndoe\n"));
assert!(doc.content_text.contains("Source: feature/jwt-auth -> main\n")); assert!(
assert!(doc.content_text.contains("--- Description ---\n\nThis MR implements JWT-based authentication...")); doc.content_text
.contains("Source: feature/jwt-auth -> main\n")
);
assert!(
doc.content_text
.contains("--- Description ---\n\nThis MR implements JWT-based authentication...")
);
} }
#[test] #[test]
@@ -870,26 +994,65 @@ mod tests {
#[test] #[test]
fn test_mr_no_description() { fn test_mr_no_description() {
let conn = setup_mr_test_db(); let conn = setup_mr_test_db();
insert_mr(&conn, 1, 10, Some("Quick fix"), None, Some("merged"), Some("alice"), Some("fix/bug"), Some("main"), None); insert_mr(
&conn,
1,
10,
Some("Quick fix"),
None,
Some("merged"),
Some("alice"),
Some("fix/bug"),
Some("main"),
None,
);
let doc = extract_mr_document(&conn, 1).unwrap().unwrap(); let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
assert!(!doc.content_text.contains("--- Description ---")); assert!(!doc.content_text.contains("--- Description ---"));
assert!(doc.content_text.contains("[[MergeRequest]] !10: Quick fix\n")); assert!(
doc.content_text
.contains("[[MergeRequest]] !10: Quick fix\n")
);
} }
#[test] #[test]
fn test_mr_branch_info() { fn test_mr_branch_info() {
let conn = setup_mr_test_db(); let conn = setup_mr_test_db();
insert_mr(&conn, 1, 10, Some("Test"), Some("Body"), Some("opened"), None, Some("feature/foo"), Some("develop"), None); insert_mr(
&conn,
1,
10,
Some("Test"),
Some("Body"),
Some("opened"),
None,
Some("feature/foo"),
Some("develop"),
None,
);
let doc = extract_mr_document(&conn, 1).unwrap().unwrap(); let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
assert!(doc.content_text.contains("Source: feature/foo -> develop\n")); assert!(
doc.content_text
.contains("Source: feature/foo -> develop\n")
);
} }
#[test] #[test]
fn test_mr_no_branches() { fn test_mr_no_branches() {
let conn = setup_mr_test_db(); let conn = setup_mr_test_db();
insert_mr(&conn, 1, 10, Some("Test"), None, Some("opened"), None, None, None, None); insert_mr(
&conn,
1,
10,
Some("Test"),
None,
Some("opened"),
None,
None,
None,
None,
);
let doc = extract_mr_document(&conn, 1).unwrap().unwrap(); let doc = extract_mr_document(&conn, 1).unwrap().unwrap();
assert!(!doc.content_text.contains("Source:")); assert!(!doc.content_text.contains("Source:"));
@@ -899,7 +1062,8 @@ mod tests {
fn setup_discussion_test_db() -> Connection { fn setup_discussion_test_db() -> Connection {
let conn = setup_mr_test_db(); // includes projects, issues schema, labels, mr tables let conn = setup_mr_test_db(); // includes projects, issues schema, labels, mr tables
conn.execute_batch(" conn.execute_batch(
"
CREATE TABLE discussions ( CREATE TABLE discussions (
id INTEGER PRIMARY KEY, id INTEGER PRIMARY KEY,
gitlab_discussion_id TEXT NOT NULL, gitlab_discussion_id TEXT NOT NULL,
@@ -937,18 +1101,38 @@ mod tests {
position_new_line INTEGER, position_new_line INTEGER,
raw_payload_id INTEGER raw_payload_id INTEGER
); );
").unwrap(); ",
)
.unwrap();
conn conn
} }
fn insert_discussion(conn: &Connection, id: i64, noteable_type: &str, issue_id: Option<i64>, mr_id: Option<i64>) { fn insert_discussion(
conn: &Connection,
id: i64,
noteable_type: &str,
issue_id: Option<i64>,
mr_id: Option<i64>,
) {
conn.execute( conn.execute(
"INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, 1, ?3, ?4, ?5, 3000)", "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, 1, ?3, ?4, ?5, 3000)",
rusqlite::params![id, format!("disc_{}", id), issue_id, mr_id, noteable_type], rusqlite::params![id, format!("disc_{}", id), issue_id, mr_id, noteable_type],
).unwrap(); ).unwrap();
} }
fn insert_note(conn: &Connection, id: i64, gitlab_id: i64, discussion_id: i64, author: Option<&str>, body: Option<&str>, created_at: i64, is_system: bool, old_path: Option<&str>, new_path: Option<&str>) { #[allow(clippy::too_many_arguments)]
fn insert_note(
conn: &Connection,
id: i64,
gitlab_id: i64,
discussion_id: i64,
author: Option<&str>,
body: Option<&str>,
created_at: i64,
is_system: bool,
old_path: Option<&str>,
new_path: Option<&str>,
) {
conn.execute( conn.execute(
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position_old_path, position_new_path) VALUES (?1, ?2, ?3, 1, ?4, ?5, ?6, ?6, ?6, ?7, ?8, ?9)", "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position_old_path, position_new_path) VALUES (?1, ?2, ?3, 1, ?4, ?5, ?6, ?6, ?6, ?7, ?8, ?9)",
rusqlite::params![id, gitlab_id, discussion_id, author, body, created_at, is_system as i32, old_path, new_path], rusqlite::params![id, gitlab_id, discussion_id, author, body, created_at, is_system as i32, old_path, new_path],
@@ -958,25 +1142,67 @@ mod tests {
/// End-to-end check of the text rendered for a discussion on an issue:
/// header line, project, URL (anchored to the first note), labels, thread
/// marker, both note bodies, and the document-level author/title fields.
#[test]
fn test_discussion_document_format() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        234,
        Some("Authentication redesign"),
        Some("desc"),
        "opened",
        Some("johndoe"),
        Some("https://gitlab.example.com/group/project-one/-/issues/234"),
    );
    insert_label(&db, 1, "auth");
    insert_label(&db, 2, "bug");
    link_issue_label(&db, 1, 1);
    link_issue_label(&db, 1, 2);
    insert_discussion(&db, 1, "Issue", Some(1), None);

    // 1710460800000 = 2024-03-15T00:00:00Z
    let posted_at: i64 = 1710460800000;
    let thread = [
        (
            1,
            12345,
            "johndoe",
            "I think we should move to JWT-based auth...",
        ),
        (
            2,
            12346,
            "janedoe",
            "Agreed. What about refresh token strategy?",
        ),
    ];
    for (note_id, gitlab_id, author, body) in thread {
        insert_note(
            &db,
            note_id,
            gitlab_id,
            1,
            Some(author),
            Some(body),
            posted_at,
            false,
            None,
            None,
        );
    }

    let doc = extract_discussion_document(&db, 1).unwrap().unwrap();
    let text: &str = &doc.content_text;

    assert_eq!(doc.source_type, SourceType::Discussion);
    assert!(text.starts_with("[[Discussion]] Issue #234: Authentication redesign\n"));

    // Every fragment below must appear somewhere in the rendered text.
    let expected_fragments = [
        "Project: group/project-one\n",
        "URL: https://gitlab.example.com/group/project-one/-/issues/234#note_12345\n",
        "Labels: [\"auth\",\"bug\"]\n",
        "--- Thread ---",
        "@johndoe (2024-03-15):\nI think we should move to JWT-based auth...",
        "@janedoe (2024-03-15):\nAgreed. What about refresh token strategy?",
    ];
    for fragment in expected_fragments {
        assert!(text.contains(fragment));
    }

    assert_eq!(doc.author_username, Some("johndoe".to_string()));
    assert!(doc.title.is_none()); // Discussions don't have their own title
}
@@ -992,13 +1218,34 @@ mod tests {
fn test_discussion_parent_deleted() { fn test_discussion_parent_deleted() {
let conn = setup_discussion_test_db(); let conn = setup_discussion_test_db();
// Insert issue, create discussion, then delete the issue // Insert issue, create discussion, then delete the issue
insert_issue(&conn, 99, 10, Some("To be deleted"), None, "opened", None, None); insert_issue(
&conn,
99,
10,
Some("To be deleted"),
None,
"opened",
None,
None,
);
insert_discussion(&conn, 1, "Issue", Some(99), None); insert_discussion(&conn, 1, "Issue", Some(99), None);
insert_note(&conn, 1, 100, 1, Some("alice"), Some("Hello"), 1000, false, None, None); insert_note(
&conn,
1,
100,
1,
Some("alice"),
Some("Hello"),
1000,
false,
None,
None,
);
// Delete the parent issue — FK cascade won't delete discussion in test since // Delete the parent issue — FK cascade won't delete discussion in test since
// we used REFERENCES without ON DELETE CASCADE in test schema, so just delete from issues // we used REFERENCES without ON DELETE CASCADE in test schema, so just delete from issues
conn.execute("PRAGMA foreign_keys = OFF", []).unwrap(); conn.execute("PRAGMA foreign_keys = OFF", []).unwrap();
conn.execute("DELETE FROM issues WHERE id = 99", []).unwrap(); conn.execute("DELETE FROM issues WHERE id = 99", [])
.unwrap();
conn.execute("PRAGMA foreign_keys = ON", []).unwrap(); conn.execute("PRAGMA foreign_keys = ON", []).unwrap();
let result = extract_discussion_document(&conn, 1).unwrap(); let result = extract_discussion_document(&conn, 1).unwrap();
@@ -1008,11 +1255,53 @@ mod tests {
#[test] #[test]
fn test_discussion_system_notes_excluded() { fn test_discussion_system_notes_excluded() {
let conn = setup_discussion_test_db(); let conn = setup_discussion_test_db();
insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", Some("alice"), None); insert_issue(
&conn,
1,
10,
Some("Test"),
Some("desc"),
"opened",
Some("alice"),
None,
);
insert_discussion(&conn, 1, "Issue", Some(1), None); insert_discussion(&conn, 1, "Issue", Some(1), None);
insert_note(&conn, 1, 100, 1, Some("alice"), Some("Real comment"), 1000, false, None, None); insert_note(
insert_note(&conn, 2, 101, 1, Some("bot"), Some("assigned to @alice"), 2000, true, None, None); &conn,
insert_note(&conn, 3, 102, 1, Some("bob"), Some("Follow-up"), 3000, false, None, None); 1,
100,
1,
Some("alice"),
Some("Real comment"),
1000,
false,
None,
None,
);
insert_note(
&conn,
2,
101,
1,
Some("bot"),
Some("assigned to @alice"),
2000,
true,
None,
None,
);
insert_note(
&conn,
3,
102,
1,
Some("bob"),
Some("Follow-up"),
3000,
false,
None,
None,
);
let doc = extract_discussion_document(&conn, 1).unwrap().unwrap(); let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
assert!(doc.content_text.contains("@alice")); assert!(doc.content_text.contains("@alice"));
@@ -1023,38 +1312,115 @@ mod tests {
/// Two notes that carry the same old/new file paths must yield a `paths`
/// list with each path once, in sorted order, and a matching `Files:` line.
#[test]
fn test_discussion_diffnote_paths() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);

    // Both notes point at the same pair of paths.
    let old_path = Some("src/old.rs");
    let new_path = Some("src/new.rs");
    insert_note(
        &db,
        1,
        100,
        1,
        Some("alice"),
        Some("Comment on code"),
        1000,
        false,
        old_path,
        new_path,
    );
    insert_note(
        &db,
        2,
        101,
        1,
        Some("bob"),
        Some("Reply"),
        2000,
        false,
        old_path,
        new_path,
    );

    let doc = extract_discussion_document(&db, 1).unwrap().unwrap();
    // Paths should be deduplicated and sorted
    assert_eq!(doc.paths, vec!["src/new.rs", "src/old.rs"]);
    let text: &str = &doc.content_text;
    assert!(text.contains("Files: [\"src/new.rs\",\"src/old.rs\"]"));
}
/// The discussion document URL must be the parent issue URL followed by
/// `#note_<gitlab id of the note>` (here note `54321`).
#[test]
fn test_discussion_url_construction() {
    let db = setup_discussion_test_db();
    let issue_url = "https://gitlab.example.com/group/project-one/-/issues/10";
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        Some(issue_url),
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note(
        &db,
        1,
        54321,
        1,
        Some("alice"),
        Some("Hello"),
        1000,
        false,
        None,
        None,
    );

    let doc = extract_discussion_document(&db, 1).unwrap().unwrap();
    assert_eq!(
        doc.url.as_deref(),
        Some("https://gitlab.example.com/group/project-one/-/issues/10#note_54321")
    );
}
#[test] #[test]
fn test_discussion_uses_parent_labels() { fn test_discussion_uses_parent_labels() {
let conn = setup_discussion_test_db(); let conn = setup_discussion_test_db();
insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None); insert_issue(
&conn,
1,
10,
Some("Test"),
Some("desc"),
"opened",
None,
None,
);
insert_label(&conn, 1, "backend"); insert_label(&conn, 1, "backend");
insert_label(&conn, 2, "api"); insert_label(&conn, 2, "api");
link_issue_label(&conn, 1, 1); link_issue_label(&conn, 1, 1);
link_issue_label(&conn, 1, 2); link_issue_label(&conn, 1, 2);
insert_discussion(&conn, 1, "Issue", Some(1), None); insert_discussion(&conn, 1, "Issue", Some(1), None);
insert_note(&conn, 1, 100, 1, Some("alice"), Some("Comment"), 1000, false, None, None); insert_note(
&conn,
1,
100,
1,
Some("alice"),
Some("Comment"),
1000,
false,
None,
None,
);
let doc = extract_discussion_document(&conn, 1).unwrap().unwrap(); let doc = extract_discussion_document(&conn, 1).unwrap().unwrap();
assert_eq!(doc.labels, vec!["api", "backend"]); assert_eq!(doc.labels, vec!["api", "backend"]);
@@ -1063,20 +1429,65 @@ mod tests {
/// A discussion attached to a merge request must render its header as
/// `[[Discussion]] MR !<iid>: <title>`.
#[test]
fn test_discussion_on_mr() {
    let db = setup_discussion_test_db();
    insert_mr(
        &db,
        1,
        456,
        Some("JWT Auth"),
        Some("desc"),
        Some("opened"),
        Some("johndoe"),
        Some("feature/jwt"),
        Some("main"),
        Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"),
    );
    // The discussion references the MR (second id), not an issue.
    insert_discussion(&db, 1, "MergeRequest", None, Some(1));
    insert_note(
        &db,
        1,
        100,
        1,
        Some("alice"),
        Some("LGTM"),
        1000,
        false,
        None,
        None,
    );

    let extracted = extract_discussion_document(&db, 1).unwrap().unwrap();
    let text: &str = &extracted.content_text;
    assert!(text.contains("[[Discussion]] MR !456: JWT Auth\n"));
}
#[test] #[test]
fn test_discussion_all_system_notes() { fn test_discussion_all_system_notes() {
let conn = setup_discussion_test_db(); let conn = setup_discussion_test_db();
insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None); insert_issue(
&conn,
1,
10,
Some("Test"),
Some("desc"),
"opened",
None,
None,
);
insert_discussion(&conn, 1, "Issue", Some(1), None); insert_discussion(&conn, 1, "Issue", Some(1), None);
insert_note(&conn, 1, 100, 1, Some("bot"), Some("assigned to @alice"), 1000, true, None, None); insert_note(
&conn,
1,
100,
1,
Some("bot"),
Some("assigned to @alice"),
1000,
true,
None,
None,
);
// All notes are system notes -> no content -> returns None // All notes are system notes -> no content -> returns None
let result = extract_discussion_document(&conn, 1).unwrap(); let result = extract_discussion_document(&conn, 1).unwrap();

View File

@@ -7,11 +7,11 @@ mod regenerator;
mod truncation; mod truncation;
pub use extractor::{ pub use extractor::{
compute_content_hash, compute_list_hash, extract_discussion_document, DocumentData, SourceType, compute_content_hash, compute_list_hash, extract_discussion_document,
extract_issue_document, extract_mr_document, DocumentData, SourceType, extract_issue_document, extract_mr_document,
}; };
pub use regenerator::{regenerate_dirty_documents, RegenerateResult}; pub use regenerator::{RegenerateResult, regenerate_dirty_documents};
pub use truncation::{ pub use truncation::{
truncate_discussion, truncate_hard_cap, truncate_utf8, NoteContent, TruncationReason, MAX_DISCUSSION_BYTES, MAX_DOCUMENT_BYTES_HARD, NoteContent, TruncationReason, TruncationResult,
TruncationResult, MAX_DISCUSSION_BYTES, MAX_DOCUMENT_BYTES_HARD, truncate_discussion, truncate_hard_cap, truncate_utf8,
}; };

View File

@@ -4,8 +4,8 @@ use tracing::{debug, warn};
use crate::core::error::Result; use crate::core::error::Result;
use crate::documents::{ use crate::documents::{
extract_discussion_document, extract_issue_document, extract_mr_document, DocumentData, DocumentData, SourceType, extract_discussion_document, extract_issue_document,
SourceType, extract_mr_document,
}; };
use crate::ingestion::dirty_tracker::{clear_dirty, get_dirty_sources, record_dirty_error}; use crate::ingestion::dirty_tracker::{clear_dirty, get_dirty_sources, record_dirty_error};
@@ -65,11 +65,7 @@ pub fn regenerate_dirty_documents(conn: &Connection) -> Result<RegenerateResult>
} }
/// Regenerate a single document. Returns true if content_hash changed. /// Regenerate a single document. Returns true if content_hash changed.
fn regenerate_one( fn regenerate_one(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<bool> {
conn: &Connection,
source_type: SourceType,
source_id: i64,
) -> Result<bool> {
let doc = match source_type { let doc = match source_type {
SourceType::Issue => extract_issue_document(conn, source_id)?, SourceType::Issue => extract_issue_document(conn, source_id)?,
SourceType::MergeRequest => extract_mr_document(conn, source_id)?, SourceType::MergeRequest => extract_mr_document(conn, source_id)?,
@@ -97,8 +93,8 @@ fn get_existing_hash(
source_type: SourceType, source_type: SourceType,
source_id: i64, source_id: i64,
) -> Result<Option<String>> { ) -> Result<Option<String>> {
let mut stmt = let mut stmt = conn
conn.prepare("SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2")?; .prepare("SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2")?;
let hash: Option<String> = stmt let hash: Option<String> = stmt
.query_row(rusqlite::params![source_type.as_str(), source_id], |row| { .query_row(rusqlite::params![source_type.as_str(), source_id], |row| {
@@ -140,17 +136,15 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> {
.optional()?; .optional()?;
// Fast path: skip ALL writes when nothing changed (prevents WAL churn) // Fast path: skip ALL writes when nothing changed (prevents WAL churn)
if let Some((_, ref old_content_hash, ref old_labels_hash, ref old_paths_hash)) = existing { if let Some((_, ref old_content_hash, ref old_labels_hash, ref old_paths_hash)) = existing
if old_content_hash == &doc.content_hash && old_content_hash == &doc.content_hash
&& old_labels_hash == &doc.labels_hash && old_labels_hash == &doc.labels_hash
&& old_paths_hash == &doc.paths_hash && old_paths_hash == &doc.paths_hash
{ {
return Ok(()); return Ok(());
} }
}
let labels_json = let labels_json = serde_json::to_string(&doc.labels).unwrap_or_else(|_| "[]".to_string());
serde_json::to_string(&doc.labels).unwrap_or_else(|_| "[]".to_string());
// Upsert document row // Upsert document row
conn.execute( conn.execute(
@@ -237,11 +231,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> {
} }
/// Delete a document by source identity. /// Delete a document by source identity.
fn delete_document( fn delete_document(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
conn: &Connection,
source_type: SourceType,
source_id: i64,
) -> Result<()> {
conn.execute( conn.execute(
"DELETE FROM documents WHERE source_type = ?1 AND source_id = ?2", "DELETE FROM documents WHERE source_type = ?1 AND source_id = ?2",
rusqlite::params![source_type.as_str(), source_id], rusqlite::params![source_type.as_str(), source_id],
@@ -250,11 +240,7 @@ fn delete_document(
} }
/// Get document ID by source type and source ID. /// Get document ID by source type and source ID.
fn get_document_id( fn get_document_id(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<i64> {
conn: &Connection,
source_type: SourceType,
source_id: i64,
) -> Result<i64> {
let id: i64 = conn.query_row( let id: i64 = conn.query_row(
"SELECT id FROM documents WHERE source_type = ?1 AND source_id = ?2", "SELECT id FROM documents WHERE source_type = ?1 AND source_id = ?2",
rusqlite::params![source_type.as_str(), source_id], rusqlite::params![source_type.as_str(), source_id],
@@ -372,10 +358,14 @@ mod tests {
assert_eq!(result.errored, 0); assert_eq!(result.errored, 0);
// Verify document was created // Verify document was created
let count: i64 = conn.query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0)).unwrap(); let count: i64 = conn
.query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))
.unwrap();
assert_eq!(count, 1); assert_eq!(count, 1);
let content: String = conn.query_row("SELECT content_text FROM documents", [], |r| r.get(0)).unwrap(); let content: String = conn
.query_row("SELECT content_text FROM documents", [], |r| r.get(0))
.unwrap();
assert!(content.contains("[[Issue]] #42: Test Issue")); assert!(content.contains("[[Issue]] #42: Test Issue"));
} }
@@ -418,7 +408,9 @@ mod tests {
let result = regenerate_dirty_documents(&conn).unwrap(); let result = regenerate_dirty_documents(&conn).unwrap();
assert_eq!(result.regenerated, 1); // Deletion counts as "changed" assert_eq!(result.regenerated, 1); // Deletion counts as "changed"
let count: i64 = conn.query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0)).unwrap(); let count: i64 = conn
.query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))
.unwrap();
assert_eq!(count, 0); assert_eq!(count, 0);
} }
@@ -451,11 +443,13 @@ mod tests {
conn.execute( conn.execute(
"INSERT INTO labels (id, project_id, name) VALUES (1, 1, 'bug')", "INSERT INTO labels (id, project_id, name) VALUES (1, 1, 'bug')",
[], [],
).unwrap(); )
.unwrap();
conn.execute( conn.execute(
"INSERT INTO issue_labels (issue_id, label_id) VALUES (1, 1)", "INSERT INTO issue_labels (issue_id, label_id) VALUES (1, 1)",
[], [],
).unwrap(); )
.unwrap();
// First run creates document // First run creates document
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
@@ -467,9 +461,9 @@ mod tests {
assert_eq!(result.unchanged, 1); assert_eq!(result.unchanged, 1);
// Labels should still be present (not deleted and re-inserted) // Labels should still be present (not deleted and re-inserted)
let label_count: i64 = conn.query_row( let label_count: i64 = conn
"SELECT COUNT(*) FROM document_labels", [], |r| r.get(0), .query_row("SELECT COUNT(*) FROM document_labels", [], |r| r.get(0))
).unwrap(); .unwrap();
assert_eq!(label_count, 1); assert_eq!(label_count, 1);
} }
} }

View File

@@ -231,10 +231,7 @@ mod tests {
#[test] #[test]
fn test_first_last_oversized() { fn test_first_last_oversized() {
let big_body = "x".repeat(20_000); let big_body = "x".repeat(20_000);
let notes = vec![ let notes = vec![make_note("alice", &big_body), make_note("bob", &big_body)];
make_note("alice", &big_body),
make_note("bob", &big_body),
];
let result = truncate_discussion(&notes, 10_000); let result = truncate_discussion(&notes, 10_000);
assert!(result.is_truncated); assert!(result.is_truncated);
assert_eq!(result.reason, Some(TruncationReason::FirstLastOversized)); assert_eq!(result.reason, Some(TruncationReason::FirstLastOversized));
@@ -304,7 +301,11 @@ mod tests {
.collect(); .collect();
let result = truncate_discussion(&notes, 12_000); let result = truncate_discussion(&notes, 12_000);
assert!(result.is_truncated); assert!(result.is_truncated);
assert!(result.content.contains("[... 5 notes omitted for length ...]")); assert!(
result
.content
.contains("[... 5 notes omitted for length ...]")
);
} }
#[test] #[test]

View File

@@ -78,7 +78,9 @@ pub fn split_into_chunks(content: &str) -> Vec<(usize, String)> {
/// Find a paragraph break (`"\n\n"`) at which `window` can be split.
///
/// The part of the window from roughly two-thirds onward is searched first,
/// taking the last break found there; if it has none, the part before it is
/// searched for its last break.
///
/// Returns the byte offset just past the chosen `"\n\n"`, or `None` if
/// neither part contains one.
fn find_paragraph_break(window: &str) -> Option<usize> {
    // `len * 2 / 3` is a raw byte offset and may land inside a multi-byte
    // UTF-8 character; slicing there would panic. Back up to the nearest
    // char boundary (offset 0 is always one, so the loop terminates).
    let mut search_start = window.len() * 2 / 3;
    while !window.is_char_boundary(search_start) {
        search_start -= 1;
    }

    // NOTE: a "\n\n" whose two bytes sit on either side of `search_start`
    // is seen by neither search below.
    window[search_start..]
        .rfind("\n\n")
        .map(|pos| search_start + pos + 2)
        .or_else(|| window[..search_start].rfind("\n\n").map(|pos| pos + 2))
}
@@ -102,7 +104,9 @@ fn find_sentence_break(window: &str) -> Option<usize> {
/// Find the last word boundary (space) in the window.
///
/// The second half of the window is searched first, taking the last space
/// found there; if it has none, the first half is searched for its last
/// space.
///
/// Returns the byte offset just past the chosen space, or `None` if the
/// window contains no space.
fn find_word_break(window: &str) -> Option<usize> {
    // `len / 2` is a raw byte offset and may land inside a multi-byte UTF-8
    // character; slicing there would panic. Back up to the nearest char
    // boundary (offset 0 is always one, so the loop terminates).
    let mut search_start = window.len() / 2;
    while !window.is_char_boundary(search_start) {
        search_start -= 1;
    }

    window[search_start..]
        .rfind(' ')
        .map(|pos| search_start + pos + 1)
        .or_else(|| window[..search_start].rfind(' ').map(|pos| pos + 1))
}
@@ -155,7 +159,11 @@ mod tests {
} }
let chunks = split_into_chunks(&content); let chunks = split_into_chunks(&content);
assert!(chunks.len() >= 2, "Expected multiple chunks, got {}", chunks.len()); assert!(
chunks.len() >= 2,
"Expected multiple chunks, got {}",
chunks.len()
);
// Verify indices are sequential // Verify indices are sequential
for (i, (idx, _)) in chunks.iter().enumerate() { for (i, (idx, _)) in chunks.iter().enumerate() {
@@ -183,7 +191,8 @@ mod tests {
let end_of_first = &chunks[0].1; let end_of_first = &chunks[0].1;
let start_of_second = &chunks[1].1; let start_of_second = &chunks[1].1;
// The end of first chunk should overlap with start of second // The end of first chunk should overlap with start of second
let overlap_region = &end_of_first[end_of_first.len().saturating_sub(CHUNK_OVERLAP_CHARS)..]; let overlap_region =
&end_of_first[end_of_first.len().saturating_sub(CHUNK_OVERLAP_CHARS)..];
assert!( assert!(
start_of_second.starts_with(overlap_region) start_of_second.starts_with(overlap_region)
|| overlap_region.contains(&start_of_second[..100.min(start_of_second.len())]), || overlap_region.contains(&start_of_second[..100.min(start_of_second.len())]),

View File

@@ -4,6 +4,6 @@ pub mod chunking;
pub mod ollama; pub mod ollama;
pub mod pipeline; pub mod pipeline;
pub use change_detector::{count_pending_documents, find_pending_documents, PendingDocument}; pub use change_detector::{PendingDocument, count_pending_documents, find_pending_documents};
pub use chunking::{split_into_chunks, CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS}; pub use chunking::{CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS, split_into_chunks};
pub use pipeline::{embed_documents, EmbedResult}; pub use pipeline::{EmbedResult, embed_documents};

View File

@@ -67,8 +67,8 @@ impl OllamaClient {
pub async fn health_check(&self) -> Result<()> { pub async fn health_check(&self) -> Result<()> {
let url = format!("{}/api/tags", self.config.base_url); let url = format!("{}/api/tags", self.config.base_url);
let response = self let response =
.client self.client
.get(&url) .get(&url)
.send() .send()
.await .await
@@ -111,12 +111,16 @@ impl OllamaClient {
input: texts, input: texts,
}; };
let response = self.client.post(&url).json(&request).send().await.map_err( let response = self
|e| LoreError::OllamaUnavailable { .client
.post(&url)
.json(&request)
.send()
.await
.map_err(|e| LoreError::OllamaUnavailable {
base_url: self.config.base_url.clone(), base_url: self.config.base_url.clone(),
source: Some(e), source: Some(e),
}, })?;
)?;
let status = response.status(); let status = response.status();
if !status.is_success() { if !status.is_success() {

View File

@@ -8,8 +8,8 @@ use tracing::{info, warn};
use crate::core::error::Result; use crate::core::error::Result;
use crate::embedding::change_detector::{count_pending_documents, find_pending_documents}; use crate::embedding::change_detector::{count_pending_documents, find_pending_documents};
use crate::embedding::chunk_ids::{encode_rowid, CHUNK_ROWID_MULTIPLIER}; use crate::embedding::chunk_ids::{CHUNK_ROWID_MULTIPLIER, encode_rowid};
use crate::embedding::chunking::{split_into_chunks, CHUNK_MAX_BYTES, EXPECTED_DIMS}; use crate::embedding::chunking::{CHUNK_MAX_BYTES, EXPECTED_DIMS, split_into_chunks};
use crate::embedding::ollama::OllamaClient; use crate::embedding::ollama::OllamaClient;
const BATCH_SIZE: usize = 32; const BATCH_SIZE: usize = 32;
@@ -211,10 +211,13 @@ pub async fn embed_documents(
|| (err_lower.contains("413") && err_lower.contains("http")); || (err_lower.contains("413") && err_lower.contains("http"));
if is_context_error && batch.len() > 1 { if is_context_error && batch.len() > 1 {
warn!("Batch failed with context length error, retrying chunks individually"); warn!(
"Batch failed with context length error, retrying chunks individually"
);
for chunk in batch { for chunk in batch {
match client.embed_batch(vec![chunk.text.clone()]).await { match client.embed_batch(vec![chunk.text.clone()]).await {
Ok(embeddings) if !embeddings.is_empty() Ok(embeddings)
if !embeddings.is_empty()
&& embeddings[0].len() == EXPECTED_DIMS => && embeddings[0].len() == EXPECTED_DIMS =>
{ {
// Clear old embeddings on first successful chunk // Clear old embeddings on first successful chunk
@@ -272,7 +275,6 @@ pub async fn embed_documents(
} }
} }
} }
} }
// Fire progress for all normal documents after embedding completes. // Fire progress for all normal documents after embedding completes.
@@ -314,6 +316,7 @@ fn clear_document_embeddings(conn: &Connection, document_id: i64) -> Result<()>
} }
/// Store an embedding vector and its metadata. /// Store an embedding vector and its metadata.
#[allow(clippy::too_many_arguments)]
fn store_embedding( fn store_embedding(
conn: &Connection, conn: &Connection,
doc_id: i64, doc_id: i64,
@@ -347,8 +350,15 @@ fn store_embedding(
created_at, attempt_count, last_error, chunk_max_bytes, chunk_count) created_at, attempt_count, last_error, chunk_max_bytes, chunk_count)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1, NULL, ?8, ?9)", VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 1, NULL, ?8, ?9)",
rusqlite::params![ rusqlite::params![
doc_id, chunk_index as i64, model_name, EXPECTED_DIMS as i64, doc_id,
doc_hash, chunk_hash, now, CHUNK_MAX_BYTES as i64, chunk_count chunk_index as i64,
model_name,
EXPECTED_DIMS as i64,
doc_hash,
chunk_hash,
now,
CHUNK_MAX_BYTES as i64,
chunk_count
], ],
)?; )?;
@@ -377,8 +387,15 @@ fn record_embedding_error(
last_attempt_at = ?7, last_attempt_at = ?7,
chunk_max_bytes = ?9", chunk_max_bytes = ?9",
rusqlite::params![ rusqlite::params![
doc_id, chunk_index as i64, model_name, EXPECTED_DIMS as i64, doc_id,
doc_hash, chunk_hash, now, error, CHUNK_MAX_BYTES as i64 chunk_index as i64,
model_name,
EXPECTED_DIMS as i64,
doc_hash,
chunk_hash,
now,
error,
CHUNK_MAX_BYTES as i64
], ],
)?; )?;
Ok(()) Ok(())

View File

@@ -557,10 +557,7 @@ impl GitLabClient {
/// all pages into a Vec rather than using streaming. /// all pages into a Vec rather than using streaming.
impl GitLabClient { impl GitLabClient {
/// Fetch all pages from a paginated endpoint, returning collected results. /// Fetch all pages from a paginated endpoint, returning collected results.
async fn fetch_all_pages<T: serde::de::DeserializeOwned>( async fn fetch_all_pages<T: serde::de::DeserializeOwned>(&self, path: &str) -> Result<Vec<T>> {
&self,
path: &str,
) -> Result<Vec<T>> {
let mut results = Vec::new(); let mut results = Vec::new();
let mut page = 1u32; let mut page = 1u32;
let per_page = 100u32; let per_page = 100u32;
@@ -571,9 +568,7 @@ impl GitLabClient {
("page", page.to_string()), ("page", page.to_string()),
]; ];
let (items, headers) = self let (items, headers) = self.request_with_headers::<Vec<T>>(path, &params).await?;
.request_with_headers::<Vec<T>>(path, &params)
.await?;
let is_empty = items.is_empty(); let is_empty = items.is_empty();
let full_page = items.len() as u32 == per_page; let full_page = items.len() as u32 == per_page;
@@ -604,9 +599,8 @@ impl GitLabClient {
gitlab_project_id: i64, gitlab_project_id: i64,
iid: i64, iid: i64,
) -> Result<Vec<GitLabStateEvent>> { ) -> Result<Vec<GitLabStateEvent>> {
let path = format!( let path =
"/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_state_events" format!("/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_state_events");
);
self.fetch_all_pages(&path).await self.fetch_all_pages(&path).await
} }
@@ -616,9 +610,8 @@ impl GitLabClient {
gitlab_project_id: i64, gitlab_project_id: i64,
iid: i64, iid: i64,
) -> Result<Vec<GitLabLabelEvent>> { ) -> Result<Vec<GitLabLabelEvent>> {
let path = format!( let path =
"/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_label_events" format!("/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_label_events");
);
self.fetch_all_pages(&path).await self.fetch_all_pages(&path).await
} }
@@ -628,9 +621,8 @@ impl GitLabClient {
gitlab_project_id: i64, gitlab_project_id: i64,
iid: i64, iid: i64,
) -> Result<Vec<GitLabMilestoneEvent>> { ) -> Result<Vec<GitLabMilestoneEvent>> {
let path = format!( let path =
"/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_milestone_events" format!("/api/v4/projects/{gitlab_project_id}/issues/{iid}/resource_milestone_events");
);
self.fetch_all_pages(&path).await self.fetch_all_pages(&path).await
} }
@@ -676,18 +668,30 @@ impl GitLabClient {
gitlab_project_id: i64, gitlab_project_id: i64,
entity_type: &str, entity_type: &str,
iid: i64, iid: i64,
) -> Result<(Vec<GitLabStateEvent>, Vec<GitLabLabelEvent>, Vec<GitLabMilestoneEvent>)> { ) -> Result<(
Vec<GitLabStateEvent>,
Vec<GitLabLabelEvent>,
Vec<GitLabMilestoneEvent>,
)> {
match entity_type { match entity_type {
"issue" => { "issue" => {
let state = self.fetch_issue_state_events(gitlab_project_id, iid).await?; let state = self
let label = self.fetch_issue_label_events(gitlab_project_id, iid).await?; .fetch_issue_state_events(gitlab_project_id, iid)
let milestone = self.fetch_issue_milestone_events(gitlab_project_id, iid).await?; .await?;
let label = self
.fetch_issue_label_events(gitlab_project_id, iid)
.await?;
let milestone = self
.fetch_issue_milestone_events(gitlab_project_id, iid)
.await?;
Ok((state, label, milestone)) Ok((state, label, milestone))
} }
"merge_request" => { "merge_request" => {
let state = self.fetch_mr_state_events(gitlab_project_id, iid).await?; let state = self.fetch_mr_state_events(gitlab_project_id, iid).await?;
let label = self.fetch_mr_label_events(gitlab_project_id, iid).await?; let label = self.fetch_mr_label_events(gitlab_project_id, iid).await?;
let milestone = self.fetch_mr_milestone_events(gitlab_project_id, iid).await?; let milestone = self
.fetch_mr_milestone_events(gitlab_project_id, iid)
.await?;
Ok((state, label, milestone)) Ok((state, label, milestone))
} }
_ => Err(LoreError::Other(format!( _ => Err(LoreError::Other(format!(
@@ -750,23 +754,23 @@ mod tests {
#[test] #[test]
fn cursor_rewind_clamps_to_zero() { fn cursor_rewind_clamps_to_zero() {
let updated_after = Some(1000i64); // 1 second let updated_after = 1000i64; // 1 second
let cursor_rewind_seconds = 10u32; // 10 seconds let cursor_rewind_seconds = 10u32; // 10 seconds
// Rewind would be negative, should clamp to 0 // Rewind would be negative, should clamp to 0
let rewind_ms = (cursor_rewind_seconds as i64) * 1000; let rewind_ms = i64::from(cursor_rewind_seconds) * 1000;
let rewound = (updated_after.unwrap() - rewind_ms).max(0); let rewound = (updated_after - rewind_ms).max(0);
assert_eq!(rewound, 0); assert_eq!(rewound, 0);
} }
#[test] #[test]
fn cursor_rewind_applies_correctly() { fn cursor_rewind_applies_correctly() {
let updated_after = Some(1705312800000i64); // 2024-01-15T10:00:00.000Z let updated_after = 1705312800000i64; // 2024-01-15T10:00:00.000Z
let cursor_rewind_seconds = 60u32; // 1 minute let cursor_rewind_seconds = 60u32; // 1 minute
let rewind_ms = (cursor_rewind_seconds as i64) * 1000; let rewind_ms = i64::from(cursor_rewind_seconds) * 1000;
let rewound = (updated_after.unwrap() - rewind_ms).max(0); let rewound = (updated_after - rewind_ms).max(0);
// Should be 1 minute earlier // Should be 1 minute earlier
assert_eq!(rewound, 1705312740000); assert_eq!(rewound, 1705312740000);

View File

@@ -179,10 +179,7 @@ fn transform_single_note(
resolvable: note.resolvable, resolvable: note.resolvable,
resolved: note.resolved, resolved: note.resolved,
resolved_by: note.resolved_by.as_ref().map(|a| a.username.clone()), resolved_by: note.resolved_by.as_ref().map(|a| a.username.clone()),
resolved_at: note resolved_at: note.resolved_at.as_ref().and_then(|ts| iso_to_ms(ts)),
.resolved_at
.as_ref()
.and_then(|ts| iso_to_ms(ts)),
position_old_path, position_old_path,
position_new_path, position_new_path,
position_old_line, position_old_line,
@@ -235,7 +232,6 @@ fn extract_position_fields(
} }
} }
/// Transform notes from a GitLab discussion with strict timestamp parsing. /// Transform notes from a GitLab discussion with strict timestamp parsing.
/// Returns Err if any timestamp is invalid - no silent fallback to 0. /// Returns Err if any timestamp is invalid - no silent fallback to 0.
pub fn transform_notes_with_diff_position( pub fn transform_notes_with_diff_position(

View File

@@ -53,14 +53,17 @@ pub fn get_dirty_sources(conn: &Connection) -> Result<Vec<(SourceType, i64)>> {
"SELECT source_type, source_id FROM dirty_sources "SELECT source_type, source_id FROM dirty_sources
WHERE next_attempt_at IS NULL OR next_attempt_at <= ?1 WHERE next_attempt_at IS NULL OR next_attempt_at <= ?1
ORDER BY attempt_count ASC, queued_at ASC ORDER BY attempt_count ASC, queued_at ASC
LIMIT ?2" LIMIT ?2",
)?; )?;
let rows = stmt let rows = stmt
.query_map(rusqlite::params![now, DIRTY_SOURCES_BATCH_SIZE as i64], |row| { .query_map(
rusqlite::params![now, DIRTY_SOURCES_BATCH_SIZE as i64],
|row| {
let st_str: String = row.get(0)?; let st_str: String = row.get(0)?;
let source_id: i64 = row.get(1)?; let source_id: i64 = row.get(1)?;
Ok((st_str, source_id)) Ok((st_str, source_id))
})? },
)?
.collect::<std::result::Result<Vec<_>, _>>()?; .collect::<std::result::Result<Vec<_>, _>>()?;
let mut results = Vec::with_capacity(rows.len()); let mut results = Vec::with_capacity(rows.len());
@@ -110,7 +113,14 @@ pub fn record_dirty_error(
last_error = ?3, last_error = ?3,
next_attempt_at = ?4 next_attempt_at = ?4
WHERE source_type = ?5 AND source_id = ?6", WHERE source_type = ?5 AND source_id = ?6",
rusqlite::params![new_attempt, now, error, next_at, source_type.as_str(), source_id], rusqlite::params![
new_attempt,
now,
error,
next_at,
source_type.as_str(),
source_id
],
)?; )?;
Ok(()) Ok(())
} }
@@ -142,7 +152,9 @@ mod tests {
let conn = setup_db(); let conn = setup_db();
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let count: i64 = conn.query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0)).unwrap(); let count: i64 = conn
.query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0))
.unwrap();
assert_eq!(count, 1); assert_eq!(count, 1);
} }
@@ -154,7 +166,9 @@ mod tests {
mark_dirty_tx(&tx, SourceType::Issue, 1).unwrap(); mark_dirty_tx(&tx, SourceType::Issue, 1).unwrap();
tx.commit().unwrap(); tx.commit().unwrap();
} }
let count: i64 = conn.query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0)).unwrap(); let count: i64 = conn
.query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0))
.unwrap();
assert_eq!(count, 1); assert_eq!(count, 1);
} }
@@ -165,21 +179,33 @@ mod tests {
// Simulate error state // Simulate error state
record_dirty_error(&conn, SourceType::Issue, 1, "test error").unwrap(); record_dirty_error(&conn, SourceType::Issue, 1, "test error").unwrap();
let attempt: i64 = conn.query_row( let attempt: i64 = conn
"SELECT attempt_count FROM dirty_sources WHERE source_id = 1", [], |r| r.get(0) .query_row(
).unwrap(); "SELECT attempt_count FROM dirty_sources WHERE source_id = 1",
[],
|r| r.get(0),
)
.unwrap();
assert_eq!(attempt, 1); assert_eq!(attempt, 1);
// Re-mark should reset // Re-mark should reset
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
let attempt: i64 = conn.query_row( let attempt: i64 = conn
"SELECT attempt_count FROM dirty_sources WHERE source_id = 1", [], |r| r.get(0) .query_row(
).unwrap(); "SELECT attempt_count FROM dirty_sources WHERE source_id = 1",
[],
|r| r.get(0),
)
.unwrap();
assert_eq!(attempt, 0); assert_eq!(attempt, 0);
let next_at: Option<i64> = conn.query_row( let next_at: Option<i64> = conn
"SELECT next_attempt_at FROM dirty_sources WHERE source_id = 1", [], |r| r.get(0) .query_row(
).unwrap(); "SELECT next_attempt_at FROM dirty_sources WHERE source_id = 1",
[],
|r| r.get(0),
)
.unwrap();
assert!(next_at.is_none()); assert!(next_at.is_none());
} }
@@ -191,7 +217,8 @@ mod tests {
conn.execute( conn.execute(
"UPDATE dirty_sources SET next_attempt_at = 9999999999999 WHERE source_id = 1", "UPDATE dirty_sources SET next_attempt_at = 9999999999999 WHERE source_id = 1",
[], [],
).unwrap(); )
.unwrap();
let results = get_dirty_sources(&conn).unwrap(); let results = get_dirty_sources(&conn).unwrap();
assert!(results.is_empty()); assert!(results.is_empty());
@@ -205,7 +232,8 @@ mod tests {
conn.execute( conn.execute(
"UPDATE dirty_sources SET attempt_count = 2 WHERE source_id = 1", "UPDATE dirty_sources SET attempt_count = 2 WHERE source_id = 1",
[], [],
).unwrap(); )
.unwrap();
// Insert issue 2 (fresh, attempt_count=0) // Insert issue 2 (fresh, attempt_count=0)
mark_dirty(&conn, SourceType::Issue, 2).unwrap(); mark_dirty(&conn, SourceType::Issue, 2).unwrap();
@@ -231,7 +259,9 @@ mod tests {
mark_dirty(&conn, SourceType::Issue, 1).unwrap(); mark_dirty(&conn, SourceType::Issue, 1).unwrap();
clear_dirty(&conn, SourceType::Issue, 1).unwrap(); clear_dirty(&conn, SourceType::Issue, 1).unwrap();
let count: i64 = conn.query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0)).unwrap(); let count: i64 = conn
.query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0))
.unwrap();
assert_eq!(count, 0); assert_eq!(count, 0);
} }

View File

@@ -65,7 +65,7 @@ pub fn get_pending_fetches(conn: &Connection, limit: usize) -> Result<Vec<Pendin
FROM pending_discussion_fetches FROM pending_discussion_fetches
WHERE next_attempt_at IS NULL OR next_attempt_at <= ?1 WHERE next_attempt_at IS NULL OR next_attempt_at <= ?1
ORDER BY queued_at ASC ORDER BY queued_at ASC
LIMIT ?2" LIMIT ?2",
)?; )?;
let rows = stmt let rows = stmt
.query_map(rusqlite::params![now, limit as i64], |row| { .query_map(rusqlite::params![now, limit as i64], |row| {
@@ -137,7 +137,15 @@ pub fn record_fetch_error(
last_error = ?3, last_error = ?3,
next_attempt_at = ?4 next_attempt_at = ?4
WHERE project_id = ?5 AND noteable_type = ?6 AND noteable_iid = ?7", WHERE project_id = ?5 AND noteable_type = ?6 AND noteable_iid = ?7",
rusqlite::params![new_attempt, now, error, next_at, project_id, noteable_type.as_str(), noteable_iid], rusqlite::params![
new_attempt,
now,
error,
next_at,
project_id,
noteable_type.as_str(),
noteable_iid
],
)?; )?;
Ok(()) Ok(())
} }
@@ -196,18 +204,24 @@ mod tests {
queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap(); queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
record_fetch_error(&conn, 1, NoteableType::Issue, 42, "network error").unwrap(); record_fetch_error(&conn, 1, NoteableType::Issue, 42, "network error").unwrap();
let attempt: i32 = conn.query_row( let attempt: i32 = conn
.query_row(
"SELECT attempt_count FROM pending_discussion_fetches WHERE noteable_iid = 42", "SELECT attempt_count FROM pending_discussion_fetches WHERE noteable_iid = 42",
[], |r| r.get(0), [],
).unwrap(); |r| r.get(0),
)
.unwrap();
assert_eq!(attempt, 1); assert_eq!(attempt, 1);
// Re-queue should reset // Re-queue should reset
queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap(); queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
let attempt: i32 = conn.query_row( let attempt: i32 = conn
.query_row(
"SELECT attempt_count FROM pending_discussion_fetches WHERE noteable_iid = 42", "SELECT attempt_count FROM pending_discussion_fetches WHERE noteable_iid = 42",
[], |r| r.get(0), [],
).unwrap(); |r| r.get(0),
)
.unwrap();
assert_eq!(attempt, 0); assert_eq!(attempt, 0);
} }
@@ -230,9 +244,11 @@ mod tests {
queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap(); queue_discussion_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
complete_fetch(&conn, 1, NoteableType::Issue, 42).unwrap(); complete_fetch(&conn, 1, NoteableType::Issue, 42).unwrap();
let count: i64 = conn.query_row( let count: i64 = conn
"SELECT COUNT(*) FROM pending_discussion_fetches", [], |r| r.get(0), .query_row("SELECT COUNT(*) FROM pending_discussion_fetches", [], |r| {
).unwrap(); r.get(0)
})
.unwrap();
assert_eq!(count, 0); assert_eq!(count, 0);
} }
@@ -249,17 +265,23 @@ mod tests {
assert_eq!(attempt, 1); assert_eq!(attempt, 1);
assert_eq!(error, Some("timeout".to_string())); assert_eq!(error, Some("timeout".to_string()));
let next_at: Option<i64> = conn.query_row( let next_at: Option<i64> = conn
.query_row(
"SELECT next_attempt_at FROM pending_discussion_fetches WHERE noteable_iid = 10", "SELECT next_attempt_at FROM pending_discussion_fetches WHERE noteable_iid = 10",
[], |r| r.get(0), [],
).unwrap(); |r| r.get(0),
)
.unwrap();
assert!(next_at.is_some()); assert!(next_at.is_some());
} }
#[test] #[test]
fn test_noteable_type_parse() { fn test_noteable_type_parse() {
assert_eq!(NoteableType::parse("Issue"), Some(NoteableType::Issue)); assert_eq!(NoteableType::parse("Issue"), Some(NoteableType::Issue));
assert_eq!(NoteableType::parse("MergeRequest"), Some(NoteableType::MergeRequest)); assert_eq!(
NoteableType::parse("MergeRequest"),
Some(NoteableType::MergeRequest)
);
assert_eq!(NoteableType::parse("invalid"), None); assert_eq!(NoteableType::parse("invalid"), None);
} }
} }

View File

@@ -14,9 +14,9 @@ use crate::Config;
use crate::core::error::Result; use crate::core::error::Result;
use crate::core::payloads::{StorePayloadOptions, store_payload}; use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::documents::SourceType; use crate::documents::SourceType;
use crate::ingestion::dirty_tracker;
use crate::gitlab::GitLabClient; use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::{NoteableRef, transform_discussion, transform_notes}; use crate::gitlab::transformers::{NoteableRef, transform_discussion, transform_notes};
use crate::ingestion::dirty_tracker;
use super::issues::IssueForDiscussionSync; use super::issues::IssueForDiscussionSync;
@@ -95,7 +95,6 @@ async fn ingest_discussions_for_issue(
let mut pagination_error: Option<crate::core::error::LoreError> = None; let mut pagination_error: Option<crate::core::error::LoreError> = None;
while let Some(disc_result) = discussions_stream.next().await { while let Some(disc_result) = discussions_stream.next().await {
// Handle errors - record but don't delete stale data // Handle errors - record but don't delete stale data
let gitlab_discussion = match disc_result { let gitlab_discussion = match disc_result {
Ok(d) => d, Ok(d) => d,

View File

@@ -18,10 +18,10 @@ use crate::core::error::{LoreError, Result};
use crate::core::payloads::{StorePayloadOptions, store_payload}; use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::time::now_ms; use crate::core::time::now_ms;
use crate::documents::SourceType; use crate::documents::SourceType;
use crate::ingestion::dirty_tracker;
use crate::gitlab::GitLabClient; use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::{MilestoneRow, transform_issue}; use crate::gitlab::transformers::{MilestoneRow, transform_issue};
use crate::gitlab::types::GitLabIssue; use crate::gitlab::types::GitLabIssue;
use crate::ingestion::dirty_tracker;
/// Result of issue ingestion. /// Result of issue ingestion.
#[derive(Debug, Default)] #[derive(Debug, Default)]
@@ -174,13 +174,13 @@ fn passes_cursor_filter(issue: &GitLabIssue, cursor: &SyncCursor) -> Result<bool
return Ok(false); return Ok(false);
} }
if issue_ts == cursor_ts { if issue_ts == cursor_ts
if let Some(cursor_id) = cursor.tie_breaker_id { && cursor
if issue.id <= cursor_id { .tie_breaker_id
.is_some_and(|cursor_id| issue.id <= cursor_id)
{
return Ok(false); return Ok(false);
} }
}
}
Ok(true) Ok(true)
} }

View File

@@ -17,10 +17,10 @@ use crate::core::error::{LoreError, Result};
use crate::core::payloads::{StorePayloadOptions, store_payload}; use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::time::now_ms; use crate::core::time::now_ms;
use crate::documents::SourceType; use crate::documents::SourceType;
use crate::ingestion::dirty_tracker;
use crate::gitlab::GitLabClient; use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::merge_request::transform_merge_request; use crate::gitlab::transformers::merge_request::transform_merge_request;
use crate::gitlab::types::GitLabMergeRequest; use crate::gitlab::types::GitLabMergeRequest;
use crate::ingestion::dirty_tracker;
/// Result of merge request ingestion. /// Result of merge request ingestion.
#[derive(Debug, Default)] #[derive(Debug, Default)]

View File

@@ -19,7 +19,7 @@ pub use merge_requests::{
}; };
pub use mr_discussions::{IngestMrDiscussionsResult, ingest_mr_discussions}; pub use mr_discussions::{IngestMrDiscussionsResult, ingest_mr_discussions};
pub use orchestrator::{ pub use orchestrator::{
IngestMrProjectResult, IngestProjectResult, ProgressCallback, ProgressEvent, DrainResult, IngestMrProjectResult, IngestProjectResult, ProgressCallback, ProgressEvent,
ingest_project_issues, ingest_project_issues_with_progress, ingest_project_merge_requests, ingest_project_issues, ingest_project_issues_with_progress, ingest_project_merge_requests,
ingest_project_merge_requests_with_progress, ingest_project_merge_requests_with_progress,
}; };

View File

@@ -19,13 +19,13 @@ use crate::core::error::Result;
use crate::core::payloads::{StorePayloadOptions, store_payload}; use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::time::now_ms; use crate::core::time::now_ms;
use crate::documents::SourceType; use crate::documents::SourceType;
use crate::ingestion::dirty_tracker;
use crate::gitlab::GitLabClient; use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::{ use crate::gitlab::transformers::{
NormalizedDiscussion, NormalizedNote, transform_mr_discussion, NormalizedDiscussion, NormalizedNote, transform_mr_discussion,
transform_notes_with_diff_position, transform_notes_with_diff_position,
}; };
use crate::gitlab::types::GitLabDiscussion; use crate::gitlab::types::GitLabDiscussion;
use crate::ingestion::dirty_tracker;
use super::merge_requests::MrForDiscussionSync; use super::merge_requests::MrForDiscussionSync;
@@ -72,7 +72,10 @@ pub async fn prefetch_mr_discussions(
debug!(mr_iid = mr.iid, "Prefetching discussions for MR"); debug!(mr_iid = mr.iid, "Prefetching discussions for MR");
// Fetch all discussions from GitLab // Fetch all discussions from GitLab
let raw_discussions = match client.fetch_all_mr_discussions(gitlab_project_id, mr.iid).await { let raw_discussions = match client
.fetch_all_mr_discussions(gitlab_project_id, mr.iid)
.await
{
Ok(d) => d, Ok(d) => d,
Err(e) => { Err(e) => {
return PrefetchedMrDiscussions { return PrefetchedMrDiscussions {
@@ -241,7 +244,10 @@ pub fn write_prefetched_mr_discussions(
mark_discussions_synced(conn, mr.local_mr_id, mr.updated_at)?; mark_discussions_synced(conn, mr.local_mr_id, mr.updated_at)?;
clear_sync_health_error(conn, mr.local_mr_id)?; clear_sync_health_error(conn, mr.local_mr_id)?;
debug!(mr_iid = mr.iid, "MR discussion sync complete, watermark advanced"); debug!(
mr_iid = mr.iid,
"MR discussion sync complete, watermark advanced"
);
} else if prefetched.had_transform_errors { } else if prefetched.had_transform_errors {
warn!( warn!(
mr_iid = mr.iid, mr_iid = mr.iid,

View File

@@ -7,9 +7,12 @@
use futures::future::join_all; use futures::future::join_all;
use rusqlite::Connection; use rusqlite::Connection;
use tracing::info; use tracing::{debug, info, warn};
use crate::Config; use crate::Config;
use crate::core::dependent_queue::{
claim_jobs, complete_job, count_pending_jobs, enqueue_job, fail_job, reclaim_stale_locks,
};
use crate::core::error::Result; use crate::core::error::Result;
use crate::gitlab::GitLabClient; use crate::gitlab::GitLabClient;
@@ -50,6 +53,12 @@ pub enum ProgressEvent {
MrDiscussionSynced { current: usize, total: usize }, MrDiscussionSynced { current: usize, total: usize },
/// MR discussion sync complete /// MR discussion sync complete
MrDiscussionSyncComplete, MrDiscussionSyncComplete,
/// Resource event fetching started (total jobs)
ResourceEventsFetchStarted { total: usize },
/// Resource event fetched for an entity (current/total)
ResourceEventFetched { current: usize, total: usize },
/// Resource event fetching complete
ResourceEventsFetchComplete { fetched: usize, failed: usize },
} }
/// Result of full project ingestion (issues). /// Result of full project ingestion (issues).
@@ -63,6 +72,8 @@ pub struct IngestProjectResult {
pub notes_upserted: usize, pub notes_upserted: usize,
pub issues_synced_discussions: usize, pub issues_synced_discussions: usize,
pub issues_skipped_discussion_sync: usize, pub issues_skipped_discussion_sync: usize,
pub resource_events_fetched: usize,
pub resource_events_failed: usize,
} }
/// Result of MR ingestion for a project. /// Result of MR ingestion for a project.
@@ -80,6 +91,8 @@ pub struct IngestMrProjectResult {
pub diffnotes_count: usize, pub diffnotes_count: usize,
pub mrs_synced_discussions: usize, pub mrs_synced_discussions: usize,
pub mrs_skipped_discussion_sync: usize, pub mrs_skipped_discussion_sync: usize,
pub resource_events_fetched: usize,
pub resource_events_failed: usize,
} }
/// Ingest all issues and their discussions for a project. /// Ingest all issues and their discussions for a project.
@@ -167,6 +180,21 @@ pub async fn ingest_project_issues_with_progress(
result.issues_synced_discussions += 1; result.issues_synced_discussions += 1;
} }
// Step 4: Enqueue and drain resource events (if enabled)
if config.sync.fetch_resource_events {
// Enqueue resource_events jobs for all issues in this project
let enqueued = enqueue_resource_events_for_entity_type(conn, project_id, "issue")?;
if enqueued > 0 {
debug!(enqueued, "Enqueued resource events jobs for issues");
}
// Drain the queue
let drain_result =
drain_resource_events(conn, client, config, gitlab_project_id, &progress).await?;
result.resource_events_fetched = drain_result.fetched;
result.resource_events_failed = drain_result.failed;
}
info!( info!(
issues_fetched = result.issues_fetched, issues_fetched = result.issues_fetched,
issues_upserted = result.issues_upserted, issues_upserted = result.issues_upserted,
@@ -175,6 +203,8 @@ pub async fn ingest_project_issues_with_progress(
notes_upserted = result.notes_upserted, notes_upserted = result.notes_upserted,
issues_synced = result.issues_synced_discussions, issues_synced = result.issues_synced_discussions,
issues_skipped = result.issues_skipped_discussion_sync, issues_skipped = result.issues_skipped_discussion_sync,
resource_events_fetched = result.resource_events_fetched,
resource_events_failed = result.resource_events_failed,
"Project ingestion complete" "Project ingestion complete"
); );
@@ -343,6 +373,19 @@ pub async fn ingest_project_merge_requests_with_progress(
} }
} }
// Step 4: Enqueue and drain resource events (if enabled)
if config.sync.fetch_resource_events {
let enqueued = enqueue_resource_events_for_entity_type(conn, project_id, "merge_request")?;
if enqueued > 0 {
debug!(enqueued, "Enqueued resource events jobs for MRs");
}
let drain_result =
drain_resource_events(conn, client, config, gitlab_project_id, &progress).await?;
result.resource_events_fetched = drain_result.fetched;
result.resource_events_failed = drain_result.failed;
}
info!( info!(
mrs_fetched = result.mrs_fetched, mrs_fetched = result.mrs_fetched,
mrs_upserted = result.mrs_upserted, mrs_upserted = result.mrs_upserted,
@@ -352,6 +395,8 @@ pub async fn ingest_project_merge_requests_with_progress(
diffnotes = result.diffnotes_count, diffnotes = result.diffnotes_count,
mrs_synced = result.mrs_synced_discussions, mrs_synced = result.mrs_synced_discussions,
mrs_skipped = result.mrs_skipped_discussion_sync, mrs_skipped = result.mrs_skipped_discussion_sync,
resource_events_fetched = result.resource_events_fetched,
resource_events_failed = result.resource_events_failed,
"MR project ingestion complete" "MR project ingestion complete"
); );
@@ -405,6 +450,368 @@ async fn sync_mr_discussions_sequential(
Ok(results) Ok(results)
} }
/// Outcome counters for one pass over the resource-events job queue.
///
/// Both counters are per job (one job per issue / merge request), not per
/// individual event row written.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct DrainResult {
    /// Jobs whose events were fetched, stored, and marked complete.
    pub fetched: usize,
    /// Jobs that were marked failed because fetching or storing returned an error.
    pub failed: usize,
}
/// Queue a `resource_events` job for every stored entity of one kind in a project.
///
/// `entity_type` selects the table that is scanned: `"issue"` reads `issues`,
/// `"merge_request"` reads `merge_requests`. Any other value returns `Ok(0)`
/// without touching the database.
///
/// Each `(id, iid)` row found for `project_id` is handed to `enqueue_job`.
/// The return value is the number of calls for which `enqueue_job` reported
/// `true`.
///
/// NOTE(review): this treats `true` as "a new job row was created" and relies
/// on `enqueue_job` to skip entities that are already queued — confirm against
/// `core::dependent_queue`.
fn enqueue_resource_events_for_entity_type(
    conn: &Connection,
    project_id: i64,
    entity_type: &str,
) -> Result<usize> {
    // Only two tables are ever scanned, so the query text is chosen directly.
    let select_sql = match entity_type {
        "issue" => "SELECT id, iid FROM issues WHERE project_id = ?1",
        "merge_request" => "SELECT id, iid FROM merge_requests WHERE project_id = ?1",
        _ => return Ok(0),
    };

    let mut select = conn.prepare_cached(select_sql)?;
    // Materialize the rows first so the read is finished before any job is written.
    let rows: Vec<(i64, i64)> = select
        .query_map([project_id], |row| Ok((row.get(0)?, row.get(1)?)))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    let mut newly_queued = 0usize;
    for &(local_id, iid) in &rows {
        let inserted = enqueue_job(
            conn,
            project_id,
            entity_type,
            iid,
            local_id,
            "resource_events",
            None,
        )?;
        newly_queued += usize::from(inserted);
    }

    Ok(newly_queued)
}
/// Work through the pending `resource_events` queue for one sync run.
///
/// Steps, in order:
/// 1. Reclaim stale locks (`reclaim_stale_locks`).
/// 2. Read the pending `resource_events` count; return an empty result if it is zero.
/// 3. Repeatedly claim up to `config.sync.dependent_concurrency` jobs and, for
///    each job, fetch its state/label/milestone events from GitLab, store them,
///    and mark the job complete. A fetch or store error is logged and the job
///    is passed to `fail_job`, which owns retry scheduling.
///
/// Jobs are handled strictly one at a time. Progress is reported through
/// `progress` when a callback is present.
///
/// # Errors
/// Fetch/store errors are absorbed into `DrainResult::failed`. Errors from the
/// queue bookkeeping itself (`reclaim_stale_locks`, `count_pending_jobs`,
/// `claim_jobs`, `complete_job`, `fail_job`) abort the drain and are returned.
///
/// NOTE(review): every claimed job is fetched using the caller's
/// `gitlab_project_id`, while its events are stored under `job.project_id`.
/// If `claim_jobs` can hand back jobs queued for a different project, the two
/// would disagree — confirm against `core::dependent_queue`.
async fn drain_resource_events(
    conn: &Connection,
    client: &GitLabClient,
    config: &Config,
    gitlab_project_id: i64,
    progress: &Option<ProgressCallback>,
) -> Result<DrainResult> {
    let mut result = DrainResult::default();
    let batch_size = config.sync.dependent_concurrency as usize;

    // Release locks that look abandoned before counting or claiming anything.
    let reclaimed = reclaim_stale_locks(conn, config.sync.stale_lock_minutes)?;
    if reclaimed > 0 {
        info!(reclaimed, "Reclaimed stale resource event locks");
    }

    // The pending count is only used for progress totals and the loop guard.
    let pending_counts = count_pending_jobs(conn)?;
    let total_pending = match pending_counts.get("resource_events") {
        Some(&count) => count,
        None => 0,
    };
    if total_pending == 0 {
        return Ok(result);
    }

    let notify = |event: ProgressEvent| match progress {
        Some(cb) => {
            cb(event);
        }
        None => {}
    };

    notify(ProgressEvent::ResourceEventsFetchStarted {
        total: total_pending,
    });

    // Safety valve: never handle more than twice the initially pending count in
    // one drain, in case jobs become claimable again while the drain is running.
    // The guard is evaluated between batches, so a claimed batch always finishes.
    let max_iterations = total_pending * 2;
    let mut handled = 0;

    let guard_tripped = loop {
        if handled >= max_iterations {
            break true;
        }

        let batch = claim_jobs(conn, "resource_events", batch_size)?;
        if batch.is_empty() {
            break false;
        }

        for job in &batch {
            let fetched = client
                .fetch_all_resource_events(gitlab_project_id, &job.entity_type, job.entity_iid)
                .await;

            // Collapse "fetch failed" and "store failed" into one outcome; each
            // path logs its own message before the job bookkeeping below.
            let outcome = match fetched {
                Ok((state_events, label_events, milestone_events)) => {
                    let stored = store_resource_events(
                        conn,
                        job.project_id,
                        &job.entity_type,
                        job.entity_local_id,
                        &state_events,
                        &label_events,
                        &milestone_events,
                    );
                    if let Err(e) = &stored {
                        warn!(
                            entity_type = %job.entity_type,
                            entity_iid = job.entity_iid,
                            error = %e,
                            "Failed to store resource events"
                        );
                    }
                    stored
                }
                Err(e) => {
                    warn!(
                        entity_type = %job.entity_type,
                        entity_iid = job.entity_iid,
                        error = %e,
                        "Failed to fetch resource events from GitLab"
                    );
                    Err(e)
                }
            };

            match outcome {
                Ok(()) => {
                    complete_job(conn, job.id)?;
                    result.fetched += 1;
                }
                Err(e) => {
                    fail_job(conn, job.id, &e.to_string())?;
                    result.failed += 1;
                }
            }

            handled += 1;
            notify(ProgressEvent::ResourceEventFetched {
                current: handled,
                total: total_pending,
            });
        }
    };

    if guard_tripped {
        warn!(
            iterations = handled,
            total_pending, "Resource events drain hit max iterations guard, stopping"
        );
    }

    notify(ProgressEvent::ResourceEventsFetchComplete {
        fetched: result.fetched,
        failed: result.failed,
    });

    if result.fetched > 0 || result.failed > 0 {
        info!(
            fetched = result.fetched,
            failed = result.failed,
            "Resource events drain complete"
        );
    }

    Ok(result)
}
/// Persist one entity's fetched state, label, and milestone events atomically.
///
/// All three batches are written inside a single transaction opened with
/// `Connection::unchecked_transaction`, which is used because only a shared
/// `&Connection` is available here. Empty batches are skipped entirely.
///
/// # Errors
/// Returns the first error from opening the transaction, from any of the
/// per-kind writers, or from the commit. On an early return the transaction is
/// dropped without being committed (rusqlite's default drop behavior is to
/// roll back), so a partially written entity is not left behind.
fn store_resource_events(
    conn: &Connection,
    project_id: i64,
    entity_type: &str,
    entity_local_id: i64,
    state_events: &[crate::gitlab::types::GitLabStateEvent],
    label_events: &[crate::gitlab::types::GitLabLabelEvent],
    milestone_events: &[crate::gitlab::types::GitLabMilestoneEvent],
) -> Result<()> {
    let txn = conn.unchecked_transaction()?;

    match state_events {
        [] => {}
        batch => store_state_events_tx(&txn, project_id, entity_type, entity_local_id, batch)?,
    }

    match label_events {
        [] => {}
        batch => store_label_events_tx(&txn, project_id, entity_type, entity_local_id, batch)?,
    }

    match milestone_events {
        [] => {}
        batch => {
            store_milestone_events_tx(&txn, project_id, entity_type, entity_local_id, batch)?
        }
    }

    Ok(txn.commit()?)
}
/// Store state events within an existing transaction.
fn store_state_events_tx(
tx: &rusqlite::Transaction<'_>,
project_id: i64,
entity_type: &str,
entity_local_id: i64,
events: &[crate::gitlab::types::GitLabStateEvent],
) -> Result<()> {
let (issue_id, merge_request_id): (Option<i64>, Option<i64>) = match entity_type {
"issue" => (Some(entity_local_id), None),
"merge_request" => (None, Some(entity_local_id)),
_ => return Ok(()),
};
let mut stmt = tx.prepare_cached(
"INSERT OR REPLACE INTO resource_state_events
(gitlab_id, project_id, issue_id, merge_request_id, state,
actor_gitlab_id, actor_username, created_at,
source_commit, source_merge_request_iid)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
)?;
for event in events {
let created_at = crate::core::time::iso_to_ms_strict(&event.created_at)
.map_err(crate::core::error::LoreError::Other)?;
let actor_id = event.user.as_ref().map(|u| u.id);
let actor_username = event.user.as_ref().map(|u| u.username.as_str());
let source_mr_iid = event.source_merge_request.as_ref().map(|mr| mr.iid);
stmt.execute(rusqlite::params![
event.id,
project_id,
issue_id,
merge_request_id,
event.state,
actor_id,
actor_username,
created_at,
event.source_commit,
source_mr_iid,
])?;
}
Ok(())
}
/// Store label events within an existing transaction.
fn store_label_events_tx(
tx: &rusqlite::Transaction<'_>,
project_id: i64,
entity_type: &str,
entity_local_id: i64,
events: &[crate::gitlab::types::GitLabLabelEvent],
) -> Result<()> {
let (issue_id, merge_request_id): (Option<i64>, Option<i64>) = match entity_type {
"issue" => (Some(entity_local_id), None),
"merge_request" => (None, Some(entity_local_id)),
_ => return Ok(()),
};
let mut stmt = tx.prepare_cached(
"INSERT OR REPLACE INTO resource_label_events
(gitlab_id, project_id, issue_id, merge_request_id, action,
label_name, actor_gitlab_id, actor_username, created_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
)?;
for event in events {
let created_at = crate::core::time::iso_to_ms_strict(&event.created_at)
.map_err(crate::core::error::LoreError::Other)?;
let actor_id = event.user.as_ref().map(|u| u.id);
let actor_username = event.user.as_ref().map(|u| u.username.as_str());
stmt.execute(rusqlite::params![
event.id,
project_id,
issue_id,
merge_request_id,
event.action,
event.label.name,
actor_id,
actor_username,
created_at,
])?;
}
Ok(())
}
/// Persist a batch of GitLab milestone events for one issue or merge request.
///
/// Runs inside the caller-supplied transaction `tx`; this function never
/// commits. `entity_type` selects which owner column receives
/// `entity_local_id`: `"issue"` fills `issue_id`, `"merge_request"` fills
/// `merge_request_id`, and the other column is bound as NULL. Any other
/// `entity_type` is a silent no-op: `Ok(())` is returned before any SQL is
/// prepared or executed.
///
/// Rows are written with `INSERT OR REPLACE`, so a row that conflicts with an
/// existing one overwrites it. Both the milestone's title and its id are
/// stored alongside the event.
///
/// # Errors
///
/// Fails if the statement cannot be prepared or executed, or if an event's
/// `created_at` is rejected by `iso_to_ms_strict` (surfaced as
/// `LoreError::Other`). The first failing event stops the batch.
fn store_milestone_events_tx(
    tx: &rusqlite::Transaction<'_>,
    project_id: i64,
    entity_type: &str,
    entity_local_id: i64,
    events: &[crate::gitlab::types::GitLabMilestoneEvent],
) -> Result<()> {
    // Exactly one of the two owner columns is populated for every row.
    let (issue_id, merge_request_id): (Option<i64>, Option<i64>) = if entity_type == "issue" {
        (Some(entity_local_id), None)
    } else if entity_type == "merge_request" {
        (None, Some(entity_local_id))
    } else {
        return Ok(());
    };

    let mut insert = tx.prepare_cached(
        "INSERT OR REPLACE INTO resource_milestone_events
(gitlab_id, project_id, issue_id, merge_request_id, action,
milestone_title, milestone_id, actor_gitlab_id, actor_username, created_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
    )?;

    events.iter().try_for_each(|event| -> Result<()> {
        // Timestamp conversion is the only fallible step besides the insert itself.
        let created_at = crate::core::time::iso_to_ms_strict(&event.created_at)
            .map_err(crate::core::error::LoreError::Other)?;

        // An event without a user leaves both actor columns NULL.
        let (actor_id, actor_username) = match event.user.as_ref() {
            Some(user) => (Some(user.id), Some(user.username.as_str())),
            None => (None, None),
        };

        insert.execute(rusqlite::params![
            event.id,
            project_id,
            issue_id,
            merge_request_id,
            event.action,
            event.milestone.title,
            event.milestone.id,
            actor_id,
            actor_username,
            created_at,
        ])?;
        Ok(())
    })
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -419,6 +826,8 @@ mod tests {
assert_eq!(result.notes_upserted, 0); assert_eq!(result.notes_upserted, 0);
assert_eq!(result.issues_synced_discussions, 0); assert_eq!(result.issues_synced_discussions, 0);
assert_eq!(result.issues_skipped_discussion_sync, 0); assert_eq!(result.issues_skipped_discussion_sync, 0);
assert_eq!(result.resource_events_fetched, 0);
assert_eq!(result.resource_events_failed, 0);
} }
#[test] #[test]
@@ -436,5 +845,28 @@ mod tests {
assert_eq!(result.diffnotes_count, 0); assert_eq!(result.diffnotes_count, 0);
assert_eq!(result.mrs_synced_discussions, 0); assert_eq!(result.mrs_synced_discussions, 0);
assert_eq!(result.mrs_skipped_discussion_sync, 0); assert_eq!(result.mrs_skipped_discussion_sync, 0);
assert_eq!(result.resource_events_fetched, 0);
assert_eq!(result.resource_events_failed, 0);
}
#[test]
fn drain_result_default_has_zero_counts() {
    // A defaulted DrainResult must start with both counters at zero.
    let DrainResult {
        fetched, failed, ..
    } = DrainResult::default();
    assert_eq!(fetched, 0);
    assert_eq!(failed, 0);
}
#[test]
fn progress_event_resource_variants_exist() {
// Verify the new progress event variants are constructible
let _start = ProgressEvent::ResourceEventsFetchStarted { total: 10 };
let _progress = ProgressEvent::ResourceEventFetched {
current: 5,
total: 10,
};
let _complete = ProgressEvent::ResourceEventsFetchComplete {
fetched: 8,
failed: 2,
};
} }
} }

View File

@@ -10,23 +10,25 @@ use tracing_subscriber::util::SubscriberInitExt;
use lore::Config; use lore::Config;
use lore::cli::commands::{ use lore::cli::commands::{
InitInputs, InitOptions, InitResult, ListFilters, MrListFilters, SearchCliFilters, open_issue_in_browser, IngestDisplay, InitInputs, InitOptions, InitResult, ListFilters, MrListFilters,
open_mr_in_browser, print_count, print_count_json, print_event_count, print_event_count_json, print_doctor_results, print_generate_docs, SearchCliFilters, SyncOptions, open_issue_in_browser, open_mr_in_browser, print_count,
print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues, print_count_json, print_doctor_results, print_embed, print_embed_json, print_event_count,
print_list_issues_json, print_list_mrs, print_list_mrs_json, print_search_results, print_event_count_json, print_generate_docs, print_generate_docs_json, print_ingest_summary,
print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr, print_stats, print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs,
print_stats_json, print_list_mrs_json, print_search_results, print_search_results_json, print_show_issue,
print_embed, print_embed_json, print_sync, print_sync_json, print_show_issue_json, print_show_mr, print_show_mr_json, print_stats, print_stats_json,
print_show_mr_json, print_sync_status, print_sync_status_json, run_auth_test, run_count, print_sync, print_sync_json, print_sync_status, print_sync_status_json, run_auth_test,
run_count_events, run_doctor, run_embed, run_generate_docs, run_ingest, run_init, run_list_issues, run_list_mrs, run_count, run_count_events, run_doctor, run_embed, run_generate_docs, run_ingest, run_init,
run_search, run_show_issue, run_show_mr, run_stats, run_sync, run_sync_status, SyncOptions, run_list_issues, run_list_mrs, run_search, run_show_issue, run_show_mr, run_stats, run_sync,
IngestDisplay, run_sync_status,
}; };
use lore::cli::{ use lore::cli::{
Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs, Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs,
SearchArgs, StatsArgs, SyncArgs, SearchArgs, StatsArgs, SyncArgs,
}; };
use lore::core::db::{create_connection, get_schema_version, run_migrations, LATEST_SCHEMA_VERSION}; use lore::core::db::{
LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations,
};
use lore::core::error::{LoreError, RobotErrorOutput}; use lore::core::error::{LoreError, RobotErrorOutput};
use lore::core::paths::get_config_path; use lore::core::paths::get_config_path;
use lore::core::paths::get_db_path; use lore::core::paths::get_db_path;
@@ -76,10 +78,10 @@ async fn main() {
Commands::Stats(args) => handle_stats(cli.config.as_deref(), args, robot_mode).await, Commands::Stats(args) => handle_stats(cli.config.as_deref(), args, robot_mode).await,
Commands::Embed(args) => handle_embed(cli.config.as_deref(), args, robot_mode).await, Commands::Embed(args) => handle_embed(cli.config.as_deref(), args, robot_mode).await,
Commands::Sync(args) => handle_sync_cmd(cli.config.as_deref(), args, robot_mode).await, Commands::Sync(args) => handle_sync_cmd(cli.config.as_deref(), args, robot_mode).await,
Commands::Ingest(args) => handle_ingest(cli.config.as_deref(), args, robot_mode, quiet).await, Commands::Ingest(args) => {
Commands::Count(args) => { handle_ingest(cli.config.as_deref(), args, robot_mode, quiet).await
handle_count(cli.config.as_deref(), args, robot_mode).await
} }
Commands::Count(args) => handle_count(cli.config.as_deref(), args, robot_mode).await,
Commands::Status => handle_sync_status_cmd(cli.config.as_deref(), robot_mode).await, Commands::Status => handle_sync_status_cmd(cli.config.as_deref(), robot_mode).await,
Commands::Auth => handle_auth_test(cli.config.as_deref(), robot_mode).await, Commands::Auth => handle_auth_test(cli.config.as_deref(), robot_mode).await,
Commands::Doctor => handle_doctor(cli.config.as_deref(), robot_mode).await, Commands::Doctor => handle_doctor(cli.config.as_deref(), robot_mode).await,
@@ -137,7 +139,8 @@ async fn main() {
if !robot_mode { if !robot_mode {
eprintln!( eprintln!(
"{}", "{}",
style("warning: 'lore list' is deprecated, use 'lore issues' or 'lore mrs'").yellow() style("warning: 'lore list' is deprecated, use 'lore issues' or 'lore mrs'")
.yellow()
); );
} }
handle_list_compat( handle_list_compat(
@@ -266,8 +269,10 @@ fn handle_error(e: Box<dyn std::error::Error>, robot_mode: bool) -> ! {
}; };
eprintln!( eprintln!(
"{}", "{}",
serde_json::to_string(&output) serde_json::to_string(&output).unwrap_or_else(|_| {
.unwrap_or_else(|_| r#"{"error":{"code":"INTERNAL_ERROR","message":"Serialization failed"}}"#.to_string()) r#"{"error":{"code":"INTERNAL_ERROR","message":"Serialization failed"}}"#
.to_string()
})
); );
} else { } else {
eprintln!("{} {}", style("Error:").red(), e); eprintln!("{} {}", style("Error:").red(), e);
@@ -929,7 +934,10 @@ fn handle_backup(robot_mode: bool) -> Result<(), Box<dyn std::error::Error>> {
}; };
eprintln!("{}", serde_json::to_string(&output)?); eprintln!("{}", serde_json::to_string(&output)?);
} else { } else {
eprintln!("{} The 'backup' command is not yet implemented.", style("Error:").red()); eprintln!(
"{} The 'backup' command is not yet implemented.",
style("Error:").red()
);
} }
std::process::exit(1); std::process::exit(1);
} }
@@ -940,12 +948,16 @@ fn handle_reset(robot_mode: bool) -> Result<(), Box<dyn std::error::Error>> {
error: RobotErrorSuggestionData { error: RobotErrorSuggestionData {
code: "NOT_IMPLEMENTED".to_string(), code: "NOT_IMPLEMENTED".to_string(),
message: "The 'reset' command is not yet implemented.".to_string(), message: "The 'reset' command is not yet implemented.".to_string(),
suggestion: "Manually delete the database: rm ~/.local/share/lore/lore.db".to_string(), suggestion: "Manually delete the database: rm ~/.local/share/lore/lore.db"
.to_string(),
}, },
}; };
eprintln!("{}", serde_json::to_string(&output)?); eprintln!("{}", serde_json::to_string(&output)?);
} else { } else {
eprintln!("{} The 'reset' command is not yet implemented.", style("Error:").red()); eprintln!(
"{} The 'reset' command is not yet implemented.",
style("Error:").red()
);
} }
std::process::exit(1); std::process::exit(1);
} }
@@ -1234,18 +1246,23 @@ async fn handle_health(
style("FAIL").red() style("FAIL").red()
} }
}; };
println!("Config: {} ({})", status(config_found), config_path.display());
println!("DB: {}", status(db_found));
println!( println!(
"Schema: {} (v{})", "Config: {} ({})",
status(schema_current), status(config_found),
schema_version config_path.display()
); );
println!("DB: {}", status(db_found));
println!("Schema: {} (v{})", status(schema_current), schema_version);
println!(); println!();
if healthy { if healthy {
println!("{}", style("Healthy").green().bold()); println!("{}", style("Healthy").green().bold());
} else { } else {
println!("{}", style("Unhealthy - run 'lore doctor' for details").red().bold()); println!(
"{}",
style("Unhealthy - run 'lore doctor' for details")
.red()
.bold()
);
} }
} }

View File

@@ -138,10 +138,7 @@ pub fn apply_filters(
} }
let limit = filters.clamp_limit(); let limit = filters.clamp_limit();
sql.push_str(&format!( sql.push_str(&format!(" ORDER BY j.key LIMIT ?{}", param_idx));
" ORDER BY j.key LIMIT ?{}",
param_idx
));
params.push(Box::new(limit as i64)); params.push(Box::new(limit as i64));
let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect(); let param_refs: Vec<&dyn rusqlite::types::ToSql> = params.iter().map(|p| p.as_ref()).collect();

View File

@@ -39,14 +39,14 @@ pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String {
.split_whitespace() .split_whitespace()
.map(|token| { .map(|token| {
// Check if token ends with * and the rest is alphanumeric // Check if token ends with * and the rest is alphanumeric
if token.ends_with('*') { if let Some(stem) = token.strip_suffix('*')
let stem = &token[..token.len() - 1]; && !stem.is_empty()
if !stem.is_empty() && stem.chars().all(|c| c.is_alphanumeric() || c == '_') { && stem.chars().all(|c| c.is_alphanumeric() || c == '_')
{
// Preserve prefix search: "stem"* // Preserve prefix search: "stem"*
let escaped = stem.replace('"', "\"\""); let escaped = stem.replace('"', "\"\"");
return format!("\"{}\"*", escaped); return format!("\"{}\"*", escaped);
} }
}
// Default: wrap in quotes, escape internal quotes // Default: wrap in quotes, escape internal quotes
let escaped = token.replace('"', "\"\""); let escaped = token.replace('"', "\"\"");
format!("\"{}\"", escaped) format!("\"{}\"", escaped)

View File

@@ -4,8 +4,8 @@ use rusqlite::Connection;
use crate::core::error::Result; use crate::core::error::Result;
use crate::embedding::ollama::OllamaClient; use crate::embedding::ollama::OllamaClient;
use crate::search::{rank_rrf, search_fts, search_vector, FtsQueryMode}; use crate::search::filters::{SearchFilters, apply_filters};
use crate::search::filters::{apply_filters, SearchFilters}; use crate::search::{FtsQueryMode, rank_rrf, search_fts, search_vector};
const BASE_RECALL_MIN: usize = 50; const BASE_RECALL_MIN: usize = 50;
const FILTERED_RECALL_MIN: usize = 200; const FILTERED_RECALL_MIN: usize = 200;
@@ -65,9 +65,9 @@ pub async fn search_hybrid(
// Adaptive recall // Adaptive recall
let requested = filters.clamp_limit(); let requested = filters.clamp_limit();
let top_k = if filters.has_any_filter() { let top_k = if filters.has_any_filter() {
(requested * 50).max(FILTERED_RECALL_MIN).min(RECALL_CAP) (requested * 50).clamp(FILTERED_RECALL_MIN, RECALL_CAP)
} else { } else {
(requested * 10).max(BASE_RECALL_MIN).min(RECALL_CAP) (requested * 10).clamp(BASE_RECALL_MIN, RECALL_CAP)
}; };
let (fts_tuples, vec_tuples) = match mode { let (fts_tuples, vec_tuples) = match mode {
@@ -88,10 +88,7 @@ pub async fn search_hybrid(
}; };
let query_embedding = client.embed_batch(vec![query.to_string()]).await?; let query_embedding = client.embed_batch(vec![query.to_string()]).await?;
let embedding = query_embedding let embedding = query_embedding.into_iter().next().unwrap_or_default();
.into_iter()
.next()
.unwrap_or_default();
if embedding.is_empty() { if embedding.is_empty() {
return Err(crate::core::error::LoreError::Other( return Err(crate::core::error::LoreError::Other(
@@ -115,18 +112,13 @@ pub async fn search_hybrid(
.collect(); .collect();
match client { match client {
Some(client) => { Some(client) => match client.embed_batch(vec![query.to_string()]).await {
match client.embed_batch(vec![query.to_string()]).await {
Ok(query_embedding) => { Ok(query_embedding) => {
let embedding = query_embedding let embedding = query_embedding.into_iter().next().unwrap_or_default();
.into_iter()
.next()
.unwrap_or_default();
let vec_tuples = if embedding.is_empty() { let vec_tuples = if embedding.is_empty() {
warnings.push( warnings
"Ollama returned empty embedding, using FTS only.".into(), .push("Ollama returned empty embedding, using FTS only.".into());
);
Vec::new() Vec::new()
} else { } else {
let vec_results = search_vector(conn, &embedding, top_k)?; let vec_results = search_vector(conn, &embedding, top_k)?;
@@ -139,17 +131,15 @@ pub async fn search_hybrid(
(fts_tuples, vec_tuples) (fts_tuples, vec_tuples)
} }
Err(e) => { Err(e) => {
warnings.push( warnings.push(format!(
format!("Embedding failed ({}), falling back to lexical search.", e), "Embedding failed ({}), falling back to lexical search.",
); e
));
(fts_tuples, Vec::new()) (fts_tuples, Vec::new())
} }
} },
}
None => { None => {
warnings.push( warnings.push("Ollama unavailable, falling back to lexical search.".into());
"Ollama unavailable, falling back to lexical search.".into(),
);
(fts_tuples, Vec::new()) (fts_tuples, Vec::new())
} }
} }
@@ -217,7 +207,7 @@ mod tests {
..Default::default() ..Default::default()
}; };
let requested = filters.clamp_limit(); let requested = filters.clamp_limit();
let top_k = (requested * 10).max(BASE_RECALL_MIN).min(RECALL_CAP); let top_k = (requested * 10).clamp(BASE_RECALL_MIN, RECALL_CAP);
assert_eq!(top_k, 200); assert_eq!(top_k, 200);
} }
@@ -229,7 +219,7 @@ mod tests {
..Default::default() ..Default::default()
}; };
let requested = filters.clamp_limit(); let requested = filters.clamp_limit();
let top_k = (requested * 50).max(FILTERED_RECALL_MIN).min(RECALL_CAP); let top_k = (requested * 50).clamp(FILTERED_RECALL_MIN, RECALL_CAP);
assert_eq!(top_k, 1000); assert_eq!(top_k, 1000);
} }
@@ -241,7 +231,7 @@ mod tests {
..Default::default() ..Default::default()
}; };
let requested = filters.clamp_limit(); let requested = filters.clamp_limit();
let top_k = (requested * 50).max(FILTERED_RECALL_MIN).min(RECALL_CAP); let top_k = (requested * 50).clamp(FILTERED_RECALL_MIN, RECALL_CAP);
assert_eq!(top_k, RECALL_CAP); // 5000 capped to 1500 assert_eq!(top_k, RECALL_CAP); // 5000 capped to 1500
} }
@@ -252,7 +242,7 @@ mod tests {
..Default::default() ..Default::default()
}; };
let requested = filters.clamp_limit(); let requested = filters.clamp_limit();
let top_k = (requested * 10).max(BASE_RECALL_MIN).min(RECALL_CAP); let top_k = (requested * 10).clamp(BASE_RECALL_MIN, RECALL_CAP);
assert_eq!(top_k, BASE_RECALL_MIN); // 10 -> 50 assert_eq!(top_k, BASE_RECALL_MIN); // 10 -> 50
} }
} }

View File

@@ -4,11 +4,11 @@ mod hybrid;
mod rrf; mod rrf;
mod vector; mod vector;
pub use filters::{PathFilter, SearchFilters, apply_filters};
pub use fts::{ pub use fts::{
generate_fallback_snippet, get_result_snippet, search_fts, to_fts_query, FtsQueryMode, FtsQueryMode, FtsResult, generate_fallback_snippet, get_result_snippet, search_fts,
FtsResult, to_fts_query,
}; };
pub use filters::{apply_filters, PathFilter, SearchFilters}; pub use hybrid::{HybridResult, SearchMode, search_hybrid};
pub use rrf::{rank_rrf, RrfResult}; pub use rrf::{RrfResult, rank_rrf};
pub use vector::{search_vector, VectorResult}; pub use vector::{VectorResult, search_vector};
pub use hybrid::{search_hybrid, HybridResult, SearchMode};

View File

@@ -22,10 +22,7 @@ pub struct RrfResult {
/// Ranks are 1-indexed (first result = rank 1). /// Ranks are 1-indexed (first result = rank 1).
/// ///
/// Score = sum of 1/(k + rank) for each list containing the document. /// Score = sum of 1/(k + rank) for each list containing the document.
pub fn rank_rrf( pub fn rank_rrf(vector_results: &[(i64, f64)], fts_results: &[(i64, f64)]) -> Vec<RrfResult> {
vector_results: &[(i64, f64)],
fts_results: &[(i64, f64)],
) -> Vec<RrfResult> {
if vector_results.is_empty() && fts_results.is_empty() { if vector_results.is_empty() && fts_results.is_empty() {
return Vec::new(); return Vec::new();
} }
@@ -63,16 +60,18 @@ pub fn rank_rrf(
.collect(); .collect();
// Sort descending by rrf_score // Sort descending by rrf_score
results.sort_by(|a, b| b.rrf_score.partial_cmp(&a.rrf_score).unwrap_or(std::cmp::Ordering::Equal)); results.sort_by(|a, b| {
b.rrf_score
.partial_cmp(&a.rrf_score)
.unwrap_or(std::cmp::Ordering::Equal)
});
// Normalize: best = 1.0 // Normalize: best = 1.0
if let Some(max_score) = results.first().map(|r| r.rrf_score) { if let Some(max_score) = results.first().map(|r| r.rrf_score).filter(|&s| s > 0.0) {
if max_score > 0.0 {
for result in &mut results { for result in &mut results {
result.normalized_score = result.rrf_score / max_score; result.normalized_score = result.rrf_score / max_score;
} }
} }
}
results results
} }
@@ -92,8 +91,16 @@ mod tests {
// Doc 1 score should be higher than doc 2 and doc 3 // Doc 1 score should be higher than doc 2 and doc 3
let doc1 = &results[0]; let doc1 = &results[0];
let doc2_score = results.iter().find(|r| r.document_id == 2).unwrap().rrf_score; let doc2_score = results
let doc3_score = results.iter().find(|r| r.document_id == 3).unwrap().rrf_score; .iter()
.find(|r| r.document_id == 2)
.unwrap()
.rrf_score;
let doc3_score = results
.iter()
.find(|r| r.document_id == 3)
.unwrap()
.rrf_score;
assert!(doc1.rrf_score > doc2_score); assert!(doc1.rrf_score > doc2_score);
assert!(doc1.rrf_score > doc3_score); assert!(doc1.rrf_score > doc3_score);
} }

View File

@@ -70,7 +70,7 @@ pub fn search_vector(
FROM embeddings FROM embeddings
WHERE embedding MATCH ?1 WHERE embedding MATCH ?1
AND k = ?2 AND k = ?2
ORDER BY distance" ORDER BY distance",
)?; )?;
let rows: Vec<(i64, f64)> = stmt let rows: Vec<(i64, f64)> = stmt
@@ -137,11 +137,7 @@ mod tests {
#[test] #[test]
fn test_dedup_respects_limit() { fn test_dedup_respects_limit() {
let rows = vec![ let rows = vec![(1000_i64, 0.1_f64), (2000, 0.2), (3000, 0.3)];
(1000_i64, 0.1_f64),
(2000, 0.2),
(3000, 0.3),
];
let results = search_vector_dedup(rows, 2); let results = search_vector_dedup(rows, 2);
assert_eq!(results.len(), 2); assert_eq!(results.len(), 2);
} }
@@ -161,7 +157,10 @@ mod tests {
} }
let mut results: Vec<VectorResult> = best let mut results: Vec<VectorResult> = best
.into_iter() .into_iter()
.map(|(document_id, distance)| VectorResult { document_id, distance }) .map(|(document_id, distance)| VectorResult {
document_id,
distance,
})
.collect(); .collect();
results.sort_by(|a, b| a.distance.total_cmp(&b.distance)); results.sort_by(|a, b| a.distance.total_cmp(&b.distance));
results.truncate(limit); results.truncate(limit);

View File

@@ -102,7 +102,10 @@ fn knn_search_returns_nearest_neighbors() {
let results = lore::search::search_vector(&conn, &query, 10).unwrap(); let results = lore::search::search_vector(&conn, &query, 10).unwrap();
assert!(!results.is_empty(), "Should return at least one result"); assert!(!results.is_empty(), "Should return at least one result");
assert_eq!(results[0].document_id, 1, "Nearest neighbor should be doc 1"); assert_eq!(
results[0].document_id, 1,
"Nearest neighbor should be doc 1"
);
} }
#[test] #[test]
@@ -122,7 +125,12 @@ fn knn_search_respects_limit() {
fn knn_search_deduplicates_chunks() { fn knn_search_deduplicates_chunks() {
let (_tmp, conn) = create_test_db(); let (_tmp, conn) = create_test_db();
insert_document(&conn, 1, "Multi-chunk doc", "Very long content that was chunked."); insert_document(
&conn,
1,
"Multi-chunk doc",
"Very long content that was chunked.",
);
// Same document, two chunks, both similar to query // Same document, two chunks, both similar to query
let mut v1 = vec![0.0f32; 768]; let mut v1 = vec![0.0f32; 768];
@@ -137,7 +145,8 @@ fn knn_search_deduplicates_chunks() {
let results = lore::search::search_vector(&conn, &axis_vector(0), 10).unwrap(); let results = lore::search::search_vector(&conn, &axis_vector(0), 10).unwrap();
// Should deduplicate: same document_id appears at most once // Should deduplicate: same document_id appears at most once
let unique_docs: std::collections::HashSet<i64> = results.iter().map(|r| r.document_id).collect(); let unique_docs: std::collections::HashSet<i64> =
results.iter().map(|r| r.document_id).collect();
assert_eq!( assert_eq!(
unique_docs.len(), unique_docs.len(),
results.len(), results.len(),
@@ -154,22 +163,38 @@ fn orphan_trigger_deletes_embeddings_on_document_delete() {
// Verify embedding exists // Verify embedding exists
let count: i64 = conn let count: i64 = conn
.query_row("SELECT COUNT(*) FROM embeddings WHERE rowid = 1000", [], |r| r.get(0)) .query_row(
"SELECT COUNT(*) FROM embeddings WHERE rowid = 1000",
[],
|r| r.get(0),
)
.unwrap(); .unwrap();
assert_eq!(count, 1, "Embedding should exist before delete"); assert_eq!(count, 1, "Embedding should exist before delete");
// Delete the document // Delete the document
conn.execute("DELETE FROM documents WHERE id = 1", []).unwrap(); conn.execute("DELETE FROM documents WHERE id = 1", [])
.unwrap();
// Verify embedding was cascade-deleted via trigger // Verify embedding was cascade-deleted via trigger
let count: i64 = conn let count: i64 = conn
.query_row("SELECT COUNT(*) FROM embeddings WHERE rowid = 1000", [], |r| r.get(0)) .query_row(
"SELECT COUNT(*) FROM embeddings WHERE rowid = 1000",
[],
|r| r.get(0),
)
.unwrap(); .unwrap();
assert_eq!(count, 0, "Trigger should delete embeddings when document is deleted"); assert_eq!(
count, 0,
"Trigger should delete embeddings when document is deleted"
);
// Verify metadata was cascade-deleted via FK // Verify metadata was cascade-deleted via FK
let meta_count: i64 = conn let meta_count: i64 = conn
.query_row("SELECT COUNT(*) FROM embedding_metadata WHERE document_id = 1", [], |r| r.get(0)) .query_row(
"SELECT COUNT(*) FROM embedding_metadata WHERE document_id = 1",
[],
|r| r.get(0),
)
.unwrap(); .unwrap();
assert_eq!(meta_count, 0, "Metadata should be cascade-deleted"); assert_eq!(meta_count, 0, "Metadata should be cascade-deleted");
} }
@@ -206,7 +231,8 @@ fn overflow_doc_with_error_sentinel_not_re_detected_as_pending() {
.unwrap(); .unwrap();
// Now find_pending_documents should NOT return this document // Now find_pending_documents should NOT return this document
let pending = lore::embedding::find_pending_documents(&conn, 100, 0, "nomic-embed-text").unwrap(); let pending =
lore::embedding::find_pending_documents(&conn, 100, 0, "nomic-embed-text").unwrap();
assert!( assert!(
pending.is_empty(), pending.is_empty(),
"Document with overflow error sentinel should not be re-detected as pending, got {} pending", "Document with overflow error sentinel should not be re-detected as pending, got {} pending",
@@ -215,7 +241,10 @@ fn overflow_doc_with_error_sentinel_not_re_detected_as_pending() {
// count_pending_documents should also return 0 // count_pending_documents should also return 0
let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap(); let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
assert_eq!(count, 0, "Count should be 0 for document with overflow sentinel"); assert_eq!(
count, 0,
"Count should be 0 for document with overflow sentinel"
);
} }
#[test] #[test]
@@ -226,14 +255,24 @@ fn count_and_find_pending_agree() {
// Case 1: No documents at all // Case 1: No documents at all
let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap(); let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
let found = lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap(); let found =
assert_eq!(count as usize, found.len(), "Empty DB: count and find should agree"); lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
assert_eq!(
count as usize,
found.len(),
"Empty DB: count and find should agree"
);
// Case 2: New document (no metadata) // Case 2: New document (no metadata)
insert_document(&conn, 1, "New doc", "Content"); insert_document(&conn, 1, "New doc", "Content");
let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap(); let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
let found = lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap(); let found =
assert_eq!(count as usize, found.len(), "New doc: count and find should agree"); lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
assert_eq!(
count as usize,
found.len(),
"New doc: count and find should agree"
);
assert_eq!(count, 1); assert_eq!(count, 1);
// Case 3: Document with matching metadata (not pending) // Case 3: Document with matching metadata (not pending)
@@ -247,8 +286,13 @@ fn count_and_find_pending_agree() {
) )
.unwrap(); .unwrap();
let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap(); let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
let found = lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap(); let found =
assert_eq!(count as usize, found.len(), "Complete doc: count and find should agree"); lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
assert_eq!(
count as usize,
found.len(),
"Complete doc: count and find should agree"
);
assert_eq!(count, 0); assert_eq!(count, 0);
// Case 4: Config drift (chunk_max_bytes mismatch) // Case 4: Config drift (chunk_max_bytes mismatch)
@@ -258,8 +302,13 @@ fn count_and_find_pending_agree() {
) )
.unwrap(); .unwrap();
let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap(); let count = lore::embedding::count_pending_documents(&conn, "nomic-embed-text").unwrap();
let found = lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap(); let found =
assert_eq!(count as usize, found.len(), "Config drift: count and find should agree"); lore::embedding::find_pending_documents(&conn, 1000, 0, "nomic-embed-text").unwrap();
assert_eq!(
count as usize,
found.len(),
"Config drift: count and find should agree"
);
assert_eq!(count, 1); assert_eq!(count, 1);
} }

View File

@@ -51,26 +51,72 @@ fn insert_document(conn: &Connection, id: i64, source_type: &str, title: &str, c
fn fts_basic_search() { fn fts_basic_search() {
let conn = create_test_db(); let conn = create_test_db();
insert_document(&conn, 1, "issue", "Authentication bug", "Users cannot login when using OAuth tokens. The JWT refresh fails silently."); insert_document(
insert_document(&conn, 2, "merge_request", "Add user profile page", "This MR adds a new user profile page with avatar upload support."); &conn,
insert_document(&conn, 3, "issue", "Database migration failing", "The migration script crashes on PostgreSQL 14 due to deprecated syntax."); 1,
"issue",
"Authentication bug",
"Users cannot login when using OAuth tokens. The JWT refresh fails silently.",
);
insert_document(
&conn,
2,
"merge_request",
"Add user profile page",
"This MR adds a new user profile page with avatar upload support.",
);
insert_document(
&conn,
3,
"issue",
"Database migration failing",
"The migration script crashes on PostgreSQL 14 due to deprecated syntax.",
);
let results = lore::search::search_fts(&conn, "authentication login", 10, lore::search::FtsQueryMode::Safe).unwrap(); let results = lore::search::search_fts(
&conn,
"authentication login",
10,
lore::search::FtsQueryMode::Safe,
)
.unwrap();
assert!(!results.is_empty(), "Expected at least one result for 'authentication login'"); assert!(
assert_eq!(results[0].document_id, 1, "Authentication issue should be top result"); !results.is_empty(),
"Expected at least one result for 'authentication login'"
);
assert_eq!(
results[0].document_id, 1,
"Authentication issue should be top result"
);
} }
#[test] #[test]
fn fts_stemming_matches() { fn fts_stemming_matches() {
let conn = create_test_db(); let conn = create_test_db();
insert_document(&conn, 1, "issue", "Running tests", "The test runner is executing integration tests."); insert_document(
insert_document(&conn, 2, "issue", "Deployment config", "Deployment configuration for production servers."); &conn,
1,
"issue",
"Running tests",
"The test runner is executing integration tests.",
);
insert_document(
&conn,
2,
"issue",
"Deployment config",
"Deployment configuration for production servers.",
);
// "running" should match "runner" and "executing" via porter stemmer // "running" should match "runner" and "executing" via porter stemmer
let results = lore::search::search_fts(&conn, "running", 10, lore::search::FtsQueryMode::Safe).unwrap(); let results =
assert!(!results.is_empty(), "Stemming should match 'running' to 'runner'"); lore::search::search_fts(&conn, "running", 10, lore::search::FtsQueryMode::Safe).unwrap();
assert!(
!results.is_empty(),
"Stemming should match 'running' to 'runner'"
);
assert_eq!(results[0].document_id, 1); assert_eq!(results[0].document_id, 1);
} }
@@ -78,20 +124,43 @@ fn fts_stemming_matches() {
fn fts_empty_results() { fn fts_empty_results() {
let conn = create_test_db(); let conn = create_test_db();
insert_document(&conn, 1, "issue", "Bug fix", "Fixed a null pointer dereference in the parser."); insert_document(
&conn,
1,
"issue",
"Bug fix",
"Fixed a null pointer dereference in the parser.",
);
let results = lore::search::search_fts(&conn, "kubernetes deployment helm", 10, lore::search::FtsQueryMode::Safe).unwrap(); let results = lore::search::search_fts(
assert!(results.is_empty(), "No documents should match unrelated query"); &conn,
"kubernetes deployment helm",
10,
lore::search::FtsQueryMode::Safe,
)
.unwrap();
assert!(
results.is_empty(),
"No documents should match unrelated query"
);
} }
#[test] #[test]
fn fts_special_characters_handled() { fn fts_special_characters_handled() {
let conn = create_test_db(); let conn = create_test_db();
insert_document(&conn, 1, "issue", "C++ compiler", "The C++ compiler segfaults on template metaprogramming."); insert_document(
&conn,
1,
"issue",
"C++ compiler",
"The C++ compiler segfaults on template metaprogramming.",
);
// Special characters should not crash the search // Special characters should not crash the search
let results = lore::search::search_fts(&conn, "C++ compiler", 10, lore::search::FtsQueryMode::Safe).unwrap(); let results =
lore::search::search_fts(&conn, "C++ compiler", 10, lore::search::FtsQueryMode::Safe)
.unwrap();
// Safe mode sanitizes the query — it should still return results or at least not crash // Safe mode sanitizes the query — it should still return results or at least not crash
assert!(results.len() <= 1); assert!(results.len() <= 1);
} }
@@ -101,17 +170,44 @@ fn fts_result_ordering_by_relevance() {
let conn = create_test_db(); let conn = create_test_db();
// Doc 1: "authentication" in title and content // Doc 1: "authentication" in title and content
insert_document(&conn, 1, "issue", "Authentication system redesign", "The authentication system needs a complete redesign. Authentication flows are broken."); insert_document(
&conn,
1,
"issue",
"Authentication system redesign",
"The authentication system needs a complete redesign. Authentication flows are broken.",
);
// Doc 2: "authentication" only in content, once // Doc 2: "authentication" only in content, once
insert_document(&conn, 2, "issue", "Login page update", "Updated the login page with better authentication error messages."); insert_document(
&conn,
2,
"issue",
"Login page update",
"Updated the login page with better authentication error messages.",
);
// Doc 3: unrelated // Doc 3: unrelated
insert_document(&conn, 3, "issue", "Database optimization", "Optimize database queries for faster response times."); insert_document(
&conn,
3,
"issue",
"Database optimization",
"Optimize database queries for faster response times.",
);
let results = lore::search::search_fts(&conn, "authentication", 10, lore::search::FtsQueryMode::Safe).unwrap(); let results = lore::search::search_fts(
&conn,
"authentication",
10,
lore::search::FtsQueryMode::Safe,
)
.unwrap();
assert!(results.len() >= 2, "Should match at least 2 documents"); assert!(results.len() >= 2, "Should match at least 2 documents");
// Doc 1 should rank higher (more occurrences of the term) // Doc 1 should rank higher (more occurrences of the term)
assert_eq!(results[0].document_id, 1, "Document with more term occurrences should rank first"); assert_eq!(
results[0].document_id, 1,
"Document with more term occurrences should rank first"
);
} }
#[test] #[test]
@@ -128,7 +224,8 @@ fn fts_respects_limit() {
); );
} }
let results = lore::search::search_fts(&conn, "bug login", 5, lore::search::FtsQueryMode::Safe).unwrap(); let results =
lore::search::search_fts(&conn, "bug login", 5, lore::search::FtsQueryMode::Safe).unwrap();
assert!(results.len() <= 5, "Results should be capped at limit"); assert!(results.len() <= 5, "Results should be capped at limit");
} }
@@ -136,24 +233,45 @@ fn fts_respects_limit() {
fn fts_snippet_generated() { fn fts_snippet_generated() {
let conn = create_test_db(); let conn = create_test_db();
insert_document(&conn, 1, "issue", "Performance issue", "The application performance degrades significantly when more than 100 users are connected simultaneously. Memory usage spikes to 4GB."); insert_document(
&conn,
1,
"issue",
"Performance issue",
"The application performance degrades significantly when more than 100 users are connected simultaneously. Memory usage spikes to 4GB.",
);
let results = lore::search::search_fts(&conn, "performance", 10, lore::search::FtsQueryMode::Safe).unwrap(); let results =
lore::search::search_fts(&conn, "performance", 10, lore::search::FtsQueryMode::Safe)
.unwrap();
assert!(!results.is_empty()); assert!(!results.is_empty());
// Snippet should contain some text (may have FTS5 highlight markers) // Snippet should contain some text (may have FTS5 highlight markers)
assert!(!results[0].snippet.is_empty(), "Snippet should be generated"); assert!(
!results[0].snippet.is_empty(),
"Snippet should be generated"
);
} }
#[test] #[test]
fn fts_triggers_sync_on_insert() { fn fts_triggers_sync_on_insert() {
let conn = create_test_db(); let conn = create_test_db();
insert_document(&conn, 1, "issue", "Test document", "This is test content for FTS trigger verification."); insert_document(
&conn,
1,
"issue",
"Test document",
"This is test content for FTS trigger verification.",
);
// Verify FTS table has an entry via direct query // Verify FTS table has an entry via direct query
let fts_count: i64 = conn let fts_count: i64 = conn
.query_row("SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'test'", [], |r| r.get(0)) .query_row(
"SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'test'",
[],
|r| r.get(0),
)
.unwrap(); .unwrap();
assert_eq!(fts_count, 1, "FTS trigger should auto-index on INSERT"); assert_eq!(fts_count, 1, "FTS trigger should auto-index on INSERT");
@@ -163,20 +281,35 @@ fn fts_triggers_sync_on_insert() {
fn fts_triggers_sync_on_delete() { fn fts_triggers_sync_on_delete() {
let conn = create_test_db(); let conn = create_test_db();
insert_document(&conn, 1, "issue", "Deletable document", "This content will be deleted from the index."); insert_document(
&conn,
1,
"issue",
"Deletable document",
"This content will be deleted from the index.",
);
// Verify it's indexed // Verify it's indexed
let before: i64 = conn let before: i64 = conn
.query_row("SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'deletable'", [], |r| r.get(0)) .query_row(
"SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'deletable'",
[],
|r| r.get(0),
)
.unwrap(); .unwrap();
assert_eq!(before, 1); assert_eq!(before, 1);
// Delete the document // Delete the document
conn.execute("DELETE FROM documents WHERE id = 1", []).unwrap(); conn.execute("DELETE FROM documents WHERE id = 1", [])
.unwrap();
// Verify it's removed from FTS // Verify it's removed from FTS
let after: i64 = conn let after: i64 = conn
.query_row("SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'deletable'", [], |r| r.get(0)) .query_row(
"SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'deletable'",
[],
|r| r.get(0),
)
.unwrap(); .unwrap();
assert_eq!(after, 0, "FTS trigger should remove entry on DELETE"); assert_eq!(after, 0, "FTS trigger should remove entry on DELETE");
} }
@@ -193,6 +326,8 @@ fn fts_null_title_handled() {
) )
.unwrap(); .unwrap();
let results = lore::search::search_fts(&conn, "rate limiting", 10, lore::search::FtsQueryMode::Safe).unwrap(); let results =
lore::search::search_fts(&conn, "rate limiting", 10, lore::search::FtsQueryMode::Safe)
.unwrap();
assert!(!results.is_empty(), "Should find documents with NULL title"); assert!(!results.is_empty(), "Should find documents with NULL title");
} }

View File

@@ -10,7 +10,7 @@ use rusqlite::Connection;
use serde::Deserialize; use serde::Deserialize;
use std::path::PathBuf; use std::path::PathBuf;
use lore::search::{self, FtsQueryMode, SearchFilters, SearchMode, search_fts, apply_filters}; use lore::search::{FtsQueryMode, SearchFilters, SearchMode, apply_filters, search_fts};
/// A golden query test case. /// A golden query test case.
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
@@ -35,8 +35,7 @@ struct GoldenFilters {
} }
fn load_golden_queries() -> Vec<GoldenQuery> { fn load_golden_queries() -> Vec<GoldenQuery> {
let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/golden_queries.json");
.join("tests/fixtures/golden_queries.json");
let content = std::fs::read_to_string(&path) let content = std::fs::read_to_string(&path)
.unwrap_or_else(|_| panic!("Failed to read golden queries fixture")); .unwrap_or_else(|_| panic!("Failed to read golden queries fixture"));
serde_json::from_str(&content) serde_json::from_str(&content)
@@ -77,63 +76,88 @@ fn create_seeded_db() -> Connection {
// Seed deterministic documents // Seed deterministic documents
let documents = vec![ let documents = vec![
// id=1: Auth issue (matches: authentication, login, OAuth, JWT, token, refresh) // id=1: Auth issue (matches: authentication, login, OAuth, JWT, token, refresh)
(1, "issue", "Authentication and login broken with OAuth", (
1,
"issue",
"Authentication and login broken with OAuth",
"Users cannot login when using OAuth tokens. The JWT token refresh fails silently, \ "Users cannot login when using OAuth tokens. The JWT token refresh fails silently, \
causing authentication errors. When the access token expires, the refresh flow returns \ causing authentication errors. When the access token expires, the refresh flow returns \
a 401 instead of fetching new credentials. Login page shows a generic error. \ a 401 instead of fetching new credentials. Login page shows a generic error. \
Multiple users reported authentication failures across all OAuth providers.", Multiple users reported authentication failures across all OAuth providers.",
"testuser"), "testuser",
),
// id=2: User profile MR (matches: user, profile, avatar, upload) // id=2: User profile MR (matches: user, profile, avatar, upload)
(2, "merge_request", "Add user profile page with avatar upload", (
2,
"merge_request",
"Add user profile page with avatar upload",
"This merge request adds a new user profile page. Users can now upload their avatar, \ "This merge request adds a new user profile page. Users can now upload their avatar, \
edit their display name, and manage notification preferences. The profile page includes \ edit their display name, and manage notification preferences. The profile page includes \
responsive design for mobile and desktop viewports.", responsive design for mobile and desktop viewports.",
"developer1"), "developer1",
),
// id=3: Database migration issue (matches: database, migration, PostgreSQL, schema) // id=3: Database migration issue (matches: database, migration, PostgreSQL, schema)
(3, "issue", "Database migration failing on PostgreSQL 14", (
3,
"issue",
"Database migration failing on PostgreSQL 14",
"The database migration script crashes on PostgreSQL 14 due to deprecated syntax. \ "The database migration script crashes on PostgreSQL 14 due to deprecated syntax. \
The ALTER TABLE command uses a syntax removed in PG14. Migration 042 needs to be \ The ALTER TABLE command uses a syntax removed in PG14. Migration 042 needs to be \
rewritten to use the new schema modification syntax. All staging environments affected.", rewritten to use the new schema modification syntax. All staging environments affected.",
"dba_admin"), "dba_admin",
),
// id=4: Performance MR (matches: performance, optimization, caching, query) // id=4: Performance MR (matches: performance, optimization, caching, query)
(4, "merge_request", "Performance optimization for dashboard queries", (
4,
"merge_request",
"Performance optimization for dashboard queries",
"Optimized the dashboard query performance by adding database indexes and implementing \ "Optimized the dashboard query performance by adding database indexes and implementing \
Redis caching for frequently accessed reports. Query execution time reduced from 3.2s \ Redis caching for frequently accessed reports. Query execution time reduced from 3.2s \
to 180ms. Added connection pooling and prepared statement caching.", to 180ms. Added connection pooling and prepared statement caching.",
"senior_dev"), "senior_dev",
),
// id=5: API rate limiting discussion (matches: API, rate, limiting, throttle) // id=5: API rate limiting discussion (matches: API, rate, limiting, throttle)
(5, "discussion", "API rate limiting strategies for public endpoints", (
5,
"discussion",
"API rate limiting strategies for public endpoints",
"Discussion about implementing API rate limiting on public-facing endpoints. \ "Discussion about implementing API rate limiting on public-facing endpoints. \
Proposed approaches: token bucket with sliding window, fixed window counters, \ Proposed approaches: token bucket with sliding window, fixed window counters, \
or leaky bucket algorithm. Rate limits should be configurable per API key tier. \ or leaky bucket algorithm. Rate limits should be configurable per API key tier. \
Need to handle burst traffic during peak hours without throttling legitimate users.", Need to handle burst traffic during peak hours without throttling legitimate users.",
"architect"), "architect",
),
// id=6: UI/CSS issue (matches: CSS, styling, frontend, responsive, UI) // id=6: UI/CSS issue (matches: CSS, styling, frontend, responsive, UI)
(6, "issue", "CSS styling issues on mobile frontend", (
6,
"issue",
"CSS styling issues on mobile frontend",
"Multiple CSS styling problems on the mobile frontend. The navigation menu overlaps \ "Multiple CSS styling problems on the mobile frontend. The navigation menu overlaps \
content on screens smaller than 768px. Button text truncates on compact viewports. \ content on screens smaller than 768px. Button text truncates on compact viewports. \
Frontend responsive breakpoints need adjustment. The UI components library has \ Frontend responsive breakpoints need adjustment. The UI components library has \
conflicting CSS specificity with the theme system.", conflicting CSS specificity with the theme system.",
"frontend_dev"), "frontend_dev",
),
// id=7: CI/CD MR (matches: CI, CD, pipeline, deployment, Docker) // id=7: CI/CD MR (matches: CI, CD, pipeline, deployment, Docker)
(7, "merge_request", "Revamp CI/CD pipeline with Docker caching", (
7,
"merge_request",
"Revamp CI/CD pipeline with Docker caching",
"Complete overhaul of the CI/CD pipeline. Added Docker layer caching to speed up \ "Complete overhaul of the CI/CD pipeline. Added Docker layer caching to speed up \
builds. Deployment stages now run in parallel where possible. Added rollback \ builds. Deployment stages now run in parallel where possible. Added rollback \
support for failed deployments. Pipeline runtime reduced from 45min to 12min.", support for failed deployments. Pipeline runtime reduced from 45min to 12min.",
"devops_lead"), "devops_lead",
),
// id=8: Security issue (matches: security, vulnerability, XSS, injection) // id=8: Security issue (matches: security, vulnerability, XSS, injection)
(8, "issue", "Security vulnerability in form submission", (
8,
"issue",
"Security vulnerability in form submission",
"A cross-site scripting (XSS) vulnerability was found in the comment submission form. \ "A cross-site scripting (XSS) vulnerability was found in the comment submission form. \
User input is not properly sanitized before rendering. The security scanner also flagged \ User input is not properly sanitized before rendering. The security scanner also flagged \
potential SQL injection in the search endpoint. Both vulnerabilities need immediate patching.", potential SQL injection in the search endpoint. Both vulnerabilities need immediate patching.",
"security_team"), "security_team",
),
]; ];
for (id, source_type, title, content, author) in &documents { for (id, source_type, title, content, author) in &documents {
@@ -213,7 +237,11 @@ fn golden_queries_all_pass() {
if filtered_ids.len() < gq.min_results { if filtered_ids.len() < gq.min_results {
failures.push(format!( failures.push(format!(
"FAIL [{}] \"{}\": expected >= {} results, got {} (description: {})", "FAIL [{}] \"{}\": expected >= {} results, got {} (description: {})",
i, gq.query, gq.min_results, filtered_ids.len(), gq.description i,
gq.query,
gq.min_results,
filtered_ids.len(),
gq.description
)); ));
continue; continue;
} }

View File

@@ -51,13 +51,24 @@ fn insert_document(conn: &Connection, id: i64, source_type: &str, title: &str, c
.unwrap(); .unwrap();
} }
#[test] #[test]
fn lexical_mode_uses_fts_only() { fn lexical_mode_uses_fts_only() {
let (_tmp, conn) = create_test_db(); let (_tmp, conn) = create_test_db();
insert_document(&conn, 1, "issue", "Authentication bug", "OAuth token refresh fails silently."); insert_document(
insert_document(&conn, 2, "issue", "Database migration", "Migration script crashes on PostgreSQL."); &conn,
1,
"issue",
"Authentication bug",
"OAuth token refresh fails silently.",
);
insert_document(
&conn,
2,
"issue",
"Database migration",
"Migration script crashes on PostgreSQL.",
);
let filters = SearchFilters { let filters = SearchFilters {
limit: 10, limit: 10,
@@ -121,14 +132,23 @@ fn lexical_mode_no_embeddings_required() {
.unwrap(); .unwrap();
let results = search_fts(&conn, "testing", 10, FtsQueryMode::Safe).unwrap(); let results = search_fts(&conn, "testing", 10, FtsQueryMode::Safe).unwrap();
assert!(!results.is_empty(), "FTS should work without embeddings tables"); assert!(
!results.is_empty(),
"FTS should work without embeddings tables"
);
} }
#[test] #[test]
fn hybrid_mode_degrades_to_fts_without_client() { fn hybrid_mode_degrades_to_fts_without_client() {
let (_tmp, conn) = create_test_db(); let (_tmp, conn) = create_test_db();
insert_document(&conn, 1, "issue", "Performance issue", "Application is slow under load."); insert_document(
&conn,
1,
"issue",
"Performance issue",
"Application is slow under load.",
);
let filters = SearchFilters { let filters = SearchFilters {
limit: 10, limit: 10,
@@ -150,7 +170,11 @@ fn hybrid_mode_degrades_to_fts_without_client() {
assert!(!results.is_empty(), "Should fall back to FTS results"); assert!(!results.is_empty(), "Should fall back to FTS results");
// Should warn about missing Ollama client // Should warn about missing Ollama client
assert!( assert!(
warnings.iter().any(|w| w.to_lowercase().contains("vector") || w.to_lowercase().contains("ollama") || w.to_lowercase().contains("client") || w.to_lowercase().contains("fallback") || w.to_lowercase().contains("fts")), warnings.iter().any(|w| w.to_lowercase().contains("vector")
|| w.to_lowercase().contains("ollama")
|| w.to_lowercase().contains("client")
|| w.to_lowercase().contains("fallback")
|| w.to_lowercase().contains("fts")),
"Should produce a degradation warning, got: {:?}", "Should produce a degradation warning, got: {:?}",
warnings warnings
); );
@@ -177,8 +201,20 @@ fn rrf_ranking_combines_signals() {
fn filters_by_source_type() { fn filters_by_source_type() {
let (_tmp, conn) = create_test_db(); let (_tmp, conn) = create_test_db();
insert_document(&conn, 1, "issue", "Bug report", "Authentication bug in login flow."); insert_document(
insert_document(&conn, 2, "merge_request", "Fix auth", "Fixed authentication issue."); &conn,
1,
"issue",
"Bug report",
"Authentication bug in login flow.",
);
insert_document(
&conn,
2,
"merge_request",
"Fix auth",
"Fixed authentication issue.",
);
let filters = SearchFilters { let filters = SearchFilters {
source_type: Some(lore::documents::SourceType::Issue), source_type: Some(lore::documents::SourceType::Issue),
@@ -189,7 +225,11 @@ fn filters_by_source_type() {
let all_ids = vec![1, 2]; let all_ids = vec![1, 2];
let filtered = lore::search::apply_filters(&conn, &all_ids, &filters).unwrap(); let filtered = lore::search::apply_filters(&conn, &all_ids, &filters).unwrap();
assert_eq!(filtered.len(), 1, "Filter should remove non-issue documents"); assert_eq!(
filtered.len(),
1,
"Filter should remove non-issue documents"
);
assert_eq!(filtered[0], 1, "Only issue document should remain"); assert_eq!(filtered[0], 1, "Only issue document should remain");
} }

View File

@@ -26,7 +26,7 @@ fn apply_migrations(conn: &Connection, through_version: i32) {
let sql = std::fs::read_to_string(entries[0].path()).unwrap(); let sql = std::fs::read_to_string(entries[0].path()).unwrap();
conn.execute_batch(&sql) conn.execute_batch(&sql)
.expect(&format!("Migration {} failed", version)); .unwrap_or_else(|e| panic!("Migration {} failed: {}", version, e));
} }
} }