use crate::cli::render::Theme; use rusqlite::Connection; use serde::Serialize; use tracing::info; use crate::Config; use crate::cli::robot::RobotMeta; use crate::core::db::create_connection; use crate::core::error::Result; use crate::core::paths::get_db_path; use crate::core::project::resolve_project; use crate::documents::{SourceType, regenerate_dirty_documents}; const FULL_MODE_CHUNK_SIZE: i64 = 2000; #[derive(Debug, Default)] pub struct GenerateDocsResult { pub regenerated: usize, pub unchanged: usize, pub errored: usize, pub seeded: usize, pub full_mode: bool, } pub fn run_generate_docs( config: &Config, full: bool, project_filter: Option<&str>, progress_callback: Option>, ) -> Result { let db_path = get_db_path(config.storage.db_path.as_deref()); let conn = create_connection(&db_path)?; let mut result = GenerateDocsResult { full_mode: full, ..Default::default() }; if full { result.seeded += seed_dirty(&conn, SourceType::Issue, project_filter)?; result.seeded += seed_dirty(&conn, SourceType::MergeRequest, project_filter)?; result.seeded += seed_dirty(&conn, SourceType::Discussion, project_filter)?; result.seeded += seed_dirty_notes(&conn, project_filter)?; } let regen = regenerate_dirty_documents(&conn, progress_callback.as_ref().map(|cb| cb.as_ref()))?; result.regenerated = regen.regenerated; result.unchanged = regen.unchanged; result.errored = regen.errored; if full { let _ = conn.execute( "INSERT INTO documents_fts(documents_fts) VALUES('optimize')", [], ); info!("FTS index optimized after full rebuild"); } Ok(result) } fn seed_dirty( conn: &Connection, source_type: SourceType, project_filter: Option<&str>, ) -> Result { let table = match source_type { SourceType::Issue => "issues", SourceType::MergeRequest => "merge_requests", SourceType::Discussion => "discussions", SourceType::Note => { // NOTE-2E will implement seed_dirty_notes separately (needs is_system filter) unreachable!("Note seeding handled by seed_dirty_notes, not seed_dirty") } }; let type_str = source_type.as_str(); let now = chrono::Utc::now().timestamp_millis(); let mut total_seeded: usize = 0; let mut last_id: i64 = 0; loop { let inserted = if let Some(project) = project_filter { let project_id = resolve_project(conn, project)?; conn.execute( &format!( "INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at) SELECT ?1, id, ?2, 0, NULL, NULL, NULL FROM {table} WHERE id > ?3 AND project_id = ?4 ORDER BY id LIMIT ?5 ON CONFLICT(source_type, source_id) DO NOTHING" ), rusqlite::params![type_str, now, last_id, project_id, FULL_MODE_CHUNK_SIZE], )? } else { conn.execute( &format!( "INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at) SELECT ?1, id, ?2, 0, NULL, NULL, NULL FROM {table} WHERE id > ?3 ORDER BY id LIMIT ?4 ON CONFLICT(source_type, source_id) DO NOTHING" ), rusqlite::params![type_str, now, last_id, FULL_MODE_CHUNK_SIZE], )? }; if inserted == 0 { break; } let max_id: i64 = conn.query_row( &format!( "SELECT MAX(id) FROM (SELECT id FROM {table} WHERE id > ?1 ORDER BY id LIMIT ?2)", table = table ), rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE], |row| row.get(0), )?; total_seeded += inserted; last_id = max_id; } info!( source_type = type_str, seeded = total_seeded, "Seeded dirty_sources" ); Ok(total_seeded) } fn seed_dirty_notes(conn: &Connection, project_filter: Option<&str>) -> Result { let now = chrono::Utc::now().timestamp_millis(); let mut total_seeded: usize = 0; let mut last_id: i64 = 0; loop { let inserted = if let Some(project) = project_filter { let project_id = resolve_project(conn, project)?; conn.execute( "INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at) SELECT 'note', id, ?1, 0, NULL, NULL, NULL FROM notes WHERE id > ?2 AND project_id = ?3 AND is_system = 0 ORDER BY id LIMIT ?4 ON CONFLICT(source_type, source_id) DO NOTHING", rusqlite::params![now, last_id, project_id, FULL_MODE_CHUNK_SIZE], )? } else { conn.execute( "INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at) SELECT 'note', id, ?1, 0, NULL, NULL, NULL FROM notes WHERE id > ?2 AND is_system = 0 ORDER BY id LIMIT ?3 ON CONFLICT(source_type, source_id) DO NOTHING", rusqlite::params![now, last_id, FULL_MODE_CHUNK_SIZE], )? }; if inserted == 0 { break; } let max_id: i64 = conn.query_row( "SELECT MAX(id) FROM (SELECT id FROM notes WHERE id > ?1 AND is_system = 0 ORDER BY id LIMIT ?2)", rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE], |row| row.get(0), )?; total_seeded += inserted; last_id = max_id; } info!( source_type = "note", seeded = total_seeded, "Seeded dirty_sources" ); Ok(total_seeded) } pub fn print_generate_docs(result: &GenerateDocsResult) { let mode = if result.full_mode { "full" } else { "incremental" }; if result.regenerated == 0 && result.errored == 0 { println!( "\n {} no documents to update ({})", Theme::success().bold().render("Docs"), mode ); return; } // Headline println!( "\n {} {} documents ({})", Theme::success().bold().render("Generated"), Theme::bold().render(&result.regenerated.to_string()), mode ); // Detail line: compact middle-dot format, zero-suppressed let mut details: Vec = Vec::new(); if result.full_mode && result.seeded > 0 { details.push(format!("{} seeded", result.seeded)); } if result.unchanged > 0 { details.push(format!("{} unchanged", result.unchanged)); } if !details.is_empty() { println!(" {}", Theme::dim().render(&details.join(" \u{b7} "))); } if result.errored > 0 { println!( " {}", Theme::error().render(&format!("{} errored", result.errored)) ); } } #[derive(Serialize)] struct GenerateDocsJsonOutput { ok: bool, data: GenerateDocsJsonData, meta: RobotMeta, } #[derive(Serialize)] struct GenerateDocsJsonData { mode: String, #[serde(skip_serializing_if = "Option::is_none")] seeded: Option, regenerated: usize, unchanged: usize, errored: usize, } pub fn print_generate_docs_json(result: &GenerateDocsResult, elapsed_ms: u64) { let output = GenerateDocsJsonOutput { ok: true, data: GenerateDocsJsonData { mode: if result.full_mode { "full".to_string() } else { "incremental".to_string() }, seeded: if result.full_mode { Some(result.seeded) } else { None }, regenerated: result.regenerated, unchanged: result.unchanged, errored: result.errored, }, meta: RobotMeta { elapsed_ms }, }; match serde_json::to_string(&output) { Ok(json) => println!("{json}"), Err(e) => eprintln!("Error serializing to JSON: {e}"), } } #[cfg(test)] mod tests { use std::path::Path; use crate::core::db::{create_connection, run_migrations}; use super::*; fn setup_db() -> Connection { let conn = create_connection(Path::new(":memory:")).unwrap(); run_migrations(&conn).unwrap(); conn.execute( "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) VALUES (1, 100, 'group/project', 'https://gitlab.com/group/project')", [], ).unwrap(); conn.execute( "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 1, 'Test', 'opened', 1000, 2000, 3000)", [], ).unwrap(); conn.execute( "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)", [], ).unwrap(); conn } fn insert_note(conn: &Connection, id: i64, gitlab_id: i64, is_system: bool) { conn.execute( "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (?1, ?2, 1, 1, 'alice', 'note body', 1000, 2000, 3000, ?3)", rusqlite::params![id, gitlab_id, is_system as i32], ).unwrap(); } #[test] fn test_full_seed_includes_notes() { let conn = setup_db(); insert_note(&conn, 1, 101, false); insert_note(&conn, 2, 102, false); insert_note(&conn, 3, 103, false); insert_note(&conn, 4, 104, true); // system note — should be excluded let seeded = seed_dirty_notes(&conn, None).unwrap(); assert_eq!(seeded, 3); let count: i64 = conn .query_row( "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'", [], |row| row.get(0), ) .unwrap(); assert_eq!(count, 3); } #[test] fn test_note_document_count_stable_after_second_generate_docs_full() { let conn = setup_db(); insert_note(&conn, 1, 101, false); insert_note(&conn, 2, 102, false); let first = seed_dirty_notes(&conn, None).unwrap(); assert_eq!(first, 2); // Second run should be idempotent (ON CONFLICT DO NOTHING) let second = seed_dirty_notes(&conn, None).unwrap(); assert_eq!(second, 0); let count: i64 = conn .query_row( "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'", [], |row| row.get(0), ) .unwrap(); assert_eq!(count, 2); } }