Replace serde_json::to_string(&output).unwrap() with match-based error handling across all robot-mode JSON printers. On serialization failure, the error is now written to stderr instead of panicking. This hardens the CLI against unexpected Serialize failures in production. Affected commands: count (2), embed, generate-docs, ingest (2), search, stats, sync (2), sync-status, timeline. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
345 lines
11 KiB
Rust
345 lines
11 KiB
Rust
use crate::cli::render::Theme;
|
|
use rusqlite::Connection;
|
|
use serde::Serialize;
|
|
use tracing::info;
|
|
|
|
use crate::Config;
|
|
use crate::cli::robot::RobotMeta;
|
|
use crate::core::db::create_connection;
|
|
use crate::core::error::Result;
|
|
use crate::core::paths::get_db_path;
|
|
use crate::core::project::resolve_project;
|
|
use crate::documents::{SourceType, regenerate_dirty_documents};
|
|
|
|
/// Upper bound on the number of source rows handled per chunk while seeding
/// `dirty_sources`; it is bound as the `LIMIT` of the chunked seeding queries.
const FULL_MODE_CHUNK_SIZE: i64 = 2000;
|
|
|
|
/// Counters describing the outcome of one `generate-docs` run.
#[derive(Debug, Default)]
pub struct GenerateDocsResult {
    /// `regenerated` count reported by the document regeneration pass.
    pub regenerated: usize,
    /// `unchanged` count reported by the document regeneration pass.
    pub unchanged: usize,
    /// `errored` count reported by the document regeneration pass.
    pub errored: usize,
    /// Rows newly queued into `dirty_sources` by the full-mode seeding step
    /// (stays 0 for incremental runs).
    pub seeded: usize,
    /// Whether the run was started in full mode.
    pub full_mode: bool,
}
|
|
|
|
pub fn run_generate_docs(
|
|
config: &Config,
|
|
full: bool,
|
|
project_filter: Option<&str>,
|
|
progress_callback: Option<Box<dyn Fn(usize, usize)>>,
|
|
) -> Result<GenerateDocsResult> {
|
|
let db_path = get_db_path(config.storage.db_path.as_deref());
|
|
let conn = create_connection(&db_path)?;
|
|
let mut result = GenerateDocsResult {
|
|
full_mode: full,
|
|
..Default::default()
|
|
};
|
|
|
|
if full {
|
|
result.seeded += seed_dirty(&conn, SourceType::Issue, project_filter)?;
|
|
result.seeded += seed_dirty(&conn, SourceType::MergeRequest, project_filter)?;
|
|
result.seeded += seed_dirty(&conn, SourceType::Discussion, project_filter)?;
|
|
result.seeded += seed_dirty_notes(&conn, project_filter)?;
|
|
}
|
|
|
|
let regen =
|
|
regenerate_dirty_documents(&conn, progress_callback.as_ref().map(|cb| cb.as_ref()))?;
|
|
result.regenerated = regen.regenerated;
|
|
result.unchanged = regen.unchanged;
|
|
result.errored = regen.errored;
|
|
|
|
if full {
|
|
let _ = conn.execute(
|
|
"INSERT INTO documents_fts(documents_fts) VALUES('optimize')",
|
|
[],
|
|
);
|
|
info!("FTS index optimized after full rebuild");
|
|
}
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
fn seed_dirty(
|
|
conn: &Connection,
|
|
source_type: SourceType,
|
|
project_filter: Option<&str>,
|
|
) -> Result<usize> {
|
|
let table = match source_type {
|
|
SourceType::Issue => "issues",
|
|
SourceType::MergeRequest => "merge_requests",
|
|
SourceType::Discussion => "discussions",
|
|
SourceType::Note => {
|
|
// NOTE-2E will implement seed_dirty_notes separately (needs is_system filter)
|
|
unreachable!("Note seeding handled by seed_dirty_notes, not seed_dirty")
|
|
}
|
|
};
|
|
let type_str = source_type.as_str();
|
|
let now = chrono::Utc::now().timestamp_millis();
|
|
|
|
let mut total_seeded: usize = 0;
|
|
let mut last_id: i64 = 0;
|
|
|
|
loop {
|
|
let inserted = if let Some(project) = project_filter {
|
|
let project_id = resolve_project(conn, project)?;
|
|
|
|
conn.execute(
|
|
&format!(
|
|
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
|
|
SELECT ?1, id, ?2, 0, NULL, NULL, NULL
|
|
FROM {table} WHERE id > ?3 AND project_id = ?4 ORDER BY id LIMIT ?5
|
|
ON CONFLICT(source_type, source_id) DO NOTHING"
|
|
),
|
|
rusqlite::params![type_str, now, last_id, project_id, FULL_MODE_CHUNK_SIZE],
|
|
)?
|
|
} else {
|
|
conn.execute(
|
|
&format!(
|
|
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
|
|
SELECT ?1, id, ?2, 0, NULL, NULL, NULL
|
|
FROM {table} WHERE id > ?3 ORDER BY id LIMIT ?4
|
|
ON CONFLICT(source_type, source_id) DO NOTHING"
|
|
),
|
|
rusqlite::params![type_str, now, last_id, FULL_MODE_CHUNK_SIZE],
|
|
)?
|
|
};
|
|
|
|
if inserted == 0 {
|
|
break;
|
|
}
|
|
|
|
let max_id: i64 = conn.query_row(
|
|
&format!(
|
|
"SELECT MAX(id) FROM (SELECT id FROM {table} WHERE id > ?1 ORDER BY id LIMIT ?2)",
|
|
table = table
|
|
),
|
|
rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE],
|
|
|row| row.get(0),
|
|
)?;
|
|
|
|
total_seeded += inserted;
|
|
last_id = max_id;
|
|
}
|
|
|
|
info!(
|
|
source_type = type_str,
|
|
seeded = total_seeded,
|
|
"Seeded dirty_sources"
|
|
);
|
|
|
|
Ok(total_seeded)
|
|
}
|
|
|
|
fn seed_dirty_notes(conn: &Connection, project_filter: Option<&str>) -> Result<usize> {
|
|
let now = chrono::Utc::now().timestamp_millis();
|
|
let mut total_seeded: usize = 0;
|
|
let mut last_id: i64 = 0;
|
|
|
|
loop {
|
|
let inserted = if let Some(project) = project_filter {
|
|
let project_id = resolve_project(conn, project)?;
|
|
|
|
conn.execute(
|
|
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
|
|
SELECT 'note', id, ?1, 0, NULL, NULL, NULL
|
|
FROM notes WHERE id > ?2 AND project_id = ?3 AND is_system = 0 ORDER BY id LIMIT ?4
|
|
ON CONFLICT(source_type, source_id) DO NOTHING",
|
|
rusqlite::params![now, last_id, project_id, FULL_MODE_CHUNK_SIZE],
|
|
)?
|
|
} else {
|
|
conn.execute(
|
|
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
|
|
SELECT 'note', id, ?1, 0, NULL, NULL, NULL
|
|
FROM notes WHERE id > ?2 AND is_system = 0 ORDER BY id LIMIT ?3
|
|
ON CONFLICT(source_type, source_id) DO NOTHING",
|
|
rusqlite::params![now, last_id, FULL_MODE_CHUNK_SIZE],
|
|
)?
|
|
};
|
|
|
|
if inserted == 0 {
|
|
break;
|
|
}
|
|
|
|
let max_id: i64 = conn.query_row(
|
|
"SELECT MAX(id) FROM (SELECT id FROM notes WHERE id > ?1 AND is_system = 0 ORDER BY id LIMIT ?2)",
|
|
rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE],
|
|
|row| row.get(0),
|
|
)?;
|
|
|
|
total_seeded += inserted;
|
|
last_id = max_id;
|
|
}
|
|
|
|
info!(
|
|
source_type = "note",
|
|
seeded = total_seeded,
|
|
"Seeded dirty_sources"
|
|
);
|
|
|
|
Ok(total_seeded)
|
|
}
|
|
|
|
pub fn print_generate_docs(result: &GenerateDocsResult) {
|
|
let mode = if result.full_mode {
|
|
"full"
|
|
} else {
|
|
"incremental"
|
|
};
|
|
|
|
if result.regenerated == 0 && result.errored == 0 {
|
|
println!(
|
|
"\n {} no documents to update ({})",
|
|
Theme::success().bold().render("Docs"),
|
|
mode
|
|
);
|
|
return;
|
|
}
|
|
|
|
// Headline
|
|
println!(
|
|
"\n {} {} documents ({})",
|
|
Theme::success().bold().render("Generated"),
|
|
Theme::bold().render(&result.regenerated.to_string()),
|
|
mode
|
|
);
|
|
|
|
// Detail line: compact middle-dot format, zero-suppressed
|
|
let mut details: Vec<String> = Vec::new();
|
|
if result.full_mode && result.seeded > 0 {
|
|
details.push(format!("{} seeded", result.seeded));
|
|
}
|
|
if result.unchanged > 0 {
|
|
details.push(format!("{} unchanged", result.unchanged));
|
|
}
|
|
if !details.is_empty() {
|
|
println!(" {}", Theme::dim().render(&details.join(" \u{b7} ")));
|
|
}
|
|
if result.errored > 0 {
|
|
println!(
|
|
" {}",
|
|
Theme::error().render(&format!("{} errored", result.errored))
|
|
);
|
|
}
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
struct GenerateDocsJsonOutput {
|
|
ok: bool,
|
|
data: GenerateDocsJsonData,
|
|
meta: RobotMeta,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
struct GenerateDocsJsonData {
|
|
mode: String,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
seeded: Option<usize>,
|
|
regenerated: usize,
|
|
unchanged: usize,
|
|
errored: usize,
|
|
}
|
|
|
|
pub fn print_generate_docs_json(result: &GenerateDocsResult, elapsed_ms: u64) {
|
|
let output = GenerateDocsJsonOutput {
|
|
ok: true,
|
|
data: GenerateDocsJsonData {
|
|
mode: if result.full_mode {
|
|
"full".to_string()
|
|
} else {
|
|
"incremental".to_string()
|
|
},
|
|
seeded: if result.full_mode {
|
|
Some(result.seeded)
|
|
} else {
|
|
None
|
|
},
|
|
regenerated: result.regenerated,
|
|
unchanged: result.unchanged,
|
|
errored: result.errored,
|
|
},
|
|
meta: RobotMeta { elapsed_ms },
|
|
};
|
|
match serde_json::to_string(&output) {
|
|
Ok(json) => println!("{json}"),
|
|
Err(e) => eprintln!("Error serializing to JSON: {e}"),
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use std::path::Path;

    use crate::core::db::{create_connection, run_migrations};

    use super::*;

    /// Opens a migrated in-memory database holding one project, one issue and
    /// one discussion that notes can attach to.
    fn setup_db() -> Connection {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();

        let fixtures = [
            "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) VALUES (1, 100, 'group/project', 'https://gitlab.com/group/project')",
            "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 1, 'Test', 'opened', 1000, 2000, 3000)",
            "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        ];
        for sql in fixtures {
            conn.execute(sql, []).unwrap();
        }

        conn
    }

    /// Inserts a note on discussion 1 of project 1.
    fn insert_note(conn: &Connection, id: i64, gitlab_id: i64, is_system: bool) {
        let system_flag = i32::from(is_system);
        conn.execute(
            "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (?1, ?2, 1, 1, 'alice', 'note body', 1000, 2000, 3000, ?3)",
            rusqlite::params![id, gitlab_id, system_flag],
        )
        .unwrap();
    }

    /// Counts the `dirty_sources` rows whose source type is `'note'`.
    fn queued_note_count(conn: &Connection) -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap()
    }

    #[test]
    fn test_full_seed_includes_notes() {
        let conn = setup_db();
        for (id, gitlab_id) in [(1, 101), (2, 102), (3, 103)] {
            insert_note(&conn, id, gitlab_id, false);
        }
        insert_note(&conn, 4, 104, true); // system note — should be excluded

        let seeded = seed_dirty_notes(&conn, None).unwrap();
        assert_eq!(seeded, 3);

        assert_eq!(queued_note_count(&conn), 3);
    }

    #[test]
    fn test_note_document_count_stable_after_second_generate_docs_full() {
        let conn = setup_db();
        insert_note(&conn, 1, 101, false);
        insert_note(&conn, 2, 102, false);

        let first = seed_dirty_notes(&conn, None).unwrap();
        assert_eq!(first, 2);

        // Second run should be idempotent (ON CONFLICT DO NOTHING)
        let second = seed_dirty_notes(&conn, None).unwrap();
        assert_eq!(second, 0);

        assert_eq!(queued_note_count(&conn), 2);
    }
}
|