Files
gitlore/src/cli/commands/generate_docs.rs
teernisse 097249f4e6 fix(robot): replace JSON serialization unwrap with graceful error handling
Replace serde_json::to_string(&output).unwrap() with match-based error
handling across all robot-mode JSON printers. On serialization failure,
the error is now written to stderr instead of panicking. This hardens
the CLI against unexpected Serialize failures in production.

Affected commands: count (2), embed, generate-docs, ingest (2), search,
stats, sync (2), sync-status, timeline.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 13:28:53 -05:00

345 lines
11 KiB
Rust

use crate::cli::render::Theme;
use rusqlite::Connection;
use serde::Serialize;
use tracing::info;
use crate::Config;
use crate::cli::robot::RobotMeta;
use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::documents::{SourceType, regenerate_dirty_documents};
const FULL_MODE_CHUNK_SIZE: i64 = 2000;
#[derive(Debug, Default)]
pub struct GenerateDocsResult {
pub regenerated: usize,
pub unchanged: usize,
pub errored: usize,
pub seeded: usize,
pub full_mode: bool,
}
pub fn run_generate_docs(
config: &Config,
full: bool,
project_filter: Option<&str>,
progress_callback: Option<Box<dyn Fn(usize, usize)>>,
) -> Result<GenerateDocsResult> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
let mut result = GenerateDocsResult {
full_mode: full,
..Default::default()
};
if full {
result.seeded += seed_dirty(&conn, SourceType::Issue, project_filter)?;
result.seeded += seed_dirty(&conn, SourceType::MergeRequest, project_filter)?;
result.seeded += seed_dirty(&conn, SourceType::Discussion, project_filter)?;
result.seeded += seed_dirty_notes(&conn, project_filter)?;
}
let regen =
regenerate_dirty_documents(&conn, progress_callback.as_ref().map(|cb| cb.as_ref()))?;
result.regenerated = regen.regenerated;
result.unchanged = regen.unchanged;
result.errored = regen.errored;
if full {
let _ = conn.execute(
"INSERT INTO documents_fts(documents_fts) VALUES('optimize')",
[],
);
info!("FTS index optimized after full rebuild");
}
Ok(result)
}
fn seed_dirty(
conn: &Connection,
source_type: SourceType,
project_filter: Option<&str>,
) -> Result<usize> {
let table = match source_type {
SourceType::Issue => "issues",
SourceType::MergeRequest => "merge_requests",
SourceType::Discussion => "discussions",
SourceType::Note => {
// NOTE-2E will implement seed_dirty_notes separately (needs is_system filter)
unreachable!("Note seeding handled by seed_dirty_notes, not seed_dirty")
}
};
let type_str = source_type.as_str();
let now = chrono::Utc::now().timestamp_millis();
let mut total_seeded: usize = 0;
let mut last_id: i64 = 0;
loop {
let inserted = if let Some(project) = project_filter {
let project_id = resolve_project(conn, project)?;
conn.execute(
&format!(
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
SELECT ?1, id, ?2, 0, NULL, NULL, NULL
FROM {table} WHERE id > ?3 AND project_id = ?4 ORDER BY id LIMIT ?5
ON CONFLICT(source_type, source_id) DO NOTHING"
),
rusqlite::params![type_str, now, last_id, project_id, FULL_MODE_CHUNK_SIZE],
)?
} else {
conn.execute(
&format!(
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
SELECT ?1, id, ?2, 0, NULL, NULL, NULL
FROM {table} WHERE id > ?3 ORDER BY id LIMIT ?4
ON CONFLICT(source_type, source_id) DO NOTHING"
),
rusqlite::params![type_str, now, last_id, FULL_MODE_CHUNK_SIZE],
)?
};
if inserted == 0 {
break;
}
let max_id: i64 = conn.query_row(
&format!(
"SELECT MAX(id) FROM (SELECT id FROM {table} WHERE id > ?1 ORDER BY id LIMIT ?2)",
table = table
),
rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE],
|row| row.get(0),
)?;
total_seeded += inserted;
last_id = max_id;
}
info!(
source_type = type_str,
seeded = total_seeded,
"Seeded dirty_sources"
);
Ok(total_seeded)
}
fn seed_dirty_notes(conn: &Connection, project_filter: Option<&str>) -> Result<usize> {
let now = chrono::Utc::now().timestamp_millis();
let mut total_seeded: usize = 0;
let mut last_id: i64 = 0;
loop {
let inserted = if let Some(project) = project_filter {
let project_id = resolve_project(conn, project)?;
conn.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
SELECT 'note', id, ?1, 0, NULL, NULL, NULL
FROM notes WHERE id > ?2 AND project_id = ?3 AND is_system = 0 ORDER BY id LIMIT ?4
ON CONFLICT(source_type, source_id) DO NOTHING",
rusqlite::params![now, last_id, project_id, FULL_MODE_CHUNK_SIZE],
)?
} else {
conn.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
SELECT 'note', id, ?1, 0, NULL, NULL, NULL
FROM notes WHERE id > ?2 AND is_system = 0 ORDER BY id LIMIT ?3
ON CONFLICT(source_type, source_id) DO NOTHING",
rusqlite::params![now, last_id, FULL_MODE_CHUNK_SIZE],
)?
};
if inserted == 0 {
break;
}
let max_id: i64 = conn.query_row(
"SELECT MAX(id) FROM (SELECT id FROM notes WHERE id > ?1 AND is_system = 0 ORDER BY id LIMIT ?2)",
rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE],
|row| row.get(0),
)?;
total_seeded += inserted;
last_id = max_id;
}
info!(
source_type = "note",
seeded = total_seeded,
"Seeded dirty_sources"
);
Ok(total_seeded)
}
pub fn print_generate_docs(result: &GenerateDocsResult) {
let mode = if result.full_mode {
"full"
} else {
"incremental"
};
if result.regenerated == 0 && result.errored == 0 {
println!(
"\n {} no documents to update ({})",
Theme::success().bold().render("Docs"),
mode
);
return;
}
// Headline
println!(
"\n {} {} documents ({})",
Theme::success().bold().render("Generated"),
Theme::bold().render(&result.regenerated.to_string()),
mode
);
// Detail line: compact middle-dot format, zero-suppressed
let mut details: Vec<String> = Vec::new();
if result.full_mode && result.seeded > 0 {
details.push(format!("{} seeded", result.seeded));
}
if result.unchanged > 0 {
details.push(format!("{} unchanged", result.unchanged));
}
if !details.is_empty() {
println!(" {}", Theme::dim().render(&details.join(" \u{b7} ")));
}
if result.errored > 0 {
println!(
" {}",
Theme::error().render(&format!("{} errored", result.errored))
);
}
}
#[derive(Serialize)]
struct GenerateDocsJsonOutput {
ok: bool,
data: GenerateDocsJsonData,
meta: RobotMeta,
}
#[derive(Serialize)]
struct GenerateDocsJsonData {
mode: String,
#[serde(skip_serializing_if = "Option::is_none")]
seeded: Option<usize>,
regenerated: usize,
unchanged: usize,
errored: usize,
}
pub fn print_generate_docs_json(result: &GenerateDocsResult, elapsed_ms: u64) {
let output = GenerateDocsJsonOutput {
ok: true,
data: GenerateDocsJsonData {
mode: if result.full_mode {
"full".to_string()
} else {
"incremental".to_string()
},
seeded: if result.full_mode {
Some(result.seeded)
} else {
None
},
regenerated: result.regenerated,
unchanged: result.unchanged,
errored: result.errored,
},
meta: RobotMeta { elapsed_ms },
};
match serde_json::to_string(&output) {
Ok(json) => println!("{json}"),
Err(e) => eprintln!("Error serializing to JSON: {e}"),
}
}
#[cfg(test)]
mod tests {
use std::path::Path;
use crate::core::db::{create_connection, run_migrations};
use super::*;
fn setup_db() -> Connection {
let conn = create_connection(Path::new(":memory:")).unwrap();
run_migrations(&conn).unwrap();
conn.execute(
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) VALUES (1, 100, 'group/project', 'https://gitlab.com/group/project')",
[],
).unwrap();
conn.execute(
"INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 1, 'Test', 'opened', 1000, 2000, 3000)",
[],
).unwrap();
conn.execute(
"INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
[],
).unwrap();
conn
}
fn insert_note(conn: &Connection, id: i64, gitlab_id: i64, is_system: bool) {
conn.execute(
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (?1, ?2, 1, 1, 'alice', 'note body', 1000, 2000, 3000, ?3)",
rusqlite::params![id, gitlab_id, is_system as i32],
).unwrap();
}
#[test]
fn test_full_seed_includes_notes() {
let conn = setup_db();
insert_note(&conn, 1, 101, false);
insert_note(&conn, 2, 102, false);
insert_note(&conn, 3, 103, false);
insert_note(&conn, 4, 104, true); // system note — should be excluded
let seeded = seed_dirty_notes(&conn, None).unwrap();
assert_eq!(seeded, 3);
let count: i64 = conn
.query_row(
"SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
[],
|row| row.get(0),
)
.unwrap();
assert_eq!(count, 3);
}
#[test]
fn test_note_document_count_stable_after_second_generate_docs_full() {
let conn = setup_db();
insert_note(&conn, 1, 101, false);
insert_note(&conn, 2, 102, false);
let first = seed_dirty_notes(&conn, None).unwrap();
assert_eq!(first, 2);
// Second run should be idempotent (ON CONFLICT DO NOTHING)
let second = seed_dirty_notes(&conn, None).unwrap();
assert_eq!(second, 0);
let count: i64 = conn
.query_row(
"SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
[],
|row| row.get(0),
)
.unwrap();
assert_eq!(count, 2);
}
}