Removes module-level doc comments (//! lines) and excessive inline doc comments that were duplicating information already evident from:
- Function/struct names (self-documenting code)
- Type signatures (the "what" is clear from types)
- Implementation context (the "how" is clear from code)

Affected modules:
- cli/* - Removed command descriptions duplicating clap help text
- core/* - Removed module headers and obvious function docs
- documents/* - Removed extractor/regenerator/truncation docs
- embedding/* - Removed pipeline and chunking docs
- gitlab/* - Removed client and transformer docs (kept type definitions)
- ingestion/* - Removed orchestrator and ingestion docs
- search/* - Removed FTS and vector search docs

Philosophy: Code should be self-documenting. Comments should explain "why" (business decisions, non-obvious constraints), not "what" (which the code itself shows). This change reduces noise and maintenance burden while keeping the codebase just as understandable.

Retains comments for:
- Non-obvious business logic
- Important safety invariants
- Complex algorithm explanations
- Public API boundaries where generated docs matter

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
275 lines
8.1 KiB
Rust
275 lines
8.1 KiB
Rust
use rusqlite::Connection;
|
|
|
|
use crate::core::backoff::compute_next_attempt_at;
|
|
use crate::core::error::Result;
|
|
use crate::core::time::now_ms;
|
|
use crate::documents::SourceType;
|
|
|
|
/// Upper bound on the number of rows a single `get_dirty_sources` call returns;
/// it is bound to the `LIMIT` parameter of that query.
const DIRTY_SOURCES_BATCH_SIZE: usize = 500;
|
|
|
|
pub fn mark_dirty_tx(
|
|
tx: &rusqlite::Transaction<'_>,
|
|
source_type: SourceType,
|
|
source_id: i64,
|
|
) -> Result<()> {
|
|
tx.execute(
|
|
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
|
|
VALUES (?1, ?2, ?3)
|
|
ON CONFLICT(source_type, source_id) DO UPDATE SET
|
|
queued_at = excluded.queued_at,
|
|
attempt_count = 0,
|
|
last_attempt_at = NULL,
|
|
last_error = NULL,
|
|
next_attempt_at = NULL",
|
|
rusqlite::params![source_type.as_str(), source_id, now_ms()],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn mark_dirty(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
|
|
conn.execute(
|
|
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
|
|
VALUES (?1, ?2, ?3)
|
|
ON CONFLICT(source_type, source_id) DO UPDATE SET
|
|
queued_at = excluded.queued_at,
|
|
attempt_count = 0,
|
|
last_attempt_at = NULL,
|
|
last_error = NULL,
|
|
next_attempt_at = NULL",
|
|
rusqlite::params![source_type.as_str(), source_id, now_ms()],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn get_dirty_sources(conn: &Connection) -> Result<Vec<(SourceType, i64)>> {
|
|
let now = now_ms();
|
|
let mut stmt = conn.prepare(
|
|
"SELECT source_type, source_id FROM dirty_sources
|
|
WHERE next_attempt_at IS NULL OR next_attempt_at <= ?1
|
|
ORDER BY attempt_count ASC, queued_at ASC
|
|
LIMIT ?2",
|
|
)?;
|
|
let rows = stmt
|
|
.query_map(
|
|
rusqlite::params![now, DIRTY_SOURCES_BATCH_SIZE as i64],
|
|
|row| {
|
|
let st_str: String = row.get(0)?;
|
|
let source_id: i64 = row.get(1)?;
|
|
Ok((st_str, source_id))
|
|
},
|
|
)?
|
|
.collect::<std::result::Result<Vec<_>, _>>()?;
|
|
|
|
let mut results = Vec::with_capacity(rows.len());
|
|
for (st_str, source_id) in rows {
|
|
let source_type = SourceType::parse(&st_str).ok_or_else(|| {
|
|
crate::core::error::LoreError::Other(format!(
|
|
"Invalid source_type in dirty_sources: {}",
|
|
st_str
|
|
))
|
|
})?;
|
|
results.push((source_type, source_id));
|
|
}
|
|
Ok(results)
|
|
}
|
|
|
|
pub fn clear_dirty(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
|
|
conn.execute(
|
|
"DELETE FROM dirty_sources WHERE source_type = ?1 AND source_id = ?2",
|
|
rusqlite::params![source_type.as_str(), source_id],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn record_dirty_error(
|
|
conn: &Connection,
|
|
source_type: SourceType,
|
|
source_id: i64,
|
|
error: &str,
|
|
) -> Result<()> {
|
|
let now = now_ms();
|
|
let attempt_count: i64 = conn.query_row(
|
|
"SELECT attempt_count FROM dirty_sources WHERE source_type = ?1 AND source_id = ?2",
|
|
rusqlite::params![source_type.as_str(), source_id],
|
|
|row| row.get(0),
|
|
)?;
|
|
|
|
let new_attempt = attempt_count + 1;
|
|
let next_at = compute_next_attempt_at(now, new_attempt);
|
|
|
|
conn.execute(
|
|
"UPDATE dirty_sources SET
|
|
attempt_count = ?1,
|
|
last_attempt_at = ?2,
|
|
last_error = ?3,
|
|
next_attempt_at = ?4
|
|
WHERE source_type = ?5 AND source_id = ?6",
|
|
rusqlite::params![
|
|
new_attempt,
|
|
now,
|
|
error,
|
|
next_at,
|
|
source_type.as_str(),
|
|
source_id
|
|
],
|
|
)?;
|
|
Ok(())
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Opens an in-memory database containing only the `dirty_sources` table and its index.
    fn setup_db() -> Connection {
        let conn = Connection::open_in_memory().unwrap();
        conn.execute_batch("
            CREATE TABLE dirty_sources (
                source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion')),
                source_id INTEGER NOT NULL,
                queued_at INTEGER NOT NULL,
                attempt_count INTEGER NOT NULL DEFAULT 0,
                last_attempt_at INTEGER,
                last_error TEXT,
                next_attempt_at INTEGER,
                PRIMARY KEY(source_type, source_id)
            );
            CREATE INDEX idx_dirty_sources_next_attempt ON dirty_sources(next_attempt_at);
        ").unwrap();
        conn
    }

    /// Total number of rows currently in `dirty_sources`.
    fn queued_rows(conn: &Connection) -> i64 {
        conn.query_row("SELECT COUNT(*) FROM dirty_sources", [], |r| r.get(0))
            .unwrap()
    }

    /// `attempt_count` of the row whose `source_id` is 1.
    fn attempts_for_source_1(conn: &Connection) -> i64 {
        conn.query_row(
            "SELECT attempt_count FROM dirty_sources WHERE source_id = 1",
            [],
            |r| r.get(0),
        )
        .unwrap()
    }

    #[test]
    fn test_mark_dirty_inserts() {
        let conn = setup_db();

        mark_dirty(&conn, SourceType::Issue, 1).unwrap();

        assert_eq!(queued_rows(&conn), 1);
    }

    #[test]
    fn test_mark_dirty_tx_inserts() {
        let mut conn = setup_db();

        // `commit` consumes the transaction, releasing the borrow on `conn`.
        let tx = conn.transaction().unwrap();
        mark_dirty_tx(&tx, SourceType::Issue, 1).unwrap();
        tx.commit().unwrap();

        assert_eq!(queued_rows(&conn), 1);
    }

    #[test]
    fn test_requeue_resets_backoff() {
        let conn = setup_db();

        // One recorded failure bumps the attempt counter.
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        record_dirty_error(&conn, SourceType::Issue, 1, "test error").unwrap();
        assert_eq!(attempts_for_source_1(&conn), 1);

        // Re-queueing the same source wipes the retry state.
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        assert_eq!(attempts_for_source_1(&conn), 0);

        let retry_at: Option<i64> = conn
            .query_row(
                "SELECT next_attempt_at FROM dirty_sources WHERE source_id = 1",
                [],
                |r| r.get(0),
            )
            .unwrap();
        assert!(retry_at.is_none());
    }

    #[test]
    fn test_get_respects_backoff() {
        let conn = setup_db();
        mark_dirty(&conn, SourceType::Issue, 1).unwrap();

        // Push the retry time far into the future so the row is not yet eligible.
        conn.execute(
            "UPDATE dirty_sources SET next_attempt_at = 9999999999999 WHERE source_id = 1",
            [],
        )
        .unwrap();

        let eligible = get_dirty_sources(&conn).unwrap();
        assert!(eligible.is_empty());
    }

    #[test]
    fn test_get_orders_by_attempt_count() {
        let conn = setup_db();

        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        conn.execute(
            "UPDATE dirty_sources SET attempt_count = 2 WHERE source_id = 1",
            [],
        )
        .unwrap();
        mark_dirty(&conn, SourceType::Issue, 2).unwrap();

        // The never-attempted source (id 2) must come before the retried one (id 1).
        let ids: Vec<i64> = get_dirty_sources(&conn)
            .unwrap()
            .iter()
            .map(|(_, source_id)| *source_id)
            .collect();
        assert_eq!(ids, vec![2, 1]);
    }

    #[test]
    fn test_batch_size_500() {
        let conn = setup_db();
        for source_id in 0..600 {
            mark_dirty(&conn, SourceType::Issue, source_id).unwrap();
        }

        let batch = get_dirty_sources(&conn).unwrap();
        assert_eq!(batch.len(), 500);
    }

    #[test]
    fn test_clear_removes() {
        let conn = setup_db();

        mark_dirty(&conn, SourceType::Issue, 1).unwrap();
        clear_dirty(&conn, SourceType::Issue, 1).unwrap();

        assert_eq!(queued_rows(&conn), 0);
    }

    #[test]
    fn test_drain_loop() {
        let conn = setup_db();
        for source_id in 0..1200 {
            mark_dirty(&conn, SourceType::Issue, source_id).unwrap();
        }

        // Keep fetching and clearing batches until the queue reports nothing left.
        let mut drained = 0;
        let mut batch = get_dirty_sources(&conn).unwrap();
        while !batch.is_empty() {
            for &(source_type, source_id) in &batch {
                clear_dirty(&conn, source_type, source_id).unwrap();
            }
            drained += batch.len();
            batch = get_dirty_sources(&conn).unwrap();
        }

        assert_eq!(drained, 1200);
    }
}
|