3 Commits

Author SHA1 Message Date
teernisse
ba4ba9f508 feat: complete TUI Phase 0 — Toolchain Gate
Implements the full lore-tui crate scaffold with 6 Phase 0 modules:

- message.rs: Msg (~40 variants), Screen (12), EntityKey, AppError, InputMode
- clock.rs: Clock trait with SystemClock + FakeClock for deterministic testing
- safety.rs: Terminal sanitizer (ANSI filter), URL policy, PII/secret redaction
- db.rs: DbManager with a 3-reader pool (round-robin) + dedicated writer (WAL mode)
- theme.rs: Flexoki adaptive theme (19 slots), state/event colors, label styling
- app.rs: Minimal LoreApp Model trait impl proving FrankenTUI integration

68 tests, clippy clean, fmt clean. Closes bd-3ddw, bd-c9gk, bd-2lg6,
bd-3ir1, bd-2kop, bd-5ofk, bd-2emv, bd-1cj0.
2026-02-12 15:15:15 -05:00
teernisse
ad4dd6e855 release: v0.7.0 2026-02-12 13:31:57 -05:00
teernisse
83cd16c918 feat: implement per-note search and document pipeline
- Add SourceType::Note with extract_note_document() and ParentMetadataCache
- Migration 022: composite indexes for notes queries + author_id column
- Migration 024: table rebuild adding 'note' to CHECK constraints, defense triggers
- Migration 025: backfill existing non-system notes into dirty queue
- Add lore notes CLI command with 17 filter options (author, path, resolution, etc.)
- Support table/json/jsonl/csv output formats with field selection
- Wire note dirty tracking through discussion and MR discussion ingestion
- Fix test_migration_024_preserves_existing_data off-by-one (tested wrong migration)
- Fix upsert_document_inner returning false for label/path-only changes
2026-02-12 13:31:24 -05:00
35 changed files with 10553 additions and 128 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1 +1 @@
bd-2kop
bd-20p9

1
.gitignore vendored
View File

@@ -44,3 +44,4 @@ lore.config.json
# Added by cargo
/target
**/target/

2
Cargo.lock generated
View File

@@ -1106,7 +1106,7 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "lore"
version = "0.6.2"
version = "0.7.0"
dependencies = [
"async-stream",
"chrono",

View File

@@ -1,6 +1,6 @@
[package]
name = "lore"
version = "0.6.2"
version = "0.7.0"
edition = "2024"
description = "Gitlore - Local GitLab data management with semantic search"
authors = ["Taylor Eernisse"]

3168
crates/lore-tui/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,39 @@
[package]
name = "lore-tui"
version = "0.1.0"
edition = "2024"
description = "Terminal UI for Gitlore — local GitLab data explorer"
authors = ["Taylor Eernisse"]
license = "MIT"
[[bin]]
name = "lore-tui"
path = "src/main.rs"
[dependencies]
# FrankenTUI (Elm-architecture TUI framework)
ftui = "0.1.1"
# Lore library (config, db, ingestion, search, etc.)
lore = { path = "../.." }
# CLI
clap = { version = "4", features = ["derive", "env"] }
# Error handling
anyhow = "1"
# Time
chrono = { version = "0.4", features = ["serde"] }
# Paths
dirs = "6"
# Database (read-only queries from TUI)
rusqlite = { version = "0.38", features = ["bundled"] }
# Terminal (crossterm for raw mode + event reading, used by ftui runtime)
crossterm = "0.28"
# Regex (used by safety module for PII/secret redaction)
regex = "1"

View File

@@ -0,0 +1,4 @@
[toolchain]
channel = "nightly-2026-02-08"
profile = "minimal"
components = ["rustfmt", "clippy"]

101
crates/lore-tui/src/app.rs Normal file
View File

@@ -0,0 +1,101 @@
#![allow(dead_code)] // Phase 0: minimal scaffold, fleshed out in bd-6pmy
//! Minimal FrankenTUI Model implementation for the lore TUI.
//!
//! This is the Phase 0 integration proof — validates that the ftui Model trait
//! compiles with our Msg type and produces basic output. The full LoreApp with
//! screen routing, navigation stack, and action dispatch comes in bd-6pmy.
use ftui::{Cmd, Frame, Model};
use crate::message::Msg;
// ---------------------------------------------------------------------------
// LoreApp
// ---------------------------------------------------------------------------
/// Root model for the lore TUI.
///
/// Phase 0: a field-less scaffold. Its `Model` impl in this file draws nothing
/// (the view is a no-op) and only reacts to `Msg::Quit`.
/// Phase 1 (bd-6pmy) will add screen routing, DbManager, theme, and subscriptions.
pub struct LoreApp;
impl Model for LoreApp {
    type Message = Msg;

    /// Startup hook. The scaffold has nothing to schedule, so it returns the
    /// empty command.
    fn init(&mut self) -> Cmd<Self::Message> {
        Cmd::none()
    }

    /// Handle one message: `Msg::Quit` produces the quit command, every other
    /// message is ignored and produces the empty command.
    fn update(&mut self, msg: Self::Message) -> Cmd<Self::Message> {
        if matches!(msg, Msg::Quit) {
            return Cmd::quit();
        }
        // Phase 0 has no state to update for any other message.
        Cmd::none()
    }

    /// Phase 0: no-op view. Phase 1 will render screens via the frame.
    fn view(&self, _frame: &mut Frame) {}
}
/// Compile-time wiring check for `App::fullscreen(LoreApp)`.
///
/// Nothing in here executes: this function is never called, and the nested
/// function is only declared, not invoked. The sole purpose is to make the
/// compiler type-check the constructor call. `.run()` is intentionally not
/// invoked because it would need a real TTY.
#[cfg(test)]
fn _assert_app_fullscreen_compiles() {
    // Declared only so the body below is type-checked.
    fn _typecheck_only() {
        let _fullscreen_builder = ftui::App::fullscreen(LoreApp);
    }
}
/// Compile-time wiring check for `App::inline(LoreApp, 12)`.
///
/// Like its fullscreen sibling, this is never called; the nested function
/// exists only so the constructor call is type-checked. `.run()` is not invoked.
#[cfg(test)]
fn _assert_app_inline_compiles() {
    // Declared only so the body below is type-checked.
    fn _typecheck_only() {
        let _inline_builder = ftui::App::inline(LoreApp, 12);
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use crate::message::Screen;

    /// Send one message to a fresh `LoreApp` and return the command it produced.
    fn dispatch(msg: Msg) -> Cmd<Msg> {
        let mut app = LoreApp;
        app.update(msg)
    }

    #[test]
    fn test_lore_app_init_returns_none() {
        let mut app = LoreApp;
        assert!(matches!(app.init(), Cmd::None));
    }

    #[test]
    fn test_lore_app_quit_returns_quit_cmd() {
        assert!(matches!(dispatch(Msg::Quit), Cmd::Quit));
    }

    #[test]
    fn test_lore_app_tick_returns_none() {
        assert!(matches!(dispatch(Msg::Tick), Cmd::None));
    }

    #[test]
    fn test_lore_app_navigate_returns_none() {
        let outcome = dispatch(Msg::NavigateTo(Screen::Dashboard));
        assert!(matches!(outcome, Cmd::None));
    }
}

View File

@@ -0,0 +1,151 @@
//! Injected clock for deterministic time in tests and consistent frame timestamps.
//!
//! All relative-time rendering (e.g., "3h ago") uses [`Clock::now()`] rather
//! than wall-clock time directly. This enables:
//! - Deterministic snapshot tests via [`FakeClock`]
//! - Consistent timestamps within a single frame render pass
use std::sync::{Arc, Mutex};
use chrono::{DateTime, TimeDelta, Utc};
/// Trait for obtaining the current time.
///
/// Inject via `Arc<dyn Clock>` to allow swapping between real and fake clocks.
/// The `Send + Sync` supertraits are what make such a trait object shareable
/// across threads.
pub trait Clock: Send + Sync {
/// Returns the current time as a UTC timestamp.
fn now(&self) -> DateTime<Utc>;
}
// ---------------------------------------------------------------------------
// SystemClock
// ---------------------------------------------------------------------------
/// Real wall-clock time via `chrono::Utc::now()`.
///
/// Stateless unit struct, hence `Copy`.
#[derive(Debug, Clone, Copy)]
pub struct SystemClock;
impl Clock for SystemClock {
// Every call reads the system clock afresh; nothing is cached.
fn now(&self) -> DateTime<Utc> {
Utc::now()
}
}
// ---------------------------------------------------------------------------
// FakeClock
// ---------------------------------------------------------------------------
/// A controllable clock for tests. Returns a frozen time that can be
/// advanced or set explicitly.
///
/// `FakeClock` is `Clone` (shares the inner `Arc`) and `Send + Sync`
/// for use across `Cmd::task` threads. Because clones share the same
/// `Arc<Mutex<_>>`, advancing or setting one clone is visible through all.
#[derive(Debug, Clone)]
pub struct FakeClock {
// The single shared "current time" cell.
inner: Arc<Mutex<DateTime<Utc>>>,
}
impl FakeClock {
    /// Build a fake clock whose `now()` reports `time` until it is advanced
    /// or set.
    #[must_use]
    pub fn new(time: DateTime<Utc>) -> Self {
        let inner = Arc::new(Mutex::new(time));
        Self { inner }
    }

    /// Shift the clock by `duration`.
    ///
    /// The addition goes through `checked_add_signed`; if the result would be
    /// out of range, the stored time is left untouched.
    ///
    /// # Panics
    /// Panics if the internal mutex is poisoned.
    pub fn advance(&self, duration: TimeDelta) {
        let mut current = self.inner.lock().expect("FakeClock mutex poisoned");
        // On overflow fall back to the current value, i.e. a no-op write.
        let next = current.checked_add_signed(duration).unwrap_or(*current);
        *current = next;
    }

    /// Overwrite the clock with an exact time.
    ///
    /// # Panics
    /// Panics if the internal mutex is poisoned.
    pub fn set(&self, time: DateTime<Utc>) {
        *self.inner.lock().expect("FakeClock mutex poisoned") = time;
    }
}
impl Clock for FakeClock {
// Returns a copy of the stored time; the lock is held only for the read.
// Panics if the mutex is poisoned.
fn now(&self) -> DateTime<Utc> {
*self.inner.lock().expect("FakeClock mutex poisoned")
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{Datelike, TimeZone};

    /// Reference instant shared by the tests: 2026-02-12 12:00:00 UTC.
    fn fixed_time() -> DateTime<Utc> {
        Utc.with_ymd_and_hms(2026, 2, 12, 12, 0, 0).unwrap()
    }

    /// Fresh fake clock frozen at `fixed_time()`.
    fn frozen_clock() -> FakeClock {
        FakeClock::new(fixed_time())
    }

    #[test]
    fn test_fake_clock_frozen() {
        let clock = frozen_clock();
        let first = clock.now();
        let second = clock.now();
        assert_eq!(first, second);
        assert_eq!(first, fixed_time());
    }

    #[test]
    fn test_fake_clock_advance() {
        let clock = frozen_clock();
        clock.advance(TimeDelta::hours(3));
        assert_eq!(
            clock.now(),
            Utc.with_ymd_and_hms(2026, 2, 12, 15, 0, 0).unwrap()
        );
    }

    #[test]
    fn test_fake_clock_set() {
        let clock = frozen_clock();
        let target = Utc.with_ymd_and_hms(2030, 1, 1, 0, 0, 0).unwrap();
        clock.set(target);
        assert_eq!(clock.now(), target);
    }

    #[test]
    fn test_fake_clock_clone_shares_state() {
        let original = frozen_clock();
        let twin = original.clone();
        original.advance(TimeDelta::minutes(30));
        // The clone observes the change made through the original.
        assert_eq!(original.now(), twin.now());
    }

    #[test]
    fn test_system_clock_returns_reasonable_time() {
        // Sanity: the wall clock should report a year of 2025 or later.
        let year = SystemClock.now().year();
        assert!(year >= 2025);
    }

    #[test]
    fn test_fake_clock_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<FakeClock>();
        assert_send_sync::<SystemClock>();
    }

    #[test]
    fn test_clock_trait_object_works() {
        let fake: Arc<dyn Clock> = Arc::new(frozen_clock());
        assert_eq!(fake.now(), fixed_time());

        let real: Arc<dyn Clock> = Arc::new(SystemClock);
        // Only checks that the call does not panic.
        let _ = real.now();
    }
}

270
crates/lore-tui/src/db.rs Normal file
View File

@@ -0,0 +1,270 @@
#![allow(dead_code)] // Phase 0: types defined now, consumed in Phase 1+
//! Database access layer for the TUI.
//!
//! Provides a read pool (3 connections, round-robin) plus a dedicated writer
//! connection. All connections use WAL mode and busy_timeout for concurrency.
//!
//! The TUI operates read-heavy: parallel queries for dashboard, list views,
//! and prefetch. Writes are rare (TUI-local state: scroll positions, bookmarks).
use std::path::Path;
use std::sync::Mutex;
use std::sync::atomic::{AtomicUsize, Ordering};
use anyhow::{Context, Result};
use rusqlite::Connection;
/// Number of reader connections in the pool.
///
/// Used both to size the pool in `DbManager::open` and as the modulus for
/// round-robin selection in `DbManager::with_reader`.
const READER_COUNT: usize = 3;
// ---------------------------------------------------------------------------
// DbManager
// ---------------------------------------------------------------------------
/// Manages a pool of read-only connections plus a dedicated writer.
///
/// Designed for `Arc<DbManager>` sharing across FrankenTUI's `Cmd::task`
/// background threads. Each reader is individually `Mutex`-protected so
/// concurrent tasks can query different readers without blocking.
pub struct DbManager {
// Reader connections; `open` creates `READER_COUNT` of them with `query_only = ON`.
readers: Vec<Mutex<Connection>>,
// The single connection used for writes.
writer: Mutex<Connection>,
// Monotonic call counter; `with_reader` takes it modulo `READER_COUNT`.
next_reader: AtomicUsize,
}
impl DbManager {
    /// Open the database at `path` with `READER_COUNT` reader connections and
    /// one writer connection.
    ///
    /// Every connection is configured by `open_connection`. Readers are then
    /// switched to `query_only = ON` so accidental writes through them fail.
    ///
    /// # Errors
    /// Returns an error (with context naming the failing connection) if any
    /// connection cannot be opened or configured. Readers are opened in order
    /// and the first failure aborts the whole call.
    pub fn open(path: &Path) -> Result<Self> {
        let readers = (0..READER_COUNT)
            .map(|i| -> Result<Mutex<Connection>> {
                let reader = open_connection(path)
                    .with_context(|| format!("opening reader connection {i}"))?;
                reader
                    .pragma_update(None, "query_only", "ON")
                    .context("setting query_only on reader")?;
                Ok(Mutex::new(reader))
            })
            .collect::<Result<Vec<_>>>()?;

        let writer = open_connection(path)
            .map(Mutex::new)
            .context("opening writer connection")?;

        Ok(Self {
            readers,
            writer,
            next_reader: AtomicUsize::new(0),
        })
    }

    /// Run `f` against one of the reader connections.
    ///
    /// The reader is picked round-robin from a shared counter. The connection
    /// is only lent to `f` by reference, so it cannot escape the call. If the
    /// selected reader is currently in use, this call blocks on that reader's
    /// mutex; it does not fall over to another reader.
    ///
    /// # Panics
    /// Panics if the selected reader's mutex is poisoned.
    pub fn with_reader<F, T>(&self, f: F) -> Result<T>
    where
        F: FnOnce(&Connection) -> Result<T>,
    {
        let ticket = self.next_reader.fetch_add(1, Ordering::Relaxed);
        let slot = &self.readers[ticket % READER_COUNT];
        let guard = slot.lock().expect("reader mutex poisoned");
        f(&guard)
    }

    /// Run `f` against the dedicated writer connection.
    ///
    /// All writes are serialized through a single `Mutex`.
    ///
    /// # Panics
    /// Panics if the writer mutex is poisoned.
    pub fn with_writer<F, T>(&self, f: F) -> Result<T>
    where
        F: FnOnce(&Connection) -> Result<T>,
    {
        let guard = self.writer.lock().expect("writer mutex poisoned");
        f(&guard)
    }
}
// ---------------------------------------------------------------------------
// Connection setup
// ---------------------------------------------------------------------------
/// Open a single SQLite connection with TUI-appropriate pragmas.
///
/// Mirrors lore's `create_connection` pragmas (WAL, busy_timeout, etc.)
/// but skips the sqlite-vec extension registration — the TUI reads standard
/// tables only, never vec0 virtual tables.
///
/// `busy_timeout` is applied before any other pragma. Switching the journal
/// mode may need a database lock, and without a busy handler already in place
/// SQLite reports `SQLITE_BUSY` immediately when another process (for example
/// a running sync) holds the lock.
///
/// # Errors
/// Returns an error, with context naming the failing step, if the database
/// cannot be opened or any pragma cannot be applied.
fn open_connection(path: &Path) -> Result<Connection> {
    let conn = Connection::open(path).context("opening SQLite database")?;

    // Must come first so the pragmas below wait for locks instead of failing.
    conn.pragma_update(None, "busy_timeout", 5000)
        .context("setting busy_timeout")?;
    conn.pragma_update(None, "journal_mode", "WAL")
        .context("setting journal_mode")?;
    conn.pragma_update(None, "synchronous", "NORMAL")
        .context("setting synchronous")?;
    conn.pragma_update(None, "foreign_keys", "ON")
        .context("setting foreign_keys")?;
    conn.pragma_update(None, "temp_store", "MEMORY")
        .context("setting temp_store")?;

    Ok(conn)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use std::sync::Arc;
    use std::sync::atomic::AtomicU64;

    /// Produce a unique database path under the system temp directory.
    ///
    /// The file name combines the process ID, the current thread ID, and a
    /// process-wide atomic counter, so paths stay unique when tests run in
    /// parallel.
    fn test_db_path() -> PathBuf {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);

        let dir = std::env::temp_dir().join("lore-tui-tests");
        std::fs::create_dir_all(&dir).expect("create test dir");

        let file_name = format!(
            "test-{}-{:?}-{n}.db",
            std::process::id(),
            std::thread::current().id(),
        );
        dir.join(file_name)
    }

    /// Open a `DbManager` on a fresh, unique database file.
    fn open_fresh() -> DbManager {
        DbManager::open(&test_db_path()).expect("open")
    }

    fn create_test_table(conn: &Connection) {
        conn.execute_batch(
            "CREATE TABLE IF NOT EXISTS test_items (id INTEGER PRIMARY KEY, name TEXT);",
        )
        .expect("create test table");
    }

    #[test]
    fn test_dbmanager_opens_successfully() {
        let db = open_fresh();
        // The writer must be able to create the test table.
        db.with_writer(|conn| {
            create_test_table(conn);
            Ok(())
        })
        .expect("create table via writer");
    }

    #[test]
    fn test_reader_is_query_only() {
        let db = open_fresh();
        // The table has to exist first, otherwise the INSERT would fail for
        // the wrong reason.
        db.with_writer(|conn| {
            create_test_table(conn);
            Ok(())
        })
        .unwrap();

        // An INSERT through a reader must be rejected.
        let outcome = db.with_reader(|conn| {
            conn.execute("INSERT INTO test_items (name) VALUES ('boom')", [])
                .map(|_| ())
                .map_err(|e| anyhow::anyhow!(e))
        });
        assert!(outcome.is_err(), "reader should reject writes");
    }

    #[test]
    fn test_writer_allows_mutations() {
        let db = open_fresh();
        db.with_writer(|conn| {
            create_test_table(conn);
            conn.execute("INSERT INTO test_items (name) VALUES ('hello')", [])?;
            let rows: i64 = conn.query_row("SELECT COUNT(*) FROM test_items", [], |r| r.get(0))?;
            assert_eq!(rows, 1);
            Ok(())
        })
        .expect("writer should allow mutations");
    }

    #[test]
    fn test_round_robin_rotates_readers() {
        let db = open_fresh();
        // Two full cycles: the counter should select readers 0,1,2,0,1,2.
        for call in 0..(2 * READER_COUNT) {
            let counter = db.next_reader.load(Ordering::Relaxed);
            assert_eq!(counter % READER_COUNT, call % READER_COUNT);
            db.with_reader(|_conn| Ok(())).unwrap();
        }
    }

    #[test]
    fn test_reader_can_read_writer_data() {
        let db = open_fresh();
        db.with_writer(|conn| {
            create_test_table(conn);
            conn.execute("INSERT INTO test_items (name) VALUES ('visible')", [])?;
            Ok(())
        })
        .unwrap();

        let fetched: String = db
            .with_reader(|conn| {
                let value: String =
                    conn.query_row("SELECT name FROM test_items WHERE id = 1", [], |r| r.get(0))?;
                Ok(value)
            })
            .expect("reader should see writer's data");
        assert_eq!(fetched, "visible");
    }

    #[test]
    fn test_dbmanager_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<DbManager>();
    }

    #[test]
    fn test_concurrent_reads() {
        let db = Arc::new(open_fresh());
        db.with_writer(|conn| {
            create_test_table(conn);
            for i in 0..10 {
                conn.execute(
                    "INSERT INTO test_items (name) VALUES (?1)",
                    [format!("item-{i}")],
                )?;
            }
            Ok(())
        })
        .unwrap();

        // Spawn all six readers before joining any of them.
        let workers: Vec<_> = (0..6)
            .map(|_| {
                let db = Arc::clone(&db);
                std::thread::spawn(move || {
                    db.with_reader(|conn| {
                        let rows: i64 =
                            conn.query_row("SELECT COUNT(*) FROM test_items", [], |r| r.get(0))?;
                        assert_eq!(rows, 10);
                        Ok(())
                    })
                    .expect("concurrent read should succeed");
                })
            })
            .collect();

        for worker in workers {
            worker.join().expect("thread should not panic");
        }
    }
}

View File

@@ -0,0 +1,58 @@
#![forbid(unsafe_code)]
//! Gitlore TUI — terminal interface for exploring GitLab data locally.
//!
//! Built on FrankenTUI (Elm architecture): Model, update, view.
//! The `lore` CLI spawns `lore-tui` via PATH lookup at runtime.
use anyhow::Result;
// Phase 0 modules.
pub mod clock; // Clock trait: SystemClock + FakeClock (bd-2lg6)
pub mod message; // Msg, Screen, EntityKey, AppError, InputMode (bd-c9gk)
pub mod safety; // Terminal safety: sanitize + URL policy + redact (bd-3ir1)
pub mod db; // DbManager: read pool + dedicated writer (bd-2kop)
pub mod theme; // Flexoki theme: build_theme, state_color, label_style (bd-5ofk)
pub mod app; // LoreApp Model trait impl (Phase 0 proof: bd-2emv, full: bd-6pmy)
/// Options controlling how the TUI launches.
///
/// Plain data holder; nothing in this type validates the values.
#[derive(Debug, Clone)]
pub struct LaunchOptions {
/// Path to lore config file.
pub config_path: Option<String>,
/// Run a background sync before displaying data.
pub sync_on_start: bool,
/// Clear cached TUI state and start fresh.
pub fresh: bool,
/// Render backend: "crossterm" or "native".
// NOTE(review): free-form string, not checked against those two values here.
pub render_mode: String,
/// Use ASCII-only box drawing characters.
pub ascii: bool,
/// Disable alternate screen (render inline).
pub no_alt_screen: bool,
}
/// Launch the TUI in browse mode (no sync).
///
/// Currently a stub: it ignores `options`, prints a "not yet implemented"
/// notice to stderr, and returns `Ok(())`.
///
/// Planned behaviour (Phase 1): load config from `options.config_path` (or the
/// default location), open the database, and enter the FrankenTUI event loop.
pub fn launch_tui(options: LaunchOptions) -> Result<()> {
    // Phase 1 will wire this to LoreApp + App::fullscreen().run()
    eprintln!("lore-tui: browse mode not yet implemented (Phase 1)");
    // Options are accepted but unused until Phase 1.
    drop(options);
    Ok(())
}
/// Launch the TUI with an initial sync pass.
///
/// Currently a stub: it ignores `options`, prints a "not yet implemented"
/// notice to stderr, and returns `Ok(())`.
///
/// Planned behaviour (Phase 2): run `lore sync` in the background while
/// showing a progress screen, then switch to browse mode once sync completes.
pub fn launch_sync_tui(options: LaunchOptions) -> Result<()> {
    // Phase 2 will implement the sync progress screen
    eprintln!("lore-tui: sync mode not yet implemented (Phase 2)");
    // Options are accepted but unused until Phase 2.
    drop(options);
    Ok(())
}

View File

@@ -0,0 +1,53 @@
#![forbid(unsafe_code)]
use anyhow::Result;
use clap::Parser;
use lore_tui::LaunchOptions;
// Command-line interface of the `lore-tui` binary.
//
// NOTE: clap's derive macro turns the `///` comments on the fields below into
// `--help` text, so they are user-facing strings rather than ordinary docs.
// Maintainer notes therefore use plain `//` comments.
/// Terminal UI for Gitlore — explore GitLab issues, MRs, and search locally.
#[derive(Parser, Debug)]
#[command(name = "lore-tui", version, about)]
struct TuiCli {
/// Path to lore config file (default: ~/.config/lore/config.json).
// Also read from the LORE_CONFIG_PATH environment variable (see `env = ...`).
#[arg(short, long, env = "LORE_CONFIG_PATH")]
config: Option<String>,
/// Run a sync before launching the TUI.
// `main` uses this flag to choose between the sync and browse entry points.
#[arg(long)]
sync: bool,
/// Clear cached state and start fresh.
#[arg(long)]
fresh: bool,
/// Render mode: "crossterm" (default) or "native".
// Accepted as a free-form string; no value validation is declared here.
#[arg(long, default_value = "crossterm")]
render_mode: String,
/// Use ASCII-only drawing characters (no Unicode box drawing).
#[arg(long)]
ascii: bool,
/// Disable alternate screen (render inline).
#[arg(long)]
no_alt_screen: bool,
}
fn main() -> Result<()> {
let cli = TuiCli::parse();
let options = LaunchOptions {
config_path: cli.config,
sync_on_start: cli.sync,
fresh: cli.fresh,
render_mode: cli.render_mode,
ascii: cli.ascii,
no_alt_screen: cli.no_alt_screen,
};
if options.sync_on_start {
lore_tui::launch_sync_tui(options)
} else {
lore_tui::launch_tui(options)
}
}

View File

@@ -0,0 +1,523 @@
#![allow(dead_code)] // Phase 0: types defined now, consumed in Phase 1+
//! Core types for the lore-tui Elm architecture.
//!
//! - [`Msg`] — every user action and async result flows through this enum.
//! - [`Screen`] — navigation targets.
//! - [`EntityKey`] — safe cross-project entity identity.
//! - [`AppError`] — structured error display in the TUI.
//! - [`InputMode`] — controls key dispatch routing.
use std::fmt;
use std::time::Instant;
use ftui::Event;
// ---------------------------------------------------------------------------
// EntityKind
// ---------------------------------------------------------------------------
/// Distinguishes issue vs merge request in an [`EntityKey`].
///
/// `Copy`, comparable, and hashable so it can be part of a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EntityKind {
/// A GitLab issue (displayed with a `#` prefix by `EntityKey`'s `Display`).
Issue,
/// A GitLab merge request (displayed with a `!` prefix by `EntityKey`'s `Display`).
MergeRequest,
}
// ---------------------------------------------------------------------------
// EntityKey
// ---------------------------------------------------------------------------
/// Uniquely identifies an entity (issue or MR) across projects.
///
/// Bare `iid` is unsafe in multi-project datasets — equality requires
/// project_id + iid + kind. The derived `PartialEq`/`Eq`/`Hash` compare all
/// three fields.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct EntityKey {
/// Identifier of the owning project.
pub project_id: i64,
/// Per-project entity number.
pub iid: i64,
/// Whether this key refers to an issue or a merge request.
pub kind: EntityKind,
}
impl EntityKey {
    /// Key for the issue numbered `iid` in project `project_id`.
    #[must_use]
    pub fn issue(project_id: i64, iid: i64) -> Self {
        let kind = EntityKind::Issue;
        Self {
            project_id,
            iid,
            kind,
        }
    }

    /// Key for the merge request numbered `iid` in project `project_id`.
    #[must_use]
    pub fn mr(project_id: i64, iid: i64) -> Self {
        let kind = EntityKind::MergeRequest;
        Self {
            project_id,
            iid,
            kind,
        }
    }
}
/// Renders as `p<project_id>:<sigil><iid>`, where the sigil is `#` for issues
/// and `!` for merge requests — for example `p5:#123` or `p5:!456`.
impl fmt::Display for EntityKey {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            project_id,
            iid,
            kind,
        } = self;
        let sigil = if matches!(kind, EntityKind::Issue) {
            "#"
        } else {
            "!"
        };
        write!(f, "p{}:{}{}", project_id, sigil, iid)
    }
}
// ---------------------------------------------------------------------------
// Screen
// ---------------------------------------------------------------------------
/// Navigation targets within the TUI.
///
/// Only the two detail variants carry data: the [`EntityKey`] of the entity
/// being shown.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Screen {
Dashboard,
IssueList,
/// Detail view for one issue.
IssueDetail(EntityKey),
MrList,
/// Detail view for one merge request.
MrDetail(EntityKey),
Search,
Timeline,
Who,
Sync,
Stats,
Doctor,
Bootstrap,
}
impl Screen {
    /// Human-readable label for breadcrumbs and status bar.
    ///
    /// Every label is a string literal, so the return type is `&'static str`.
    /// Callers may keep the label after the `Screen` value is gone; code that
    /// only needs a shorter-lived `&str` is unaffected.
    #[must_use]
    pub fn label(&self) -> &'static str {
        match self {
            Self::Dashboard => "Dashboard",
            Self::IssueList => "Issues",
            Self::IssueDetail(_) => "Issue",
            Self::MrList => "Merge Requests",
            Self::MrDetail(_) => "Merge Request",
            Self::Search => "Search",
            Self::Timeline => "Timeline",
            Self::Who => "Who",
            Self::Sync => "Sync",
            Self::Stats => "Stats",
            Self::Doctor => "Doctor",
            Self::Bootstrap => "Bootstrap",
        }
    }

    /// Whether this screen shows a specific entity detail view, i.e. it is
    /// `IssueDetail` or `MrDetail`.
    #[must_use]
    pub fn is_detail_or_entity(&self) -> bool {
        matches!(self, Self::IssueDetail(_) | Self::MrDetail(_))
    }
}
// ---------------------------------------------------------------------------
// AppError
// ---------------------------------------------------------------------------
/// Structured error types for user-facing display in the TUI.
///
/// Implements [`std::error::Error`] in addition to `Display`, so values can be
/// propagated with `?` into `anyhow::Result`, boxed as `dyn Error`, or wrapped
/// with context like any other error.
#[derive(Debug, Clone)]
pub enum AppError {
    /// Database is busy (WAL contention).
    DbBusy,
    /// Database corruption detected.
    DbCorruption(String),
    /// GitLab rate-limited; retry after N seconds (if header present).
    NetworkRateLimited { retry_after_secs: Option<u64> },
    /// Network unavailable.
    NetworkUnavailable,
    /// GitLab authentication failed.
    AuthFailed,
    /// Data parsing error.
    ParseError(String),
    /// Internal / unexpected error.
    Internal(String),
}

/// User-facing, single-line message for each error variant.
impl fmt::Display for AppError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::DbBusy => write!(f, "Database is busy — another process holds the lock"),
            Self::DbCorruption(detail) => write!(f, "Database corruption: {detail}"),
            Self::NetworkRateLimited {
                retry_after_secs: Some(secs),
            } => write!(f, "Rate limited by GitLab — retry in {secs}s"),
            Self::NetworkRateLimited {
                retry_after_secs: None,
            } => write!(f, "Rate limited by GitLab — try again shortly"),
            Self::NetworkUnavailable => write!(f, "Network unavailable — working offline"),
            Self::AuthFailed => write!(f, "GitLab authentication failed — check your token"),
            Self::ParseError(detail) => write!(f, "Parse error: {detail}"),
            Self::Internal(detail) => write!(f, "Internal error: {detail}"),
        }
    }
}

// No variant wraps an underlying error value, so the default `source()`
// (returning `None`) is correct.
impl std::error::Error for AppError {}
// ---------------------------------------------------------------------------
// InputMode
// ---------------------------------------------------------------------------
/// Controls how keystrokes are routed through the key dispatch pipeline.
///
/// Defaults to [`InputMode::Normal`].
#[derive(Debug, Clone, Default)]
pub enum InputMode {
/// Standard navigation mode — keys dispatch to screen-specific handlers.
#[default]
Normal,
/// Text input focused (filter bar, search box).
Text,
/// Command palette is open.
Palette,
/// "g" prefix pressed — waiting for second key (500ms timeout).
// NOTE(review): the variant only records when the prefix started; the
// timeout itself is not enforced by anything in this type — confirm in the
// key dispatcher.
GoPrefix { started_at: Instant },
}
// ---------------------------------------------------------------------------
// Msg
// ---------------------------------------------------------------------------
/// Every user action and async result flows through this enum.
///
/// Generation fields (`generation: u64`) on async result variants enable
/// stale-response detection: if the generation doesn't match the current
/// request generation, the result is silently dropped.
///
/// Large payloads (`IssueDetail`, `MrDetail`, `WhoResult`, `DashboardData`)
/// are boxed, which keeps those variants pointer-sized.
#[derive(Debug)]
pub enum Msg {
// --- Terminal events ---
/// Raw terminal event (key, mouse, paste, focus, clipboard).
RawEvent(Event),
/// Periodic tick from runtime subscription.
Tick,
/// Terminal resized.
Resize {
width: u16,
height: u16,
},
// --- Navigation ---
/// Navigate to a specific screen.
NavigateTo(Screen),
/// Go back in navigation history.
GoBack,
/// Go forward in navigation history.
GoForward,
/// Jump to the dashboard.
GoHome,
/// Jump back N screens in history.
JumpBack(usize),
/// Jump forward N screens in history.
JumpForward(usize),
// --- Command palette ---
OpenCommandPalette,
CloseCommandPalette,
// NOTE(review): the `String` payloads below are presumably the typed text and
// the chosen command — confirm against the palette handler.
CommandPaletteInput(String),
CommandPaletteSelect(String),
// --- Issue list ---
/// Async result: rows for the issue list, tagged with the request generation.
IssueListLoaded {
generation: u64,
rows: Vec<IssueRow>,
},
IssueListFilterChanged(String),
IssueListSortChanged,
IssueSelected(EntityKey),
// --- MR list ---
/// Async result: rows for the merge request list, tagged with the request generation.
MrListLoaded {
generation: u64,
rows: Vec<MrRow>,
},
MrListFilterChanged(String),
MrSelected(EntityKey),
// --- Issue detail ---
/// Async result: detail payload for one issue (boxed).
IssueDetailLoaded {
generation: u64,
key: EntityKey,
detail: Box<IssueDetail>,
},
// --- MR detail ---
/// Async result: detail payload for one merge request (boxed).
MrDetailLoaded {
generation: u64,
key: EntityKey,
detail: Box<MrDetail>,
},
// --- Discussions (shared by issue + MR detail) ---
DiscussionsLoaded {
generation: u64,
discussions: Vec<Discussion>,
},
// --- Search ---
SearchQueryChanged(String),
SearchRequestStarted {
generation: u64,
query: String,
},
SearchExecuted {
generation: u64,
results: Vec<SearchResult>,
},
SearchResultSelected(EntityKey),
SearchModeChanged,
SearchCapabilitiesLoaded,
// --- Timeline ---
TimelineLoaded {
generation: u64,
events: Vec<TimelineEvent>,
},
TimelineEntitySelected(EntityKey),
// --- Who (people) ---
WhoResultLoaded {
generation: u64,
result: Box<WhoResult>,
},
WhoModeChanged,
// --- Sync ---
SyncStarted,
SyncProgress {
stage: String,
current: u64,
total: u64,
},
SyncProgressBatch {
stage: String,
batch_size: u64,
},
SyncLogLine(String),
SyncBackpressureDrop,
SyncCompleted {
elapsed_ms: u64,
},
SyncCancelled,
// NOTE(review): carries a plain `String`, not an `AppError` — presumably a
// pre-rendered failure message; confirm with the sync producer.
SyncFailed(String),
SyncStreamStats {
bytes: u64,
items: u64,
},
// --- Search debounce ---
// (Search-related, but declared after the Sync group.)
SearchDebounceArmed {
generation: u64,
},
SearchDebounceFired {
generation: u64,
},
// --- Dashboard ---
DashboardLoaded {
generation: u64,
data: Box<DashboardData>,
},
// --- Global actions ---
/// A structured error to surface to the user.
Error(AppError),
ShowHelp,
ShowCliEquivalent,
OpenInBrowser,
BlurTextInput,
ScrollToTopCurrentScreen,
/// Request application exit; `LoreApp::update` maps this to the quit command.
Quit,
}
/// Convert terminal events into messages.
///
/// Per the original author's note, FrankenTUI requires `From<Event>` on the
/// message type so the runtime can inject terminal events into the model's
/// update loop.
///
/// `Tick` and `Resize` are lifted into their dedicated variants; every other
/// event is forwarded untouched inside [`Msg::RawEvent`].
impl From<Event> for Msg {
    fn from(event: Event) -> Self {
        match event {
            Event::Tick => Self::Tick,
            Event::Resize { width, height } => Self::Resize { width, height },
            passthrough => Self::RawEvent(passthrough),
        }
    }
}
// ---------------------------------------------------------------------------
// Placeholder data types (will be fleshed out in Phase 1+)
// ---------------------------------------------------------------------------
/// Placeholder for an issue row in list views.
#[derive(Debug, Clone)]
pub struct IssueRow {
pub key: EntityKey,
pub title: String,
// Kept as free-form text for now; no enum of states exists in this file.
pub state: String,
}
/// Placeholder for a merge request row in list views.
#[derive(Debug, Clone)]
pub struct MrRow {
pub key: EntityKey,
pub title: String,
// Kept as free-form text for now; no enum of states exists in this file.
pub state: String,
pub draft: bool,
}
/// Placeholder for issue detail payload.
#[derive(Debug, Clone)]
pub struct IssueDetail {
pub key: EntityKey,
pub title: String,
pub description: String,
}
/// Placeholder for MR detail payload.
#[derive(Debug, Clone)]
pub struct MrDetail {
pub key: EntityKey,
pub title: String,
pub description: String,
}
/// Placeholder for a discussion thread.
#[derive(Debug, Clone)]
pub struct Discussion {
pub id: String,
// NOTE(review): notes are plain strings here — presumably note bodies; confirm
// once the Phase 1 type lands.
pub notes: Vec<String>,
}
/// Placeholder for a search result.
#[derive(Debug, Clone)]
pub struct SearchResult {
pub key: EntityKey,
pub title: String,
// NOTE(review): scale and ordering of `score` are not defined in this file.
pub score: f64,
}
/// Placeholder for a timeline event.
#[derive(Debug, Clone)]
pub struct TimelineEvent {
// NOTE(review): timestamp is an unparsed string; format not specified here.
pub timestamp: String,
pub description: String,
}
/// Placeholder for who/people intelligence result.
#[derive(Debug, Clone)]
pub struct WhoResult {
pub experts: Vec<String>,
}
/// Placeholder for dashboard summary data.
#[derive(Debug, Clone)]
pub struct DashboardData {
pub issue_count: u64,
pub mr_count: u64,
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    #[test]
    fn test_entity_key_equality() {
        let issue = EntityKey::issue(1, 42);
        assert_eq!(issue, EntityKey::issue(1, 42));
        // Same project and iid, different kind: must not compare equal.
        assert_ne!(issue, EntityKey::mr(1, 42));
    }

    #[test]
    fn test_entity_key_different_projects() {
        let in_project_one = EntityKey::issue(1, 42);
        let in_project_two = EntityKey::issue(2, 42);
        assert_ne!(in_project_one, in_project_two);
    }

    #[test]
    fn test_entity_key_display() {
        let rendered_issue = EntityKey::issue(5, 123).to_string();
        let rendered_mr = EntityKey::mr(5, 456).to_string();
        assert_eq!(rendered_issue, "p5:#123");
        assert_eq!(rendered_mr, "p5:!456");
    }

    #[test]
    fn test_entity_key_hash_is_usable_in_collections() {
        // The second element duplicates the first and must collapse.
        let keys: HashSet<EntityKey> = [
            EntityKey::issue(1, 1),
            EntityKey::issue(1, 1),
            EntityKey::mr(1, 1),
        ]
        .into_iter()
        .collect();
        assert_eq!(keys.len(), 2);
    }

    #[test]
    fn test_screen_labels() {
        let expectations = [
            (Screen::Dashboard, "Dashboard"),
            (Screen::IssueList, "Issues"),
            (Screen::MrList, "Merge Requests"),
            (Screen::Search, "Search"),
        ];
        for (screen, expected) in expectations {
            assert_eq!(screen.label(), expected);
        }
    }

    #[test]
    fn test_screen_is_detail_or_entity() {
        let detail_screens = [
            Screen::IssueDetail(EntityKey::issue(1, 1)),
            Screen::MrDetail(EntityKey::mr(1, 1)),
        ];
        for screen in &detail_screens {
            assert!(screen.is_detail_or_entity());
        }

        let other_screens = [Screen::Dashboard, Screen::IssueList, Screen::Search];
        for screen in &other_screens {
            assert!(!screen.is_detail_or_entity());
        }
    }

    #[test]
    fn test_app_error_display() {
        let expectations = [
            (AppError::DbBusy, "busy"),
            (
                AppError::NetworkRateLimited {
                    retry_after_secs: Some(30),
                },
                "30s",
            ),
            (
                AppError::NetworkRateLimited {
                    retry_after_secs: None,
                },
                "shortly",
            ),
            (AppError::AuthFailed, "token"),
        ];
        for (err, fragment) in expectations {
            assert!(err.to_string().contains(fragment));
        }
    }

    #[test]
    fn test_input_mode_default_is_normal() {
        let mode = InputMode::default();
        assert!(matches!(mode, InputMode::Normal));
    }

    #[test]
    fn test_msg_from_event_resize() {
        let msg: Msg = Event::Resize {
            width: 80,
            height: 24,
        }
        .into();
        assert!(matches!(
            msg,
            Msg::Resize {
                width: 80,
                height: 24
            }
        ));
    }

    #[test]
    fn test_msg_from_event_tick() {
        let msg: Msg = Event::Tick.into();
        assert!(matches!(msg, Msg::Tick));
    }

    #[test]
    fn test_msg_from_event_focus_wraps_raw() {
        let msg: Msg = Event::Focus(true).into();
        assert!(matches!(msg, Msg::RawEvent(Event::Focus(true))));
    }
}

View File

@@ -0,0 +1,587 @@
//! Terminal safety: sanitize untrusted text, URL policy, credential redaction.
//!
//! GitLab content can contain ANSI escapes, bidi overrides, OSC hyperlinks,
//! and C1 control codes that could corrupt terminal rendering. This module
//! strips dangerous sequences while preserving a safe SGR subset for readability.
use std::fmt::Write;
// ---------------------------------------------------------------------------
// UrlPolicy
// ---------------------------------------------------------------------------
/// Controls how OSC 8 hyperlinks in input are handled.
///
/// The policy is consulted by `sanitize_for_terminal` whenever it meets an
/// `ESC ]` (OSC) sequence. `Strip` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum UrlPolicy {
/// Remove OSC 8 hyperlinks entirely, keeping only the link text.
#[default]
Strip,
/// Convert hyperlinks to numbered footnotes: `text [1]` with URL list appended.
Footnote,
/// Pass hyperlinks through unchanged (only for trusted content).
///
/// NOTE: `sanitize_for_terminal` re-emits the raw characters of *every* OSC
/// sequence under this policy, not only OSC 8 hyperlinks, so other OSC
/// commands in the input also reach the terminal.
Passthrough,
}
// ---------------------------------------------------------------------------
// RedactPattern
// ---------------------------------------------------------------------------
/// A compiled set of regular expressions used to mask secrets and PII.
#[derive(Debug, Clone)]
pub struct RedactPattern {
    patterns: Vec<regex::Regex>,
}

impl RedactPattern {
    /// Build the built-in pattern set.
    ///
    /// Covers GitLab personal access tokens (`glpat-…`), values that follow a
    /// `token` / `bearer` / `api key` keyword, and email addresses.
    ///
    /// # Panics
    ///
    /// Panics if one of the built-in expressions fails to compile, which
    /// would be a programming error in this function.
    #[must_use]
    pub fn defaults() -> Self {
        const SOURCES: [&str; 3] = [
            // GitLab personal access tokens
            r"glpat-[A-Za-z0-9_\-]{20,}",
            // Keyword, then separator(s), then at least 8 non-whitespace characters
            r"(?i)(token|bearer|api[_-]?key)[\s:=]+\S{8,}",
            // Email addresses
            r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}",
        ];
        let patterns = SOURCES
            .iter()
            .map(|source| regex::Regex::new(source).expect("valid regex"))
            .collect();
        Self { patterns }
    }

    /// Return a copy of `input` in which every match of every pattern is
    /// replaced by `[REDACTED]`.
    ///
    /// Patterns are applied one after another, each on the output of the
    /// previous one.
    #[must_use]
    pub fn redact(&self, input: &str) -> String {
        self.patterns.iter().fold(input.to_string(), |text, pattern| {
            pattern.replace_all(&text, "[REDACTED]").into_owned()
        })
    }
}
// ---------------------------------------------------------------------------
// sanitize_for_terminal
// ---------------------------------------------------------------------------
/// Sanitize untrusted text for safe terminal display.
///
/// - Strips C1 control codes (U+0080-U+009F)
/// - Strips C0 control codes and DEL, except tab, newline and carriage return
/// - Strips OSC sequences (ESC ] ... ST)
/// - Strips every CSI sequence that is not a safe SGR (cursor movement,
///   erase, etc.)
/// - Strips bidi overrides (U+202A-U+202E, U+2066-U+2069)
/// - Preserves safe SGR subset (bold, italic, underline, reset, standard colors)
/// - Drops ESC plus the following character for any other escape sequence
///
/// `url_policy` controls handling of OSC sequences:
///
/// - [`UrlPolicy::Strip`]: the link text of an OSC 8 hyperlink is kept, the
///   URL is dropped.
/// - [`UrlPolicy::Footnote`]: the link text is followed by ` [n]`, and a
///   numbered URL list is appended after a blank line. Link text that comes
///   without a URL is kept as plain text.
/// - [`UrlPolicy::Passthrough`]: the raw OSC sequence is reproduced verbatim
///   (trusted content only).
///
/// Under `Strip` and `Footnote` the link text is itself sanitized before it
/// is written, and footnote URLs have all control and bidi-override
/// characters removed. Without this, escape sequences placed between the
/// opening and closing OSC 8 markers (or inside the URL) would reach the
/// terminal unfiltered.
#[must_use]
pub fn sanitize_for_terminal(input: &str, url_policy: UrlPolicy) -> String {
    let mut output = String::with_capacity(input.len());
    let mut footnotes: Vec<String> = Vec::new();
    let chars: Vec<char> = input.chars().collect();
    let len = chars.len();
    let mut i = 0;

    while i < len {
        let ch = chars[i];

        // Bidi overrides and C1 control codes never reach the output.
        if is_bidi_override(ch) || ('\u{0080}'..='\u{009F}').contains(&ch) {
            i += 1;
            continue;
        }

        if ch != '\x1B' {
            // C0 controls (and DEL) are dropped, except tab / newline / CR.
            let dropped = ch.is_ascii_control() && !matches!(ch, '\t' | '\n' | '\r');
            if !dropped {
                output.push(ch);
            }
            i += 1;
            continue;
        }

        // `ch` is ESC: dispatch on the character that follows it.
        match chars.get(i + 1).copied() {
            // CSI sequence: ESC [
            Some('[') => {
                let (consumed, safe_seq) = parse_csi(&chars, i);
                if let Some(seq) = safe_seq {
                    output.push_str(&seq);
                }
                i += consumed;
            }
            // OSC sequence: ESC ]
            Some(']') => {
                let (consumed, link_text, link_url) = parse_osc(&chars, i);
                match url_policy {
                    UrlPolicy::Strip => {
                        if let Some(text) = link_text {
                            // The link text is raw input; it must pass through
                            // the same filter as everything else.
                            output.push_str(&sanitize_for_terminal(&text, UrlPolicy::Strip));
                        }
                    }
                    UrlPolicy::Footnote => {
                        if let Some(text) = link_text {
                            output.push_str(&sanitize_for_terminal(&text, UrlPolicy::Strip));
                            // A URL is printed as plain text, so no control
                            // character (ESC included) may survive in it.
                            let url = link_url
                                .map(|raw| {
                                    raw.chars()
                                        .filter(|c| !c.is_control() && !is_bidi_override(*c))
                                        .collect::<String>()
                                })
                                .filter(|clean| !clean.is_empty());
                            if let Some(url) = url {
                                footnotes.push(url);
                                let _ = write!(output, " [{}]", footnotes.len());
                            }
                        }
                    }
                    UrlPolicy::Passthrough => {
                        // Reproduce the raw OSC sequence
                        output.extend(chars[i..len.min(i + consumed)].iter());
                    }
                }
                i += consumed;
            }
            // Unknown ESC sequence — skip ESC + next char
            Some(_) => i += 2,
            // Trailing ESC at end of input
            None => i += 1,
        }
    }

    // Append footnotes if any
    if !footnotes.is_empty() {
        output.push('\n');
        for (idx, url) in footnotes.iter().enumerate() {
            let _ = write!(output, "\n[{}] {url}", idx + 1);
        }
    }
    output
}
// ---------------------------------------------------------------------------
// Bidi check
// ---------------------------------------------------------------------------
/// Report whether `ch` is one of the Unicode bidirectional formatting
/// characters that this module strips.
///
/// Two contiguous ranges are covered:
/// - U+202A..=U+202E: LRE, RLE, PDF, LRO, RLO
/// - U+2066..=U+2069: LRI, RLI, FSI, PDI
fn is_bidi_override(ch: char) -> bool {
    let embedding_or_override = ('\u{202A}'..='\u{202E}').contains(&ch);
    let isolate = ('\u{2066}'..='\u{2069}').contains(&ch);
    embedding_or_override || isolate
}
// ---------------------------------------------------------------------------
// CSI parser
// ---------------------------------------------------------------------------
/// Parse the CSI sequence that begins at `chars[start]`.
///
/// `chars[start]` must be ESC and `chars[start + 1]` must be `[`.
///
/// Returns `(chars_consumed, kept)`:
/// - `kept` is `Some(sequence)` only for an SGR sequence (final byte `m`)
///   whose parameters pass `is_safe_sgr`; the string is the sequence exactly
///   as it appeared in the input.
/// - `kept` is `None` for every other sequence, which the caller drops.
///
/// A sequence without a valid final byte (0x40-0x7E) is malformed: only the
/// characters scanned so far (at least `ESC [`) are consumed, so the
/// offending character is left for the caller to handle.
fn parse_csi(chars: &[char], start: usize) -> (usize, Option<String>) {
    debug_assert!(chars[start] == '\x1B');
    debug_assert!(start + 1 < chars.len() && chars[start + 1] == '[');

    // Everything between `ESC [` and the final byte: parameter bytes
    // (0x30-0x3F) and intermediate bytes (0x20-0x2F), accepted in any order.
    let body_start = start + 2;
    let body_len = chars
        .iter()
        .skip(body_start)
        .take_while(|c| ('\x20'..='\x3F').contains(*c))
        .count();
    let final_idx = body_start + body_len;

    let final_byte = match chars.get(final_idx) {
        Some(&c) if ('\x40'..='\x7E').contains(&c) => c,
        // Malformed — consume what we've seen and strip
        _ => return (final_idx.saturating_sub(start).max(2), None),
    };
    let consumed = final_idx + 1 - start;

    // Cursor movement, erase, and every other non-SGR function is stripped.
    if final_byte != 'm' {
        return (consumed, None);
    }

    let params: String = chars[body_start..final_idx].iter().collect();
    let kept = is_safe_sgr(&params).then(|| chars[start..=final_idx].iter().collect::<String>());
    (consumed, kept)
}
/// Check if all SGR parameters in a sequence are in the safe subset.
///
/// `params` is the text between `ESC [` and the final `m`. It is split on
/// `;`; an empty parameter counts as 0 (reset), so `""` and `";"` are safe.
/// Every non-empty parameter must consist solely of ASCII digits and name a
/// code accepted by `is_safe_sgr_code`.
///
/// Anything else makes the whole sequence unsafe. In particular, a space or
/// a `+` is a CSI *intermediate* byte, not part of an SGR parameter, so
/// `" 1"` and `"+1"` are rejected rather than being read as `1`. Values too
/// large for `u32` are rejected as well.
///
/// Safe: 0 (reset), 1 (bold), 3 (italic), 4 (underline), 22 (normal intensity),
/// 23 (not italic), 24 (not underline), 39 (default fg), 49 (default bg),
/// 30-37 (standard fg), 40-47 (standard bg), 90-97 (bright fg), 100-107 (bright bg).
fn is_safe_sgr(params: &str) -> bool {
    params.split(';').all(|param| {
        if param.is_empty() {
            return true; // treat empty as 0
        }
        // `str::parse::<u32>` would accept a leading '+', so check the digits first.
        param.bytes().all(|b| b.is_ascii_digit())
            && matches!(param.parse::<u32>(), Ok(code) if is_safe_sgr_code(code))
    })
}

/// Report whether a single numeric SGR code belongs to the safe subset
/// listed on `is_safe_sgr`.
fn is_safe_sgr_code(n: u32) -> bool {
    matches!(
        n,
        0 // reset
        | 1 // bold
        | 3 // italic
        | 4 // underline
        | 22 // normal intensity (turn off bold)
        | 23 // not italic
        | 24 // not underline
        | 39 // default foreground
        | 49 // default background
        | 30..=37 // standard foreground colors
        | 40..=47 // standard background colors
        | 90..=97 // bright foreground colors
        | 100..=107 // bright background colors
    )
}
// ---------------------------------------------------------------------------
// OSC parser
// ---------------------------------------------------------------------------
/// Parse the OSC sequence that begins at `chars[start]`.
///
/// `chars[start]` must be ESC and `chars[start + 1]` must be `]`.
///
/// Returns `(chars_consumed, link_text, link_url)`.
///
/// For an OSC 8 hyperlink (`ESC ] 8 ; params ; url ST text ESC ] 8 ; ; ST`):
/// - `link_text` is the raw text between the opening sequence and the next
///   `ESC ]` (or the end of input if there is none). It is not sanitized.
/// - `link_url` is the URL from the opening sequence, or `None` if it is
///   empty or missing.
/// - The consumed count covers the opening sequence, the text and the
///   sequence that follows it. That next sequence is assumed to be the
///   closing OSC 8 and is not inspected.
///
/// For any other OSC sequence, the sequence up to and including its
/// terminator is consumed and both options are `None`.
fn parse_osc(chars: &[char], start: usize) -> (usize, Option<String>, Option<String>) {
    debug_assert!(chars[start] == '\x1B');
    debug_assert!(start + 1 < chars.len() && chars[start + 1] == ']');

    let total = chars.len();
    let body_start = start + 2; // first character after ESC ]
    let (body_end, body_consumed) = find_st(chars, body_start);

    let is_hyperlink = chars
        .get(body_start..)
        .map_or(false, |rest| rest.starts_with(&['8', ';']));
    if !is_hyperlink {
        // Non-OSC-8: consume ESC ] plus body and terminator, extract nothing.
        return (body_consumed + 2, None, None);
    }

    // Body looks like "8;params;url".
    let body: String = chars[body_start..body_end].iter().collect();
    let url = extract_osc8_url(&body).map(String::from);

    // Link text runs from just past the first terminator to the next ESC ].
    let text_start = body_start + body_consumed;
    let next_osc = (text_start..total)
        .find(|&pos| chars[pos] == '\x1B' && chars.get(pos + 1) == Some(&']'));

    match next_osc {
        Some(pos) => {
            let text: String = chars[text_start..pos].iter().collect();
            let (_, closer_consumed) = find_st(chars, pos + 2);
            (pos + closer_consumed - start + 2, Some(text), url)
        }
        None => {
            // Reached end of input without a closing sequence.
            let text: String = chars[text_start..].iter().collect();
            (total - start, Some(text), url)
        }
    }
}
/// Locate the String Terminator (ST) that ends an OSC body.
///
/// Scanning starts at index `from`. ST is either BEL (0x07, one character)
/// or `ESC \` (two characters).
///
/// Returns `(content_end_index, total_consumed_from_content_start)`: the
/// index of the first terminator character, and the number of characters
/// from `from` through the end of the terminator. If no terminator exists,
/// the rest of the input counts as content: `(chars.len(), chars.len() - from)`.
fn find_st(chars: &[char], from: usize) -> (usize, usize) {
    let total = chars.len();
    (from..total)
        .find_map(|pos| match chars[pos] {
            '\x07' => Some((pos, pos - from + 1)),
            '\x1B' if chars.get(pos + 1) == Some(&'\\') => Some((pos, pos - from + 2)),
            _ => None,
        })
        // Unterminated — consume everything
        .unwrap_or_else(|| (total, total - from))
}
/// Pull the URL out of an OSC 8 body of the form `8;params;url`.
///
/// The URL is everything after the second `;`, so it may itself contain
/// `;`. Returns `None` when the body does not start with `8;`, has no
/// second `;`, or the URL part is empty (as in the closing `8;;`).
fn extract_osc8_url(content: &str) -> Option<&str> {
    let (_params, url) = content.strip_prefix("8;")?.split_once(';')?;
    (!url.is_empty()).then_some(url)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
// Unit tests for `sanitize_for_terminal` (CSI, SGR, C1, bidi, OSC handling
// and edge cases) and for `RedactPattern::defaults().redact`.
use super::*;
// --- CSI / cursor movement ---
#[test]
fn test_strips_cursor_movement() {
// CSI 5A = cursor up 5
let input = "before\x1B[5Aafter";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
assert_eq!(result, "beforeafter");
}
#[test]
fn test_strips_cursor_movement_all_directions() {
// Final bytes A through H, each with the parameter 3.
for dir in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'] {
let input = format!("x\x1B[3{dir}y");
let result = sanitize_for_terminal(&input, UrlPolicy::Strip);
assert_eq!(result, "xy", "failed for direction {dir}");
}
}
#[test]
fn test_strips_erase_sequences() {
// CSI 2J = erase display
let input = "before\x1B[2Jafter";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
assert_eq!(result, "beforeafter");
}
// --- SGR preservation ---
#[test]
fn test_preserves_bold_italic_underline_reset() {
let input = "\x1B[1mbold\x1B[0m \x1B[3mitalic\x1B[0m \x1B[4munderline\x1B[0m";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
assert_eq!(result, input);
}
#[test]
fn test_preserves_standard_colors() {
// Red foreground, green background
let input = "\x1B[31mred\x1B[42m on green\x1B[0m";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
assert_eq!(result, input);
}
#[test]
fn test_preserves_bright_colors() {
let input = "\x1B[91mbright red\x1B[0m";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
assert_eq!(result, input);
}
#[test]
fn test_preserves_combined_safe_sgr() {
// Bold + red foreground in one sequence
let input = "\x1B[1;31mbold red\x1B[0m";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
assert_eq!(result, input);
}
#[test]
fn test_strips_unsafe_sgr() {
// SGR 8 = hidden text (not in safe list)
let input = "\x1B[8mhidden\x1B[0m";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
// SGR 8 stripped, SGR 0 preserved
assert_eq!(result, "hidden\x1B[0m");
}
// --- C1 control codes ---
#[test]
fn test_strips_c1_control_codes() {
// U+008D = Reverse Index, U+009B = CSI (8-bit)
let input = format!("before{}middle{}after", '\u{008D}', '\u{009B}');
let result = sanitize_for_terminal(&input, UrlPolicy::Strip);
assert_eq!(result, "beforemiddleafter");
}
// --- Bidi overrides ---
#[test]
fn test_strips_bidi_overrides() {
let input = format!(
"normal{}reversed{}end",
'\u{202E}', // RLO
'\u{202C}' // PDF
);
let result = sanitize_for_terminal(&input, UrlPolicy::Strip);
assert_eq!(result, "normalreversedend");
}
#[test]
fn test_strips_all_bidi_chars() {
// Same nine code points that `is_bidi_override` matches.
let bidi_chars = [
'\u{202A}', '\u{202B}', '\u{202C}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}',
'\u{2068}', '\u{2069}',
];
for ch in bidi_chars {
let input = format!("a{ch}b");
let result = sanitize_for_terminal(&input, UrlPolicy::Strip);
assert_eq!(result, "ab", "failed for U+{:04X}", ch as u32);
}
}
// --- OSC sequences ---
#[test]
fn test_strips_osc_sequences() {
// OSC 0 (set title): ESC ] 0 ; title BEL
let input = "before\x1B]0;My Title\x07after";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
assert_eq!(result, "beforeafter");
}
// --- OSC 8 hyperlinks ---
#[test]
fn test_url_policy_strip() {
// OSC 8 hyperlink: ESC]8;;url ST text ESC]8;; ST
let input = "click \x1B]8;;https://example.com\x07here\x1B]8;;\x07 done";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
assert_eq!(result, "click here done");
}
#[test]
fn test_url_policy_footnote() {
// Only substrings are checked: the inline marker and the appended URL line.
let input = "click \x1B]8;;https://example.com\x07here\x1B]8;;\x07 done";
let result = sanitize_for_terminal(input, UrlPolicy::Footnote);
assert!(result.contains("here [1]"));
assert!(result.contains("[1] https://example.com"));
}
// --- Redaction ---
#[test]
fn test_redact_gitlab_token() {
let redactor = RedactPattern::defaults();
let input = "My token is glpat-AbCdEfGhIjKlMnOpQrStUvWx";
let result = redactor.redact(input);
assert_eq!(result, "My token is [REDACTED]");
}
#[test]
fn test_redact_email() {
let redactor = RedactPattern::defaults();
let input = "Contact user@example.com for details";
let result = redactor.redact(input);
assert_eq!(result, "Contact [REDACTED] for details");
}
#[test]
fn test_redact_bearer_token() {
// The keyword pattern replaces the keyword together with the value, so
// only the presence of the marker and absence of the secret are checked.
let redactor = RedactPattern::defaults();
let input = "Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCI";
let result = redactor.redact(input);
assert!(result.contains("[REDACTED]"));
assert!(!result.contains("eyJ"));
}
// --- Edge cases ---
#[test]
fn test_empty_input() {
assert_eq!(sanitize_for_terminal("", UrlPolicy::Strip), "");
}
#[test]
fn test_safe_content_passthrough() {
let input = "Hello, world! This is normal text.\nWith newlines\tand tabs.";
assert_eq!(sanitize_for_terminal(input, UrlPolicy::Strip), input);
}
#[test]
fn test_trailing_esc() {
let input = "text\x1B";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
assert_eq!(result, "text");
}
#[test]
fn test_malformed_csi_does_not_eat_text() {
// NOTE: despite the test name, this input is a well-formed CSI (see below),
// so the "b" is consumed as its final byte.
// ESC [ without a valid final byte before next printable
let input = "a\x1B[b";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
// The malformed CSI is consumed but shouldn't eat "b" as text
// ESC[ is start, 'b' is final byte (0x62 is in 0x40-0x7E range)
// So this is CSI with final byte 'b' (cursor back) — gets stripped
assert_eq!(result, "a");
}
#[test]
fn test_utf8_adjacent_to_escapes() {
let input = "\x1B[1m日本語\x1B[0m text";
let result = sanitize_for_terminal(input, UrlPolicy::Strip);
assert_eq!(result, "\x1B[1m日本語\x1B[0m text");
}
#[test]
fn test_fuzz_no_panic() {
// 1000 random-ish byte sequences — must not panic
// NOTE: the generator is deterministic. Each input is 50 consecutive byte
// values starting at (seed * 31) & 0xFF, so at most 256 distinct inputs
// are exercised and only the Strip policy is covered.
for seed in 0u16..1000 {
let mut bytes = Vec::new();
for j in 0..50 {
bytes.push(((seed.wrapping_mul(31).wrapping_add(j)) & 0xFF) as u8);
}
// Best-effort UTF-8
let input = String::from_utf8_lossy(&bytes);
let _ = sanitize_for_terminal(&input, UrlPolicy::Strip);
}
}
}

View File

@@ -0,0 +1,251 @@
#![allow(dead_code)] // Phase 0: types defined now, consumed in Phase 1+
//! Flexoki-based theme for the lore TUI.
//!
//! Uses FrankenTUI's `AdaptiveColor::adaptive(light, dark)` for automatic
//! light/dark mode switching. The palette is [Flexoki](https://stephango.com/flexoki)
//! by Steph Ango, designed in Oklab perceptual color space for balanced contrast.
use ftui::{AdaptiveColor, Color, PackedRgba, Style, Theme};
// ---------------------------------------------------------------------------
// Flexoki palette constants
// ---------------------------------------------------------------------------
// Base tones
// Ordered from lightest (PAPER) to darkest (BLACK).
const PAPER: Color = Color::rgb(0xFF, 0xFC, 0xF0);
const BASE_50: Color = Color::rgb(0xF2, 0xF0, 0xE5);
const BASE_100: Color = Color::rgb(0xE6, 0xE4, 0xD9);
const BASE_200: Color = Color::rgb(0xCE, 0xCD, 0xC3);
const BASE_300: Color = Color::rgb(0xB7, 0xB5, 0xAC);
const BASE_400: Color = Color::rgb(0x9F, 0x9D, 0x96);
const BASE_500: Color = Color::rgb(0x87, 0x85, 0x80);
const BASE_600: Color = Color::rgb(0x6F, 0x6E, 0x69);
const BASE_700: Color = Color::rgb(0x57, 0x56, 0x53);
const BASE_800: Color = Color::rgb(0x40, 0x3E, 0x3C);
const BASE_850: Color = Color::rgb(0x34, 0x33, 0x31);
const BASE_900: Color = Color::rgb(0x28, 0x27, 0x26);
const BLACK: Color = Color::rgb(0x10, 0x0F, 0x0F);
// Accent colors — light-600 (for light mode)
const RED_600: Color = Color::rgb(0xAF, 0x30, 0x29);
const ORANGE_600: Color = Color::rgb(0xBC, 0x52, 0x15);
const YELLOW_600: Color = Color::rgb(0xAD, 0x83, 0x01);
const GREEN_600: Color = Color::rgb(0x66, 0x80, 0x0B);
const CYAN_600: Color = Color::rgb(0x24, 0x83, 0x7B);
const BLUE_600: Color = Color::rgb(0x20, 0x5E, 0xA6);
const PURPLE_600: Color = Color::rgb(0x5E, 0x40, 0x9D);
// Accent colors — dark-400 (for dark mode)
// These are also the fixed colors returned by `state_color` and `event_color`.
const RED_400: Color = Color::rgb(0xD1, 0x4D, 0x41);
const ORANGE_400: Color = Color::rgb(0xDA, 0x70, 0x2C);
const YELLOW_400: Color = Color::rgb(0xD0, 0xA2, 0x15);
const GREEN_400: Color = Color::rgb(0x87, 0x9A, 0x39);
const CYAN_400: Color = Color::rgb(0x3A, 0xA9, 0x9F);
const BLUE_400: Color = Color::rgb(0x43, 0x85, 0xBE);
const PURPLE_400: Color = Color::rgb(0x8B, 0x7E, 0xC8);
// No MAGENTA_600 counterpart is defined in this list.
const MAGENTA_400: Color = Color::rgb(0xCE, 0x5D, 0x97);
// Muted fallback as PackedRgba (for Style::fg)
// Same RGB triple as BASE_500.
const MUTED_PACKED: PackedRgba = PackedRgba::rgb(0x87, 0x85, 0x80);
// ---------------------------------------------------------------------------
// build_theme
// ---------------------------------------------------------------------------
/// Build the lore TUI theme with Flexoki adaptive colors.
///
/// Each of the 19 semantic slots gets an `AdaptiveColor::adaptive(light, dark)`
/// pair. FrankenTUI detects the terminal background and resolves accordingly.
#[must_use]
pub fn build_theme() -> Theme {
Theme::builder()
.primary(AdaptiveColor::adaptive(BLUE_600, BLUE_400))
.secondary(AdaptiveColor::adaptive(CYAN_600, CYAN_400))
.accent(AdaptiveColor::adaptive(PURPLE_600, PURPLE_400))
.background(AdaptiveColor::adaptive(PAPER, BLACK))
.surface(AdaptiveColor::adaptive(BASE_50, BASE_900))
.overlay(AdaptiveColor::adaptive(BASE_100, BASE_850))
.text(AdaptiveColor::adaptive(BASE_700, BASE_200))
.text_muted(AdaptiveColor::adaptive(BASE_500, BASE_500))
.text_subtle(AdaptiveColor::adaptive(BASE_400, BASE_600))
.success(AdaptiveColor::adaptive(GREEN_600, GREEN_400))
.warning(AdaptiveColor::adaptive(YELLOW_600, YELLOW_400))
.error(AdaptiveColor::adaptive(RED_600, RED_400))
.info(AdaptiveColor::adaptive(BLUE_600, BLUE_400))
.border(AdaptiveColor::adaptive(BASE_300, BASE_700))
.border_focused(AdaptiveColor::adaptive(BLUE_600, BLUE_400))
.selection_bg(AdaptiveColor::adaptive(BASE_100, BASE_800))
.selection_fg(AdaptiveColor::adaptive(BASE_700, BASE_100))
.scrollbar_track(AdaptiveColor::adaptive(BASE_50, BASE_900))
.scrollbar_thumb(AdaptiveColor::adaptive(BASE_300, BASE_700))
.build()
}
// ---------------------------------------------------------------------------
// State colors
// ---------------------------------------------------------------------------
/// Pick the display color for a GitLab entity state string.
///
/// Recognized states are `"opened"`, `"closed"`, `"merged"` and `"locked"`
/// (exact, case-sensitive match); anything else gets the muted `BASE_500`.
///
/// The returned colors are fixed rather than adaptive, so a state looks the
/// same in light and dark mode.
#[must_use]
pub fn state_color(state: &str) -> Color {
    if state == "opened" {
        GREEN_400
    } else if state == "closed" {
        RED_400
    } else if state == "merged" {
        PURPLE_400
    } else if state == "locked" {
        YELLOW_400
    } else {
        BASE_500
    }
}
// ---------------------------------------------------------------------------
// Event type colors
// ---------------------------------------------------------------------------
/// Pick the display color for a timeline event type string.
///
/// Recognized types are `"created"`, `"updated"`, `"closed"`, `"merged"`,
/// `"commented"`, `"labeled"` and `"milestoned"` (exact, case-sensitive
/// match); anything else gets the muted `BASE_500`.
#[must_use]
pub fn event_color(event_type: &str) -> Color {
    if event_type == "created" {
        GREEN_400
    } else if event_type == "updated" {
        BLUE_400
    } else if event_type == "closed" {
        RED_400
    } else if event_type == "merged" {
        PURPLE_400
    } else if event_type == "commented" {
        CYAN_400
    } else if event_type == "labeled" {
        ORANGE_400
    } else if event_type == "milestoned" {
        YELLOW_400
    } else {
        BASE_500
    }
}
// ---------------------------------------------------------------------------
// Label styling
// ---------------------------------------------------------------------------
/// Build a `Style` whose foreground is a GitLab label color.
///
/// `hex_color` is a six-digit hex string with or without a leading `#`
/// (e.g., "#FF0000" or "FF0000"). If it cannot be parsed, the muted
/// fallback color `MUTED_PACKED` is used instead.
#[must_use]
pub fn label_style(hex_color: &str) -> Style {
    let foreground = match parse_hex_to_packed(hex_color) {
        Some(rgb) => rgb,
        None => MUTED_PACKED,
    };
    Style::default().fg(foreground)
}
/// Parse a hex color string like "#RRGGBB" or "RRGGBB" into a `PackedRgba`.
///
/// One leading `#` is optional. The remainder must be exactly six ASCII hex
/// digits (upper or lower case); anything else yields `None`.
///
/// The digits are validated up front for two reasons:
/// - The length check is in bytes and the channels are taken by byte range,
///   so a six-byte string containing multi-byte characters (e.g. "aééa")
///   would otherwise panic on a non-char-boundary slice.
/// - `u8::from_str_radix` accepts a leading `+`, so "+F+F+F" would otherwise
///   be read as a color.
fn parse_hex_to_packed(s: &str) -> Option<PackedRgba> {
    let hex = s.strip_prefix('#').unwrap_or(s);
    if hex.len() != 6 || !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    // All six bytes are ASCII, so every two-byte range is a valid slice.
    let channel = |at: usize| u8::from_str_radix(&hex[at..at + 2], 16).ok();
    Some(PackedRgba::rgb(channel(0)?, channel(2)?, channel(4)?))
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
// Unit tests for `build_theme`, `state_color`, `event_color`, `label_style`
// and `parse_hex_to_packed`.
use super::*;
#[test]
fn test_build_theme_compiles() {
let theme = build_theme();
// Resolve for dark mode — primary should be Blue-400
let resolved = theme.resolve(true);
assert_eq!(resolved.primary, BLUE_400);
}
#[test]
fn test_build_theme_light_mode() {
// Resolve for light mode — primary should be Blue-600
let theme = build_theme();
let resolved = theme.resolve(false);
assert_eq!(resolved.primary, BLUE_600);
}
#[test]
fn test_build_theme_all_slots_differ_between_modes() {
// NOTE: despite the name, only `background` and `text` are compared.
// `text_muted` is BASE_500 in both modes, so not every slot differs.
let theme = build_theme();
let dark = theme.resolve(true);
let light = theme.resolve(false);
// Background should differ (Paper vs Black)
assert_ne!(dark.background, light.background);
// Text should differ
assert_ne!(dark.text, light.text);
}
#[test]
fn test_state_color_opened_is_green() {
assert_eq!(state_color("opened"), GREEN_400);
}
#[test]
fn test_state_color_closed_is_red() {
assert_eq!(state_color("closed"), RED_400);
}
#[test]
fn test_state_color_merged_is_purple() {
assert_eq!(state_color("merged"), PURPLE_400);
}
#[test]
fn test_state_color_unknown_returns_muted() {
assert_eq!(state_color("unknown"), BASE_500);
}
#[test]
fn test_event_color_created_is_green() {
assert_eq!(event_color("created"), GREEN_400);
}
#[test]
fn test_event_color_unknown_returns_muted() {
assert_eq!(event_color("whatever"), BASE_500);
}
#[test]
fn test_label_style_valid_hex_with_hash() {
let style = label_style("#FF0000");
assert_eq!(style.fg, Some(PackedRgba::rgb(0xFF, 0x00, 0x00)));
}
#[test]
fn test_label_style_valid_hex_without_hash() {
let style = label_style("00FF00");
assert_eq!(style.fg, Some(PackedRgba::rgb(0x00, 0xFF, 0x00)));
}
#[test]
fn test_label_style_lowercase_hex() {
// Hex digits are case-insensitive.
let style = label_style("#ff0000");
assert_eq!(style.fg, Some(PackedRgba::rgb(0xFF, 0x00, 0x00)));
}
#[test]
fn test_label_style_invalid_hex_fallback() {
let style = label_style("invalid");
assert_eq!(style.fg, Some(MUTED_PACKED));
}
#[test]
fn test_label_style_empty_fallback() {
let style = label_style("");
assert_eq!(style.fg, Some(MUTED_PACKED));
}
#[test]
fn test_parse_hex_short_string() {
// Three-digit shorthand is not supported.
assert!(parse_hex_to_packed("#FFF").is_none());
}
#[test]
fn test_parse_hex_non_hex_chars() {
assert!(parse_hex_to_packed("#GGHHII").is_none());
}
}

View File

@@ -0,0 +1,21 @@
-- Migration 022: Composite query indexes for notes + author_id column
-- Optimizes author-scoped and project-scoped date-range queries on notes.
-- Adds discussion JOIN indexes and immutable author identity column.
-- Composite index for author-scoped queries (who command, notes --author)
-- Partial index: SQLite only considers it for queries whose WHERE clause
-- implies is_system = 0. The leading column is project_id.
CREATE INDEX IF NOT EXISTS idx_notes_user_created
ON notes(project_id, author_username COLLATE NOCASE, created_at DESC, id DESC)
WHERE is_system = 0;
-- Composite index for project-scoped date-range queries
-- Partial index with the same is_system = 0 restriction as above.
CREATE INDEX IF NOT EXISTS idx_notes_project_created
ON notes(project_id, created_at DESC, id DESC)
WHERE is_system = 0;
-- Discussion JOIN indexes
CREATE INDEX IF NOT EXISTS idx_discussions_issue_id ON discussions(issue_id);
CREATE INDEX IF NOT EXISTS idx_discussions_mr_id ON discussions(merge_request_id);
-- Immutable author identity column (GitLab numeric user ID)
-- NOTE: unlike the CREATE INDEX IF NOT EXISTS statements above, ADD COLUMN has
-- no IF NOT EXISTS form in SQLite, so this statement fails if run a second time.
-- The column is nullable with no default, so existing rows get NULL.
ALTER TABLE notes ADD COLUMN author_id INTEGER;
CREATE INDEX IF NOT EXISTS idx_notes_author_id ON notes(author_id) WHERE author_id IS NOT NULL;

View File

@@ -0,0 +1,153 @@
-- Migration 024: Add 'note' source_type to documents and dirty_sources
-- SQLite does not support ALTER CONSTRAINT, so we use the table-rebuild pattern.
-- NOTE(review): no BEGIN/COMMIT appears in this script; presumably the migration
-- runner wraps it in a transaction — confirm, since a failure midway would
-- otherwise leave dropped tables behind.
-- ============================================================
-- 1. Rebuild dirty_sources with updated CHECK constraint
-- ============================================================
CREATE TABLE dirty_sources_new (
source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion','note')),
source_id INTEGER NOT NULL,
queued_at INTEGER NOT NULL,
attempt_count INTEGER NOT NULL DEFAULT 0,
last_attempt_at INTEGER,
last_error TEXT,
next_attempt_at INTEGER,
PRIMARY KEY(source_type, source_id)
);
-- NOTE(review): SELECT * copies columns by position, so this assumes the old
-- dirty_sources table has exactly these columns in this order — confirm.
INSERT INTO dirty_sources_new SELECT * FROM dirty_sources;
DROP TABLE dirty_sources;
ALTER TABLE dirty_sources_new RENAME TO dirty_sources;
CREATE INDEX idx_dirty_sources_next_attempt ON dirty_sources(next_attempt_at);
-- ============================================================
-- 2. Rebuild documents with updated CHECK constraint
-- ============================================================
-- 2a. Backup junction table data
CREATE TEMP TABLE _doc_labels_backup AS SELECT * FROM document_labels;
CREATE TEMP TABLE _doc_paths_backup AS SELECT * FROM document_paths;
-- 2b. Drop all triggers that reference documents
DROP TRIGGER IF EXISTS documents_ai;
DROP TRIGGER IF EXISTS documents_ad;
DROP TRIGGER IF EXISTS documents_au;
DROP TRIGGER IF EXISTS documents_embeddings_ad;
-- 2c. Drop junction tables (they have FK references to documents)
DROP TABLE IF EXISTS document_labels;
DROP TABLE IF EXISTS document_paths;
-- 2d. Create new documents table with 'note' in CHECK constraint
CREATE TABLE documents_new (
id INTEGER PRIMARY KEY,
source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion','note')),
source_id INTEGER NOT NULL,
project_id INTEGER NOT NULL REFERENCES projects(id),
author_username TEXT,
label_names TEXT,
created_at INTEGER,
updated_at INTEGER,
url TEXT,
title TEXT,
content_text TEXT NOT NULL,
content_hash TEXT NOT NULL,
labels_hash TEXT NOT NULL DEFAULT '',
paths_hash TEXT NOT NULL DEFAULT '',
is_truncated INTEGER NOT NULL DEFAULT 0,
truncated_reason TEXT CHECK (
truncated_reason IN (
'token_limit_middle_drop','single_note_oversized','first_last_oversized',
'hard_cap_oversized'
)
OR truncated_reason IS NULL
),
UNIQUE(source_type, source_id)
);
-- 2e. Copy all existing data
-- NOTE(review): positional copy again; assumes the old documents table has the
-- same column order as documents_new above — confirm.
INSERT INTO documents_new SELECT * FROM documents;
-- 2f. Swap tables
DROP TABLE documents;
ALTER TABLE documents_new RENAME TO documents;
-- 2g. Recreate all indexes on documents
CREATE INDEX idx_documents_project_updated ON documents(project_id, updated_at);
CREATE INDEX idx_documents_author ON documents(author_username);
CREATE INDEX idx_documents_source ON documents(source_type, source_id);
CREATE INDEX idx_documents_hash ON documents(content_hash);
-- 2h. Recreate junction tables
CREATE TABLE document_labels (
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
label_name TEXT NOT NULL,
PRIMARY KEY(document_id, label_name)
) WITHOUT ROWID;
CREATE INDEX idx_document_labels_label ON document_labels(label_name);
CREATE TABLE document_paths (
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
path TEXT NOT NULL,
PRIMARY KEY(document_id, path)
) WITHOUT ROWID;
CREATE INDEX idx_document_paths_path ON document_paths(path);
-- 2i. Restore junction table data from backups
-- Document ids are preserved by the copy in 2e, so the backed-up document_id
-- values still point at the same rows.
INSERT INTO document_labels SELECT * FROM _doc_labels_backup;
INSERT INTO document_paths SELECT * FROM _doc_paths_backup;
-- 2j. Recreate FTS triggers (from migration 008)
CREATE TRIGGER documents_ai AFTER INSERT ON documents BEGIN
INSERT INTO documents_fts(rowid, title, content_text)
VALUES (new.id, COALESCE(new.title, ''), new.content_text);
END;
CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
INSERT INTO documents_fts(documents_fts, rowid, title, content_text)
VALUES('delete', old.id, COALESCE(old.title, ''), old.content_text);
END;
-- Re-index only when title or content changed. `IS NOT` is used for title
-- because it is nullable; content_text is NOT NULL, so `!=` is sufficient.
CREATE TRIGGER documents_au AFTER UPDATE ON documents
WHEN old.title IS NOT new.title OR old.content_text != new.content_text
BEGIN
INSERT INTO documents_fts(documents_fts, rowid, title, content_text)
VALUES('delete', old.id, COALESCE(old.title, ''), old.content_text);
INSERT INTO documents_fts(rowid, title, content_text)
VALUES (new.id, COALESCE(new.title, ''), new.content_text);
END;
-- 2k. Recreate embeddings cleanup trigger (from migration 009)
-- Deletes the embeddings rowid range [id * 1000, (id + 1) * 1000) for the document.
CREATE TRIGGER documents_embeddings_ad AFTER DELETE ON documents BEGIN
DELETE FROM embeddings
WHERE rowid >= old.id * 1000
AND rowid < (old.id + 1) * 1000;
END;
-- 2l. Rebuild FTS index to ensure consistency after table swap
INSERT INTO documents_fts(documents_fts) VALUES('rebuild');
-- ============================================================
-- 3. Defense triggers: clean up documents when notes are
-- deleted or flipped to system notes
-- ============================================================
CREATE TRIGGER notes_ad_cleanup AFTER DELETE ON notes
WHEN old.is_system = 0
BEGIN
DELETE FROM documents WHERE source_type = 'note' AND source_id = old.id;
END;
CREATE TRIGGER notes_au_system_cleanup AFTER UPDATE OF is_system ON notes
WHEN NEW.is_system = 1 AND OLD.is_system = 0
BEGIN
DELETE FROM documents WHERE source_type = 'note' AND source_id = OLD.id;
END;
-- ============================================================
-- 4. Drop temp backup tables
-- ============================================================
DROP TABLE IF EXISTS _doc_labels_backup;
DROP TABLE IF EXISTS _doc_paths_backup;

View File

@@ -0,0 +1,8 @@
-- Backfill existing non-system notes into dirty queue for document generation.
-- Only seeds notes that don't already have documents and aren't already queued.
-- queued_at is the current Unix time in seconds multiplied by 1000, i.e. a
-- millisecond timestamp with second precision.
INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, CAST(strftime('%s', 'now') AS INTEGER) * 1000
FROM notes n
-- Anti-join: the LEFT JOIN plus "d.id IS NULL" keeps only notes with no 'note' document.
LEFT JOIN documents d ON d.source_type = 'note' AND d.source_id = n.id
WHERE n.is_system = 0 AND d.id IS NULL
-- Rows already present for (source_type, source_id) are left untouched.
ON CONFLICT(source_type, source_id) DO NOTHING;

View File

@@ -186,6 +186,31 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
],
),
("drift", &["--threshold", "--project"]),
(
"notes",
&[
"--limit",
"--fields",
"--format",
"--author",
"--note-type",
"--contains",
"--note-id",
"--gitlab-note-id",
"--discussion-id",
"--include-system",
"--for-issue",
"--for-mr",
"--project",
"--since",
"--until",
"--path",
"--resolution",
"--sort",
"--asc",
"--open",
],
),
(
"init",
&[

View File

@@ -39,6 +39,7 @@ pub fn run_generate_docs(
result.seeded += seed_dirty(&conn, SourceType::Issue, project_filter)?;
result.seeded += seed_dirty(&conn, SourceType::MergeRequest, project_filter)?;
result.seeded += seed_dirty(&conn, SourceType::Discussion, project_filter)?;
result.seeded += seed_dirty_notes(&conn, project_filter)?;
}
let regen =
@@ -67,6 +68,10 @@ fn seed_dirty(
SourceType::Issue => "issues",
SourceType::MergeRequest => "merge_requests",
SourceType::Discussion => "discussions",
SourceType::Note => {
// Notes are seeded by seed_dirty_notes instead, because they need an is_system filter.
unreachable!("Note seeding handled by seed_dirty_notes, not seed_dirty")
}
};
let type_str = source_type.as_str();
let now = chrono::Utc::now().timestamp_millis();
@@ -125,6 +130,55 @@ fn seed_dirty(
Ok(total_seeded)
}
/// Queues every non-system note for document generation by inserting it into
/// `dirty_sources` with `source_type = 'note'`.
///
/// Notes are walked in ascending `id` order, `FULL_MODE_CHUNK_SIZE` candidates at
/// a time. When `project_filter` is given it is resolved once via
/// `resolve_project` and only that project's notes are considered.
///
/// Returns the number of rows actually inserted. Notes that already have a
/// `dirty_sources` row are left untouched (`ON CONFLICT ... DO NOTHING`) and are
/// not counted.
///
/// # Errors
/// Fails if the project filter cannot be resolved or if any SQL statement fails.
fn seed_dirty_notes(conn: &Connection, project_filter: Option<&str>) -> Result<usize> {
    let now = chrono::Utc::now().timestamp_millis();

    // Loop-invariant: resolve the project once instead of once per chunk.
    let project_id = project_filter
        .map(|project| resolve_project(conn, project))
        .transpose()?;

    let mut total_seeded: usize = 0;
    let mut last_id: i64 = 0;

    loop {
        // Upper bound of the next chunk of candidate notes. This uses the same
        // filters as the INSERT below, so the cursor always matches what was seeded.
        let chunk_max: Option<i64> = conn.query_row(
            "SELECT MAX(id) FROM (\
             SELECT id FROM notes \
             WHERE id > ?1 AND is_system = 0 AND (?2 IS NULL OR project_id = ?2) \
             ORDER BY id LIMIT ?3)",
            rusqlite::params![last_id, project_id, FULL_MODE_CHUNK_SIZE],
            |row| row.get(0),
        )?;

        // Stop only when there are no more candidate notes. The insert count is
        // not a valid stop signal: it is 0 for a chunk whose notes are all
        // already queued, even though later chunks may still need seeding.
        let chunk_max = match chunk_max {
            Some(id) => id,
            None => break,
        };

        total_seeded += conn.execute(
            "INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at) \
             SELECT 'note', id, ?1, 0, NULL, NULL, NULL \
             FROM notes \
             WHERE id > ?2 AND id <= ?3 AND is_system = 0 AND (?4 IS NULL OR project_id = ?4) \
             ON CONFLICT(source_type, source_id) DO NOTHING",
            rusqlite::params![now, last_id, chunk_max, project_id],
        )?;
        last_id = chunk_max;
    }

    info!(
        source_type = "note",
        seeded = total_seeded,
        "Seeded dirty_sources"
    );
    Ok(total_seeded)
}
pub fn print_generate_docs(result: &GenerateDocsResult) {
let mode = if result.full_mode {
"full"
@@ -186,3 +240,81 @@ pub fn print_generate_docs_json(result: &GenerateDocsResult, elapsed_ms: u64) {
};
println!("{}", serde_json::to_string(&output).unwrap());
}
#[cfg(test)]
mod tests {
    use std::path::Path;

    use crate::core::db::{create_connection, run_migrations};

    use super::*;

    /// Creates an in-memory database, runs the migrations, and inserts the
    /// project (id 1), issue (id 1) and discussion (id 1) that `insert_note`
    /// rows point at.
    fn setup_db() -> Connection {
        let db = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&db).unwrap();

        let fixtures = [
            "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) VALUES (1, 100, 'group/project', 'https://gitlab.com/group/project')",
            "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 1, 'Test', 'opened', 1000, 2000, 3000)",
            "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        ];
        for statement in fixtures.iter().copied() {
            db.execute(statement, []).unwrap();
        }
        db
    }

    /// Inserts one note under discussion 1 / project 1 with the given ids.
    fn insert_note(conn: &Connection, id: i64, gitlab_id: i64, is_system: bool) {
        let system_flag = i32::from(is_system);
        conn.execute(
            "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (?1, ?2, 1, 1, 'alice', 'note body', 1000, 2000, 3000, ?3)",
            rusqlite::params![id, gitlab_id, system_flag],
        )
        .unwrap();
    }

    /// Number of `dirty_sources` rows currently queued for notes.
    fn queued_note_count(conn: &Connection) -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap()
    }

    #[test]
    fn test_full_seed_includes_notes() {
        let conn = setup_db();
        for (id, gitlab_id) in [(1, 101), (2, 102), (3, 103)].iter().copied() {
            insert_note(&conn, id, gitlab_id, false);
        }
        // A system note must not be seeded.
        insert_note(&conn, 4, 104, true);

        let seeded = seed_dirty_notes(&conn, None).unwrap();

        assert_eq!(seeded, 3);
        assert_eq!(queued_note_count(&conn), 3);
    }

    #[test]
    fn test_note_document_count_stable_after_second_generate_docs_full() {
        let conn = setup_db();
        insert_note(&conn, 1, 101, false);
        insert_note(&conn, 2, 102, false);

        let first_pass = seed_dirty_notes(&conn, None).unwrap();
        assert_eq!(first_pass, 2);

        // Re-seeding inserts nothing new because of ON CONFLICT DO NOTHING.
        let second_pass = seed_dirty_notes(&conn, None).unwrap();
        assert_eq!(second_pass, 0);

        assert_eq!(queued_note_count(&conn), 2);
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -30,8 +30,10 @@ pub use ingest::{
};
pub use init::{InitInputs, InitOptions, InitResult, run_init};
pub use list::{
ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues,
print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs,
ListFilters, MrListFilters, NoteListFilters, open_issue_in_browser, open_mr_in_browser,
print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json,
print_list_notes, print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl,
query_notes, run_list_issues, run_list_mrs,
};
pub use search::{
SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search,

View File

@@ -334,6 +334,7 @@ pub fn print_search_results(response: &SearchResponse) {
"issue" => "Issue",
"merge_request" => "MR",
"discussion" => "Discussion",
"note" => "Note",
_ => &result.source_type,
};

View File

@@ -112,6 +112,9 @@ pub enum Commands {
/// List or show merge requests
Mrs(MrsArgs),
/// List notes from discussions
Notes(NotesArgs),
/// Ingest data from GitLab
Ingest(IngestArgs),
@@ -489,6 +492,113 @@ pub struct MrsArgs {
pub no_open: bool,
}
// Arguments accepted by the `lore notes` subcommand (the `Commands::Notes` variant).
// NOTE: the `///` doc comments on the fields below are what clap prints as
// per-option help, so editing them changes CLI output; maintainer notes here
// deliberately use plain `//` comments.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore notes # List 50 most recent notes
lore notes --author alice --since 7d # Notes by alice in last 7 days
lore notes --for-issue 42 -p group/repo # Notes on issue #42
lore notes --path src/ --resolution unresolved # Unresolved diff notes in src/")]
pub struct NotesArgs {
/// Maximum results
#[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize,
/// Select output fields (comma-separated, or 'minimal' preset: id,author_username,body,created_at_iso)
// `value_delimiter = ','` splits a single `--fields a,b,c` value into separate entries.
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
/// Output format (table, json, jsonl, csv)
// `value_parser` restricts the accepted values to the four listed strings.
#[arg(
long,
default_value = "table",
value_parser = ["table", "json", "jsonl", "csv"],
help_heading = "Output"
)]
pub format: String,
/// Filter by author username
#[arg(short = 'a', long, help_heading = "Filters")]
pub author: Option<String>,
/// Filter by note type (DiffNote, DiscussionNote)
// No value_parser here: any string is accepted at parse time.
#[arg(long, help_heading = "Filters")]
pub note_type: Option<String>,
/// Filter by body text (substring match)
#[arg(long, help_heading = "Filters")]
pub contains: Option<String>,
/// Filter by internal note ID
#[arg(long, help_heading = "Filters")]
pub note_id: Option<i64>,
/// Filter by GitLab note ID
#[arg(long, help_heading = "Filters")]
pub gitlab_note_id: Option<i64>,
/// Filter by discussion ID
#[arg(long, help_heading = "Filters")]
pub discussion_id: Option<String>,
/// Include system notes (excluded by default)
#[arg(long, help_heading = "Filters")]
pub include_system: bool,
/// Filter to notes on a specific issue IID (requires --project or default_project)
// Mutually exclusive with --for-mr (declared on both sides via `conflicts_with`).
#[arg(long, conflicts_with = "for_mr", help_heading = "Filters")]
pub for_issue: Option<i64>,
/// Filter to notes on a specific MR IID (requires --project or default_project)
// Mutually exclusive with --for-issue.
#[arg(long, conflicts_with = "for_issue", help_heading = "Filters")]
pub for_mr: Option<i64>,
/// Filter by project path
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Filter by time (7d, 2w, 1m, or YYYY-MM-DD)
// Kept as a raw string here; parsing of the relative/absolute forms happens elsewhere.
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
/// Filter until date (YYYY-MM-DD, inclusive end-of-day)
#[arg(long, help_heading = "Filters")]
pub until: Option<String>,
/// Filter by file path (exact match or prefix with trailing /)
#[arg(long, help_heading = "Filters")]
pub path: Option<String>,
/// Filter by resolution status (any, unresolved, resolved)
// Optional, with no default: `None` means the flag was not given.
#[arg(
long,
value_parser = ["any", "unresolved", "resolved"],
help_heading = "Filters"
)]
pub resolution: Option<String>,
/// Sort field (created, updated)
#[arg(
long,
value_parser = ["created", "updated"],
default_value = "created",
help_heading = "Sorting"
)]
pub sort: String,
/// Sort ascending (default: descending)
#[arg(long, help_heading = "Sorting")]
pub asc: bool,
/// Open first matching item in browser
#[arg(long, help_heading = "Actions")]
pub open: bool,
}
#[derive(Parser)]
pub struct IngestArgs {
/// Entity to ingest (issues, mrs). Omit to ingest everything
@@ -556,8 +666,8 @@ pub struct SearchArgs {
#[arg(long, default_value = "hybrid", value_parser = ["lexical", "hybrid", "semantic"], help_heading = "Mode")]
pub mode: String,
/// Filter by source type (issue, mr, discussion)
#[arg(long = "type", value_name = "TYPE", value_parser = ["issue", "mr", "discussion"], help_heading = "Filters")]
/// Filter by source type (issue, mr, discussion, note)
#[arg(long = "type", value_name = "TYPE", value_parser = ["issue", "mr", "discussion", "note"], help_heading = "Filters")]
pub source_type: Option<String>,
/// Filter by author username

View File

@@ -64,6 +64,10 @@ pub fn expand_fields_preset(fields: &[String], entity: &str) -> Vec<String> {
.iter()
.map(|s| (*s).to_string())
.collect(),
"notes" => ["id", "author_username", "body", "created_at_iso"]
.iter()
.map(|s| (*s).to_string())
.collect(),
_ => fields.to_vec(),
}
} else {
@@ -82,3 +86,25 @@ pub fn strip_schemas(commands: &mut serde_json::Value) {
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The `minimal` preset for the `notes` entity expands to its four fields.
    #[test]
    fn test_expand_fields_preset_notes() {
        let requested = [String::from("minimal")];
        let expanded = expand_fields_preset(&requested, "notes");
        assert_eq!(
            expanded,
            ["id", "author_username", "body", "created_at_iso"]
        );
    }

    /// Explicit field names are returned unchanged.
    #[test]
    fn test_expand_fields_preset_passthrough() {
        let requested = [String::from("id"), String::from("body")];
        let expanded = expand_fields_preset(&requested, "notes");
        assert_eq!(expanded, ["id", "body"]);
    }
}

View File

@@ -69,10 +69,22 @@ const MIGRATIONS: &[(&str, &str)] = &[
"021",
include_str!("../../migrations/021_work_item_status.sql"),
),
(
"022",
include_str!("../../migrations/022_notes_query_index.sql"),
),
(
"023",
include_str!("../../migrations/023_issue_detail_fields.sql"),
),
(
"024",
include_str!("../../migrations/024_note_documents.sql"),
),
(
"025",
include_str!("../../migrations/025_note_dirty_backfill.sql"),
),
];
pub fn create_connection(db_path: &Path) -> Result<Connection> {
@@ -316,3 +328,639 @@ pub fn get_schema_version(conn: &Connection) -> i32 {
)
.unwrap_or(0)
}
#[cfg(test)]
mod tests {
use super::*;
/// Opens an in-memory database and runs `run_migrations` on it.
fn setup_migrated_db() -> Connection {
    let db = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Reports whether `sqlite_master` lists an index called `index_name`.
/// A failed lookup is treated as "not found".
fn index_exists(conn: &Connection, index_name: &str) -> bool {
    let lookup = conn.query_row(
        "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='index' AND name=?1",
        [index_name],
        |row| row.get::<_, bool>(0),
    );
    matches!(lookup, Ok(true))
}
/// Reports whether `PRAGMA table_info(<table>)` lists a column named `column`.
/// Rows that fail to decode are ignored.
fn column_exists(conn: &Connection, table: &str, column: &str) -> bool {
    let pragma = format!("PRAGMA table_info({})", table);
    let mut stmt = conn.prepare(&pragma).unwrap();
    // Result column 1 of table_info is read as the column name.
    let mut names = stmt
        .query_map([], |row| row.get::<_, String>(1))
        .unwrap()
        .flatten();
    let found = names.any(|name| name == column);
    found
}
#[test]
fn test_migration_022_indexes_exist() {
    let conn = setup_migrated_db();

    // The first three indexes are new in migration 022. Of the discussion JOIN
    // indexes, idx_discussions_issue_id is new; idx_discussions_mr_id already
    // existed from migration 006, but IF NOT EXISTS makes that safe.
    let expected_indexes = [
        "idx_notes_user_created",
        "idx_notes_project_created",
        "idx_notes_author_id",
        "idx_discussions_issue_id",
        "idx_discussions_mr_id",
    ];
    for index_name in expected_indexes.iter() {
        assert!(
            index_exists(&conn, index_name),
            "{} should exist",
            index_name
        );
    }

    // Migration 022 also adds the author_id column on notes.
    let has_author_id = column_exists(&conn, "notes", "author_id");
    assert!(has_author_id, "notes.author_id column should exist");
}
/// Inserts a minimal project row (for FK satisfaction) and returns its rowid.
fn insert_test_project(conn: &Connection) -> i64 {
    const INSERT_PROJECT: &str =
        "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) \
         VALUES (1000, 'test/project', 'https://example.com/test/project')";
    conn.execute(INSERT_PROJECT, []).unwrap();
    conn.last_insert_rowid()
}
/// Inserts a minimal issue row under `project_id` and returns its rowid.
fn insert_test_issue(conn: &Connection, project_id: i64) -> i64 {
    const INSERT_ISSUE: &str =
        "INSERT INTO issues (gitlab_id, project_id, iid, state, author_username, \
         created_at, updated_at, last_seen_at) \
         VALUES (100, ?1, 1, 'opened', 'alice', 1000, 1000, 1000)";
    conn.execute(INSERT_ISSUE, [project_id]).unwrap();
    conn.last_insert_rowid()
}
/// Inserts a minimal issue discussion for `issue_id` and returns its rowid.
fn insert_test_discussion(conn: &Connection, project_id: i64, issue_id: i64) -> i64 {
    const INSERT_DISCUSSION: &str =
        "INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, \
         noteable_type, last_seen_at) \
         VALUES ('disc-001', ?1, ?2, 'Issue', 1000)";
    let bindings = rusqlite::params![project_id, issue_id];
    conn.execute(INSERT_DISCUSSION, bindings).unwrap();
    conn.last_insert_rowid()
}
/// Inserts a minimal note into `discussion_id` / `project_id` and returns its rowid.
///
/// `is_system` selects whether the row is stored as a system note
/// (`is_system = 1`) or a regular note (`is_system = 0`).
fn insert_test_note(
    conn: &Connection,
    gitlab_id: i64,
    discussion_id: i64,
    project_id: i64,
    is_system: bool,
) -> i64 {
    conn.execute(
        "INSERT INTO notes (gitlab_id, discussion_id, project_id, is_system, \
         author_username, body, created_at, updated_at, last_seen_at) \
         VALUES (?1, ?2, ?3, ?4, 'alice', 'note body', 1000, 1000, 1000)",
        rusqlite::params![gitlab_id, discussion_id, project_id, i32::from(is_system)],
    )
    .unwrap();
    conn.last_insert_rowid()
}
/// Inserts a document row with fixed content/hash for the given source and
/// returns its rowid.
fn insert_test_document(
    conn: &Connection,
    source_type: &str,
    source_id: i64,
    project_id: i64,
) -> i64 {
    const INSERT_DOCUMENT: &str =
        "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) \
         VALUES (?1, ?2, ?3, 'test content', 'hash123')";
    let bindings = rusqlite::params![source_type, source_id, project_id];
    conn.execute(INSERT_DOCUMENT, bindings).unwrap();
    conn.last_insert_rowid()
}
#[test]
fn test_migration_024_allows_note_source_type() {
    let conn = setup_migrated_db();
    let project_id = insert_test_project(&conn);

    // documents must accept source_type = 'note' after migration 024.
    let document_insert = conn.execute(
        "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) \
         VALUES ('note', 1, ?1, 'note content', 'hash-note')",
        [project_id],
    );
    document_insert.expect("INSERT with source_type='note' into documents should succeed");

    // ...and so must dirty_sources.
    let dirty_insert = conn.execute(
        "INSERT INTO dirty_sources (source_type, source_id, queued_at) \
         VALUES ('note', 1, 1000)",
        [],
    );
    dirty_insert.expect("INSERT with source_type='note' into dirty_sources should succeed");
}
#[test]
fn test_migration_024_preserves_existing_data() {
    // Migration 024 sits at index 23 (0-based) of MIGRATIONS. The index is
    // hardcoded so that appending later migrations cannot silently shift which
    // migration this test exercises.
    const MIGRATION_024_INDEX: usize = 23;

    let conn = create_connection(Path::new(":memory:")).unwrap();
    // Apply migrations 001-023 only (indices 0..23), then seed pre-024 data.
    run_migrations_up_to(&conn, MIGRATION_024_INDEX);
    let project_id = insert_test_project(&conn);

    // A document with a pre-existing source_type.
    conn.execute(
        "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash, title) \
         VALUES ('issue', 1, ?1, 'issue content', 'hash-issue', 'Test Issue')",
        [project_id],
    )
    .unwrap();
    let doc_id: i64 = conn.last_insert_rowid();

    // Junction rows attached to that document.
    let junction_inserts = [
        "INSERT INTO document_labels (document_id, label_name) VALUES (?1, 'bug')",
        "INSERT INTO document_paths (document_id, path) VALUES (?1, 'src/main.rs')",
    ];
    for statement in junction_inserts.iter().copied() {
        conn.execute(statement, [doc_id]).unwrap();
    }

    // One queued dirty_sources row.
    conn.execute(
        "INSERT INTO dirty_sources (source_type, source_id, queued_at) VALUES ('issue', 1, 1000)",
        [],
    )
    .unwrap();

    // Apply the table-rebuild migration itself.
    run_single_migration(&conn, MIGRATION_024_INDEX);

    // The document row survived with its data intact.
    let survivor: (String, String, String) = conn
        .query_row(
            "SELECT source_type, content_text, title FROM documents WHERE id = ?1",
            [doc_id],
            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
        )
        .unwrap();
    assert_eq!(survivor.0, "issue");
    assert_eq!(survivor.1, "issue content");
    assert_eq!(survivor.2, "Test Issue");

    // Junction rows survived as well.
    let rows_for_doc = |sql: &str| -> i64 {
        conn.query_row(sql, [doc_id], |row| row.get(0)).unwrap()
    };
    assert_eq!(
        rows_for_doc("SELECT COUNT(*) FROM document_labels WHERE document_id = ?1"),
        1
    );
    assert_eq!(
        rows_for_doc("SELECT COUNT(*) FROM document_paths WHERE document_id = ?1"),
        1
    );

    // The dirty queue entry is still there.
    let queued: i64 = conn
        .query_row("SELECT COUNT(*) FROM dirty_sources", [], |row| row.get(0))
        .unwrap();
    assert_eq!(queued, 1);
}
#[test]
fn test_migration_024_fts_triggers_intact() {
    let conn = setup_migrated_db();
    let project_id = insert_test_project(&conn);

    // Runs a COUNT(*) query against the FTS table and returns the count.
    let fts_count = |sql: &str| -> i64 { conn.query_row(sql, [], |row| row.get(0)).unwrap() };

    // Insert trigger: a document created after the migration must be indexed.
    let doc_id = insert_test_document(&conn, "note", 1, project_id);
    let after_insert =
        fts_count("SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'test'");
    assert!(after_insert > 0, "FTS trigger should have created an entry");

    // Update trigger: the new content must become searchable.
    conn.execute(
        "UPDATE documents SET content_text = 'updated content' WHERE id = ?1",
        [doc_id],
    )
    .unwrap();
    let after_update =
        fts_count("SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'updated'");
    assert!(
        after_update > 0,
        "FTS update trigger should reflect new content"
    );

    // Delete trigger: the entry must disappear with the document.
    conn.execute("DELETE FROM documents WHERE id = ?1", [doc_id])
        .unwrap();
    let after_delete =
        fts_count("SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'updated'");
    assert_eq!(
        after_delete, 0,
        "FTS delete trigger should remove the entry"
    );
}
#[test]
fn test_migration_024_row_counts_preserved() {
    let conn = setup_migrated_db();

    // After the full migration run both rebuilt tables must exist, be
    // queryable, and be empty on a fresh database.
    let checks = [
        (
            "SELECT COUNT(*) FROM documents",
            "Fresh DB should have 0 documents",
        ),
        (
            "SELECT COUNT(*) FROM dirty_sources",
            "Fresh DB should have 0 dirty_sources",
        ),
    ];
    for (sql, message) in checks.iter().copied() {
        let rows: i64 = conn.query_row(sql, [], |row| row.get(0)).unwrap();
        assert_eq!(rows, 0, "{}", message);
    }
}
#[test]
fn test_migration_024_integrity_checks_pass() {
    let conn = setup_migrated_db();

    // PRAGMA integrity_check: the first result row is expected to read "ok".
    let report = conn.query_row("PRAGMA integrity_check", [], |row| row.get::<_, String>(0));
    assert_eq!(
        report.unwrap(),
        "ok",
        "Database integrity check should pass"
    );

    // pragma_foreign_key_check returns rows only if there are violations.
    let violations = conn.query_row("SELECT COUNT(*) FROM pragma_foreign_key_check", [], |row| {
        row.get::<_, i64>(0)
    });
    assert_eq!(
        violations.unwrap(),
        0,
        "No foreign key violations should exist"
    );
}
#[test]
fn test_migration_024_note_delete_trigger_cleans_document() {
    let conn = setup_migrated_db();
    let project_id = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, project_id);
    let discussion_id = insert_test_discussion(&conn, project_id, issue_id);
    let note_id = insert_test_note(&conn, 200, discussion_id, project_id, false);

    // Give the note a document so there is something for the trigger to remove.
    insert_test_document(&conn, "note", note_id, project_id);

    let note_documents = || -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?1",
            [note_id],
            |row| row.get(0),
        )
        .unwrap()
    };
    assert_eq!(note_documents(), 1);

    // Deleting the note must cascade to its document via the trigger.
    conn.execute("DELETE FROM notes WHERE id = ?1", [note_id])
        .unwrap();

    assert_eq!(
        note_documents(),
        0,
        "notes_ad_cleanup trigger should delete the document"
    );
}
#[test]
fn test_migration_024_note_system_flip_trigger_cleans_document() {
    let conn = setup_migrated_db();
    let project_id = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, project_id);
    let discussion_id = insert_test_discussion(&conn, project_id, issue_id);
    let note_id = insert_test_note(&conn, 201, discussion_id, project_id, false);

    // Give the note a document so there is something for the trigger to remove.
    insert_test_document(&conn, "note", note_id, project_id);

    let note_documents = || -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?1",
            [note_id],
            |row| row.get(0),
        )
        .unwrap()
    };
    assert_eq!(note_documents(), 1);

    // Turning the note into a system note (0 -> 1) must drop its document.
    conn.execute("UPDATE notes SET is_system = 1 WHERE id = ?1", [note_id])
        .unwrap();

    assert_eq!(
        note_documents(),
        0,
        "notes_au_system_cleanup trigger should delete the document"
    );
}
#[test]
fn test_migration_024_system_note_delete_trigger_does_not_fire() {
    let conn = setup_migrated_db();
    let project_id = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, project_id);
    let discussion_id = insert_test_discussion(&conn, project_id, issue_id);

    // This time the note is a system note.
    let note_id = insert_test_note(&conn, 202, discussion_id, project_id, true);

    // System notes should not have documents in practice; one is inserted by
    // hand purely to exercise the trigger's WHEN guard.
    insert_test_document(&conn, "note", note_id, project_id);

    let note_documents = || -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?1",
            [note_id],
            |row| row.get(0),
        )
        .unwrap()
    };
    assert_eq!(note_documents(), 1);

    // The trigger is guarded by WHEN old.is_system = 0, so it must stay silent here.
    conn.execute("DELETE FROM notes WHERE id = ?1", [note_id])
        .unwrap();

    assert_eq!(
        note_documents(),
        1,
        "notes_ad_cleanup trigger should NOT fire for system notes"
    );
}
/// Applies the first `up_to` entries of `MIGRATIONS` (indices `0..up_to`) and
/// records each one in `schema_version`, creating that table first if needed.
fn run_migrations_up_to(conn: &Connection, up_to: usize) {
    conn.execute_batch(
        "CREATE TABLE IF NOT EXISTS schema_version ( \
         version INTEGER PRIMARY KEY, applied_at INTEGER NOT NULL, description TEXT);",
    )
    .unwrap();

    let pending = &MIGRATIONS[..up_to];
    pending.iter().for_each(|&(label, script)| {
        let version: i32 = label.parse().unwrap();
        conn.execute_batch(script).unwrap();

        let bindings = rusqlite::params![version, label];
        conn.execute(
            "INSERT OR REPLACE INTO schema_version (version, applied_at, description) \
             VALUES (?1, strftime('%s', 'now') * 1000, ?2)",
            bindings,
        )
        .unwrap();
    });
}
/// Applies the `MIGRATIONS` entry at `index` (0-based) and records it in
/// `schema_version`.
fn run_single_migration(conn: &Connection, index: usize) {
    let &(label, script) = &MIGRATIONS[index];
    let version: i32 = label.parse().unwrap();

    conn.execute_batch(script).unwrap();

    let bindings = rusqlite::params![version, label];
    conn.execute(
        "INSERT OR REPLACE INTO schema_version (version, applied_at, description) \
         VALUES (?1, strftime('%s', 'now') * 1000, ?2)",
        bindings,
    )
    .unwrap();
}
#[test]
fn test_migration_025_backfills_existing_notes() {
    // Migration 025 sits at index 24 (0-based); everything before it is 001-024.
    const MIGRATION_025_INDEX: usize = 24;

    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations_up_to(&conn, MIGRATION_025_INDEX);

    let project_id = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, project_id);
    let discussion_id = insert_test_discussion(&conn, project_id, issue_id);

    // Five regular notes followed by two system notes.
    for gitlab_id in 301..=305 {
        insert_test_note(&conn, gitlab_id, discussion_id, project_id, false);
    }
    for gitlab_id in 401..=402 {
        insert_test_note(&conn, gitlab_id, discussion_id, project_id, true);
    }

    run_single_migration(&conn, MIGRATION_025_INDEX);

    let queued: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(
        queued, 5,
        "Migration 025 should backfill 5 non-system notes"
    );

    // Collects the single i64 column of a parameterless query.
    let collect_ids = |sql: &str| -> Vec<i64> {
        let mut stmt = conn.prepare(sql).unwrap();
        let ids = stmt
            .query_map([], |row| row.get(0))
            .unwrap()
            .collect::<std::result::Result<Vec<_>, _>>()
            .unwrap();
        ids
    };

    // None of the system notes may appear among the queued note ids.
    let queued_note_ids = collect_ids(
        "SELECT source_id FROM dirty_sources WHERE source_type = 'note' ORDER BY source_id",
    );
    let system_note_ids = collect_ids("SELECT id FROM notes WHERE is_system = 1 ORDER BY id");
    for sys_id in &system_note_ids {
        assert!(
            !queued_note_ids.contains(sys_id),
            "System note id {} should not be in dirty_sources",
            sys_id
        );
    }
}
#[test]
fn test_migration_025_idempotent_with_existing_documents() {
    // Migration 025 sits at index 24 (0-based).
    const MIGRATION_025_INDEX: usize = 24;

    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations_up_to(&conn, MIGRATION_025_INDEX);

    let project_id = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, project_id);
    let discussion_id = insert_test_discussion(&conn, project_id, issue_id);

    // Three regular notes; the first two already have documents, simulating
    // docs generated before the backfill runs.
    let mut note_ids: Vec<i64> = Vec::with_capacity(3);
    for gitlab_id in 501..=503 {
        note_ids.push(insert_test_note(
            &conn,
            gitlab_id,
            discussion_id,
            project_id,
            false,
        ));
    }
    for documented in &note_ids[..2] {
        insert_test_document(&conn, "note", *documented, project_id);
    }

    run_single_migration(&conn, MIGRATION_025_INDEX);

    let queued: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(
        queued, 1,
        "Only the note without a document should be backfilled"
    );

    // The queued note must be the one without a document.
    let queued_id: i64 = conn
        .query_row(
            "SELECT source_id FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(queued_id, note_ids[2]);
}
#[test]
fn test_migration_025_skips_notes_already_in_dirty_queue() {
    // Migration 025 sits at index 24 (0-based).
    const MIGRATION_025_INDEX: usize = 24;

    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations_up_to(&conn, MIGRATION_025_INDEX);

    let project_id = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, project_id);
    let discussion_id = insert_test_discussion(&conn, project_id, issue_id);

    // Three regular notes, the first of which is queued ahead of the migration.
    let mut note_ids: Vec<i64> = Vec::with_capacity(3);
    for gitlab_id in 601..=603 {
        note_ids.push(insert_test_note(
            &conn,
            gitlab_id,
            discussion_id,
            project_id,
            false,
        ));
    }
    let prequeued_id = note_ids[0];
    conn.execute(
        "INSERT INTO dirty_sources (source_type, source_id, queued_at) VALUES ('note', ?1, 999)",
        [prequeued_id],
    )
    .unwrap();

    run_single_migration(&conn, MIGRATION_025_INDEX);

    let queued: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(
        queued, 3,
        "All 3 notes should be in dirty_sources (1 pre-existing + 2 new)"
    );

    // The row queued before the migration must keep its queued_at value.
    let kept_queued_at: i64 = conn
        .query_row(
            "SELECT queued_at FROM dirty_sources WHERE source_type = 'note' AND source_id = ?1",
            [prequeued_id],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(
        kept_queued_at, 999,
        "ON CONFLICT DO NOTHING should preserve the original queued_at"
    );
}
}

View File

@@ -2,13 +2,14 @@ use chrono::DateTime;
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::BTreeSet;
use std::collections::{BTreeSet, HashMap};
use std::fmt::Write as _;
use super::truncation::{
MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap,
};
use crate::core::error::Result;
use crate::core::time::ms_to_iso;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
@@ -16,6 +17,7 @@ pub enum SourceType {
Issue,
MergeRequest,
Discussion,
Note,
}
impl SourceType {
@@ -24,6 +26,7 @@ impl SourceType {
Self::Issue => "issue",
Self::MergeRequest => "merge_request",
Self::Discussion => "discussion",
Self::Note => "note",
}
}
@@ -32,6 +35,7 @@ impl SourceType {
"issue" | "issues" => Some(Self::Issue),
"mr" | "mrs" | "merge_request" | "merge_requests" => Some(Self::MergeRequest),
"discussion" | "discussions" => Some(Self::Discussion),
"note" | "notes" => Some(Self::Note),
_ => None,
}
}
@@ -515,6 +519,521 @@ pub fn extract_discussion_document(
}))
}
/// Loads the note with internal id `note_id`, gathers metadata about its parent
/// issue or merge request, and hands everything to `build_note_document`.
///
/// Returns `Ok(None)` without building a document when:
/// - no row matches `note_id` (after joining `discussions` and `projects`),
/// - the note is a system note,
/// - the discussion's `noteable_type` is neither `"Issue"` nor `"MergeRequest"`,
/// - the matching parent id column (`issue_id` / `merge_request_id`) is NULL, or
/// - the parent row itself cannot be found.
///
/// Otherwise the result of `build_note_document` is returned unchanged.
///
/// # Errors
/// Any other database error is propagated.
pub fn extract_note_document(conn: &Connection, note_id: i64) -> Result<Option<DocumentData>> {
// The positional indices used in the closure below follow this SELECT list.
let row = conn.query_row(
"SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system,
n.created_at, n.updated_at, n.position_new_path, n.position_new_line,
n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by,
d.noteable_type, d.issue_id, d.merge_request_id,
p.path_with_namespace, p.id AS project_id
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN projects p ON n.project_id = p.id
WHERE n.id = ?1",
rusqlite::params![note_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, Option<String>>(3)?,
row.get::<_, Option<String>>(4)?,
row.get::<_, bool>(5)?,
row.get::<_, i64>(6)?,
row.get::<_, i64>(7)?,
row.get::<_, Option<String>>(8)?,
row.get::<_, Option<i64>>(9)?,
row.get::<_, Option<String>>(10)?,
row.get::<_, Option<i64>>(11)?,
row.get::<_, bool>(12)?,
row.get::<_, bool>(13)?,
row.get::<_, Option<String>>(14)?,
row.get::<_, String>(15)?,
row.get::<_, Option<i64>>(16)?,
row.get::<_, Option<i64>>(17)?,
row.get::<_, String>(18)?,
row.get::<_, i64>(19)?,
))
},
);
// Underscore-prefixed bindings are selected but not used further in this function.
let (
_id,
gitlab_id,
author_username,
body,
note_type,
is_system,
created_at,
updated_at,
position_new_path,
position_new_line,
position_old_path,
_position_old_line,
resolvable,
resolved,
_resolved_by,
noteable_type,
issue_id,
merge_request_id,
path_with_namespace,
project_id,
) = match row {
Ok(r) => r,
// A missing note is reported as "no document", not as an error.
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
// System notes never produce a document.
if is_system {
return Ok(None);
}
// Resolve the parent's iid/title/web_url and its label names (sorted by name),
// from the issue tables or the merge request tables depending on noteable_type.
let (parent_iid, parent_title, parent_web_url, parent_type_label, labels) =
match noteable_type.as_str() {
"Issue" => {
let parent_id = match issue_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, "Issue", labels)
}
"MergeRequest" => {
let parent_id = match merge_request_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, "MergeRequest", labels)
}
// Any other noteable type yields no document.
_ => return Ok(None),
};
build_note_document(
note_id,
gitlab_id,
author_username,
body,
note_type,
created_at,
updated_at,
position_new_path,
position_new_line,
position_old_path,
resolvable,
resolved,
parent_iid,
parent_title.as_deref(),
parent_web_url.as_deref(),
&labels,
parent_type_label,
&path_with_namespace,
project_id,
)
}
/// Metadata of a note's parent (an issue or a merge request) that is needed to
/// render a note document.
///
/// Values are produced by `fetch_parent_metadata` and memoised in
/// `ParentMetadataCache`.
#[derive(Debug, Clone)]
pub struct ParentMetadata {
    /// `iid` column of the parent row.
    pub iid: i64,
    /// `title` column of the parent row; `None` when the column is NULL.
    pub title: Option<String>,
    /// `web_url` column of the parent row; `None` when the column is NULL.
    pub web_url: Option<String>,
    /// Names of the labels linked to the parent, ordered by name.
    pub labels: Vec<String>,
    /// Project path handed in by the caller when the entry was fetched.
    pub project_path: String,
}
/// Memoises parent lookups so that notes sharing a parent reuse one fetch.
///
/// Entries are keyed by `(noteable_type, parent_id)`. A stored `None` records
/// that the lookup found nothing, so misses are remembered as well.
pub struct ParentMetadataCache {
cache: HashMap<(String, i64), Option<ParentMetadata>>,
}
impl Default for ParentMetadataCache {
fn default() -> Self {
Self::new()
}
}
impl ParentMetadataCache {
    /// Creates an empty cache.
    pub fn new() -> Self {
        Self {
            cache: HashMap::new(),
        }
    }

    /// Returns the metadata for `(noteable_type, parent_id)`, fetching it on
    /// first use.
    ///
    /// The outcome of the first successful lookup — including "not found"
    /// (`None`) — is stored and returned for every later call with the same
    /// key. `project_path` is only consulted when the entry has to be fetched.
    ///
    /// # Errors
    ///
    /// Propagates any error from `fetch_parent_metadata`; nothing is cached in
    /// that case, so the next call retries the fetch.
    pub fn get_or_fetch(
        &mut self,
        conn: &Connection,
        noteable_type: &str,
        parent_id: i64,
        project_path: &str,
    ) -> Result<Option<&ParentMetadata>> {
        // The entry API hashes the key once and moves it into the map, instead
        // of `contains_key` + `insert(key.clone())` + `get`.
        let slot = match self.cache.entry((noteable_type.to_string(), parent_id)) {
            std::collections::hash_map::Entry::Occupied(hit) => hit.into_mut(),
            std::collections::hash_map::Entry::Vacant(miss) => miss.insert(
                fetch_parent_metadata(conn, noteable_type, parent_id, project_path)?,
            ),
        };
        Ok(slot.as_ref())
    }
}
/// Loads the iid, title, web URL and label names of one issue or merge request.
///
/// Returns `Ok(None)` when `noteable_type` is neither `"Issue"` nor
/// `"MergeRequest"`, or when no row with id `parent_id` exists in the matching
/// table. `project_path` is copied into the result; it is not used in a query.
///
/// # Errors
///
/// Any database error other than "query returned no rows" is propagated.
fn fetch_parent_metadata(
    conn: &Connection,
    noteable_type: &str,
    parent_id: i64,
    project_path: &str,
) -> Result<Option<ParentMetadata>> {
    // Both parent kinds follow the same steps and differ only in the tables
    // they read, so pick the SQL once and share the rest.
    let (parent_sql, labels_sql) = match noteable_type {
        "Issue" => (
            "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
            "SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
        ),
        "MergeRequest" => (
            "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
            "SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
        ),
        _ => return Ok(None),
    };

    let parent = conn.query_row(parent_sql, rusqlite::params![parent_id], |row| {
        Ok((
            row.get::<_, i64>(0)?,
            row.get::<_, Option<String>>(1)?,
            row.get::<_, Option<String>>(2)?,
        ))
    });
    let (iid, title, web_url) = match parent {
        Ok(found) => found,
        // A missing parent is an expected outcome, not an error.
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
    };

    let mut label_stmt = conn.prepare_cached(labels_sql)?;
    let labels: Vec<String> = label_stmt
        .query_map(rusqlite::params![parent_id], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    Ok(Some(ParentMetadata {
        iid,
        title,
        web_url,
        labels,
        project_path: project_path.to_string(),
    }))
}
/// Builds the document for note `note_id`, resolving its parent through `cache`.
///
/// Returns `Ok(None)` when any of the following holds:
/// - the note query yields no row,
/// - the note is a system note,
/// - the discussion's `noteable_type` is neither `"Issue"` nor `"MergeRequest"`,
/// - the parent id column matching that type is NULL,
/// - the parent cannot be found.
///
/// # Errors
///
/// Propagates database errors from the note query and from the parent lookup.
pub fn extract_note_document_cached(
    conn: &Connection,
    note_id: i64,
    cache: &mut ParentMetadataCache,
) -> Result<Option<DocumentData>> {
    let row = conn.query_row(
        "SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system,
n.created_at, n.updated_at, n.position_new_path, n.position_new_line,
n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by,
d.noteable_type, d.issue_id, d.merge_request_id,
p.path_with_namespace, p.id AS project_id
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN projects p ON n.project_id = p.id
WHERE n.id = ?1",
        rusqlite::params![note_id],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, i64>(1)?,
                row.get::<_, Option<String>>(2)?,
                row.get::<_, Option<String>>(3)?,
                row.get::<_, Option<String>>(4)?,
                row.get::<_, bool>(5)?,
                row.get::<_, i64>(6)?,
                row.get::<_, i64>(7)?,
                row.get::<_, Option<String>>(8)?,
                row.get::<_, Option<i64>>(9)?,
                row.get::<_, Option<String>>(10)?,
                row.get::<_, Option<i64>>(11)?,
                row.get::<_, bool>(12)?,
                row.get::<_, bool>(13)?,
                row.get::<_, Option<String>>(14)?,
                row.get::<_, String>(15)?,
                row.get::<_, Option<i64>>(16)?,
                row.get::<_, Option<i64>>(17)?,
                row.get::<_, String>(18)?,
                row.get::<_, i64>(19)?,
            ))
        },
    );
    let (
        _id,
        gitlab_id,
        author_username,
        body,
        note_type,
        is_system,
        created_at,
        updated_at,
        position_new_path,
        position_new_line,
        position_old_path,
        _position_old_line,
        resolvable,
        resolved,
        _resolved_by,
        noteable_type,
        issue_id,
        merge_request_id,
        path_with_namespace,
        project_id,
    ) = match row {
        Ok(r) => r,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
    };

    // System notes never become documents.
    if is_system {
        return Ok(None);
    }

    // The parent id lives in the column that matches the discussion's type;
    // an unknown type or a NULL id means there is nothing to attach to.
    let parent_id = match (noteable_type.as_str(), issue_id, merge_request_id) {
        ("Issue", Some(pid), _) | ("MergeRequest", _, Some(pid)) => pid,
        _ => return Ok(None),
    };

    let Some(parent) =
        cache.get_or_fetch(conn, &noteable_type, parent_id, &path_with_namespace)?
    else {
        return Ok(None);
    };

    // Borrow straight from the cached entry: the builder only needs slices,
    // so the label list is not cloned per note.
    build_note_document(
        note_id,
        gitlab_id,
        author_username,
        body,
        note_type,
        created_at,
        updated_at,
        position_new_path,
        position_new_line,
        position_old_path,
        resolvable,
        resolved,
        parent.iid,
        parent.title.as_deref(),
        parent.web_url.as_deref(),
        &parent.labels,
        noteable_type.as_str(),
        &path_with_namespace,
        project_id,
    )
}
/// Renders one note and its parent metadata into a `DocumentData` record.
///
/// The content is a `[[Note]]` header of `key: value` lines followed by a
/// `--- Body ---` separator and the note body (empty when `body` is `None`).
/// Missing values fall back to `"(untitled)"` for the parent title, `"Note"`
/// for the note type and `"unknown"` for the author. A parent type other than
/// `"Issue"` is titled as a merge request (`MR !iid`).
///
/// This function always returns `Ok(Some(_))`.
// One parameter per note/parent column keeps the two callers free of an
// intermediate struct, at the cost of a long argument list.
#[allow(clippy::too_many_arguments)]
fn build_note_document(
    note_id: i64,
    gitlab_id: i64,
    author_username: Option<String>,
    body: Option<String>,
    note_type: Option<String>,
    created_at: i64,
    updated_at: i64,
    position_new_path: Option<String>,
    position_new_line: Option<i64>,
    position_old_path: Option<String>,
    resolvable: bool,
    resolved: bool,
    parent_iid: i64,
    parent_title: Option<&str>,
    parent_web_url: Option<&str>,
    labels: &[String],
    parent_type_label: &str,
    path_with_namespace: &str,
    project_id: i64,
) -> Result<Option<DocumentData>> {
    // Distinct, non-empty paths; the BTreeSet de-duplicates and sorts them.
    let paths: Vec<String> = [&position_old_path, &position_new_path]
        .into_iter()
        .flatten()
        .filter(|candidate| !candidate.is_empty())
        .cloned()
        .collect::<BTreeSet<String>>()
        .into_iter()
        .collect();

    let url = parent_web_url.map(|base| format!("{}#note_{}", base, gitlab_id));

    let shown_title = parent_title.unwrap_or("(untitled)");
    let shown_kind = note_type.as_deref().unwrap_or("Note");
    let shown_author = author_username.as_deref().unwrap_or("unknown");

    let parent_ref = match parent_type_label {
        "Issue" => format!("Issue #{}", parent_iid),
        _ => format!("MR !{}", parent_iid),
    };
    let title = format!(
        "Note by @{} on {}: {}",
        shown_author, parent_ref, shown_title
    );

    // Header: fixed fields first, then the optional ones.
    let mut text = String::new();
    let _ = writeln!(text, "[[Note]]");
    let _ = writeln!(text, "source_type: note");
    let _ = writeln!(text, "note_gitlab_id: {}", gitlab_id);
    let _ = writeln!(text, "project: {}", path_with_namespace);
    let _ = writeln!(text, "parent_type: {}", parent_type_label);
    let _ = writeln!(text, "parent_iid: {}", parent_iid);
    let _ = writeln!(text, "parent_title: {}", shown_title);
    let _ = writeln!(text, "note_type: {}", shown_kind);
    let _ = writeln!(text, "author: @{}", shown_author);
    let _ = writeln!(text, "created_at: {}", ms_to_iso(created_at));
    if resolvable {
        let _ = writeln!(text, "resolved: {}", resolved);
    }
    // Only diff notes carry a location line, and only when a new path exists.
    if shown_kind == "DiffNote" {
        match (position_new_path.as_deref(), position_new_line) {
            (Some(file), Some(line)) => {
                let _ = writeln!(text, "path: {}:{}", file, line);
            }
            (Some(file), None) => {
                let _ = writeln!(text, "path: {}", file);
            }
            (None, _) => {}
        }
    }
    if !labels.is_empty() {
        let _ = writeln!(text, "labels: {}", labels.join(", "));
    }
    if let Some(link) = url.as_deref() {
        let _ = writeln!(text, "url: {}", link);
    }

    text.push_str("\n--- Body ---\n\n");
    text.push_str(body.as_deref().unwrap_or(""));

    let labels_hash = compute_list_hash(labels);
    let paths_hash = compute_list_hash(&paths);
    // The content hash is taken over the capped text, i.e. over what is stored.
    let capped = truncate_hard_cap(&text);
    let content_hash = compute_content_hash(&capped.content);

    let document = DocumentData {
        source_type: SourceType::Note,
        source_id: note_id,
        project_id,
        author_username,
        labels: labels.to_vec(),
        paths,
        labels_hash,
        paths_hash,
        created_at,
        updated_at,
        url,
        title: Some(title),
        content_text: capped.content,
        content_hash,
        is_truncated: capped.is_truncated,
        truncated_reason: capped.reason.map(|r| r.as_str().to_string()),
    };
    Ok(Some(document))
}
#[cfg(test)]
mod tests {
use super::*;
@@ -545,6 +1064,26 @@ mod tests {
assert_eq!(SourceType::parse("ISSUE"), Some(SourceType::Issue));
}
/// `"note"` parses to the `Note` source type.
#[test]
fn test_source_type_parse_note() {
    let parsed = SourceType::parse("note");
    assert_eq!(parsed, Some(SourceType::Note));
}
/// `Note` is spelled `"note"` by `as_str`.
#[test]
fn test_source_type_note_as_str() {
    let spelled = SourceType::Note.as_str();
    assert_eq!(spelled, "note");
}
/// The `Display` output of `Note` is `"note"`.
#[test]
fn test_source_type_note_display() {
    let rendered = SourceType::Note.to_string();
    assert_eq!(rendered, "note");
}
/// The plural `"notes"` is accepted as an alias for `Note`.
#[test]
fn test_source_type_parse_notes_alias() {
    let parsed = SourceType::parse("notes");
    assert_eq!(parsed, Some(SourceType::Note));
}
#[test]
fn test_source_type_as_str() {
assert_eq!(SourceType::Issue.as_str(), "issue");
@@ -1449,4 +1988,354 @@ mod tests {
let result = extract_discussion_document(&conn, 1).unwrap();
assert!(result.is_none());
}
/// Test helper: inserts a note row into project 1 with explicit position,
/// note-type and resolution columns. `created_at` is also used for
/// `updated_at` and `last_seen_at`.
// A flat argument list keeps the call sites in the tests readable.
#[allow(clippy::too_many_arguments)]
fn insert_note_with_type(
    conn: &Connection,
    id: i64,
    gitlab_id: i64,
    discussion_id: i64,
    author: Option<&str>,
    body: Option<&str>,
    created_at: i64,
    is_system: bool,
    old_path: Option<&str>,
    new_path: Option<&str>,
    old_line: Option<i64>,
    new_line: Option<i64>,
    note_type: Option<&str>,
    resolvable: bool,
    resolved: bool,
) {
    let sql = "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position_old_path, position_new_path, position_old_line, position_new_line, note_type, resolvable, resolved) VALUES (?1, ?2, ?3, 1, ?4, ?5, ?6, ?6, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14)";
    // Booleans are stored as 0/1 integers.
    let system_flag = i32::from(is_system);
    let resolvable_flag = i32::from(resolvable);
    let resolved_flag = i32::from(resolved);
    conn.execute(
        sql,
        rusqlite::params![
            id,
            gitlab_id,
            discussion_id,
            author,
            body,
            created_at,
            system_flag,
            old_path,
            new_path,
            old_line,
            new_line,
            note_type,
            resolvable_flag,
            resolved_flag
        ],
    )
    .unwrap();
}
/// A plain note on an issue yields the expected header fields, title and URL.
#[test]
fn test_note_document_basic_format() {
    let conn = setup_discussion_test_db();
    insert_issue(
        &conn, 1, 42,
        Some("Fix login bug"),
        Some("desc"),
        "opened",
        Some("johndoe"),
        Some("https://gitlab.example.com/group/project-one/-/issues/42"),
    );
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn, 1, 12345, 1,
        Some("alice"),
        Some("This looks like a race condition"),
        1710460800000,
        false,
        None,
        None,
    );

    let doc = extract_note_document(&conn, 1).unwrap().unwrap();

    assert_eq!(doc.source_type, SourceType::Note);
    assert_eq!(doc.source_id, 1);
    assert_eq!(doc.project_id, 1);
    assert_eq!(doc.author_username.as_deref(), Some("alice"));

    let expected_fragments = [
        "[[Note]]",
        "source_type: note",
        "note_gitlab_id: 12345",
        "project: group/project-one",
        "parent_type: Issue",
        "parent_iid: 42",
        "parent_title: Fix login bug",
        "author: @alice",
        "--- Body ---",
        "This looks like a race condition",
    ];
    for fragment in expected_fragments {
        assert!(doc.content_text.contains(fragment));
    }

    assert_eq!(
        doc.title.as_deref(),
        Some("Note by @alice on Issue #42: Fix login bug")
    );
    assert_eq!(
        doc.url.as_deref(),
        Some("https://gitlab.example.com/group/project-one/-/issues/42#note_12345")
    );
}
/// A resolvable `DiffNote` gets `path:` and `resolved:` lines, and both the old
/// and new paths are collected in sorted order.
#[test]
fn test_note_document_diffnote_with_path() {
    let conn = setup_discussion_test_db();
    insert_issue(
        &conn, 1, 10,
        Some("Refactor auth"),
        Some("desc"),
        "opened",
        None,
        Some("https://gitlab.example.com/group/project-one/-/issues/10"),
    );
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note_with_type(
        &conn, 1, 555, 1,
        Some("bob"),
        Some("Unused variable here"),
        1000,
        false,
        Some("src/old_auth.rs"),
        Some("src/auth.rs"),
        Some(10),
        Some(25),
        Some("DiffNote"),
        true,
        false,
    );

    let doc = extract_note_document(&conn, 1).unwrap().unwrap();
    let text = &doc.content_text;

    assert!(text.contains("note_type: DiffNote"));
    assert!(text.contains("path: src/auth.rs:25"));
    assert!(text.contains("resolved: false"));
    assert_eq!(doc.paths, ["src/auth.rs", "src/old_auth.rs"]);
}
/// A note document carries its parent issue's labels, sorted by name.
#[test]
fn test_note_document_inherits_parent_labels() {
    let conn = setup_discussion_test_db();
    insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None);
    insert_label(&conn, 1, "backend");
    insert_label(&conn, 2, "api");
    link_issue_label(&conn, 1, 1);
    link_issue_label(&conn, 1, 2);
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn, 1, 100, 1,
        Some("alice"),
        Some("Note body"),
        1000,
        false,
        None,
        None,
    );

    let doc = extract_note_document(&conn, 1).unwrap().unwrap();

    assert_eq!(doc.labels, ["api", "backend"]);
    assert!(doc.content_text.contains("labels: api, backend"));
}
/// A note on a merge request is titled with the `MR !iid` form.
#[test]
fn test_note_document_mr_parent() {
    let conn = setup_discussion_test_db();
    insert_mr(
        &conn, 1, 456,
        Some("JWT Auth"),
        Some("desc"),
        Some("opened"),
        Some("johndoe"),
        Some("feature/jwt"),
        Some("main"),
        Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"),
    );
    insert_discussion(&conn, 1, "MergeRequest", None, Some(1));
    insert_note(
        &conn, 1, 200, 1,
        Some("reviewer"),
        Some("Needs tests"),
        1000,
        false,
        None,
        None,
    );

    let doc = extract_note_document(&conn, 1).unwrap().unwrap();

    for fragment in ["parent_type: MergeRequest", "parent_iid: 456"] {
        assert!(doc.content_text.contains(fragment));
    }
    assert_eq!(
        doc.title.as_deref(),
        Some("Note by @reviewer on MR !456: JWT Auth")
    );
}
/// System notes produce no document.
#[test]
fn test_note_document_system_note_returns_none() {
    let conn = setup_discussion_test_db();
    insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None);
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn, 1, 100, 1,
        Some("bot"),
        Some("assigned to @alice"),
        1000,
        true,
        None,
        None,
    );

    let outcome = extract_note_document(&conn, 1).unwrap();
    assert!(outcome.is_none());
}
/// An unknown note id yields `None` rather than an error.
#[test]
fn test_note_document_not_found() {
    let conn = setup_discussion_test_db();
    let outcome = extract_note_document(&conn, 999).unwrap();
    assert!(outcome.is_none());
}
/// A discussion without a parent id yields no note document.
#[test]
fn test_note_document_orphaned_discussion() {
    let conn = setup_discussion_test_db();
    insert_discussion(&conn, 1, "Issue", None, None);
    insert_note(
        &conn, 1, 100, 1,
        Some("alice"),
        Some("Comment"),
        1000,
        false,
        None,
        None,
    );

    let outcome = extract_note_document(&conn, 1).unwrap();
    assert!(outcome.is_none());
}
/// Extracting the same note twice gives identical hashes, and the content hash
/// is 64 characters long.
#[test]
fn test_note_document_hash_deterministic() {
    let conn = setup_discussion_test_db();
    insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None);
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn, 1, 100, 1,
        Some("alice"),
        Some("Comment"),
        1000,
        false,
        None,
        None,
    );

    let first = extract_note_document(&conn, 1).unwrap().unwrap();
    let second = extract_note_document(&conn, 1).unwrap().unwrap();

    assert_eq!(first.content_hash, second.content_hash);
    assert_eq!(first.labels_hash, second.labels_hash);
    assert_eq!(first.paths_hash, second.paths_hash);
    assert_eq!(first.content_hash.len(), 64);
}
/// An empty body still produces the body separator and is not truncated.
#[test]
fn test_note_document_empty_body() {
    let conn = setup_discussion_test_db();
    insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None);
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn, 1, 100, 1,
        Some("alice"),
        Some(""),
        1000,
        false,
        None,
        None,
    );

    let doc = extract_note_document(&conn, 1).unwrap().unwrap();
    let separator = "--- Body ---\n\n";

    assert!(doc.content_text.contains(separator));
    assert!(!doc.is_truncated);
}
/// A NULL body is rendered as an empty body: the text ends at the separator.
#[test]
fn test_note_document_null_body() {
    let conn = setup_discussion_test_db();
    insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None);
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn, 1, 100, 1,
        Some("alice"),
        None,
        1000,
        false,
        None,
        None,
    );

    let doc = extract_note_document(&conn, 1).unwrap().unwrap();
    let text = doc.content_text.as_str();

    assert!(text.contains("--- Body ---\n\n"));
    assert!(text.ends_with("--- Body ---\n\n"));
}
}

View File

@@ -3,8 +3,9 @@ mod regenerator;
mod truncation;
pub use extractor::{
DocumentData, SourceType, compute_content_hash, compute_list_hash, extract_discussion_document,
extract_issue_document, extract_mr_document,
DocumentData, ParentMetadataCache, SourceType, compute_content_hash, compute_list_hash,
extract_discussion_document, extract_issue_document, extract_mr_document,
extract_note_document, extract_note_document_cached,
};
pub use regenerator::{RegenerateResult, regenerate_dirty_documents};
pub use truncation::{

View File

@@ -4,8 +4,8 @@ use tracing::{debug, instrument, warn};
use crate::core::error::Result;
use crate::documents::{
DocumentData, SourceType, extract_discussion_document, extract_issue_document,
extract_mr_document,
DocumentData, ParentMetadataCache, SourceType, extract_discussion_document,
extract_issue_document, extract_mr_document, extract_note_document_cached,
};
use crate::ingestion::dirty_tracker::{clear_dirty, get_dirty_sources, record_dirty_error};
@@ -27,6 +27,7 @@ pub fn regenerate_dirty_documents(
let mut result = RegenerateResult::default();
let mut estimated_total: usize = 0;
let mut cache = ParentMetadataCache::new();
loop {
let dirty = get_dirty_sources(conn)?;
@@ -41,7 +42,7 @@ pub fn regenerate_dirty_documents(
estimated_total = estimated_total.max(processed_so_far + remaining);
for (source_type, source_id) in &dirty {
match regenerate_one(conn, *source_type, *source_id) {
match regenerate_one(conn, *source_type, *source_id, &mut cache) {
Ok(changed) => {
if changed {
result.regenerated += 1;
@@ -83,11 +84,17 @@ pub fn regenerate_dirty_documents(
Ok(result)
}
fn regenerate_one(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<bool> {
fn regenerate_one(
conn: &Connection,
source_type: SourceType,
source_id: i64,
cache: &mut ParentMetadataCache,
) -> Result<bool> {
let doc = match source_type {
SourceType::Issue => extract_issue_document(conn, source_id)?,
SourceType::MergeRequest => extract_mr_document(conn, source_id)?,
SourceType::Discussion => extract_discussion_document(conn, source_id)?,
SourceType::Note => extract_note_document_cached(conn, source_id, cache)?,
};
let Some(doc) = doc else {
@@ -122,11 +129,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool>
)
.optional()?;
let content_changed = match &existing {
Some((_, old_content_hash, _, _)) => old_content_hash != &doc.content_hash,
None => true,
};
// Fast path: if all three hashes match, nothing changed at all.
if let Some((_, ref old_content_hash, ref old_labels_hash, ref old_paths_hash)) = existing
&& old_content_hash == &doc.content_hash
&& old_labels_hash == &doc.labels_hash
@@ -134,6 +137,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool>
{
return Ok(false);
}
// Past this point at least one hash differs, so the document will be updated.
let labels_json = serde_json::to_string(&doc.labels).unwrap_or_else(|_| "[]".to_string());
@@ -243,7 +247,8 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool>
}
}
Ok(content_changed)
// We passed the triple-hash fast path, so at least one hash differs.
Ok(true)
}
fn delete_document(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
@@ -473,4 +478,316 @@ mod tests {
.unwrap();
assert_eq!(label_count, 1);
}
/// Test helper: extends the base test database from `setup_db()` with the
/// `merge_requests`, `mr_labels`, `discussions` and `notes` tables.
fn setup_note_db() -> Connection {
    let note_schema = "
CREATE TABLE merge_requests (
id INTEGER PRIMARY KEY,
gitlab_id INTEGER UNIQUE NOT NULL,
project_id INTEGER NOT NULL REFERENCES projects(id),
iid INTEGER NOT NULL,
title TEXT,
description TEXT,
state TEXT,
draft INTEGER NOT NULL DEFAULT 0,
author_username TEXT,
source_branch TEXT,
target_branch TEXT,
head_sha TEXT,
references_short TEXT,
references_full TEXT,
detailed_merge_status TEXT,
merge_user_username TEXT,
created_at INTEGER,
updated_at INTEGER,
merged_at INTEGER,
closed_at INTEGER,
last_seen_at INTEGER NOT NULL,
discussions_synced_for_updated_at INTEGER,
discussions_sync_last_attempt_at INTEGER,
discussions_sync_attempts INTEGER DEFAULT 0,
discussions_sync_last_error TEXT,
resource_events_synced_for_updated_at INTEGER,
web_url TEXT,
raw_payload_id INTEGER
);
CREATE TABLE mr_labels (
merge_request_id INTEGER REFERENCES merge_requests(id),
label_id INTEGER REFERENCES labels(id),
PRIMARY KEY(merge_request_id, label_id)
);
CREATE TABLE discussions (
id INTEGER PRIMARY KEY,
gitlab_discussion_id TEXT NOT NULL,
project_id INTEGER NOT NULL REFERENCES projects(id),
issue_id INTEGER REFERENCES issues(id),
merge_request_id INTEGER,
noteable_type TEXT NOT NULL,
individual_note INTEGER NOT NULL DEFAULT 0,
first_note_at INTEGER,
last_note_at INTEGER,
last_seen_at INTEGER NOT NULL,
resolvable INTEGER NOT NULL DEFAULT 0,
resolved INTEGER NOT NULL DEFAULT 0
);
CREATE TABLE notes (
id INTEGER PRIMARY KEY,
gitlab_id INTEGER UNIQUE NOT NULL,
discussion_id INTEGER NOT NULL REFERENCES discussions(id),
project_id INTEGER NOT NULL REFERENCES projects(id),
note_type TEXT,
is_system INTEGER NOT NULL DEFAULT 0,
author_username TEXT,
body TEXT,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
last_seen_at INTEGER NOT NULL,
position INTEGER,
resolvable INTEGER NOT NULL DEFAULT 0,
resolved INTEGER NOT NULL DEFAULT 0,
resolved_by TEXT,
resolved_at INTEGER,
position_old_path TEXT,
position_new_path TEXT,
position_old_line INTEGER,
position_new_line INTEGER,
raw_payload_id INTEGER
);
";
    let db = setup_db();
    db.execute_batch(note_schema).unwrap();
    db
}
/// A dirty note is regenerated into a `note` document containing the header
/// marker and the author line.
#[test]
fn test_regenerate_note_document() {
    let conn = setup_note_db();
    // Fixture: one issue, one discussion on it, one regular note.
    let fixture = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Test Issue', 'opened', 'alice', 1000, 2000, 3000, 'https://example.com/issues/42')",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'This is a note', 1000, 2000, 3000, 0)",
    ];
    for statement in fixture {
        conn.execute(statement, []).unwrap();
    }
    mark_dirty(&conn, SourceType::Note, 1).unwrap();

    let summary = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(summary.regenerated, 1);
    assert_eq!(summary.unchanged, 0);
    assert_eq!(summary.errored, 0);

    let (stored_type, stored_text): (String, String) = conn
        .query_row(
            "SELECT source_type, content_text FROM documents WHERE source_id = 1",
            [],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
        .unwrap();
    assert_eq!(stored_type, "note");
    for fragment in ["[[Note]]", "author: @bob"] {
        assert!(stored_text.contains(fragment));
    }
}
/// Regenerating a system note removes its previously stored document.
#[test]
fn test_regenerate_note_system_note_deletes() {
    let conn = setup_note_db();
    let fixture = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'Test', 'opened', 1000, 2000, 3000)",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bot', 'assigned to @alice', 1000, 2000, 3000, 1)",
        // Pre-insert a document for this note (simulating a previously-generated doc)
        "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) VALUES ('note', 1, 1, 'old content', 'oldhash')",
    ];
    for statement in fixture {
        conn.execute(statement, []).unwrap();
    }
    mark_dirty(&conn, SourceType::Note, 1).unwrap();

    let summary = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(summary.regenerated, 1);

    let remaining: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM documents WHERE source_type = 'note'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(remaining, 0);
}
/// Regenerating an unmodified note a second time reports it as unchanged.
#[test]
fn test_regenerate_note_unchanged() {
    let conn = setup_note_db();
    let fixture = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Test', 'opened', 1000, 2000, 3000, 'https://example.com/issues/42')",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'Some note', 1000, 2000, 3000, 0)",
    ];
    for statement in fixture {
        conn.execute(statement, []).unwrap();
    }

    // First pass creates the document.
    mark_dirty(&conn, SourceType::Note, 1).unwrap();
    let first_pass = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(first_pass.regenerated, 1);

    // Second pass sees identical hashes.
    mark_dirty(&conn, SourceType::Note, 1).unwrap();
    let second_pass = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(second_pass.unchanged, 1);
    assert_eq!(second_pass.regenerated, 0);
}
/// Ten dirty notes sharing one parent issue are all regenerated without errors.
#[test]
fn test_note_regeneration_batch_uses_cache() {
    let conn = setup_note_db();
    let parent_fixture = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Shared Issue', 'opened', 'alice', 1000, 2000, 3000, 'https://example.com/issues/42')",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
    ];
    for statement in parent_fixture {
        conn.execute(statement, []).unwrap();
    }

    let insert_note_sql = "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (?1, ?2, 1, 1, 'bob', ?3, 1000, 2000, 3000, 0)";
    for i in 1..=10 {
        conn.execute(
            insert_note_sql,
            rusqlite::params![i, i * 100, format!("Note body {}", i)],
        )
        .unwrap();
        mark_dirty(&conn, SourceType::Note, i).unwrap();
    }

    let summary = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(summary.regenerated, 10);
    assert_eq!(summary.errored, 0);

    let stored: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM documents WHERE source_type = 'note'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(stored, 10);
}
/// The cached and the direct extraction paths produce the same document fields.
#[test]
fn test_note_regeneration_cache_consistent_with_direct_extraction() {
    use crate::documents::extract_note_document;

    let conn = setup_note_db();
    let fixture = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Consistency Check', 'opened', 'alice', 1000, 2000, 3000, 'https://example.com/issues/42')",
        "INSERT INTO labels (id, project_id, name) VALUES (1, 1, 'backend')",
        "INSERT INTO issue_labels (issue_id, label_id) VALUES (1, 1)",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'Some content', 1000, 2000, 3000, 0)",
    ];
    for statement in fixture {
        conn.execute(statement, []).unwrap();
    }

    let uncached = extract_note_document(&conn, 1).unwrap().unwrap();
    let mut parent_cache = ParentMetadataCache::new();
    let via_cache = extract_note_document_cached(&conn, 1, &mut parent_cache)
        .unwrap()
        .unwrap();

    assert_eq!(uncached.content_text, via_cache.content_text);
    assert_eq!(uncached.content_hash, via_cache.content_hash);
    assert_eq!(uncached.labels, via_cache.labels);
    assert_eq!(uncached.labels_hash, via_cache.labels_hash);
    assert_eq!(uncached.paths_hash, via_cache.paths_hash);
    assert_eq!(uncached.title, via_cache.title);
    assert_eq!(uncached.url, via_cache.url);
    assert_eq!(uncached.author_username, via_cache.author_username);
}
/// Notes under different parent issues each get their own parent's metadata
/// when regenerated in one batch.
#[test]
fn test_note_regeneration_cache_invalidates_across_parents() {
    let conn = setup_note_db();
    // Fixture: two issues, one discussion and one note under each.
    let fixture = [
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Issue Alpha', 'opened', 1000, 2000, 3000, 'https://example.com/issues/42')",
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at, web_url) VALUES (2, 20, 1, 99, 'Issue Beta', 'opened', 1000, 2000, 3000, 'https://example.com/issues/99')",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (2, 'disc_2', 1, 2, 'Issue', 3000)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'Alpha note', 1000, 2000, 3000, 0)",
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (2, 200, 2, 1, 'alice', 'Beta note', 1000, 2000, 3000, 0)",
    ];
    for statement in fixture {
        conn.execute(statement, []).unwrap();
    }
    mark_dirty(&conn, SourceType::Note, 1).unwrap();
    mark_dirty(&conn, SourceType::Note, 2).unwrap();

    let summary = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(summary.regenerated, 2);
    assert_eq!(summary.errored, 0);

    let alpha_text: String = conn
        .query_row(
            "SELECT content_text FROM documents WHERE source_type = 'note' AND source_id = 1",
            [],
            |r| r.get(0),
        )
        .unwrap();
    let beta_text: String = conn
        .query_row(
            "SELECT content_text FROM documents WHERE source_type = 'note' AND source_id = 2",
            [],
            |r| r.get(0),
        )
        .unwrap();

    for fragment in ["parent_iid: 42", "parent_title: Issue Alpha"] {
        assert!(alpha_text.contains(fragment));
    }
    for fragment in ["parent_iid: 99", "parent_title: Issue Beta"] {
        assert!(beta_text.contains(fragment));
    }
}
}

View File

@@ -30,6 +30,7 @@ pub struct NormalizedNote {
pub project_id: i64,
pub note_type: Option<String>,
pub is_system: bool,
pub author_id: Option<i64>,
pub author_username: String,
pub body: String,
pub created_at: i64,
@@ -160,6 +161,7 @@ fn transform_single_note(
project_id: local_project_id,
note_type: note.note_type.clone(),
is_system: note.system,
author_id: Some(note.author.id),
author_username: note.author.username.clone(),
body: note.body.clone(),
created_at: parse_timestamp(&note.created_at),
@@ -265,6 +267,7 @@ fn transform_single_note_strict(
project_id: local_project_id,
note_type: note.note_type.clone(),
is_system: note.system,
author_id: Some(note.author.id),
author_username: note.author.username.clone(),
body: note.body.clone(),
created_at,

View File

@@ -131,7 +131,7 @@ mod tests {
let conn = Connection::open_in_memory().unwrap();
conn.execute_batch("
CREATE TABLE dirty_sources (
source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion')),
source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion','note')),
source_id INTEGER NOT NULL,
queued_at INTEGER NOT NULL,
attempt_count INTEGER NOT NULL DEFAULT 0,
@@ -258,6 +258,21 @@ mod tests {
assert_eq!(count, 0);
}
/// A `SourceType::Note` entry can be queued, read back, and cleared again.
#[test]
fn test_mark_dirty_note_type() {
    let conn = setup_db();

    // Queue a single note and confirm it is the only dirty entry.
    mark_dirty(&conn, SourceType::Note, 42).unwrap();
    let queued = get_dirty_sources(&conn).unwrap();
    assert_eq!(queued.len(), 1);
    let entry = &queued[0];
    assert_eq!(entry.0, SourceType::Note);
    assert_eq!(entry.1, 42);

    // Clearing the same key must leave the queue empty.
    clear_dirty(&conn, SourceType::Note, 42).unwrap();
    let remaining = get_dirty_sources(&conn).unwrap();
    assert!(remaining.is_empty());
}
#[test]
fn test_drain_loop() {
let conn = setup_db();

View File

@@ -1,17 +1,26 @@
use futures::StreamExt;
use rusqlite::Connection;
use rusqlite::{Connection, params};
use tracing::{debug, warn};
use crate::Config;
use crate::core::error::Result;
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::time::now_ms;
use crate::documents::SourceType;
use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::{NoteableRef, transform_discussion, transform_notes};
use crate::gitlab::transformers::{
NormalizedNote, NoteableRef, transform_discussion, transform_notes,
};
use crate::ingestion::dirty_tracker;
use super::issues::IssueForDiscussionSync;
/// Result of upserting a single note row.
///
/// Returned by the note upsert helpers so callers can decide whether the
/// note's document needs to be queued for regeneration.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NoteUpsertOutcome {
    /// Local (database) row id of the note after the upsert.
    pub local_note_id: i64,
    /// `true` when the note is new or one of the fields compared during
    /// change detection differs from the stored row.
    pub changed_semantics: bool,
}
#[derive(Debug, Default)]
pub struct IngestDiscussionsResult {
pub discussions_fetched: usize,
@@ -80,6 +89,8 @@ async fn ingest_discussions_for_issue(
let mut seen_discussion_ids: Vec<String> = Vec::new();
let mut pagination_error: Option<crate::core::error::LoreError> = None;
let run_seen_at = now_ms();
while let Some(disc_result) = discussions_stream.next().await {
let gitlab_discussion = match disc_result {
Ok(d) => d,
@@ -126,17 +137,28 @@ async fn ingest_discussions_for_issue(
dirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, local_discussion_id)?;
// Mark child note documents dirty (they inherit parent metadata)
tx.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
params![now_ms(), local_discussion_id],
)?;
let notes = transform_notes(&gitlab_discussion, local_project_id);
let notes_count = notes.len();
tx.execute(
"DELETE FROM notes WHERE discussion_id = ?",
[local_discussion_id],
)?;
for note in notes {
insert_note(&tx, local_discussion_id, &note, None)?;
let outcome =
upsert_note_for_issue(&tx, local_discussion_id, &note, run_seen_at, None)?;
if !note.is_system && outcome.changed_semantics {
dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;
}
}
sweep_stale_issue_notes(&tx, local_discussion_id, run_seen_at)?;
tx.commit()?;
@@ -198,38 +220,182 @@ fn upsert_discussion(
Ok(())
}
fn insert_note(
fn upsert_note_for_issue(
conn: &Connection,
discussion_id: i64,
note: &crate::gitlab::transformers::NormalizedNote,
note: &NormalizedNote,
last_seen_at: i64,
payload_id: Option<i64>,
) -> Result<()> {
) -> Result<NoteUpsertOutcome> {
// Pre-read for semantic change detection
let existing = conn
.query_row(
"SELECT id, body, note_type, resolved, resolved_by,
position_old_path, position_new_path, position_old_line, position_new_line,
position_type, position_line_range_start, position_line_range_end,
position_base_sha, position_start_sha, position_head_sha
FROM notes WHERE gitlab_id = ?",
params![note.gitlab_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, String>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, bool>(3)?,
row.get::<_, Option<String>>(4)?,
row.get::<_, Option<String>>(5)?,
row.get::<_, Option<String>>(6)?,
row.get::<_, Option<i32>>(7)?,
row.get::<_, Option<i32>>(8)?,
row.get::<_, Option<String>>(9)?,
row.get::<_, Option<i32>>(10)?,
row.get::<_, Option<i32>>(11)?,
row.get::<_, Option<String>>(12)?,
row.get::<_, Option<String>>(13)?,
row.get::<_, Option<String>>(14)?,
))
},
)
.ok();
let changed_semantics = match &existing {
Some((
_id,
body,
note_type,
resolved,
resolved_by,
pos_old_path,
pos_new_path,
pos_old_line,
pos_new_line,
pos_type,
pos_range_start,
pos_range_end,
pos_base_sha,
pos_start_sha,
pos_head_sha,
)) => {
*body != note.body
|| *note_type != note.note_type
|| *resolved != note.resolved
|| *resolved_by != note.resolved_by
|| *pos_old_path != note.position_old_path
|| *pos_new_path != note.position_new_path
|| *pos_old_line != note.position_old_line
|| *pos_new_line != note.position_new_line
|| *pos_type != note.position_type
|| *pos_range_start != note.position_line_range_start
|| *pos_range_end != note.position_line_range_end
|| *pos_base_sha != note.position_base_sha
|| *pos_start_sha != note.position_start_sha
|| *pos_head_sha != note.position_head_sha
}
None => true,
};
conn.execute(
"INSERT INTO notes (
gitlab_id, discussion_id, project_id, note_type, is_system,
author_username, body, created_at, updated_at, last_seen_at,
position, resolvable, resolved, resolved_by, resolved_at, raw_payload_id
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)",
(
author_id, author_username, body, created_at, updated_at, last_seen_at,
position, resolvable, resolved, resolved_by, resolved_at,
position_old_path, position_new_path, position_old_line, position_new_line,
position_type, position_line_range_start, position_line_range_end,
position_base_sha, position_start_sha, position_head_sha,
raw_payload_id
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26, ?27)
ON CONFLICT(gitlab_id) DO UPDATE SET
body = excluded.body,
note_type = excluded.note_type,
author_id = excluded.author_id,
updated_at = excluded.updated_at,
last_seen_at = excluded.last_seen_at,
resolvable = excluded.resolvable,
resolved = excluded.resolved,
resolved_by = excluded.resolved_by,
resolved_at = excluded.resolved_at,
position_old_path = excluded.position_old_path,
position_new_path = excluded.position_new_path,
position_old_line = excluded.position_old_line,
position_new_line = excluded.position_new_line,
position_type = excluded.position_type,
position_line_range_start = excluded.position_line_range_start,
position_line_range_end = excluded.position_line_range_end,
position_base_sha = excluded.position_base_sha,
position_start_sha = excluded.position_start_sha,
position_head_sha = excluded.position_head_sha,
raw_payload_id = COALESCE(excluded.raw_payload_id, raw_payload_id)",
params![
note.gitlab_id,
discussion_id,
note.project_id,
&note.note_type,
note.is_system,
note.author_id,
&note.author_username,
&note.body,
note.created_at,
note.updated_at,
note.last_seen_at,
last_seen_at,
note.position,
note.resolvable,
note.resolved,
&note.resolved_by,
note.resolved_at,
&note.position_old_path,
&note.position_new_path,
note.position_old_line,
note.position_new_line,
&note.position_type,
note.position_line_range_start,
note.position_line_range_end,
&note.position_base_sha,
&note.position_start_sha,
&note.position_head_sha,
payload_id,
),
],
)?;
Ok(())
let local_note_id: i64 = conn.query_row(
"SELECT id FROM notes WHERE gitlab_id = ?",
params![note.gitlab_id],
|row| row.get(0),
)?;
Ok(NoteUpsertOutcome {
local_note_id,
changed_semantics,
})
}
/// Removes notes of one discussion that were not seen in the current sync run,
/// together with their derived rows.
///
/// A note counts as stale when its `last_seen_at` is strictly lower than the
/// `last_seen_at` argument (the timestamp of the current run).
///
/// Returns the number of rows deleted from `notes` (step 3 only).
///
/// NOTE(review): the three statements are not wrapped in a transaction here;
/// the visible caller passes its own transaction — confirm for other callers.
fn sweep_stale_issue_notes(
conn: &Connection,
discussion_id: i64,
last_seen_at: i64,
) -> Result<usize> {
// Step 1: Delete note documents for stale notes.
// Must run before step 3: the subquery resolves the stale ids from `notes`.
// Restricted to non-system notes (`is_system = 0`).
conn.execute(
"DELETE FROM documents WHERE source_type = 'note' AND source_id IN
(SELECT id FROM notes WHERE discussion_id = ?1 AND last_seen_at < ?2 AND is_system = 0)",
params![discussion_id, last_seen_at],
)?;
// Step 2: Delete dirty_sources entries for stale notes (same selection as
// step 1), so no regeneration is queued for a note that is about to vanish.
conn.execute(
"DELETE FROM dirty_sources WHERE source_type = 'note' AND source_id IN
(SELECT id FROM notes WHERE discussion_id = ?1 AND last_seen_at < ?2 AND is_system = 0)",
params![discussion_id, last_seen_at],
)?;
// Step 3: Delete the stale notes themselves. Unlike steps 1 and 2 this has
// no `is_system` filter, so stale system notes are removed as well.
let deleted = conn.execute(
"DELETE FROM notes WHERE discussion_id = ?1 AND last_seen_at < ?2",
params![discussion_id, last_seen_at],
)?;
// Only log when something was actually removed.
if deleted > 0 {
debug!(discussion_id, deleted, "Swept stale issue notes");
}
Ok(deleted)
}
fn remove_stale_discussions(
@@ -303,6 +469,9 @@ fn update_issue_sync_timestamp(conn: &Connection, issue_id: i64, updated_at: i64
#[cfg(test)]
mod tests {
use super::*;
use crate::core::db::{create_connection, run_migrations};
use crate::gitlab::transformers::NormalizedNote;
use std::path::Path;
#[test]
fn result_default_has_zero_counts() {
@@ -311,4 +480,462 @@ mod tests {
assert_eq!(result.discussions_upserted, 0);
assert_eq!(result.notes_upserted, 0);
}
/// Builds a migrated in-memory database seeded with one project, one issue
/// and one issue discussion, in that order.
fn setup() -> Connection {
    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&conn).unwrap();

    // Seed rows are inserted parent-first: project, then issue, then discussion.
    let seed_statements = [
        "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) \
         VALUES (1, 'group/repo', 'https://gitlab.com/group/repo')",
        "INSERT INTO issues (gitlab_id, iid, project_id, title, state, author_username, created_at, updated_at, last_seen_at) \
         VALUES (100, 1, 1, 'Test Issue', 'opened', 'testuser', 1000, 2000, 3000)",
        "INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, noteable_type, individual_note, last_seen_at, resolvable, resolved) \
         VALUES ('disc-1', 1, 1, 'Issue', 0, 3000, 0, 0)",
    ];
    for sql in seed_statements {
        conn.execute(sql, []).unwrap();
    }

    conn
}
/// Returns the local id of a discussion row (the fixture only creates one).
fn get_discussion_id(conn: &Connection) -> i64 {
    let sql = "SELECT id FROM discussions LIMIT 1";
    conn.query_row(sql, [], |row| row.get::<_, i64>(0)).unwrap()
}
/// Builds a non-system `NormalizedNote` for tests.
///
/// The author is always `testuser` with no `author_id`, `last_seen_at` mirrors
/// `updated_at`, and every position field is empty.
#[allow(clippy::too_many_arguments)]
fn make_note(
    gitlab_id: i64,
    project_id: i64,
    body: &str,
    note_type: Option<&str>,
    created_at: i64,
    updated_at: i64,
    resolved: bool,
    resolved_by: Option<&str>,
) -> NormalizedNote {
    let owned_note_type = note_type.map(str::to_owned);
    let owned_resolved_by = resolved_by.map(str::to_owned);

    NormalizedNote {
        // Identity
        gitlab_id,
        project_id,
        note_type: owned_note_type,
        is_system: false,
        // Author
        author_id: None,
        author_username: String::from("testuser"),
        // Content and timestamps
        body: body.to_owned(),
        created_at,
        updated_at,
        last_seen_at: updated_at,
        // Resolution state
        position: 0,
        resolvable: false,
        resolved,
        resolved_by: owned_resolved_by,
        resolved_at: None,
        // Diff position (unused by these fixtures)
        position_old_path: None,
        position_new_path: None,
        position_old_line: None,
        position_new_line: None,
        position_type: None,
        position_line_range_start: None,
        position_line_range_end: None,
        position_base_sha: None,
        position_start_sha: None,
        position_head_sha: None,
    }
}
/// Re-syncing the same GitLab notes must keep their local row ids.
#[test]
fn test_issue_note_upsert_stable_id() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let first_run_seen_at: i64 = 5000;
    let notes = [
        make_note(1001, 1, "First note", None, 1000, 2000, false, None),
        make_note(1002, 1, "Second note", None, 1000, 2000, false, None),
    ];

    // Upserts every note in order with the given run timestamp and collects
    // the local ids that come back.
    let sync_all = |seen_at: i64| -> Vec<i64> {
        notes
            .iter()
            .map(|n| {
                upsert_note_for_issue(&conn, disc_id, n, seen_at, None)
                    .unwrap()
                    .local_note_id
            })
            .collect()
    };

    let initial_ids = sync_all(first_run_seen_at);
    // Re-sync same gitlab_ids
    let resynced_ids = sync_all(first_run_seen_at + 1);

    assert_eq!(initial_ids[0], resynced_ids[0]);
    assert_eq!(initial_ids[1], resynced_ids[1]);
}
/// A changed body on an existing note is reported as a semantic change and
/// the new body is persisted.
#[test]
fn test_issue_note_upsert_detects_body_change() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);

    let original = make_note(2001, 1, "Original body", None, 1000, 2000, false, None);
    upsert_note_for_issue(&conn, disc_id, &original, 5000, None).unwrap();

    // Same gitlab_id with a new body; `make_note` already sets updated_at = 3000,
    // so no further mutation of the fixture is needed.
    let edited = make_note(2001, 1, "Updated body", None, 1000, 3000, false, None);
    let outcome = upsert_note_for_issue(&conn, disc_id, &edited, 5001, None).unwrap();
    assert!(outcome.changed_semantics);

    // The flag alone is not enough: the row must carry the new body too.
    let stored_body: String = conn
        .query_row(
            "SELECT body FROM notes WHERE gitlab_id = ?",
            [2001_i64],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(stored_body, "Updated body");
}
/// Upserting an identical note a second time reports no semantic change.
#[test]
fn test_issue_note_upsert_unchanged_returns_false() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let unchanged = make_note(3001, 1, "Same body", None, 1000, 2000, false, None);

    // First sync creates the row.
    upsert_note_for_issue(&conn, disc_id, &unchanged, 5000, None).unwrap();

    // Second sync with the very same note, only the run timestamp moves on.
    let second = upsert_note_for_issue(&conn, disc_id, &unchanged, 5001, None).unwrap();
    assert!(!second.changed_semantics);
}
/// A newer `updated_at` with otherwise identical content is not a semantic
/// change, but the timestamp itself is still written to the row.
#[test]
fn test_issue_note_upsert_updated_at_only_does_not_mark_semantic_change() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);

    let original = make_note(4001, 1, "Body stays", None, 1000, 2000, false, None);
    upsert_note_for_issue(&conn, disc_id, &original, 5000, None).unwrap();

    // Only updated_at differs (2000 -> 9999); `make_note` sets it directly.
    let touched = make_note(4001, 1, "Body stays", None, 1000, 9999, false, None);
    let outcome = upsert_note_for_issue(&conn, disc_id, &touched, 5001, None).unwrap();
    assert!(!outcome.changed_semantics);

    // Non-semantic fields must still be refreshed by the upsert.
    let stored_updated_at: i64 = conn
        .query_row(
            "SELECT updated_at FROM notes WHERE gitlab_id = ?",
            [4001_i64],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(stored_updated_at, 9999);
}
/// A note that is not re-seen in the newer run is removed by the sweep.
#[test]
fn test_issue_note_sweep_removes_stale() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let kept = make_note(5001, 1, "Keep me", None, 1000, 2000, false, None);
    let stale = make_note(5002, 1, "Stale me", None, 1000, 2000, false, None);

    // First run sees both notes.
    for note in [&kept, &stale] {
        upsert_note_for_issue(&conn, disc_id, note, 5000, None).unwrap();
    }

    // Second run only sees `kept`, bumping its last_seen_at to 6000.
    upsert_note_for_issue(&conn, disc_id, &kept, 6000, None).unwrap();

    // `stale` still has last_seen_at = 5000 < 6000 and must be swept.
    let swept = sweep_stale_issue_notes(&conn, disc_id, 6000).unwrap();
    assert_eq!(swept, 1);

    let remaining = conn
        .query_row(
            "SELECT COUNT(*) FROM notes WHERE discussion_id = ?",
            [disc_id],
            |row| row.get::<_, i64>(0),
        )
        .unwrap();
    assert_eq!(remaining, 1);
}
/// The id reported by the upsert is the id actually stored for that note.
#[test]
fn test_issue_note_upsert_returns_local_id() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let fixture = make_note(6001, 1, "Check my ID", None, 1000, 2000, false, None);

    let reported_id = upsert_note_for_issue(&conn, disc_id, &fixture, 5000, None)
        .unwrap()
        .local_note_id;

    // Look the row up independently by its GitLab id.
    let stored_id = conn
        .query_row(
            "SELECT id FROM notes WHERE gitlab_id = ?",
            [6001_i64],
            |row| row.get::<_, i64>(0),
        )
        .unwrap();

    assert_eq!(reported_id, stored_id);
}
/// A note carrying an `author_id` has that id written to the row.
#[test]
fn test_issue_note_upsert_captures_author_id() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let authored = NormalizedNote {
        author_id: Some(12345),
        ..make_note(7001, 1, "With author", None, 1000, 2000, false, None)
    };

    upsert_note_for_issue(&conn, disc_id, &authored, 5000, None).unwrap();

    let persisted = conn
        .query_row(
            "SELECT author_id FROM notes WHERE gitlab_id = ?",
            [7001_i64],
            |row| row.get::<_, Option<i64>>(0),
        )
        .unwrap();
    assert_eq!(persisted, Some(12345));
}
/// A note without an `author_id` is stored with a NULL author id.
#[test]
fn test_note_upsert_author_id_nullable() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);

    // `make_note` leaves author_id as None.
    let anonymous = make_note(7002, 1, "No author id", None, 1000, 2000, false, None);
    upsert_note_for_issue(&conn, disc_id, &anonymous, 5000, None).unwrap();

    let persisted = conn
        .query_row(
            "SELECT author_id FROM notes WHERE gitlab_id = ?",
            [7002_i64],
            |row| row.get::<_, Option<i64>>(0),
        )
        .unwrap();
    assert_eq!(persisted, None);
}
/// The stored `author_id` is unchanged after a re-sync in which the same
/// author appears under a different username.
#[test]
fn test_note_author_id_survives_username_change() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);

    let before_rename = NormalizedNote {
        author_id: Some(99999),
        author_username: "oldname".to_string(),
        ..make_note(7003, 1, "Original body", None, 1000, 2000, false, None)
    };
    upsert_note_for_issue(&conn, disc_id, &before_rename, 5000, None).unwrap();

    // Re-sync: new username and new body, identical author_id.
    let after_rename = NormalizedNote {
        author_id: Some(99999),
        author_username: "newname".to_string(),
        ..make_note(7003, 1, "Updated body", None, 1000, 3000, false, None)
    };
    upsert_note_for_issue(&conn, disc_id, &after_rename, 5001, None).unwrap();

    // The id must still be the one stored on the row.
    let persisted_id = conn
        .query_row(
            "SELECT author_id FROM notes WHERE gitlab_id = ?",
            [7003_i64],
            |row| row.get::<_, Option<i64>>(0),
        )
        .unwrap();
    assert_eq!(persisted_id, Some(99999));
}
/// Inserts a placeholder `note` document row for the given local note id.
fn insert_note_document(conn: &Connection, note_local_id: i64) {
    let sql = "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) \
               VALUES ('note', ?1, 1, 'note content', 'hash123')";
    conn.execute(sql, [note_local_id]).unwrap();
}
/// Queues the given local note id in `dirty_sources` with a fixed `queued_at`.
fn insert_note_dirty_source(conn: &Connection, note_local_id: i64) {
    let sql = "INSERT INTO dirty_sources (source_type, source_id, queued_at) \
               VALUES ('note', ?1, 1000)";
    conn.execute(sql, [note_local_id]).unwrap();
}
/// Counts `note` documents that belong to the given local note id.
fn count_note_documents(conn: &Connection, note_local_id: i64) -> i64 {
    let sql = "SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?";
    conn.query_row(sql, [note_local_id], |row| row.get::<_, i64>(0))
        .unwrap()
}
/// Counts `dirty_sources` entries of type `note` for the given local note id.
fn count_note_dirty_sources(conn: &Connection, note_local_id: i64) -> i64 {
    let sql = "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note' AND source_id = ?";
    conn.query_row(sql, [note_local_id], |row| row.get::<_, i64>(0))
        .unwrap()
}
/// Sweeping a stale note also removes its document and dirty-queue entry,
/// while documents of notes that were re-seen stay in place.
#[test]
fn test_issue_note_sweep_deletes_note_documents_immediately() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);

    let fresh_a = make_note(9001, 1, "Keep me", None, 1000, 2000, false, None);
    let fresh_b = make_note(9002, 1, "Keep me too", None, 1000, 2000, false, None);
    let stale = make_note(9003, 1, "Stale me", None, 1000, 2000, false, None);

    // First run: all three notes are seen at 5000.
    let id_a = upsert_note_for_issue(&conn, disc_id, &fresh_a, 5000, None)
        .unwrap()
        .local_note_id;
    let id_b = upsert_note_for_issue(&conn, disc_id, &fresh_b, 5000, None)
        .unwrap()
        .local_note_id;
    let id_stale = upsert_note_for_issue(&conn, disc_id, &stale, 5000, None)
        .unwrap()
        .local_note_id;

    // Every note gets a document; only the soon-to-be-stale one is queued dirty.
    for id in [id_a, id_b, id_stale] {
        insert_note_document(&conn, id);
    }
    insert_note_dirty_source(&conn, id_stale);

    // Second run at 6000 re-sees only the two fresh notes.
    for note in [&fresh_a, &fresh_b] {
        upsert_note_for_issue(&conn, disc_id, note, 6000, None).unwrap();
    }

    sweep_stale_issue_notes(&conn, disc_id, 6000).unwrap();

    // Derived rows of the stale note are gone ...
    assert_eq!(count_note_documents(&conn, id_stale), 0);
    assert_eq!(count_note_dirty_sources(&conn, id_stale), 0);
    // ... and those of the surviving notes are untouched.
    assert_eq!(count_note_documents(&conn, id_a), 1);
    assert_eq!(count_note_documents(&conn, id_b), 1);
}
/// The sweep succeeds for a stale note that never had a document.
#[test]
fn test_sweep_deletion_handles_note_without_document() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);

    // Seen once at 5000; deliberately no document row is created for it.
    let undocumented = make_note(9004, 1, "No doc", None, 1000, 2000, false, None);
    upsert_note_for_issue(&conn, disc_id, &undocumented, 5000, None).unwrap();

    let removed = sweep_stale_issue_notes(&conn, disc_id, 6000).unwrap();
    assert_eq!(removed, 1);
}
/// Run inside a transaction, the sweep removes the stale note together with
/// its document and its dirty-queue entry.
#[test]
fn test_set_based_deletion_atomicity() {
    let conn = setup();
    let disc_id = get_discussion_id(&conn);

    // One stale note that owns both kinds of dependent rows.
    let stale = make_note(9005, 1, "Stale with deps", None, 1000, 2000, false, None);
    let stale_id = upsert_note_for_issue(&conn, disc_id, &stale, 5000, None)
        .unwrap()
        .local_note_id;
    insert_note_document(&conn, stale_id);
    insert_note_dirty_source(&conn, stale_id);

    // Precondition: the dependent rows are really there.
    assert_eq!(count_note_documents(&conn, stale_id), 1);
    assert_eq!(count_note_dirty_sources(&conn, stale_id), 1);

    // Production code calls the sweep from within the ingest transaction;
    // mirror that by running it through an explicit transaction here.
    let tx = conn.unchecked_transaction().unwrap();
    sweep_stale_issue_notes(&tx, disc_id, 6000).unwrap();
    tx.commit().unwrap();

    // All three deletions must be visible after the commit.
    assert_eq!(count_note_documents(&conn, stale_id), 0);
    assert_eq!(count_note_dirty_sources(&conn, stale_id), 0);
    let surviving_notes = conn
        .query_row(
            "SELECT COUNT(*) FROM notes WHERE gitlab_id = ?",
            [9005_i64],
            |row| row.get::<_, i64>(0),
        )
        .unwrap();
    assert_eq!(surviving_notes, 0);
}
/// Counts every `dirty_sources` entry of type `note`.
fn count_dirty_notes(conn: &Connection) -> i64 {
    let sql = "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'";
    conn.query_row(sql, [], |row| row.get::<_, i64>(0)).unwrap()
}
// Verifies the dirty-propagation statement: after it runs for a discussion,
// every non-system note of that discussion is queued in `dirty_sources`,
// and system notes are left out.
// NOTE(review): the SQL below is a copy of the statement used by the
// ingestion code rather than a call into it, so the two can drift apart.
#[test]
fn test_parent_title_change_marks_notes_dirty() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
// Insert two user notes and one system note
let note1 = make_note(10001, 1, "User note 1", None, 1000, 2000, false, None);
let note2 = make_note(10002, 1, "User note 2", None, 1000, 2000, false, None);
let mut sys_note = make_note(10003, 1, "System note", None, 1000, 2000, false, None);
sys_note.is_system = true;
let out1 = upsert_note_for_issue(&conn, disc_id, &note1, 5000, None).unwrap();
let out2 = upsert_note_for_issue(&conn, disc_id, &note2, 5000, None).unwrap();
upsert_note_for_issue(&conn, disc_id, &sys_note, 5000, None).unwrap();
// Start from an empty note queue so the count below reflects only the
// propagation statement.
conn.execute("DELETE FROM dirty_sources WHERE source_type = 'note'", [])
.unwrap();
assert_eq!(count_dirty_notes(&conn), 0);
// Simulate parent title change triggering discussion re-ingest:
// update the issue title, then run the propagation SQL
conn.execute("UPDATE issues SET title = 'Changed Title' WHERE id = 1", [])
.unwrap();
// Run the propagation query (same as in ingestion code).
// An existing queue entry is refreshed: queued_at is replaced and
// attempt_count is reset to 0.
conn.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
params![now_ms(), disc_id],
)
.unwrap();
// Both user notes should be dirty, system note should not
assert_eq!(count_dirty_notes(&conn), 2);
assert_eq!(count_note_dirty_sources(&conn, out1.local_note_id), 1);
assert_eq!(count_note_dirty_sources(&conn, out2.local_note_id), 1);
}
// Verifies that running the dirty-propagation statement queues the single
// user note of the discussion.
// NOTE(review): the "label change" is simulated only by bumping
// issues.updated_at, and the propagation statement does not read the issues
// table, so this exercises the same path as the title-change test.
#[test]
fn test_parent_label_change_marks_notes_dirty() {
let conn = setup();
let disc_id = get_discussion_id(&conn);
// Insert one user note
let note = make_note(11001, 1, "User note", None, 1000, 2000, false, None);
let out = upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
// Clear dirty_sources so the final count reflects only the propagation
conn.execute("DELETE FROM dirty_sources WHERE source_type = 'note'", [])
.unwrap();
// Simulate label change on parent issue (labels are part of issue metadata)
conn.execute("UPDATE issues SET updated_at = 9999 WHERE id = 1", [])
.unwrap();
// Run propagation query (copied from the ingestion code)
conn.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
params![now_ms(), disc_id],
)
.unwrap();
// Exactly one note entry is queued, and it is the note inserted above
assert_eq!(count_dirty_notes(&conn), 1);
assert_eq!(count_note_dirty_sources(&conn, out.local_note_id), 1);
}
}

View File

@@ -14,6 +14,7 @@ use crate::gitlab::transformers::{
};
use crate::gitlab::types::GitLabDiscussion;
use crate::ingestion::dirty_tracker;
use crate::ingestion::discussions::NoteUpsertOutcome;
use super::merge_requests::MrForDiscussionSync;
@@ -161,6 +162,16 @@ pub fn write_prefetched_mr_discussions(
dirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, local_discussion_id)?;
// Mark child note documents dirty (they inherit parent metadata)
tx.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
params![now_ms(), local_discussion_id],
)?;
for note in &disc.notes {
let should_store_payload = !note.is_system
|| note.position_new_path.is_some()
@@ -187,7 +198,11 @@ pub fn write_prefetched_mr_discussions(
None
};
let outcome =
upsert_note(&tx, local_discussion_id, note, run_seen_at, note_payload_id)?;
if !note.is_system && outcome.changed_semantics {
dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;
}
}
tx.commit()?;
@@ -361,6 +376,16 @@ async fn ingest_discussions_for_mr(
dirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, local_discussion_id)?;
// Mark child note documents dirty (they inherit parent metadata)
tx.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
params![now_ms(), local_discussion_id],
)?;
for note in &notes {
let should_store_payload = !note.is_system
|| note.position_new_path.is_some()
@@ -390,7 +415,11 @@ async fn ingest_discussions_for_mr(
None
};
let outcome =
upsert_note(&tx, local_discussion_id, note, run_seen_at, note_payload_id)?;
if !note.is_system && outcome.changed_semantics {
dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;
}
}
tx.commit()?;
@@ -473,19 +502,87 @@ fn upsert_note(
note: &NormalizedNote,
last_seen_at: i64,
payload_id: Option<i64>,
) -> Result<()> {
) -> Result<NoteUpsertOutcome> {
// Pre-read for semantic change detection
let existing = conn
.query_row(
"SELECT id, body, note_type, resolved, resolved_by,
position_old_path, position_new_path, position_old_line, position_new_line,
position_type, position_line_range_start, position_line_range_end,
position_base_sha, position_start_sha, position_head_sha
FROM notes WHERE gitlab_id = ?",
params![note.gitlab_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, String>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, bool>(3)?,
row.get::<_, Option<String>>(4)?,
row.get::<_, Option<String>>(5)?,
row.get::<_, Option<String>>(6)?,
row.get::<_, Option<i32>>(7)?,
row.get::<_, Option<i32>>(8)?,
row.get::<_, Option<String>>(9)?,
row.get::<_, Option<i32>>(10)?,
row.get::<_, Option<i32>>(11)?,
row.get::<_, Option<String>>(12)?,
row.get::<_, Option<String>>(13)?,
row.get::<_, Option<String>>(14)?,
))
},
)
.ok();
let changed_semantics = match &existing {
Some((
_id,
body,
note_type,
resolved,
resolved_by,
pos_old_path,
pos_new_path,
pos_old_line,
pos_new_line,
pos_type,
pos_range_start,
pos_range_end,
pos_base_sha,
pos_start_sha,
pos_head_sha,
)) => {
*body != note.body
|| *note_type != note.note_type
|| *resolved != note.resolved
|| *resolved_by != note.resolved_by
|| *pos_old_path != note.position_old_path
|| *pos_new_path != note.position_new_path
|| *pos_old_line != note.position_old_line
|| *pos_new_line != note.position_new_line
|| *pos_type != note.position_type
|| *pos_range_start != note.position_line_range_start
|| *pos_range_end != note.position_line_range_end
|| *pos_base_sha != note.position_base_sha
|| *pos_start_sha != note.position_start_sha
|| *pos_head_sha != note.position_head_sha
}
None => true,
};
conn.execute(
"INSERT INTO notes (
gitlab_id, discussion_id, project_id, note_type, is_system,
author_username, body, created_at, updated_at, last_seen_at,
author_id, author_username, body, created_at, updated_at, last_seen_at,
position, resolvable, resolved, resolved_by, resolved_at,
position_old_path, position_new_path, position_old_line, position_new_line,
position_type, position_line_range_start, position_line_range_end,
position_base_sha, position_start_sha, position_head_sha,
raw_payload_id
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26)
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26, ?27)
ON CONFLICT(gitlab_id) DO UPDATE SET
note_type = excluded.note_type,
author_id = excluded.author_id,
body = excluded.body,
updated_at = excluded.updated_at,
last_seen_at = excluded.last_seen_at,
@@ -510,6 +607,7 @@ fn upsert_note(
note.project_id,
&note.note_type,
note.is_system,
note.author_id,
&note.author_username,
&note.body,
note.created_at,
@@ -533,7 +631,17 @@ fn upsert_note(
payload_id,
],
)?;
Ok(())
let local_note_id: i64 = conn.query_row(
"SELECT id FROM notes WHERE gitlab_id = ?",
params![note.gitlab_id],
|row| row.get(0),
)?;
Ok(NoteUpsertOutcome {
local_note_id,
changed_semantics,
})
}
fn sweep_stale_discussions(conn: &Connection, local_mr_id: i64, run_seen_at: i64) -> Result<usize> {
@@ -554,13 +662,36 @@ fn sweep_stale_notes(
local_mr_id: i64,
run_seen_at: i64,
) -> Result<usize> {
// Step 1: Delete note documents for stale notes
conn.execute(
"DELETE FROM documents WHERE source_type = 'note' AND source_id IN
(SELECT id FROM notes
WHERE project_id = ?1
AND discussion_id IN (SELECT id FROM discussions WHERE merge_request_id = ?2)
AND last_seen_at < ?3
AND is_system = 0)",
params![local_project_id, local_mr_id, run_seen_at],
)?;
// Step 2: Delete dirty_sources entries for stale notes
conn.execute(
"DELETE FROM dirty_sources WHERE source_type = 'note' AND source_id IN
(SELECT id FROM notes
WHERE project_id = ?1
AND discussion_id IN (SELECT id FROM discussions WHERE merge_request_id = ?2)
AND last_seen_at < ?3
AND is_system = 0)",
params![local_project_id, local_mr_id, run_seen_at],
)?;
// Step 3: Delete the stale notes themselves
let deleted = conn.execute(
"DELETE FROM notes
WHERE project_id = ?
WHERE project_id = ?1
AND discussion_id IN (
SELECT id FROM discussions WHERE merge_request_id = ?
SELECT id FROM discussions WHERE merge_request_id = ?2
)
AND last_seen_at < ?",
AND last_seen_at < ?3",
params![local_project_id, local_mr_id, run_seen_at],
)?;
if deleted > 0 {
@@ -604,6 +735,8 @@ fn clear_sync_health_error(conn: &Connection, local_mr_id: i64) -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
#[test]
fn result_default_has_zero_counts() {
@@ -621,4 +754,153 @@ mod tests {
let result = IngestMrDiscussionsResult::default();
assert!(!result.pagination_succeeded);
}
/// Builds a migrated in-memory database seeded with one project, one merge
/// request and one merge-request discussion, in that order.
fn setup_mr() -> Connection {
    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&conn).unwrap();

    // Seed rows are inserted parent-first: project, merge request, discussion.
    let seed_statements = [
        "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) \
         VALUES (1, 'group/repo', 'https://gitlab.com/group/repo')",
        "INSERT INTO merge_requests (gitlab_id, iid, project_id, title, state, \
         author_username, source_branch, target_branch, created_at, updated_at, last_seen_at) \
         VALUES (200, 1, 1, 'Test MR', 'opened', 'testuser', 'feat', 'main', 1000, 2000, 3000)",
        "INSERT INTO discussions (gitlab_discussion_id, project_id, merge_request_id, noteable_type, \
         individual_note, last_seen_at, resolvable, resolved) \
         VALUES ('mr-disc-1', 1, 1, 'MergeRequest', 0, 3000, 0, 0)",
    ];
    for sql in seed_statements {
        conn.execute(sql, []).unwrap();
    }

    conn
}
/// Returns the local id of a discussion row (the fixture only creates one).
fn get_mr_discussion_id(conn: &Connection) -> i64 {
    let sql = "SELECT id FROM discussions LIMIT 1";
    conn.query_row(sql, [], |row| row.get::<_, i64>(0)).unwrap()
}
/// Builds a non-system `NormalizedNote` for merge-request tests.
///
/// The author is always `testuser` with no `author_id`, `last_seen_at` mirrors
/// `updated_at`, and every position field is empty.
#[allow(clippy::too_many_arguments)]
fn make_mr_note(
    gitlab_id: i64,
    project_id: i64,
    body: &str,
    note_type: Option<&str>,
    created_at: i64,
    updated_at: i64,
    resolved: bool,
    resolved_by: Option<&str>,
) -> NormalizedNote {
    let owned_note_type = note_type.map(str::to_owned);
    let owned_resolved_by = resolved_by.map(str::to_owned);

    NormalizedNote {
        // Identity
        gitlab_id,
        project_id,
        note_type: owned_note_type,
        is_system: false,
        // Author
        author_id: None,
        author_username: String::from("testuser"),
        // Content and timestamps
        body: body.to_owned(),
        created_at,
        updated_at,
        last_seen_at: updated_at,
        // Resolution state
        position: 0,
        resolvable: false,
        resolved,
        resolved_by: owned_resolved_by,
        resolved_at: None,
        // Diff position (unused by these fixtures)
        position_old_path: None,
        position_new_path: None,
        position_old_line: None,
        position_new_line: None,
        position_type: None,
        position_line_range_start: None,
        position_line_range_end: None,
        position_base_sha: None,
        position_start_sha: None,
        position_head_sha: None,
    }
}
/// A merge-request note carrying an `author_id` has that id written to the row.
#[test]
fn test_mr_note_upsert_captures_author_id() {
    let conn = setup_mr();
    let disc_id = get_mr_discussion_id(&conn);
    let authored = NormalizedNote {
        author_id: Some(12345),
        ..make_mr_note(8001, 1, "MR note", None, 1000, 2000, false, None)
    };

    upsert_note(&conn, disc_id, &authored, 5000, None).unwrap();

    let persisted = conn
        .query_row(
            "SELECT author_id FROM notes WHERE gitlab_id = ?",
            [8001_i64],
            |row| row.get::<_, Option<i64>>(0),
        )
        .unwrap();
    assert_eq!(persisted, Some(12345));
}
/// Inserts a `documents` row of source type `'note'` whose `source_id` is
/// `note_local_id`, using project id 1 and fixed placeholder content/hash.
///
/// Panics if the insert returns an error.
fn insert_note_document(conn: &Connection, note_local_id: i64) {
    const INSERT_NOTE_DOC_SQL: &str =
        "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) \
         VALUES ('note', ?1, 1, 'note content', 'hash123')";
    conn.execute(INSERT_NOTE_DOC_SQL, [note_local_id]).unwrap();
}
/// Counts the `documents` rows of source type `'note'` whose `source_id`
/// equals `note_local_id`.
///
/// Panics if the query returns an error.
fn count_note_documents(conn: &Connection, note_local_id: i64) -> i64 {
    const COUNT_NOTE_DOCS_SQL: &str =
        "SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?";
    conn.query_row(COUNT_NOTE_DOCS_SQL, [note_local_id], |record| {
        record.get::<_, i64>(0)
    })
    .unwrap()
}
/// After a sweep, the document attached to a note that was not re-seen in
/// the latest sync must be gone, while documents of re-seen notes remain.
#[test]
fn test_mr_note_sweep_deletes_note_documents_immediately() {
    let conn = setup_mr();
    let disc_id = get_mr_discussion_id(&conn);
    let (local_project_id, local_mr_id) = (1, 1);

    // Three notes: two that will be re-synced and one that will go stale.
    let kept_first = make_mr_note(8101, 1, "Keep", None, 1000, 2000, false, None);
    let kept_second = make_mr_note(8102, 1, "Keep too", None, 1000, 2000, false, None);
    let stale = make_mr_note(8103, 1, "Stale", None, 1000, 2000, false, None);

    // Initial sync at timestamp 5000 sees all three; remember their local ids.
    let [kept_first_id, kept_second_id, stale_id] =
        [&kept_first, &kept_second, &stale].map(|note| {
            upsert_note(&conn, disc_id, note, 5000, None)
                .unwrap()
                .local_note_id
        });

    // Every note gets a document.
    for local_id in [kept_first_id, kept_second_id, stale_id] {
        insert_note_document(&conn, local_id);
    }

    // Second sync at timestamp 6000 only sees the first two notes.
    for note in [&kept_first, &kept_second] {
        upsert_note(&conn, disc_id, note, 6000, None).unwrap();
    }

    // Sweep using the second sync's timestamp.
    sweep_stale_notes(&conn, local_project_id, local_mr_id, 6000).unwrap();

    // The stale note's document is removed...
    assert_eq!(count_note_documents(&conn, stale_id), 0);
    // ...and the re-synced notes keep theirs.
    assert_eq!(count_note_documents(&conn, kept_first_id), 1);
    assert_eq!(count_note_documents(&conn, kept_second_id), 1);
}
}

View File

@@ -11,23 +11,25 @@ use lore::Config;
use lore::cli::autocorrect::{self, CorrectionResult};
use lore::cli::commands::{
IngestDisplay, InitInputs, InitOptions, InitResult, ListFilters, MrListFilters,
SearchCliFilters, SyncOptions, TimelineParams, open_issue_in_browser, open_mr_in_browser,
print_count, print_count_json, print_doctor_results, print_drift_human, print_drift_json,
print_dry_run_preview, print_dry_run_preview_json, print_embed, print_embed_json,
print_event_count, print_event_count_json, print_generate_docs, print_generate_docs_json,
print_ingest_summary, print_ingest_summary_json, print_list_issues, print_list_issues_json,
print_list_mrs, print_list_mrs_json, print_search_results, print_search_results_json,
print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, print_stats,
print_stats_json, print_sync, print_sync_json, print_sync_status, print_sync_status_json,
print_timeline, print_timeline_json_with_meta, print_who_human, print_who_json, run_auth_test,
run_count, run_count_events, run_doctor, run_drift, run_embed, run_generate_docs, run_ingest,
run_ingest_dry_run, run_init, run_list_issues, run_list_mrs, run_search, run_show_issue,
run_show_mr, run_stats, run_sync, run_sync_status, run_timeline, run_who,
NoteListFilters, SearchCliFilters, SyncOptions, TimelineParams, open_issue_in_browser,
open_mr_in_browser, print_count, print_count_json, print_doctor_results, print_drift_human,
print_drift_json, print_dry_run_preview, print_dry_run_preview_json, print_embed,
print_embed_json, print_event_count, print_event_count_json, print_generate_docs,
print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues,
print_list_issues_json, print_list_mrs, print_list_mrs_json, print_list_notes,
print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl, print_search_results,
print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr,
print_show_mr_json, print_stats, print_stats_json, print_sync, print_sync_json,
print_sync_status, print_sync_status_json, print_timeline, print_timeline_json_with_meta,
print_who_human, print_who_json, query_notes, run_auth_test, run_count, run_count_events,
run_doctor, run_drift, run_embed, run_generate_docs, run_ingest, run_ingest_dry_run, run_init,
run_list_issues, run_list_mrs, run_search, run_show_issue, run_show_mr, run_stats, run_sync,
run_sync_status, run_timeline, run_who,
};
use lore::cli::robot::{RobotMeta, strip_schemas};
use lore::cli::{
Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs,
SearchArgs, StatsArgs, SyncArgs, TimelineArgs, WhoArgs,
NotesArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs, WhoArgs,
};
use lore::core::db::{
LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations,
@@ -173,6 +175,7 @@ async fn main() {
}
Some(Commands::Issues(args)) => handle_issues(cli.config.as_deref(), args, robot_mode),
Some(Commands::Mrs(args)) => handle_mrs(cli.config.as_deref(), args, robot_mode),
Some(Commands::Notes(args)) => handle_notes(cli.config.as_deref(), args, robot_mode),
Some(Commands::Search(args)) => {
handle_search(cli.config.as_deref(), args, robot_mode).await
}
@@ -801,6 +804,59 @@ fn handle_mrs(
Ok(())
}
/// Runs the `notes` command: loads configuration, opens the database,
/// translates the CLI arguments into `NoteListFilters`, queries the notes,
/// and prints them in the selected output format.
///
/// Format selection:
/// - in robot mode, a requested format of `"table"` is replaced by `"json"`;
/// - `"json"`, `"jsonl"` and `"csv"` use their dedicated printers;
/// - any other value falls through to the table printer.
///
/// # Errors
///
/// Propagates failures from `Config::load`, `create_connection`, and
/// `query_notes`.
fn handle_notes(
    config_override: Option<&str>,
    args: NotesArgs,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    // Started before any work so the JSON printer can report elapsed time.
    let start = std::time::Instant::now();

    let config = Config::load(config_override)?;
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;

    // Robot callers that asked for the table format get JSON instead.
    let upgrade_table_to_json = robot_mode && args.format == "table";
    let sort_direction = String::from(if args.asc { "asc" } else { "desc" });

    let filters = NoteListFilters {
        limit: args.limit,
        project: args.project,
        author: args.author,
        note_type: args.note_type,
        include_system: args.include_system,
        for_issue_iid: args.for_issue,
        for_mr_iid: args.for_mr,
        note_id: args.note_id,
        gitlab_note_id: args.gitlab_note_id,
        discussion_id: args.discussion_id,
        since: args.since,
        until: args.until,
        path: args.path,
        contains: args.contains,
        resolution: args.resolution,
        sort: args.sort,
        order: sort_direction,
    };

    let result = query_notes(&conn, &filters, &config)?;

    let format = if upgrade_table_to_json {
        "json"
    } else {
        args.format.as_str()
    };

    match format {
        "json" => {
            let elapsed_ms = start.elapsed().as_millis() as u64;
            print_list_notes_json(&result, elapsed_ms, args.fields.as_deref());
        }
        "jsonl" => {
            print_list_notes_jsonl(&result);
        }
        "csv" => {
            print_list_notes_csv(&result);
        }
        _ => {
            print_list_notes(&result);
        }
    }

    Ok(())
}
async fn handle_ingest(
config_override: Option<&str>,
args: IngestArgs,
@@ -2317,6 +2373,17 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
"active_minimal": ["entity_type", "iid", "title", "participants"]
}
},
"notes": {
"description": "List notes from discussions with rich filtering",
"flags": ["--limit/-n <N>", "--author/-a <username>", "--note-type <type>", "--contains <text>", "--for-issue <iid>", "--for-mr <iid>", "-p/--project <path>", "--since <period>", "--until <period>", "--path <filepath>", "--resolution <any|unresolved|resolved>", "--sort <created|updated>", "--asc", "--include-system", "--note-id <id>", "--gitlab-note-id <id>", "--discussion-id <id>", "--format <table|json|jsonl|csv>", "--fields <list|minimal>", "--open"],
"robot_flags": ["--format json", "--fields minimal"],
"example": "lore --robot notes --author jdefting --since 1y --format json --fields minimal",
"response_schema": {
"ok": "bool",
"data": {"notes": "[NoteListRowJson]", "total_count": "int", "showing": "int"},
"meta": {"elapsed_ms": "int"}
}
},
"robot-docs": {
"description": "This command (agent self-discovery manifest)",
"flags": ["--brief"],
@@ -2338,6 +2405,7 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
"search: FTS5 + vector hybrid search across all entities",
"who: Expert/workload/reviews analysis per file path or person",
"timeline: Chronological event reconstruction across entities",
"notes: Rich note listing with author, type, resolution, path, and discussion filters",
"stats: Database statistics with document/note/discussion counts",
"count: Entity counts with state breakdowns",
"embed: Generate vector embeddings for semantic search via Ollama"