7 Commits

Author SHA1 Message Date
teernisse
8bd68e02bd chore(beads): update issue tracking state
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 17:01:36 -05:00
teernisse
6aaf931c9b fix(embedding): guard is_multiple_of() progress logs against zero
is_multiple_of(N) returns true for 0, which caused debug/info
progress messages to fire at doc_num=0 (the start of every page)
rather than only at the intended 50/100 milestones. Add != 0
check to both the debug (every 50) and info (every 100) log sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 17:01:33 -05:00
teernisse
af167e2086 test(asupersync): add cancellation, parity, and E2E acceptance tests
- Add 7 cancellation integration tests (ShutdownSignal, transaction rollback)
- Add 7 HTTP behavior parity tests (redirect, proxy, keep-alive, DNS, TLS)
- Add 9 E2E runtime acceptance tests (lifecycle, cancel+resume, tracing, HTTP pipeline)
- Total: 1190 tests, all passing

Phases 4-5 of asupersync migration.
2026-03-06 16:09:41 -05:00
teernisse
e8d6c5b15f feat(runtime): replace tokio+reqwest with asupersync async runtime
- Add HTTP adapter layer (src/http.rs) wrapping asupersync h1 client
- Migrate gitlab client, graphql, and ollama to HTTP adapter
- Swap entrypoint from #[tokio::main] to RuntimeBuilder::new().block_on()
- Rewrite signal handler for asupersync (RuntimeHandle::spawn + ctrl_c())
- Migrate rate limiter sleeps to asupersync::time::sleep(wall_now(), d)
- Add asupersync-native HTTP integration tests
- Convert timeline_seed_tests to RuntimeBuilder pattern

Phases 1-3 of asupersync migration (atomic: code won't compile without all pieces).
2026-03-06 15:57:20 -05:00
teernisse
bf977eca1a refactor(structure): reorganize codebase into domain-focused modules 2026-03-06 15:24:09 -05:00
teernisse
4d41d74ea7 refactor(deps): replace tokio Mutex/join!, add NetworkErrorKind enum, remove reqwest from error types 2026-03-06 15:22:42 -05:00
teernisse
3a4fc96558 refactor(shutdown): extract 4 identical Ctrl+C handlers into core/shutdown.rs 2026-03-06 15:22:37 -05:00
96 changed files with 14558 additions and 11722 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1 +1 @@
bd-8con
bd-23xb

950
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -29,12 +29,11 @@ lipgloss = { package = "charmed-lipgloss", version = "0.2", default-features = f
open = "5"
# HTTP
reqwest = { version = "0.12", features = ["json"] }
tokio = { version = "1", features = ["rt-multi-thread", "macros", "time", "signal"] }
asupersync = { version = "0.2", features = ["tls", "tls-native-roots"] }
# Async streaming for pagination
async-stream = "0.3"
futures = { version = "0.3", default-features = false, features = ["alloc"] }
futures = { version = "0.3", default-features = false, features = ["alloc", "async-await"] }
# Utilities
thiserror = "2"
@@ -60,6 +59,7 @@ tracing-appender = "0.2"
[dev-dependencies]
tempfile = "3"
tokio = { version = "1", features = ["rt", "rt-multi-thread", "macros"] }
wiremock = "0.6"
[profile.release]

File diff suppressed because it is too large Load Diff

2
rust-toolchain.toml Normal file
View File

@@ -0,0 +1,2 @@
[toolchain]
channel = "nightly-2026-03-01"

3
src/app/dispatch.rs Normal file
View File

@@ -0,0 +1,3 @@
// Textually splice the sibling source files into this module so that their
// items are all defined in one shared scope.
include!("errors.rs");
include!("handlers.rs");
include!("robot_docs.rs");

478
src/app/errors.rs Normal file
View File

@@ -0,0 +1,478 @@
/// Minimal JSON error envelope (`{"error": {...}}`).
///
/// `handle_error` emits this when the error is not a `LoreError`, or when the
/// richer robot error output fails to serialize.
#[derive(Serialize)]
struct FallbackErrorOutput {
error: FallbackError,
}
/// Payload of [`FallbackErrorOutput`]: an error code plus a human-readable message.
#[derive(Serialize)]
struct FallbackError {
// Machine-readable code; `handle_error` always sets this to "INTERNAL_ERROR".
code: String,
// Display text of the underlying error.
message: String,
}
fn handle_error(e: Box<dyn std::error::Error>, robot_mode: bool) -> ! {
if let Some(gi_error) = e.downcast_ref::<LoreError>() {
if robot_mode {
let output = RobotErrorOutput::from(gi_error);
eprintln!(
"{}",
serde_json::to_string(&output).unwrap_or_else(|_| {
let fallback = FallbackErrorOutput {
error: FallbackError {
code: "INTERNAL_ERROR".to_string(),
message: gi_error.to_string(),
},
};
serde_json::to_string(&fallback)
.unwrap_or_else(|_| r#"{"error":{"code":"INTERNAL_ERROR","message":"Serialization failed"}}"#.to_string())
})
);
std::process::exit(gi_error.exit_code());
} else {
eprintln!();
eprintln!(
" {} {}",
Theme::error().render(Icons::error()),
Theme::error().bold().render(&gi_error.to_string())
);
if let Some(suggestion) = gi_error.suggestion() {
eprintln!();
eprintln!(" {suggestion}");
}
let actions = gi_error.actions();
if !actions.is_empty() {
eprintln!();
for action in &actions {
eprintln!(
" {} {}",
Theme::dim().render("\u{2192}"),
Theme::bold().render(action)
);
}
}
eprintln!();
std::process::exit(gi_error.exit_code());
}
}
if robot_mode {
let output = FallbackErrorOutput {
error: FallbackError {
code: "INTERNAL_ERROR".to_string(),
message: e.to_string(),
},
};
eprintln!(
"{}",
serde_json::to_string(&output).unwrap_or_else(|_| {
r#"{"error":{"code":"INTERNAL_ERROR","message":"Serialization failed"}}"#
.to_string()
})
);
} else {
eprintln!();
eprintln!(
" {} {}",
Theme::error().render(Icons::error()),
Theme::error().bold().render(&e.to_string())
);
eprintln!();
}
std::process::exit(1);
}
/// Emit stderr warnings for any corrections applied during Phase 1.5.
///
/// Human mode prints one "Auto-corrected:" line per correction. Robot mode
/// prints a single JSON object of type `ARG_CORRECTED` carrying the raw
/// corrections together with their teaching notes; if that object cannot be
/// serialized, nothing is printed.
fn emit_correction_warnings(result: &CorrectionResult, robot_mode: bool) {
    #[derive(Serialize)]
    struct Envelope<'a> {
        warning: Body<'a>,
    }
    #[derive(Serialize)]
    struct Body<'a> {
        r#type: &'static str,
        corrections: &'a [autocorrect::Correction],
        teaching: Vec<String>,
    }

    if !robot_mode {
        for correction in &result.corrections {
            eprintln!(
                "{} {}",
                Theme::warning().render("Auto-corrected:"),
                autocorrect::format_teaching_note(correction)
            );
        }
        return;
    }

    let envelope = Envelope {
        warning: Body {
            r#type: "ARG_CORRECTED",
            corrections: &result.corrections,
            teaching: result
                .corrections
                .iter()
                .map(autocorrect::format_teaching_note)
                .collect(),
        },
    };
    // Best effort: a serialization failure silently drops the warning.
    let Ok(json) = serde_json::to_string(&envelope) else {
        return;
    };
    eprintln!("{json}");
}
/// Phase 1 & 4: Handle clap parsing errors with structured JSON output in robot mode.
/// Also includes fuzzy command matching and flag-level suggestions.
///
/// `--help` / `--version` and every error outside robot mode are delegated to
/// clap's own `exit()`. In robot mode a JSON object with a semantic error
/// code, a condensed message (first three lines of clap's output joined by
/// "; "), a suggestion, and optional `correction` / `valid_values` fields is
/// written to stderr and the process exits with status 2.
fn handle_clap_error(e: clap::Error, robot_mode: bool, corrections: &CorrectionResult) -> ! {
    use clap::error::ErrorKind;

    // Generic hints used when nothing more specific can be offered.
    const SEE_COMMANDS: &str = "Run 'lore robot-docs' for valid commands";
    const SEE_VALUES: &str = "Run 'lore robot-docs' for valid values";
    const SEE_REFERENCE: &str = "Run 'lore robot-docs' for command reference";

    let kind = e.kind();

    // Help and version requests are intentional user actions, not errors, so
    // clap prints them and exits 0 even when stdout is redirected. Outside
    // robot mode clap's native rendering is used for everything else as well.
    if !robot_mode || matches!(kind, ErrorKind::DisplayHelp | ErrorKind::DisplayVersion) {
        e.exit()
    }

    let error_code = map_clap_error_kind(kind);
    let rendered = e.to_string();
    let message = rendered
        .lines()
        .take(3)
        .collect::<Vec<_>>()
        .join("; ")
        .trim()
        .to_string();

    let (suggestion, correction, valid_values) = match kind {
        // Phase 4: suggest a similar command for unknown subcommands.
        ErrorKind::InvalidSubcommand => {
            let hint = match extract_invalid_subcommand(&e) {
                Some(typo) => suggest_similar_command(&typo),
                None => SEE_COMMANDS.to_string(),
            };
            (hint, None, None)
        }
        // Flag-level fuzzy matching for unknown flags.
        ErrorKind::UnknownArgument => {
            let invalid_flag = extract_invalid_flag(&e);
            let similar = invalid_flag
                .as_deref()
                .and_then(|flag| autocorrect::suggest_similar_flag(flag, &corrections.args));
            let hint = match &similar {
                Some(s) => format!("Did you mean '{s}'? Run 'lore robot-docs' for all flags"),
                None => "Run 'lore robot-docs' for valid flags".to_string(),
            };
            (hint, similar, None)
        }
        // Value-level suggestions for invalid enum values.
        ErrorKind::InvalidValue => {
            let (flag, listed) = extract_invalid_value_context(&e);
            let hint = match (&listed, &flag) {
                (Some(vals), _) => format!(
                    "Valid values: {}. Run 'lore robot-docs' for details",
                    vals.join(", ")
                ),
                (None, Some(f)) => match autocorrect::valid_values_for_flag(f) {
                    Some(vals) => format!("Valid values for {f}: {}", vals.join(", ")),
                    None => SEE_VALUES.to_string(),
                },
                (None, None) => SEE_VALUES.to_string(),
            };
            let values = listed.or_else(|| {
                flag.as_deref()
                    .and_then(autocorrect::valid_values_for_flag)
                    .map(|v| v.iter().map(|s| (*s).to_string()).collect())
            });
            (hint, None, values)
        }
        ErrorKind::MissingRequiredArgument => {
            let detail = match extract_subcommand_from_context(&e) {
                Some(subcmd) => format!(
                    "Example: {}. Run 'lore {subcmd} --help' for required arguments",
                    command_example(&subcmd)
                ),
                None => SEE_REFERENCE.to_string(),
            };
            (
                format!("A required argument is missing. {detail}"),
                None,
                None,
            )
        }
        ErrorKind::MissingSubcommand => (
            "No command specified. Common commands: issues, mrs, search, sync, \
             timeline, who, me. Run 'lore robot-docs' for the full list"
                .to_string(),
            None,
            None,
        ),
        ErrorKind::TooFewValues | ErrorKind::TooManyValues => {
            let hint = match extract_subcommand_from_context(&e) {
                Some(subcmd) => format!(
                    "Example: {}. Run 'lore {subcmd} --help' for usage",
                    command_example(&subcmd)
                ),
                None => SEE_REFERENCE.to_string(),
            };
            (hint, None, None)
        }
        _ => (SEE_COMMANDS.to_string(), None, None),
    };

    let output = RobotErrorWithSuggestion {
        error: RobotErrorSuggestionData {
            code: error_code.to_string(),
            message,
            suggestion,
            correction,
            valid_values,
        },
    };
    let payload = serde_json::to_string(&output).unwrap_or_else(|_| {
        r#"{"error":{"code":"PARSE_ERROR","message":"Parse error"}}"#.to_string()
    });
    eprintln!("{payload}");
    std::process::exit(2);
}
/// Map clap ErrorKind to semantic error codes
fn map_clap_error_kind(kind: clap::error::ErrorKind) -> &'static str {
use clap::error::ErrorKind;
match kind {
ErrorKind::InvalidSubcommand => "UNKNOWN_COMMAND",
ErrorKind::UnknownArgument => "UNKNOWN_FLAG",
ErrorKind::MissingRequiredArgument => "MISSING_REQUIRED",
ErrorKind::InvalidValue => "INVALID_VALUE",
ErrorKind::ValueValidation => "INVALID_VALUE",
ErrorKind::TooManyValues => "TOO_MANY_VALUES",
ErrorKind::TooFewValues => "TOO_FEW_VALUES",
ErrorKind::ArgumentConflict => "ARGUMENT_CONFLICT",
ErrorKind::MissingSubcommand => "MISSING_COMMAND",
ErrorKind::DisplayHelp | ErrorKind::DisplayVersion => "HELP_REQUESTED",
_ => "PARSE_ERROR",
}
}
/// Extract the invalid subcommand from a clap error (Phase 4)
fn extract_invalid_subcommand(e: &clap::Error) -> Option<String> {
// Parse the error message to find the invalid subcommand
// Format is typically: "error: unrecognized subcommand 'foo'"
let msg = e.to_string();
if let Some(start) = msg.find('\'')
&& let Some(end) = msg[start + 1..].find('\'')
{
return Some(msg[start + 1..start + 1 + end].to_string());
}
None
}
/// Extract the invalid flag from a clap UnknownArgument error.
/// Format is typically: "error: unexpected argument '--xyzzy' found"
fn extract_invalid_flag(e: &clap::Error) -> Option<String> {
let msg = e.to_string();
if let Some(start) = msg.find('\'')
&& let Some(end) = msg[start + 1..].find('\'')
{
let value = &msg[start + 1..start + 1 + end];
if value.starts_with('-') {
return Some(value.to_string());
}
}
None
}
/// Extract flag name and valid values from a clap InvalidValue error.
/// Returns (flag_name, valid_values_if_listed_in_error).
///
/// When the message carries no "[possible values: ...]" section, the values
/// are looked up in the static `autocorrect` registry for the extracted flag.
fn extract_invalid_value_context(e: &clap::Error) -> (Option<String>, Option<Vec<String>>) {
    let rendered = e.to_string();

    // Clap format: "error: invalid value 'opend' for '--state <STATE>'".
    // Keep only the first whitespace-separated token so the value placeholder
    // is dropped: "--state <STATE>" -> "--state".
    let flag = rendered
        .split_once("for '")
        .and_then(|(_, tail)| tail.split_once('\''))
        .map(|(raw, _)| raw.split_whitespace().next().unwrap_or(raw).to_string());

    // Clap format: "[possible values: opened, closed, merged, locked, all]".
    let valid_values = match rendered.split_once("[possible values: ") {
        // An opening marker without a closing bracket yields `None`; the
        // registry is deliberately not consulted in that case.
        Some((_, tail)) => tail.split_once(']').map(|(listed, _)| {
            listed
                .split(", ")
                .map(|value| value.trim().to_string())
                .collect()
        }),
        // Fall back to our static registry.
        None => flag
            .as_deref()
            .and_then(autocorrect::valid_values_for_flag)
            .map(|known| known.iter().map(|value| (*value).to_string()).collect()),
    };

    (flag, valid_values)
}
/// Extract the subcommand context from a clap error for better suggestions.
///
/// Scans the rendered error message for `lore <cmd>` or `'<cmd>'` and returns
/// the first known command (in table order) that appears in either form.
fn extract_subcommand_from_context(e: &clap::Error) -> Option<String> {
    // Order matters: the first entry found in the message wins.
    const KNOWN: &[&str] = &[
        "issues",
        "mrs",
        "notes",
        "search",
        "sync",
        "ingest",
        "count",
        "status",
        "auth",
        "doctor",
        "stats",
        "timeline",
        "who",
        "me",
        "drift",
        "related",
        "trace",
        "file-history",
        "generate-docs",
        "embed",
        "token",
        "cron",
        "init",
        "migrate",
    ];

    let rendered = e.to_string();
    KNOWN
        .iter()
        .find(|cmd| {
            rendered.contains(&format!("lore {cmd}")) || rendered.contains(&format!("'{cmd}'"))
        })
        .map(|cmd| (*cmd).to_string())
}
/// Phase 4: Suggest similar command using fuzzy matching
fn suggest_similar_command(invalid: &str) -> String {
// Primary commands + common aliases for fuzzy matching
const VALID_COMMANDS: &[(&str, &str)] = &[
("issues", "issues"),
("issue", "issues"),
("mrs", "mrs"),
("mr", "mrs"),
("merge-requests", "mrs"),
("search", "search"),
("find", "search"),
("query", "search"),
("sync", "sync"),
("ingest", "ingest"),
("count", "count"),
("status", "status"),
("auth", "auth"),
("doctor", "doctor"),
("version", "version"),
("init", "init"),
("stats", "stats"),
("stat", "stats"),
("generate-docs", "generate-docs"),
("embed", "embed"),
("migrate", "migrate"),
("health", "health"),
("robot-docs", "robot-docs"),
("completions", "completions"),
("timeline", "timeline"),
("who", "who"),
("notes", "notes"),
("note", "notes"),
("drift", "drift"),
("file-history", "file-history"),
("trace", "trace"),
("related", "related"),
("me", "me"),
("token", "token"),
("cron", "cron"),
// Hidden but may be known to agents
("list", "list"),
("show", "show"),
("reset", "reset"),
("backup", "backup"),
];
let invalid_lower = invalid.to_lowercase();
// Find the best match using Jaro-Winkler similarity
let best_match = VALID_COMMANDS
.iter()
.map(|(alias, canonical)| (*canonical, jaro_winkler(&invalid_lower, alias)))
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
if let Some((cmd, score)) = best_match
&& score > 0.7
{
let example = command_example(cmd);
return format!(
"Did you mean 'lore {cmd}'? Example: {example}. Run 'lore robot-docs' for all commands"
);
}
"Run 'lore robot-docs' for valid commands. Common: issues, mrs, search, sync, timeline, who"
.to_string()
}
/// Return a contextual usage example for a command.
///
/// `cmd` is a canonical command name (e.g. `"issues"`). Names without a
/// dedicated example yield the generic placeholder `lore --robot <command>`.
fn command_example(cmd: &str) -> &'static str {
    match cmd {
        "issues" => "lore --robot issues -n 10",
        "mrs" => "lore --robot mrs -n 10",
        "search" => "lore --robot search \"auth bug\"",
        "sync" => "lore --robot sync",
        "ingest" => "lore --robot ingest issues",
        "notes" => "lore --robot notes --for-issue 123",
        "count" => "lore --robot count issues",
        "status" => "lore --robot status",
        "stats" => "lore --robot stats",
        "timeline" => "lore --robot timeline \"auth flow\"",
        "who" => "lore --robot who --path src/",
        "health" => "lore --robot health",
        "generate-docs" => "lore --robot generate-docs",
        "embed" => "lore --robot embed",
        "robot-docs" => "lore robot-docs",
        "trace" => "lore --robot trace src/main.rs",
        "init" => "lore init",
        "related" => "lore --robot related issues 42 -n 5",
        "me" => "lore --robot me",
        "drift" => "lore --robot drift issues 42",
        "file-history" => "lore --robot file-history src/main.rs",
        "token" => "lore --robot token show",
        "cron" => "lore --robot cron status",
        "auth" => "lore --robot auth",
        "doctor" => "lore --robot doctor",
        "migrate" => "lore --robot migrate",
        "completions" => "lore completions bash",
        // `version` is a suggestable command, so give it a concrete example
        // instead of letting it fall through to the placeholder below.
        "version" => "lore --robot version",
        _ => "lore --robot <command>",
    }
}

2001
src/app/handlers.rs Normal file

File diff suppressed because it is too large Load Diff

821
src/app/robot_docs.rs Normal file
View File

@@ -0,0 +1,821 @@
/// Serializable envelope pairing an `ok` flag with the robot-docs payload.
// NOTE(review): presumably the top-level object emitted by `handle_robot_docs` — confirm.
#[derive(Serialize)]
struct RobotDocsOutput {
ok: bool,
data: RobotDocsData,
}
/// Body of the robot-docs document.
///
/// Most sections are free-form `serde_json::Value` trees rather than typed
/// structs, so their exact shape is set wherever this struct is built.
#[derive(Serialize)]
struct RobotDocsData {
name: String,
version: String,
description: String,
activation: RobotDocsActivation,
quick_start: serde_json::Value,
commands: serde_json::Value,
/// Deprecated command aliases (old -> new)
aliases: serde_json::Value,
/// Pre-clap error tolerance: what the CLI auto-corrects
error_tolerance: serde_json::Value,
exit_codes: serde_json::Value,
/// Error codes emitted by clap parse failures
clap_error_codes: serde_json::Value,
error_format: String,
workflows: serde_json::Value,
config_notes: serde_json::Value,
}
/// The `activation` section of [`RobotDocsData`].
// NOTE(review): field names suggest CLI flags, an environment variable, and an
// automatic trigger that enable robot mode — confirm against the constructor.
#[derive(Serialize)]
struct RobotDocsActivation {
flags: Vec<String>,
env: String,
auto: String,
}
fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::error::Error>> {
let version = env!("CARGO_PKG_VERSION").to_string();
let commands = serde_json::json!({
"init": {
"description": "Initialize configuration and database",
"flags": ["--force", "--non-interactive", "--gitlab-url <URL>", "--token-env-var <VAR>", "--projects <paths>", "--default-project <path>"],
"robot_flags": ["--gitlab-url", "--token-env-var", "--projects", "--default-project"],
"example": "lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project,other/repo --default-project group/project",
"response_schema": {
"ok": "bool",
"data": {"config_path": "string", "data_dir": "string", "user": {"username": "string", "name": "string"}, "projects": "[{path:string, name:string}]", "default_project": "string?"},
"meta": {"elapsed_ms": "int"}
}
},
"health": {
"description": "Quick pre-flight check: config, database, schema version",
"flags": [],
"example": "lore --robot health",
"response_schema": {
"ok": "bool",
"data": {"healthy": "bool", "config_found": "bool", "db_found": "bool", "schema_current": "bool", "schema_version": "int"},
"meta": {"elapsed_ms": "int"}
}
},
"auth": {
"description": "Verify GitLab authentication",
"flags": [],
"example": "lore --robot auth",
"response_schema": {
"ok": "bool",
"data": {"authenticated": "bool", "username": "string", "name": "string", "gitlab_url": "string"},
"meta": {"elapsed_ms": "int"}
}
},
"doctor": {
"description": "Full environment health check (config, auth, DB, Ollama)",
"flags": [],
"example": "lore --robot doctor",
"response_schema": {
"ok": "bool",
"data": {"success": "bool", "checks": "{config:object, auth:object, database:object, ollama:object}"},
"meta": {"elapsed_ms": "int"}
}
},
"ingest": {
"description": "Sync data from GitLab",
"flags": ["--project <path>", "--force", "--no-force", "--full", "--no-full", "--dry-run", "--no-dry-run", "<entity: issues|mrs>"],
"example": "lore --robot ingest issues --project group/repo",
"response_schema": {
"ok": "bool",
"data": {"resource_type": "string", "projects_synced": "int", "issues_fetched?": "int", "mrs_fetched?": "int", "upserted": "int", "labels_created": "int", "discussions_fetched": "int", "notes_upserted": "int"},
"meta": {"elapsed_ms": "int"}
}
},
"sync": {
"description": "Full sync pipeline: ingest -> generate-docs -> embed. Supports surgical per-IID mode.",
"flags": ["--full", "--no-full", "--force", "--no-force", "--no-embed", "--no-docs", "--no-events", "--no-file-changes", "--no-status", "--dry-run", "--no-dry-run", "-t/--timings", "--lock", "--issue <IID>", "--mr <IID>", "-p/--project <path>", "--preflight-only"],
"example": "lore --robot sync",
"surgical_mode": {
"description": "Sync specific issues or MRs by IID. Runs a scoped pipeline: preflight -> TOCTOU check -> ingest -> dependents -> docs -> embed.",
"flags": ["--issue <IID> (repeatable)", "--mr <IID> (repeatable)", "-p/--project <path> (required)", "--preflight-only"],
"examples": [
"lore --robot sync --issue 7 -p group/project",
"lore --robot sync --issue 7 --issue 42 --mr 10 -p group/project",
"lore --robot sync --issue 7 -p group/project --preflight-only"
],
"constraints": ["--issue/--mr requires -p/--project (or defaultProject in config)", "--full and --issue/--mr are incompatible", "--preflight-only requires --issue or --mr", "Max 100 total targets"],
"entity_result_outcomes": ["synced", "skipped_stale", "not_found", "preflight_failed", "error"]
},
"response_schema": {
"normal": {
"ok": "bool",
"data": {"issues_updated": "int", "mrs_updated": "int", "documents_regenerated": "int", "documents_embedded": "int", "resource_events_synced": "int", "resource_events_failed": "int"},
"meta": {"elapsed_ms": "int", "stages?": "[{name:string, elapsed_ms:int, items_processed:int}]"}
},
"surgical": {
"ok": "bool",
"data": {"surgical_mode": "true", "surgical_iids": "{issues:[int], merge_requests:[int]}", "entity_results": "[{entity_type:string, iid:int, outcome:string, error?:string, toctou_reason?:string}]", "preflight_only?": "bool", "issues_updated": "int", "mrs_updated": "int", "documents_regenerated": "int", "documents_embedded": "int", "discussions_fetched": "int"},
"meta": {"elapsed_ms": "int"}
}
}
},
"issues": {
"description": "List or show issues",
"flags": ["<IID>", "-n/--limit", "--fields <list>", "-s/--state", "--status <name>", "-p/--project", "-a/--author", "-A/--assignee", "-l/--label", "-m/--milestone", "--since", "--due-before", "--has-due", "--no-has-due", "--sort", "--asc", "--no-asc", "-o/--open", "--no-open"],
"example": "lore --robot issues --state opened --limit 10",
"notes": {
"status_filter": "--status filters by work item status NAME (case-insensitive). Valid values are in meta.available_statuses of any issues list response.",
"status_name": "status_name is the board column label (e.g. 'In review', 'Blocked'). This is the canonical status identifier for filtering."
},
"response_schema": {
"list": {
"ok": "bool",
"data": {"issues": "[{iid:int, title:string, state:string, author_username:string, labels:[string], assignees:[string], discussion_count:int, unresolved_count:int, created_at_iso:string, updated_at_iso:string, web_url:string?, project_path:string, status_name:string?}]", "total_count": "int", "showing": "int"},
"meta": {"elapsed_ms": "int", "available_statuses": "[string] — all distinct status names in the database, for use with --status filter"}
},
"show": {
"ok": "bool",
"data": "IssueDetail (full entity with description, discussions, notes, events)",
"meta": {"elapsed_ms": "int"}
}
},
"example_output": {"list": {"ok":true,"data":{"issues":[{"iid":3864,"title":"Switch Health Card","state":"opened","status_name":"In progress","labels":["customer:BNSF"],"assignees":["teernisse"],"discussion_count":12,"updated_at_iso":"2026-02-12T..."}],"total_count":1,"showing":1},"meta":{"elapsed_ms":42}}},
"fields_presets": {"minimal": ["iid", "title", "state", "updated_at_iso"]}
},
"mrs": {
"description": "List or show merge requests",
"flags": ["<IID>", "-n/--limit", "--fields <list>", "-s/--state", "-p/--project", "-a/--author", "-A/--assignee", "-r/--reviewer", "-l/--label", "--since", "-d/--draft", "-D/--no-draft", "--target", "--source", "--sort", "--asc", "--no-asc", "-o/--open", "--no-open"],
"example": "lore --robot mrs --state opened",
"response_schema": {
"list": {
"ok": "bool",
"data": {"mrs": "[{iid:int, title:string, state:string, author_username:string, labels:[string], draft:bool, target_branch:string, source_branch:string, discussion_count:int, unresolved_count:int, created_at_iso:string, updated_at_iso:string, web_url:string?, project_path:string, reviewers:[string]}]", "total_count": "int", "showing": "int"},
"meta": {"elapsed_ms": "int"}
},
"show": {
"ok": "bool",
"data": "MrDetail (full entity with description, discussions, notes, events)",
"meta": {"elapsed_ms": "int"}
}
},
"example_output": {"list": {"ok":true,"data":{"mrs":[{"iid":200,"title":"Add throw time chart","state":"opened","draft":false,"author_username":"teernisse","target_branch":"main","source_branch":"feat/throw-time","reviewers":["cseiber"],"discussion_count":5,"updated_at_iso":"2026-02-11T..."}],"total_count":1,"showing":1},"meta":{"elapsed_ms":38}}},
"fields_presets": {"minimal": ["iid", "title", "state", "updated_at_iso"]}
},
"search": {
"description": "Search indexed documents (lexical, hybrid, semantic)",
"flags": ["<QUERY>", "--mode", "--type", "--author", "-p/--project", "--label", "--path", "--since", "--updated-since", "-n/--limit", "--fields <list>", "--explain", "--no-explain", "--fts-mode"],
"example": "lore --robot search 'authentication bug' --mode hybrid --limit 10",
"response_schema": {
"ok": "bool",
"data": {"results": "[{document_id:int, source_type:string, title:string, snippet:string, score:float, url:string?, author:string?, created_at:string?, updated_at:string?, project_path:string, labels:[string], paths:[string]}]", "total_results": "int", "query": "string", "mode": "string", "warnings": "[string]"},
"meta": {"elapsed_ms": "int"}
},
"example_output": {"ok":true,"data":{"query":"throw time","mode":"hybrid","total_results":3,"results":[{"document_id":42,"source_type":"issue","title":"Switch Health Card","score":0.92,"snippet":"...throw time data from BNSF...","project_path":"vs/typescript-code"}],"warnings":[]},"meta":{"elapsed_ms":85}},
"fields_presets": {"minimal": ["document_id", "title", "source_type", "score"]}
},
"count": {
"description": "Count entities in local database",
"flags": ["<entity: issues|mrs|discussions|notes|events>", "-f/--for <issue|mr>"],
"example": "lore --robot count issues",
"response_schema": {
"ok": "bool",
"data": {"entity": "string", "count": "int", "system_excluded?": "int", "breakdown?": {"opened": "int", "closed": "int", "merged?": "int", "locked?": "int"}},
"meta": {"elapsed_ms": "int"}
}
},
"stats": {
"description": "Show document and index statistics",
"flags": ["--check", "--no-check", "--repair", "--dry-run", "--no-dry-run"],
"example": "lore --robot stats",
"response_schema": {
"ok": "bool",
"data": {"total_documents": "int", "indexed_documents": "int", "embedded_documents": "int", "stale_documents": "int", "integrity?": "object"},
"meta": {"elapsed_ms": "int"}
}
},
"status": {
"description": "Show sync state (cursors, last sync times)",
"flags": [],
"example": "lore --robot status",
"response_schema": {
"ok": "bool",
"data": {"projects": "[{path:string, issues_cursor:string?, mrs_cursor:string?, last_sync:string?}]"},
"meta": {"elapsed_ms": "int"}
}
},
"generate-docs": {
"description": "Generate searchable documents from ingested data",
"flags": ["--full", "-p/--project <path>"],
"example": "lore --robot generate-docs --full",
"response_schema": {
"ok": "bool",
"data": {"generated": "int", "updated": "int", "unchanged": "int", "deleted": "int"},
"meta": {"elapsed_ms": "int"}
}
},
"embed": {
"description": "Generate vector embeddings for documents via Ollama",
"flags": ["--full", "--no-full", "--retry-failed", "--no-retry-failed"],
"example": "lore --robot embed",
"response_schema": {
"ok": "bool",
"data": {"embedded": "int", "skipped": "int", "failed": "int", "total_chunks": "int"},
"meta": {"elapsed_ms": "int"}
}
},
"migrate": {
"description": "Run pending database migrations",
"flags": [],
"example": "lore --robot migrate",
"response_schema": {
"ok": "bool",
"data": {"before_version": "int", "after_version": "int", "migrated": "bool"},
"meta": {"elapsed_ms": "int"}
}
},
"version": {
"description": "Show version information",
"flags": [],
"example": "lore --robot version",
"response_schema": {
"ok": "bool",
"data": {"version": "string", "git_hash?": "string"},
"meta": {"elapsed_ms": "int"}
}
},
"completions": {
"description": "Generate shell completions",
"flags": ["<shell: bash|zsh|fish|powershell>"],
"example": "lore completions bash > ~/.local/share/bash-completion/completions/lore"
},
"timeline": {
"description": "Chronological timeline of events matching a keyword query or entity reference",
"flags": ["<QUERY>", "-p/--project", "--since <duration>", "--depth <n>", "--no-mentions", "-n/--limit", "--fields <list>", "--max-seeds", "--max-entities", "--max-evidence"],
"query_syntax": {
"search": "Any text -> hybrid search seeding (FTS5 + vector)",
"entity_direct": "issue:N, i:N, mr:N, m:N -> direct entity seeding (no search, no Ollama)"
},
"example": "lore --robot timeline issue:42",
"response_schema": {
"ok": "bool",
"data": {"entities": "[{type:string, iid:int, title:string, project_path:string}]", "events": "[{timestamp:string, type:string, entity_type:string, entity_iid:int, detail:string}]", "total_events": "int"},
"meta": {"elapsed_ms": "int", "search_mode": "string (hybrid|lexical|direct)"}
},
"fields_presets": {"minimal": ["timestamp", "type", "entity_iid", "detail"]}
},
"who": {
"description": "People intelligence: experts, workload, active discussions, overlap, review patterns",
"flags": ["<target>", "--path <path>", "--active", "--overlap <path>", "--reviews", "--since <duration>", "-p/--project", "-n/--limit", "--fields <list>", "--detail", "--no-detail", "--as-of <date>", "--explain-score", "--include-bots", "--include-closed", "--all-history"],
"modes": {
"expert": "lore who <file-path> -- Who knows about this area? (also: --path for root files)",
"workload": "lore who <username> -- What is someone working on?",
"reviews": "lore who <username> --reviews -- Review pattern analysis",
"active": "lore who --active -- Active unresolved discussions",
"overlap": "lore who --overlap <path> -- Who else is touching these files?"
},
"example": "lore --robot who src/features/auth/",
"response_schema": {
"ok": "bool",
"data": {
"mode": "string",
"input": {"target": "string|null", "path": "string|null", "project": "string|null", "since": "string|null", "limit": "int"},
"resolved_input": {"mode": "string", "project_id": "int|null", "project_path": "string|null", "since_ms": "int", "since_iso": "string", "since_mode": "string (default|explicit|none)", "limit": "int"},
"...": "mode-specific fields"
},
"meta": {"elapsed_ms": "int"}
},
"example_output": {"expert": {"ok":true,"data":{"mode":"expert","result":{"experts":[{"username":"teernisse","score":42,"note_count":15,"diff_note_count":8}]}},"meta":{"elapsed_ms":65}}},
"fields_presets": {
"expert_minimal": ["username", "score"],
"workload_minimal": ["entity_type", "iid", "title", "state"],
"active_minimal": ["entity_type", "iid", "title", "participants"]
}
},
"trace": {
"description": "Trace why code was introduced: file -> MR -> issue -> discussion. Follows rename chains by default.",
"flags": ["<path>", "-p/--project <path>", "--discussions", "--no-follow-renames", "-n/--limit <N>"],
"example": "lore --robot trace src/main.rs -p group/repo",
"response_schema": {
"ok": "bool",
"data": {"path": "string", "resolved_paths": "[string]", "trace_chains": "[{mr_iid:int, mr_title:string, mr_state:string, mr_author:string, change_type:string, merged_at_iso:string?, updated_at_iso:string, web_url:string?, issues:[{iid:int, title:string, state:string, reference_type:string, web_url:string?}], discussions:[{discussion_id:string, mr_iid:int, author_username:string, body_snippet:string, path:string, created_at_iso:string}]}]"},
"meta": {"tier": "string (api_only)", "line_requested": "int?", "elapsed_ms": "int", "total_chains": "int", "renames_followed": "bool"}
}
},
"file-history": {
"description": "Show MRs that touched a file, with rename chain resolution and optional DiffNote discussions",
"flags": ["<path>", "-p/--project <path>", "--discussions", "--no-follow-renames", "--merged", "-n/--limit <N>"],
"example": "lore --robot file-history src/main.rs -p group/repo",
"response_schema": {
"ok": "bool",
"data": {"path": "string", "rename_chain": "[string]?", "merge_requests": "[{iid:int, title:string, state:string, author_username:string, change_type:string, merged_at_iso:string?, updated_at_iso:string, merge_commit_sha:string?, web_url:string?}]", "discussions": "[{discussion_id:string, author_username:string, body_snippet:string, path:string, created_at_iso:string}]?"},
"meta": {"elapsed_ms": "int", "total_mrs": "int", "renames_followed": "bool", "paths_searched": "int"}
}
},
"drift": {
"description": "Detect discussion divergence from original issue intent",
"flags": ["<entity_type: issues>", "<IID>", "--threshold <0.0-1.0>", "-p/--project <path>"],
"example": "lore --robot drift issues 42 --threshold 0.4",
"response_schema": {
"ok": "bool",
"data": {"entity_type": "string", "iid": "int", "title": "string", "threshold": "float", "divergent_discussions": "[{discussion_id:string, similarity:float, snippet:string}]"},
"meta": {"elapsed_ms": "int"}
}
},
"notes": {
"description": "List notes from discussions with rich filtering",
"flags": ["--limit/-n <N>", "--author/-a <username>", "--note-type <type>", "--contains <text>", "--for-issue <iid>", "--for-mr <iid>", "-p/--project <path>", "--since <period>", "--until <period>", "--path <filepath>", "--resolution <any|unresolved|resolved>", "--sort <created|updated>", "--asc", "--include-system", "--note-id <id>", "--gitlab-note-id <id>", "--discussion-id <id>", "--fields <list|minimal>", "--open"],
"robot_flags": ["--format json", "--fields minimal"],
"example": "lore --robot notes --author jdefting --since 1y --format json --fields minimal",
"response_schema": {
"ok": "bool",
"data": {"notes": "[NoteListRowJson]", "total_count": "int", "showing": "int"},
"meta": {"elapsed_ms": "int"}
}
},
"cron": {
"description": "Manage cron-based automatic syncing (Unix only)",
"subcommands": {
"install": {"flags": ["--interval <minutes>"], "default_interval": 8},
"uninstall": {"flags": []},
"status": {"flags": []}
},
"example": "lore --robot cron status",
"response_schema": {
"ok": "bool",
"data": {"action": "string (install|uninstall|status)", "installed?": "bool", "interval_minutes?": "int", "entry?": "string", "log_path?": "string", "replaced?": "bool", "was_installed?": "bool", "last_run_iso?": "string"},
"meta": {"elapsed_ms": "int"}
}
},
"token": {
"description": "Manage stored GitLab token",
"subcommands": {
"set": {"flags": ["--token <value>"], "note": "Reads from stdin if --token omitted in non-interactive mode"},
"show": {"flags": ["--unmask"]}
},
"example": "lore --robot token show",
"response_schema": {
"ok": "bool",
"data": {"action": "string (set|show)", "token_masked?": "string", "token?": "string", "valid?": "bool", "username?": "string"},
"meta": {"elapsed_ms": "int"}
}
},
"me": {
"description": "Personal work dashboard: open issues, authored/reviewing MRs, @mentioned-in items, activity feed, and cursor-based since-last-check inbox with computed attention states",
"flags": ["--issues", "--mrs", "--mentions", "--activity", "--since <period>", "-p/--project <path>", "--all", "--user <username>", "--fields <list|minimal>", "--reset-cursor"],
"example": "lore --robot me",
"response_schema": {
"ok": "bool",
"data": {
"username": "string",
"since_iso": "string?",
"summary": {"project_count": "int", "open_issue_count": "int", "authored_mr_count": "int", "reviewing_mr_count": "int", "mentioned_in_count": "int", "needs_attention_count": "int"},
"since_last_check": "{cursor_iso:string, total_event_count:int, groups:[{entity_type:string, entity_iid:int, entity_title:string, project:string, events:[{timestamp_iso:string, event_type:string, actor:string?, summary:string, body_preview:string?}]}]}?",
"open_issues": "[{project:string, iid:int, title:string, state:string, attention_state:string, attention_reason:string, status_name:string?, labels:[string], updated_at_iso:string, web_url:string?}]",
"open_mrs_authored": "[{project:string, iid:int, title:string, state:string, attention_state:string, attention_reason:string, draft:bool, detailed_merge_status:string?, author_username:string?, labels:[string], updated_at_iso:string, web_url:string?}]",
"reviewing_mrs": "[same as open_mrs_authored]",
"mentioned_in": "[{entity_type:string, project:string, iid:int, title:string, state:string, attention_state:string, attention_reason:string, updated_at_iso:string, web_url:string?}]",
"activity": "[{timestamp_iso:string, event_type:string, entity_type:string, entity_iid:int, project:string, actor:string?, is_own:bool, summary:string, body_preview:string?}]"
},
"meta": {"elapsed_ms": "int"}
},
"fields_presets": {
"me_items_minimal": ["iid", "title", "attention_state", "attention_reason", "updated_at_iso"],
"me_mentions_minimal": ["entity_type", "iid", "title", "state", "attention_state", "attention_reason", "updated_at_iso"],
"me_activity_minimal": ["timestamp_iso", "event_type", "entity_iid", "actor"]
},
"notes": {
"attention_states": "needs_attention | not_started | awaiting_response | stale | not_ready",
"event_types": "note | status_change | label_change | assign | unassign | review_request | milestone_change",
"section_flags": "If none of --issues/--mrs/--mentions/--activity specified, all sections returned",
"since_default": "1d for activity feed",
"issue_filter": "Only In Progress / In Review status issues shown",
"since_last_check": "Cursor-based inbox showing events since last run. Null on first run (no cursor yet). Groups events by entity (issue/MR). Sources: others' comments on your items, @mentions, assignment/review-request notes. Cursor auto-advances after each run. Use --reset-cursor to clear.",
"cursor_persistence": "Stored per user in ~/.local/share/lore/me_cursor_<username>.json. --project filters display only for since-last-check; cursor still advances for all projects for that user."
}
},
"robot-docs": {
"description": "This command (agent self-discovery manifest)",
"flags": ["--brief"],
"example": "lore robot-docs --brief"
}
});
let quick_start = serde_json::json!({
"glab_equivalents": [
{ "glab": "glab issue list", "lore": "lore -J issues -n 50", "note": "Richer: includes labels, status, closing MRs, discussion counts" },
{ "glab": "glab issue view 123", "lore": "lore -J issues 123", "note": "Includes full discussions, work-item status, cross-references" },
{ "glab": "glab issue list -l bug", "lore": "lore -J issues --label bug", "note": "AND logic for multiple --label flags" },
{ "glab": "glab mr list", "lore": "lore -J mrs", "note": "Includes draft status, reviewers, discussion counts" },
{ "glab": "glab mr view 456", "lore": "lore -J mrs 456", "note": "Includes discussions, review threads, source/target branches" },
{ "glab": "glab mr list -s opened", "lore": "lore -J mrs -s opened", "note": "States: opened, merged, closed, locked, all" },
{ "glab": "glab api '/projects/:id/issues'", "lore": "lore -J issues -p project", "note": "Fuzzy project matching (suffix or substring)" }
],
"lore_exclusive": [
"search: FTS5 + vector hybrid search across all entities",
"who: Expert/workload/reviews analysis per file path or person",
"timeline: Chronological event reconstruction across entities",
"trace: Code provenance chains (file -> MR -> issue -> discussion)",
"file-history: MR history per file with rename resolution",
"notes: Rich note listing with author, type, resolution, path, and discussion filters",
"stats: Database statistics with document/note/discussion counts",
"count: Entity counts with state breakdowns",
"embed: Generate vector embeddings for semantic search via Ollama",
"cron: Automated sync scheduling (Unix)",
"token: Secure token management with masked display",
"me: Personal work dashboard with attention states, activity feed, cursor-based since-last-check inbox, and needs-attention triage"
],
"read_write_split": "lore = ALL reads (issues, MRs, search, who, timeline, intelligence). glab = ALL writes (create, update, approve, merge, CI/CD)."
});
// --brief: strip response_schema and example_output from every command (~60% smaller)
let mut commands = commands;
if brief {
strip_schemas(&mut commands);
}
let exit_codes = serde_json::json!({
"0": "Success",
"1": "Internal error",
"2": "Usage error (invalid flags or arguments)",
"3": "Config invalid",
"4": "Token not set",
"5": "GitLab auth failed",
"6": "Resource not found",
"7": "Rate limited",
"8": "Network error",
"9": "Database locked",
"10": "Database error",
"11": "Migration failed",
"12": "I/O error",
"13": "Transform error",
"14": "Ollama unavailable",
"15": "Ollama model not found",
"16": "Embedding failed",
"17": "Not found",
"18": "Ambiguous match",
"19": "Health check failed",
"20": "Config not found"
});
let workflows = serde_json::json!({
"first_setup": [
"lore --robot init --gitlab-url https://gitlab.com --token-env-var GITLAB_TOKEN --projects group/project",
"lore --robot doctor",
"lore --robot sync"
],
"daily_sync": [
"lore --robot sync"
],
"search": [
"lore --robot search 'query' --mode hybrid"
],
"pre_flight": [
"lore --robot health"
],
"temporal_intelligence": [
"lore --robot sync",
"lore --robot timeline '<keyword>' --since 30d",
"lore --robot timeline '<keyword>' --depth 2"
],
"people_intelligence": [
"lore --robot who src/path/to/feature/",
"lore --robot who @username",
"lore --robot who @username --reviews",
"lore --robot who --active --since 7d",
"lore --robot who --overlap src/path/",
"lore --robot who --path README.md"
],
"surgical_sync": [
"lore --robot sync --issue 7 -p group/project",
"lore --robot sync --issue 7 --mr 10 -p group/project",
"lore --robot sync --issue 7 -p group/project --preflight-only"
],
"personal_dashboard": [
"lore --robot me",
"lore --robot me --issues",
"lore --robot me --activity --since 7d",
"lore --robot me --project group/repo",
"lore --robot me --fields minimal",
"lore --robot me --reset-cursor"
]
});
// Phase 3: Deprecated command aliases
let aliases = serde_json::json!({
"deprecated_commands": {
"list issues": "issues",
"list mrs": "mrs",
"show issue <IID>": "issues <IID>",
"show mr <IID>": "mrs <IID>",
"auth-test": "auth",
"sync-status": "status"
},
"command_aliases": {
"issue": "issues",
"mr": "mrs",
"merge-requests": "mrs",
"merge-request": "mrs",
"mergerequests": "mrs",
"mergerequest": "mrs",
"generate-docs": "generate-docs",
"generatedocs": "generate-docs",
"gendocs": "generate-docs",
"gen-docs": "generate-docs",
"robot-docs": "robot-docs",
"robotdocs": "robot-docs"
},
"pre_clap_aliases": {
"note": "Underscore/no-separator forms auto-corrected before parsing",
"merge_requests": "mrs",
"merge_request": "mrs",
"mergerequests": "mrs",
"mergerequest": "mrs",
"generate_docs": "generate-docs",
"generatedocs": "generate-docs",
"gendocs": "generate-docs",
"gen-docs": "generate-docs",
"robot-docs": "robot-docs",
"robotdocs": "robot-docs"
},
"prefix_matching": "Enabled via infer_subcommands. Unambiguous prefixes work: 'iss' -> issues, 'time' -> timeline, 'sea' -> search"
});
let error_tolerance = serde_json::json!({
"note": "The CLI auto-corrects common mistakes before parsing. Corrections are applied silently with a teaching note on stderr.",
"auto_corrections": [
{"type": "single_dash_long_flag", "example": "-robot -> --robot", "mode": "all"},
{"type": "case_normalization", "example": "--Robot -> --robot, --State -> --state", "mode": "all"},
{"type": "flag_prefix", "example": "--proj -> --project (when unambiguous)", "mode": "all"},
{"type": "fuzzy_flag", "example": "--projct -> --project", "mode": "all (threshold 0.9 in robot, 0.8 in human)"},
{"type": "subcommand_alias", "example": "merge_requests -> mrs, robotdocs -> robot-docs", "mode": "all"},
{"type": "subcommand_fuzzy", "example": "issuess -> issues, timline -> timeline, serach -> search", "mode": "all (threshold 0.85)"},
{"type": "flag_as_subcommand", "example": "--robot-docs -> robot-docs, --generate-docs -> generate-docs", "mode": "all"},
{"type": "value_normalization", "example": "--state Opened -> --state opened", "mode": "all"},
{"type": "value_fuzzy", "example": "--state opend -> --state opened", "mode": "all"},
{"type": "prefix_matching", "example": "lore iss -> lore issues, lore time -> lore timeline", "mode": "all (via clap infer_subcommands)"}
],
"teaching_notes": "Auto-corrections emit a JSON warning on stderr: {\"warning\":{\"type\":\"ARG_CORRECTED\",\"corrections\":[...],\"teaching\":[...]}}"
});
// Phase 3: Clap error codes (emitted by handle_clap_error)
let clap_error_codes = serde_json::json!({
"UNKNOWN_COMMAND": "Unrecognized subcommand (includes fuzzy suggestion)",
"UNKNOWN_FLAG": "Unrecognized command-line flag",
"MISSING_REQUIRED": "Required argument not provided",
"INVALID_VALUE": "Invalid value for argument",
"TOO_MANY_VALUES": "Too many values provided",
"TOO_FEW_VALUES": "Too few values provided",
"ARGUMENT_CONFLICT": "Conflicting arguments",
"MISSING_COMMAND": "No subcommand provided (in non-robot mode, shows help)",
"HELP_REQUESTED": "Help or version flag used",
"PARSE_ERROR": "General parse error"
});
let config_notes = serde_json::json!({
"defaultProject": {
"type": "string?",
"description": "Fallback project path used when -p/--project is omitted. Must match a configured project path (exact or suffix). CLI -p always overrides.",
"example": "group/project"
}
});
let output = RobotDocsOutput {
ok: true,
data: RobotDocsData {
name: "lore".to_string(),
version,
description: "Local GitLab data management with semantic search".to_string(),
activation: RobotDocsActivation {
flags: vec!["--robot".to_string(), "-J".to_string(), "--json".to_string()],
env: "LORE_ROBOT=1".to_string(),
auto: "Non-TTY stdout".to_string(),
},
quick_start,
commands,
aliases,
error_tolerance,
exit_codes,
clap_error_codes,
error_format: "stderr JSON: {\"error\":{\"code\":\"...\",\"message\":\"...\",\"suggestion\":\"...\",\"actions\":[\"...\"]}}".to_string(),
workflows,
config_notes,
},
};
if robot_mode {
println!("{}", serde_json::to_string(&output)?);
} else {
println!("{}", serde_json::to_string_pretty(&output)?);
}
Ok(())
}
/// Entry point for the `who` command.
///
/// Loads the configuration, fills in `config.default_project` when `args`
/// carries no project, runs the query via `run_who`, and prints the outcome
/// as JSON (`robot_mode == true`) or as human-readable text otherwise.
///
/// # Errors
///
/// Propagates any error returned by `Config::load` or `run_who`.
fn handle_who(
    config_override: Option<&str>,
    mut args: WhoArgs,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let timer = std::time::Instant::now();
    let cfg = Config::load(config_override)?;

    // A project already present on `args` is kept; the configured default is
    // only consulted when none was given.
    args.project = args.project.take().or_else(|| cfg.default_project.clone());

    let outcome = run_who(&cfg, &args)?;
    // Captured before printing, so rendering time is not part of the figure.
    let elapsed_ms = timer.elapsed().as_millis() as u64;

    if robot_mode {
        print_who_json(&outcome, &args, elapsed_ms);
        return Ok(());
    }
    print_who_human(
        &outcome.result,
        outcome.resolved_input.project_path.as_deref(),
    );
    Ok(())
}
/// Entry point for the `me` command.
///
/// Loads the configuration and hands it, together with the parsed arguments
/// and the output-mode flag, to `run_me`. No output is produced here; any
/// success value returned by `run_me` is discarded.
///
/// # Errors
///
/// Propagates any error returned by `Config::load` or `run_me`.
fn handle_me(
    config_override: Option<&str>,
    args: MeArgs,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    // The configuration is loaded first; a load failure returns before `run_me` is called.
    run_me(&Config::load(config_override)?, &args, robot_mode)?;
    Ok(())
}
/// Entry point for the `drift` command.
///
/// Loads the configuration, resolves the project through
/// `Config::effective_project`, awaits `run_drift`, and prints the response
/// as JSON (`robot_mode == true`) or as human-readable text otherwise.
///
/// # Errors
///
/// Propagates any error returned by `Config::load` or `run_drift`.
async fn handle_drift(
    config_override: Option<&str>,
    entity_type: &str,
    iid: i64,
    threshold: f32,
    project: Option<&str>,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let timer = std::time::Instant::now();
    let cfg = Config::load(config_override)?;

    let report = run_drift(
        &cfg,
        entity_type,
        iid,
        threshold,
        cfg.effective_project(project),
    )
    .await?;

    // Captured before printing, so only the JSON path needs (and gets) it.
    let elapsed_ms = timer.elapsed().as_millis() as u64;

    if robot_mode {
        print_drift_json(&report, elapsed_ms);
        return Ok(());
    }
    print_drift_human(&report);
    Ok(())
}
/// Entry point for the `related` command.
///
/// Loads the configuration, resolves the project through
/// `Config::effective_project`, awaits `run_related`, and prints the response
/// as JSON (`robot_mode == true`) or as human-readable text otherwise.
///
/// `query_or_type` and `iid` are forwarded untouched to `run_related`.
///
/// # Errors
///
/// Propagates any error returned by `Config::load` or `run_related`.
async fn handle_related(
    config_override: Option<&str>,
    query_or_type: &str,
    iid: Option<i64>,
    limit: usize,
    project: Option<&str>,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let timer = std::time::Instant::now();
    let cfg = Config::load(config_override)?;
    let scoped_project = cfg.effective_project(project);

    let related = run_related(&cfg, query_or_type, iid, limit, scoped_project).await?;
    // Elapsed time covers config load and the query, not the printing below.
    let elapsed_ms = timer.elapsed().as_millis() as u64;

    if !robot_mode {
        print_related_human(&related);
        return Ok(());
    }
    print_related_json(&related, elapsed_ms);
    Ok(())
}
/// Lists issues or merge requests from a flat set of filter arguments.
///
/// `entity` must be `"issues"` or `"mrs"`. The state filter is lower-cased
/// before use and the project filter is resolved through
/// `Config::effective_project`. Output is chosen in this order: open in the
/// browser (`open_browser`), JSON (`json_output`), otherwise the human table.
///
/// Filters that exist on only one entity kind are ignored for the other:
/// milestone, due-before and has-due-date apply to issues; reviewer, draft,
/// no-draft, target branch and source branch apply to merge requests.
///
/// Any other `entity` value prints an error to stderr and terminates the
/// process with exit status 1.
///
/// NOTE(review): the name suggests this backs a legacy `list <entity>` command
/// form — confirm at the call site.
///
/// # Errors
///
/// Propagates any error returned by `Config::load`, `run_list_issues` or
/// `run_list_mrs`.
// The parameter list mirrors the flat CLI flag set one-to-one, hence the lint opt-out.
#[allow(clippy::too_many_arguments)]
async fn handle_list_compat(
    config_override: Option<&str>,
    entity: &str,
    limit: usize,
    project_filter: Option<&str>,
    state_filter: Option<&str>,
    author_filter: Option<&str>,
    assignee_filter: Option<&str>,
    label_filter: Option<&[String]>,
    milestone_filter: Option<&str>,
    since_filter: Option<&str>,
    due_before_filter: Option<&str>,
    has_due_date: bool,
    sort: &str,
    order: &str,
    open_browser: bool,
    json_output: bool,
    draft: bool,
    no_draft: bool,
    reviewer_filter: Option<&str>,
    target_branch_filter: Option<&str>,
    source_branch_filter: Option<&str>,
) -> Result<(), Box<dyn std::error::Error>> {
    let timer = std::time::Instant::now();
    let cfg = Config::load(config_override)?;
    let project = cfg.effective_project(project_filter);
    // Owned lower-cased copy; the filter structs below borrow from it.
    let lowered_state = state_filter.map(|s| s.to_lowercase());

    match entity {
        "issues" => {
            let listing = run_list_issues(
                &cfg,
                ListFilters {
                    limit,
                    project,
                    state: lowered_state.as_deref(),
                    author: author_filter,
                    assignee: assignee_filter,
                    labels: label_filter,
                    milestone: milestone_filter,
                    since: since_filter,
                    due_before: due_before_filter,
                    has_due_date,
                    // This entry point never filters by work-item status.
                    statuses: &[],
                    sort,
                    order,
                },
            )?;
            if open_browser {
                open_issue_in_browser(&listing);
            } else if json_output {
                print_list_issues_json(&listing, timer.elapsed().as_millis() as u64, None);
            } else {
                print_list_issues(&listing);
            }
        }
        "mrs" => {
            let listing = run_list_mrs(
                &cfg,
                MrListFilters {
                    limit,
                    project,
                    state: lowered_state.as_deref(),
                    author: author_filter,
                    assignee: assignee_filter,
                    reviewer: reviewer_filter,
                    labels: label_filter,
                    since: since_filter,
                    draft,
                    no_draft,
                    target_branch: target_branch_filter,
                    source_branch: source_branch_filter,
                    sort,
                    order,
                },
            )?;
            if open_browser {
                open_mr_in_browser(&listing);
            } else if json_output {
                print_list_mrs_json(&listing, timer.elapsed().as_millis() as u64, None);
            } else {
                print_list_mrs(&listing);
            }
        }
        _ => {
            // Unsupported entity: report on stderr and terminate immediately.
            eprintln!(
                "{}",
                Theme::error().render(&format!("Unknown entity: {entity}"))
            );
            std::process::exit(1);
        }
    }

    Ok(())
}
/// Shows a single issue or merge request identified by `iid`.
///
/// `entity` must be `"issue"` or `"mr"` (singular). The project filter is
/// resolved through `Config::effective_project`. The result is printed as
/// JSON with the elapsed milliseconds when `robot_mode` is set, otherwise in
/// the human-readable layout.
///
/// Any other `entity` value prints an error to stderr and terminates the
/// process with exit status 1.
///
/// # Errors
///
/// Propagates any error returned by `Config::load`, `run_show_issue` or
/// `run_show_mr`.
async fn handle_show_compat(
    config_override: Option<&str>,
    entity: &str,
    iid: i64,
    project_filter: Option<&str>,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let timer = std::time::Instant::now();
    let cfg = Config::load(config_override)?;
    let project = cfg.effective_project(project_filter);

    match entity {
        "issue" => {
            let detail = run_show_issue(&cfg, iid, project)?;
            if robot_mode {
                print_show_issue_json(&detail, timer.elapsed().as_millis() as u64);
            } else {
                print_show_issue(&detail);
            }
        }
        "mr" => {
            let detail = run_show_mr(&cfg, iid, project)?;
            if robot_mode {
                print_show_mr_json(&detail, timer.elapsed().as_millis() as u64);
            } else {
                print_show_mr(&detail);
            }
        }
        _ => {
            // Unsupported entity: report on stderr and terminate immediately.
            eprintln!(
                "{}",
                Theme::error().render(&format!("Unknown entity: {entity}"))
            );
            std::process::exit(1);
        }
    }

    Ok(())
}

870
src/cli/args.rs Normal file
View File

@@ -0,0 +1,870 @@
use clap::{Args, Parser, Subcommand};
// Command-line arguments for `lore issues`.
//
// The optional positional `iid` switches between listing (omitted) and showing
// one issue (provided), as stated in its help text. Flags are grouped in help
// output under the headings "Output", "Filters", "Sorting" and "Actions".
//
// NOTE: with clap's derive API the `///` comments on fields are the help text
// shown to users, so they are runtime strings — edit them deliberately. The
// `after_help` literal starts with an ANSI bold "Examples:" heading
// (`\x1b[1m` ... `\x1b[0m`).
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore issues -n 10 # List 10 most recently updated issues
lore issues -s opened -l bug # Open issues labeled 'bug'
lore issues 42 -p group/repo # Show issue #42 in a specific project
lore issues --since 7d -a jsmith # Issues updated in last 7 days by jsmith")]
pub struct IssuesArgs {
/// Issue IID (omit to list, provide to show details)
pub iid: Option<i64>,
/// Maximum results
#[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize,
// `value_delimiter = ','` splits one `--fields a,b,c` value into separate entries.
/// Select output fields (comma-separated, or 'minimal' preset: iid,title,state,updated_at_iso)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
// Only the three listed values are accepted by the parser.
/// Filter by state (opened, closed, all)
#[arg(short = 's', long, help_heading = "Filters", value_parser = ["opened", "closed", "all"])]
pub state: Option<String>,
/// Filter by project path
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Filter by author username
#[arg(short = 'a', long, help_heading = "Filters")]
pub author: Option<String>,
/// Filter by assignee username
#[arg(short = 'A', long, help_heading = "Filters")]
pub assignee: Option<String>,
/// Filter by label (repeatable, AND logic)
#[arg(short = 'l', long, help_heading = "Filters")]
pub label: Option<Vec<String>>,
/// Filter by milestone title
#[arg(short = 'm', long, help_heading = "Filters")]
pub milestone: Option<String>,
// Plain `Vec` (not `Option<Vec>`): empty when the flag is never given.
/// Filter by work-item status name (repeatable, OR logic)
#[arg(long, help_heading = "Filters")]
pub status: Vec<String>,
/// Filter by time (7d, 2w, 1m, or YYYY-MM-DD)
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
/// Filter by due date (before this date, YYYY-MM-DD)
#[arg(long = "due-before", help_heading = "Filters")]
pub due_before: Option<String>,
/// Show only issues with a due date
#[arg(
long = "has-due",
help_heading = "Filters",
overrides_with = "no_has_due"
)]
pub has_due: bool,
// Hidden `--no-has-due` counterpart; it and `--has-due` override each other.
#[arg(long = "no-has-due", hide = true, overrides_with = "has_due")]
pub no_has_due: bool,
/// Sort field (updated, created, iid)
#[arg(long, value_parser = ["updated", "created", "iid"], default_value = "updated", help_heading = "Sorting")]
pub sort: String,
/// Sort ascending (default: descending)
#[arg(long, help_heading = "Sorting", overrides_with = "no_asc")]
pub asc: bool,
// Hidden `--no-asc` counterpart; it and `--asc` override each other.
#[arg(long = "no-asc", hide = true, overrides_with = "asc")]
pub no_asc: bool,
/// Open first matching item in browser
#[arg(
short = 'o',
long,
help_heading = "Actions",
overrides_with = "no_open"
)]
pub open: bool,
// Hidden `--no-open` counterpart; it and `--open` override each other.
#[arg(long = "no-open", hide = true, overrides_with = "open")]
pub no_open: bool,
}
// Command-line arguments for `lore mrs`.
//
// The optional positional `iid` switches between listing (omitted) and showing
// one merge request (provided), as stated in its help text. Flags are grouped
// in help output under "Output", "Filters", "Sorting" and "Actions".
//
// NOTE: with clap's derive API the `///` comments on fields are the help text
// shown to users, so they are runtime strings — edit them deliberately.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore mrs -s opened # List open merge requests
lore mrs -s merged --since 2w # MRs merged in the last 2 weeks
lore mrs 99 -p group/repo # Show MR !99 in a specific project
lore mrs -D --reviewer jsmith # Non-draft MRs reviewed by jsmith")]
pub struct MrsArgs {
/// MR IID (omit to list, provide to show details)
pub iid: Option<i64>,
/// Maximum results
#[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize,
// `value_delimiter = ','` splits one `--fields a,b,c` value into separate entries.
/// Select output fields (comma-separated, or 'minimal' preset: iid,title,state,updated_at_iso)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
// Only the five listed values are accepted by the parser.
/// Filter by state (opened, merged, closed, locked, all)
#[arg(short = 's', long, help_heading = "Filters", value_parser = ["opened", "merged", "closed", "locked", "all"])]
pub state: Option<String>,
/// Filter by project path
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Filter by author username
#[arg(short = 'a', long, help_heading = "Filters")]
pub author: Option<String>,
/// Filter by assignee username
#[arg(short = 'A', long, help_heading = "Filters")]
pub assignee: Option<String>,
/// Filter by reviewer username
#[arg(short = 'r', long, help_heading = "Filters")]
pub reviewer: Option<String>,
/// Filter by label (repeatable, AND logic)
#[arg(short = 'l', long, help_heading = "Filters")]
pub label: Option<Vec<String>>,
/// Filter by time (7d, 2w, 1m, or YYYY-MM-DD)
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
// `--draft` and `--no-draft` are declared as conflicting: passing both is a
// parse error rather than one overriding the other.
/// Show only draft MRs
#[arg(
short = 'd',
long,
conflicts_with = "no_draft",
help_heading = "Filters"
)]
pub draft: bool,
/// Exclude draft MRs
#[arg(
short = 'D',
long = "no-draft",
conflicts_with = "draft",
help_heading = "Filters"
)]
pub no_draft: bool,
/// Filter by target branch
#[arg(long, help_heading = "Filters")]
pub target: Option<String>,
/// Filter by source branch
#[arg(long, help_heading = "Filters")]
pub source: Option<String>,
/// Sort field (updated, created, iid)
#[arg(long, value_parser = ["updated", "created", "iid"], default_value = "updated", help_heading = "Sorting")]
pub sort: String,
/// Sort ascending (default: descending)
#[arg(long, help_heading = "Sorting", overrides_with = "no_asc")]
pub asc: bool,
// Hidden `--no-asc` counterpart; it and `--asc` override each other.
#[arg(long = "no-asc", hide = true, overrides_with = "asc")]
pub no_asc: bool,
/// Open first matching item in browser
#[arg(
short = 'o',
long,
help_heading = "Actions",
overrides_with = "no_open"
)]
pub open: bool,
// Hidden `--no-open` counterpart; it and `--open` override each other.
#[arg(long = "no-open", hide = true, overrides_with = "open")]
pub no_open: bool,
}
// Command-line arguments for `lore notes`.
//
// There is no positional argument; every field is a flag. Flags are grouped in
// help output under "Output", "Filters", "Sorting" and "Actions".
//
// NOTE: with clap's derive API the `///` comments on fields are the help text
// shown to users, so they are runtime strings — edit them deliberately.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore notes # List 50 most recent notes
lore notes --author alice --since 7d # Notes by alice in last 7 days
lore notes --for-issue 42 -p group/repo # Notes on issue #42
lore notes --path src/ --resolution unresolved # Unresolved diff notes in src/")]
pub struct NotesArgs {
/// Maximum results
#[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize,
// `value_delimiter = ','` splits one `--fields a,b,c` value into separate entries.
/// Select output fields (comma-separated, or 'minimal' preset: id,author_username,body,created_at_iso)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
/// Filter by author username
#[arg(short = 'a', long, help_heading = "Filters")]
pub author: Option<String>,
// Free-form string: no `value_parser` restricts it to the types named in the help.
/// Filter by note type (DiffNote, DiscussionNote)
#[arg(long, help_heading = "Filters")]
pub note_type: Option<String>,
/// Filter by body text (substring match)
#[arg(long, help_heading = "Filters")]
pub contains: Option<String>,
/// Filter by internal note ID
#[arg(long, help_heading = "Filters")]
pub note_id: Option<i64>,
/// Filter by GitLab note ID
#[arg(long, help_heading = "Filters")]
pub gitlab_note_id: Option<i64>,
/// Filter by discussion ID
#[arg(long, help_heading = "Filters")]
pub discussion_id: Option<String>,
/// Include system notes (excluded by default)
#[arg(long, help_heading = "Filters")]
pub include_system: bool,
// `--for-issue` and `--for-mr` are declared as conflicting: passing both is a
// parse error. NOTE(review): the "requires --project" part of the help is not
// expressed in these attributes — presumably checked by the handler; verify.
/// Filter to notes on a specific issue IID (requires --project or default_project)
#[arg(long, conflicts_with = "for_mr", help_heading = "Filters")]
pub for_issue: Option<i64>,
/// Filter to notes on a specific MR IID (requires --project or default_project)
#[arg(long, conflicts_with = "for_issue", help_heading = "Filters")]
pub for_mr: Option<i64>,
/// Filter by project path
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Filter by time (7d, 2w, 1m, or YYYY-MM-DD)
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
/// Filter until date (YYYY-MM-DD, inclusive end-of-day)
#[arg(long, help_heading = "Filters")]
pub until: Option<String>,
/// Filter by file path (exact match or prefix with trailing /)
#[arg(long, help_heading = "Filters")]
pub path: Option<String>,
// Only the three listed values are accepted by the parser.
/// Filter by resolution status (any, unresolved, resolved)
#[arg(
long,
value_parser = ["any", "unresolved", "resolved"],
help_heading = "Filters"
)]
pub resolution: Option<String>,
/// Sort field (created, updated)
#[arg(
long,
value_parser = ["created", "updated"],
default_value = "created",
help_heading = "Sorting"
)]
pub sort: String,
// No hidden `--no-asc` / `--no-open` counterparts are declared on this struct.
/// Sort ascending (default: descending)
#[arg(long, help_heading = "Sorting")]
pub asc: bool,
/// Open first matching item in browser
#[arg(long, help_heading = "Actions")]
pub open: bool,
}
// Command-line arguments for the ingest command.
//
// The optional positional `entity` restricts ingestion to one entity kind; the
// parser accepts only "issues" or "mrs". Each boolean flag below has a hidden
// `--no-*` counterpart, and the two override each other.
//
// NOTE: with clap's derive API the `///` comments on fields are the help text
// shown to users, so they are runtime strings — edit them deliberately.
#[derive(Parser)]
pub struct IngestArgs {
/// Entity to ingest (issues, mrs). Omit to ingest everything
#[arg(value_parser = ["issues", "mrs"])]
pub entity: Option<String>,
/// Filter to single project
#[arg(short = 'p', long)]
pub project: Option<String>,
/// Override stale sync lock
#[arg(short = 'f', long, overrides_with = "no_force")]
pub force: bool,
// Hidden `--no-force` counterpart of `--force`.
#[arg(long = "no-force", hide = true, overrides_with = "force")]
pub no_force: bool,
/// Full re-sync: reset cursors and fetch all data from scratch
#[arg(long, overrides_with = "no_full")]
pub full: bool,
// Hidden `--no-full` counterpart of `--full`.
#[arg(long = "no-full", hide = true, overrides_with = "full")]
pub no_full: bool,
/// Preview what would be synced without making changes
#[arg(long, overrides_with = "no_dry_run")]
pub dry_run: bool,
// Hidden `--no-dry-run` counterpart of `--dry-run`.
#[arg(long = "no-dry-run", hide = true, overrides_with = "dry_run")]
pub no_dry_run: bool,
}
// Command-line arguments for `lore stats`.
//
// NOTE: with clap's derive API the `///` comments on fields are the help text
// shown to users, so they are runtime strings — edit them deliberately.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore stats # Show document and index statistics
lore stats --check # Run integrity checks
lore stats --repair --dry-run # Preview what repair would fix
lore --robot stats # JSON output for automation")]
pub struct StatsArgs {
/// Run integrity checks
#[arg(long, overrides_with = "no_check")]
pub check: bool,
// Hidden `--no-check` counterpart; it and `--check` override each other.
#[arg(long = "no-check", hide = true, overrides_with = "check")]
pub no_check: bool,
// NOTE(review): "auto-enables --check" is not expressed in this attribute —
// presumably applied by the handler; verify.
/// Repair integrity issues (auto-enables --check)
#[arg(long)]
pub repair: bool,
// NOTE(review): the help says "requires --repair", but no `requires` constraint
// is declared here — presumably validated elsewhere; verify.
/// Preview what would be repaired without making changes (requires --repair)
#[arg(long, overrides_with = "no_dry_run")]
pub dry_run: bool,
// Hidden `--no-dry-run` counterpart; it and `--dry-run` override each other.
#[arg(long = "no-dry-run", hide = true, overrides_with = "dry_run")]
pub no_dry_run: bool,
}
// Command-line arguments for `lore search`.
//
// `query` is the single required positional argument. Flags are grouped in
// help output under "Mode", "Filters" and "Output".
//
// NOTE: with clap's derive API the `///` comments on fields are the help text
// shown to users, so they are runtime strings — edit them deliberately.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore search 'authentication bug' # Hybrid search (default)
lore search 'deploy' --mode lexical --type mr # Lexical search, MRs only
lore search 'API rate limit' --since 30d # Recent results only
lore search 'config' -p group/repo --explain # With ranking explanation")]
pub struct SearchArgs {
/// Search query string
pub query: String,
/// Search mode (lexical, hybrid, semantic)
#[arg(long, default_value = "hybrid", value_parser = ["lexical", "hybrid", "semantic"], help_heading = "Mode")]
pub mode: String,
// Exposed as `--type` (the field cannot be named `type`, a Rust keyword).
/// Filter by source type (issue, mr, discussion, note)
#[arg(long = "type", value_name = "TYPE", value_parser = ["issue", "mr", "discussion", "note"], help_heading = "Filters")]
pub source_type: Option<String>,
// No short flag for author here; only `--author`.
/// Filter by author username
#[arg(long, help_heading = "Filters")]
pub author: Option<String>,
/// Filter by project path
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
// Explicit `Append` action: each `--label` occurrence adds one entry.
/// Filter by label (repeatable, AND logic)
#[arg(long, action = clap::ArgAction::Append, help_heading = "Filters")]
pub label: Vec<String>,
/// Filter by file path (trailing / for prefix match)
#[arg(long, help_heading = "Filters")]
pub path: Option<String>,
/// Filter by created since (7d, 2w, or YYYY-MM-DD)
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
/// Filter by updated since (7d, 2w, or YYYY-MM-DD)
#[arg(long = "updated-since", help_heading = "Filters")]
pub updated_since: Option<String>,
// NOTE(review): the "max 100" in the help is not enforced by this attribute —
// presumably clamped by the search handler; verify.
/// Maximum results (default 20, max 100)
#[arg(
short = 'n',
long = "limit",
default_value = "20",
help_heading = "Output"
)]
pub limit: usize,
// `value_delimiter = ','` splits one `--fields a,b,c` value into separate entries.
/// Select output fields (comma-separated, or 'minimal' preset: document_id,title,source_type,score)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
/// Show ranking explanation per result
#[arg(long, help_heading = "Output", overrides_with = "no_explain")]
pub explain: bool,
// Hidden `--no-explain` counterpart; it and `--explain` override each other.
#[arg(long = "no-explain", hide = true, overrides_with = "explain")]
pub no_explain: bool,
/// FTS query mode: safe (default) or raw
#[arg(long = "fts-mode", default_value = "safe", value_parser = ["safe", "raw"], help_heading = "Mode")]
pub fts_mode: String,
}
// Command-line arguments for `lore generate-docs`.
//
// NOTE: with clap's derive API the `///` comments on fields are the help text
// shown to users, so they are runtime strings — edit them deliberately.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore generate-docs # Generate docs for dirty entities
lore generate-docs --full # Full rebuild of all documents
lore generate-docs --full -p group/repo # Full rebuild for one project")]
pub struct GenerateDocsArgs {
// Unlike the `--full` flag on some other commands, no hidden `--no-full`
// counterpart is declared here.
/// Full rebuild: seed all entities into dirty queue, then drain
#[arg(long)]
pub full: bool,
/// Filter to single project
#[arg(short = 'p', long)]
pub project: Option<String>,
}
// Command-line arguments for `lore sync`.
//
// Covers the whole-pipeline switches (`--full`, `--force`, the `--no-*` stage
// skips, `--dry-run`) and the surgical-sync selectors (`--issue`, `--mr`,
// `--project`, `--preflight-only`).
//
// NOTE: with clap's derive API the `///` comments on fields are the help text
// shown to users, so they are runtime strings — edit them deliberately.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore sync # Full pipeline: ingest + docs + embed
lore sync --no-embed # Skip embedding step
lore sync --no-status # Skip work-item status enrichment
lore sync --full --force # Full re-sync, override stale lock
lore sync --dry-run # Preview what would change
lore sync --issue 42 -p group/repo # Surgically sync one issue
lore sync --mr 10 --mr 20 -p g/r # Surgically sync two MRs")]
pub struct SyncArgs {
/// Reset cursors, fetch everything
#[arg(long, overrides_with = "no_full")]
pub full: bool,
// Hidden `--no-full` counterpart; it and `--full` override each other.
#[arg(long = "no-full", hide = true, overrides_with = "full")]
pub no_full: bool,
/// Override stale lock
#[arg(long, overrides_with = "no_force")]
pub force: bool,
// Hidden `--no-force` counterpart; it and `--force` override each other.
#[arg(long = "no-force", hide = true, overrides_with = "force")]
pub no_force: bool,
// The five `no_*` fields below are visible, standalone skip switches — they
// are not negations of other flags on this struct.
/// Skip embedding step
#[arg(long)]
pub no_embed: bool,
/// Skip document regeneration
#[arg(long)]
pub no_docs: bool,
/// Skip resource event fetching (overrides config)
#[arg(long = "no-events")]
pub no_events: bool,
/// Skip MR file change fetching (overrides config)
#[arg(long = "no-file-changes")]
pub no_file_changes: bool,
/// Skip work-item status enrichment via GraphQL (overrides config)
#[arg(long = "no-status")]
pub no_status: bool,
/// Preview what would be synced without making changes
#[arg(long, overrides_with = "no_dry_run")]
pub dry_run: bool,
// Hidden `--no-dry-run` counterpart; it and `--dry-run` override each other.
#[arg(long = "no-dry-run", hide = true, overrides_with = "dry_run")]
pub no_dry_run: bool,
/// Show detailed timing breakdown for sync stages
#[arg(short = 't', long = "timings")]
pub timings: bool,
/// Acquire file lock before syncing (skip if another sync is running)
#[arg(long)]
pub lock: bool,
// `range(1..)` makes the parser reject 0; `Append` collects every occurrence.
/// Surgically sync specific issues by IID (repeatable, must be positive)
#[arg(long, value_parser = clap::value_parser!(u64).range(1..), action = clap::ArgAction::Append)]
pub issue: Vec<u64>,
/// Surgically sync specific merge requests by IID (repeatable, must be positive)
#[arg(long, value_parser = clap::value_parser!(u64).range(1..), action = clap::ArgAction::Append)]
pub mr: Vec<u64>,
// NOTE(review): "required when --issue or --mr is used" is not declared as a
// clap constraint here — presumably validated by the sync handler; verify.
/// Scope to a single project (required when --issue or --mr is used)
#[arg(short = 'p', long)]
pub project: Option<String>,
/// Validate remote entities exist without DB writes (preflight only)
#[arg(long)]
pub preflight_only: bool,
}
// Arguments for the `embed` subcommand (see the examples below).
// NOTE: `///` comments on fields become `--help` text via clap's derive, so
// maintainer notes are written as `//` comments.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore embed # Embed new/changed documents
lore embed --full # Re-embed all documents from scratch
lore embed --retry-failed # Retry previously failed embeddings")]
pub struct EmbedArgs {
// `--full` / hidden `--no-full`: mutually overriding pair, so passing both is
// accepted rather than reported as a conflict.
/// Re-embed all documents (clears existing embeddings first)
#[arg(long, overrides_with = "no_full")]
pub full: bool,
#[arg(long = "no-full", hide = true, overrides_with = "full")]
pub no_full: bool,
// `--retry-failed` / hidden `--no-retry-failed`: same override arrangement.
/// Retry previously failed embeddings
#[arg(long, overrides_with = "no_retry_failed")]
pub retry_failed: bool,
#[arg(long = "no-retry-failed", hide = true, overrides_with = "retry_failed")]
pub no_retry_failed: bool,
}
// Arguments for the `timeline` subcommand (see the examples below).
// NOTE: `///` comments on fields become `--help` text via clap's derive, so
// maintainer notes are written as `//` comments. `help_heading` only controls
// which section of `--help` an option is listed under.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore timeline 'deployment' # Search-based seeding
lore timeline issue:42 # Direct: issue #42 and related entities
lore timeline i:42 # Shorthand for issue:42
lore timeline mr:99 # Direct: MR !99 and related entities
lore timeline 'auth' --since 30d -p group/repo # Scoped to project and time
lore timeline 'migration' --depth 2 # Deep cross-reference expansion
lore timeline 'auth' --no-mentions # Only 'closes' and 'related' edges")]
pub struct TimelineArgs {
// Required positional argument (no `long`/`short`, non-optional type).
/// Search text or entity reference (issue:N, i:N, mr:N, m:N)
pub query: String,
/// Scope to a specific project (fuzzy match)
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
// Kept as a raw string here; it is not parsed into a date by this struct.
/// Only show events after this date (e.g. "6m", "2w", "2024-01-01")
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
// Defaults to 1 when the flag is omitted.
/// Cross-reference expansion depth (0 = no expansion)
#[arg(long, default_value = "1", help_heading = "Expansion")]
pub depth: u32,
/// Skip 'mentioned' edges during expansion (only follow 'closes' and 'related')
#[arg(long = "no-mentions", help_heading = "Expansion")]
pub no_mentions: bool,
// `-n` / `--limit`, default 100.
/// Maximum number of events to display
#[arg(
short = 'n',
long = "limit",
default_value = "100",
help_heading = "Output"
)]
pub limit: usize,
// Comma-separated on the command line; clap splits on ',' into the Vec.
/// Select output fields (comma-separated, or 'minimal' preset: timestamp,type,entity_iid,detail)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
// The three caps below default to 10, 50 and 10 respectively.
/// Maximum seed entities from search
#[arg(long = "max-seeds", default_value = "10", help_heading = "Expansion")]
pub max_seeds: usize,
/// Maximum expanded entities via cross-references
#[arg(
long = "max-entities",
default_value = "50",
help_heading = "Expansion"
)]
pub max_entities: usize,
/// Maximum evidence notes included
#[arg(
long = "max-evidence",
default_value = "10",
help_heading = "Expansion"
)]
pub max_evidence: usize,
}
// Arguments for the `who` subcommand (see the examples below).
// NOTE: `///` comments on fields become `--help` text via clap's derive, so
// maintainer notes are written as `//` comments.
//
// The "Mode" options (`--path`, `--active`, `--overlap`, `--reviews`) are made
// mutually exclusive through the `conflicts_with_all` lists on each of them.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore who src/features/auth/ # Who knows about this area?
lore who @asmith # What is asmith working on?
lore who @asmith --reviews # What review patterns does asmith have?
lore who --active # What discussions need attention?
lore who --overlap src/features/auth/ # Who else is touching these files?
lore who --path README.md # Expert lookup for a root file
lore who --path Makefile # Expert lookup for a dotless root file")]
pub struct WhoArgs {
// Optional positional argument; `None` when a mode flag is used on its own.
/// Username or file path (path if contains /)
pub target: Option<String>,
// Conflicts with the other three mode flags. Unlike `--active` and
// `--overlap`, it does not list `target` as a conflict.
/// Force expert mode for a file/directory path.
/// Root files (README.md, LICENSE, Makefile) are treated as exact matches.
/// Use a trailing `/` to force directory-prefix matching.
#[arg(long, help_heading = "Mode", conflicts_with_all = ["active", "overlap", "reviews"])]
pub path: Option<String>,
// Cannot be combined with a positional target or any other mode flag.
/// Show active unresolved discussions
#[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "overlap", "reviews", "path"])]
pub active: bool,
// Takes its own path value; cannot be combined with a positional target.
/// Find users with MRs/notes touching this file path
#[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "active", "reviews", "path"])]
pub overlap: Option<String>,
// Only valid together with a positional target (`requires = "target"`).
/// Show review pattern analysis (requires username target)
#[arg(long, help_heading = "Mode", requires = "target", conflicts_with_all = ["active", "overlap", "path"])]
pub reviews: bool,
/// Time window (7d, 2w, 6m, YYYY-MM-DD). Default varies by mode.
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
/// Scope to a project (supports fuzzy matching)
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
// No default: `None` means the flag was omitted. Values outside 1..=500 are
// rejected at parse time by the ranged value parser.
/// Maximum results per section (1..=500); omit for unlimited
#[arg(
short = 'n',
long = "limit",
value_parser = clap::value_parser!(u16).range(1..=500),
help_heading = "Output"
)]
pub limit: Option<u16>,
// Comma-separated on the command line; clap splits on ',' into the Vec.
/// Select output fields (comma-separated, or 'minimal' preset; varies by mode)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
// `--detail` / hidden `--no-detail` override pair; `--detail` additionally
// conflicts with `--explain-score`.
/// Show per-MR detail breakdown (expert mode only)
#[arg(
long,
help_heading = "Output",
overrides_with = "no_detail",
conflicts_with = "explain_score"
)]
pub detail: bool,
#[arg(long = "no-detail", hide = true, overrides_with = "detail")]
pub no_detail: bool,
// Kept as a raw string here; not parsed into a date by this struct.
/// Score as if "now" is this date (ISO 8601 or duration like 30d). Expert mode only.
#[arg(long = "as-of", help_heading = "Scoring")]
pub as_of: Option<String>,
/// Show per-component score breakdown in output. Expert mode only.
#[arg(long = "explain-score", help_heading = "Scoring")]
pub explain_score: bool,
/// Include bot users in results (normally excluded via scoring.excluded_usernames).
#[arg(long = "include-bots", help_heading = "Scoring")]
pub include_bots: bool,
/// Include discussions on closed issues and merged/closed MRs
#[arg(long, help_heading = "Filters")]
pub include_closed: bool,
// Rejected by clap when `--since` is also given.
/// Remove the default time window (query all history). Conflicts with --since.
#[arg(
long = "all-history",
help_heading = "Filters",
conflicts_with = "since"
)]
pub all_history: bool,
}
// Arguments for the `me` subcommand (see the examples below).
// NOTE: `///` comments on fields become `--help` text via clap's derive, so
// maintainer notes are written as `//` comments.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore me # Full dashboard (default project or all)
lore me --issues # Issues section only
lore me --mrs # MRs section only
lore me --activity # Activity feed only
lore me --all # All synced projects
lore me --since 2d # Activity window (default: 30d)
lore me --project group/repo # Scope to one project
lore me --user jdoe # Override configured username")]
pub struct MeArgs {
// Section selectors. They are independent switches (no conflicts declared),
// so several can be combined.
/// Show open issues section
#[arg(long, help_heading = "Sections")]
pub issues: bool,
/// Show authored + reviewing MRs section
#[arg(long, help_heading = "Sections")]
pub mrs: bool,
/// Show activity feed section
#[arg(long, help_heading = "Sections")]
pub activity: bool,
/// Show items you're @mentioned in (not assigned/authored/reviewing)
#[arg(long, help_heading = "Sections")]
pub mentions: bool,
// No clap default is declared; the 30d default named in the help text is
// presumably applied by the consumer of this struct.
/// Activity window (e.g. 7d, 2w, 30d). Default: 30d. Only affects activity section.
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
// `--project` and `--all` are declared as conflicting in both directions.
/// Scope to a project (supports fuzzy matching)
#[arg(short = 'p', long, help_heading = "Filters", conflicts_with = "all")]
pub project: Option<String>,
/// Show all synced projects (overrides default_project)
#[arg(long, help_heading = "Filters", conflicts_with = "project")]
pub all: bool,
/// Override configured username
#[arg(long = "user", help_heading = "Filters")]
pub user: Option<String>,
// Comma-separated on the command line; clap splits on ',' into the Vec.
/// Select output fields (comma-separated, or 'minimal' preset)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
/// Reset the since-last-check cursor (next run shows no new events)
#[arg(long, help_heading = "Output")]
pub reset_cursor: bool,
}
impl MeArgs {
    /// Reports whether every dashboard section should be shown.
    ///
    /// This is the case exactly when none of the section selectors
    /// (`issues`, `mrs`, `activity`, `mentions`) has been set.
    pub fn show_all_sections(&self) -> bool {
        let any_section_selected = self.issues || self.mrs || self.activity || self.mentions;
        !any_section_selected
    }
}
// Arguments for the `file-history` subcommand (see the examples below).
// NOTE: `///` comments on fields become `--help` text via clap's derive, so
// maintainer notes are written as `//` comments.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore file-history src/main.rs # MRs that touched this file
lore file-history src/auth/ -p group/repo # Scoped to project
lore file-history src/foo.rs --discussions # Include DiffNote snippets
lore file-history src/bar.rs --no-follow-renames # Skip rename chain")]
pub struct FileHistoryArgs {
// Required positional argument.
/// File path to trace history for
pub path: String,
/// Scope to a specific project (fuzzy match)
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Include discussion snippets from DiffNotes on this file
#[arg(long, help_heading = "Output")]
pub discussions: bool,
// Opt-out switch: rename following is on unless this flag is passed.
/// Disable rename chain resolution
#[arg(long = "no-follow-renames", help_heading = "Filters")]
pub no_follow_renames: bool,
/// Only show merged MRs
#[arg(long, help_heading = "Filters")]
pub merged: bool,
// `-n` / `--limit`, default 50.
/// Maximum results
#[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize,
}
// Arguments for the `trace` subcommand (see the examples below).
// NOTE: `///` comments on fields become `--help` text via clap's derive, so
// maintainer notes are written as `//` comments.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore trace src/main.rs # Why was this file changed?
lore trace src/auth/ -p group/repo # Scoped to project
lore trace src/foo.rs --discussions # Include DiffNote context
lore trace src/bar.rs:42 # Line hint (Tier 2 warning)")]
pub struct TraceArgs {
// Required positional argument. Any `:line` suffix arrives here as part of
// the raw string; this struct does not split it off.
/// File path to trace (supports :line suffix for future Tier 2)
pub path: String,
/// Scope to a specific project (fuzzy match)
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Include DiffNote discussion snippets
#[arg(long, help_heading = "Output")]
pub discussions: bool,
// Opt-out switch: rename following is on unless this flag is passed.
/// Disable rename chain resolution
#[arg(long = "no-follow-renames", help_heading = "Filters")]
pub no_follow_renames: bool,
// `-n` / `--limit`, default 20.
/// Maximum trace chains to display
#[arg(
short = 'n',
long = "limit",
default_value = "20",
help_heading = "Output"
)]
pub limit: usize,
}
// Arguments for the `count` subcommand (see the examples below).
// NOTE: `///` comments on fields become `--help` text via clap's derive, so
// maintainer notes are written as `//` comments.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore count issues # Total issues in local database
lore count notes --for mr # Notes on merge requests only
lore count discussions --for issue # Discussions on issues only")]
pub struct CountArgs {
// Required positional argument; clap rejects anything outside the listed
// values, so downstream code only ever sees one of these five strings.
/// Entity type to count (issues, mrs, discussions, notes, events)
#[arg(value_parser = ["issues", "mrs", "discussions", "notes", "events"])]
pub entity: String,
// `-f` / `--for`; restricted to "issue" or "mr" at parse time. The field is
// named `for_entity` because `for` is a Rust keyword.
/// Parent type filter: issue or mr (for discussions/notes)
#[arg(short = 'f', long = "for", value_parser = ["issue", "mr"])]
pub for_entity: Option<String>,
}
// Wrapper for the cron command: it carries no options of its own and simply
// dispatches to one of the `CronAction` subcommands.
#[derive(Parser)]
pub struct CronArgs {
#[command(subcommand)]
pub action: CronAction,
}
// Subcommands of the cron command.
// NOTE: the `///` comments on variants and fields are used by clap's derive as
// help text, so maintainer notes are written as `//` comments.
#[derive(Subcommand)]
pub enum CronAction {
/// Install cron job for automatic syncing
Install {
// `--interval`, default 8. Any `u32` is accepted at parse time (including 0);
// NOTE(review): no lower bound is declared here — confirm the consumer validates it.
/// Sync interval in minutes (default: 8)
#[arg(long, default_value = "8")]
interval: u32,
},
/// Remove cron job
Uninstall,
/// Show current cron configuration
Status,
}
// Wrapper for the token command: it carries no options of its own and simply
// dispatches to one of the `TokenAction` subcommands.
#[derive(Args)]
pub struct TokenArgs {
#[command(subcommand)]
pub action: TokenAction,
}
// Subcommands of the token command.
// NOTE: the `///` comments on variants and fields are used by clap's derive as
// help text, so maintainer notes are written as `//` comments.
#[derive(Subcommand)]
pub enum TokenAction {
/// Store a GitLab token in the config file
Set {
// `--token`; `None` when omitted. The stdin fallback mentioned in the help
// text is not implemented in this enum — it is left to the command handler.
/// Token value (reads from stdin if omitted in non-interactive mode)
#[arg(long)]
token: Option<String>,
},
/// Show the current token (masked by default)
Show {
// `--unmask`: false unless passed.
/// Show the full unmasked token
#[arg(long)]
unmask: bool,
},
}

View File

@@ -6,8 +6,8 @@ use crate::Config;
use crate::cli::robot::RobotMeta;
use crate::core::db::create_connection;
use crate::core::error::Result;
use crate::core::events_db::{self, EventCounts};
use crate::core::paths::get_db_path;
use crate::ingestion::storage::events::{EventCounts, count_events};
pub struct CountResult {
pub entity: String,
@@ -208,7 +208,7 @@ struct CountJsonBreakdown {
pub fn run_count_events(config: &Config) -> Result<EventCounts> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
events_db::count_events(&conn)
count_events(&conn)
}
#[derive(Serialize)]

View File

@@ -385,25 +385,11 @@ async fn check_ollama(config: Option<&Config>) -> OllamaCheck {
let base_url = &config.embedding.base_url;
let model = &config.embedding.model;
let client = match reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(2))
.build()
{
Ok(client) => client,
Err(e) => {
return OllamaCheck {
result: CheckResult {
status: CheckStatus::Warning,
message: Some(format!("Failed to build HTTP client: {e}")),
},
url: Some(base_url.clone()),
model: Some(model.clone()),
};
}
};
let client = crate::http::Client::with_timeout(std::time::Duration::from_secs(2));
let url = format!("{base_url}/api/tags");
match client.get(format!("{base_url}/api/tags")).send().await {
Ok(response) if response.status().is_success() => {
match client.get(&url, &[]).await {
Ok(response) if response.is_success() => {
#[derive(serde::Deserialize)]
struct TagsResponse {
models: Option<Vec<ModelInfo>>,
@@ -413,7 +399,7 @@ async fn check_ollama(config: Option<&Config>) -> OllamaCheck {
name: String,
}
match response.json::<TagsResponse>().await {
match response.json::<TagsResponse>() {
Ok(data) => {
let models = data.models.unwrap_or_default();
let model_names: Vec<&str> = models
@@ -462,7 +448,7 @@ async fn check_ollama(config: Option<&Config>) -> OllamaCheck {
Ok(response) => OllamaCheck {
result: CheckResult {
status: CheckStatus::Warning,
message: Some(format!("Ollama responded with {}", response.status())),
message: Some(format!("Ollama responded with {}", response.status)),
},
url: Some(base_url.clone()),
model: Some(model.clone()),

View File

@@ -0,0 +1,26 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use crate::cli::render::Theme;
use indicatif::{ProgressBar, ProgressStyle};
use rusqlite::Connection;
use serde::Serialize;
use tracing::Instrument;
use crate::Config;
use crate::cli::robot::RobotMeta;
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::lock::{AppLock, LockOptions};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::shutdown::ShutdownSignal;
use crate::gitlab::GitLabClient;
use crate::ingestion::{
IngestMrProjectResult, IngestProjectResult, ProgressEvent, ingest_project_issues_with_progress,
ingest_project_merge_requests_with_progress,
};
include!("run.rs");
include!("render.rs");

View File

@@ -0,0 +1,331 @@
/// Prints the outcome of an issue ingest for one project to stdout.
///
/// A headline with the project path and the upserted-issue count is always
/// written, extended by a new-label count when labels were created. The
/// discussion-sync line and the "unchanged issues" line are written only when
/// their respective counters are positive.
fn print_issue_project_summary(path: &str, result: &IngestProjectResult) {
    let label_suffix = (result.labels_created > 0)
        .then(|| format!(", {} new labels", result.labels_created))
        .unwrap_or_default();
    let project = Theme::info().render(path);
    println!(
        " {}: {} issues fetched{}",
        project, result.issues_upserted, label_suffix
    );

    let synced = result.issues_synced_discussions;
    if synced > 0 {
        println!(
            " {} issues -> {} discussions, {} notes",
            synced, result.discussions_fetched, result.notes_upserted
        );
    }

    let skipped = result.issues_skipped_discussion_sync;
    if skipped > 0 {
        // Only the number itself is dimmed; the rest of the line is plain.
        let dimmed_count = Theme::dim().render(&skipped.to_string());
        println!(" {} unchanged issues (discussion sync skipped)", dimmed_count);
    }
}
/// Prints the outcome of a merge-request ingest for one project to stdout.
///
/// A headline with the project path and the upserted-MR count is always
/// written; label and assignee/reviewer counts are appended when non-zero.
/// The discussion-sync line (with an optional diff-note count) and the
/// "unchanged MRs" line are written only when their counters are positive.
fn print_mr_project_summary(path: &str, result: &IngestMrProjectResult) {
    let label_suffix = (result.labels_created > 0)
        .then(|| format!(", {} new labels", result.labels_created))
        .unwrap_or_default();

    // Both counts are shown as soon as either one is non-zero.
    let any_people_linked = result.assignees_linked > 0 || result.reviewers_linked > 0;
    let people_suffix = any_people_linked
        .then(|| {
            format!(
                ", {} assignees, {} reviewers",
                result.assignees_linked, result.reviewers_linked
            )
        })
        .unwrap_or_default();

    let project = Theme::info().render(path);
    println!(
        " {}: {} MRs fetched{}{}",
        project, result.mrs_upserted, label_suffix, people_suffix
    );

    let synced = result.mrs_synced_discussions;
    if synced > 0 {
        let diffnote_suffix = (result.diffnotes_count > 0)
            .then(|| format!(" ({} diff notes)", result.diffnotes_count))
            .unwrap_or_default();
        println!(
            " {} MRs -> {} discussions, {} notes{}",
            synced, result.discussions_fetched, result.notes_upserted, diffnote_suffix
        );
    }

    let skipped = result.mrs_skipped_discussion_sync;
    if skipped > 0 {
        // Only the number itself is dimmed; the rest of the line is plain.
        let dimmed_count = Theme::dim().render(&skipped.to_string());
        println!(" {} unchanged MRs (discussion sync skipped)", dimmed_count);
    }
}
/// Top-level JSON envelope for an ingest summary: a success flag, the payload,
/// and run metadata. Keys are serialized in field declaration order.
#[derive(Serialize)]
struct IngestJsonOutput {
ok: bool,
data: IngestJsonData,
meta: RobotMeta,
}
/// Payload of the ingest summary JSON.
///
/// `issues`, `merge_requests` and `status_enrichment` are left out of the
/// serialized object entirely when they are `None` / empty; every other field
/// is always present.
#[derive(Serialize)]
struct IngestJsonData {
resource_type: String,
projects_synced: usize,
// Omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
issues: Option<IngestIssueStats>,
// Omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
merge_requests: Option<IngestMrStats>,
labels_created: usize,
discussions_fetched: usize,
notes_upserted: usize,
resource_events_fetched: usize,
resource_events_failed: usize,
// Omitted from the JSON when the list is empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
status_enrichment: Vec<StatusEnrichmentJson>,
status_enrichment_errors: usize,
}
/// One status-enrichment entry in the ingest summary JSON.
///
/// The optional text fields (`reason`, `first_partial_error`, `error`) are
/// omitted from the serialized object when they are `None`; the counters are
/// always present.
#[derive(Serialize)]
struct StatusEnrichmentJson {
mode: String,
#[serde(skip_serializing_if = "Option::is_none")]
reason: Option<String>,
seen: usize,
enriched: usize,
cleared: usize,
without_widget: usize,
partial_errors: usize,
#[serde(skip_serializing_if = "Option::is_none")]
first_partial_error: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
}
/// Issue counters reported under the `issues` key of the ingest summary JSON.
#[derive(Serialize)]
struct IngestIssueStats {
fetched: usize,
upserted: usize,
synced_discussions: usize,
skipped_discussion_sync: usize,
}
/// Merge-request counters reported under the `merge_requests` key of the
/// ingest summary JSON.
#[derive(Serialize)]
struct IngestMrStats {
fetched: usize,
upserted: usize,
synced_discussions: usize,
skipped_discussion_sync: usize,
assignees_linked: usize,
reviewers_linked: usize,
diffnotes_count: usize,
}
/// Prints the ingest summary as a single line of JSON on stdout.
///
/// Exactly one of the `issues` / `merge_requests` sections is populated:
/// `issues` when `result.resource_type` equals `"issues"`, `merge_requests`
/// for any other value. `elapsed_ms` is reported in the `meta` object.
///
/// If serialization fails, the error is written to stderr and nothing is
/// printed to stdout.
pub fn print_ingest_summary_json(result: &IngestResult, elapsed_ms: u64) {
    let is_issue_run = result.resource_type == "issues";

    let issues = is_issue_run.then(|| IngestIssueStats {
        fetched: result.issues_fetched,
        upserted: result.issues_upserted,
        synced_discussions: result.issues_synced_discussions,
        skipped_discussion_sync: result.issues_skipped_discussion_sync,
    });
    let merge_requests = (!is_issue_run).then(|| IngestMrStats {
        fetched: result.mrs_fetched,
        upserted: result.mrs_upserted,
        synced_discussions: result.mrs_synced_discussions,
        skipped_discussion_sync: result.mrs_skipped_discussion_sync,
        assignees_linked: result.assignees_linked,
        reviewers_linked: result.reviewers_linked,
        diffnotes_count: result.diffnotes_count,
    });

    // Copy each per-project enrichment record into its JSON representation.
    let mut status_enrichment: Vec<StatusEnrichmentJson> = Vec::new();
    for project in result.status_enrichment_projects.iter() {
        status_enrichment.push(StatusEnrichmentJson {
            mode: project.mode.clone(),
            reason: project.reason.clone(),
            seen: project.seen,
            enriched: project.enriched,
            cleared: project.cleared,
            without_widget: project.without_widget,
            partial_errors: project.partial_errors,
            first_partial_error: project.first_partial_error.clone(),
            error: project.error.clone(),
        });
    }

    let data = IngestJsonData {
        resource_type: result.resource_type.clone(),
        projects_synced: result.projects_synced,
        issues,
        merge_requests,
        labels_created: result.labels_created,
        discussions_fetched: result.discussions_fetched,
        notes_upserted: result.notes_upserted,
        resource_events_fetched: result.resource_events_fetched,
        resource_events_failed: result.resource_events_failed,
        status_enrichment,
        status_enrichment_errors: result.status_enrichment_errors,
    };
    let output = IngestJsonOutput {
        ok: true,
        data,
        meta: RobotMeta { elapsed_ms },
    };

    match serde_json::to_string(&output) {
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
        Ok(json) => println!("{json}"),
    }
}
/// Prints the human-readable totals of an ingest run to stdout.
///
/// After a blank line, a totals headline is written for issues (when
/// `result.resource_type` equals `"issues"`) or for merge requests (any other
/// value), followed by a dimmed note when discussion sync was skipped for
/// unchanged items. A resource-events line is added when any events were
/// fetched or failed.
pub fn print_ingest_summary(result: &IngestResult) {
    println!();

    // Build the headline and the optional "skipped" note for the resource kind.
    let (headline, skipped_note) = if result.resource_type == "issues" {
        let headline = format!(
            "Total: {} issues, {} discussions, {} notes",
            result.issues_upserted, result.discussions_fetched, result.notes_upserted
        );
        let note = (result.issues_skipped_discussion_sync > 0).then(|| {
            format!(
                "Skipped discussion sync for {} unchanged issues.",
                result.issues_skipped_discussion_sync
            )
        });
        (headline, note)
    } else {
        let diffnote_suffix = (result.diffnotes_count > 0)
            .then(|| format!(" ({} diff notes)", result.diffnotes_count))
            .unwrap_or_default();
        let headline = format!(
            "Total: {} MRs, {} discussions, {} notes{}",
            result.mrs_upserted,
            result.discussions_fetched,
            result.notes_upserted,
            diffnote_suffix
        );
        let note = (result.mrs_skipped_discussion_sync > 0).then(|| {
            format!(
                "Skipped discussion sync for {} unchanged MRs.",
                result.mrs_skipped_discussion_sync
            )
        });
        (headline, note)
    };

    println!("{}", Theme::success().render(&headline));
    if let Some(note) = skipped_note {
        println!("{}", Theme::dim().render(&note));
    }

    let events_fetched = result.resource_events_fetched;
    let events_failed = result.resource_events_failed;
    if events_fetched > 0 || events_failed > 0 {
        let failed_suffix = (events_failed > 0)
            .then(|| format!(", {} failed", events_failed))
            .unwrap_or_default();
        println!(
            " Resource events: {} fetched{}",
            events_fetched, failed_suffix
        );
    }
}
/// Prints a human-readable dry-run preview to stdout.
///
/// Writes a banner, the resource type, the sync mode (`"full"` versus anything
/// else, which is shown as incremental) and the number of projects, followed
/// by one entry per project with its sync status, existing item count and,
/// when known, the last-synced value.
pub fn print_dry_run_preview(preview: &DryRunPreview) {
    let banner = Theme::info().bold().render("Dry Run Preview");
    let disclaimer = Theme::warning().render("(no changes will be made)");
    println!("{} {}", banner, disclaimer);
    println!();

    let is_issue_run = preview.resource_type == "issues";
    let type_label = if is_issue_run { "issues" } else { "merge requests" };
    println!(" Resource type: {}", Theme::bold().render(type_label));

    let is_full_sync = preview.sync_mode == "full";
    let mode_text = if is_full_sync {
        Theme::warning().render("full (all data will be re-fetched)")
    } else {
        Theme::success().render("incremental (only changes since last sync)")
    };
    println!(" Sync mode: {}", mode_text);

    println!(" Projects: {}", preview.projects.len());
    println!();
    println!("{}", Theme::info().bold().render("Projects to sync:"));

    for project in &preview.projects {
        // A project without a stored cursor has never been synced before.
        let sync_status = if project.has_cursor {
            Theme::success().render("incremental")
        } else {
            Theme::warning().render("initial sync")
        };
        let project_name = Theme::bold().render(&project.path);
        println!(" {} ({})", project_name, sync_status);
        println!(" Existing {}: {}", type_label, project.existing_count);
        if let Some(last_synced) = &project.last_synced {
            println!(" Last synced: {}", last_synced);
        }
    }
}
/// JSON envelope for a dry-run preview.
///
/// Holds a borrow of the preview rather than an owned copy, so building the
/// envelope never requires cloning the preview just to serialize it. Keys are
/// serialized in field declaration order: `ok`, `dry_run`, `data`.
#[derive(Serialize)]
struct DryRunJsonOutput<'a> {
    ok: bool,
    dry_run: bool,
    data: &'a DryRunPreview,
}

/// Prints the dry-run preview as a single line of JSON on stdout.
///
/// The output object always carries `ok: true` and `dry_run: true`, with the
/// preview itself under `data`. If serialization fails, the error is written
/// to stderr and nothing is printed to stdout.
pub fn print_dry_run_preview_json(preview: &DryRunPreview) {
    // Serialize straight from the caller's reference; the previous
    // `preview.clone()` deep-copied the whole preview for no benefit.
    let output = DryRunJsonOutput {
        ok: true,
        dry_run: true,
        data: preview,
    };
    match serde_json::to_string(&output) {
        Ok(json) => println!("{json}"),
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
    }
}

View File

@@ -1,27 +1,3 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use crate::cli::render::Theme;
use indicatif::{ProgressBar, ProgressStyle};
use rusqlite::Connection;
use serde::Serialize;
use tracing::Instrument;
use crate::Config;
use crate::cli::robot::RobotMeta;
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::lock::{AppLock, LockOptions};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::shutdown::ShutdownSignal;
use crate::gitlab::GitLabClient;
use crate::ingestion::{
IngestMrProjectResult, IngestProjectResult, ProgressEvent, ingest_project_issues_with_progress,
ingest_project_merge_requests_with_progress,
};
#[derive(Default)]
pub struct IngestResult {
pub resource_type: String,
@@ -295,11 +271,11 @@ async fn run_ingest_inner(
let token = config.gitlab.resolve_token()?;
let client = GitLabClient::new(
let client = Arc::new(GitLabClient::new(
&config.gitlab.base_url,
&token,
Some(config.sync.requests_per_second),
);
));
let projects = get_projects_to_sync(&conn, &config.projects, project_filter)?;
@@ -376,7 +352,7 @@ async fn run_ingest_inner(
let project_results: Vec<Result<ProjectIngestOutcome>> = stream::iter(projects.iter())
.map(|(local_project_id, gitlab_project_id, path)| {
let client = client.clone();
let client = Arc::clone(&client);
let db_path = db_path.clone();
let config = config.clone();
let resource_type = resource_type_owned.clone();
@@ -783,334 +759,3 @@ fn get_projects_to_sync(
Ok(projects)
}
/// Prints the outcome of an issue ingest for one project to stdout.
///
/// A headline with the project path and the upserted-issue count is always
/// written, extended by a new-label count when labels were created. The
/// discussion-sync line and the "unchanged issues" line are written only when
/// their respective counters are positive.
fn print_issue_project_summary(path: &str, result: &IngestProjectResult) {
    let label_suffix = (result.labels_created > 0)
        .then(|| format!(", {} new labels", result.labels_created))
        .unwrap_or_default();
    let project = Theme::info().render(path);
    println!(
        " {}: {} issues fetched{}",
        project, result.issues_upserted, label_suffix
    );

    let synced = result.issues_synced_discussions;
    if synced > 0 {
        println!(
            " {} issues -> {} discussions, {} notes",
            synced, result.discussions_fetched, result.notes_upserted
        );
    }

    let skipped = result.issues_skipped_discussion_sync;
    if skipped > 0 {
        // Only the number itself is dimmed; the rest of the line is plain.
        let dimmed_count = Theme::dim().render(&skipped.to_string());
        println!(" {} unchanged issues (discussion sync skipped)", dimmed_count);
    }
}
/// Prints the outcome of a merge-request ingest for one project to stdout.
///
/// A headline with the project path and the upserted-MR count is always
/// written; label and assignee/reviewer counts are appended when non-zero.
/// The discussion-sync line (with an optional diff-note count) and the
/// "unchanged MRs" line are written only when their counters are positive.
fn print_mr_project_summary(path: &str, result: &IngestMrProjectResult) {
    let label_suffix = (result.labels_created > 0)
        .then(|| format!(", {} new labels", result.labels_created))
        .unwrap_or_default();

    // Both counts are shown as soon as either one is non-zero.
    let any_people_linked = result.assignees_linked > 0 || result.reviewers_linked > 0;
    let people_suffix = any_people_linked
        .then(|| {
            format!(
                ", {} assignees, {} reviewers",
                result.assignees_linked, result.reviewers_linked
            )
        })
        .unwrap_or_default();

    let project = Theme::info().render(path);
    println!(
        " {}: {} MRs fetched{}{}",
        project, result.mrs_upserted, label_suffix, people_suffix
    );

    let synced = result.mrs_synced_discussions;
    if synced > 0 {
        let diffnote_suffix = (result.diffnotes_count > 0)
            .then(|| format!(" ({} diff notes)", result.diffnotes_count))
            .unwrap_or_default();
        println!(
            " {} MRs -> {} discussions, {} notes{}",
            synced, result.discussions_fetched, result.notes_upserted, diffnote_suffix
        );
    }

    let skipped = result.mrs_skipped_discussion_sync;
    if skipped > 0 {
        // Only the number itself is dimmed; the rest of the line is plain.
        let dimmed_count = Theme::dim().render(&skipped.to_string());
        println!(" {} unchanged MRs (discussion sync skipped)", dimmed_count);
    }
}
/// Top-level JSON envelope for an ingest summary: a success flag, the payload,
/// and run metadata. Keys are serialized in field declaration order.
#[derive(Serialize)]
struct IngestJsonOutput {
ok: bool,
data: IngestJsonData,
meta: RobotMeta,
}
/// Payload of the ingest summary JSON.
///
/// `issues`, `merge_requests` and `status_enrichment` are left out of the
/// serialized object entirely when they are `None` / empty; every other field
/// is always present.
#[derive(Serialize)]
struct IngestJsonData {
resource_type: String,
projects_synced: usize,
// Omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
issues: Option<IngestIssueStats>,
// Omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
merge_requests: Option<IngestMrStats>,
labels_created: usize,
discussions_fetched: usize,
notes_upserted: usize,
resource_events_fetched: usize,
resource_events_failed: usize,
// Omitted from the JSON when the list is empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
status_enrichment: Vec<StatusEnrichmentJson>,
status_enrichment_errors: usize,
}
/// One status-enrichment entry in the ingest summary JSON.
///
/// The optional text fields (`reason`, `first_partial_error`, `error`) are
/// omitted from the serialized object when they are `None`; the counters are
/// always present.
#[derive(Serialize)]
struct StatusEnrichmentJson {
mode: String,
#[serde(skip_serializing_if = "Option::is_none")]
reason: Option<String>,
seen: usize,
enriched: usize,
cleared: usize,
without_widget: usize,
partial_errors: usize,
#[serde(skip_serializing_if = "Option::is_none")]
first_partial_error: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
}
/// Issue counters reported under the `issues` key of the ingest summary JSON.
#[derive(Serialize)]
struct IngestIssueStats {
fetched: usize,
upserted: usize,
synced_discussions: usize,
skipped_discussion_sync: usize,
}
/// Merge-request counters reported under the `merge_requests` key of the
/// ingest summary JSON.
#[derive(Serialize)]
struct IngestMrStats {
fetched: usize,
upserted: usize,
synced_discussions: usize,
skipped_discussion_sync: usize,
assignees_linked: usize,
reviewers_linked: usize,
diffnotes_count: usize,
}
/// Prints the ingest summary as a single line of JSON on stdout.
///
/// Exactly one of the `issues` / `merge_requests` sections is populated:
/// `issues` when `result.resource_type` equals `"issues"`, `merge_requests`
/// for any other value. `elapsed_ms` is reported in the `meta` object.
///
/// If serialization fails, the error is written to stderr and nothing is
/// printed to stdout.
pub fn print_ingest_summary_json(result: &IngestResult, elapsed_ms: u64) {
    let is_issue_run = result.resource_type == "issues";

    let issues = is_issue_run.then(|| IngestIssueStats {
        fetched: result.issues_fetched,
        upserted: result.issues_upserted,
        synced_discussions: result.issues_synced_discussions,
        skipped_discussion_sync: result.issues_skipped_discussion_sync,
    });
    let merge_requests = (!is_issue_run).then(|| IngestMrStats {
        fetched: result.mrs_fetched,
        upserted: result.mrs_upserted,
        synced_discussions: result.mrs_synced_discussions,
        skipped_discussion_sync: result.mrs_skipped_discussion_sync,
        assignees_linked: result.assignees_linked,
        reviewers_linked: result.reviewers_linked,
        diffnotes_count: result.diffnotes_count,
    });

    // Copy each per-project enrichment record into its JSON representation.
    let mut status_enrichment: Vec<StatusEnrichmentJson> = Vec::new();
    for project in result.status_enrichment_projects.iter() {
        status_enrichment.push(StatusEnrichmentJson {
            mode: project.mode.clone(),
            reason: project.reason.clone(),
            seen: project.seen,
            enriched: project.enriched,
            cleared: project.cleared,
            without_widget: project.without_widget,
            partial_errors: project.partial_errors,
            first_partial_error: project.first_partial_error.clone(),
            error: project.error.clone(),
        });
    }

    let data = IngestJsonData {
        resource_type: result.resource_type.clone(),
        projects_synced: result.projects_synced,
        issues,
        merge_requests,
        labels_created: result.labels_created,
        discussions_fetched: result.discussions_fetched,
        notes_upserted: result.notes_upserted,
        resource_events_fetched: result.resource_events_fetched,
        resource_events_failed: result.resource_events_failed,
        status_enrichment,
        status_enrichment_errors: result.status_enrichment_errors,
    };
    let output = IngestJsonOutput {
        ok: true,
        data,
        meta: RobotMeta { elapsed_ms },
    };

    match serde_json::to_string(&output) {
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
        Ok(json) => println!("{json}"),
    }
}
/// Prints the human-readable totals of an ingest run to stdout.
///
/// After a blank line, a totals headline is written for issues (when
/// `result.resource_type` equals `"issues"`) or for merge requests (any other
/// value), followed by a dimmed note when discussion sync was skipped for
/// unchanged items. A resource-events line is added when any events were
/// fetched or failed.
pub fn print_ingest_summary(result: &IngestResult) {
    println!();

    // Build the headline and the optional "skipped" note for the resource kind.
    let (headline, skipped_note) = if result.resource_type == "issues" {
        let headline = format!(
            "Total: {} issues, {} discussions, {} notes",
            result.issues_upserted, result.discussions_fetched, result.notes_upserted
        );
        let note = (result.issues_skipped_discussion_sync > 0).then(|| {
            format!(
                "Skipped discussion sync for {} unchanged issues.",
                result.issues_skipped_discussion_sync
            )
        });
        (headline, note)
    } else {
        let diffnote_suffix = (result.diffnotes_count > 0)
            .then(|| format!(" ({} diff notes)", result.diffnotes_count))
            .unwrap_or_default();
        let headline = format!(
            "Total: {} MRs, {} discussions, {} notes{}",
            result.mrs_upserted,
            result.discussions_fetched,
            result.notes_upserted,
            diffnote_suffix
        );
        let note = (result.mrs_skipped_discussion_sync > 0).then(|| {
            format!(
                "Skipped discussion sync for {} unchanged MRs.",
                result.mrs_skipped_discussion_sync
            )
        });
        (headline, note)
    };

    println!("{}", Theme::success().render(&headline));
    if let Some(note) = skipped_note {
        println!("{}", Theme::dim().render(&note));
    }

    let events_fetched = result.resource_events_fetched;
    let events_failed = result.resource_events_failed;
    if events_fetched > 0 || events_failed > 0 {
        let failed_suffix = (events_failed > 0)
            .then(|| format!(", {} failed", events_failed))
            .unwrap_or_default();
        println!(
            " Resource events: {} fetched{}",
            events_fetched, failed_suffix
        );
    }
}
/// Prints a human-readable dry-run preview to stdout.
///
/// Writes a banner, the resource type, the sync mode (`"full"` versus anything
/// else, which is shown as incremental) and the number of projects, followed
/// by one entry per project with its sync status, existing item count and,
/// when known, the last-synced value.
pub fn print_dry_run_preview(preview: &DryRunPreview) {
    let banner = Theme::info().bold().render("Dry Run Preview");
    let disclaimer = Theme::warning().render("(no changes will be made)");
    println!("{} {}", banner, disclaimer);
    println!();

    let is_issue_run = preview.resource_type == "issues";
    let type_label = if is_issue_run { "issues" } else { "merge requests" };
    println!(" Resource type: {}", Theme::bold().render(type_label));

    let is_full_sync = preview.sync_mode == "full";
    let mode_text = if is_full_sync {
        Theme::warning().render("full (all data will be re-fetched)")
    } else {
        Theme::success().render("incremental (only changes since last sync)")
    };
    println!(" Sync mode: {}", mode_text);

    println!(" Projects: {}", preview.projects.len());
    println!();
    println!("{}", Theme::info().bold().render("Projects to sync:"));

    for project in &preview.projects {
        // A project without a stored cursor has never been synced before.
        let sync_status = if project.has_cursor {
            Theme::success().render("incremental")
        } else {
            Theme::warning().render("initial sync")
        };
        let project_name = Theme::bold().render(&project.path);
        println!(" {} ({})", project_name, sync_status);
        println!(" Existing {}: {}", type_label, project.existing_count);
        if let Some(last_synced) = &project.last_synced {
            println!(" Last synced: {}", last_synced);
        }
    }
}
/// JSON envelope for a dry-run preview.
///
/// Holds a borrow of the preview rather than an owned copy, so building the
/// envelope never requires cloning the preview just to serialize it. Keys are
/// serialized in field declaration order: `ok`, `dry_run`, `data`.
#[derive(Serialize)]
struct DryRunJsonOutput<'a> {
    ok: bool,
    dry_run: bool,
    data: &'a DryRunPreview,
}

/// Prints the dry-run preview as a single line of JSON on stdout.
///
/// The output object always carries `ok: true` and `dry_run: true`, with the
/// preview itself under `data`. If serialization fails, the error is written
/// to stderr and nothing is printed to stdout.
pub fn print_dry_run_preview_json(preview: &DryRunPreview) {
    // Serialize straight from the caller's reference; the previous
    // `preview.clone()` deep-copied the whole preview for no benefit.
    let output = DryRunJsonOutput {
        ok: true,
        dry_run: true,
        data: preview,
    };
    match serde_json::to_string(&output) {
        Ok(json) => println!("{json}"),
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,443 @@
use crate::cli::render::{self, Align, Icons, StyledCell, Table as LoreTable, Theme};
use rusqlite::Connection;
use serde::Serialize;
use crate::Config;
use crate::cli::robot::{expand_fields_preset, filter_fields};
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, parse_since};
use super::render_helpers::{format_assignees, format_discussions};
/// One issue as loaded from the local database for list output.
///
/// `labels` and `assignees` hold individual names. Optional fields are
/// omitted from serialized output when they are `None`.
#[derive(Debug, Serialize)]
pub struct IssueListRow {
pub iid: i64,
pub title: String,
pub state: String,
pub author_username: String,
// Timestamps are kept as integers here; the JSON row converts them with
// `ms_to_iso` (presumably epoch milliseconds — confirm against the schema).
pub created_at: i64,
pub updated_at: i64,
#[serde(skip_serializing_if = "Option::is_none")]
pub web_url: Option<String>,
pub project_path: String,
pub labels: Vec<String>,
pub assignees: Vec<String>,
pub discussion_count: i64,
pub unresolved_count: i64,
#[serde(skip_serializing_if = "Option::is_none")]
pub status_name: Option<String>,
// Kept in memory but never written to serialized output.
#[serde(skip_serializing)]
pub status_category: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub status_color: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub status_icon_name: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub status_synced_at: Option<i64>,
}
/// JSON-facing form of an issue list row.
///
/// Mirrors `IssueListRow`, except that the integer timestamps are replaced by
/// `*_iso` string fields. Optional fields are omitted when `None`.
#[derive(Serialize)]
pub struct IssueListRowJson {
pub iid: i64,
pub title: String,
pub state: String,
pub author_username: String,
pub labels: Vec<String>,
pub assignees: Vec<String>,
pub discussion_count: i64,
pub unresolved_count: i64,
pub created_at_iso: String,
pub updated_at_iso: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub web_url: Option<String>,
pub project_path: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub status_name: Option<String>,
// Carried over from the source row but never written to serialized output.
#[serde(skip_serializing)]
pub status_category: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub status_color: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub status_icon_name: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub status_synced_at_iso: Option<String>,
}
impl From<&IssueListRow> for IssueListRowJson {
fn from(row: &IssueListRow) -> Self {
Self {
iid: row.iid,
title: row.title.clone(),
state: row.state.clone(),
author_username: row.author_username.clone(),
labels: row.labels.clone(),
assignees: row.assignees.clone(),
discussion_count: row.discussion_count,
unresolved_count: row.unresolved_count,
created_at_iso: ms_to_iso(row.created_at),
updated_at_iso: ms_to_iso(row.updated_at),
web_url: row.web_url.clone(),
project_path: row.project_path.clone(),
status_name: row.status_name.clone(),
status_category: row.status_category.clone(),
status_color: row.status_color.clone(),
status_icon_name: row.status_icon_name.clone(),
status_synced_at_iso: row.status_synced_at.map(ms_to_iso),
}
}
}
/// Result of an issue list query.
#[derive(Serialize)]
pub struct ListResult {
// The rows returned by the query, capped by the requested limit.
pub issues: Vec<IssueListRow>,
// Number of issues matching the filters, counted without the limit.
pub total_count: usize,
// Left empty by `query_issues`; `run_list_issues` fills it in afterwards.
pub available_statuses: Vec<String>,
}
/// JSON-facing form of an issue list result.
#[derive(Serialize)]
pub struct ListResultJson {
pub issues: Vec<IssueListRowJson>,
pub total_count: usize,
// Number of rows actually included in `issues`.
pub showing: usize,
}
impl From<&ListResult> for ListResultJson {
fn from(result: &ListResult) -> Self {
Self {
issues: result.issues.iter().map(IssueListRowJson::from).collect(),
total_count: result.total_count,
showing: result.issues.len(),
}
}
}
/// Filters and ordering options for listing issues.
///
/// Every `Option` field is ignored when `None`; active filters are combined
/// with `AND`.
pub struct ListFilters<'a> {
// Maximum number of rows returned (bound to the query's `LIMIT`).
pub limit: usize,
pub project: Option<&'a str>,
// The value "all" disables state filtering.
pub state: Option<&'a str>,
// A leading '@' is stripped from `author` and `assignee` before matching.
pub author: Option<&'a str>,
pub assignee: Option<&'a str>,
// Every listed label must be present on the issue.
pub labels: Option<&'a [String]>,
pub milestone: Option<&'a str>,
// Parsed with `parse_since`; compared against the issue's `updated_at`.
pub since: Option<&'a str>,
// Compared as-is against `due_date`; issues without a due date never match.
pub due_before: Option<&'a str>,
pub has_due_date: bool,
// Matched case-insensitively against `status_name`; empty means no filter.
pub statuses: &'a [String],
// "created" or "iid"; any other value sorts by `updated_at`.
pub sort: &'a str,
// "asc" for ascending; any other value sorts descending.
pub order: &'a str,
}
pub fn run_list_issues(config: &Config, filters: ListFilters) -> Result<ListResult> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
let mut result = query_issues(&conn, &filters)?;
result.available_statuses = query_available_statuses(&conn)?;
Ok(result)
}
/// Returns the distinct non-NULL `status_name` values from the issues table,
/// ordered by name.
///
/// # Errors
/// Propagates statement preparation, query and row-decoding failures.
fn query_available_statuses(conn: &Connection) -> Result<Vec<String>> {
    const SQL: &str = "SELECT DISTINCT status_name FROM issues WHERE status_name IS NOT NULL ORDER BY status_name";
    let mut stmt = conn.prepare(SQL)?;
    let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;

    let mut names = Vec::new();
    for name in rows {
        // Stop at the first row that fails to decode.
        names.push(name?);
    }
    Ok(names)
}
/// Runs the issue list query described by `filters`.
///
/// Builds a `WHERE` clause from the active filters, counts all matching
/// issues, then fetches up to `filters.limit` rows in the requested order.
/// `available_statuses` in the returned value is left empty.
///
/// # Errors
/// Fails when the project cannot be resolved, when `--since` cannot be
/// parsed, or when any database call fails.
fn query_issues(conn: &Connection, filters: &ListFilters) -> Result<ListResult> {
// Clauses use positional `?` placeholders, so each value must be pushed to
// `params` in the same order its clause is pushed to `where_clauses`.
let mut where_clauses = Vec::new();
let mut params: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
if let Some(project) = filters.project {
let project_id = resolve_project(conn, project)?;
where_clauses.push("i.project_id = ?");
params.push(Box::new(project_id));
}
// The pseudo-state "all" means "do not filter by state".
if let Some(state) = filters.state
&& state != "all"
{
where_clauses.push("i.state = ?");
params.push(Box::new(state.to_string()));
}
if let Some(author) = filters.author {
// Accept both "@name" and "name".
let username = author.strip_prefix('@').unwrap_or(author);
where_clauses.push("i.author_username = ?");
params.push(Box::new(username.to_string()));
}
if let Some(assignee) = filters.assignee {
let username = assignee.strip_prefix('@').unwrap_or(assignee);
where_clauses.push(
"EXISTS (SELECT 1 FROM issue_assignees ia
WHERE ia.issue_id = i.id AND ia.username = ?)",
);
params.push(Box::new(username.to_string()));
}
if let Some(since_str) = filters.since {
let cutoff_ms = parse_since(since_str).ok_or_else(|| {
LoreError::Other(format!(
"Invalid --since value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.",
since_str
))
})?;
where_clauses.push("i.updated_at >= ?");
params.push(Box::new(cutoff_ms));
}
if let Some(labels) = filters.labels {
// One EXISTS clause per label: an issue must carry all of them.
for label in labels {
where_clauses.push(
"EXISTS (SELECT 1 FROM issue_labels il
JOIN labels l ON il.label_id = l.id
WHERE il.issue_id = i.id AND l.name = ?)",
);
params.push(Box::new(label.clone()));
}
}
if let Some(milestone) = filters.milestone {
where_clauses.push("i.milestone_title = ?");
params.push(Box::new(milestone.to_string()));
}
if let Some(due_before) = filters.due_before {
where_clauses.push("i.due_date IS NOT NULL AND i.due_date <= ?");
params.push(Box::new(due_before.to_string()));
}
if filters.has_due_date {
where_clauses.push("i.due_date IS NOT NULL");
}
// Declared before use so the formatted clause outlives the `&str`
// reference stored in `where_clauses`.
let status_in_clause;
if filters.statuses.len() == 1 {
where_clauses.push("i.status_name = ? COLLATE NOCASE");
params.push(Box::new(filters.statuses[0].clone()));
} else if filters.statuses.len() > 1 {
let placeholders: Vec<&str> = filters.statuses.iter().map(|_| "?").collect();
status_in_clause = format!(
"i.status_name COLLATE NOCASE IN ({})",
placeholders.join(", ")
);
where_clauses.push(&status_in_clause);
for s in filters.statuses {
params.push(Box::new(s.clone()));
}
}
let where_sql = if where_clauses.is_empty() {
String::new()
} else {
format!("WHERE {}", where_clauses.join(" AND "))
};
// Count all matches first, using the filter parameters only (the LIMIT
// parameter is appended afterwards).
let count_sql = format!(
"SELECT COUNT(*) FROM issues i
JOIN projects p ON i.project_id = p.id
{where_sql}"
);
let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect();
let total_count: i64 = conn.query_row(&count_sql, param_refs.as_slice(), |row| row.get(0))?;
let total_count = total_count as usize;
// Sort column and direction are picked from fixed literals, so the
// caller-supplied strings are never interpolated into the SQL text.
let sort_column = match filters.sort {
"created" => "i.created_at",
"iid" => "i.iid",
_ => "i.updated_at",
};
let order = if filters.order == "asc" {
"ASC"
} else {
"DESC"
};
let query_sql = format!(
"SELECT
i.iid,
i.title,
i.state,
i.author_username,
i.created_at,
i.updated_at,
i.web_url,
p.path_with_namespace,
(SELECT GROUP_CONCAT(l.name, X'1F')
FROM issue_labels il
JOIN labels l ON il.label_id = l.id
WHERE il.issue_id = i.id) AS labels_csv,
(SELECT GROUP_CONCAT(ia.username, X'1F')
FROM issue_assignees ia
WHERE ia.issue_id = i.id) AS assignees_csv,
(SELECT COUNT(*) FROM discussions d
WHERE d.issue_id = i.id) AS discussion_count,
(SELECT COUNT(*) FROM discussions d
WHERE d.issue_id = i.id AND d.resolvable = 1 AND d.resolved = 0) AS unresolved_count,
i.status_name,
i.status_category,
i.status_color,
i.status_icon_name,
i.status_synced_at
FROM issues i
JOIN projects p ON i.project_id = p.id
{where_sql}
ORDER BY {sort_column} {order}
LIMIT ?"
);
// The LIMIT placeholder comes last in the SQL, so its value is appended last.
params.push(Box::new(filters.limit as i64));
let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect();
let mut stmt = conn.prepare(&query_sql)?;
let issues: Vec<IssueListRow> = stmt
.query_map(param_refs.as_slice(), |row| {
// Labels and assignees arrive as one string joined with the 0x1F
// byte; a NULL aggregate becomes an empty list.
let labels_csv: Option<String> = row.get(8)?;
let labels = labels_csv
.map(|s| s.split('\x1F').map(String::from).collect())
.unwrap_or_default();
let assignees_csv: Option<String> = row.get(9)?;
let assignees = assignees_csv
.map(|s| s.split('\x1F').map(String::from).collect())
.unwrap_or_default();
// Column indexes follow the SELECT list above.
Ok(IssueListRow {
iid: row.get(0)?,
title: row.get(1)?,
state: row.get(2)?,
author_username: row.get(3)?,
created_at: row.get(4)?,
updated_at: row.get(5)?,
web_url: row.get(6)?,
project_path: row.get(7)?,
labels,
assignees,
discussion_count: row.get(10)?,
unresolved_count: row.get(11)?,
status_name: row.get(12)?,
status_category: row.get(13)?,
status_color: row.get(14)?,
status_icon_name: row.get(15)?,
status_synced_at: row.get(16)?,
})
})?
.collect::<std::result::Result<Vec<_>, _>>()?;
Ok(ListResult {
issues,
total_count,
available_statuses: Vec::new(),
})
}
pub fn print_list_issues(result: &ListResult) {
if result.issues.is_empty() {
println!("No issues found.");
return;
}
println!(
"{} {} of {}\n",
Theme::bold().render("Issues"),
result.issues.len(),
result.total_count
);
let has_any_status = result.issues.iter().any(|i| i.status_name.is_some());
let mut headers = vec!["IID", "Title", "State"];
if has_any_status {
headers.push("Status");
}
headers.extend(["Assignee", "Labels", "Disc", "Updated"]);
let mut table = LoreTable::new().headers(&headers).align(0, Align::Right);
for issue in &result.issues {
let title = render::truncate(&issue.title, 45);
let relative_time = render::format_relative_time_compact(issue.updated_at);
let labels = render::format_labels_bare(&issue.labels, 2);
let assignee = format_assignees(&issue.assignees);
let discussions = format_discussions(issue.discussion_count, issue.unresolved_count);
let (icon, state_style) = if issue.state == "opened" {
(Icons::issue_opened(), Theme::success())
} else {
(Icons::issue_closed(), Theme::dim())
};
let state_cell = StyledCell::styled(format!("{icon} {}", issue.state), state_style);
let mut row = vec![
StyledCell::styled(format!("#{}", issue.iid), Theme::info()),
StyledCell::plain(title),
state_cell,
];
if has_any_status {
match &issue.status_name {
Some(status) => {
row.push(StyledCell::plain(render::style_with_hex(
status,
issue.status_color.as_deref(),
)));
}
None => {
row.push(StyledCell::plain(""));
}
}
}
row.extend([
StyledCell::styled(assignee, Theme::accent()),
StyledCell::styled(labels, Theme::warning()),
discussions,
StyledCell::styled(relative_time, Theme::dim()),
]);
table.add_row(row);
}
println!("{}", table.render());
}
/// Prints the issue list as a single line of JSON on stdout.
///
/// The envelope is `{ "ok", "data", "meta" }`, where `meta` carries the
/// elapsed time and the available status names. When `fields` is given, the
/// expanded field preset is applied to the "issues" entries before printing.
/// Serialization failures are reported on stderr.
pub fn print_list_issues_json(result: &ListResult, elapsed_ms: u64, fields: Option<&[String]>) {
    let payload = ListResultJson::from(result);
    let mut envelope = serde_json::json!({
        "ok": true,
        "data": payload,
        "meta": {
            "elapsed_ms": elapsed_ms,
            "available_statuses": result.available_statuses,
        },
    });

    if let Some(requested) = fields {
        let expanded = expand_fields_preset(requested, "issues");
        filter_fields(&mut envelope, "issues", &expanded);
    }

    let rendered = serde_json::to_string(&envelope);
    match rendered {
        Ok(text) => println!("{text}"),
        Err(err) => eprintln!("Error serializing to JSON: {err}"),
    }
}
/// Opens the first listed issue's web URL with the system opener.
///
/// Returns the URL on success. Returns `None` when there are no issues, when
/// the first issue has no URL, or when opening fails (the failure is reported
/// on stderr).
pub fn open_issue_in_browser(result: &ListResult) -> Option<String> {
    let url = result
        .issues
        .first()
        .and_then(|issue| issue.web_url.as_ref())?;

    if let Err(e) = open::that(url) {
        eprintln!("Failed to open browser: {e}");
        return None;
    }
    println!("Opened: {url}");
    Some(url.clone())
}

View File

@@ -1,6 +1,9 @@
use super::*;
use crate::cli::render;
use crate::core::time::now_ms;
use crate::test_support::{
insert_project as insert_test_project, setup_test_db as setup_note_test_db, test_config,
};
#[test]
fn truncate_leaves_short_strings_alone() {
@@ -82,34 +85,6 @@ fn format_discussions_with_unresolved() {
// Note query layer tests
// -----------------------------------------------------------------------
use std::path::Path;
use crate::core::config::{
Config, EmbeddingConfig, GitLabConfig, LoggingConfig, ProjectConfig, ScoringConfig,
StorageConfig, SyncConfig,
};
use crate::core::db::{create_connection, run_migrations};
/// Builds a `Config` for tests with a fixed GitLab endpoint, one configured
/// project (`group/project`) and default values for every other section.
fn test_config(default_project: Option<&str>) -> Config {
    let gitlab = GitLabConfig {
        base_url: String::from("https://gitlab.example.com"),
        token_env_var: String::from("GITLAB_TOKEN"),
        token: None,
        username: None,
    };
    let projects = vec![ProjectConfig {
        path: String::from("group/project"),
    }];

    Config {
        gitlab,
        projects,
        default_project: default_project.map(str::to_owned),
        sync: SyncConfig::default(),
        storage: StorageConfig::default(),
        embedding: EmbeddingConfig::default(),
        logging: LoggingConfig::default(),
        scoring: ScoringConfig::default(),
    }
}
fn default_note_filters() -> NoteListFilters {
NoteListFilters {
limit: 50,
@@ -132,26 +107,6 @@ fn default_note_filters() -> NoteListFilters {
}
}
/// Creates a connection for the `:memory:` path and runs the migrations on it.
/// Panics if either step fails.
fn setup_note_test_db() -> Connection {
    let in_memory = Path::new(":memory:");
    let db = create_connection(in_memory).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Inserts a project row for tests. The GitLab project id is `id * 100` and
/// the web URL is derived from `path`. Panics if the insert fails.
fn insert_test_project(conn: &Connection, id: i64, path: &str) {
    let gitlab_project_id = id * 100;
    let web_url = format!("https://gitlab.example.com/{path}");
    let sql = "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url)
VALUES (?1, ?2, ?3, ?4)";
    conn.execute(sql, rusqlite::params![id, gitlab_project_id, path, web_url])
        .unwrap();
}
fn insert_test_issue(conn: &Connection, id: i64, project_id: i64, iid: i64, title: &str) {
conn.execute(
"INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username,

View File

@@ -0,0 +1,28 @@
mod issues;
mod mrs;
mod notes;
mod render_helpers;
pub use issues::{
IssueListRow, IssueListRowJson, ListFilters, ListResult, ListResultJson, open_issue_in_browser,
print_list_issues, print_list_issues_json, run_list_issues,
};
pub use mrs::{
MrListFilters, MrListResult, MrListResultJson, MrListRow, MrListRowJson, open_mr_in_browser,
print_list_mrs, print_list_mrs_json, run_list_mrs,
};
pub use notes::{
NoteListFilters, NoteListResult, NoteListResultJson, NoteListRow, NoteListRowJson,
print_list_notes, print_list_notes_json, query_notes,
};
#[cfg(test)]
use crate::core::path_resolver::escape_like as note_escape_like;
#[cfg(test)]
use render_helpers::{format_discussions, format_note_parent, format_note_type, truncate_body};
#[cfg(test)]
use rusqlite::Connection;
#[cfg(test)]
#[path = "list_tests.rs"]
mod tests;

View File

@@ -0,0 +1,404 @@
use crate::cli::render::{self, Align, Icons, StyledCell, Table as LoreTable, Theme};
use rusqlite::Connection;
use serde::Serialize;
use crate::Config;
use crate::cli::robot::{RobotMeta, expand_fields_preset, filter_fields};
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, parse_since};
use super::render_helpers::{format_branches, format_discussions};
/// One merge request as loaded from the local database for list output.
///
/// `labels`, `assignees` and `reviewers` hold individual names.
#[derive(Debug, Serialize)]
pub struct MrListRow {
pub iid: i64,
pub title: String,
pub state: String,
pub draft: bool,
pub author_username: String,
pub source_branch: String,
pub target_branch: String,
// Timestamps are kept as integers here; the JSON row converts them with
// `ms_to_iso` (presumably epoch milliseconds — confirm against the schema).
pub created_at: i64,
pub updated_at: i64,
#[serde(skip_serializing_if = "Option::is_none")]
pub web_url: Option<String>,
pub project_path: String,
pub labels: Vec<String>,
pub assignees: Vec<String>,
pub reviewers: Vec<String>,
pub discussion_count: i64,
pub unresolved_count: i64,
}
/// JSON-facing form of a merge request list row.
///
/// Mirrors `MrListRow`, except that the integer timestamps are replaced by
/// `created_at_iso` / `updated_at_iso` strings.
#[derive(Serialize)]
pub struct MrListRowJson {
pub iid: i64,
pub title: String,
pub state: String,
pub draft: bool,
pub author_username: String,
pub source_branch: String,
pub target_branch: String,
pub labels: Vec<String>,
pub assignees: Vec<String>,
pub reviewers: Vec<String>,
pub discussion_count: i64,
pub unresolved_count: i64,
pub created_at_iso: String,
pub updated_at_iso: String,
// Omitted from the output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
pub web_url: Option<String>,
pub project_path: String,
}
impl From<&MrListRow> for MrListRowJson {
fn from(row: &MrListRow) -> Self {
Self {
iid: row.iid,
title: row.title.clone(),
state: row.state.clone(),
draft: row.draft,
author_username: row.author_username.clone(),
source_branch: row.source_branch.clone(),
target_branch: row.target_branch.clone(),
labels: row.labels.clone(),
assignees: row.assignees.clone(),
reviewers: row.reviewers.clone(),
discussion_count: row.discussion_count,
unresolved_count: row.unresolved_count,
created_at_iso: ms_to_iso(row.created_at),
updated_at_iso: ms_to_iso(row.updated_at),
web_url: row.web_url.clone(),
project_path: row.project_path.clone(),
}
}
}
/// Result of a merge request list query.
#[derive(Serialize)]
pub struct MrListResult {
// The rows returned by the query, capped by the requested limit.
pub mrs: Vec<MrListRow>,
// Number of merge requests matching the filters, counted without the limit.
pub total_count: usize,
}
/// JSON-facing form of a merge request list result.
#[derive(Serialize)]
pub struct MrListResultJson {
pub mrs: Vec<MrListRowJson>,
pub total_count: usize,
// Number of rows actually included in `mrs`.
pub showing: usize,
}
impl From<&MrListResult> for MrListResultJson {
fn from(result: &MrListResult) -> Self {
Self {
mrs: result.mrs.iter().map(MrListRowJson::from).collect(),
total_count: result.total_count,
showing: result.mrs.len(),
}
}
}
/// Filters and ordering options for listing merge requests.
///
/// Every `Option` field is ignored when `None`; active filters are combined
/// with `AND`.
pub struct MrListFilters<'a> {
// Maximum number of rows returned (bound to the query's `LIMIT`).
pub limit: usize,
pub project: Option<&'a str>,
// The value "all" disables state filtering.
pub state: Option<&'a str>,
// A leading '@' is stripped from `author`, `assignee` and `reviewer`.
pub author: Option<&'a str>,
pub assignee: Option<&'a str>,
pub reviewer: Option<&'a str>,
// Every listed label must be present on the merge request.
pub labels: Option<&'a [String]>,
// Parsed with `parse_since`; compared against `updated_at`.
pub since: Option<&'a str>,
// `draft` takes precedence: `no_draft` is only consulted when `draft` is false.
pub draft: bool,
pub no_draft: bool,
pub target_branch: Option<&'a str>,
pub source_branch: Option<&'a str>,
// "created" or "iid"; any other value sorts by `updated_at`.
pub sort: &'a str,
// "asc" for ascending; any other value sorts descending.
pub order: &'a str,
}
/// Opens the configured database and lists merge requests matching `filters`.
///
/// # Errors
/// Propagates any failure from opening the connection or running the query.
pub fn run_list_mrs(config: &Config, filters: MrListFilters) -> Result<MrListResult> {
    let conn = create_connection(&get_db_path(config.storage.db_path.as_deref()))?;
    query_mrs(&conn, &filters)
}
/// Runs the merge request list query described by `filters`.
///
/// Builds a `WHERE` clause from the active filters, counts all matching
/// merge requests, then fetches up to `filters.limit` rows in the requested
/// order.
///
/// # Errors
/// Fails when the project cannot be resolved, when `--since` cannot be
/// parsed, or when any database call fails.
fn query_mrs(conn: &Connection, filters: &MrListFilters) -> Result<MrListResult> {
// Clauses use positional `?` placeholders, so each value must be pushed to
// `params` in the same order its clause is pushed to `where_clauses`.
let mut where_clauses = Vec::new();
let mut params: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
if let Some(project) = filters.project {
let project_id = resolve_project(conn, project)?;
where_clauses.push("m.project_id = ?");
params.push(Box::new(project_id));
}
// The pseudo-state "all" means "do not filter by state".
if let Some(state) = filters.state
&& state != "all"
{
where_clauses.push("m.state = ?");
params.push(Box::new(state.to_string()));
}
if let Some(author) = filters.author {
// Accept both "@name" and "name".
let username = author.strip_prefix('@').unwrap_or(author);
where_clauses.push("m.author_username = ?");
params.push(Box::new(username.to_string()));
}
if let Some(assignee) = filters.assignee {
let username = assignee.strip_prefix('@').unwrap_or(assignee);
where_clauses.push(
"EXISTS (SELECT 1 FROM mr_assignees ma
WHERE ma.merge_request_id = m.id AND ma.username = ?)",
);
params.push(Box::new(username.to_string()));
}
if let Some(reviewer) = filters.reviewer {
let username = reviewer.strip_prefix('@').unwrap_or(reviewer);
where_clauses.push(
"EXISTS (SELECT 1 FROM mr_reviewers mr
WHERE mr.merge_request_id = m.id AND mr.username = ?)",
);
params.push(Box::new(username.to_string()));
}
if let Some(since_str) = filters.since {
let cutoff_ms = parse_since(since_str).ok_or_else(|| {
LoreError::Other(format!(
"Invalid --since value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.",
since_str
))
})?;
where_clauses.push("m.updated_at >= ?");
params.push(Box::new(cutoff_ms));
}
if let Some(labels) = filters.labels {
// One EXISTS clause per label: a merge request must carry all of them.
for label in labels {
where_clauses.push(
"EXISTS (SELECT 1 FROM mr_labels ml
JOIN labels l ON ml.label_id = l.id
WHERE ml.merge_request_id = m.id AND l.name = ?)",
);
params.push(Box::new(label.clone()));
}
}
// `draft` wins when both flags are set.
if filters.draft {
where_clauses.push("m.draft = 1");
} else if filters.no_draft {
where_clauses.push("m.draft = 0");
}
if let Some(target_branch) = filters.target_branch {
where_clauses.push("m.target_branch = ?");
params.push(Box::new(target_branch.to_string()));
}
if let Some(source_branch) = filters.source_branch {
where_clauses.push("m.source_branch = ?");
params.push(Box::new(source_branch.to_string()));
}
let where_sql = if where_clauses.is_empty() {
String::new()
} else {
format!("WHERE {}", where_clauses.join(" AND "))
};
// Count all matches first, using the filter parameters only (the LIMIT
// parameter is appended afterwards).
let count_sql = format!(
"SELECT COUNT(*) FROM merge_requests m
JOIN projects p ON m.project_id = p.id
{where_sql}"
);
let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect();
let total_count: i64 = conn.query_row(&count_sql, param_refs.as_slice(), |row| row.get(0))?;
let total_count = total_count as usize;
// Sort column and direction are picked from fixed literals, so the
// caller-supplied strings are never interpolated into the SQL text.
let sort_column = match filters.sort {
"created" => "m.created_at",
"iid" => "m.iid",
_ => "m.updated_at",
};
let order = if filters.order == "asc" {
"ASC"
} else {
"DESC"
};
let query_sql = format!(
"SELECT
m.iid,
m.title,
m.state,
m.draft,
m.author_username,
m.source_branch,
m.target_branch,
m.created_at,
m.updated_at,
m.web_url,
p.path_with_namespace,
(SELECT GROUP_CONCAT(l.name, X'1F')
FROM mr_labels ml
JOIN labels l ON ml.label_id = l.id
WHERE ml.merge_request_id = m.id) AS labels_csv,
(SELECT GROUP_CONCAT(ma.username, X'1F')
FROM mr_assignees ma
WHERE ma.merge_request_id = m.id) AS assignees_csv,
(SELECT GROUP_CONCAT(mr.username, X'1F')
FROM mr_reviewers mr
WHERE mr.merge_request_id = m.id) AS reviewers_csv,
(SELECT COUNT(*) FROM discussions d
WHERE d.merge_request_id = m.id) AS discussion_count,
(SELECT COUNT(*) FROM discussions d
WHERE d.merge_request_id = m.id AND d.resolvable = 1 AND d.resolved = 0) AS unresolved_count
FROM merge_requests m
JOIN projects p ON m.project_id = p.id
{where_sql}
ORDER BY {sort_column} {order}
LIMIT ?"
);
// The LIMIT placeholder comes last in the SQL, so its value is appended last.
params.push(Box::new(filters.limit as i64));
let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect();
let mut stmt = conn.prepare(&query_sql)?;
let mrs: Vec<MrListRow> = stmt
.query_map(param_refs.as_slice(), |row| {
// Labels, assignees and reviewers arrive as one string joined with
// the 0x1F byte; a NULL aggregate becomes an empty list.
let labels_csv: Option<String> = row.get(11)?;
let labels = labels_csv
.map(|s| s.split('\x1F').map(String::from).collect())
.unwrap_or_default();
let assignees_csv: Option<String> = row.get(12)?;
let assignees = assignees_csv
.map(|s| s.split('\x1F').map(String::from).collect())
.unwrap_or_default();
let reviewers_csv: Option<String> = row.get(13)?;
let reviewers = reviewers_csv
.map(|s| s.split('\x1F').map(String::from).collect())
.unwrap_or_default();
// The draft column is read as an integer; only the value 1 counts as draft.
let draft_int: i64 = row.get(3)?;
// Column indexes follow the SELECT list above.
Ok(MrListRow {
iid: row.get(0)?,
title: row.get(1)?,
state: row.get(2)?,
draft: draft_int == 1,
author_username: row.get(4)?,
source_branch: row.get(5)?,
target_branch: row.get(6)?,
created_at: row.get(7)?,
updated_at: row.get(8)?,
web_url: row.get(9)?,
project_path: row.get(10)?,
labels,
assignees,
reviewers,
discussion_count: row.get(14)?,
unresolved_count: row.get(15)?,
})
})?
.collect::<std::result::Result<Vec<_>, _>>()?;
Ok(MrListResult { mrs, total_count })
}
pub fn print_list_mrs(result: &MrListResult) {
if result.mrs.is_empty() {
println!("No merge requests found.");
return;
}
println!(
"{} {} of {}\n",
Theme::bold().render("Merge Requests"),
result.mrs.len(),
result.total_count
);
let mut table = LoreTable::new()
.headers(&[
"IID", "Title", "State", "Author", "Branches", "Disc", "Updated",
])
.align(0, Align::Right);
for mr in &result.mrs {
let title = if mr.draft {
format!("{} {}", Icons::mr_draft(), render::truncate(&mr.title, 42))
} else {
render::truncate(&mr.title, 45)
};
let relative_time = render::format_relative_time_compact(mr.updated_at);
let branches = format_branches(&mr.target_branch, &mr.source_branch, 25);
let discussions = format_discussions(mr.discussion_count, mr.unresolved_count);
let (icon, style) = match mr.state.as_str() {
"opened" => (Icons::mr_opened(), Theme::success()),
"merged" => (Icons::mr_merged(), Theme::accent()),
"closed" => (Icons::mr_closed(), Theme::error()),
"locked" => (Icons::mr_opened(), Theme::warning()),
_ => (Icons::mr_opened(), Theme::dim()),
};
let state_cell = StyledCell::styled(format!("{icon} {}", mr.state), style);
table.add_row(vec![
StyledCell::styled(format!("!{}", mr.iid), Theme::info()),
StyledCell::plain(title),
state_cell,
StyledCell::styled(
format!("@{}", render::truncate(&mr.author_username, 12)),
Theme::accent(),
),
StyledCell::styled(branches, Theme::info()),
discussions,
StyledCell::styled(relative_time, Theme::dim()),
]);
}
println!("{}", table.render());
}
/// Prints the merge request list as a single line of JSON on stdout.
///
/// The envelope is `{ "ok", "data", "meta" }`. When `fields` is given, the
/// expanded field preset is applied to the "mrs" entries before printing.
/// Serialization failures are reported on stderr.
pub fn print_list_mrs_json(result: &MrListResult, elapsed_ms: u64, fields: Option<&[String]>) {
    let payload = MrListResultJson::from(result);
    let meta = RobotMeta { elapsed_ms };
    let mut envelope = serde_json::json!({
        "ok": true,
        "data": payload,
        "meta": meta,
    });

    if let Some(requested) = fields {
        let expanded = expand_fields_preset(requested, "mrs");
        filter_fields(&mut envelope, "mrs", &expanded);
    }

    let rendered = serde_json::to_string(&envelope);
    match rendered {
        Ok(text) => println!("{text}"),
        Err(err) => eprintln!("Error serializing to JSON: {err}"),
    }
}
/// Opens the first listed merge request's web URL with the system opener.
///
/// Returns the URL on success. Returns `None` when there are no merge
/// requests, when the first one has no URL, or when opening fails (the
/// failure is reported on stderr).
pub fn open_mr_in_browser(result: &MrListResult) -> Option<String> {
    let url = result.mrs.first().and_then(|mr| mr.web_url.as_ref())?;

    if let Err(e) = open::that(url) {
        eprintln!("Failed to open browser: {e}");
        return None;
    }
    println!("Opened: {url}");
    Some(url.clone())
}

View File

@@ -0,0 +1,470 @@
use crate::cli::render::{self, Align, StyledCell, Table as LoreTable, Theme};
use rusqlite::Connection;
use serde::Serialize;
use crate::Config;
use crate::cli::robot::{RobotMeta, expand_fields_preset, filter_fields};
use crate::core::error::{LoreError, Result};
use crate::core::path_resolver::escape_like as note_escape_like;
use crate::core::project::resolve_project;
use crate::core::time::{iso_to_ms, ms_to_iso, parse_since};
use super::render_helpers::{
format_note_parent, format_note_path, format_note_type, truncate_body,
};
/// One note as loaded for list output, together with a few fields describing
/// its parent item and project.
#[derive(Debug, Serialize)]
pub struct NoteListRow {
// `id` and `gitlab_id` are distinct identifiers; the table output shows `gitlab_id`.
pub id: i64,
pub gitlab_id: i64,
pub author_username: String,
pub body: Option<String>,
pub note_type: Option<String>,
pub is_system: bool,
// Timestamps are kept as integers here; the JSON row converts them with
// `ms_to_iso` (presumably epoch milliseconds — confirm against the schema).
pub created_at: i64,
pub updated_at: i64,
pub position_new_path: Option<String>,
pub position_new_line: Option<i64>,
pub position_old_path: Option<String>,
pub position_old_line: Option<i64>,
pub resolvable: bool,
pub resolved: bool,
pub resolved_by: Option<String>,
pub noteable_type: Option<String>,
pub parent_iid: Option<i64>,
pub parent_title: Option<String>,
pub project_path: String,
}
/// JSON-facing form of a note list row.
///
/// Mirrors `NoteListRow`, except that the integer timestamps are replaced by
/// `created_at_iso` / `updated_at_iso` strings. Every `Option` field is
/// omitted from the output when it is `None`.
#[derive(Serialize)]
pub struct NoteListRowJson {
pub id: i64,
pub gitlab_id: i64,
pub author_username: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub body: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub note_type: Option<String>,
pub is_system: bool,
pub created_at_iso: String,
pub updated_at_iso: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub position_new_path: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub position_new_line: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub position_old_path: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub position_old_line: Option<i64>,
pub resolvable: bool,
pub resolved: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub resolved_by: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub noteable_type: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub parent_iid: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub parent_title: Option<String>,
pub project_path: String,
}
impl From<&NoteListRow> for NoteListRowJson {
fn from(row: &NoteListRow) -> Self {
Self {
id: row.id,
gitlab_id: row.gitlab_id,
author_username: row.author_username.clone(),
body: row.body.clone(),
note_type: row.note_type.clone(),
is_system: row.is_system,
created_at_iso: ms_to_iso(row.created_at),
updated_at_iso: ms_to_iso(row.updated_at),
position_new_path: row.position_new_path.clone(),
position_new_line: row.position_new_line,
position_old_path: row.position_old_path.clone(),
position_old_line: row.position_old_line,
resolvable: row.resolvable,
resolved: row.resolved,
resolved_by: row.resolved_by.clone(),
noteable_type: row.noteable_type.clone(),
parent_iid: row.parent_iid,
parent_title: row.parent_title.clone(),
project_path: row.project_path.clone(),
}
}
}
/// Result of a note list query: the returned rows plus a total count.
#[derive(Debug)]
pub struct NoteListResult {
pub notes: Vec<NoteListRow>,
// Printed as the "of N" figure in the table header; may exceed `notes.len()`.
pub total_count: i64,
}
/// JSON-facing form of a note list result.
#[derive(Serialize)]
pub struct NoteListResultJson {
pub notes: Vec<NoteListRowJson>,
pub total_count: i64,
// Number of rows actually included in `notes`.
pub showing: usize,
}
impl From<&NoteListResult> for NoteListResultJson {
fn from(result: &NoteListResult) -> Self {
Self {
notes: result.notes.iter().map(NoteListRowJson::from).collect(),
total_count: result.total_count,
showing: result.notes.len(),
}
}
}
/// Filters and ordering options for listing notes.
///
/// Every `Option` field is ignored when `None`.
pub struct NoteListFilters {
pub limit: usize,
pub project: Option<String>,
// A leading '@' is stripped; the match is case-insensitive.
pub author: Option<String>,
pub note_type: Option<String>,
// When false, system notes are excluded.
pub include_system: bool,
// Filtering by issue IID needs a project: either `project` or the
// configured default project.
pub for_issue_iid: Option<i64>,
pub for_mr_iid: Option<i64>,
pub note_id: Option<i64>,
pub gitlab_note_id: Option<i64>,
pub discussion_id: Option<String>,
// `since` / `until` bound the note's `created_at`. A `YYYY-MM-DD` value for
// `until` is treated as the end of that day.
pub since: Option<String>,
pub until: Option<String>,
// A trailing '/' turns this into a prefix match on `position_new_path`;
// otherwise the path must match exactly.
pub path: Option<String>,
// Case-insensitive substring match on the note body.
pub contains: Option<String>,
// Accepts "resolved" or "unresolved"; any other value is rejected.
pub resolution: Option<String>,
pub sort: String,
pub order: String,
}
/// Prints the note list as a table on stdout.
///
/// Prints "No notes found." when there are no rows. Bodies are shortened
/// with `truncate_body` and a missing body is shown as an empty cell.
pub fn print_list_notes(result: &NoteListResult) {
    let notes = &result.notes;
    if notes.is_empty() {
        println!("No notes found.");
        return;
    }

    println!(
        "{} {} of {}\n",
        Theme::bold().render("Notes"),
        notes.len(),
        result.total_count
    );

    let headers = [
        "ID",
        "Author",
        "Type",
        "Body",
        "Path:Line",
        "Parent",
        "Created",
    ];
    let mut table = LoreTable::new().headers(&headers).align(0, Align::Right);

    for note in notes {
        let body = match note.body.as_deref() {
            Some(text) => truncate_body(text, 60),
            None => Default::default(),
        };
        let path = format_note_path(note.position_new_path.as_deref(), note.position_new_line);
        let parent = format_note_parent(note.noteable_type.as_deref(), note.parent_iid);
        let created = render::format_relative_time_compact(note.created_at);
        let kind = format_note_type(note.note_type.as_deref());
        let author = format!("@{}", render::truncate(&note.author_username, 12));

        table.add_row(vec![
            StyledCell::styled(note.gitlab_id.to_string(), Theme::info()),
            StyledCell::styled(author, Theme::accent()),
            StyledCell::plain(kind),
            StyledCell::plain(body),
            StyledCell::plain(path),
            StyledCell::plain(parent),
            StyledCell::styled(created, Theme::dim()),
        ]);
    }

    println!("{}", table.render());
}
/// Prints the note list as a single line of JSON on stdout.
///
/// The envelope is `{ "ok", "data", "meta" }`. When `fields` is given, the
/// expanded field preset is applied to the "notes" entries before printing.
/// Serialization failures are reported on stderr.
pub fn print_list_notes_json(result: &NoteListResult, elapsed_ms: u64, fields: Option<&[String]>) {
    let payload = NoteListResultJson::from(result);
    let meta = RobotMeta { elapsed_ms };
    let mut envelope = serde_json::json!({
        "ok": true,
        "data": payload,
        "meta": meta,
    });

    if let Some(requested) = fields {
        let expanded = expand_fields_preset(requested, "notes");
        filter_fields(&mut envelope, "notes", &expanded);
    }

    let rendered = serde_json::to_string(&envelope);
    match rendered {
        Ok(text) => println!("{text}"),
        Err(err) => eprintln!("Error serializing to JSON: {err}"),
    }
}
/// Queries notes matching `filters`, returning up to `filters.limit` rows together with
/// the total number of matching notes.
///
/// Each filter that is set contributes one `AND`-ed condition to a shared `WHERE`
/// clause, which is used for both the `COUNT(*)` query and the row query. All
/// user-supplied values are bound as SQL parameters; only fixed SQL fragments (the
/// conditions, the sort column and the sort direction) are interpolated.
///
/// # Errors
///
/// Returns `LoreError::Other` when `--since` / `--until` cannot be parsed, when
/// `--since` is later than `--until`, when `--resolution` is neither `resolved` nor
/// `unresolved`, or when an issue/MR IID filter is used without a project (neither
/// `filters.project` nor `config.default_project`). Errors from `resolve_project` and
/// from SQLite are propagated with `?`.
pub fn query_notes(
    conn: &Connection,
    filters: &NoteListFilters,
    config: &Config,
) -> Result<NoteListResult> {
    let mut where_clauses: Vec<String> = Vec::new();
    let mut params: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();

    if let Some(ref project) = filters.project {
        let project_id = resolve_project(conn, project)?;
        where_clauses.push("n.project_id = ?".to_string());
        params.push(Box::new(project_id));
    }

    if let Some(ref author) = filters.author {
        // Accept both `@name` and `name`.
        let username = author.strip_prefix('@').unwrap_or(author);
        where_clauses.push("n.author_username = ? COLLATE NOCASE".to_string());
        params.push(Box::new(username.to_string()));
    }

    if let Some(ref note_type) = filters.note_type {
        where_clauses.push("n.note_type = ?".to_string());
        params.push(Box::new(note_type.clone()));
    }

    if !filters.include_system {
        where_clauses.push("n.is_system = 0".to_string());
    }

    // Lower time bound; the parsed value is kept so it can be validated against --until.
    let since_ms = if let Some(ref since_str) = filters.since {
        let ms = parse_since(since_str).ok_or_else(|| {
            LoreError::Other(format!(
                "Invalid --since value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.",
                since_str
            ))
        })?;
        where_clauses.push("n.created_at >= ?".to_string());
        params.push(Box::new(ms));
        Some(ms)
    } else {
        None
    };

    if let Some(ref until_str) = filters.until {
        // A bare date (10 characters, two dashes) is made inclusive by extending it to
        // the last millisecond of that day; anything else goes through `parse_since`.
        let until_ms = if until_str.len() == 10
            && until_str.chars().filter(|&c| c == '-').count() == 2
        {
            let iso_full = format!("{until_str}T23:59:59.999Z");
            iso_to_ms(&iso_full).ok_or_else(|| {
                LoreError::Other(format!(
                    "Invalid --until value '{}'. Use YYYY-MM-DD or relative format.",
                    until_str
                ))
            })?
        } else {
            parse_since(until_str).ok_or_else(|| {
                LoreError::Other(format!(
                    "Invalid --until value '{}'. Use relative (7d, 2w, 1m) or absolute (YYYY-MM-DD) format.",
                    until_str
                ))
            })?
        };
        if let Some(s) = since_ms
            && s > until_ms
        {
            return Err(LoreError::Other(
                "Invalid time window: --since is after --until.".to_string(),
            ));
        }
        where_clauses.push("n.created_at <= ?".to_string());
        params.push(Box::new(until_ms));
    }

    if let Some(ref path) = filters.path {
        if let Some(prefix) = path.strip_suffix('/') {
            // A trailing slash selects everything under that directory. The separator
            // must stay in the pattern: matching on `src%` would also return
            // `src-old/...` or `srcfoo.rs`. A bare `/` keeps matching every path.
            let pattern = if prefix.is_empty() {
                "%".to_string()
            } else {
                format!("{}/%", note_escape_like(prefix))
            };
            where_clauses.push("n.position_new_path LIKE ? ESCAPE '\\'".to_string());
            params.push(Box::new(pattern));
        } else {
            where_clauses.push("n.position_new_path = ?".to_string());
            params.push(Box::new(path.clone()));
        }
    }

    if let Some(ref contains) = filters.contains {
        let escaped = note_escape_like(contains);
        where_clauses.push("n.body LIKE ? ESCAPE '\\' COLLATE NOCASE".to_string());
        params.push(Box::new(format!("%{escaped}%")));
    }

    if let Some(ref resolution) = filters.resolution {
        match resolution.as_str() {
            "unresolved" => {
                where_clauses.push("n.resolvable = 1 AND n.resolved = 0".to_string());
            }
            "resolved" => {
                where_clauses.push("n.resolvable = 1 AND n.resolved = 1".to_string());
            }
            other => {
                return Err(LoreError::Other(format!(
                    "Invalid --resolution value '{}'. Use 'resolved' or 'unresolved'.",
                    other
                )));
            }
        }
    }

    // IIDs are looked up together with a project id, so a project (explicit or the
    // configured default) is required for both parent filters below.
    if let Some(iid) = filters.for_issue_iid {
        let project_str = filters
            .project
            .as_deref()
            .or(config.default_project.as_deref())
            .ok_or_else(|| {
                LoreError::Other(
                    "Cannot filter by issue IID without a project context. Use --project or set defaultProject in config."
                        .to_string(),
                )
            })?;
        let project_id = resolve_project(conn, project_str)?;
        where_clauses.push(
            "d.issue_id = (SELECT id FROM issues WHERE project_id = ? AND iid = ?)".to_string(),
        );
        params.push(Box::new(project_id));
        params.push(Box::new(iid));
    }

    if let Some(iid) = filters.for_mr_iid {
        let project_str = filters
            .project
            .as_deref()
            .or(config.default_project.as_deref())
            .ok_or_else(|| {
                LoreError::Other(
                    "Cannot filter by MR IID without a project context. Use --project or set defaultProject in config."
                        .to_string(),
                )
            })?;
        let project_id = resolve_project(conn, project_str)?;
        where_clauses.push(
            "d.merge_request_id = (SELECT id FROM merge_requests WHERE project_id = ? AND iid = ?)"
                .to_string(),
        );
        params.push(Box::new(project_id));
        params.push(Box::new(iid));
    }

    if let Some(id) = filters.note_id {
        where_clauses.push("n.id = ?".to_string());
        params.push(Box::new(id));
    }

    if let Some(gitlab_id) = filters.gitlab_note_id {
        where_clauses.push("n.gitlab_id = ?".to_string());
        params.push(Box::new(gitlab_id));
    }

    if let Some(ref disc_id) = filters.discussion_id {
        where_clauses.push("d.gitlab_discussion_id = ?".to_string());
        params.push(Box::new(disc_id.clone()));
    }

    let where_sql = if where_clauses.is_empty() {
        String::new()
    } else {
        format!("WHERE {}", where_clauses.join(" AND "))
    };

    // Total match count, computed before LIMIT is applied.
    let count_sql = format!(
        "SELECT COUNT(*) FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN projects p ON n.project_id = p.id
LEFT JOIN issues i ON d.issue_id = i.id
LEFT JOIN merge_requests m ON d.merge_request_id = m.id
{where_sql}"
    );
    let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect();
    let total_count: i64 = conn.query_row(&count_sql, param_refs.as_slice(), |row| row.get(0))?;

    // Sort column and direction come from fixed literals, never from raw user input.
    let sort_column = match filters.sort.as_str() {
        "updated" => "n.updated_at",
        _ => "n.created_at",
    };
    let order = if filters.order == "asc" {
        "ASC"
    } else {
        "DESC"
    };

    // `n.id` is a tie-breaker so rows with equal timestamps have a stable order.
    let query_sql = format!(
        "SELECT
n.id,
n.gitlab_id,
n.author_username,
n.body,
n.note_type,
n.is_system,
n.created_at,
n.updated_at,
n.position_new_path,
n.position_new_line,
n.position_old_path,
n.position_old_line,
n.resolvable,
n.resolved,
n.resolved_by,
d.noteable_type,
COALESCE(i.iid, m.iid) AS parent_iid,
COALESCE(i.title, m.title) AS parent_title,
p.path_with_namespace AS project_path
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN projects p ON n.project_id = p.id
LEFT JOIN issues i ON d.issue_id = i.id
LEFT JOIN merge_requests m ON d.merge_request_id = m.id
{where_sql}
ORDER BY {sort_column} {order}, n.id {order}
LIMIT ?"
    );

    // The LIMIT placeholder is last in the SQL, so its value is appended last.
    params.push(Box::new(filters.limit as i64));
    let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect();

    let mut stmt = conn.prepare(&query_sql)?;
    let notes: Vec<NoteListRow> = stmt
        .query_map(param_refs.as_slice(), |row| {
            // Column indices follow the SELECT list above; flags are read as integers.
            let is_system_int: i64 = row.get(5)?;
            let resolvable_int: i64 = row.get(12)?;
            let resolved_int: i64 = row.get(13)?;
            Ok(NoteListRow {
                id: row.get(0)?,
                gitlab_id: row.get(1)?,
                author_username: row.get::<_, Option<String>>(2)?.unwrap_or_default(),
                body: row.get(3)?,
                note_type: row.get(4)?,
                is_system: is_system_int == 1,
                created_at: row.get(6)?,
                updated_at: row.get(7)?,
                position_new_path: row.get(8)?,
                position_new_line: row.get(9)?,
                position_old_path: row.get(10)?,
                position_old_line: row.get(11)?,
                resolvable: resolvable_int == 1,
                resolved: resolved_int == 1,
                resolved_by: row.get(14)?,
                noteable_type: row.get(15)?,
                parent_iid: row.get(16)?,
                parent_title: row.get(17)?,
                project_path: row.get(18)?,
            })
        })?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    Ok(NoteListResult { notes, total_count })
}

View File

@@ -0,0 +1,73 @@
use crate::cli::render::{self, StyledCell, Theme};
/// Renders an assignee list for a table cell.
///
/// Returns `-` for an empty list. Otherwise the first two names are shown as
/// `@name` (each passed through `render::truncate` with a width of 10), joined with
/// `, `, followed by ` +N` when N further assignees were left out.
pub(crate) fn format_assignees(assignees: &[String]) -> String {
    const MAX_SHOWN: usize = 2;

    if assignees.is_empty() {
        return "-".to_string();
    }

    let visible = assignees
        .iter()
        .take(MAX_SHOWN)
        .map(|name| format!("@{}", render::truncate(name, 10)))
        .collect::<Vec<String>>()
        .join(", ");

    match assignees.len().saturating_sub(MAX_SHOWN) {
        0 => visible,
        hidden => format!("{} +{}", visible, hidden),
    }
}
pub(crate) fn format_discussions(total: i64, unresolved: i64) -> StyledCell {
if total == 0 {
return StyledCell::plain(String::new());
}
if unresolved > 0 {
let text = format!("{total}/");
let warn = Theme::warning().render(&format!("{unresolved}!"));
StyledCell::plain(format!("{text}{warn}"))
} else {
StyledCell::plain(format!("{total}"))
}
}
/// Formats a branch pair as `target <- source`, shortened by `render::truncate` to
/// `max_width`.
pub(crate) fn format_branches(target: &str, source: &str, max_width: usize) -> String {
    let combined = [target, source].join(" <- ");
    render::truncate(&combined, max_width)
}
/// Shortens `body` to at most `max_len` characters, appending `...` when text was cut.
///
/// Length is counted in `char`s, not bytes, so multi-byte characters are never split.
/// The ellipsis is added on top of `max_len`; a body that already fits is returned
/// unchanged.
pub(crate) fn truncate_body(body: &str, max_len: usize) -> String {
    // The byte offset of the character at index `max_len` (if there is one) is exactly
    // where the kept prefix ends.
    match body.char_indices().nth(max_len) {
        Some((cut, _)) => format!("{}...", &body[..cut]),
        None => body.to_string(),
    }
}
/// Maps a note type to its short table label: `DiffNote` becomes `Diff`,
/// `DiscussionNote` becomes `Disc`, and anything else (including `None`) becomes `-`.
pub(crate) fn format_note_type(note_type: Option<&str>) -> &'static str {
    match note_type.unwrap_or_default() {
        "DiffNote" => "Diff",
        "DiscussionNote" => "Disc",
        _ => "-",
    }
}
/// Formats a note's file location as `path:line`, as just `path` when no line is
/// known, or as `-` when there is no path (a line without a path is also `-`).
pub(crate) fn format_note_path(path: Option<&str>, line: Option<i64>) -> String {
    match path {
        None => "-".to_string(),
        Some(file) => match line {
            Some(number) => format!("{file}:{number}"),
            None => file.to_string(),
        },
    }
}
/// Formats the entity a note belongs to: `Issue #N` for issues, `MR !N` for merge
/// requests, and `-` when the type is unknown or either piece is missing.
pub(crate) fn format_note_parent(noteable_type: Option<&str>, parent_iid: Option<i64>) -> String {
    noteable_type
        .zip(parent_iid)
        .and_then(|(kind, iid)| match kind {
            "Issue" => Some(format!("Issue #{iid}")),
            "MergeRequest" => Some(format!("MR !{iid}")),
            _ => None,
        })
        .unwrap_or_else(|| "-".to_string())
}

View File

@@ -1,32 +1,11 @@
use super::*;
use crate::cli::commands::me::types::{ActivityEventType, AttentionState};
use crate::core::db::{create_connection, run_migrations};
use crate::core::time::now_ms;
use crate::test_support::{insert_project, setup_test_db};
use rusqlite::Connection;
use std::path::Path;
// ─── Helpers ────────────────────────────────────────────────────────────────
/// Opens a `:memory:` database connection and runs all migrations on it.
/// Panics if either step fails.
fn setup_test_db() -> Connection {
    let in_memory = Path::new(":memory:");
    let db = create_connection(in_memory).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Inserts a project row for tests. The GitLab project id is derived as `id * 100`
/// and the web URL as `https://git.example.com/{path}`. Panics if the insert fails.
fn insert_project(conn: &Connection, id: i64, path: &str) {
    let gitlab_project_id = id * 100;
    let web_url = format!("https://git.example.com/{path}");
    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url)
VALUES (?1, ?2, ?3, ?4)",
        rusqlite::params![id, gitlab_project_id, path, web_url],
    )
    .unwrap();
}
fn insert_issue(conn: &Connection, id: i64, project_id: i64, iid: i64, author: &str) {
insert_issue_with_status(
conn,

View File

@@ -17,7 +17,6 @@ pub mod show;
pub mod stats;
pub mod sync;
pub mod sync_status;
pub mod sync_surgical;
pub mod timeline;
pub mod trace;
pub mod who;
@@ -61,9 +60,8 @@ pub use show::{
run_show_mr,
};
pub use stats::{print_stats, print_stats_json, run_stats};
pub use sync::{SyncOptions, SyncResult, print_sync, print_sync_json, run_sync};
pub use sync::{SyncOptions, SyncResult, print_sync, print_sync_json, run_sync, run_sync_surgical};
pub use sync_status::{print_sync_status, print_sync_status_json, run_sync_status};
pub use sync_surgical::run_sync_surgical;
pub use timeline::{TimelineParams, print_timeline, print_timeline_json_with_meta, run_timeline};
pub use trace::{parse_trace_path, print_trace, print_trace_json};
pub use who::{WhoRun, print_who_human, print_who_json, run_who};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,310 @@
/// Summary of a merge request that closes an issue, as loaded by `get_closing_mrs`
/// (entity references with `reference_type = 'closes'`).
#[derive(Debug, Clone, Serialize)]
pub struct ClosingMrRef {
/// The merge request's `iid` column.
pub iid: i64,
pub title: String,
/// State string as stored; `print_show_issue` distinguishes `merged`, `opened` and `closed`.
pub state: String,
pub web_url: Option<String>,
}
/// Everything `run_show_issue` gathers about one issue: the issue row itself plus its
/// labels, assignees, closing merge requests and discussions.
#[derive(Debug, Serialize)]
pub struct IssueDetail {
pub id: i64,
pub iid: i64,
pub title: String,
pub description: Option<String>,
pub state: String,
pub author_username: String,
// NOTE(review): created_at / updated_at look like epoch milliseconds (they are passed
// to `ms_to_iso` and `format_date(ms)`) — confirm against the schema.
pub created_at: i64,
pub updated_at: i64,
/// Stored as text; `print_show_issue` prints it verbatim.
pub closed_at: Option<String>,
pub confidential: bool,
pub web_url: Option<String>,
pub project_path: String,
/// Built by `run_show_issue` as `{project_path}#{iid}`.
pub references_full: String,
pub labels: Vec<String>,
pub assignees: Vec<String>,
pub due_date: Option<String>,
/// Filled from the row's `milestone_title`.
pub milestone: Option<String>,
/// Number of non-system notes in the issue's discussions (subquery in `find_issue`).
pub user_notes_count: i64,
/// Length of `closing_merge_requests`.
pub merge_requests_count: usize,
pub closing_merge_requests: Vec<ClosingMrRef>,
pub discussions: Vec<DiscussionDetail>,
pub status_name: Option<String>,
pub status_category: Option<String>,
/// Passed to `render::style_with_hex` when the status is printed.
pub status_color: Option<String>,
pub status_icon_name: Option<String>,
pub status_synced_at: Option<i64>,
}
/// One issue discussion together with its notes, as loaded by `get_issue_discussions`.
#[derive(Debug, Serialize)]
pub struct DiscussionDetail {
/// Notes of the discussion in the order returned by `ORDER BY position`.
pub notes: Vec<NoteDetail>,
/// True when the discussion's `individual_note` column equals 1.
pub individual_note: bool,
}
/// A single note inside an issue discussion.
#[derive(Debug, Serialize)]
pub struct NoteDetail {
pub author_username: String,
pub body: String,
pub created_at: i64,
/// True when the note's `is_system` column equals 1; such notes are hidden by `print_show_issue`.
pub is_system: bool,
}
pub fn run_show_issue(
config: &Config,
iid: i64,
project_filter: Option<&str>,
) -> Result<IssueDetail> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
let issue = find_issue(&conn, iid, project_filter)?;
let labels = get_issue_labels(&conn, issue.id)?;
let assignees = get_issue_assignees(&conn, issue.id)?;
let closing_mrs = get_closing_mrs(&conn, issue.id)?;
let discussions = get_issue_discussions(&conn, issue.id)?;
let references_full = format!("{}#{}", issue.project_path, issue.iid);
let merge_requests_count = closing_mrs.len();
Ok(IssueDetail {
id: issue.id,
iid: issue.iid,
title: issue.title,
description: issue.description,
state: issue.state,
author_username: issue.author_username,
created_at: issue.created_at,
updated_at: issue.updated_at,
closed_at: issue.closed_at,
confidential: issue.confidential,
web_url: issue.web_url,
project_path: issue.project_path,
references_full,
labels,
assignees,
due_date: issue.due_date,
milestone: issue.milestone_title,
user_notes_count: issue.user_notes_count,
merge_requests_count,
closing_merge_requests: closing_mrs,
discussions,
status_name: issue.status_name,
status_category: issue.status_category,
status_color: issue.status_color,
status_icon_name: issue.status_icon_name,
status_synced_at: issue.status_synced_at,
})
}
/// Raw issue row as read by `find_issue`, before labels, assignees, closing merge
/// requests and discussions are attached. Field order mirrors the SELECT column order.
#[derive(Debug)]
struct IssueRow {
id: i64,
iid: i64,
title: String,
description: Option<String>,
state: String,
author_username: String,
created_at: i64,
updated_at: i64,
closed_at: Option<String>,
// Read from an integer column; any non-zero value counts as true.
confidential: bool,
web_url: Option<String>,
// `projects.path_with_namespace` of the owning project.
project_path: String,
due_date: Option<String>,
milestone_title: Option<String>,
// Count of non-system notes, computed by a subquery in `find_issue`.
user_notes_count: i64,
status_name: Option<String>,
status_category: Option<String>,
status_color: Option<String>,
status_icon_name: Option<String>,
status_synced_at: Option<i64>,
}
/// Looks up exactly one issue by `iid`, optionally restricted to one project.
///
/// # Errors
///
/// * `LoreError::NotFound` when no issue has that `iid`.
/// * `LoreError::Ambiguous` when several projects contain an issue with that `iid`;
///   the message lists the project paths.
/// * Any error from `resolve_project` or from SQLite.
fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Result<IssueRow> {
    // The first placeholder is always the iid; the project id is appended when scoped.
    let mut bindings: Vec<Box<dyn rusqlite::ToSql>> = vec![Box::new(iid)];
    let sql = if let Some(project) = project_filter {
        let project_id = resolve_project(conn, project)?;
        bindings.push(Box::new(project_id));
        "SELECT i.id, i.iid, i.title, i.description, i.state, i.author_username,
i.created_at, i.updated_at, i.closed_at, i.confidential,
i.web_url, p.path_with_namespace,
i.due_date, i.milestone_title,
(SELECT COUNT(*) FROM notes n
JOIN discussions d ON n.discussion_id = d.id
WHERE d.noteable_type = 'Issue' AND d.issue_id = i.id AND n.is_system = 0) AS user_notes_count,
i.status_name, i.status_category, i.status_color,
i.status_icon_name, i.status_synced_at
FROM issues i
JOIN projects p ON i.project_id = p.id
WHERE i.iid = ? AND i.project_id = ?"
    } else {
        "SELECT i.id, i.iid, i.title, i.description, i.state, i.author_username,
i.created_at, i.updated_at, i.closed_at, i.confidential,
i.web_url, p.path_with_namespace,
i.due_date, i.milestone_title,
(SELECT COUNT(*) FROM notes n
JOIN discussions d ON n.discussion_id = d.id
WHERE d.noteable_type = 'Issue' AND d.issue_id = i.id AND n.is_system = 0) AS user_notes_count,
i.status_name, i.status_category, i.status_color,
i.status_icon_name, i.status_synced_at
FROM issues i
JOIN projects p ON i.project_id = p.id
WHERE i.iid = ?"
    };

    let bound: Vec<&dyn rusqlite::ToSql> = bindings.iter().map(|b| b.as_ref()).collect();
    let mut stmt = conn.prepare(sql)?;
    let mut matches = stmt
        .query_map(bound.as_slice(), |row| {
            // The confidential flag is stored as an integer.
            let confidential_flag: i64 = row.get(9)?;
            Ok(IssueRow {
                id: row.get(0)?,
                iid: row.get(1)?,
                title: row.get(2)?,
                description: row.get(3)?,
                state: row.get(4)?,
                author_username: row.get(5)?,
                created_at: row.get(6)?,
                updated_at: row.get(7)?,
                closed_at: row.get(8)?,
                confidential: confidential_flag != 0,
                web_url: row.get(10)?,
                project_path: row.get(11)?,
                due_date: row.get(12)?,
                milestone_title: row.get(13)?,
                user_notes_count: row.get(14)?,
                status_name: row.get(15)?,
                status_category: row.get(16)?,
                status_color: row.get(17)?,
                status_icon_name: row.get(18)?,
                status_synced_at: row.get(19)?,
            })
        })?
        .collect::<std::result::Result<Vec<IssueRow>, _>>()?;

    if matches.len() > 1 {
        let projects: Vec<String> = matches
            .iter()
            .map(|found| found.project_path.clone())
            .collect();
        return Err(LoreError::Ambiguous(format!(
            "Issue #{} exists in multiple projects: {}. Use --project to specify.",
            iid,
            projects.join(", ")
        )));
    }

    // At most one row is left here: take it, or report that nothing matched.
    matches
        .pop()
        .ok_or_else(|| LoreError::NotFound(format!("Issue #{} not found", iid)))
}
/// Returns the names of all labels attached to the issue, sorted by name.
fn get_issue_labels(conn: &Connection, issue_id: i64) -> Result<Vec<String>> {
    const SQL: &str = "SELECT l.name FROM labels l
JOIN issue_labels il ON l.id = il.label_id
WHERE il.issue_id = ?
ORDER BY l.name";

    let mut stmt = conn.prepare(SQL)?;
    let rows = stmt.query_map([issue_id], |row| row.get::<_, String>(0))?;

    let mut names: Vec<String> = Vec::new();
    for name in rows {
        names.push(name?);
    }
    Ok(names)
}
/// Returns the usernames assigned to the issue, sorted by username.
fn get_issue_assignees(conn: &Connection, issue_id: i64) -> Result<Vec<String>> {
    const SQL: &str = "SELECT username FROM issue_assignees
WHERE issue_id = ?
ORDER BY username";

    let mut stmt = conn.prepare(SQL)?;
    let rows = stmt.query_map([issue_id], |row| row.get::<_, String>(0))?;

    let mut usernames: Vec<String> = Vec::new();
    for username in rows {
        usernames.push(username?);
    }
    Ok(usernames)
}
/// Returns the merge requests recorded as closing the issue, ordered by MR `iid`.
///
/// A merge request qualifies when `entity_references` holds a row from a
/// `merge_request` source to this `issue` target with `reference_type = 'closes'`.
fn get_closing_mrs(conn: &Connection, issue_id: i64) -> Result<Vec<ClosingMrRef>> {
    let mut stmt = conn.prepare(
        "SELECT mr.iid, mr.title, mr.state, mr.web_url
FROM entity_references er
JOIN merge_requests mr ON mr.id = er.source_entity_id
WHERE er.target_entity_type = 'issue'
AND er.target_entity_id = ?
AND er.source_entity_type = 'merge_request'
AND er.reference_type = 'closes'
ORDER BY mr.iid",
    )?;

    let rows = stmt.query_map([issue_id], |row| {
        let iid = row.get(0)?;
        let title = row.get(1)?;
        let state = row.get(2)?;
        let web_url = row.get(3)?;
        Ok(ClosingMrRef {
            iid,
            title,
            state,
            web_url,
        })
    })?;

    let mut closing: Vec<ClosingMrRef> = Vec::new();
    for mr in rows {
        closing.push(mr?);
    }
    Ok(closing)
}
/// Loads the issue's discussions (ordered by `first_note_at`) with their notes
/// (ordered by `position`).
///
/// A discussion whose notes are all system notes is dropped. A discussion with no
/// notes at all is kept.
fn get_issue_discussions(conn: &Connection, issue_id: i64) -> Result<Vec<DiscussionDetail>> {
    let mut disc_stmt = conn.prepare(
        "SELECT id, individual_note FROM discussions
WHERE issue_id = ?
ORDER BY first_note_at",
    )?;
    // All discussion ids are collected up front, before any note query runs.
    let threads = disc_stmt
        .query_map([issue_id], |row| {
            let individual_flag: i64 = row.get(1)?;
            let id: i64 = row.get(0)?;
            Ok((id, individual_flag == 1))
        })?
        .collect::<std::result::Result<Vec<(i64, bool)>, _>>()?;

    // Prepared once and re-executed for every discussion.
    let mut note_stmt = conn.prepare(
        "SELECT author_username, body, created_at, is_system
FROM notes
WHERE discussion_id = ?
ORDER BY position",
    )?;

    let mut kept = Vec::new();
    for (disc_id, individual_note) in threads {
        let rows = note_stmt.query_map([disc_id], |row| {
            let system_flag: i64 = row.get(3)?;
            Ok(NoteDetail {
                author_username: row.get(0)?,
                body: row.get(1)?,
                created_at: row.get(2)?,
                is_system: system_flag == 1,
            })
        })?;
        let mut notes: Vec<NoteDetail> = Vec::new();
        for note in rows {
            notes.push(note?);
        }

        let system_only = !notes.is_empty() && notes.iter().all(|n| n.is_system);
        if system_only {
            continue;
        }
        kept.push(DiscussionDetail {
            notes,
            individual_note,
        });
    }
    Ok(kept)
}

View File

@@ -0,0 +1,19 @@
use crate::cli::render::{self, Icons, Theme};
use rusqlite::Connection;
use serde::Serialize;
use crate::Config;
use crate::cli::robot::RobotMeta;
use crate::core::db::create_connection;
use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::ms_to_iso;
include!("issue.rs");
include!("mr.rs");
include!("render.rs");
#[cfg(test)]
#[path = "show_tests.rs"]
mod tests;

283
src/cli/commands/show/mr.rs Normal file
View File

@@ -0,0 +1,283 @@
/// Everything `run_show_mr` gathers about one merge request: the MR row itself plus
/// its labels, assignees, reviewers and discussions.
#[derive(Debug, Serialize)]
pub struct MrDetail {
pub id: i64,
pub iid: i64,
pub title: String,
pub description: Option<String>,
/// State string as stored; `print_show_mr` distinguishes `opened`, `merged` and `closed`.
pub state: String,
pub draft: bool,
pub author_username: String,
pub source_branch: String,
pub target_branch: String,
// NOTE(review): the timestamp fields look like epoch milliseconds (they are passed to
// `format_date(ms)`) — confirm against the schema.
pub created_at: i64,
pub updated_at: i64,
pub merged_at: Option<i64>,
pub closed_at: Option<i64>,
pub web_url: Option<String>,
pub project_path: String,
pub labels: Vec<String>,
pub assignees: Vec<String>,
pub reviewers: Vec<String>,
pub discussions: Vec<MrDiscussionDetail>,
}
/// One merge request discussion together with its notes, as loaded by `get_mr_discussions`.
#[derive(Debug, Serialize)]
pub struct MrDiscussionDetail {
/// Notes of the discussion in the order returned by `ORDER BY position`.
pub notes: Vec<MrNoteDetail>,
/// True when the discussion's `individual_note` column equals 1.
pub individual_note: bool,
}
/// A single note inside a merge request discussion.
#[derive(Debug, Serialize)]
pub struct MrNoteDetail {
pub author_username: String,
pub body: String,
pub created_at: i64,
/// True when the note's `is_system` column equals 1; such notes are hidden by `print_show_mr`.
pub is_system: bool,
/// Diff location; `get_mr_discussions` sets it only when at least one path or line column is non-null.
pub position: Option<DiffNotePosition>,
}
/// Diff location of a note, read from the note's `position_*` columns.
/// Every component is optional; `print_diff_position` prefers `new_path` over `old_path`.
#[derive(Debug, Clone, Serialize)]
pub struct DiffNotePosition {
pub old_path: Option<String>,
pub new_path: Option<String>,
pub old_line: Option<i64>,
pub new_line: Option<i64>,
pub position_type: Option<String>,
}
pub fn run_show_mr(config: &Config, iid: i64, project_filter: Option<&str>) -> Result<MrDetail> {
let db_path = get_db_path(config.storage.db_path.as_deref());
let conn = create_connection(&db_path)?;
let mr = find_mr(&conn, iid, project_filter)?;
let labels = get_mr_labels(&conn, mr.id)?;
let assignees = get_mr_assignees(&conn, mr.id)?;
let reviewers = get_mr_reviewers(&conn, mr.id)?;
let discussions = get_mr_discussions(&conn, mr.id)?;
Ok(MrDetail {
id: mr.id,
iid: mr.iid,
title: mr.title,
description: mr.description,
state: mr.state,
draft: mr.draft,
author_username: mr.author_username,
source_branch: mr.source_branch,
target_branch: mr.target_branch,
created_at: mr.created_at,
updated_at: mr.updated_at,
merged_at: mr.merged_at,
closed_at: mr.closed_at,
web_url: mr.web_url,
project_path: mr.project_path,
labels,
assignees,
reviewers,
discussions,
})
}
/// Raw merge request row as read by `find_mr`, before labels, assignees, reviewers
/// and discussions are attached. Field order mirrors the SELECT column order.
struct MrRow {
id: i64,
iid: i64,
title: String,
description: Option<String>,
state: String,
// Read from an integer column; true only when the stored value is 1.
draft: bool,
author_username: String,
source_branch: String,
target_branch: String,
created_at: i64,
updated_at: i64,
merged_at: Option<i64>,
closed_at: Option<i64>,
web_url: Option<String>,
// `projects.path_with_namespace` of the owning project.
project_path: String,
}
/// Looks up exactly one merge request by `iid`, optionally restricted to one project.
///
/// # Errors
///
/// * `LoreError::NotFound` when no merge request has that `iid`.
/// * `LoreError::Ambiguous` when several projects contain a merge request with that
///   `iid`; the message lists the project paths.
/// * Any error from `resolve_project` or from SQLite.
fn find_mr(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Result<MrRow> {
    // The first placeholder is always the iid; the project id is appended when scoped.
    let mut bindings: Vec<Box<dyn rusqlite::ToSql>> = vec![Box::new(iid)];
    let sql = if let Some(project) = project_filter {
        let project_id = resolve_project(conn, project)?;
        bindings.push(Box::new(project_id));
        "SELECT m.id, m.iid, m.title, m.description, m.state, m.draft,
m.author_username, m.source_branch, m.target_branch,
m.created_at, m.updated_at, m.merged_at, m.closed_at,
m.web_url, p.path_with_namespace
FROM merge_requests m
JOIN projects p ON m.project_id = p.id
WHERE m.iid = ? AND m.project_id = ?"
    } else {
        "SELECT m.id, m.iid, m.title, m.description, m.state, m.draft,
m.author_username, m.source_branch, m.target_branch,
m.created_at, m.updated_at, m.merged_at, m.closed_at,
m.web_url, p.path_with_namespace
FROM merge_requests m
JOIN projects p ON m.project_id = p.id
WHERE m.iid = ?"
    };

    let bound: Vec<&dyn rusqlite::ToSql> = bindings.iter().map(|b| b.as_ref()).collect();
    let mut stmt = conn.prepare(sql)?;
    let mut matches = stmt
        .query_map(bound.as_slice(), |row| {
            // The draft flag is stored as an integer.
            let draft_flag: i64 = row.get(5)?;
            Ok(MrRow {
                id: row.get(0)?,
                iid: row.get(1)?,
                title: row.get(2)?,
                description: row.get(3)?,
                state: row.get(4)?,
                draft: draft_flag == 1,
                author_username: row.get(6)?,
                source_branch: row.get(7)?,
                target_branch: row.get(8)?,
                created_at: row.get(9)?,
                updated_at: row.get(10)?,
                merged_at: row.get(11)?,
                closed_at: row.get(12)?,
                web_url: row.get(13)?,
                project_path: row.get(14)?,
            })
        })?
        .collect::<std::result::Result<Vec<MrRow>, _>>()?;

    if matches.len() > 1 {
        let projects: Vec<String> = matches
            .iter()
            .map(|found| found.project_path.clone())
            .collect();
        return Err(LoreError::Ambiguous(format!(
            "MR !{} exists in multiple projects: {}. Use --project to specify.",
            iid,
            projects.join(", ")
        )));
    }

    // At most one row is left here: take it, or report that nothing matched.
    matches
        .pop()
        .ok_or_else(|| LoreError::NotFound(format!("MR !{} not found", iid)))
}
/// Returns the names of all labels attached to the merge request, sorted by name.
fn get_mr_labels(conn: &Connection, mr_id: i64) -> Result<Vec<String>> {
    const SQL: &str = "SELECT l.name FROM labels l
JOIN mr_labels ml ON l.id = ml.label_id
WHERE ml.merge_request_id = ?
ORDER BY l.name";

    let mut stmt = conn.prepare(SQL)?;
    let rows = stmt.query_map([mr_id], |row| row.get::<_, String>(0))?;

    let mut names: Vec<String> = Vec::new();
    for name in rows {
        names.push(name?);
    }
    Ok(names)
}
/// Returns the usernames assigned to the merge request, sorted by username.
fn get_mr_assignees(conn: &Connection, mr_id: i64) -> Result<Vec<String>> {
    const SQL: &str = "SELECT username FROM mr_assignees
WHERE merge_request_id = ?
ORDER BY username";

    let mut stmt = conn.prepare(SQL)?;
    let rows = stmt.query_map([mr_id], |row| row.get::<_, String>(0))?;

    let mut usernames: Vec<String> = Vec::new();
    for username in rows {
        usernames.push(username?);
    }
    Ok(usernames)
}
/// Returns the usernames of the merge request's reviewers, sorted by username.
fn get_mr_reviewers(conn: &Connection, mr_id: i64) -> Result<Vec<String>> {
    const SQL: &str = "SELECT username FROM mr_reviewers
WHERE merge_request_id = ?
ORDER BY username";

    let mut stmt = conn.prepare(SQL)?;
    let rows = stmt.query_map([mr_id], |row| row.get::<_, String>(0))?;

    let mut usernames: Vec<String> = Vec::new();
    for username in rows {
        usernames.push(username?);
    }
    Ok(usernames)
}
/// Loads the merge request's discussions (ordered by `first_note_at`) with their
/// notes (ordered by `position`), including each note's diff position when present.
///
/// A discussion whose notes are all system notes is dropped. A discussion with no
/// notes at all is kept.
fn get_mr_discussions(conn: &Connection, mr_id: i64) -> Result<Vec<MrDiscussionDetail>> {
    let mut disc_stmt = conn.prepare(
        "SELECT id, individual_note FROM discussions
WHERE merge_request_id = ?
ORDER BY first_note_at",
    )?;
    // All discussion ids are collected up front, before any note query runs.
    let threads = disc_stmt
        .query_map([mr_id], |row| {
            let individual_flag: i64 = row.get(1)?;
            let id: i64 = row.get(0)?;
            Ok((id, individual_flag == 1))
        })?
        .collect::<std::result::Result<Vec<(i64, bool)>, _>>()?;

    // Prepared once and re-executed for every discussion.
    let mut note_stmt = conn.prepare(
        "SELECT author_username, body, created_at, is_system,
position_old_path, position_new_path, position_old_line,
position_new_line, position_type
FROM notes
WHERE discussion_id = ?
ORDER BY position",
    )?;

    let mut kept = Vec::new();
    for (disc_id, individual_note) in threads {
        let rows = note_stmt.query_map([disc_id], |row| {
            let system_flag: i64 = row.get(3)?;
            let old_path: Option<String> = row.get(4)?;
            let new_path: Option<String> = row.get(5)?;
            let old_line: Option<i64> = row.get(6)?;
            let new_line: Option<i64> = row.get(7)?;
            let position_type: Option<String> = row.get(8)?;

            // A position exists as soon as any path or line is known; the position
            // type alone does not count.
            let has_anchor = old_path.is_some()
                || new_path.is_some()
                || old_line.is_some()
                || new_line.is_some();
            let position = has_anchor.then(|| DiffNotePosition {
                old_path,
                new_path,
                old_line,
                new_line,
                position_type,
            });

            Ok(MrNoteDetail {
                author_username: row.get(0)?,
                body: row.get(1)?,
                created_at: row.get(2)?,
                is_system: system_flag == 1,
                position,
            })
        })?;
        let mut notes: Vec<MrNoteDetail> = Vec::new();
        for note in rows {
            notes.push(note?);
        }

        let system_only = !notes.is_empty() && notes.iter().all(|n| n.is_system);
        if system_only {
            continue;
        }
        kept.push(MrDiscussionDetail {
            notes,
            individual_note,
        });
    }
    Ok(kept)
}

View File

@@ -0,0 +1,580 @@
/// Local shorthand that forwards `ms` to `render::format_date`.
fn format_date(ms: i64) -> String {
render::format_date(ms)
}
/// Local shorthand that forwards to `render::wrap_indent` with the same arguments.
fn wrap_text(text: &str, width: usize, indent: &str) -> String {
render::wrap_indent(text, width, indent)
}
/// Prints a human-readable view of an issue to stdout.
///
/// Output order: title line, "Details" section (optional rows are printed only when
/// the value is present), "Development" section listing closing merge requests (only
/// when there are any), "Description" section, and finally the discussions that
/// contain at least one non-system note.
pub fn print_show_issue(issue: &IssueDetail) {
// Title line
println!(
" Issue #{}: {}",
issue.iid,
Theme::bold().render(&issue.title),
);
// Details section
println!("{}", render::section_divider("Details"));
println!(
" Ref {}",
Theme::muted().render(&issue.references_full)
);
println!(
" Project {}",
Theme::info().render(&issue.project_path)
);
// Only the exact state "opened" gets the open icon and success style; anything else is shown as closed.
let (icon, state_style) = if issue.state == "opened" {
(Icons::issue_opened(), Theme::success())
} else {
(Icons::issue_closed(), Theme::dim())
};
println!(
" State {}",
state_style.render(&format!("{icon} {}", issue.state))
);
if let Some(status) = &issue.status_name {
println!(
" Status {}",
render::style_with_hex(status, issue.status_color.as_deref())
);
}
if issue.confidential {
println!(" {}", Theme::error().bold().render("CONFIDENTIAL"));
}
println!(" Author @{}", issue.author_username);
if !issue.assignees.is_empty() {
let label = if issue.assignees.len() > 1 {
"Assignees"
} else {
"Assignee"
};
// Pads the label to 12 columns; both possible labels are shorter than 12,
// so the subtraction cannot underflow.
println!(
" {}{} {}",
label,
" ".repeat(12 - label.len()),
issue
.assignees
.iter()
.map(|a| format!("@{a}"))
.collect::<Vec<_>>()
.join(", ")
);
}
println!(
" Created {} ({})",
format_date(issue.created_at),
render::format_relative_time_compact(issue.created_at),
);
println!(
" Updated {} ({})",
format_date(issue.updated_at),
render::format_relative_time_compact(issue.updated_at),
);
// closed_at is already a string on IssueDetail and is printed as-is.
if let Some(closed_at) = &issue.closed_at {
println!(" Closed {closed_at}");
}
if let Some(due) = &issue.due_date {
println!(" Due {due}");
}
if let Some(ms) = &issue.milestone {
println!(" Milestone {ms}");
}
if !issue.labels.is_empty() {
println!(
" Labels {}",
render::format_labels_bare(&issue.labels, issue.labels.len())
);
}
if let Some(url) = &issue.web_url {
println!(" URL {}", Theme::muted().render(url));
}
// Development section
if !issue.closing_merge_requests.is_empty() {
println!("{}", render::section_divider("Development"));
for mr in &issue.closing_merge_requests {
// Unknown states fall back to the "opened" icon with a dim style.
let (mr_icon, mr_style) = match mr.state.as_str() {
"merged" => (Icons::mr_merged(), Theme::accent()),
"opened" => (Icons::mr_opened(), Theme::success()),
"closed" => (Icons::mr_closed(), Theme::error()),
_ => (Icons::mr_opened(), Theme::dim()),
};
println!(
" {} !{} {} {}",
mr_style.render(mr_icon),
mr.iid,
mr.title,
mr_style.render(&mr.state),
);
}
}
// Description section
println!("{}", render::section_divider("Description"));
if let Some(desc) = &issue.description {
let wrapped = wrap_text(desc, 72, " ");
println!(" {wrapped}");
} else {
println!(" {}", Theme::muted().render("(no description)"));
}
// Discussions section
// Discussions without any non-system note are hidden from the human view.
let user_discussions: Vec<&DiscussionDetail> = issue
.discussions
.iter()
.filter(|d| d.notes.iter().any(|n| !n.is_system))
.collect();
if user_discussions.is_empty() {
println!("\n {}", Theme::muted().render("No discussions"));
} else {
println!(
"{}",
render::section_divider(&format!("Discussions ({})", user_discussions.len()))
);
for discussion in user_discussions {
let user_notes: Vec<&NoteDetail> =
discussion.notes.iter().filter(|n| !n.is_system).collect();
// The first user note opens the thread; the remaining user notes are printed as replies.
if let Some(first_note) = user_notes.first() {
println!(
" {} {}",
Theme::info().render(&format!("@{}", first_note.author_username)),
format_date(first_note.created_at),
);
let wrapped = wrap_text(&first_note.body, 68, " ");
println!(" {wrapped}");
println!();
for reply in user_notes.iter().skip(1) {
println!(
" {} {}",
Theme::info().render(&format!("@{}", reply.author_username)),
format_date(reply.created_at),
);
let wrapped = wrap_text(&reply.body, 66, " ");
println!(" {wrapped}");
println!();
}
}
}
}
}
/// Prints a human-readable view of a merge request to stdout.
///
/// Output order: title line (with a draft icon for drafts), "Details" section
/// (optional rows are printed only when the value is present), "Description" section,
/// and finally the discussions that contain at least one non-system note. A thread
/// whose first user note has a diff position is preceded by that location.
pub fn print_show_mr(mr: &MrDetail) {
// Title line
let draft_prefix = if mr.draft {
format!("{} ", Icons::mr_draft())
} else {
String::new()
};
println!(
" MR !{}: {}{}",
mr.iid,
draft_prefix,
Theme::bold().render(&mr.title),
);
// Details section
println!("{}", render::section_divider("Details"));
println!(" Project {}", Theme::info().render(&mr.project_path));
// Unknown states fall back to the "opened" icon with a dim style.
let (icon, state_style) = match mr.state.as_str() {
"opened" => (Icons::mr_opened(), Theme::success()),
"merged" => (Icons::mr_merged(), Theme::accent()),
"closed" => (Icons::mr_closed(), Theme::error()),
_ => (Icons::mr_opened(), Theme::dim()),
};
println!(
" State {}",
state_style.render(&format!("{icon} {}", mr.state))
);
println!(
" Branches {} -> {}",
Theme::info().render(&mr.source_branch),
Theme::warning().render(&mr.target_branch)
);
println!(" Author @{}", mr.author_username);
if !mr.assignees.is_empty() {
println!(
" Assignees {}",
mr.assignees
.iter()
.map(|a| format!("@{a}"))
.collect::<Vec<_>>()
.join(", ")
);
}
if !mr.reviewers.is_empty() {
println!(
" Reviewers {}",
mr.reviewers
.iter()
.map(|r| format!("@{r}"))
.collect::<Vec<_>>()
.join(", ")
);
}
println!(
" Created {} ({})",
format_date(mr.created_at),
render::format_relative_time_compact(mr.created_at),
);
println!(
" Updated {} ({})",
format_date(mr.updated_at),
render::format_relative_time_compact(mr.updated_at),
);
if let Some(merged_at) = mr.merged_at {
println!(
" Merged {} ({})",
format_date(merged_at),
render::format_relative_time_compact(merged_at),
);
}
if let Some(closed_at) = mr.closed_at {
println!(
" Closed {} ({})",
format_date(closed_at),
render::format_relative_time_compact(closed_at),
);
}
if !mr.labels.is_empty() {
println!(
" Labels {}",
render::format_labels_bare(&mr.labels, mr.labels.len())
);
}
if let Some(url) = &mr.web_url {
println!(" URL {}", Theme::muted().render(url));
}
// Description section
println!("{}", render::section_divider("Description"));
if let Some(desc) = &mr.description {
let wrapped = wrap_text(desc, 72, " ");
println!(" {wrapped}");
} else {
println!(" {}", Theme::muted().render("(no description)"));
}
// Discussions section
// Discussions without any non-system note are hidden from the human view.
let user_discussions: Vec<&MrDiscussionDetail> = mr
.discussions
.iter()
.filter(|d| d.notes.iter().any(|n| !n.is_system))
.collect();
if user_discussions.is_empty() {
println!("\n {}", Theme::muted().render("No discussions"));
} else {
println!(
"{}",
render::section_divider(&format!("Discussions ({})", user_discussions.len()))
);
for discussion in user_discussions {
let user_notes: Vec<&MrNoteDetail> =
discussion.notes.iter().filter(|n| !n.is_system).collect();
// The first user note opens the thread; the remaining user notes are printed as replies.
if let Some(first_note) = user_notes.first() {
// Only the first note's diff position is shown, above the thread.
if let Some(pos) = &first_note.position {
print_diff_position(pos);
}
println!(
" {} {}",
Theme::info().render(&format!("@{}", first_note.author_username)),
format_date(first_note.created_at),
);
let wrapped = wrap_text(&first_note.body, 68, " ");
println!(" {wrapped}");
println!();
for reply in user_notes.iter().skip(1) {
println!(
" {} {}",
Theme::info().render(&format!("@{}", reply.author_username)),
format_date(reply.created_at),
);
let wrapped = wrap_text(&reply.body, 66, " ");
println!(" {wrapped}");
println!();
}
}
}
}
}
/// Prints the file location line shown above a diff-note thread.
///
/// The file is `new_path`, falling back to `old_path`; nothing is printed when both
/// are missing. The line suffix depends on which line numbers are known:
///
/// * both and equal: `:N`
/// * both and different: `:OLD→NEW`
/// * only new: `:+N`
/// * only old: `:-N`
/// * neither: no suffix
fn print_diff_position(pos: &DiffNotePosition) {
    let file = pos.new_path.as_ref().or(pos.old_path.as_ref());
    if let Some(file_path) = file {
        let line_str = match (pos.old_line, pos.new_line) {
            (Some(old), Some(new)) if old == new => format!(":{}", new),
            // The two numbers need a separator: without one, old line 12 and new
            // line 15 would be rendered as the meaningless ":1215".
            (Some(old), Some(new)) => format!(":{}\u{2192}{}", old, new),
            (None, Some(new)) => format!(":+{}", new),
            (Some(old), None) => format!(":-{}", old),
            (None, None) => String::new(),
        };
        println!(
            " {} {}{}",
            Theme::dim().render("\u{1f4cd}"),
            Theme::warning().render(file_path),
            Theme::dim().render(&line_str)
        );
    }
}
/// Serializable JSON view of an issue, built from `IssueDetail` via `From`.
///
/// `created_at`, `updated_at` and `status_synced_at` are rendered with
/// `ms_to_iso` during conversion; every other field is copied from the source.
#[derive(Serialize)]
pub struct IssueDetailJson {
    pub id: i64,
    pub iid: i64,
    pub title: String,
    pub description: Option<String>,
    pub state: String,
    pub author_username: String,
    pub created_at: String,
    pub updated_at: String,
    pub closed_at: Option<String>,
    pub confidential: bool,
    pub web_url: Option<String>,
    pub project_path: String,
    pub references_full: String,
    pub labels: Vec<String>,
    pub assignees: Vec<String>,
    pub due_date: Option<String>,
    pub milestone: Option<String>,
    pub user_notes_count: i64,
    pub merge_requests_count: usize,
    pub closing_merge_requests: Vec<ClosingMrRefJson>,
    pub discussions: Vec<DiscussionDetailJson>,
    pub status_name: Option<String>,
    // Populated on the struct but never written to the serialized output.
    #[serde(skip_serializing)]
    pub status_category: Option<String>,
    pub status_color: Option<String>,
    pub status_icon_name: Option<String>,
    pub status_synced_at: Option<String>,
}
/// Serializable reference to a merge request, used as the element type of
/// `IssueDetailJson::closing_merge_requests`.
#[derive(Serialize)]
pub struct ClosingMrRefJson {
    pub iid: i64,
    pub title: String,
    pub state: String,
    pub web_url: Option<String>,
}
/// Serializable view of one issue discussion: its notes plus the
/// `individual_note` flag copied from `DiscussionDetail`.
#[derive(Serialize)]
pub struct DiscussionDetailJson {
    pub notes: Vec<NoteDetailJson>,
    pub individual_note: bool,
}
/// Serializable view of a single issue note.
///
/// `created_at` is the string produced by `ms_to_iso` when converting from
/// `NoteDetail`; `is_system` is copied through unchanged.
#[derive(Serialize)]
pub struct NoteDetailJson {
    pub author_username: String,
    pub body: String,
    pub created_at: String,
    pub is_system: bool,
}
impl From<&IssueDetail> for IssueDetailJson {
fn from(issue: &IssueDetail) -> Self {
Self {
id: issue.id,
iid: issue.iid,
title: issue.title.clone(),
description: issue.description.clone(),
state: issue.state.clone(),
author_username: issue.author_username.clone(),
created_at: ms_to_iso(issue.created_at),
updated_at: ms_to_iso(issue.updated_at),
closed_at: issue.closed_at.clone(),
confidential: issue.confidential,
web_url: issue.web_url.clone(),
project_path: issue.project_path.clone(),
references_full: issue.references_full.clone(),
labels: issue.labels.clone(),
assignees: issue.assignees.clone(),
due_date: issue.due_date.clone(),
milestone: issue.milestone.clone(),
user_notes_count: issue.user_notes_count,
merge_requests_count: issue.merge_requests_count,
closing_merge_requests: issue
.closing_merge_requests
.iter()
.map(|mr| ClosingMrRefJson {
iid: mr.iid,
title: mr.title.clone(),
state: mr.state.clone(),
web_url: mr.web_url.clone(),
})
.collect(),
discussions: issue.discussions.iter().map(|d| d.into()).collect(),
status_name: issue.status_name.clone(),
status_category: issue.status_category.clone(),
status_color: issue.status_color.clone(),
status_icon_name: issue.status_icon_name.clone(),
status_synced_at: issue.status_synced_at.map(ms_to_iso),
}
}
}
impl From<&DiscussionDetail> for DiscussionDetailJson {
fn from(disc: &DiscussionDetail) -> Self {
Self {
notes: disc.notes.iter().map(|n| n.into()).collect(),
individual_note: disc.individual_note,
}
}
}
/// Conversion from an issue note to its JSON view.
impl From<&NoteDetail> for NoteDetailJson {
    /// Clones the author and body, renders `created_at` with `ms_to_iso`,
    /// and copies the system-note flag.
    fn from(note: &NoteDetail) -> Self {
        let created_at = ms_to_iso(note.created_at);
        let author_username = note.author_username.clone();
        let body = note.body.clone();
        Self {
            author_username,
            body,
            created_at,
            is_system: note.is_system,
        }
    }
}
/// Serializable JSON view of a merge request, built from `MrDetail` via `From`.
///
/// `created_at`, `updated_at`, `merged_at` and `closed_at` are rendered with
/// `ms_to_iso` during conversion; every other field is copied from the source.
#[derive(Serialize)]
pub struct MrDetailJson {
    pub id: i64,
    pub iid: i64,
    pub title: String,
    pub description: Option<String>,
    pub state: String,
    pub draft: bool,
    pub author_username: String,
    pub source_branch: String,
    pub target_branch: String,
    pub created_at: String,
    pub updated_at: String,
    pub merged_at: Option<String>,
    pub closed_at: Option<String>,
    pub web_url: Option<String>,
    pub project_path: String,
    pub labels: Vec<String>,
    pub assignees: Vec<String>,
    pub reviewers: Vec<String>,
    pub discussions: Vec<MrDiscussionDetailJson>,
}
/// Serializable view of one merge-request discussion: its notes plus the
/// `individual_note` flag copied from `MrDiscussionDetail`.
#[derive(Serialize)]
pub struct MrDiscussionDetailJson {
    pub notes: Vec<MrNoteDetailJson>,
    pub individual_note: bool,
}
/// Serializable view of a single merge-request note.
///
/// `created_at` is the string produced by `ms_to_iso` when converting from
/// `MrNoteDetail`; `position` is a clone of the note's optional diff position.
#[derive(Serialize)]
pub struct MrNoteDetailJson {
    pub author_username: String,
    pub body: String,
    pub created_at: String,
    pub is_system: bool,
    pub position: Option<DiffNotePosition>,
}
impl From<&MrDetail> for MrDetailJson {
fn from(mr: &MrDetail) -> Self {
Self {
id: mr.id,
iid: mr.iid,
title: mr.title.clone(),
description: mr.description.clone(),
state: mr.state.clone(),
draft: mr.draft,
author_username: mr.author_username.clone(),
source_branch: mr.source_branch.clone(),
target_branch: mr.target_branch.clone(),
created_at: ms_to_iso(mr.created_at),
updated_at: ms_to_iso(mr.updated_at),
merged_at: mr.merged_at.map(ms_to_iso),
closed_at: mr.closed_at.map(ms_to_iso),
web_url: mr.web_url.clone(),
project_path: mr.project_path.clone(),
labels: mr.labels.clone(),
assignees: mr.assignees.clone(),
reviewers: mr.reviewers.clone(),
discussions: mr.discussions.iter().map(|d| d.into()).collect(),
}
}
}
impl From<&MrDiscussionDetail> for MrDiscussionDetailJson {
fn from(disc: &MrDiscussionDetail) -> Self {
Self {
notes: disc.notes.iter().map(|n| n.into()).collect(),
individual_note: disc.individual_note,
}
}
}
/// Conversion from a merge-request note to its JSON view.
impl From<&MrNoteDetail> for MrNoteDetailJson {
    /// Clones the author, body and optional diff position, renders
    /// `created_at` with `ms_to_iso`, and copies the system-note flag.
    fn from(note: &MrNoteDetail) -> Self {
        let created_at = ms_to_iso(note.created_at);
        let author_username = note.author_username.clone();
        let body = note.body.clone();
        let position = note.position.clone();
        Self {
            author_username,
            body,
            created_at,
            is_system: note.is_system,
            position,
        }
    }
}
/// Prints an issue as a single-line JSON envelope on stdout.
///
/// The envelope has `ok: true`, the issue under `data`, and a `RobotMeta`
/// carrying `elapsed_ms` under `meta`. If string serialization fails, the
/// error is reported on stderr instead.
pub fn print_show_issue_json(issue: &IssueDetail, elapsed_ms: u64) {
    let data = IssueDetailJson::from(issue);
    let meta = RobotMeta { elapsed_ms };
    let envelope = serde_json::json!({
        "ok": true,
        "data": data,
        "meta": meta,
    });
    let serialized = serde_json::to_string(&envelope);
    match serialized {
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
        Ok(json) => println!("{json}"),
    }
}
/// Prints a merge request as a single-line JSON envelope on stdout.
///
/// The envelope has `ok: true`, the merge request under `data`, and a
/// `RobotMeta` carrying `elapsed_ms` under `meta`. If string serialization
/// fails, the error is reported on stderr instead.
pub fn print_show_mr_json(mr: &MrDetail, elapsed_ms: u64) {
    let data = MrDetailJson::from(mr);
    let meta = RobotMeta { elapsed_ms };
    let envelope = serde_json::json!({
        "ok": true,
        "data": data,
        "meta": meta,
    });
    let serialized = serde_json::to_string(&envelope);
    match serialized {
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
        Ok(json) => println!("{json}"),
    }
}

View File

@@ -0,0 +1,353 @@
use super::*;
use crate::core::db::run_migrations;
use std::path::Path;
/// Opens an in-memory database and applies all migrations to it.
fn setup_test_db() -> Connection {
    let memory_path = Path::new(":memory:");
    let db = create_connection(memory_path).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Inserts project id 1 (`group/repo`).
fn seed_project(conn: &Connection) {
    let sql = "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (1, 100, 'group/repo', 'https://gitlab.example.com', 1000, 2000)";
    conn.execute(sql, []).unwrap();
}
/// Seeds project 1, then inserts issue id 1 with iid 10 into it.
fn seed_issue(conn: &Connection) {
    seed_project(conn);
    let sql = "INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, author_username,
created_at, updated_at, last_seen_at)
VALUES (1, 200, 10, 1, 'Test issue', 'opened', 'author', 1000, 2000, 2000)";
    conn.execute(sql, []).unwrap();
}
/// Inserts project id 2 (`other/repo`).
fn seed_second_project(conn: &Connection) {
    let sql = "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at)
VALUES (2, 101, 'other/repo', 'https://gitlab.example.com/other', 1000, 2000)";
    conn.execute(sql, []).unwrap();
}
/// Inserts one new discussion on `issue_id` and fills it with notes.
///
/// The discussion id is one more than the current maximum in `discussions`.
/// `user_notes` rows are inserted with `is_system = 0`, followed by
/// `system_notes` rows with `is_system = 1`; note positions run consecutively
/// across both groups.
fn seed_discussion_with_notes(
    conn: &Connection,
    issue_id: i64,
    project_id: i64,
    user_notes: usize,
    system_notes: usize,
) {
    let next_id_sql = "SELECT COALESCE(MAX(id), 0) + 1 FROM discussions";
    let disc_id: i64 = conn.query_row(next_id_sql, [], |row| row.get(0)).unwrap();

    let gitlab_discussion_id = format!("disc-{}", disc_id);
    conn.execute(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, first_note_at, last_note_at, last_seen_at)
VALUES (?1, ?2, ?3, ?4, 'Issue', 1000, 2000, 2000)",
        rusqlite::params![disc_id, gitlab_discussion_id, project_id, issue_id],
    )
    .unwrap();

    for user_idx in 0..user_notes {
        let gitlab_id = 1000 + disc_id * 100 + user_idx as i64;
        let position = user_idx as i64;
        conn.execute(
            "INSERT INTO notes (gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position)
VALUES (?1, ?2, ?3, 'user1', 'comment', 1000, 2000, 2000, 0, ?4)",
            rusqlite::params![gitlab_id, disc_id, project_id, position],
        )
        .unwrap();
    }

    for system_idx in 0..system_notes {
        let gitlab_id = 2000 + disc_id * 100 + system_idx as i64;
        // System notes continue the position sequence after the user notes.
        let position = (user_notes + system_idx) as i64;
        conn.execute(
            "INSERT INTO notes (gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position)
VALUES (?1, ?2, ?3, 'system', 'status changed', 1000, 2000, 2000, 1, ?4)",
            rusqlite::params![gitlab_id, disc_id, project_id, position],
        )
        .unwrap();
    }
}
// --- find_issue tests ---
/// Looking up iid 10 without a project filter returns the seeded issue.
#[test]
fn test_find_issue_basic() {
    let db = setup_test_db();
    seed_issue(&db);

    let found = find_issue(&db, 10, None).unwrap();

    assert_eq!(found.iid, 10);
    assert_eq!(found.title, "Test issue");
    assert_eq!(found.state, "opened");
    assert_eq!(found.author_username, "author");
    assert_eq!(found.project_path, "group/repo");
}
/// Passing the matching project path still finds the seeded issue.
#[test]
fn test_find_issue_with_project_filter() {
    let db = setup_test_db();
    seed_issue(&db);

    let found = find_issue(&db, 10, Some("group/repo")).unwrap();

    assert_eq!(found.iid, 10);
    assert_eq!(found.project_path, "group/repo");
}
/// An iid that was never inserted yields `LoreError::NotFound`.
#[test]
fn test_find_issue_not_found() {
    let db = setup_test_db();
    seed_issue(&db);

    let failure = find_issue(&db, 999, None).unwrap_err();

    assert!(matches!(failure, LoreError::NotFound(_)));
}
/// Filtering by a project that does not contain the iid yields `NotFound`.
#[test]
fn test_find_issue_wrong_project_filter() {
    let db = setup_test_db();
    seed_issue(&db);
    seed_second_project(&db);

    // Issue 10 lives in project 1 only; project 2 has no issues.
    let failure = find_issue(&db, 10, Some("other/repo")).unwrap_err();

    assert!(matches!(failure, LoreError::NotFound(_)));
}
/// The same iid in two projects, queried without a filter, is `Ambiguous`.
#[test]
fn test_find_issue_ambiguous_without_project() {
    let db = setup_test_db();
    seed_issue(&db); // iid 10 in project 1
    seed_second_project(&db);
    let duplicate_iid_sql = "INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, author_username,
created_at, updated_at, last_seen_at)
VALUES (2, 201, 10, 2, 'Same iid different project', 'opened', 'author', 1000, 2000, 2000)";
    db.execute(duplicate_iid_sql, []).unwrap();

    let failure = find_issue(&db, 10, None).unwrap_err();

    assert!(matches!(failure, LoreError::Ambiguous(_)));
}
/// The same iid in two projects resolves once a project filter is given.
#[test]
fn test_find_issue_ambiguous_resolved_with_project() {
    let db = setup_test_db();
    seed_issue(&db);
    seed_second_project(&db);
    let duplicate_iid_sql = "INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, author_username,
created_at, updated_at, last_seen_at)
VALUES (2, 201, 10, 2, 'Same iid different project', 'opened', 'author', 1000, 2000, 2000)";
    db.execute(duplicate_iid_sql, []).unwrap();

    let found = find_issue(&db, 10, Some("other/repo")).unwrap();

    assert_eq!(found.title, "Same iid different project");
}
/// An issue with no discussions reports a user-note count of zero.
#[test]
fn test_find_issue_user_notes_count_zero() {
    let db = setup_test_db();
    seed_issue(&db);

    let found = find_issue(&db, 10, None).unwrap();

    assert_eq!(found.user_notes_count, 0);
}
/// System notes are left out of `user_notes_count`.
#[test]
fn test_find_issue_user_notes_count_excludes_system() {
    let db = setup_test_db();
    seed_issue(&db);
    // One discussion holding 2 user notes and 3 system notes.
    seed_discussion_with_notes(&db, 1, 1, 2, 3);

    let found = find_issue(&db, 10, None).unwrap();

    assert_eq!(found.user_notes_count, 2);
}
/// User notes are summed over every discussion attached to the issue.
#[test]
fn test_find_issue_user_notes_count_across_discussions() {
    let db = setup_test_db();
    seed_issue(&db);
    seed_discussion_with_notes(&db, 1, 1, 3, 0); // 3 user notes
    seed_discussion_with_notes(&db, 1, 1, 1, 2); // 1 user note + 2 system notes

    let found = find_issue(&db, 10, None).unwrap();

    assert_eq!(found.user_notes_count, 4);
}
/// Notes attached to a different issue do not count towards this one.
#[test]
fn test_find_issue_notes_count_ignores_other_issues() {
    let db = setup_test_db();
    seed_issue(&db);
    // A second issue (id 2, iid 20) in the same project.
    let other_issue_sql = "INSERT INTO issues (id, gitlab_id, iid, project_id, title, state, author_username,
created_at, updated_at, last_seen_at)
VALUES (2, 201, 20, 1, 'Other issue', 'opened', 'author', 1000, 2000, 2000)";
    db.execute(other_issue_sql, []).unwrap();
    // All notes go on issue 2, none on issue 1.
    seed_discussion_with_notes(&db, 2, 1, 5, 0);

    let found = find_issue(&db, 10, None).unwrap();

    assert_eq!(found.user_notes_count, 0); // issue 10 itself has no notes
}
/// Sanity check that hex styling produces non-empty output.
#[test]
fn test_ansi256_from_rgb() {
    // The conversion itself moved to render.rs; only a basic hex check remains here.
    let styled = render::style_with_hex("test", Some("#ff0000"));
    assert!(!styled.is_empty());
}
/// An issue without assignee rows yields an empty list.
#[test]
fn test_get_issue_assignees_empty() {
    let db = setup_test_db();
    seed_issue(&db);

    let assignees = get_issue_assignees(&db, 1).unwrap();

    assert!(assignees.is_empty());
}
/// A single assignee row is returned as a one-element list.
#[test]
fn test_get_issue_assignees_single() {
    let db = setup_test_db();
    seed_issue(&db);
    let insert_sql = "INSERT INTO issue_assignees (issue_id, username) VALUES (1, 'charlie')";
    db.execute(insert_sql, []).unwrap();

    let assignees = get_issue_assignees(&db, 1).unwrap();

    assert_eq!(assignees, vec!["charlie"]);
}
/// Assignees inserted out of order come back alphabetically.
#[test]
fn test_get_issue_assignees_multiple_sorted() {
    let db = setup_test_db();
    seed_issue(&db);
    let inserts = [
        "INSERT INTO issue_assignees (issue_id, username) VALUES (1, 'bob')",
        "INSERT INTO issue_assignees (issue_id, username) VALUES (1, 'alice')",
    ];
    for sql in inserts {
        db.execute(sql, []).unwrap();
    }

    let assignees = get_issue_assignees(&db, 1).unwrap();

    assert_eq!(assignees, vec!["alice", "bob"]); // alphabetical
}
/// An issue with no entity references has no closing merge requests.
#[test]
fn test_get_closing_mrs_empty() {
    let db = setup_test_db();
    seed_issue(&db);

    let closing = get_closing_mrs(&db, 1).unwrap();

    assert!(closing.is_empty());
}
/// One merge request linked by a `closes` reference is returned with its fields.
#[test]
fn test_get_closing_mrs_single() {
    let db = setup_test_db();
    seed_issue(&db);
    let statements = [
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, author_username,
source_branch, target_branch, created_at, updated_at, last_seen_at)
VALUES (1, 300, 5, 1, 'Fix the bug', 'merged', 'dev', 'fix', 'main', 1000, 2000, 2000)",
        "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id,
target_entity_type, target_entity_id, reference_type, source_method, created_at)
VALUES (1, 'merge_request', 1, 'issue', 1, 'closes', 'api', 3000)",
    ];
    for sql in statements {
        db.execute(sql, []).unwrap();
    }

    let closing = get_closing_mrs(&db, 1).unwrap();

    assert_eq!(closing.len(), 1);
    assert_eq!(closing[0].iid, 5);
    assert_eq!(closing[0].title, "Fix the bug");
    assert_eq!(closing[0].state, "merged");
}
/// A `mentioned` reference does not make a merge request a closing MR.
#[test]
fn test_get_closing_mrs_ignores_mentioned() {
    let db = setup_test_db();
    seed_issue(&db);
    // The only reference to the issue is of type 'mentioned'.
    let statements = [
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, author_username,
source_branch, target_branch, created_at, updated_at, last_seen_at)
VALUES (1, 300, 5, 1, 'Some MR', 'opened', 'dev', 'feat', 'main', 1000, 2000, 2000)",
        "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id,
target_entity_type, target_entity_id, reference_type, source_method, created_at)
VALUES (1, 'merge_request', 1, 'issue', 1, 'mentioned', 'note_parse', 3000)",
    ];
    for sql in statements {
        db.execute(sql, []).unwrap();
    }

    let closing = get_closing_mrs(&db, 1).unwrap();

    assert!(closing.is_empty()); // 'mentioned' refs are not included
}
/// Two closing merge requests come back ordered by ascending iid.
#[test]
fn test_get_closing_mrs_multiple_sorted() {
    let db = setup_test_db();
    seed_issue(&db);
    // MR id 1 has the higher iid (8); MR id 2 has the lower iid (5).
    let statements = [
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, author_username,
source_branch, target_branch, created_at, updated_at, last_seen_at)
VALUES (1, 300, 8, 1, 'Second fix', 'opened', 'dev', 'fix2', 'main', 1000, 2000, 2000)",
        "INSERT INTO merge_requests (id, gitlab_id, iid, project_id, title, state, author_username,
source_branch, target_branch, created_at, updated_at, last_seen_at)
VALUES (2, 301, 5, 1, 'First fix', 'merged', 'dev', 'fix1', 'main', 1000, 2000, 2000)",
        "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id,
target_entity_type, target_entity_id, reference_type, source_method, created_at)
VALUES (1, 'merge_request', 1, 'issue', 1, 'closes', 'api', 3000)",
        "INSERT INTO entity_references (project_id, source_entity_type, source_entity_id,
target_entity_type, target_entity_id, reference_type, source_method, created_at)
VALUES (1, 'merge_request', 2, 'issue', 1, 'closes', 'api', 3000)",
    ];
    for sql in statements {
        db.execute(sql, []).unwrap();
    }

    let closing = get_closing_mrs(&db, 1).unwrap();

    assert_eq!(closing.len(), 2);
    assert_eq!(closing[0].iid, 5); // lower iid first
    assert_eq!(closing[1].iid, 8);
}
/// Text shorter than the width is returned unchanged.
#[test]
fn wrap_text_single_line() {
    let wrapped = wrap_text("hello world", 80, " ");
    assert_eq!(wrapped, "hello world");
}
/// Text longer than the width is broken across at least two lines.
#[test]
fn wrap_text_multiple_lines() {
    let wrapped = wrap_text("one two three four five", 10, " ");
    assert!(wrapped.contains('\n'));
}
/// A millisecond timestamp formats to a string beginning with its calendar date.
#[test]
fn format_date_extracts_date_part() {
    let ms = 1705276800000;
    let formatted = format_date(ms);
    assert!(formatted.starts_with("2024-01-15"));
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,24 @@
pub mod surgical;
pub use surgical::run_sync_surgical;
use crate::cli::render::{self, Icons, Theme, format_number};
use serde::Serialize;
use std::time::Instant;
use tracing::Instrument;
use tracing::{debug, warn};
use crate::Config;
use crate::cli::progress::{format_stage_line, nested_progress, stage_spinner_v2};
use crate::core::error::Result;
use crate::core::metrics::{MetricsLayer, StageTiming};
use crate::core::shutdown::ShutdownSignal;
use super::embed::run_embed;
use super::generate_docs::run_generate_docs;
use super::ingest::{
DryRunPreview, IngestDisplay, ProjectStatusEnrichment, ProjectSummary, run_ingest,
run_ingest_dry_run,
};
include!("run.rs");
include!("render.rs");

View File

@@ -0,0 +1,533 @@
/// Prints the human-readable summary of a finished sync run.
///
/// When nothing was updated and nothing failed, a single "Already up to date"
/// line is shown. Otherwise a headline with issue/MR counts is followed by up
/// to three zero-suppressed detail lines (supporting counts, document counts,
/// failures). If `metrics` is provided and `should_print_timings` allows it,
/// the per-stage timing summary is printed afterwards.
pub fn print_sync(
    result: &SyncResult,
    elapsed: std::time::Duration,
    metrics: Option<&MetricsLayer>,
    show_timings: bool,
) {
    let any_positive = |counts: &[usize]| counts.iter().any(|&n| n > 0);
    let has_data = any_positive(&[
        result.issues_updated,
        result.mrs_updated,
        result.discussions_fetched,
        result.resource_events_fetched,
        result.mr_diffs_fetched,
        result.documents_regenerated,
        result.documents_embedded,
        result.statuses_enriched,
    ]);
    let has_failures = any_positive(&[
        result.resource_events_failed,
        result.mr_diffs_failed,
        result.status_enrichment_errors,
        result.documents_errored,
        result.embedding_failed,
    ]);
    let elapsed_text = format!("{:.1}s", elapsed.as_secs_f64());

    if has_data || has_failures {
        let headline = if has_failures {
            Theme::warning().bold().render("Sync completed with issues")
        } else {
            Theme::success().bold().render("Synced")
        };
        println!(
            "\n {} {} issues and {} MRs in {}",
            headline,
            Theme::info()
                .bold()
                .render(&result.issues_updated.to_string()),
            Theme::info().bold().render(&result.mrs_updated.to_string()),
            Theme::timing().render(&elapsed_text)
        );

        // Renders "<count> <label>" with the count highlighted and the label dimmed.
        let count_part = |count: usize, label: &str| {
            format!(
                "{} {}",
                Theme::info().render(&count.to_string()),
                Theme::dim().render(label)
            )
        };

        // Supporting counts, joined by a dimmed middle dot; zero entries are dropped.
        let details: Vec<String> = [
            (result.discussions_fetched, "discussions"),
            (result.resource_events_fetched, "events"),
            (result.mr_diffs_fetched, "diffs"),
            (result.statuses_enriched, "statuses updated"),
        ]
        .into_iter()
        .filter(|&(count, _)| count > 0)
        .map(|(count, label)| count_part(count, label))
        .collect();
        if !details.is_empty() {
            let sep = Theme::dim().render(" \u{b7} ");
            println!(" {}", details.join(&sep));
        }

        // Document regeneration and embedding form a second detail line.
        let mut doc_parts: Vec<String> = [
            (result.documents_regenerated, "docs regenerated"),
            (result.documents_embedded, "embedded"),
        ]
        .into_iter()
        .filter(|&(count, _)| count > 0)
        .map(|(count, label)| count_part(count, label))
        .collect();
        if result.documents_errored > 0 {
            doc_parts
                .push(Theme::error().render(&format!("{} doc errors", result.documents_errored)));
        }
        if !doc_parts.is_empty() {
            let sep = Theme::dim().render(" \u{b7} ");
            println!(" {}", doc_parts.join(&sep));
        }

        // Failures get their own line, rendered as one error-styled string.
        let errors: Vec<String> = [
            (result.resource_events_failed, "event failures"),
            (result.mr_diffs_failed, "diff failures"),
            (result.status_enrichment_errors, "status errors"),
            (result.embedding_failed, "embedding failures"),
        ]
        .into_iter()
        .filter(|&(count, _)| count > 0)
        .map(|(count, label)| format!("{} {}", count, label))
        .collect();
        if !errors.is_empty() {
            println!(" {}", Theme::error().render(&errors.join(" \u{b7} ")));
        }
        println!();
    } else {
        println!(
            "\n {} ({})\n",
            Theme::dim().render("Already up to date"),
            Theme::timing().render(&elapsed_text)
        );
    }

    if let Some(layer) = metrics {
        let stages = layer.extract_timings();
        if should_print_timings(show_timings, &stages) {
            print_timing_summary(&stages);
        }
    }
}
/// Builds one summary row per project for the issues stage.
///
/// Each row is the project path (left-aligned, padded to 30 columns, muted)
/// followed by the parts joined with a dimmed middle dot: the issue count
/// (always present), then discussions, statuses, and events when applicable,
/// then warning-styled status errors and event failures when non-zero.
fn issue_sub_rows(projects: &[ProjectSummary]) -> Vec<String> {
    // The separator is the same for every row, so render it once up front.
    let sep = Theme::dim().render(" \u{b7} ");
    projects
        .iter()
        .map(|p| {
            let mut parts: Vec<String> = vec![format!(
                "{} {}",
                p.items_upserted,
                if p.items_upserted == 1 {
                    "issue"
                } else {
                    "issues"
                }
            )];
            if p.discussions_synced > 0 {
                parts.push(format!("{} discussions", p.discussions_synced));
            }
            // Shown whenever statuses were seen, even if none ended up updated.
            if p.statuses_seen > 0 || p.statuses_enriched > 0 {
                parts.push(format!("{} statuses updated", p.statuses_enriched));
            }
            if p.events_fetched > 0 {
                parts.push(format!("{} events", p.events_fetched));
            }
            if p.status_errors > 0 {
                parts.push(Theme::warning().render(&format!("{} status errors", p.status_errors)));
            }
            if p.events_failed > 0 {
                parts.push(Theme::warning().render(&format!("{} event failures", p.events_failed)));
            }
            let detail = parts.join(&sep);
            let path = Theme::muted().render(&format!("{:<30}", p.path));
            format!(" {path} {detail}")
        })
        .collect()
}
/// Builds one summary row per project for the status-enrichment stage.
///
/// Each row is the project path (left-aligned, padded to 30 columns, muted)
/// followed by the parts joined with a dimmed middle dot. The error count is
/// `partial_errors` plus one if `error` is set. The "skipped" marker is only
/// shown when there are no errors and the project's mode is `"skipped"`.
fn status_sub_rows(projects: &[ProjectStatusEnrichment]) -> Vec<String> {
    // The separator is the same for every row, so render it once up front.
    let sep = Theme::dim().render(" \u{b7} ");
    projects
        .iter()
        .map(|p| {
            let total_errors = p.partial_errors + usize::from(p.error.is_some());
            let mut parts: Vec<String> = vec![format!("{} statuses updated", p.enriched)];
            if p.cleared > 0 {
                parts.push(format!("{} cleared", p.cleared));
            }
            if p.seen > 0 {
                parts.push(format!("{} seen", p.seen));
            }
            if total_errors > 0 {
                parts.push(Theme::warning().render(&format!("{} errors", total_errors)));
            } else if p.mode == "skipped" {
                if let Some(reason) = &p.reason {
                    parts.push(Theme::dim().render(&format!("skipped ({reason})")));
                } else {
                    parts.push(Theme::dim().render("skipped"));
                }
            }
            let detail = parts.join(&sep);
            let path = Theme::muted().render(&format!("{:<30}", p.path));
            format!(" {path} {detail}")
        })
        .collect()
}
/// Builds one summary row per project for the merge-requests stage.
///
/// Each row is the project path (left-aligned, padded to 30 columns, muted)
/// followed by the parts joined with a dimmed middle dot: the MR count
/// (always present), then discussions, diffs, and events when non-zero,
/// then warning-styled diff and event failures when non-zero.
fn mr_sub_rows(projects: &[ProjectSummary]) -> Vec<String> {
    // The separator is the same for every row, so render it once up front.
    let sep = Theme::dim().render(" \u{b7} ");
    projects
        .iter()
        .map(|p| {
            let mut parts: Vec<String> = vec![format!(
                "{} {}",
                p.items_upserted,
                if p.items_upserted == 1 { "MR" } else { "MRs" }
            )];
            if p.discussions_synced > 0 {
                parts.push(format!("{} discussions", p.discussions_synced));
            }
            if p.mr_diffs_fetched > 0 {
                parts.push(format!("{} diffs", p.mr_diffs_fetched));
            }
            if p.events_fetched > 0 {
                parts.push(format!("{} events", p.events_fetched));
            }
            if p.mr_diffs_failed > 0 {
                parts
                    .push(Theme::warning().render(&format!("{} diff failures", p.mr_diffs_failed)));
            }
            if p.events_failed > 0 {
                parts.push(Theme::warning().render(&format!("{} event failures", p.events_failed)));
            }
            let detail = parts.join(&sep);
            let path = Theme::muted().render(&format!("{:<30}", p.path));
            format!(" {path} {detail}")
        })
        .collect()
}
/// Clears the stage's progress bar and prints its one-line summary in its place.
fn emit_stage_line(
    pb: &indicatif::ProgressBar,
    icon: &str,
    label: &str,
    summary: &str,
    elapsed: std::time::Duration,
) {
    pb.finish_and_clear();
    let stage_line = format_stage_line(icon, label, summary, elapsed);
    print_static_lines(std::slice::from_ref(&stage_line));
}
/// Clears the stage's progress bar and prints its summary line followed by
/// the given per-project sub-rows.
fn emit_stage_block(
    pb: &indicatif::ProgressBar,
    icon: &str,
    label: &str,
    summary: &str,
    elapsed: std::time::Duration,
    sub_rows: &[String],
) {
    pb.finish_and_clear();
    let header = format_stage_line(icon, label, summary, elapsed);
    let block: Vec<String> = std::iter::once(header)
        .chain(sub_rows.iter().cloned())
        .collect();
    print_static_lines(&block);
}
/// Prints each line to stdout inside the shared multi-progress `suspend`
/// callback.
fn print_static_lines(lines: &[String]) {
    let emit_all = || {
        lines.iter().for_each(|line| println!("{line}"));
    };
    crate::cli::progress::multi().suspend(emit_all);
}
/// Returns `true` only when timings were requested and at least one stage
/// was recorded.
fn should_print_timings(show_timings: bool, stages: &[StageTiming]) -> bool {
    matches!((show_timings, stages.is_empty()), (true, false))
}
/// Appends a parenthesized, comma-separated list of non-zero failure counts
/// to `summary`.
///
/// Each `(label, count)` pair with `count > 0` is rendered as a
/// warning-styled `"<count> <label>"`. When every count is zero, `summary`
/// is left untouched.
fn append_failures(summary: &mut String, failures: &[(&str, usize)]) {
    // Filter before rendering so zero-count entries are never formatted or styled.
    let rendered: Vec<String> = failures
        .iter()
        .filter(|(_, count)| *count > 0)
        .map(|(label, count)| Theme::warning().render(&format!("{count} {label}")))
        .collect();
    if rendered.is_empty() {
        return;
    }
    summary.push_str(" (");
    summary.push_str(&rendered.join(", "));
    summary.push(')');
}
/// Aggregates status-enrichment results across projects into a summary line.
///
/// Returns the summary text (parts joined by a middle dot) and whether any
/// errors occurred. A project contributes `partial_errors` plus one if its
/// `error` is set. When there are no errors and every project (or none at
/// all) has mode `"skipped"`, a trailing "skipped" part is added.
fn summarize_status_enrichment(projects: &[ProjectStatusEnrichment]) -> (String, bool) {
    let mut enriched_total: usize = 0;
    let mut seen_total: usize = 0;
    let mut cleared_total: usize = 0;
    let mut error_total: usize = 0;
    let mut skipped_projects: usize = 0;
    for project in projects {
        enriched_total += project.enriched;
        seen_total += project.seen;
        cleared_total += project.cleared;
        error_total += project.partial_errors + usize::from(project.error.is_some());
        if project.mode == "skipped" {
            skipped_projects += 1;
        }
    }
    let has_errors = error_total > 0;
    let all_skipped = projects.is_empty() || skipped_projects == projects.len();

    let mut parts = vec![format!(
        "{} statuses updated",
        format_number(enriched_total as i64)
    )];
    if cleared_total > 0 {
        parts.push(format!("{} cleared", format_number(cleared_total as i64)));
    }
    if seen_total > 0 {
        parts.push(format!("{} seen", format_number(seen_total as i64)));
    }
    if has_errors {
        parts.push(format!("{} errors", format_number(error_total as i64)));
    } else if all_skipped {
        parts.push("skipped".to_string());
    }
    (parts.join(" \u{b7} "), has_errors)
}
/// Prints the divider produced by `render::section_divider` for `title`.
fn section(title: &str) {
    let divider = render::section_divider(title);
    println!("{}", divider);
}
/// Prints the "Timing" section.
///
/// Only the sub-stages of each entry in `stages` are printed (at depth 1);
/// the top-level entries themselves get no line of their own.
fn print_timing_summary(stages: &[StageTiming]) {
    section("Timing");
    stages
        .iter()
        .flat_map(|stage| stage.sub_stages.iter())
        .for_each(|sub| print_stage_line(sub, 1));
}
fn print_stage_line(stage: &StageTiming, depth: usize) {
let indent = " ".repeat(depth);
let name = if let Some(ref project) = stage.project {
format!("{} ({})", stage.name, project)
} else {
stage.name.clone()
};
let pad_width = 30_usize.saturating_sub(indent.len() + name.len());
let dots = Theme::dim().render(&".".repeat(pad_width.max(2)));
let time_str = Theme::bold().render(&format!("{:.1}s", stage.elapsed_ms as f64 / 1000.0));
let mut parts: Vec<String> = Vec::new();
if stage.items_processed > 0 {
parts.push(format!("{} items", stage.items_processed));
}
if stage.errors > 0 {
parts.push(Theme::error().render(&format!("{} errors", stage.errors)));
}
if stage.rate_limit_hits > 0 {
parts.push(Theme::warning().render(&format!("{} rate limits", stage.rate_limit_hits)));
}
if parts.is_empty() {
println!("{indent}{name} {dots} {time_str}");
} else {
let suffix = parts.join(" \u{b7} ");
println!("{indent}{name} {dots} {time_str} ({suffix})");
}
for sub in &stage.sub_stages {
print_stage_line(sub, depth + 1);
}
}
/// JSON envelope emitted by `print_sync_json`: an `ok` flag, the borrowed
/// sync result under `data`, and run metadata under `meta`.
#[derive(Serialize)]
struct SyncJsonOutput<'a> {
    ok: bool,
    data: &'a SyncResult,
    meta: SyncMeta,
}
/// Metadata block of the sync JSON envelope.
#[derive(Serialize)]
struct SyncMeta {
    run_id: String,
    elapsed_ms: u64,
    // Left out of the serialized output entirely when no stages were recorded.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    stages: Vec<StageTiming>,
}
/// Prints the sync result as a single-line JSON envelope on stdout.
///
/// `meta` carries the run id taken from `result`, the elapsed milliseconds,
/// and the stage timings extracted from `metrics` (an empty list when no
/// metrics layer is supplied). If serialization fails, the error is reported
/// on stderr instead.
pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64, metrics: Option<&MetricsLayer>) {
    let stages = match metrics {
        Some(layer) => layer.extract_timings(),
        None => Vec::new(),
    };
    let meta = SyncMeta {
        run_id: result.run_id.clone(),
        elapsed_ms,
        stages,
    };
    let envelope = SyncJsonOutput {
        ok: true,
        data: result,
        meta,
    };
    let serialized = serde_json::to_string(&envelope);
    match serialized {
        Err(e) => eprintln!("Error serializing to JSON: {e}"),
        Ok(json) => println!("{json}"),
    }
}
/// Outcome of a sync dry run: the ingest previews for issues and merge
/// requests, plus whether the docs-generation and embedding stages would run.
#[derive(Debug, Default, Serialize)]
pub struct SyncDryRunResult {
    pub issues_preview: DryRunPreview,
    pub mrs_preview: DryRunPreview,
    pub would_generate_docs: bool,
    pub would_embed: bool,
}
async fn run_sync_dry_run(config: &Config, options: &SyncOptions) -> Result<SyncResult> {
// Get dry run previews for both issues and MRs
let issues_preview = run_ingest_dry_run(config, "issues", None, options.full)?;
let mrs_preview = run_ingest_dry_run(config, "mrs", None, options.full)?;
let dry_result = SyncDryRunResult {
issues_preview,
mrs_preview,
would_generate_docs: !options.no_docs,
would_embed: !options.no_embed,
};
if options.robot_mode {
print_sync_dry_run_json(&dry_result);
} else {
print_sync_dry_run(&dry_result);
}
// Return an empty SyncResult since this is just a preview
Ok(SyncResult::default())
}
/// Prints the human-readable dry-run preview.
///
/// Output is a "Dry run" banner, one section each for issues and merge
/// requests, and a "Pipeline" section listing the generate-docs and embed
/// stages, with stages that would not run dimmed and marked "(skip)".
pub fn print_sync_dry_run(result: &SyncDryRunResult) {
    println!(
        "\n {} {}",
        Theme::info().bold().render("Dry run"),
        Theme::dim().render("(no changes will be made)")
    );
    print_dry_run_entity("Issues", &result.issues_preview);
    print_dry_run_entity("Merge Requests", &result.mrs_preview);

    section("Pipeline");
    let stages: Vec<String> = [
        (
            result.would_generate_docs,
            "generate-docs",
            "generate-docs (skip)",
        ),
        (result.would_embed, "embed", "embed (skip)"),
    ]
    .into_iter()
    .map(|(would_run, active, skipped)| {
        if would_run {
            active.to_string()
        } else {
            Theme::dim().render(skipped)
        }
    })
    .collect();
    println!(" {}", stages.join(" \u{b7} "));
}
/// Prints one dry-run section for an entity type.
///
/// The first line shows the sync mode ("full" or "incremental") and the
/// project count. Each project then gets a line with its path, whether it
/// would be an initial or incremental sync (based on `has_cursor`), and the
/// existing item count when that is above zero.
fn print_dry_run_entity(label: &str, preview: &DryRunPreview) {
    section(label);
    let mode = if preview.sync_mode == "full" {
        Theme::warning().render("full")
    } else {
        Theme::success().render("incremental")
    };
    println!(" {} \u{b7} {} projects", mode, preview.projects.len());

    for project in &preview.projects {
        let sync_status = if project.has_cursor {
            Theme::success().render("incremental")
        } else {
            Theme::warning().render("initial sync")
        };
        let row = if project.existing_count > 0 {
            format!(
                " {} \u{b7} {} \u{b7} {} existing",
                &project.path, sync_status, project.existing_count
            )
        } else {
            format!(" {} \u{b7} {}", &project.path, sync_status)
        };
        println!("{}", row);
    }
}
/// JSON envelope emitted by `print_sync_dry_run_json`.
#[derive(Serialize)]
struct SyncDryRunJsonOutput {
    ok: bool,
    dry_run: bool,
    data: SyncDryRunJsonData,
}
/// Payload of the dry-run JSON envelope: the list of pipeline stages.
#[derive(Serialize)]
struct SyncDryRunJsonData {
    stages: Vec<SyncDryRunStage>,
}
/// One pipeline stage in the dry-run JSON output.
#[derive(Serialize)]
struct SyncDryRunStage {
    name: String,
    would_run: bool,
    // Omitted from the serialized output when the stage has no preview.
    #[serde(skip_serializing_if = "Option::is_none")]
    preview: Option<DryRunPreview>,
}
pub fn print_sync_dry_run_json(result: &SyncDryRunResult) {
let output = SyncDryRunJsonOutput {
ok: true,
dry_run: true,
data: SyncDryRunJsonData {
stages: vec![
SyncDryRunStage {
name: "ingest_issues".to_string(),
would_run: true,
preview: Some(result.issues_preview.clone()),
},
SyncDryRunStage {
name: "ingest_mrs".to_string(),
would_run: true,
preview: Some(result.mrs_preview.clone()),
},
SyncDryRunStage {
name: "generate_docs".to_string(),
would_run: result.would_generate_docs,
preview: None,
},
SyncDryRunStage {
name: "embed".to_string(),
would_run: result.would_embed,
preview: None,
},
],
},
};
match serde_json::to_string(&output) {
Ok(json) => println!("{json}"),
Err(e) => eprintln!("Error serializing to JSON: {e}"),
}
}
#[cfg(test)]
#[path = "sync_tests.rs"]
mod tests;

View File

@@ -0,0 +1,380 @@
/// Options controlling a sync run.
///
/// A run is "surgical" when at least one issue or merge-request IID is
/// listed; see [`SyncOptions::is_surgical`].
#[derive(Debug, Default)]
pub struct SyncOptions {
    pub full: bool,
    pub force: bool,
    pub no_embed: bool,
    pub no_docs: bool,
    pub no_events: bool,
    pub robot_mode: bool,
    pub dry_run: bool,
    pub issue_iids: Vec<u64>,
    pub mr_iids: Vec<u64>,
    pub project: Option<String>,
    pub preflight_only: bool,
}

impl SyncOptions {
    /// Upper bound on surgical targets.
    pub const MAX_SURGICAL_TARGETS: usize = 100;

    /// Returns `true` when any issue or merge-request IID has been specified.
    pub fn is_surgical(&self) -> bool {
        let no_targets = self.issue_iids.is_empty() && self.mr_iids.is_empty();
        !no_targets
    }
}
/// Issue and merge-request IIDs, serialized as part of `SyncResult` through
/// its `surgical_iids` field.
#[derive(Debug, Default, Serialize)]
pub struct SurgicalIids {
    pub issues: Vec<u64>,
    pub merge_requests: Vec<u64>,
}
/// Per-entity outcome record, serialized through `SyncResult::entity_results`.
#[derive(Debug, Serialize)]
pub struct EntitySyncResult {
    pub entity_type: String,
    pub iid: u64,
    pub outcome: String,
    // Both optional fields below are omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub toctou_reason: Option<String>,
}
/// Aggregate counters and metadata for one sync run.
///
/// `run_id`, `issue_projects` and `mr_projects` are excluded from
/// serialization. The four optional fields (`surgical_mode`, `surgical_iids`,
/// `entity_results`, `preflight_only`) are only serialized when set.
#[derive(Debug, Default, Serialize)]
pub struct SyncResult {
    // Not serialized with the result; `print_sync_json` emits it under `meta`.
    #[serde(skip)]
    pub run_id: String,
    pub issues_updated: usize,
    pub mrs_updated: usize,
    pub discussions_fetched: usize,
    pub resource_events_fetched: usize,
    pub resource_events_failed: usize,
    pub mr_diffs_fetched: usize,
    pub mr_diffs_failed: usize,
    pub documents_regenerated: usize,
    pub documents_errored: usize,
    pub documents_embedded: usize,
    pub embedding_failed: usize,
    pub status_enrichment_errors: usize,
    pub statuses_enriched: usize,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub surgical_mode: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub surgical_iids: Option<SurgicalIids>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub entity_results: Option<Vec<EntitySyncResult>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub preflight_only: Option<bool>,
    // Per-project summaries are kept in memory but never serialized.
    #[serde(skip)]
    pub issue_projects: Vec<ProjectSummary>,
    #[serde(skip)]
    pub mr_projects: Vec<ProjectSummary>,
}
/// Alias for [`Theme::color_icon`] to keep call sites concise.
///
/// Forwards `icon` and `has_errors` unchanged and returns whatever
/// [`Theme::color_icon`] produces.
fn color_icon(icon: &str, has_errors: bool) -> String {
Theme::color_icon(icon, has_errors)
}
pub async fn run_sync(
config: &Config,
options: SyncOptions,
run_id: Option<&str>,
signal: &ShutdownSignal,
) -> Result<SyncResult> {
// Surgical dispatch: if any IIDs specified, route to surgical pipeline
if options.is_surgical() {
return run_sync_surgical(config, options, run_id, signal).await;
}
let generated_id;
let run_id = match run_id {
Some(id) => id,
None => {
generated_id = uuid::Uuid::new_v4().simple().to_string();
&generated_id[..8]
}
};
let span = tracing::info_span!("sync", %run_id);
async move {
let mut result = SyncResult {
run_id: run_id.to_string(),
..SyncResult::default()
};
// Handle dry_run mode - show preview without making any changes
if options.dry_run {
return run_sync_dry_run(config, &options).await;
}
let ingest_display = if options.robot_mode {
IngestDisplay::silent()
} else {
IngestDisplay::progress_only()
};
// ── Stage: Issues ──
let stage_start = Instant::now();
let spinner = stage_spinner_v2(Icons::sync(), "Issues", "fetching...", options.robot_mode);
debug!("Sync: ingesting issues");
let issues_result = run_ingest(
config,
"issues",
None,
options.force,
options.full,
false, // dry_run - sync has its own dry_run handling
ingest_display,
Some(spinner.clone()),
signal,
)
.await?;
result.issues_updated = issues_result.issues_upserted;
result.discussions_fetched += issues_result.discussions_fetched;
result.resource_events_fetched += issues_result.resource_events_fetched;
result.resource_events_failed += issues_result.resource_events_failed;
result.status_enrichment_errors += issues_result.status_enrichment_errors;
for sep in &issues_result.status_enrichment_projects {
result.statuses_enriched += sep.enriched;
}
result.issue_projects = issues_result.project_summaries;
let issues_elapsed = stage_start.elapsed();
if !options.robot_mode {
let (status_summary, status_has_errors) =
summarize_status_enrichment(&issues_result.status_enrichment_projects);
let status_icon = color_icon(
if status_has_errors {
Icons::warning()
} else {
Icons::success()
},
status_has_errors,
);
let mut status_lines = vec![format_stage_line(
&status_icon,
"Status",
&status_summary,
issues_elapsed,
)];
status_lines.extend(status_sub_rows(&issues_result.status_enrichment_projects));
print_static_lines(&status_lines);
}
let mut issues_summary = format!(
"{} issues from {} {}",
format_number(result.issues_updated as i64),
issues_result.projects_synced,
if issues_result.projects_synced == 1 { "project" } else { "projects" }
);
append_failures(
&mut issues_summary,
&[
("event failures", issues_result.resource_events_failed),
("status errors", issues_result.status_enrichment_errors),
],
);
let issues_icon = color_icon(
if issues_result.resource_events_failed > 0 || issues_result.status_enrichment_errors > 0
{
Icons::warning()
} else {
Icons::success()
},
issues_result.resource_events_failed > 0 || issues_result.status_enrichment_errors > 0,
);
if options.robot_mode {
emit_stage_line(&spinner, &issues_icon, "Issues", &issues_summary, issues_elapsed);
} else {
let sub_rows = issue_sub_rows(&result.issue_projects);
emit_stage_block(
&spinner,
&issues_icon,
"Issues",
&issues_summary,
issues_elapsed,
&sub_rows,
);
}
if signal.is_cancelled() {
debug!("Shutdown requested after issues stage, returning partial sync results");
return Ok(result);
}
// ── Stage: MRs ──
let stage_start = Instant::now();
let spinner = stage_spinner_v2(Icons::sync(), "MRs", "fetching...", options.robot_mode);
debug!("Sync: ingesting merge requests");
let mrs_result = run_ingest(
config,
"mrs",
None,
options.force,
options.full,
false, // dry_run - sync has its own dry_run handling
ingest_display,
Some(spinner.clone()),
signal,
)
.await?;
result.mrs_updated = mrs_result.mrs_upserted;
result.discussions_fetched += mrs_result.discussions_fetched;
result.resource_events_fetched += mrs_result.resource_events_fetched;
result.resource_events_failed += mrs_result.resource_events_failed;
result.mr_diffs_fetched += mrs_result.mr_diffs_fetched;
result.mr_diffs_failed += mrs_result.mr_diffs_failed;
result.mr_projects = mrs_result.project_summaries;
let mrs_elapsed = stage_start.elapsed();
let mut mrs_summary = format!(
"{} merge requests from {} {}",
format_number(result.mrs_updated as i64),
mrs_result.projects_synced,
if mrs_result.projects_synced == 1 { "project" } else { "projects" }
);
append_failures(
&mut mrs_summary,
&[
("event failures", mrs_result.resource_events_failed),
("diff failures", mrs_result.mr_diffs_failed),
],
);
let mrs_icon = color_icon(
if mrs_result.resource_events_failed > 0 || mrs_result.mr_diffs_failed > 0 {
Icons::warning()
} else {
Icons::success()
},
mrs_result.resource_events_failed > 0 || mrs_result.mr_diffs_failed > 0,
);
if options.robot_mode {
emit_stage_line(&spinner, &mrs_icon, "MRs", &mrs_summary, mrs_elapsed);
} else {
let sub_rows = mr_sub_rows(&result.mr_projects);
emit_stage_block(&spinner, &mrs_icon, "MRs", &mrs_summary, mrs_elapsed, &sub_rows);
}
if signal.is_cancelled() {
debug!("Shutdown requested after MRs stage, returning partial sync results");
return Ok(result);
}
// ── Stage: Docs ──
if !options.no_docs {
let stage_start = Instant::now();
let spinner = stage_spinner_v2(Icons::sync(), "Docs", "generating...", options.robot_mode);
debug!("Sync: generating documents");
let docs_bar = nested_progress("Docs", 0, options.robot_mode);
let docs_bar_clone = docs_bar.clone();
let docs_cb: Box<dyn Fn(usize, usize)> = Box::new(move |processed, total| {
if total > 0 {
docs_bar_clone.set_length(total as u64);
docs_bar_clone.set_position(processed as u64);
}
});
let docs_result = run_generate_docs(config, options.full, None, Some(docs_cb))?;
result.documents_regenerated = docs_result.regenerated;
result.documents_errored = docs_result.errored;
docs_bar.finish_and_clear();
let mut docs_summary = format!(
"{} documents generated",
format_number(result.documents_regenerated as i64),
);
append_failures(&mut docs_summary, &[("errors", docs_result.errored)]);
let docs_icon = color_icon(
if docs_result.errored > 0 {
Icons::warning()
} else {
Icons::success()
},
docs_result.errored > 0,
);
emit_stage_line(&spinner, &docs_icon, "Docs", &docs_summary, stage_start.elapsed());
} else {
debug!("Sync: skipping document generation (--no-docs)");
}
// ── Stage: Embed ──
if !options.no_embed {
let stage_start = Instant::now();
let spinner = stage_spinner_v2(Icons::sync(), "Embed", "preparing...", options.robot_mode);
debug!("Sync: embedding documents");
let embed_bar = nested_progress("Embed", 0, options.robot_mode);
let embed_bar_clone = embed_bar.clone();
let embed_cb: Box<dyn Fn(usize, usize)> = Box::new(move |processed, total| {
if total > 0 {
embed_bar_clone.set_length(total as u64);
embed_bar_clone.set_position(processed as u64);
}
});
match run_embed(config, options.full, false, Some(embed_cb), signal).await {
Ok(embed_result) => {
result.documents_embedded = embed_result.docs_embedded;
result.embedding_failed = embed_result.failed;
embed_bar.finish_and_clear();
let mut embed_summary = format!(
"{} chunks embedded",
format_number(embed_result.chunks_embedded as i64),
);
let mut tail_parts = Vec::new();
if embed_result.failed > 0 {
tail_parts.push(format!("{} failed", embed_result.failed));
}
if embed_result.skipped > 0 {
tail_parts.push(format!("{} skipped", embed_result.skipped));
}
if !tail_parts.is_empty() {
embed_summary.push_str(&format!(" ({})", tail_parts.join(", ")));
}
let embed_icon = color_icon(
if embed_result.failed > 0 {
Icons::warning()
} else {
Icons::success()
},
embed_result.failed > 0,
);
emit_stage_line(
&spinner,
&embed_icon,
"Embed",
&embed_summary,
stage_start.elapsed(),
);
}
Err(e) => {
embed_bar.finish_and_clear();
let warn_summary = format!("skipped ({})", e);
let warn_icon = color_icon(Icons::warning(), true);
emit_stage_line(
&spinner,
&warn_icon,
"Embed",
&warn_summary,
stage_start.elapsed(),
);
warn!(error = %e, "Embedding stage failed (Ollama may be unavailable), continuing");
}
}
} else {
debug!("Sync: skipping embedding (--no-embed)");
}
debug!(
issues = result.issues_updated,
mrs = result.mrs_updated,
discussions = result.discussions_fetched,
resource_events = result.resource_events_fetched,
resource_events_failed = result.resource_events_failed,
mr_diffs = result.mr_diffs_fetched,
mr_diffs_failed = result.mr_diffs_failed,
docs = result.documents_regenerated,
embedded = result.documents_embedded,
"Sync pipeline complete"
);
Ok(result)
}
.instrument(span)
.await
}

View File

@@ -12,11 +12,11 @@ use crate::core::lock::{AppLock, LockOptions};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::shutdown::ShutdownSignal;
use crate::core::sync_run::SyncRunRecorder;
use crate::documents::{SourceType, regenerate_dirty_documents_for_sources};
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
use crate::embedding::pipeline::{DEFAULT_EMBED_CONCURRENCY, embed_documents_by_ids};
use crate::gitlab::GitLabClient;
use crate::ingestion::storage::sync_run::SyncRunRecorder;
use crate::ingestion::surgical::{
fetch_dependents_for_issue, fetch_dependents_for_mr, ingest_issue_by_iid, ingest_mr_by_iid,
preflight_fetch,

View File

@@ -0,0 +1,268 @@
use super::*;
fn default_options() -> SyncOptions {
SyncOptions {
full: false,
force: false,
no_embed: false,
no_docs: false,
no_events: false,
robot_mode: false,
dry_run: false,
issue_iids: vec![],
mr_iids: vec![],
project: None,
preflight_only: false,
}
}
/// All-zero counts must leave the summary text untouched.
#[test]
fn append_failures_skips_zeroes() {
    let mut text = String::from("base");
    append_failures(&mut text, &[("errors", 0), ("failures", 0)]);
    assert_eq!(text, "base");
}
/// Non-zero counts are rendered alongside the original summary text.
#[test]
fn append_failures_renders_non_zero_counts() {
    let mut text = String::from("base");
    append_failures(&mut text, &[("errors", 2), ("failures", 1)]);
    for expected in ["base", "2 errors", "1 failures"] {
        assert!(text.contains(expected));
    }
}
/// A single skipped project yields a "skipped" summary without the error flag.
#[test]
fn summarize_status_enrichment_reports_skipped_when_all_skipped() {
    let skipped_project = ProjectStatusEnrichment {
        path: "vs/typescript-code".to_string(),
        mode: "skipped".to_string(),
        reason: None,
        seen: 0,
        enriched: 0,
        cleared: 0,
        without_widget: 0,
        partial_errors: 0,
        first_partial_error: None,
        error: None,
    };
    let projects = vec![skipped_project];

    let (text, errored) = summarize_status_enrichment(&projects);

    assert!(text.contains("0 statuses updated"));
    assert!(text.contains("skipped"));
    assert!(!errored);
}
/// Partial errors plus a project-level error are counted together and set the error flag.
#[test]
fn summarize_status_enrichment_reports_errors() {
    let failing_project = ProjectStatusEnrichment {
        path: "vs/typescript-code".to_string(),
        mode: "fetched".to_string(),
        reason: None,
        seen: 3,
        enriched: 1,
        cleared: 1,
        without_widget: 0,
        partial_errors: 2,
        first_partial_error: None,
        error: Some("boom".to_string()),
    };
    let projects = vec![failing_project];

    let (text, errored) = summarize_status_enrichment(&projects);

    assert!(text.contains("1 statuses updated"));
    assert!(text.contains("1 cleared"));
    assert!(text.contains("3 seen"));
    assert!(text.contains("3 errors"));
    assert!(errored);
}
/// Timings print only when the flag is on and at least one stage exists.
#[test]
fn should_print_timings_only_when_enabled_and_non_empty() {
    let single_stage = StageTiming {
        name: "x".to_string(),
        elapsed_ms: 10,
        items_processed: 0,
        items_skipped: 0,
        errors: 0,
        rate_limit_hits: 0,
        retries: 0,
        project: None,
        sub_stages: vec![],
    };
    let timings = vec![single_stage];

    assert!(should_print_timings(true, &timings));
    assert!(!should_print_timings(false, &timings));
    assert!(!should_print_timings(true, &[]));
}
/// The issue sub-row names the project, the issue count and the status count.
#[test]
fn issue_sub_rows_include_project_and_statuses() {
    let project = ProjectSummary {
        path: "vs/typescript-code".to_string(),
        items_upserted: 2,
        discussions_synced: 0,
        events_fetched: 0,
        events_failed: 0,
        statuses_enriched: 1,
        statuses_seen: 5,
        status_errors: 0,
        mr_diffs_fetched: 0,
        mr_diffs_failed: 0,
    };

    let lines = issue_sub_rows(&[project]);

    assert_eq!(lines.len(), 1);
    let row = &lines[0];
    assert!(row.contains("vs/typescript-code"));
    assert!(row.contains("2 issues"));
    assert!(row.contains("1 statuses updated"));
}
/// The MR sub-row names the project, MR count, diff count and diff failures.
#[test]
fn mr_sub_rows_include_project_and_diff_failures() {
    let project = ProjectSummary {
        path: "vs/python-code".to_string(),
        items_upserted: 3,
        discussions_synced: 0,
        events_fetched: 0,
        events_failed: 0,
        statuses_enriched: 0,
        statuses_seen: 0,
        status_errors: 0,
        mr_diffs_fetched: 4,
        mr_diffs_failed: 1,
    };

    let lines = mr_sub_rows(&[project]);

    assert_eq!(lines.len(), 1);
    let row = &lines[0];
    assert!(row.contains("vs/python-code"));
    assert!(row.contains("3 MRs"));
    assert!(row.contains("4 diffs"));
    assert!(row.contains("1 diff failures"));
}
/// A skipped project's status sub-row carries the project path and the skip reason.
#[test]
fn status_sub_rows_include_project_and_skip_reason() {
    let skipped_project = ProjectStatusEnrichment {
        path: "vs/python-code".to_string(),
        mode: "skipped".to_string(),
        reason: Some("disabled".to_string()),
        seen: 0,
        enriched: 0,
        cleared: 0,
        without_widget: 0,
        partial_errors: 0,
        first_partial_error: None,
        error: None,
    };

    let lines = status_sub_rows(&[skipped_project]);

    assert_eq!(lines.len(), 1);
    let row = &lines[0];
    assert!(row.contains("vs/python-code"));
    assert!(row.contains("0 statuses updated"));
    assert!(row.contains("skipped (disabled)"));
}
/// One issue IID is enough to make the options surgical.
#[test]
fn is_surgical_with_issues() {
    let mut options = default_options();
    options.issue_iids = vec![1];
    assert!(options.is_surgical());
}
/// One merge-request IID is enough to make the options surgical.
#[test]
fn is_surgical_with_mrs() {
    let mut options = default_options();
    options.mr_iids = vec![10];
    assert!(options.is_surgical());
}
/// With no IIDs at all the options are not surgical.
#[test]
fn is_surgical_empty() {
    assert!(!default_options().is_surgical());
}
/// Pins the surgical target cap so a change to it is a deliberate one.
#[test]
fn max_surgical_targets_is_100() {
    let cap = SyncOptions::MAX_SURGICAL_TARGETS;
    assert_eq!(cap, 100);
}
/// A default `SyncResult` serializes without any of the optional surgical keys.
#[test]
fn sync_result_default_omits_surgical_fields() {
    let serialized = serde_json::to_value(&SyncResult::default()).unwrap();
    for key in ["surgical_mode", "surgical_iids", "entity_results", "preflight_only"] {
        assert!(serialized.get(key).is_none());
    }
}
/// Populated surgical fields appear in the JSON with the expected shapes and values.
#[test]
fn sync_result_with_surgical_fields_serializes_correctly() {
    let synced_issue = EntitySyncResult {
        entity_type: "issue".to_string(),
        iid: 7,
        outcome: "synced".to_string(),
        error: None,
        toctou_reason: None,
    };
    let skipped_issue = EntitySyncResult {
        entity_type: "issue".to_string(),
        iid: 42,
        outcome: "skipped_toctou".to_string(),
        error: None,
        toctou_reason: Some("updated_at changed".to_string()),
    };
    let targets = SurgicalIids {
        issues: vec![7, 42],
        merge_requests: vec![10],
    };
    let outcome = SyncResult {
        surgical_mode: Some(true),
        surgical_iids: Some(targets),
        entity_results: Some(vec![synced_issue, skipped_issue]),
        preflight_only: Some(false),
        ..SyncResult::default()
    };

    let serialized = serde_json::to_value(&outcome).unwrap();

    assert_eq!(serialized["surgical_mode"], true);
    assert_eq!(serialized["surgical_iids"]["issues"], serde_json::json!([7, 42]));
    assert_eq!(serialized["entity_results"].as_array().unwrap().len(), 2);
    assert_eq!(serialized["entity_results"][1]["outcome"], "skipped_toctou");
    assert_eq!(serialized["preflight_only"], false);
}
/// `None` optionals are dropped from the JSON while required fields remain.
#[test]
fn entity_sync_result_omits_none_fields() {
    let record = EntitySyncResult {
        entity_type: "merge_request".to_string(),
        iid: 10,
        outcome: "synced".to_string(),
        error: None,
        toctou_reason: None,
    };

    let serialized = serde_json::to_value(&record).unwrap();

    for absent in ["error", "toctou_reason"] {
        assert!(serialized.get(absent).is_none());
    }
    assert!(serialized.get("entity_type").is_some());
}
/// Supplying both issue and merge-request IIDs is surgical as well.
#[test]
fn is_surgical_with_both_issues_and_mrs() {
    let mut options = default_options();
    options.issue_iids = vec![1, 2];
    options.mr_iids = vec![10];
    assert!(options.is_surgical());
}
/// A project scope alone, without any IIDs, does not make the options surgical.
#[test]
fn is_not_surgical_with_only_project() {
    let mut options = default_options();
    options.project = Some("group/repo".to_string());
    assert!(!options.is_surgical());
}

View File

@@ -8,13 +8,13 @@ use crate::core::error::{LoreError, Result};
use crate::core::paths::get_db_path;
use crate::core::project::resolve_project;
use crate::core::time::{ms_to_iso, parse_since};
use crate::core::timeline::{
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
use crate::timeline::collect::collect_events;
use crate::timeline::expand::expand_timeline;
use crate::timeline::seed::{seed_timeline, seed_timeline_direct};
use crate::timeline::{
EntityRef, ExpandedEntityRef, TimelineEvent, TimelineEventType, TimelineResult, UnresolvedRef,
};
use crate::core::timeline_collect::collect_events;
use crate::core::timeline_expand::expand_timeline;
use crate::core::timeline_seed::{seed_timeline, seed_timeline_direct};
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
/// Parameters for running the timeline pipeline.
pub struct TimelineParams {

View File

@@ -1,12 +1,5 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Opens a connection at the `:memory:` path and runs the migrations on it.
///
/// Panics if either step fails, which is acceptable in test setup.
fn setup_test_db() -> Connection {
    let db = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();
    db
}
use crate::test_support::{insert_project, setup_test_db};
fn default_scoring() -> ScoringConfig {
ScoringConfig::default()
@@ -17,20 +10,6 @@ fn test_as_of_ms() -> i64 {
now_ms() + 1000
}
/// Inserts one `projects` row for tests.
///
/// The GitLab project id is derived as `id * 100` and the web URL is built
/// from `path` under `https://git.example.com/`. Panics if the insert fails.
fn insert_project(conn: &Connection, id: i64, path: &str) {
    let gitlab_project_id = id * 100;
    let web_url = format!("https://git.example.com/{}", path);
    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url)
VALUES (?1, ?2, ?3, ?4)",
        rusqlite::params![id, gitlab_project_id, path, web_url],
    )
    .unwrap();
}
fn insert_mr(conn: &Connection, id: i64, project_id: i64, iid: i64, author: &str, state: &str) {
let ts = now_ms();
conn.execute(

View File

@@ -1,10 +1,11 @@
pub mod args;
pub mod autocorrect;
pub mod commands;
pub mod progress;
pub mod render;
pub mod robot;
use clap::{Args, Parser, Subcommand};
use clap::{Parser, Subcommand};
use std::io::IsTerminal;
#[derive(Parser)]
@@ -398,871 +399,8 @@ pub enum Commands {
SyncStatus,
}
// CLI arguments for `lore issues`: list issues, or show one when an IID is given.
// NOTE: `///` comments and the `after_help` string below are rendered by clap as
// user-facing help text, so maintainer notes here use `//` instead.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore issues -n 10 # List 10 most recently updated issues
lore issues -s opened -l bug # Open issues labeled 'bug'
lore issues 42 -p group/repo # Show issue #42 in a specific project
lore issues --since 7d -a jsmith # Issues updated in last 7 days by jsmith")]
pub struct IssuesArgs {
/// Issue IID (omit to list, provide to show details)
pub iid: Option<i64>,
/// Maximum results
#[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize,
/// Select output fields (comma-separated, or 'minimal' preset: iid,title,state,updated_at_iso)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
/// Filter by state (opened, closed, all)
#[arg(short = 's', long, help_heading = "Filters", value_parser = ["opened", "closed", "all"])]
pub state: Option<String>,
/// Filter by project path
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Filter by author username
#[arg(short = 'a', long, help_heading = "Filters")]
pub author: Option<String>,
/// Filter by assignee username
#[arg(short = 'A', long, help_heading = "Filters")]
pub assignee: Option<String>,
/// Filter by label (repeatable, AND logic)
#[arg(short = 'l', long, help_heading = "Filters")]
pub label: Option<Vec<String>>,
/// Filter by milestone title
#[arg(short = 'm', long, help_heading = "Filters")]
pub milestone: Option<String>,
/// Filter by work-item status name (repeatable, OR logic)
#[arg(long, help_heading = "Filters")]
pub status: Vec<String>,
/// Filter by time (7d, 2w, 1m, or YYYY-MM-DD)
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
/// Filter by due date (before this date, YYYY-MM-DD)
#[arg(long = "due-before", help_heading = "Filters")]
pub due_before: Option<String>,
/// Show only issues with a due date
#[arg(
long = "has-due",
help_heading = "Filters",
overrides_with = "no_has_due"
)]
pub has_due: bool,
// Hidden negation of --has-due; the two override each other.
#[arg(long = "no-has-due", hide = true, overrides_with = "has_due")]
pub no_has_due: bool,
/// Sort field (updated, created, iid)
#[arg(long, value_parser = ["updated", "created", "iid"], default_value = "updated", help_heading = "Sorting")]
pub sort: String,
/// Sort ascending (default: descending)
#[arg(long, help_heading = "Sorting", overrides_with = "no_asc")]
pub asc: bool,
// Hidden negation of --asc; the two override each other.
#[arg(long = "no-asc", hide = true, overrides_with = "asc")]
pub no_asc: bool,
/// Open first matching item in browser
#[arg(
short = 'o',
long,
help_heading = "Actions",
overrides_with = "no_open"
)]
pub open: bool,
// Hidden negation of --open; the two override each other.
#[arg(long = "no-open", hide = true, overrides_with = "open")]
pub no_open: bool,
}
// CLI arguments for `lore mrs`: list merge requests, or show one when an IID is given.
// NOTE: `///` comments and the `after_help` string below are rendered by clap as
// user-facing help text, so maintainer notes here use `//` instead.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore mrs -s opened # List open merge requests
lore mrs -s merged --since 2w # MRs merged in the last 2 weeks
lore mrs 99 -p group/repo # Show MR !99 in a specific project
lore mrs -D --reviewer jsmith # Non-draft MRs reviewed by jsmith")]
pub struct MrsArgs {
/// MR IID (omit to list, provide to show details)
pub iid: Option<i64>,
/// Maximum results
#[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize,
/// Select output fields (comma-separated, or 'minimal' preset: iid,title,state,updated_at_iso)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
/// Filter by state (opened, merged, closed, locked, all)
#[arg(short = 's', long, help_heading = "Filters", value_parser = ["opened", "merged", "closed", "locked", "all"])]
pub state: Option<String>,
/// Filter by project path
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Filter by author username
#[arg(short = 'a', long, help_heading = "Filters")]
pub author: Option<String>,
/// Filter by assignee username
#[arg(short = 'A', long, help_heading = "Filters")]
pub assignee: Option<String>,
/// Filter by reviewer username
#[arg(short = 'r', long, help_heading = "Filters")]
pub reviewer: Option<String>,
/// Filter by label (repeatable, AND logic)
#[arg(short = 'l', long, help_heading = "Filters")]
pub label: Option<Vec<String>>,
/// Filter by time (7d, 2w, 1m, or YYYY-MM-DD)
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
// --draft and --no-draft are declared as conflicting, unlike the
// override-style negation pairs further down.
/// Show only draft MRs
#[arg(
short = 'd',
long,
conflicts_with = "no_draft",
help_heading = "Filters"
)]
pub draft: bool,
/// Exclude draft MRs
#[arg(
short = 'D',
long = "no-draft",
conflicts_with = "draft",
help_heading = "Filters"
)]
pub no_draft: bool,
/// Filter by target branch
#[arg(long, help_heading = "Filters")]
pub target: Option<String>,
/// Filter by source branch
#[arg(long, help_heading = "Filters")]
pub source: Option<String>,
/// Sort field (updated, created, iid)
#[arg(long, value_parser = ["updated", "created", "iid"], default_value = "updated", help_heading = "Sorting")]
pub sort: String,
/// Sort ascending (default: descending)
#[arg(long, help_heading = "Sorting", overrides_with = "no_asc")]
pub asc: bool,
// Hidden negation of --asc; the two override each other.
#[arg(long = "no-asc", hide = true, overrides_with = "asc")]
pub no_asc: bool,
/// Open first matching item in browser
#[arg(
short = 'o',
long,
help_heading = "Actions",
overrides_with = "no_open"
)]
pub open: bool,
// Hidden negation of --open; the two override each other.
#[arg(long = "no-open", hide = true, overrides_with = "open")]
pub no_open: bool,
}
// CLI arguments for `lore notes`: list notes with optional filters.
// NOTE: `///` comments and the `after_help` string below are rendered by clap as
// user-facing help text, so maintainer notes here use `//` instead.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore notes # List 50 most recent notes
lore notes --author alice --since 7d # Notes by alice in last 7 days
lore notes --for-issue 42 -p group/repo # Notes on issue #42
lore notes --path src/ --resolution unresolved # Unresolved diff notes in src/")]
pub struct NotesArgs {
/// Maximum results
#[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize,
/// Select output fields (comma-separated, or 'minimal' preset: id,author_username,body,created_at_iso)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
/// Filter by author username
#[arg(short = 'a', long, help_heading = "Filters")]
pub author: Option<String>,
/// Filter by note type (DiffNote, DiscussionNote)
#[arg(long, help_heading = "Filters")]
pub note_type: Option<String>,
/// Filter by body text (substring match)
#[arg(long, help_heading = "Filters")]
pub contains: Option<String>,
/// Filter by internal note ID
#[arg(long, help_heading = "Filters")]
pub note_id: Option<i64>,
/// Filter by GitLab note ID
#[arg(long, help_heading = "Filters")]
pub gitlab_note_id: Option<i64>,
/// Filter by discussion ID
#[arg(long, help_heading = "Filters")]
pub discussion_id: Option<String>,
/// Include system notes (excluded by default)
#[arg(long, help_heading = "Filters")]
pub include_system: bool,
// --for-issue and --for-mr are declared as conflicting with each other.
/// Filter to notes on a specific issue IID (requires --project or default_project)
#[arg(long, conflicts_with = "for_mr", help_heading = "Filters")]
pub for_issue: Option<i64>,
/// Filter to notes on a specific MR IID (requires --project or default_project)
#[arg(long, conflicts_with = "for_issue", help_heading = "Filters")]
pub for_mr: Option<i64>,
/// Filter by project path
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Filter by time (7d, 2w, 1m, or YYYY-MM-DD)
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
/// Filter until date (YYYY-MM-DD, inclusive end-of-day)
#[arg(long, help_heading = "Filters")]
pub until: Option<String>,
/// Filter by file path (exact match or prefix with trailing /)
#[arg(long, help_heading = "Filters")]
pub path: Option<String>,
/// Filter by resolution status (any, unresolved, resolved)
#[arg(
long,
value_parser = ["any", "unresolved", "resolved"],
help_heading = "Filters"
)]
pub resolution: Option<String>,
/// Sort field (created, updated)
#[arg(
long,
value_parser = ["created", "updated"],
default_value = "created",
help_heading = "Sorting"
)]
pub sort: String,
// Unlike IssuesArgs/MrsArgs, --asc and --open have no hidden --no-* counterpart here.
/// Sort ascending (default: descending)
#[arg(long, help_heading = "Sorting")]
pub asc: bool,
/// Open first matching item in browser
#[arg(long, help_heading = "Actions")]
pub open: bool,
}
// CLI arguments for the ingest command: optional entity kind, project scope,
// and force/full/dry-run switches, each with a hidden --no-* override.
// NOTE: `///` comments are rendered by clap as user-facing help text, so
// maintainer notes here use `//` instead.
#[derive(Parser)]
pub struct IngestArgs {
/// Entity to ingest (issues, mrs). Omit to ingest everything
#[arg(value_parser = ["issues", "mrs"])]
pub entity: Option<String>,
/// Filter to single project
#[arg(short = 'p', long)]
pub project: Option<String>,
/// Override stale sync lock
#[arg(short = 'f', long, overrides_with = "no_force")]
pub force: bool,
// Hidden negation of --force; the two override each other.
#[arg(long = "no-force", hide = true, overrides_with = "force")]
pub no_force: bool,
/// Full re-sync: reset cursors and fetch all data from scratch
#[arg(long, overrides_with = "no_full")]
pub full: bool,
// Hidden negation of --full; the two override each other.
#[arg(long = "no-full", hide = true, overrides_with = "full")]
pub no_full: bool,
/// Preview what would be synced without making changes
#[arg(long, overrides_with = "no_dry_run")]
pub dry_run: bool,
// Hidden negation of --dry-run; the two override each other.
#[arg(long = "no-dry-run", hide = true, overrides_with = "dry_run")]
pub no_dry_run: bool,
}
// CLI arguments for `lore stats`: statistics, integrity checks and repair.
// NOTE: `///` comments and the `after_help` string below are rendered by clap as
// user-facing help text, so maintainer notes here use `//` instead.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore stats # Show document and index statistics
lore stats --check # Run integrity checks
lore stats --repair --dry-run # Preview what repair would fix
lore --robot stats # JSON output for automation")]
pub struct StatsArgs {
/// Run integrity checks
#[arg(long, overrides_with = "no_check")]
pub check: bool,
// Hidden negation of --check; the two override each other.
#[arg(long = "no-check", hide = true, overrides_with = "check")]
pub no_check: bool,
// No clap-level link between --repair and --check/--dry-run is declared here;
// the relationships described in the help text must be handled by the consumer.
/// Repair integrity issues (auto-enables --check)
#[arg(long)]
pub repair: bool,
/// Preview what would be repaired without making changes (requires --repair)
#[arg(long, overrides_with = "no_dry_run")]
pub dry_run: bool,
// Hidden negation of --dry-run; the two override each other.
#[arg(long = "no-dry-run", hide = true, overrides_with = "dry_run")]
pub no_dry_run: bool,
}
// CLI arguments for `lore search`: query string, mode, filters and output options.
// NOTE: `///` comments and the `after_help` string below are rendered by clap as
// user-facing help text, so maintainer notes here use `//` instead.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore search 'authentication bug' # Hybrid search (default)
lore search 'deploy' --mode lexical --type mr # Lexical search, MRs only
lore search 'API rate limit' --since 30d # Recent results only
lore search 'config' -p group/repo --explain # With ranking explanation")]
pub struct SearchArgs {
/// Search query string
pub query: String,
/// Search mode (lexical, hybrid, semantic)
#[arg(long, default_value = "hybrid", value_parser = ["lexical", "hybrid", "semantic"], help_heading = "Mode")]
pub mode: String,
// Exposed on the command line as --type; the field is named source_type.
/// Filter by source type (issue, mr, discussion, note)
#[arg(long = "type", value_name = "TYPE", value_parser = ["issue", "mr", "discussion", "note"], help_heading = "Filters")]
pub source_type: Option<String>,
/// Filter by author username
#[arg(long, help_heading = "Filters")]
pub author: Option<String>,
/// Filter by project path
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Filter by label (repeatable, AND logic)
#[arg(long, action = clap::ArgAction::Append, help_heading = "Filters")]
pub label: Vec<String>,
/// Filter by file path (trailing / for prefix match)
#[arg(long, help_heading = "Filters")]
pub path: Option<String>,
/// Filter by created since (7d, 2w, or YYYY-MM-DD)
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
/// Filter by updated since (7d, 2w, or YYYY-MM-DD)
#[arg(long = "updated-since", help_heading = "Filters")]
pub updated_since: Option<String>,
// The "max 100" in the help text is not enforced by a clap range here.
/// Maximum results (default 20, max 100)
#[arg(
short = 'n',
long = "limit",
default_value = "20",
help_heading = "Output"
)]
pub limit: usize,
/// Select output fields (comma-separated, or 'minimal' preset: document_id,title,source_type,score)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
/// Show ranking explanation per result
#[arg(long, help_heading = "Output", overrides_with = "no_explain")]
pub explain: bool,
// Hidden negation of --explain; the two override each other.
#[arg(long = "no-explain", hide = true, overrides_with = "explain")]
pub no_explain: bool,
/// FTS query mode: safe (default) or raw
#[arg(long = "fts-mode", default_value = "safe", value_parser = ["safe", "raw"], help_heading = "Mode")]
pub fts_mode: String,
}
// CLI arguments for `lore generate-docs`: full-rebuild switch and project scope.
// NOTE: `///` comments and the `after_help` string below are rendered by clap as
// user-facing help text, so maintainer notes here use `//` instead.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore generate-docs # Generate docs for dirty entities
lore generate-docs --full # Full rebuild of all documents
lore generate-docs --full -p group/repo # Full rebuild for one project")]
pub struct GenerateDocsArgs {
/// Full rebuild: seed all entities into dirty queue, then drain
#[arg(long)]
pub full: bool,
/// Filter to single project
#[arg(short = 'p', long)]
pub project: Option<String>,
}
// CLI arguments for `lore sync`: stage-skipping switches, dry-run/timing/lock
// options, and the surgical targets (--issue / --mr with --project).
// NOTE: `///` comments and the `after_help` string below are rendered by clap as
// user-facing help text, so maintainer notes here use `//` instead.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore sync # Full pipeline: ingest + docs + embed
lore sync --no-embed # Skip embedding step
lore sync --no-status # Skip work-item status enrichment
lore sync --full --force # Full re-sync, override stale lock
lore sync --dry-run # Preview what would change
lore sync --issue 42 -p group/repo # Surgically sync one issue
lore sync --mr 10 --mr 20 -p g/r # Surgically sync two MRs")]
pub struct SyncArgs {
/// Reset cursors, fetch everything
#[arg(long, overrides_with = "no_full")]
pub full: bool,
// Hidden negation of --full; the two override each other.
#[arg(long = "no-full", hide = true, overrides_with = "full")]
pub no_full: bool,
/// Override stale lock
#[arg(long, overrides_with = "no_force")]
pub force: bool,
// Hidden negation of --force; the two override each other.
#[arg(long = "no-force", hide = true, overrides_with = "force")]
pub no_force: bool,
/// Skip embedding step
#[arg(long)]
pub no_embed: bool,
/// Skip document regeneration
#[arg(long)]
pub no_docs: bool,
/// Skip resource event fetching (overrides config)
#[arg(long = "no-events")]
pub no_events: bool,
/// Skip MR file change fetching (overrides config)
#[arg(long = "no-file-changes")]
pub no_file_changes: bool,
/// Skip work-item status enrichment via GraphQL (overrides config)
#[arg(long = "no-status")]
pub no_status: bool,
/// Preview what would be synced without making changes
#[arg(long, overrides_with = "no_dry_run")]
pub dry_run: bool,
// Hidden negation of --dry-run; the two override each other.
#[arg(long = "no-dry-run", hide = true, overrides_with = "dry_run")]
pub no_dry_run: bool,
/// Show detailed timing breakdown for sync stages
#[arg(short = 't', long = "timings")]
pub timings: bool,
/// Acquire file lock before syncing (skip if another sync is running)
#[arg(long)]
pub lock: bool,
// `range(1..)` rejects 0 at parse time; Append collects every occurrence.
/// Surgically sync specific issues by IID (repeatable, must be positive)
#[arg(long, value_parser = clap::value_parser!(u64).range(1..), action = clap::ArgAction::Append)]
pub issue: Vec<u64>,
// `range(1..)` rejects 0 at parse time; Append collects every occurrence.
/// Surgically sync specific merge requests by IID (repeatable, must be positive)
#[arg(long, value_parser = clap::value_parser!(u64).range(1..), action = clap::ArgAction::Append)]
pub mr: Vec<u64>,
// The "required when --issue or --mr is used" rule is not declared to clap
// here, so it has to be validated by the consumer of these args.
/// Scope to a single project (required when --issue or --mr is used)
#[arg(short = 'p', long)]
pub project: Option<String>,
/// Validate remote entities exist without DB writes (preflight only)
#[arg(long)]
pub preflight_only: bool,
}
// CLI arguments for `lore embed`.
// Plain `//` comments are used for reviewer notes in this struct because clap
// turns `///` doc comments into user-visible help text.
// The `after_help` text is appended to `--help`; `\x1b[1m` / `\x1b[0m` are the
// ANSI bold-on / reset escape codes around the "Examples:" heading.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore embed # Embed new/changed documents
lore embed --full # Re-embed all documents from scratch
lore embed --retry-failed # Retry previously failed embeddings")]
pub struct EmbedArgs {
/// Re-embed all documents (clears existing embeddings first)
#[arg(long, overrides_with = "no_full")]
pub full: bool,
// Hidden `--no-full` counterpart. `--full` and `--no-full` override each
// other, so whichever appears last on the command line takes effect.
#[arg(long = "no-full", hide = true, overrides_with = "full")]
pub no_full: bool,
/// Retry previously failed embeddings
#[arg(long, overrides_with = "no_retry_failed")]
pub retry_failed: bool,
// Hidden `--no-retry-failed` counterpart; same last-one-wins pairing as above.
#[arg(long = "no-retry-failed", hide = true, overrides_with = "retry_failed")]
pub no_retry_failed: bool,
}
// CLI arguments for `lore timeline`.
// Plain `//` comments are used for reviewer notes in this struct because clap
// turns `///` doc comments into user-visible help text.
// `help_heading` groups the options under "Filters", "Expansion" and "Output"
// in the generated `--help` output.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore timeline 'deployment' # Search-based seeding
lore timeline issue:42 # Direct: issue #42 and related entities
lore timeline i:42 # Shorthand for issue:42
lore timeline mr:99 # Direct: MR !99 and related entities
lore timeline 'auth' --since 30d -p group/repo # Scoped to project and time
lore timeline 'migration' --depth 2 # Deep cross-reference expansion
lore timeline 'auth' --no-mentions # Only 'closes' and 'related' edges")]
pub struct TimelineArgs {
// Required positional argument (no `#[arg]` attribute, non-optional type).
/// Search text or entity reference (issue:N, i:N, mr:N, m:N)
pub query: String,
/// Scope to a specific project (fuzzy match)
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
// Kept as a raw string here; parsing of the date/duration happens elsewhere.
/// Only show events after this date (e.g. "6m", "2w", "2024-01-01")
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
/// Cross-reference expansion depth (0 = no expansion)
#[arg(long, default_value = "1", help_heading = "Expansion")]
pub depth: u32,
/// Skip 'mentioned' edges during expansion (only follow 'closes' and 'related')
#[arg(long = "no-mentions", help_heading = "Expansion")]
pub no_mentions: bool,
/// Maximum number of events to display
#[arg(
short = 'n',
long = "limit",
default_value = "100",
help_heading = "Output"
)]
pub limit: usize,
// `value_delimiter = ','` splits one `--fields a,b,c` value into separate entries;
// `None` when the flag is not given.
/// Select output fields (comma-separated, or 'minimal' preset: timestamp,type,entity_iid,detail)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
/// Maximum seed entities from search
#[arg(long = "max-seeds", default_value = "10", help_heading = "Expansion")]
pub max_seeds: usize,
/// Maximum expanded entities via cross-references
#[arg(
long = "max-entities",
default_value = "50",
help_heading = "Expansion"
)]
pub max_entities: usize,
/// Maximum evidence notes included
#[arg(
long = "max-evidence",
default_value = "10",
help_heading = "Expansion"
)]
pub max_evidence: usize,
}
// CLI arguments for `lore who`.
// Plain `//` comments are used for reviewer notes in this struct because clap
// turns `///` doc comments into user-visible help text.
// The mode flags (`--path`, `--active`, `--overlap`, `--reviews`) carry
// `conflicts_with_all` lists so clap rejects incompatible combinations at
// parse time instead of leaving that to the command implementation.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore who src/features/auth/ # Who knows about this area?
lore who @asmith # What is asmith working on?
lore who @asmith --reviews # What review patterns does asmith have?
lore who --active # What discussions need attention?
lore who --overlap src/features/auth/ # Who else is touching these files?
lore who --path README.md # Expert lookup for a root file
lore who --path Makefile # Expert lookup for a dotless root file")]
pub struct WhoArgs {
// Optional positional argument; `--active` and `--overlap` conflict with it,
// `--reviews` requires it.
/// Username or file path (path if contains /)
pub target: Option<String>,
/// Force expert mode for a file/directory path.
/// Root files (README.md, LICENSE, Makefile) are treated as exact matches.
/// Use a trailing `/` to force directory-prefix matching.
#[arg(long, help_heading = "Mode", conflicts_with_all = ["active", "overlap", "reviews"])]
pub path: Option<String>,
/// Show active unresolved discussions
#[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "overlap", "reviews", "path"])]
pub active: bool,
/// Find users with MRs/notes touching this file path
#[arg(long, help_heading = "Mode", conflicts_with_all = ["target", "active", "reviews", "path"])]
pub overlap: Option<String>,
/// Show review pattern analysis (requires username target)
#[arg(long, help_heading = "Mode", requires = "target", conflicts_with_all = ["active", "overlap", "path"])]
pub reviews: bool,
/// Time window (7d, 2w, 6m, YYYY-MM-DD). Default varies by mode.
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
/// Scope to a project (supports fuzzy matching)
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
// Range-checked by clap (values outside 1..=500 are a parse error);
// `None` when the flag is absent.
/// Maximum results per section (1..=500); omit for unlimited
#[arg(
short = 'n',
long = "limit",
value_parser = clap::value_parser!(u16).range(1..=500),
help_heading = "Output"
)]
pub limit: Option<u16>,
/// Select output fields (comma-separated, or 'minimal' preset; varies by mode)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
// `--detail` cannot be combined with `--explain-score`, and forms a
// last-one-wins pair with the hidden `--no-detail`.
/// Show per-MR detail breakdown (expert mode only)
#[arg(
long,
help_heading = "Output",
overrides_with = "no_detail",
conflicts_with = "explain_score"
)]
pub detail: bool,
// Hidden negation of `--detail`.
#[arg(long = "no-detail", hide = true, overrides_with = "detail")]
pub no_detail: bool,
/// Score as if "now" is this date (ISO 8601 or duration like 30d). Expert mode only.
#[arg(long = "as-of", help_heading = "Scoring")]
pub as_of: Option<String>,
/// Show per-component score breakdown in output. Expert mode only.
#[arg(long = "explain-score", help_heading = "Scoring")]
pub explain_score: bool,
/// Include bot users in results (normally excluded via scoring.excluded_usernames).
#[arg(long = "include-bots", help_heading = "Scoring")]
pub include_bots: bool,
/// Include discussions on closed issues and merged/closed MRs
#[arg(long, help_heading = "Filters")]
pub include_closed: bool,
/// Remove the default time window (query all history). Conflicts with --since.
#[arg(
long = "all-history",
help_heading = "Filters",
conflicts_with = "since"
)]
pub all_history: bool,
}
// CLI arguments for `lore me`.
// Plain `//` comments are used for reviewer notes in this struct because clap
// turns `///` doc comments into user-visible help text.
// The four "Sections" flags are independent booleans; `show_all_sections()`
// reports the case where none of them was passed.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore me # Full dashboard (default project or all)
lore me --issues # Issues section only
lore me --mrs # MRs section only
lore me --activity # Activity feed only
lore me --all # All synced projects
lore me --since 2d # Activity window (default: 30d)
lore me --project group/repo # Scope to one project
lore me --user jdoe # Override configured username")]
pub struct MeArgs {
/// Show open issues section
#[arg(long, help_heading = "Sections")]
pub issues: bool,
/// Show authored + reviewing MRs section
#[arg(long, help_heading = "Sections")]
pub mrs: bool,
/// Show activity feed section
#[arg(long, help_heading = "Sections")]
pub activity: bool,
/// Show items you're @mentioned in (not assigned/authored/reviewing)
#[arg(long, help_heading = "Sections")]
pub mentions: bool,
/// Activity window (e.g. 7d, 2w, 30d). Default: 30d. Only affects activity section.
#[arg(long, help_heading = "Filters")]
pub since: Option<String>,
// `--project` and `--all` are declared mutually exclusive on both sides.
/// Scope to a project (supports fuzzy matching)
#[arg(short = 'p', long, help_heading = "Filters", conflicts_with = "all")]
pub project: Option<String>,
/// Show all synced projects (overrides default_project)
#[arg(long, help_heading = "Filters", conflicts_with = "project")]
pub all: bool,
/// Override configured username
#[arg(long = "user", help_heading = "Filters")]
pub user: Option<String>,
/// Select output fields (comma-separated, or 'minimal' preset)
#[arg(long, help_heading = "Output", value_delimiter = ',')]
pub fields: Option<Vec<String>>,
/// Reset the since-last-check cursor (next run shows no new events)
#[arg(long, help_heading = "Output")]
pub reset_cursor: bool,
}
impl MeArgs {
    /// Reports whether the whole dashboard should be rendered.
    ///
    /// Returns `true` only when none of the section selectors (`issues`,
    /// `mrs`, `activity`, `mentions`) was requested.
    pub fn show_all_sections(&self) -> bool {
        let any_section_requested = self.issues || self.mrs || self.activity || self.mentions;
        !any_section_requested
    }
}
// CLI arguments for `lore file-history`.
// Plain `//` comments are used for reviewer notes in this struct because clap
// turns `///` doc comments into user-visible help text.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore file-history src/main.rs # MRs that touched this file
lore file-history src/auth/ -p group/repo # Scoped to project
lore file-history src/foo.rs --discussions # Include DiffNote snippets
lore file-history src/bar.rs --no-follow-renames # Skip rename chain")]
pub struct FileHistoryArgs {
// Required positional argument.
/// File path to trace history for
pub path: String,
/// Scope to a specific project (fuzzy match)
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Include discussion snippets from DiffNotes on this file
#[arg(long, help_heading = "Output")]
pub discussions: bool,
// Negative flag: `true` turns rename following off.
/// Disable rename chain resolution
#[arg(long = "no-follow-renames", help_heading = "Filters")]
pub no_follow_renames: bool,
/// Only show merged MRs
#[arg(long, help_heading = "Filters")]
pub merged: bool,
/// Maximum results
#[arg(
short = 'n',
long = "limit",
default_value = "50",
help_heading = "Output"
)]
pub limit: usize,
}
// CLI arguments for `lore trace`.
// Plain `//` comments are used for reviewer notes in this struct because clap
// turns `///` doc comments into user-visible help text.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore trace src/main.rs # Why was this file changed?
lore trace src/auth/ -p group/repo # Scoped to project
lore trace src/foo.rs --discussions # Include DiffNote context
lore trace src/bar.rs:42 # Line hint (Tier 2 warning)")]
pub struct TraceArgs {
// Required positional argument; any `:line` suffix arrives here unparsed.
/// File path to trace (supports :line suffix for future Tier 2)
pub path: String,
/// Scope to a specific project (fuzzy match)
#[arg(short = 'p', long, help_heading = "Filters")]
pub project: Option<String>,
/// Include DiffNote discussion snippets
#[arg(long, help_heading = "Output")]
pub discussions: bool,
// Negative flag: `true` turns rename following off.
/// Disable rename chain resolution
#[arg(long = "no-follow-renames", help_heading = "Filters")]
pub no_follow_renames: bool,
/// Maximum trace chains to display
#[arg(
short = 'n',
long = "limit",
default_value = "20",
help_heading = "Output"
)]
pub limit: usize,
}
// CLI arguments for `lore count`.
// Plain `//` comments are used for reviewer notes in this struct because clap
// turns `///` doc comments into user-visible help text.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore count issues # Total issues in local database
lore count notes --for mr # Notes on merge requests only
lore count discussions --for issue # Discussions on issues only")]
pub struct CountArgs {
// Required positional; the string-list `value_parser` makes clap reject any
// value outside the listed set.
/// Entity type to count (issues, mrs, discussions, notes, events)
#[arg(value_parser = ["issues", "mrs", "discussions", "notes", "events"])]
pub entity: String,
// Optional `-f` / `--for`; restricted to "issue" or "mr" by the value parser.
/// Parent type filter: issue or mr (for discussions/notes)
#[arg(short = 'f', long = "for", value_parser = ["issue", "mr"])]
pub for_entity: Option<String>,
}
// Argument wrapper for the cron command: it carries only the chosen
// `CronAction` subcommand. A plain `//` comment is used because clap turns
// `///` doc comments into user-visible help text.
#[derive(Parser)]
pub struct CronArgs {
#[command(subcommand)]
pub action: CronAction,
}
// Subcommands of the cron command. The `///` comments on the variants are the
// help text clap shows for each subcommand, so reviewer notes use `//`.
#[derive(Subcommand)]
pub enum CronAction {
/// Install cron job for automatic syncing
Install {
// NOTE(review): parsed as a plain u32 with no range restriction, so `0` is
// accepted by the parser here — confirm it is validated before use.
/// Sync interval in minutes (default: 8)
#[arg(long, default_value = "8")]
interval: u32,
},
/// Remove cron job
Uninstall,
/// Show current cron configuration
Status,
}
// Argument wrapper for the token command: it carries only the chosen
// `TokenAction` subcommand. A plain `//` comment is used because clap turns
// `///` doc comments into user-visible help text.
#[derive(Args)]
pub struct TokenArgs {
#[command(subcommand)]
pub action: TokenAction,
}
// Subcommands of the token command. The `///` comments on the variants and
// fields are the help text clap shows, so reviewer notes use `//`.
#[derive(Subcommand)]
pub enum TokenAction {
/// Store a GitLab token in the config file
Set {
// `None` when `--token` is not passed on the command line.
/// Token value (reads from stdin if omitted in non-interactive mode)
#[arg(long)]
token: Option<String>,
},
/// Show the current token (masked by default)
Show {
/// Show the full unmasked token
#[arg(long)]
unmask: bool,
},
}
pub use args::{
CountArgs, CronAction, CronArgs, EmbedArgs, FileHistoryArgs, GenerateDocsArgs, IngestArgs,
IssuesArgs, MeArgs, MrsArgs, NotesArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs,
TokenAction, TokenArgs, TraceArgs, WhoArgs,
};

View File

@@ -1,6 +1,15 @@
use serde::Serialize;
use thiserror::Error;
/// Coarse classification of a network failure.
///
/// Stored in the `kind` field of `LoreError::GitLabNetworkError` alongside an
/// optional free-form `detail` string. The variant names indicate the failure
/// class; the exact mapping from transport errors to variants is decided by
/// the code that constructs the error, not here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NetworkErrorKind {
Timeout,
ConnectionRefused,
DnsResolution,
Tls,
// Catch-all for failures not covered by the variants above.
Other,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorCode {
ConfigNotFound,
@@ -99,8 +108,8 @@ pub enum LoreError {
#[error("Cannot connect to GitLab at {base_url}")]
GitLabNetworkError {
base_url: String,
#[source]
source: Option<reqwest::Error>,
kind: NetworkErrorKind,
detail: Option<String>,
},
#[error(
@@ -122,9 +131,6 @@ pub enum LoreError {
#[error("Database error: {0}")]
Database(#[from] rusqlite::Error),
#[error("HTTP error: {0}")]
Http(#[from] reqwest::Error),
#[error("JSON error: {0}")]
Json(#[from] serde_json::Error),
@@ -146,8 +152,7 @@ pub enum LoreError {
#[error("Cannot connect to Ollama at {base_url}. Is it running?")]
OllamaUnavailable {
base_url: String,
#[source]
source: Option<reqwest::Error>,
detail: Option<String>,
},
#[error("Ollama model '{model}' not found. Run: ollama pull {model}")]
@@ -187,7 +192,6 @@ impl LoreError {
ErrorCode::DatabaseError
}
}
Self::Http(_) => ErrorCode::GitLabNetworkError,
Self::Json(_) => ErrorCode::InternalError,
Self::Io(_) => ErrorCode::IoError,
Self::Transform(_) => ErrorCode::TransformError,
@@ -238,7 +242,6 @@ impl LoreError {
Some("Check database file permissions.\n\n Example:\n lore doctor")
}
}
Self::Http(_) => Some("Check network connection"),
Self::NotFound(_) => {
Some("Verify the entity exists.\n\n Example:\n lore issues\n lore mrs")
}

View File

@@ -4,26 +4,16 @@ pub mod config;
pub mod cron;
pub mod cursor;
pub mod db;
pub mod dependent_queue;
pub mod error;
pub mod events_db;
pub mod file_history;
pub mod lock;
pub mod logging;
pub mod metrics;
pub mod note_parser;
pub mod path_resolver;
pub mod paths;
pub mod payloads;
pub mod project;
pub mod references;
pub mod shutdown;
pub mod sync_run;
pub mod time;
pub mod timeline;
pub mod timeline_collect;
pub mod timeline_expand;
pub mod timeline_seed;
pub mod trace;
pub use config::Config;

View File

@@ -1,6 +1,22 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use asupersync::runtime::RuntimeHandle;
/// Spawn a background task that listens for Ctrl+C.
///
/// First press: prints an interrupt message to stderr and cancels `signal`
/// so cooperative work can wind down.
/// Second press: force-exits the process with code 130.
///
/// If waiting for the signal fails, a warning is printed and the task ends
/// without cancelling `signal` or exiting, so a listener failure is never
/// mistaken for a user interrupt.
pub fn install_ctrl_c_handler(handle: &RuntimeHandle, signal: ShutdownSignal) {
    handle.spawn(async move {
        // A failed wait is not a Ctrl+C press: do not cancel the run because of it.
        if let Err(err) = asupersync::signal::ctrl_c().await {
            eprintln!("Warning: Ctrl+C handler unavailable: {err:?}");
            return;
        }
        eprintln!("\nInterrupted, finishing current batch... (Ctrl+C again to force quit)");
        signal.cancel();
        // Force-quit only on a genuine second signal; if the second wait fails,
        // let the already-requested graceful shutdown run to completion.
        if asupersync::signal::ctrl_c().await.is_ok() {
            std::process::exit(130);
        }
    });
}
/// A cooperative cancellation token for graceful shutdown.
///
/// Clone-able and cheaply checkable from any thread or async task.

View File

@@ -1,512 +0,0 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Opens a fresh in-memory database and applies all migrations to it.
fn setup_test_db() -> Connection {
    let in_memory = Path::new(":memory:");
    let db = create_connection(in_memory).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Inserts the fixture project `group/project` (GitLab project id 1) and
/// returns the row id of the new `projects` row.
fn insert_test_project(conn: &Connection) -> i64 {
    let sql = "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')";
    conn.execute(sql, []).unwrap();
    conn.last_insert_rowid()
}
/// Inserts an open fixture issue with the given `iid` (its `gitlab_id` is
/// `iid * 100`) into `project_id` and returns the new row id.
fn insert_test_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    let gitlab_id = iid * 100;
    let sql = "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test issue', 'opened', 'alice', 1000, 2000, 3000)";
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid])
        .unwrap();
    conn.last_insert_rowid()
}
/// Inserts an open fixture merge request with the given `iid` (its
/// `gitlab_id` is `iid * 100`) into `project_id` and returns the new row id.
fn insert_test_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    let gitlab_id = iid * 100;
    let sql = "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)";
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid])
        .unwrap();
    conn.last_insert_rowid()
}
/// Inserts a `documents` row for the given source entity and returns its row id.
///
/// The content hash is the synthetic value `hash_<source_id>`.
fn insert_document(
    conn: &Connection,
    source_type: &str,
    source_id: i64,
    project_id: i64,
    content: &str,
) -> i64 {
    let content_hash = format!("hash_{source_id}");
    let sql = "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) VALUES (?1, ?2, ?3, ?4, ?5)";
    conn.execute(
        sql,
        rusqlite::params![source_type, source_id, project_id, content, content_hash],
    )
    .unwrap();
    conn.last_insert_rowid()
}
fn insert_discussion(
conn: &Connection,
project_id: i64,
issue_id: Option<i64>,
mr_id: Option<i64>,
) -> i64 {
let noteable_type = if issue_id.is_some() {
"Issue"
} else {
"MergeRequest"
};
conn.execute(
"INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, ?3, ?4, ?5, 0)",
rusqlite::params![format!("disc_{}", rand::random::<u32>()), project_id, issue_id, mr_id, noteable_type],
)
.unwrap();
conn.last_insert_rowid()
}
/// Inserts a note authored by `alice` into `discussion_id` and returns its row id.
///
/// The note's `gitlab_id` is a random `u32` widened to `i64`; `is_system` is
/// stored as 0 or 1.
fn insert_note(
    conn: &Connection,
    discussion_id: i64,
    project_id: i64,
    body: &str,
    is_system: bool,
) -> i64 {
    let gitlab_id = i64::from(rand::random::<u32>());
    let system_flag = i32::from(is_system);
    let sql = "INSERT INTO notes (gitlab_id, discussion_id, project_id, is_system, author_username, body, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, ?4, 'alice', ?5, 5000, 5000, 5000)";
    conn.execute(
        sql,
        rusqlite::params![gitlab_id, discussion_id, project_id, system_flag, body],
    )
    .unwrap();
    conn.last_insert_rowid()
}
#[tokio::test]
async fn test_seed_empty_query_returns_empty() {
    let db = setup_test_db();

    // An empty query string must yield neither seed entities nor evidence notes.
    let outcome = seed_timeline(&db, None, "", None, None, 50, 10).await;
    let seeded = outcome.unwrap();

    assert!(seeded.seed_entities.is_empty());
    assert!(seeded.evidence_notes.is_empty());
}
#[tokio::test]
async fn test_seed_no_matches_returns_empty() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 1);
    insert_document(&db, "issue", issue, project, "unrelated content here");

    // The query term occurs in no document, so nothing may be seeded.
    let outcome = seed_timeline(&db, None, "nonexistent_xyzzy_query", None, None, 50, 10).await;
    let seeded = outcome.unwrap();

    assert!(seeded.seed_entities.is_empty());
}
#[tokio::test]
async fn test_seed_finds_issue() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 42);
    let body = "authentication error in login flow";
    insert_document(&db, "issue", issue, project, body);

    let outcome = seed_timeline(&db, None, "authentication", None, None, 50, 10).await;
    let seeded = outcome.unwrap();

    // Exactly the one matching issue is returned, with its iid and project path.
    assert_eq!(seeded.seed_entities.len(), 1);
    let seed = &seeded.seed_entities[0];
    assert_eq!(seed.entity_type, "issue");
    assert_eq!(seed.entity_iid, 42);
    assert_eq!(seed.project_path, "group/project");
}
#[tokio::test]
async fn test_seed_finds_mr() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let mr = insert_test_mr(&db, project, 99);
    insert_document(&db, "merge_request", mr, project, "fix authentication bug");

    let outcome = seed_timeline(&db, None, "authentication", None, None, 50, 10).await;
    let seeded = outcome.unwrap();

    // The matching merge request document seeds a single merge_request entity.
    assert_eq!(seeded.seed_entities.len(), 1);
    let seed = &seeded.seed_entities[0];
    assert_eq!(seed.entity_type, "merge_request");
    assert_eq!(seed.entity_iid, 99);
}
#[tokio::test]
async fn test_seed_deduplicates_entities() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
let issue_id = insert_test_issue(&conn, project_id, 10);
// Two documents referencing the same issue
insert_document(
&conn,
"issue",
issue_id,
project_id,
"authentication error first doc",
);
let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
insert_document(
&conn,
"discussion",
disc_id,
project_id,
"authentication error second doc",
);
let result = seed_timeline(&conn, None, "authentication", None, None, 50, 10)
.await
.unwrap();
// Should deduplicate: both map to the same issue
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].entity_iid, 10);
}
#[tokio::test]
async fn test_seed_resolves_discussion_to_parent() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 7);
    let discussion = insert_discussion(&db, project, Some(issue), None);
    insert_document(&db, "discussion", discussion, project, "deployment pipeline failed");

    let outcome = seed_timeline(&db, None, "deployment", None, None, 50, 10).await;
    let seeded = outcome.unwrap();

    // A matching discussion document is reported as its parent issue.
    assert_eq!(seeded.seed_entities.len(), 1);
    let seed = &seeded.seed_entities[0];
    assert_eq!(seed.entity_type, "issue");
    assert_eq!(seed.entity_iid, 7);
}
#[tokio::test]
async fn test_seed_evidence_capped() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 1);

    // Fifteen discussions, each with a document and a note mentioning "deployment".
    for n in 0..15 {
        let discussion = insert_discussion(&db, project, Some(issue), None);
        let doc_text = format!("deployment issue number {n}");
        insert_document(&db, "discussion", discussion, project, &doc_text);
        let note_text = format!("deployment note {n}");
        insert_note(&db, discussion, project, &note_text, false);
    }

    // The final argument (5) is the evidence cap being exercised.
    let outcome = seed_timeline(&db, None, "deployment", None, None, 50, 5).await;
    let seeded = outcome.unwrap();

    assert!(seeded.evidence_notes.len() <= 5);
}
#[tokio::test]
async fn test_seed_evidence_snippet_truncated() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 1);
    let discussion = insert_discussion(&db, project, Some(issue), None);
    insert_document(&db, "discussion", discussion, project, "deployment configuration");

    // A 500-character note body, well above the 200-character snippet limit checked below.
    let oversized_body = "x".repeat(500);
    insert_note(&db, discussion, project, &oversized_body, false);

    let outcome = seed_timeline(&db, None, "deployment", None, None, 50, 10).await;
    let seeded = outcome.unwrap();

    assert!(!seeded.evidence_notes.is_empty());
    match &seeded.evidence_notes[0].event_type {
        TimelineEventType::NoteEvidence { snippet, .. } => {
            assert!(snippet.chars().count() <= 200);
        }
        _ => panic!("Expected NoteEvidence"),
    }
}
#[tokio::test]
async fn test_seed_respects_project_filter() {
let conn = setup_test_db();
let project_id = insert_test_project(&conn);
// Insert a second project
conn.execute(
"INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (2, 'other/repo', 'https://gitlab.com/other/repo')",
[],
)
.unwrap();
let project2_id = conn.last_insert_rowid();
let issue1_id = insert_test_issue(&conn, project_id, 1);
insert_document(
&conn,
"issue",
issue1_id,
project_id,
"authentication error",
);
let issue2_id = insert_test_issue(&conn, project2_id, 2);
insert_document(
&conn,
"issue",
issue2_id,
project2_id,
"authentication error",
);
// Filter to project 1 only
let result = seed_timeline(
&conn,
None,
"authentication",
Some(project_id),
None,
50,
10,
)
.await
.unwrap();
assert_eq!(result.seed_entities.len(), 1);
assert_eq!(result.seed_entities[0].project_path, "group/project");
}
// ─── Matched discussion tests ───────────────────────────────────────────────
#[tokio::test]
async fn test_seed_captures_matched_discussions_from_discussion_doc() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 1);
    let discussion = insert_discussion(&db, project, Some(issue), None);
    let body = "deployment pipeline authentication";
    insert_document(&db, "discussion", discussion, project, body);

    let outcome = seed_timeline(&db, None, "deployment", None, None, 50, 10).await;
    let seeded = outcome.unwrap();

    // The matched discussion is recorded together with its parent issue.
    assert_eq!(seeded.matched_discussions.len(), 1);
    let matched = &seeded.matched_discussions[0];
    assert_eq!(matched.discussion_id, discussion);
    assert_eq!(matched.entity_type, "issue");
    assert_eq!(matched.entity_id, issue);
}
#[tokio::test]
async fn test_seed_captures_matched_discussions_from_note_doc() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 1);
    let discussion = insert_discussion(&db, project, Some(issue), None);
    let note = insert_note(&db, discussion, project, "note about deployment", false);
    // Only the note has a document; the discussion itself has none.
    insert_document(&db, "note", note, project, "deployment configuration details");

    let outcome = seed_timeline(&db, None, "deployment", None, None, 50, 10).await;
    let seeded = outcome.unwrap();

    assert_eq!(
        seeded.matched_discussions.len(),
        1,
        "Note doc should resolve to parent discussion"
    );
    let matched = &seeded.matched_discussions[0];
    assert_eq!(matched.discussion_id, discussion);
    assert_eq!(matched.entity_type, "issue");
}
#[tokio::test]
async fn test_seed_deduplicates_matched_discussions() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 1);
    let discussion = insert_discussion(&db, project, Some(issue), None);

    // A discussion document and a note document that both point at the same discussion.
    insert_document(&db, "discussion", discussion, project, "deployment pipeline first doc");
    let note = insert_note(&db, discussion, project, "deployment note", false);
    insert_document(&db, "note", note, project, "deployment pipeline second doc");

    let outcome = seed_timeline(&db, None, "deployment", None, None, 50, 10).await;
    let seeded = outcome.unwrap();

    assert_eq!(
        seeded.matched_discussions.len(),
        1,
        "Same discussion_id from two docs should deduplicate"
    );
}
#[tokio::test]
async fn test_seed_matched_discussions_have_correct_parent_entity() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let mr = insert_test_mr(&db, project, 99);
    // The discussion hangs off a merge request, not an issue.
    let discussion = insert_discussion(&db, project, None, Some(mr));
    let body = "deployment pipeline for merge request";
    insert_document(&db, "discussion", discussion, project, body);

    let outcome = seed_timeline(&db, None, "deployment", None, None, 50, 10).await;
    let seeded = outcome.unwrap();

    assert_eq!(seeded.matched_discussions.len(), 1);
    let matched = &seeded.matched_discussions[0];
    assert_eq!(matched.entity_type, "merge_request");
    assert_eq!(matched.entity_id, mr);
}
// ─── seed_timeline_direct tests ─────────────────────────────────────────────
#[test]
fn test_direct_seed_resolves_entity() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    insert_test_issue(&db, project, 42);

    let seeded = seed_timeline_direct(&db, "issue", 42, None).unwrap();

    // Direct seeding by type and iid resolves exactly that issue.
    assert_eq!(seeded.seed_entities.len(), 1);
    let seed = &seeded.seed_entities[0];
    assert_eq!(seed.entity_type, "issue");
    assert_eq!(seed.entity_iid, 42);
    assert_eq!(seed.project_path, "group/project");
}
#[test]
fn test_direct_seed_gathers_all_discussions() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 42);

    // Three discussions attached to the issue.
    let first = insert_discussion(&db, project, Some(issue), None);
    let second = insert_discussion(&db, project, Some(issue), None);
    let third = insert_discussion(&db, project, Some(issue), None);

    let seeded = seed_timeline_direct(&db, "issue", 42, None).unwrap();

    assert_eq!(seeded.matched_discussions.len(), 3);
    let found: Vec<i64> = seeded
        .matched_discussions
        .iter()
        .map(|matched| matched.discussion_id)
        .collect();
    for expected in [first, second, third] {
        assert!(found.contains(&expected));
    }
}
#[test]
fn test_direct_seed_no_evidence_notes() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 42);
    let discussion = insert_discussion(&db, project, Some(issue), None);
    // A note exists, but direct seeding is expected not to surface it as evidence.
    insert_note(&db, discussion, project, "some note body", false);

    let seeded = seed_timeline_direct(&db, "issue", 42, None).unwrap();

    assert!(
        seeded.evidence_notes.is_empty(),
        "Direct seeding should not produce evidence notes"
    );
}
#[test]
fn test_direct_seed_search_mode_is_direct() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    insert_test_issue(&db, project, 42);

    let seeded = seed_timeline_direct(&db, "issue", 42, None).unwrap();

    // Direct seeding reports its search mode as "direct".
    assert_eq!(seeded.search_mode, "direct");
}
#[test]
fn test_direct_seed_not_found() {
    let db = setup_test_db();
    insert_test_project(&db);

    // No issue with iid 999 exists, so the lookup must fail.
    let outcome = seed_timeline_direct(&db, "issue", 999, None);

    assert!(outcome.is_err());
}
#[test]
fn test_direct_seed_mr() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let mr = insert_test_mr(&db, project, 99);
    let discussion = insert_discussion(&db, project, None, Some(mr));

    let seeded = seed_timeline_direct(&db, "merge_request", 99, None).unwrap();

    // The merge request is the single seed and its discussion is matched.
    assert_eq!(seeded.seed_entities.len(), 1);
    let seed = &seeded.seed_entities[0];
    assert_eq!(seed.entity_type, "merge_request");
    assert_eq!(seed.entity_iid, 99);

    assert_eq!(seeded.matched_discussions.len(), 1);
    assert_eq!(seeded.matched_discussions[0].discussion_id, discussion);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,80 @@
/// Kind of entity a document is derived from.
///
/// With `rename_all = "snake_case"` the variants serialize as `issue`,
/// `merge_request`, `discussion` and `note` — the same strings that
/// `SourceType::as_str` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SourceType {
Issue,
MergeRequest,
Discussion,
Note,
}
impl SourceType {
pub fn as_str(&self) -> &'static str {
match self {
Self::Issue => "issue",
Self::MergeRequest => "merge_request",
Self::Discussion => "discussion",
Self::Note => "note",
}
}
pub fn parse(s: &str) -> Option<Self> {
match s.to_lowercase().as_str() {
"issue" | "issues" => Some(Self::Issue),
"mr" | "mrs" | "merge_request" | "merge_requests" => Some(Self::MergeRequest),
"discussion" | "discussions" => Some(Self::Discussion),
"note" | "notes" => Some(Self::Note),
_ => None,
}
}
}
/// Displays a source type as its canonical name (the value of `as_str`).
impl std::fmt::Display for SourceType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = self.as_str();
        f.write_str(name)
    }
}
/// In-memory representation of one document, as returned by the document
/// extractors in this file (e.g. `extract_discussion_document`).
///
/// Several field names match `documents` table columns (`source_type`,
/// `source_id`, `project_id`, `content_text`, `content_hash`) — presumably
/// one value maps to one row; confirm against the code that persists it.
#[derive(Debug, Clone)]
pub struct DocumentData {
/// Kind of entity this document was built from.
pub source_type: SourceType,
/// Id of the source entity (presumably its local row id — TODO confirm).
pub source_id: i64,
pub project_id: i64,
pub author_username: Option<String>,
pub labels: Vec<String>,
pub paths: Vec<String>,
/// Hash of `labels` (presumably produced by `compute_list_hash` — TODO confirm).
pub labels_hash: String,
/// Hash of `paths` (presumably produced by `compute_list_hash` — TODO confirm).
pub paths_hash: String,
// NOTE(review): timestamps look like epoch milliseconds (`format_date` in
// this file takes milliseconds) — confirm.
pub created_at: i64,
pub updated_at: i64,
pub url: Option<String>,
pub title: Option<String>,
pub content_text: String,
/// Hash of `content_text` (presumably produced by `compute_content_hash` — TODO confirm).
pub content_hash: String,
pub is_truncated: bool,
pub truncated_reason: Option<String>,
}
/// Returns the SHA-256 digest of `content`'s UTF-8 bytes as lowercase hex.
pub fn compute_content_hash(content: &str) -> String {
    let digest = {
        let mut state = Sha256::new();
        state.update(content.as_bytes());
        state.finalize()
    };
    format!("{:x}", digest)
}
/// Returns an order-independent SHA-256 hash (lowercase hex) of a string list.
///
/// The items are sorted, then fed to the hasher separated by a single `\n`
/// (no trailing separator), so any permutation of the same items produces
/// the same hash. `items` itself is left untouched.
pub fn compute_list_hash(items: &[String]) -> String {
    let mut sorted: Vec<&str> = items.iter().map(String::as_str).collect();
    sorted.sort();

    let mut hasher = Sha256::new();
    if let Some((head, tail)) = sorted.split_first() {
        hasher.update(head.as_bytes());
        for item in tail {
            hasher.update(b"\n");
            hasher.update(item.as_bytes());
        }
    }
    format!("{:x}", hasher.finalize())
}
/// Formats a millisecond timestamp as `YYYY-MM-DD`.
///
/// Returns the literal string "unknown" when `ms` cannot be converted to a
/// `DateTime`.
fn format_date(ms: i64) -> String {
    match DateTime::from_timestamp_millis(ms) {
        Some(moment) => moment.format("%Y-%m-%d").to_string(),
        None => "unknown".to_string(),
    }
}

View File

@@ -0,0 +1,216 @@
pub fn extract_discussion_document(
conn: &Connection,
discussion_id: i64,
) -> Result<Option<DocumentData>> {
let disc_row = conn.query_row(
"SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id,
p.path_with_namespace, p.id AS project_id
FROM discussions d
JOIN projects p ON p.id = d.project_id
WHERE d.id = ?1",
rusqlite::params![discussion_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, String>(1)?,
row.get::<_, Option<i64>>(2)?,
row.get::<_, Option<i64>>(3)?,
row.get::<_, String>(4)?,
row.get::<_, i64>(5)?,
))
},
);
let (id, noteable_type, issue_id, merge_request_id, path_with_namespace, project_id) =
match disc_row {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let (_parent_iid, parent_title, parent_web_url, parent_type_prefix, labels) =
match noteable_type.as_str() {
"Issue" => {
let parent_id = match issue_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, format!("Issue #{}", iid), labels)
}
"MergeRequest" => {
let parent_id = match merge_request_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, format!("MR !{}", iid), labels)
}
_ => return Ok(None),
};
let mut note_stmt = conn.prepare_cached(
"SELECT n.author_username, n.body, n.created_at, n.gitlab_id,
n.note_type, n.position_old_path, n.position_new_path
FROM notes n
WHERE n.discussion_id = ?1 AND n.is_system = 0
ORDER BY n.created_at ASC, n.id ASC",
)?;
struct NoteRow {
author: Option<String>,
body: Option<String>,
created_at: i64,
gitlab_id: i64,
old_path: Option<String>,
new_path: Option<String>,
}
let notes: Vec<NoteRow> = note_stmt
.query_map(rusqlite::params![id], |row| {
Ok(NoteRow {
author: row.get(0)?,
body: row.get(1)?,
created_at: row.get(2)?,
gitlab_id: row.get(3)?,
old_path: row.get(5)?,
new_path: row.get(6)?,
})
})?
.collect::<std::result::Result<Vec<_>, _>>()?;
if notes.is_empty() {
return Ok(None);
}
let mut path_set = BTreeSet::new();
for note in &notes {
if let Some(ref p) = note.old_path
&& !p.is_empty()
{
path_set.insert(p.clone());
}
if let Some(ref p) = note.new_path
&& !p.is_empty()
{
path_set.insert(p.clone());
}
}
let paths: Vec<String> = path_set.into_iter().collect();
let first_note_gitlab_id = notes[0].gitlab_id;
let url = parent_web_url
.as_ref()
.map(|wu| format!("{}#note_{}", wu, first_note_gitlab_id));
let author_username = notes[0].author.clone();
let display_title = parent_title.as_deref().unwrap_or("(untitled)");
let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
let paths_json = serde_json::to_string(&paths).unwrap_or_else(|_| "[]".to_string());
let mut content = format!(
"[[Discussion]] {}: {}\nProject: {}\n",
parent_type_prefix, display_title, path_with_namespace
);
if let Some(ref u) = url {
let _ = writeln!(content, "URL: {}", u);
}
let _ = writeln!(content, "Labels: {}", labels_json);
if !paths.is_empty() {
let _ = writeln!(content, "Files: {}", paths_json);
}
let note_contents: Vec<NoteContent> = notes
.iter()
.map(|note| NoteContent {
author: note.author.as_deref().unwrap_or("unknown").to_string(),
date: format_date(note.created_at),
body: note.body.as_deref().unwrap_or("").to_string(),
})
.collect();
let header_len = content.len() + "\n--- Thread ---\n\n".len();
let thread_budget = MAX_DISCUSSION_BYTES.saturating_sub(header_len);
let thread_result = truncate_discussion(&note_contents, thread_budget);
content.push_str("\n--- Thread ---\n\n");
content.push_str(&thread_result.content);
let created_at = notes[0].created_at;
let updated_at = notes.last().map(|n| n.created_at).unwrap_or(created_at);
let content_hash = compute_content_hash(&content);
let labels_hash = compute_list_hash(&labels);
let paths_hash = compute_list_hash(&paths);
Ok(Some(DocumentData {
source_type: SourceType::Discussion,
source_id: id,
project_id,
author_username,
labels,
paths,
labels_hash,
paths_hash,
created_at,
updated_at,
url,
title: None,
content_text: content,
content_hash,
is_truncated: thread_result.is_truncated,
truncated_reason: thread_result.reason.map(|r| r.as_str().to_string()),
}))
}

View File

@@ -0,0 +1,110 @@
/// Builds the searchable document for one issue.
///
/// Loads the issue with its project and labels, then renders a metadata header
/// (title, project, URL, labels, state, author) followed by the description, if any.
///
/// Returns `Ok(None)` when no issue with `issue_id` exists; other database errors are
/// propagated. The rendered text is passed through `truncate_hard_cap`, and the content
/// hash is computed over the capped text.
pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option<DocumentData>> {
    /// The selected issue columns, in query order.
    struct IssueRow {
        id: i64,
        iid: i64,
        title: Option<String>,
        description: Option<String>,
        state: String,
        author_username: Option<String>,
        created_at: i64,
        updated_at: i64,
        web_url: Option<String>,
        path_with_namespace: String,
        project_id: i64,
    }

    let lookup = conn.query_row(
        "SELECT i.id, i.iid, i.title, i.description, i.state, i.author_username,
i.created_at, i.updated_at, i.web_url,
p.path_with_namespace, p.id AS project_id
FROM issues i
JOIN projects p ON p.id = i.project_id
WHERE i.id = ?1",
        rusqlite::params![issue_id],
        |row| {
            Ok(IssueRow {
                id: row.get(0)?,
                iid: row.get(1)?,
                title: row.get(2)?,
                description: row.get(3)?,
                state: row.get(4)?,
                author_username: row.get(5)?,
                created_at: row.get(6)?,
                updated_at: row.get(7)?,
                web_url: row.get(8)?,
                path_with_namespace: row.get(9)?,
                project_id: row.get(10)?,
            })
        },
    );
    let issue = match lookup {
        Ok(found) => found,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(other) => return Err(other.into()),
    };

    let mut label_stmt = conn.prepare_cached(
        "SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
    )?;
    let labels: Vec<String> = label_stmt
        .query_map(rusqlite::params![issue.id], |row| row.get::<_, String>(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    let display_title = issue.title.as_deref().unwrap_or("(untitled)");
    let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());

    let mut content = format!(
        "[[Issue]] #{}: {}\nProject: {}\n",
        issue.iid, display_title, issue.path_with_namespace
    );
    if let Some(link) = issue.web_url.as_deref() {
        let _ = writeln!(content, "URL: {}", link);
    }
    let _ = writeln!(content, "Labels: {}", labels_json);
    let _ = writeln!(content, "State: {}", issue.state);
    if let Some(author) = issue.author_username.as_deref() {
        let _ = writeln!(content, "Author: @{}", author);
    }
    if let Some(desc) = issue.description.as_deref() {
        content.push_str("\n--- Description ---\n\n");
        // Cap the description before appending so an enormous description cannot blow up
        // the size of `content`.
        let pre_trunc = pre_truncate_description(desc, MAX_DOCUMENT_BYTES_HARD);
        if pre_trunc.was_truncated {
            warn!(
                iid = issue.iid,
                original_bytes = pre_trunc.original_bytes,
                "Issue description truncated (oversized)"
            );
        }
        content.push_str(&pre_trunc.content);
    }

    let document_title = display_title.to_string();
    let labels_hash = compute_list_hash(&labels);
    let paths_hash = compute_list_hash(&[]);
    let hard_cap = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&hard_cap.content);

    Ok(Some(DocumentData {
        source_type: SourceType::Issue,
        source_id: issue.id,
        project_id: issue.project_id,
        author_username: issue.author_username,
        labels,
        paths: Vec::new(),
        labels_hash,
        paths_hash,
        created_at: issue.created_at,
        updated_at: issue.updated_at,
        url: issue.web_url,
        title: Some(document_title),
        content_text: hard_cap.content,
        content_hash,
        is_truncated: hard_cap.is_truncated,
        truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()),
    }))
}

View File

@@ -0,0 +1,24 @@
use chrono::DateTime;
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::{BTreeSet, HashMap};
use std::fmt::Write as _;
use super::truncation::{
MAX_DISCUSSION_BYTES, MAX_DOCUMENT_BYTES_HARD, NoteContent, pre_truncate_description,
truncate_discussion, truncate_hard_cap,
};
use crate::core::error::Result;
use crate::core::time::ms_to_iso;
use tracing::warn;
include!("common.rs");
include!("issues.rs");
include!("mrs.rs");
include!("discussions.rs");
include!("notes.rs");
#[cfg(test)]
#[path = "extractor_tests.rs"]
mod tests;

View File

@@ -0,0 +1,119 @@
/// Builds the searchable document for one merge request.
///
/// Loads the merge request with its project and labels, then renders a metadata header
/// (title, project, URL, labels, state, author, source -> target branch) followed by the
/// description, if any.
///
/// Returns `Ok(None)` when no merge request with `mr_id` exists; other database errors are
/// propagated. A NULL state is rendered as `unknown`, and NULL timestamps are stored as `0`.
pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<DocumentData>> {
    /// The selected merge request columns, in query order.
    struct MrRow {
        id: i64,
        iid: i64,
        title: Option<String>,
        description: Option<String>,
        state: Option<String>,
        author_username: Option<String>,
        source_branch: Option<String>,
        target_branch: Option<String>,
        created_at: Option<i64>,
        updated_at: Option<i64>,
        web_url: Option<String>,
        path_with_namespace: String,
        project_id: i64,
    }

    let lookup = conn.query_row(
        "SELECT m.id, m.iid, m.title, m.description, m.state, m.author_username,
m.source_branch, m.target_branch,
m.created_at, m.updated_at, m.web_url,
p.path_with_namespace, p.id AS project_id
FROM merge_requests m
JOIN projects p ON p.id = m.project_id
WHERE m.id = ?1",
        rusqlite::params![mr_id],
        |row| {
            Ok(MrRow {
                id: row.get(0)?,
                iid: row.get(1)?,
                title: row.get(2)?,
                description: row.get(3)?,
                state: row.get(4)?,
                author_username: row.get(5)?,
                source_branch: row.get(6)?,
                target_branch: row.get(7)?,
                created_at: row.get(8)?,
                updated_at: row.get(9)?,
                web_url: row.get(10)?,
                path_with_namespace: row.get(11)?,
                project_id: row.get(12)?,
            })
        },
    );
    let mr = match lookup {
        Ok(found) => found,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(other) => return Err(other.into()),
    };

    let mut label_stmt = conn.prepare_cached(
        "SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
    )?;
    let labels: Vec<String> = label_stmt
        .query_map(rusqlite::params![mr.id], |row| row.get::<_, String>(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    let display_title = mr.title.as_deref().unwrap_or("(untitled)");
    let display_state = mr.state.as_deref().unwrap_or("unknown");
    let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());

    let mut content = format!(
        "[[MergeRequest]] !{}: {}\nProject: {}\n",
        mr.iid, display_title, mr.path_with_namespace
    );
    if let Some(link) = mr.web_url.as_deref() {
        let _ = writeln!(content, "URL: {}", link);
    }
    let _ = writeln!(content, "Labels: {}", labels_json);
    let _ = writeln!(content, "State: {}", display_state);
    if let Some(author) = mr.author_username.as_deref() {
        let _ = writeln!(content, "Author: @{}", author);
    }
    // The branch line is only written when both ends are known.
    if let (Some(src), Some(tgt)) = (mr.source_branch.as_deref(), mr.target_branch.as_deref()) {
        let _ = writeln!(content, "Source: {} -> {}", src, tgt);
    }
    if let Some(desc) = mr.description.as_deref() {
        content.push_str("\n--- Description ---\n\n");
        // Cap the description before appending so an enormous description cannot blow up
        // the size of `content`.
        let pre_trunc = pre_truncate_description(desc, MAX_DOCUMENT_BYTES_HARD);
        if pre_trunc.was_truncated {
            warn!(
                iid = mr.iid,
                original_bytes = pre_trunc.original_bytes,
                "MR description truncated (oversized)"
            );
        }
        content.push_str(&pre_trunc.content);
    }

    let document_title = display_title.to_string();
    let labels_hash = compute_list_hash(&labels);
    let paths_hash = compute_list_hash(&[]);
    let hard_cap = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&hard_cap.content);

    Ok(Some(DocumentData {
        source_type: SourceType::MergeRequest,
        source_id: mr.id,
        project_id: mr.project_id,
        author_username: mr.author_username,
        labels,
        paths: Vec::new(),
        labels_hash,
        paths_hash,
        created_at: mr.created_at.unwrap_or(0),
        updated_at: mr.updated_at.unwrap_or(0),
        url: mr.web_url,
        title: Some(document_title),
        content_text: hard_cap.content,
        content_hash,
        is_truncated: hard_cap.is_truncated,
        truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()),
    }))
}

View File

@@ -0,0 +1,514 @@
pub fn extract_note_document(conn: &Connection, note_id: i64) -> Result<Option<DocumentData>> {
let row = conn.query_row(
"SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system,
n.created_at, n.updated_at, n.position_new_path, n.position_new_line,
n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by,
d.noteable_type, d.issue_id, d.merge_request_id,
p.path_with_namespace, p.id AS project_id
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN projects p ON n.project_id = p.id
WHERE n.id = ?1",
rusqlite::params![note_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, Option<String>>(3)?,
row.get::<_, Option<String>>(4)?,
row.get::<_, bool>(5)?,
row.get::<_, i64>(6)?,
row.get::<_, i64>(7)?,
row.get::<_, Option<String>>(8)?,
row.get::<_, Option<i64>>(9)?,
row.get::<_, Option<String>>(10)?,
row.get::<_, Option<i64>>(11)?,
row.get::<_, bool>(12)?,
row.get::<_, bool>(13)?,
row.get::<_, Option<String>>(14)?,
row.get::<_, String>(15)?,
row.get::<_, Option<i64>>(16)?,
row.get::<_, Option<i64>>(17)?,
row.get::<_, String>(18)?,
row.get::<_, i64>(19)?,
))
},
);
let (
_id,
gitlab_id,
author_username,
body,
note_type,
is_system,
created_at,
updated_at,
position_new_path,
position_new_line,
position_old_path,
_position_old_line,
resolvable,
resolved,
_resolved_by,
noteable_type,
issue_id,
merge_request_id,
path_with_namespace,
project_id,
) = match row {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
if is_system {
return Ok(None);
}
let (parent_iid, parent_title, parent_web_url, parent_type_label, labels) =
match noteable_type.as_str() {
"Issue" => {
let parent_id = match issue_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, "Issue", labels)
}
"MergeRequest" => {
let parent_id = match merge_request_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, "MergeRequest", labels)
}
_ => return Ok(None),
};
build_note_document(
note_id,
gitlab_id,
author_username,
body,
note_type,
created_at,
updated_at,
position_new_path,
position_new_line,
position_old_path,
resolvable,
resolved,
parent_iid,
parent_title.as_deref(),
parent_web_url.as_deref(),
&labels,
parent_type_label,
&path_with_namespace,
project_id,
)
}
/// Details of a note's parent (an issue or a merge request) as returned by
/// `fetch_parent_metadata`.
#[derive(Debug, Clone)]
pub struct ParentMetadata {
    /// The parent's `iid` column.
    pub iid: i64,
    /// The parent's title; `None` when the column is NULL.
    pub title: Option<String>,
    /// The parent's web URL; `None` when the column is NULL.
    pub web_url: Option<String>,
    /// Names of the labels attached to the parent, ordered by name.
    pub labels: Vec<String>,
    /// The project path that was supplied when the metadata was fetched.
    pub project_path: String,
}
/// Memoizes parent lookups so that many notes under the same issue or merge request
/// trigger the parent and label queries only once.
///
/// Entries are keyed by `(noteable_type, parent_id)`. Unresolvable parents are stored as
/// `None`, so a missing parent is not queried again either.
pub struct ParentMetadataCache {
    cache: HashMap<(String, i64), Option<ParentMetadata>>,
}

impl Default for ParentMetadataCache {
    fn default() -> Self {
        Self::new()
    }
}

impl ParentMetadataCache {
    /// Creates an empty cache.
    pub fn new() -> Self {
        Self {
            cache: HashMap::new(),
        }
    }

    /// Returns the metadata for the given parent, fetching and caching it on first use.
    ///
    /// `project_path` is only used when the entry has to be fetched; a cached entry keeps
    /// the path it was first fetched with.
    ///
    /// Returns `Ok(None)` when the parent cannot be resolved. If the fetch itself fails the
    /// error is propagated and nothing is cached, so a later call retries the lookup.
    pub fn get_or_fetch(
        &mut self,
        conn: &Connection,
        noteable_type: &str,
        parent_id: i64,
        project_path: &str,
    ) -> Result<Option<&ParentMetadata>> {
        // A single `entry` lookup hashes the key once and needs no key clone, unlike a
        // `contains_key` + `insert` + `get` sequence.
        let slot = match self.cache.entry((noteable_type.to_string(), parent_id)) {
            std::collections::hash_map::Entry::Occupied(hit) => hit.into_mut(),
            std::collections::hash_map::Entry::Vacant(miss) => miss.insert(
                fetch_parent_metadata(conn, noteable_type, parent_id, project_path)?,
            ),
        };
        Ok(slot.as_ref())
    }
}
/// Loads the `iid`, title, web URL and labels (ordered by name) of a note's parent.
///
/// `noteable_type` selects the tables to read: `"Issue"` or `"MergeRequest"`. Any other
/// value, or a parent row that does not exist, yields `Ok(None)`. Other database errors are
/// propagated. `project_path` is copied into the returned metadata unchanged.
fn fetch_parent_metadata(
    conn: &Connection,
    noteable_type: &str,
    parent_id: i64,
    project_path: &str,
) -> Result<Option<ParentMetadata>> {
    // Only the SQL differs between the two parent kinds; the flow below is shared.
    let (parent_sql, labels_sql) = match noteable_type {
        "Issue" => (
            "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
            "SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
        ),
        "MergeRequest" => (
            "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
            "SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
        ),
        _ => return Ok(None),
    };

    let lookup = conn.query_row(parent_sql, rusqlite::params![parent_id], |row| {
        Ok((
            row.get::<_, i64>(0)?,
            row.get::<_, Option<String>>(1)?,
            row.get::<_, Option<String>>(2)?,
        ))
    });
    let (iid, title, web_url) = match lookup {
        Ok(found) => found,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(other) => return Err(other.into()),
    };

    let mut label_stmt = conn.prepare_cached(labels_sql)?;
    let labels: Vec<String> = label_stmt
        .query_map(rusqlite::params![parent_id], |row| row.get::<_, String>(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    Ok(Some(ParentMetadata {
        iid,
        title,
        web_url,
        labels,
        project_path: project_path.to_string(),
    }))
}
/// Builds the searchable document for a single note, resolving the parent issue or merge
/// request through `cache` so repeated notes of the same parent reuse one lookup.
///
/// Returns `Ok(None)` when the note does not exist, is a system note, or its parent cannot
/// be resolved (unknown `noteable_type`, NULL parent id, or missing parent row). Any other
/// database error is propagated.
pub fn extract_note_document_cached(
    conn: &Connection,
    note_id: i64,
    cache: &mut ParentMetadataCache,
) -> Result<Option<DocumentData>> {
    let row = conn.query_row(
        "SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system,
n.created_at, n.updated_at, n.position_new_path, n.position_new_line,
n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by,
d.noteable_type, d.issue_id, d.merge_request_id,
p.path_with_namespace, p.id AS project_id
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN projects p ON n.project_id = p.id
WHERE n.id = ?1",
        rusqlite::params![note_id],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, i64>(1)?,
                row.get::<_, Option<String>>(2)?,
                row.get::<_, Option<String>>(3)?,
                row.get::<_, Option<String>>(4)?,
                row.get::<_, bool>(5)?,
                row.get::<_, i64>(6)?,
                row.get::<_, i64>(7)?,
                row.get::<_, Option<String>>(8)?,
                row.get::<_, Option<i64>>(9)?,
                row.get::<_, Option<String>>(10)?,
                row.get::<_, Option<i64>>(11)?,
                row.get::<_, bool>(12)?,
                row.get::<_, bool>(13)?,
                row.get::<_, Option<String>>(14)?,
                row.get::<_, String>(15)?,
                row.get::<_, Option<i64>>(16)?,
                row.get::<_, Option<i64>>(17)?,
                row.get::<_, String>(18)?,
                row.get::<_, i64>(19)?,
            ))
        },
    );
    let (
        _id,
        gitlab_id,
        author_username,
        body,
        note_type,
        is_system,
        created_at,
        updated_at,
        position_new_path,
        position_new_line,
        position_old_path,
        _position_old_line,
        resolvable,
        resolved,
        _resolved_by,
        noteable_type,
        issue_id,
        merge_request_id,
        path_with_namespace,
        project_id,
    ) = match row {
        Ok(r) => r,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
    };

    // System notes never produce a document.
    if is_system {
        return Ok(None);
    }

    // The parent id lives in a different column depending on the parent kind.
    let parent_key = match noteable_type.as_str() {
        "Issue" => issue_id,
        "MergeRequest" => merge_request_id,
        _ => None,
    };
    let Some(parent_id) = parent_key else {
        return Ok(None);
    };

    let Some(parent) =
        cache.get_or_fetch(conn, &noteable_type, parent_id, &path_with_namespace)?
    else {
        return Ok(None);
    };

    // `build_note_document` only needs a label slice, so borrow straight from the cache
    // entry instead of cloning the whole label list for every note.
    build_note_document(
        note_id,
        gitlab_id,
        author_username,
        body,
        note_type,
        created_at,
        updated_at,
        position_new_path,
        position_new_line,
        position_old_path,
        resolvable,
        resolved,
        parent.iid,
        parent.title.as_deref(),
        parent.web_url.as_deref(),
        &parent.labels,
        noteable_type.as_str(),
        &path_with_namespace,
        project_id,
    )
}
/// Renders one note plus its parent's metadata into a `DocumentData`.
///
/// The text starts with a `key: value` header (note id, project, parent, note type, author,
/// creation time, and optionally resolution state, diff path, labels and URL) followed by
/// the note body. The whole text is passed through `truncate_hard_cap`, and the content
/// hash is computed over the capped text.
///
/// `parent_type_label` equal to `"Issue"` renders the parent as `Issue #<iid>`; any other
/// value renders it as `MR !<iid>`. This function always returns `Ok(Some(..))`.
#[allow(clippy::too_many_arguments)]
fn build_note_document(
    note_id: i64,
    gitlab_id: i64,
    author_username: Option<String>,
    body: Option<String>,
    note_type: Option<String>,
    created_at: i64,
    updated_at: i64,
    position_new_path: Option<String>,
    position_new_line: Option<i64>,
    position_old_path: Option<String>,
    resolvable: bool,
    resolved: bool,
    parent_iid: i64,
    parent_title: Option<&str>,
    parent_web_url: Option<&str>,
    labels: &[String],
    parent_type_label: &str,
    path_with_namespace: &str,
    project_id: i64,
) -> Result<Option<DocumentData>> {
    // Unique, sorted, non-empty file paths taken from the note's diff position.
    let paths: Vec<String> = [position_old_path.as_deref(), position_new_path.as_deref()]
        .into_iter()
        .flatten()
        .filter(|path| !path.is_empty())
        .map(|path| path.to_string())
        .collect::<BTreeSet<String>>()
        .into_iter()
        .collect();

    let url = parent_web_url.map(|wu| format!("{}#note_{}", wu, gitlab_id));
    let display_title = parent_title.unwrap_or("(untitled)");
    let display_note_type = note_type.as_deref().unwrap_or("Note");
    let display_author = author_username.as_deref().unwrap_or("unknown");

    let parent_prefix = match parent_type_label {
        "Issue" => format!("Issue #{}", parent_iid),
        _ => format!("MR !{}", parent_iid),
    };
    let title = format!(
        "Note by @{} on {}: {}",
        display_author, parent_prefix, display_title
    );

    let mut content = String::new();
    let _ = writeln!(content, "[[Note]]");
    let _ = writeln!(content, "source_type: note");
    let _ = writeln!(content, "note_gitlab_id: {}", gitlab_id);
    let _ = writeln!(content, "project: {}", path_with_namespace);
    let _ = writeln!(content, "parent_type: {}", parent_type_label);
    let _ = writeln!(content, "parent_iid: {}", parent_iid);
    let _ = writeln!(content, "parent_title: {}", display_title);
    let _ = writeln!(content, "note_type: {}", display_note_type);
    let _ = writeln!(content, "author: @{}", display_author);
    let _ = writeln!(content, "created_at: {}", ms_to_iso(created_at));
    if resolvable {
        let _ = writeln!(content, "resolved: {}", resolved);
    }
    // Only diff notes carry a meaningful file position; the line number is optional.
    if display_note_type == "DiffNote" {
        let _ = match (position_new_path.as_deref(), position_new_line) {
            (Some(path), Some(line)) => writeln!(content, "path: {}:{}", path, line),
            (Some(path), None) => writeln!(content, "path: {}", path),
            (None, _) => Ok(()),
        };
    }
    if !labels.is_empty() {
        let _ = writeln!(content, "labels: {}", labels.join(", "));
    }
    if let Some(link) = url.as_deref() {
        let _ = writeln!(content, "url: {}", link);
    }
    content.push_str("\n--- Body ---\n\n");
    content.push_str(body.as_deref().unwrap_or(""));

    let labels_hash = compute_list_hash(labels);
    let paths_hash = compute_list_hash(&paths);
    let hard_cap = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&hard_cap.content);

    Ok(Some(DocumentData {
        source_type: SourceType::Note,
        source_id: note_id,
        project_id,
        author_username,
        labels: labels.to_vec(),
        paths,
        labels_hash,
        paths_hash,
        created_at,
        updated_at,
        url,
        title: Some(title),
        content_text: hard_cap.content,
        content_hash,
        is_truncated: hard_cap.is_truncated,
        truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()),
    }))
}

View File

@@ -1,7 +1,7 @@
use rusqlite::Connection;
use crate::core::error::Result;
use crate::embedding::chunking::{CHUNK_MAX_BYTES, EXPECTED_DIMS};
use crate::embedding::chunks::{CHUNK_MAX_BYTES, EXPECTED_DIMS};
#[derive(Debug)]
pub struct PendingDocument {

177
src/embedding/chunks.rs Normal file
View File

@@ -0,0 +1,177 @@
/// Number of rowid slots reserved per document; chunk indexes must stay below this value.
pub const CHUNK_ROWID_MULTIPLIER: i64 = 1000;

/// Packs a document id and a chunk index into one rowid:
/// `document_id * CHUNK_ROWID_MULTIPLIER + chunk_index`.
///
/// # Panics
///
/// Panics when `chunk_index` is outside `[0, CHUNK_ROWID_MULTIPLIER)` or when the
/// arithmetic overflows `i64`.
pub fn encode_rowid(document_id: i64, chunk_index: i64) -> i64 {
    assert!(
        (0..CHUNK_ROWID_MULTIPLIER).contains(&chunk_index),
        "chunk_index {chunk_index} out of range [0, {CHUNK_ROWID_MULTIPLIER})"
    );
    let base = document_id.checked_mul(CHUNK_ROWID_MULTIPLIER);
    match base.and_then(|scaled| scaled.checked_add(chunk_index)) {
        Some(rowid) => rowid,
        None => {
            panic!("encode_rowid overflow: document_id={document_id}, chunk_index={chunk_index}")
        }
    }
}

/// Splits a rowid back into `(document_id, chunk_index)`.
///
/// # Panics
///
/// Panics when `rowid` is negative.
pub fn decode_rowid(rowid: i64) -> (i64, i64) {
    assert!(
        rowid >= 0,
        "decode_rowid called with negative rowid: {rowid}"
    );
    (
        rowid / CHUNK_ROWID_MULTIPLIER,
        rowid % CHUNK_ROWID_MULTIPLIER,
    )
}
#[cfg(test)]
mod chunk_ids_tests {
    use super::*;

    /// Chunk 0 of document 1 maps to the first rowid of that document's range.
    #[test]
    fn test_encode_single_chunk() {
        let rowid = encode_rowid(1, 0);
        assert_eq!(rowid, 1000);
    }

    /// A non-zero chunk index is added on top of the document's base rowid.
    #[test]
    fn test_encode_multi_chunk() {
        let rowid = encode_rowid(1, 5);
        assert_eq!(rowid, 1005);
    }

    /// Spot checks for a larger document id.
    #[test]
    fn test_encode_specific_values() {
        let first = encode_rowid(42, 0);
        let sixth = encode_rowid(42, 5);
        assert_eq!(first, 42000);
        assert_eq!(sixth, 42005);
    }

    /// A rowid on a document boundary decodes to chunk index 0.
    #[test]
    fn test_decode_zero_chunk() {
        let decoded = decode_rowid(42000);
        assert_eq!(decoded, (42, 0));
    }

    /// Encoding then decoding returns the original pair for a grid of ids and indexes.
    #[test]
    fn test_decode_roundtrip() {
        const DOC_IDS: [i64; 6] = [0, 1, 42, 100, 999, 10000];
        const CHUNK_INDEXES: [i64; 5] = [0, 1, 5, 99, 999];
        for &doc_id in &DOC_IDS {
            for &chunk_idx in &CHUNK_INDEXES {
                let decoded = decode_rowid(encode_rowid(doc_id, chunk_idx));
                assert_eq!(
                    decoded,
                    (doc_id, chunk_idx),
                    "Roundtrip failed for doc_id={doc_id}, chunk_idx={chunk_idx}"
                );
            }
        }
    }

    /// Pins the multiplier value that the encoded rowids depend on.
    #[test]
    fn test_multiplier_value() {
        assert_eq!(CHUNK_ROWID_MULTIPLIER, 1000);
    }
}
/// Largest window, in bytes, that `split_into_chunks` considers for a single chunk; content
/// no longer than this is returned as one chunk.
pub const CHUNK_MAX_BYTES: usize = 1_500;
/// NOTE(review): presumably the length of the embedding vectors the model is expected to
/// return; it is not used by the chunking code in this file, so confirm against the callers.
pub const EXPECTED_DIMS: usize = 768;
/// How far consecutive chunks overlap. `split_into_chunks` subtracts this from a byte
/// offset, so despite the name it is effectively measured in bytes rather than characters.
pub const CHUNK_OVERLAP_CHARS: usize = 200;
/// Splits `content` into overlapping chunks and returns them as `(chunk_index, text)` pairs
/// with consecutive indexes starting at 0.
///
/// Empty content yields no chunks, and content of at most `CHUNK_MAX_BYTES` bytes yields a
/// single chunk. Longer content is cut window by window: each window is at most
/// `CHUNK_MAX_BYTES` bytes and is split at a paragraph break if one exists, otherwise at a
/// sentence break, otherwise at a word break, otherwise at the end of the window. The next
/// window starts `CHUNK_OVERLAP_CHARS` bytes before the cut (when the chunk is longer than
/// that), so neighbouring chunks share some text.
pub fn split_into_chunks(content: &str) -> Vec<(usize, String)> {
    let total_len = content.len();
    let mut pieces: Vec<(usize, String)> = Vec::new();
    let mut cursor = 0;

    // Empty content never enters the loop; content that fits in one chunk is emitted by the
    // "tail fits" branch on the first iteration.
    while cursor < total_len {
        // Exactly one piece is pushed per iteration, so the next index is the current count.
        let index = pieces.len();
        let tail = &content[cursor..];
        if tail.len() <= CHUNK_MAX_BYTES {
            pieces.push((index, tail.to_string()));
            break;
        }

        let window_end = floor_char_boundary(content, cursor + CHUNK_MAX_BYTES);
        let window = &content[cursor..window_end];
        let cut = find_paragraph_break(window)
            .or_else(|| find_sentence_break(window))
            .or_else(|| find_word_break(window))
            .unwrap_or(window.len());
        pieces.push((index, content[cursor..cursor + cut].to_string()));

        // Step back by the overlap only when the chunk is longer than the overlap itself.
        let overlap = if cut > CHUNK_OVERLAP_CHARS {
            CHUNK_OVERLAP_CHARS
        } else {
            0
        };
        let step = (cut - overlap).max(1);

        // The overlap subtraction may land inside a multi-byte character, so snap back to a
        // character boundary. If snapping would not move the cursor forward, skip exactly
        // one character instead so the loop always makes progress.
        let candidate = floor_char_boundary(content, cursor + step);
        cursor = if candidate > cursor {
            candidate
        } else {
            cursor + tail.chars().next().map_or(1, |c| c.len_utf8())
        };
    }

    pieces
}
/// Finds where to cut `window` at a blank line (`"\n\n"`).
///
/// The last third of the window is searched first (from the end); if it has no blank line,
/// the part before it is searched (also from the end). The returned offset points just past
/// the two newlines. Returns `None` when neither part contains a blank line.
fn find_paragraph_break(window: &str) -> Option<usize> {
    let pivot = floor_char_boundary(window, window.len() * 2 / 3);
    let (head, tail) = window.split_at(pivot);
    match tail.rfind("\n\n") {
        Some(pos) => Some(pivot + pos + 2),
        None => head.rfind("\n\n").map(|pos| pos + 2),
    }
}
/// Finds where to cut `window` after a sentence terminator followed by a space.
///
/// The second half of the window is searched first, then the first half. Within each half
/// the terminators are tried in the order `". "`, `"? "`, `"! "`, and the last occurrence of
/// the first terminator that matches wins. The returned offset points just past the
/// terminator and its space. Returns `None` when no terminator is found.
fn find_sentence_break(window: &str) -> Option<usize> {
    const TERMINATORS: [&str; 3] = [". ", "? ", "! "];

    let pivot = floor_char_boundary(window, window.len() / 2);
    let (head, tail) = window.split_at(pivot);

    TERMINATORS
        .iter()
        .find_map(|pat| tail.rfind(*pat).map(|pos| pivot + pos + pat.len()))
        .or_else(|| {
            TERMINATORS
                .iter()
                .find_map(|pat| head.rfind(*pat).map(|pos| pos + pat.len()))
        })
}
/// Finds where to cut `window` after a space character.
///
/// The second half of the window is searched first (from the end), then the first half.
/// The returned offset points just past the space. Returns `None` when the window contains
/// no space.
fn find_word_break(window: &str) -> Option<usize> {
    let pivot = floor_char_boundary(window, window.len() / 2);
    let (head, tail) = window.split_at(pivot);
    match tail.rfind(' ') {
        Some(pos) => Some(pivot + pos + 1),
        None => head.rfind(' ').map(|pos| pos + 1),
    }
}
/// Returns the largest byte offset `<= idx` that lies on a UTF-8 character boundary of `s`.
///
/// Offsets at or beyond the end of the string are clamped to `s.len()`.
fn floor_char_boundary(s: &str, idx: usize) -> usize {
    if idx >= s.len() {
        return s.len();
    }
    // Offset 0 is always a boundary, so the search cannot come up empty.
    (0..=idx)
        .rev()
        .find(|&offset| s.is_char_boundary(offset))
        .unwrap_or(0)
}
#[cfg(test)]
#[path = "chunking_tests.rs"]
mod chunking_tests;

View File

@@ -1,11 +1,10 @@
pub mod change_detector;
pub mod chunk_ids;
pub mod chunking;
pub mod chunks;
pub mod ollama;
pub mod pipeline;
pub mod similarity;
pub use change_detector::{PendingDocument, count_pending_documents, find_pending_documents};
pub use chunking::{CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS, split_into_chunks};
pub use chunks::{CHUNK_MAX_BYTES, CHUNK_OVERLAP_CHARS, split_into_chunks};
pub use pipeline::{EmbedForIdsResult, EmbedResult, embed_documents, embed_documents_by_ids};
pub use similarity::cosine_similarity;

View File

@@ -1,9 +1,8 @@
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use tracing::warn;
use crate::core::error::{LoreError, Result};
use crate::http::Client;
pub struct OllamaConfig {
pub base_url: String,
@@ -51,17 +50,7 @@ struct ModelInfo {
impl OllamaClient {
pub fn new(config: OllamaConfig) -> Self {
let client = Client::builder()
.timeout(Duration::from_secs(config.timeout_secs))
.build()
.unwrap_or_else(|e| {
warn!(
error = %e,
"Failed to build configured Ollama HTTP client; falling back to default client"
);
Client::new()
});
let client = Client::with_timeout(Duration::from_secs(config.timeout_secs));
Self { client, config }
}
@@ -70,21 +59,16 @@ impl OllamaClient {
let response =
self.client
.get(&url)
.send()
.get(&url, &[])
.await
.map_err(|e| LoreError::OllamaUnavailable {
base_url: self.config.base_url.clone(),
source: Some(e),
detail: Some(format!("{e:?}")),
})?;
let tags: TagsResponse =
response
.json()
.await
.map_err(|e| LoreError::OllamaUnavailable {
let tags: TagsResponse = response.json().map_err(|e| LoreError::OllamaUnavailable {
base_url: self.config.base_url.clone(),
source: Some(e),
detail: Some(format!("{e:?}")),
})?;
let model_found = tags.models.iter().any(|m| {
@@ -110,31 +94,26 @@ impl OllamaClient {
let response = self
.client
.post(&url)
.json(&request)
.send()
.post_json(&url, &[], &request)
.await
.map_err(|e| LoreError::OllamaUnavailable {
base_url: self.config.base_url.clone(),
source: Some(e),
detail: Some(format!("{e:?}")),
})?;
let status = response.status();
if !status.is_success() {
let body = response.text().await.unwrap_or_default();
if !response.is_success() {
let status = response.status;
let body = response.text().unwrap_or_default();
return Err(LoreError::EmbeddingFailed {
document_id: 0,
reason: format!("HTTP {}: {}", status, body),
reason: format!("HTTP {status}: {body}"),
});
}
let embed_response: EmbedResponse =
response
.json()
.await
.map_err(|e| LoreError::EmbeddingFailed {
response.json().map_err(|e| LoreError::EmbeddingFailed {
document_id: 0,
reason: format!("Failed to parse embed response: {}", e),
reason: format!("Failed to parse embed response: {e}"),
})?;
Ok(embed_response.embeddings)
@@ -142,17 +121,9 @@ impl OllamaClient {
}
pub async fn check_ollama_health(base_url: &str) -> bool {
let client = Client::builder()
.timeout(Duration::from_secs(5))
.build()
.ok();
let Some(client) = client else {
return false;
};
let client = Client::with_timeout(Duration::from_secs(5));
let url = format!("{base_url}/api/tags");
client.get(&url).send().await.is_ok()
client.get(&url, &[]).await.is_ok_and(|r| r.is_success())
}
#[cfg(test)]

View File

@@ -9,8 +9,9 @@ use tracing::{debug, info, instrument, warn};
use crate::core::error::Result;
use crate::core::shutdown::ShutdownSignal;
use crate::embedding::change_detector::{count_pending_documents, find_pending_documents};
use crate::embedding::chunk_ids::{CHUNK_ROWID_MULTIPLIER, encode_rowid};
use crate::embedding::chunking::{CHUNK_MAX_BYTES, EXPECTED_DIMS, split_into_chunks};
use crate::embedding::chunks::{
CHUNK_MAX_BYTES, CHUNK_ROWID_MULTIPLIER, EXPECTED_DIMS, encode_rowid, split_into_chunks,
};
use crate::embedding::ollama::OllamaClient;
const BATCH_SIZE: usize = 32;
@@ -160,7 +161,7 @@ async fn embed_page(
continue;
}
if page_normal_docs.is_multiple_of(50) {
if page_normal_docs != 0 && page_normal_docs.is_multiple_of(50) {
debug!(
doc_id = doc.document_id,
doc_num = page_normal_docs,
@@ -168,7 +169,7 @@ async fn embed_page(
"Chunking document"
);
}
if page_normal_docs.is_multiple_of(100) {
if page_normal_docs != 0 && page_normal_docs.is_multiple_of(100) {
info!(
doc_id = doc.document_id,
content_bytes = doc.content_text.len(),
@@ -685,7 +686,7 @@ fn find_documents_by_ids(
document_ids: &[i64],
model_name: &str,
) -> Result<Vec<crate::embedding::change_detector::PendingDocument>> {
use crate::embedding::chunking::{CHUNK_MAX_BYTES, EXPECTED_DIMS};
use crate::embedding::chunks::{CHUNK_MAX_BYTES, EXPECTED_DIMS};
if document_ids.is_empty() {
return Ok(Vec::new());

View File

@@ -6,7 +6,7 @@ use wiremock::{Mock, MockServer, ResponseTemplate};
use crate::core::db::{create_connection, run_migrations};
use crate::core::shutdown::ShutdownSignal;
use crate::embedding::chunking::EXPECTED_DIMS;
use crate::embedding::chunks::EXPECTED_DIMS;
use crate::embedding::ollama::{OllamaClient, OllamaConfig};
use crate::embedding::pipeline::embed_documents_by_ids;

View File

@@ -1,20 +1,19 @@
use asupersync::time::{sleep, wall_now};
use async_stream::stream;
use chrono::{DateTime, Utc};
use futures::Stream;
use reqwest::header::{ACCEPT, HeaderMap, HeaderValue};
use reqwest::{Client, Response, StatusCode};
use std::pin::Pin;
use std::sync::Arc;
use std::sync::Mutex;
use std::time::{Duration, Instant};
use tokio::sync::Mutex;
use tokio::time::sleep;
use tracing::{debug, warn};
use tracing::debug;
use super::types::{
GitLabDiscussion, GitLabIssue, GitLabIssueRef, GitLabLabelEvent, GitLabMergeRequest,
GitLabMilestoneEvent, GitLabMrDiff, GitLabProject, GitLabStateEvent, GitLabUser, GitLabVersion,
};
use crate::core::error::{LoreError, Result};
use crate::http;
struct RateLimiter {
last_request: Instant,
@@ -56,9 +55,8 @@ fn rand_jitter() -> u64 {
(n ^ nanos) % 50
}
#[derive(Clone)]
pub struct GitLabClient {
client: Client,
client: http::Client,
base_url: String,
token: String,
rate_limiter: Arc<Mutex<RateLimiter>>,
@@ -66,27 +64,8 @@ pub struct GitLabClient {
impl GitLabClient {
pub fn new(base_url: &str, token: &str, requests_per_second: Option<f64>) -> Self {
let mut headers = HeaderMap::new();
headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
let client = Client::builder()
.default_headers(headers.clone())
.timeout(Duration::from_secs(30))
.build()
.unwrap_or_else(|e| {
warn!(
error = %e,
"Failed to build configured HTTP client; falling back to default client with timeout"
);
Client::builder()
.default_headers(headers)
.timeout(Duration::from_secs(30))
.build()
.unwrap_or_else(|_| Client::new())
});
Self {
client,
client: http::Client::with_timeout(Duration::from_secs(30)),
base_url: base_url.trim_end_matches('/').to_string(),
token: token.to_string(),
rate_limiter: Arc::new(Mutex::new(RateLimiter::new(
@@ -131,25 +110,34 @@ impl GitLabClient {
let mut last_response = None;
for attempt in 0..=Self::MAX_RETRIES {
let delay = self.rate_limiter.lock().await.check_delay();
// SAFETY: std::sync::Mutex blocks the executor thread while held. This is safe
// because the critical section is a single Instant::now() comparison with no I/O.
// If async work is ever added inside the lock, switch to an async-aware lock.
let delay = {
let mut limiter = self
.rate_limiter
.lock()
.unwrap_or_else(|poisoned| poisoned.into_inner());
limiter.check_delay()
};
if let Some(d) = delay {
sleep(d).await;
sleep(wall_now(), d).await;
}
debug!(url = %url, attempt, "GitLab request");
let response = self
.client
.get(&url)
.header("PRIVATE-TOKEN", &self.token)
.send()
.await
.map_err(|e| LoreError::GitLabNetworkError {
base_url: self.base_url.clone(),
source: Some(e),
})?;
.get(
&url,
&[
("PRIVATE-TOKEN", self.token.as_str()),
("Accept", "application/json"),
],
)
.await?;
if response.status() == StatusCode::TOO_MANY_REQUESTS && attempt < Self::MAX_RETRIES {
if response.status == 429 && attempt < Self::MAX_RETRIES {
let retry_after = Self::parse_retry_after(&response);
tracing::info!(
path = %path,
@@ -158,7 +146,7 @@ impl GitLabClient {
status_code = 429u16,
"Rate limited, retrying"
);
sleep(Duration::from_secs(retry_after)).await;
sleep(wall_now(), Duration::from_secs(retry_after)).await;
continue;
}
@@ -167,53 +155,35 @@ impl GitLabClient {
}
self.handle_response(last_response.expect("retry loop ran at least once"), path)
.await
}
fn parse_retry_after(response: &Response) -> u64 {
fn parse_retry_after(response: &http::Response) -> u64 {
response
.headers()
.get("retry-after")
.and_then(|v| v.to_str().ok())
.header("retry-after")
.and_then(|s| s.parse().ok())
.unwrap_or(60)
}
async fn handle_response<T: serde::de::DeserializeOwned>(
fn handle_response<T: serde::de::DeserializeOwned>(
&self,
response: Response,
response: http::Response,
path: &str,
) -> Result<T> {
match response.status() {
StatusCode::UNAUTHORIZED => Err(LoreError::GitLabAuthFailed),
StatusCode::NOT_FOUND => Err(LoreError::GitLabNotFound {
match response.status {
401 => Err(LoreError::GitLabAuthFailed),
404 => Err(LoreError::GitLabNotFound {
resource: path.to_string(),
}),
StatusCode::TOO_MANY_REQUESTS => {
429 => {
let retry_after = Self::parse_retry_after(&response);
Err(LoreError::GitLabRateLimited { retry_after })
}
status if status.is_success() => {
let text = response.text().await?;
serde_json::from_str(&text).map_err(|e| {
let preview = if text.len() > 500 {
&text[..text.floor_char_boundary(500)]
} else {
&text
};
LoreError::Other(format!(
"Failed to decode response from {path}: {e}\nResponse preview: {preview}"
))
})
}
status => Err(LoreError::Other(format!(
"GitLab API error: {} {}",
status.as_u16(),
status.canonical_reason().unwrap_or("Unknown")
_ if response.is_success() => response.json::<T>().map_err(|e| {
LoreError::Other(format!("Failed to decode response from {path}: {e}"))
}),
s => Err(LoreError::Other(format!(
"GitLab API error: {s} {}",
response.reason
))),
}
}
@@ -261,9 +231,7 @@ impl GitLabClient {
yield Ok(issue);
}
let next_page = headers
.get("x-next-page")
.and_then(|v| v.to_str().ok())
let next_page = header_value(&headers, "x-next-page")
.and_then(|s| s.parse::<u32>().ok());
match next_page {
@@ -317,9 +285,7 @@ impl GitLabClient {
yield Ok(discussion);
}
let next_page = headers
.get("x-next-page")
.and_then(|v| v.to_str().ok())
let next_page = header_value(&headers, "x-next-page")
.and_then(|s| s.parse::<u32>().ok());
match next_page {
@@ -422,10 +388,7 @@ impl GitLabClient {
.await?;
let link_next = parse_link_header_next(&headers);
let x_next_page = headers
.get("x-next-page")
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<u32>().ok());
let x_next_page = header_value(&headers, "x-next-page").and_then(|s| s.parse::<u32>().ok());
let full_page = items.len() as u32 == per_page;
let (next_page, is_last_page) = match (link_next.is_some(), x_next_page, full_page) {
@@ -473,9 +436,7 @@ impl GitLabClient {
}
let link_next = parse_link_header_next(&headers);
let x_next_page = headers
.get("x-next-page")
.and_then(|v| v.to_str().ok())
let x_next_page = header_value(&headers, "x-next-page")
.and_then(|s| s.parse::<u32>().ok());
let should_continue = match (link_next.is_some(), x_next_page, full_page) {
@@ -511,31 +472,40 @@ impl GitLabClient {
&self,
path: &str,
params: &[(&str, String)],
) -> Result<(T, HeaderMap)> {
) -> Result<(T, Vec<(String, String)>)> {
let url = format!("{}{}", self.base_url, path);
let mut last_response = None;
for attempt in 0..=Self::MAX_RETRIES {
let delay = self.rate_limiter.lock().await.check_delay();
// SAFETY: std::sync::Mutex blocks the executor thread while held. This is safe
// because the critical section is a single Instant::now() comparison with no I/O.
// If async work is ever added inside the lock, switch to an async-aware lock.
let delay = {
let mut limiter = self
.rate_limiter
.lock()
.unwrap_or_else(|poisoned| poisoned.into_inner());
limiter.check_delay()
};
if let Some(d) = delay {
sleep(d).await;
sleep(wall_now(), d).await;
}
debug!(url = %url, ?params, attempt, "GitLab paginated request");
let response = self
.client
.get(&url)
.query(params)
.header("PRIVATE-TOKEN", &self.token)
.send()
.await
.map_err(|e| LoreError::GitLabNetworkError {
base_url: self.base_url.clone(),
source: Some(e),
})?;
.get_with_query(
&url,
params,
&[
("PRIVATE-TOKEN", self.token.as_str()),
("Accept", "application/json"),
],
)
.await?;
if response.status() == StatusCode::TOO_MANY_REQUESTS && attempt < Self::MAX_RETRIES {
if response.status == 429 && attempt < Self::MAX_RETRIES {
let retry_after = Self::parse_retry_after(&response);
tracing::info!(
path = %path,
@@ -544,7 +514,7 @@ impl GitLabClient {
status_code = 429u16,
"Rate limited, retrying"
);
sleep(Duration::from_secs(retry_after)).await;
sleep(wall_now(), Duration::from_secs(retry_after)).await;
continue;
}
@@ -553,8 +523,8 @@ impl GitLabClient {
}
let response = last_response.expect("retry loop ran at least once");
let headers = response.headers().clone();
let body = self.handle_response(response, path).await?;
let headers = response.headers.clone();
let body = self.handle_response(response, path)?;
Ok((body, headers))
}
}
@@ -613,10 +583,8 @@ impl GitLabClient {
let full_page = items.len() as u32 == per_page;
results.extend(items);
let next_page = headers
.get("x-next-page")
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<u32>().ok());
let next_page =
header_value(&headers, "x-next-page").and_then(|s| s.parse::<u32>().ok());
match next_page {
Some(next) if next > page => page = next,
@@ -726,14 +694,14 @@ impl GitLabClient {
)> {
let (state_res, label_res, milestone_res) = match entity_type {
"issue" => {
tokio::join!(
futures::join!(
self.fetch_issue_state_events(gitlab_project_id, iid),
self.fetch_issue_label_events(gitlab_project_id, iid),
self.fetch_issue_milestone_events(gitlab_project_id, iid),
)
}
"merge_request" => {
tokio::join!(
futures::join!(
self.fetch_mr_state_events(gitlab_project_id, iid),
self.fetch_mr_label_events(gitlab_project_id, iid),
self.fetch_mr_milestone_events(gitlab_project_id, iid),
@@ -761,11 +729,15 @@ pub struct MergeRequestPage {
pub is_last_page: bool,
}
fn parse_link_header_next(headers: &HeaderMap) -> Option<String> {
fn header_value<'a>(headers: &'a [(String, String)], name: &str) -> Option<&'a str> {
headers
.get("link")
.and_then(|v| v.to_str().ok())
.and_then(|link_str| {
.iter()
.find(|(k, _)| k.eq_ignore_ascii_case(name))
.map(|(_, v)| v.as_str())
}
fn parse_link_header_next(headers: &[(String, String)]) -> Option<String> {
header_value(headers, "link").and_then(|link_str| {
for part in link_str.split(',') {
let part = part.trim();
if (part.contains("rel=\"next\"") || part.contains("rel=next"))
@@ -836,13 +808,10 @@ mod tests {
#[test]
fn parse_link_header_extracts_next_url() {
let mut headers = HeaderMap::new();
headers.insert(
"link",
HeaderValue::from_static(
r#"<https://gitlab.example.com/api/v4/projects/1/merge_requests?page=2>; rel="next", <https://gitlab.example.com/api/v4/projects/1/merge_requests?page=5>; rel="last""#,
),
);
let headers = vec![(
"link".to_string(),
r#"<https://gitlab.example.com/api/v4/projects/1/merge_requests?page=2>; rel="next", <https://gitlab.example.com/api/v4/projects/1/merge_requests?page=5>; rel="last""#.to_string(),
)];
let result = parse_link_header_next(&headers);
assert_eq!(
@@ -853,11 +822,10 @@ mod tests {
#[test]
fn parse_link_header_handles_unquoted_rel() {
let mut headers = HeaderMap::new();
headers.insert(
"link",
HeaderValue::from_static(r#"<https://example.com/next>; rel=next"#),
);
let headers = vec![(
"link".to_string(),
r#"<https://example.com/next>; rel=next"#.to_string(),
)];
let result = parse_link_header_next(&headers);
assert_eq!(result, Some("https://example.com/next".to_string()));
@@ -865,11 +833,10 @@ mod tests {
#[test]
fn parse_link_header_returns_none_when_no_next() {
let mut headers = HeaderMap::new();
headers.insert(
"link",
HeaderValue::from_static(r#"<https://example.com/last>; rel="last""#),
);
let headers = vec![(
"link".to_string(),
r#"<https://example.com/last>; rel="last""#.to_string(),
)];
let result = parse_link_header_next(&headers);
assert!(result.is_none());
@@ -877,7 +844,7 @@ mod tests {
#[test]
fn parse_link_header_returns_none_when_missing() {
let headers = HeaderMap::new();
let headers: Vec<(String, String)> = vec![];
let result = parse_link_header_next(&headers);
assert!(result.is_none());
}

View File

@@ -1,10 +1,10 @@
use reqwest::Client;
use serde::Deserialize;
use serde_json::Value;
use std::time::{Duration, SystemTime};
use tracing::warn;
use crate::core::error::LoreError;
use crate::http::Client;
pub struct GraphqlClient {
http: Client,
@@ -21,13 +21,8 @@ pub struct GraphqlQueryResult {
impl GraphqlClient {
pub fn new(base_url: &str, token: &str) -> Self {
let http = Client::builder()
.timeout(Duration::from_secs(30))
.build()
.unwrap_or_else(|_| Client::new());
Self {
http,
http: Client::with_timeout(Duration::from_secs(30)),
base_url: base_url.trim_end_matches('/').to_string(),
token: token.to_string(),
}
@@ -45,22 +40,13 @@ impl GraphqlClient {
"variables": variables,
});
let bearer = format!("Bearer {}", self.token);
let response = self
.http
.post(&url)
.header("Authorization", format!("Bearer {}", self.token))
.header("Content-Type", "application/json")
.json(&body)
.send()
.await
.map_err(|e| LoreError::GitLabNetworkError {
base_url: self.base_url.clone(),
source: Some(e),
})?;
.post_json(&url, &[("Authorization", bearer.as_str())], &body)
.await?;
let status = response.status();
match status.as_u16() {
match response.status {
401 | 403 => return Err(LoreError::GitLabAuthFailed),
404 => {
return Err(LoreError::GitLabNotFound {
@@ -72,14 +58,13 @@ impl GraphqlClient {
return Err(LoreError::GitLabRateLimited { retry_after });
}
s if s >= 400 => {
return Err(LoreError::Other(format!("GraphQL HTTP {status}")));
return Err(LoreError::Other(format!("GraphQL HTTP {s}")));
}
_ => {}
}
let json: Value = response
.json()
.await
.map_err(|e| LoreError::Other(format!("Failed to parse GraphQL response: {e}")))?;
let errors = json.get("errors").and_then(|e| e.as_array());
@@ -116,12 +101,8 @@ impl GraphqlClient {
}
}
fn parse_retry_after(response: &reqwest::Response) -> u64 {
let header = match response
.headers()
.get("retry-after")
.and_then(|v| v.to_str().ok())
{
fn parse_retry_after(response: &crate::http::Response) -> u64 {
let header = match response.header("retry-after") {
Some(s) => s,
None => return 60,
};

318
src/http.rs Normal file
View File

@@ -0,0 +1,318 @@
use std::time::Duration;
use asupersync::http::h1::{
ClientError, HttpClient, HttpClientConfig, Method, Response as RawResponse,
};
use asupersync::http::pool::PoolConfig;
use asupersync::time::{timeout, wall_now};
use serde::Serialize;
use serde::de::DeserializeOwned;
use crate::core::error::{LoreError, NetworkErrorKind, Result};
/// Largest response body, in bytes, that `Client::execute` will hand back to callers.
/// Responses whose body exceeds this size are turned into a `LoreError::Other`.
const MAX_RESPONSE_BODY_BYTES: usize = 64 * 1024 * 1024; // 64 MiB
/// HTTP adapter that pairs an asupersync HTTP/1 client with a per-request timeout.
pub struct Client {
/// Underlying asupersync client that actually performs each request.
inner: HttpClient,
/// Time limit applied to every request issued through `execute`.
timeout: Duration,
}
/// Fully-buffered HTTP response as returned by `Client`.
#[derive(Debug)]
pub struct Response {
/// Numeric HTTP status code (e.g. 200, 404, 429).
pub status: u16,
/// Reason phrase copied from the raw response.
pub reason: String,
/// Response headers as `(name, value)` pairs; a name may appear more than once.
pub headers: Vec<(String, String)>,
// Raw body bytes; private, read through `json()` or `text()`.
body: Vec<u8>,
}
impl Client {
    /// Creates a client whose requests are each bounded by `timeout`.
    ///
    /// The pool is configured with 6 connections per host, 100 connections in total and
    /// an `idle_timeout` of 90 seconds; every other setting keeps its library default.
    pub fn with_timeout(timeout: Duration) -> Self {
        let pool_config = PoolConfig {
            max_connections_per_host: 6,
            max_total_connections: 100,
            idle_timeout: Duration::from_secs(90),
            ..Default::default()
        };
        let config = HttpClientConfig {
            pool_config,
            ..Default::default()
        };

        Self {
            inner: HttpClient::with_config(config),
            timeout,
        }
    }

    /// Issues a body-less `GET` to `url` with the given request headers.
    pub async fn get(&self, url: &str, headers: &[(&str, &str)]) -> Result<Response> {
        let empty_body = Vec::new();
        self.execute(Method::Get, url, headers, empty_body).await
    }

    /// Issues a body-less `GET` to `url` after appending `params` as a query string.
    pub async fn get_with_query(
        &self,
        url: &str,
        params: &[(&str, String)],
        headers: &[(&str, &str)],
    ) -> Result<Response> {
        let target = append_query_params(url, params);
        let empty_body = Vec::new();
        self.execute(Method::Get, &target, headers, empty_body).await
    }

    /// Serializes `body` to JSON and `POST`s it to `url`.
    ///
    /// A `Content-Type: application/json` header is appended after the caller's headers.
    ///
    /// # Errors
    /// Returns `LoreError::Other` if `body` cannot be serialized, otherwise whatever
    /// `execute` returns.
    pub async fn post_json<T: Serialize>(
        &self,
        url: &str,
        headers: &[(&str, &str)],
        body: &T,
    ) -> Result<Response> {
        let payload = match serde_json::to_vec(body) {
            Ok(bytes) => bytes,
            Err(e) => {
                return Err(LoreError::Other(format!(
                    "JSON serialization failed: {e}"
                )));
            }
        };

        let mut request_headers: Vec<(&str, &str)> = Vec::with_capacity(headers.len() + 1);
        request_headers.extend_from_slice(headers);
        request_headers.push(("Content-Type", "application/json"));

        self.execute(Method::Post, url, &request_headers, payload)
            .await
    }

    /// Sends one request through the inner client, bounded by `self.timeout`.
    ///
    /// # Errors
    /// * `LoreError::GitLabNetworkError` with `NetworkErrorKind::Timeout` when the
    ///   timeout elapses first.
    /// * `LoreError::GitLabNetworkError` classified by `classify_transport_error` when
    ///   the inner client reports an error.
    /// * `LoreError::Other` when the received body exceeds `MAX_RESPONSE_BODY_BYTES`.
    async fn execute(
        &self,
        method: Method,
        url: &str,
        headers: &[(&str, &str)],
        body: Vec<u8>,
    ) -> Result<Response> {
        // The inner client takes owned header pairs.
        let owned_headers: Vec<(String, String)> = headers
            .iter()
            .map(|&(name, value)| (name.to_owned(), value.to_owned()))
            .collect();

        let outcome = timeout(
            wall_now(),
            self.timeout,
            self.inner.request(method, url, owned_headers, body),
        )
        .await;

        // Outer layer: did the timeout fire? Inner layer: did the transport succeed?
        let raw: RawResponse = match outcome {
            Err(_) => {
                return Err(LoreError::GitLabNetworkError {
                    base_url: url.to_string(),
                    kind: NetworkErrorKind::Timeout,
                    detail: Some(format!("Request timed out after {:?}", self.timeout)),
                });
            }
            Ok(Err(e)) => {
                return Err(LoreError::GitLabNetworkError {
                    base_url: url.to_string(),
                    kind: classify_transport_error(&e),
                    detail: Some(format!("{e:?}")),
                });
            }
            Ok(Ok(raw)) => raw,
        };

        let body_len = raw.body.len();
        if body_len > MAX_RESPONSE_BODY_BYTES {
            return Err(LoreError::Other(format!(
                "Response body too large: {} bytes (max {MAX_RESPONSE_BODY_BYTES})",
                body_len,
            )));
        }

        Ok(Response {
            status: raw.status,
            reason: raw.reason,
            headers: raw.headers,
            body: raw.body,
        })
    }
}
impl Response {
pub fn is_success(&self) -> bool {
(200..300).contains(&self.status)
}
pub fn json<T: DeserializeOwned>(&self) -> Result<T> {
serde_json::from_slice(&self.body)
.map_err(|e| LoreError::Other(format!("JSON parse error: {e}")))
}
pub fn text(self) -> Result<String> {
String::from_utf8(self.body)
.map_err(|e| LoreError::Other(format!("UTF-8 decode error: {e}")))
}
pub fn header(&self, name: &str) -> Option<&str> {
self.headers
.iter()
.find(|(k, _)| k.eq_ignore_ascii_case(name))
.map(|(_, v)| v.as_str())
}
pub fn headers_all(&self, name: &str) -> Vec<&str> {
self.headers
.iter()
.filter(|(k, _)| k.eq_ignore_ascii_case(name))
.map(|(_, v)| v.as_str())
.collect()
}
}
/// Maps a transport-level `ClientError` onto the crate's `NetworkErrorKind`.
///
/// DNS, connect and TLS failures get their dedicated kinds; every other variant is
/// reported as `NetworkErrorKind::Other`.
fn classify_transport_error(e: &ClientError) -> NetworkErrorKind {
    if matches!(e, ClientError::DnsError(_)) {
        return NetworkErrorKind::DnsResolution;
    }
    if matches!(e, ClientError::ConnectError(_)) {
        return NetworkErrorKind::ConnectionRefused;
    }
    if matches!(e, ClientError::TlsError(_)) {
        return NetworkErrorKind::Tls;
    }
    NetworkErrorKind::Other
}
/// Appends `params` to `url` as a percent-encoded query string.
///
/// * With no `params`, `url` is returned unchanged.
/// * Keys and values are encoded with `urlencoding::encode` and joined as
///   `key=value` pairs separated by `&`; repeated keys are kept in order.
/// * A `#fragment` in `url` is preserved and stays at the very end of the result.
/// * If `url` already carries a query, the new pairs are added after it. A base that
///   already ends in `?` or `&` gets no extra separator, so the result never contains
///   an empty pair such as `?&page=1` or `a=1&&page=1`.
fn append_query_params(url: &str, params: &[(&str, String)]) -> String {
    if params.is_empty() {
        return url.to_string();
    }

    let query = params
        .iter()
        .map(|(k, v)| format!("{}={}", urlencoding::encode(k), urlencoding::encode(v)))
        .collect::<Vec<_>>()
        .join("&");

    // The fragment must come after the query, so split it off before appending.
    let (base, fragment) = match url.split_once('#') {
        Some((b, f)) => (b, Some(f)),
        None => (url, None),
    };

    // Choose the joiner from what the base already ends with, so a dangling `?` or `&`
    // does not produce an empty query pair.
    let separator = if !base.contains('?') {
        "?"
    } else if base.ends_with('?') || base.ends_with('&') {
        ""
    } else {
        "&"
    };

    let mut out = String::with_capacity(url.len() + separator.len() + query.len());
    out.push_str(base);
    out.push_str(separator);
    out.push_str(&query);
    if let Some(f) = fragment {
        out.push('#');
        out.push_str(f);
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Base URL shared by the query-string tests.
    const API_URL: &str = "https://example.com/api";

    /// Builds a `Response` with an empty body from borrowed header pairs.
    fn response_with(status: u16, reason: &str, headers: &[(&str, &str)]) -> Response {
        Response {
            status,
            reason: reason.to_string(),
            headers: headers
                .iter()
                .map(|&(name, value)| (name.to_string(), value.to_string()))
                .collect(),
            body: Vec::new(),
        }
    }

    // --- append_query_params ---

    #[test]
    fn append_query_params_empty_returns_unchanged() {
        assert_eq!(append_query_params(API_URL, &[]), API_URL);
    }

    #[test]
    fn append_query_params_adds_question_mark() {
        let params = [
            ("page", String::from("1")),
            ("per_page", String::from("20")),
        ];
        let result = append_query_params(API_URL, &params);
        assert_eq!(result, "https://example.com/api?page=1&per_page=20");
    }

    #[test]
    fn append_query_params_existing_query_uses_ampersand() {
        let params = [("page", String::from("2"))];
        let result = append_query_params("https://example.com/api?state=opened", &params);
        assert_eq!(result, "https://example.com/api?state=opened&page=2");
    }

    #[test]
    fn append_query_params_preserves_fragment() {
        let params = [("key", String::from("val"))];
        let result = append_query_params("https://example.com/api#section", &params);
        assert_eq!(result, "https://example.com/api?key=val#section");
    }

    #[test]
    fn append_query_params_encodes_special_chars() {
        let params = [("labels[]", String::from("bug fix"))];
        let result = append_query_params(API_URL, &params);
        assert_eq!(result, "https://example.com/api?labels%5B%5D=bug%20fix");
    }

    #[test]
    fn append_query_params_repeated_keys() {
        let params = [
            ("labels[]", String::from("bug")),
            ("labels[]", String::from("urgent")),
        ];
        let result = append_query_params(API_URL, &params);
        assert_eq!(
            result,
            "https://example.com/api?labels%5B%5D=bug&labels%5B%5D=urgent"
        );
    }

    // --- Response accessors ---

    #[test]
    fn response_header_case_insensitive() {
        let resp = response_with(
            200,
            "OK",
            &[("Content-Type", "application/json"), ("X-Page", "1")],
        );

        for spelling in ["content-type", "CONTENT-TYPE", "Content-Type"] {
            assert_eq!(resp.header(spelling), Some("application/json"));
        }
        assert_eq!(resp.header("x-page"), Some("1"));
        assert_eq!(resp.header("X-Missing"), None);
    }

    #[test]
    fn response_headers_all_returns_multiple_values() {
        let resp = response_with(
            200,
            "OK",
            &[
                ("Link", "<url1>; rel=\"next\""),
                ("Link", "<url2>; rel=\"last\""),
                ("Content-Type", "application/json"),
            ],
        );

        let links = resp.headers_all("link");
        assert_eq!(links.len(), 2);
        assert_eq!(links[0], "<url1>; rel=\"next\"");
        assert_eq!(links[1], "<url2>; rel=\"last\"");
    }

    #[test]
    fn response_is_success_range() {
        for status in [200, 201, 204, 299] {
            let resp = response_with(status, "", &[]);
            assert!(resp.is_success(), "status {status} should be success");
        }
        for status in [100, 199, 300, 301, 400, 404, 500] {
            let resp = response_with(status, "", &[]);
            assert!(!resp.is_success(), "status {status} should not be success");
        }
    }

    // --- classify_transport_error ---

    #[test]
    fn classify_dns_error() {
        let kind =
            classify_transport_error(&ClientError::DnsError(std::io::Error::other("dns failed")));
        assert_eq!(kind, NetworkErrorKind::DnsResolution);
    }

    #[test]
    fn classify_connect_error() {
        let kind =
            classify_transport_error(&ClientError::ConnectError(std::io::Error::other("refused")));
        assert_eq!(kind, NetworkErrorKind::ConnectionRefused);
    }

    #[test]
    fn classify_tls_error() {
        let kind = classify_transport_error(&ClientError::TlsError("bad cert".into()));
        assert_eq!(kind, NetworkErrorKind::Tls);
    }
}

View File

@@ -3,7 +3,6 @@ use tracing::{debug, warn};
use crate::Config;
use crate::core::error::Result;
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::time::now_ms;
use crate::documents::SourceType;
use crate::gitlab::GitLabClient;
@@ -12,6 +11,7 @@ use crate::gitlab::transformers::{
};
use crate::gitlab::types::GitLabDiscussion;
use crate::ingestion::dirty_tracker;
use crate::ingestion::storage::payloads::{StorePayloadOptions, store_payload};
use super::issues::IssueForDiscussionSync;

View File

@@ -6,7 +6,6 @@ use tracing::{debug, warn};
use crate::Config;
use crate::core::error::{LoreError, Result};
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::shutdown::ShutdownSignal;
use crate::core::time::now_ms;
use crate::documents::SourceType;
@@ -14,6 +13,7 @@ use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::{MilestoneRow, transform_issue};
use crate::gitlab::types::GitLabIssue;
use crate::ingestion::dirty_tracker;
use crate::ingestion::storage::payloads::{StorePayloadOptions, store_payload};
#[derive(Debug, Default)]
pub struct IngestIssuesResult {

View File

@@ -5,7 +5,6 @@ use tracing::{debug, warn};
use crate::Config;
use crate::core::error::{LoreError, Result};
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::shutdown::ShutdownSignal;
use crate::core::time::now_ms;
use crate::documents::SourceType;
@@ -13,6 +12,7 @@ use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::merge_request::transform_merge_request;
use crate::gitlab::types::GitLabMergeRequest;
use crate::ingestion::dirty_tracker;
use crate::ingestion::storage::payloads::{StorePayloadOptions, store_payload};
#[derive(Debug, Default)]
pub struct IngestMergeRequestsResult {

View File

@@ -6,6 +6,7 @@ pub mod merge_requests;
pub mod mr_diffs;
pub mod mr_discussions;
pub mod orchestrator;
pub mod storage;
pub(crate) mod surgical;
pub use discussions::{

View File

@@ -4,7 +4,6 @@ use tracing::{debug, info, warn};
use crate::Config;
use crate::core::error::Result;
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::time::now_ms;
use crate::documents::SourceType;
use crate::gitlab::GitLabClient;
@@ -15,6 +14,7 @@ use crate::gitlab::transformers::{
use crate::gitlab::types::GitLabDiscussion;
use crate::ingestion::dirty_tracker;
use crate::ingestion::discussions::NoteUpsertOutcome;
use crate::ingestion::storage::payloads::{StorePayloadOptions, store_payload};
use super::merge_requests::MrForDiscussionSync;

View File

@@ -3,15 +3,15 @@ use rusqlite::Connection;
use tracing::{debug, instrument, warn};
use crate::Config;
use crate::core::dependent_queue::{
claim_jobs, complete_job_tx, count_claimable_jobs, enqueue_job, fail_job, reclaim_stale_locks,
};
use crate::core::error::Result;
use crate::core::references::{
EntityReference, insert_entity_reference, resolve_issue_local_id, resolve_project_path,
};
use crate::core::shutdown::ShutdownSignal;
use crate::gitlab::GitLabClient;
use crate::ingestion::storage::queue::{
claim_jobs, complete_job_tx, count_claimable_jobs, enqueue_job, fail_job, reclaim_stale_locks,
};
use crate::xref::references::{
EntityReference, insert_entity_reference, resolve_issue_local_id, resolve_project_path,
};
use super::discussions::{prefetch_issue_discussions, write_prefetched_issue_discussions};
use super::issues::{IssueForDiscussionSync, ingest_issues};
@@ -354,7 +354,7 @@ pub async fn ingest_project_issues_with_progress(
result.resource_events_failed = drain_result.failed;
let refs_inserted =
crate::core::references::extract_refs_from_state_events(conn, project_id)?;
crate::xref::references::extract_refs_from_state_events(conn, project_id)?;
if refs_inserted > 0 {
debug!(
refs_inserted,
@@ -654,7 +654,7 @@ pub async fn ingest_project_merge_requests_with_progress(
result.resource_events_failed = drain_result.failed;
let refs_inserted =
crate::core::references::extract_refs_from_state_events(conn, project_id)?;
crate::xref::references::extract_refs_from_state_events(conn, project_id)?;
if refs_inserted > 0 {
debug!(
refs_inserted,
@@ -668,7 +668,7 @@ pub async fn ingest_project_merge_requests_with_progress(
return Ok(result);
}
let note_refs = crate::core::note_parser::extract_refs_from_system_notes(conn, project_id)?;
let note_refs = crate::xref::note_parser::extract_refs_from_system_notes(conn, project_id)?;
if note_refs.inserted > 0 || note_refs.skipped_unresolvable > 0 {
debug!(
inserted = note_refs.inserted,
@@ -678,7 +678,7 @@ pub async fn ingest_project_merge_requests_with_progress(
);
}
let desc_refs = crate::core::note_parser::extract_refs_from_descriptions(conn, project_id)?;
let desc_refs = crate::xref::note_parser::extract_refs_from_descriptions(conn, project_id)?;
if desc_refs.inserted > 0 || desc_refs.skipped_unresolvable > 0 {
debug!(
inserted = desc_refs.inserted,
@@ -687,7 +687,7 @@ pub async fn ingest_project_merge_requests_with_progress(
);
}
let user_note_refs = crate::core::note_parser::extract_refs_from_user_notes(conn, project_id)?;
let user_note_refs = crate::xref::note_parser::extract_refs_from_user_notes(conn, project_id)?;
if user_note_refs.inserted > 0 || user_note_refs.skipped_unresolvable > 0 {
debug!(
inserted = user_note_refs.inserted,
@@ -1121,7 +1121,7 @@ pub(crate) fn store_resource_events(
milestone_events: &[crate::gitlab::types::GitLabMilestoneEvent],
) -> Result<()> {
if !state_events.is_empty() {
crate::core::events_db::upsert_state_events(
crate::ingestion::storage::events::upsert_state_events(
conn,
project_id,
entity_type,
@@ -1131,7 +1131,7 @@ pub(crate) fn store_resource_events(
}
if !label_events.is_empty() {
crate::core::events_db::upsert_label_events(
crate::ingestion::storage::events::upsert_label_events(
conn,
project_id,
entity_type,
@@ -1141,7 +1141,7 @@ pub(crate) fn store_resource_events(
}
if !milestone_events.is_empty() {
crate::core::events_db::upsert_milestone_events(
crate::ingestion::storage::events::upsert_milestone_events(
conn,
project_id,
entity_type,

View File

@@ -1,7 +1,7 @@
use rusqlite::Connection;
use super::error::{LoreError, Result};
use super::time::iso_to_ms_strict;
use crate::core::error::{LoreError, Result};
use crate::core::time::iso_to_ms_strict;
use crate::gitlab::types::{GitLabLabelEvent, GitLabMilestoneEvent, GitLabStateEvent};
pub fn upsert_state_events(

View File

@@ -0,0 +1,4 @@
pub mod events;
pub mod payloads;
pub mod queue;
pub mod sync_run;

View File

@@ -6,8 +6,8 @@ use rusqlite::OptionalExtension;
use sha2::{Digest, Sha256};
use std::io::{Read, Write};
use super::error::Result;
use super::time::now_ms;
use crate::core::error::Result;
use crate::core::time::now_ms;
pub struct StorePayloadOptions<'a> {
pub project_id: Option<i64>,

View File

@@ -2,8 +2,8 @@ use std::collections::HashMap;
use rusqlite::Connection;
use super::error::Result;
use super::time::now_ms;
use crate::core::error::{LoreError, Result};
use crate::core::time::now_ms;
#[derive(Debug)]
pub struct PendingJob {
@@ -139,7 +139,7 @@ pub fn fail_job(conn: &Connection, job_id: i64, error: &str) -> Result<()> {
)?;
if changes == 0 {
return Err(crate::core::error::LoreError::Other(
return Err(LoreError::Other(
"fail_job: job not found (may have been reclaimed or completed)".into(),
));
}

View File

@@ -1,8 +1,8 @@
use rusqlite::Connection;
use super::error::Result;
use super::metrics::StageTiming;
use super::time::now_ms;
use crate::core::error::Result;
use crate::core::metrics::StageTiming;
use crate::core::time::now_ms;
pub struct SyncRunRecorder {
row_id: i64,

View File

@@ -3,7 +3,12 @@ pub mod core;
pub mod documents;
pub mod embedding;
pub mod gitlab;
pub mod http;
pub mod ingestion;
pub mod search;
#[cfg(test)]
pub mod test_support;
pub mod timeline;
pub mod xref;
pub use core::{Config, LoreError, Result};

File diff suppressed because it is too large Load Diff

View File

@@ -4,7 +4,7 @@ use rusqlite::Connection;
use rusqlite::OptionalExtension;
use crate::core::error::Result;
use crate::embedding::chunk_ids::decode_rowid;
use crate::embedding::chunks::decode_rowid;
#[derive(Debug)]
pub struct VectorResult {

49
src/test_support.rs Normal file
View File

@@ -0,0 +1,49 @@
use std::path::Path;
use rusqlite::Connection;
use crate::core::config::{
Config, EmbeddingConfig, GitLabConfig, LoggingConfig, ProjectConfig, ScoringConfig,
StorageConfig, SyncConfig,
};
use crate::core::db::{create_connection, run_migrations};
/// Opens a connection at the `:memory:` path and runs all migrations on it.
///
/// # Panics
/// Panics if the connection cannot be created or a migration fails.
pub fn setup_test_db() -> Connection {
    let db_path = Path::new(":memory:");
    let db = create_connection(db_path).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Inserts a row into `projects` for tests.
///
/// The row uses `id * 100` as `gitlab_project_id` and
/// `https://git.example.com/{path}` as `web_url`.
///
/// # Panics
/// Panics if the insert fails.
pub fn insert_project(conn: &Connection, id: i64, path: &str) {
    let gitlab_project_id = id * 100;
    let web_url = format!("https://git.example.com/{path}");
    conn.execute(
        "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url)
VALUES (?1, ?2, ?3, ?4)",
        rusqlite::params![id, gitlab_project_id, path, web_url],
    )
    .unwrap();
}
/// Builds a `Config` for tests.
///
/// The GitLab section points at `https://gitlab.example.com` with the token read from
/// `GITLAB_TOKEN`, and the only configured project is `group/project`.
/// `default_project` is copied into the config; all remaining sections use their
/// `Default` values.
pub fn test_config(default_project: Option<&str>) -> Config {
    let gitlab = GitLabConfig {
        base_url: String::from("https://gitlab.example.com"),
        token_env_var: String::from("GITLAB_TOKEN"),
        token: None,
        username: None,
    };
    let projects = vec![ProjectConfig {
        path: String::from("group/project"),
    }];

    Config {
        gitlab,
        projects,
        default_project: default_project.map(|name| name.to_string()),
        sync: SyncConfig::default(),
        storage: StorageConfig::default(),
        embedding: EmbeddingConfig::default(),
        logging: LoggingConfig::default(),
        scoring: ScoringConfig::default(),
    }
}

View File

@@ -2,11 +2,11 @@ use rusqlite::Connection;
use std::collections::HashSet;
use crate::core::error::{LoreError, Result};
use crate::core::timeline::{
use super::types::{
EntityRef, ExpandedEntityRef, MatchedDiscussion, THREAD_MAX_NOTES, THREAD_NOTE_MAX_CHARS,
ThreadNote, TimelineEvent, TimelineEventType, truncate_to_chars,
};
use crate::core::error::{LoreError, Result};
/// Collect all events for seed and expanded entities, interleave chronologically.
///

View File

@@ -2,8 +2,8 @@ use std::collections::{HashSet, VecDeque};
use rusqlite::Connection;
use super::types::{EntityRef, ExpandedEntityRef, UnresolvedRef, resolve_entity_ref};
use crate::core::error::Result;
use crate::core::timeline::{EntityRef, ExpandedEntityRef, UnresolvedRef, resolve_entity_ref};
/// Result of the expand phase.
pub struct ExpandResult {

11
src/timeline/mod.rs Normal file
View File

@@ -0,0 +1,11 @@
mod types;
pub mod collect;
pub mod expand;
pub mod seed;
pub use types::{
EntityRef, ExpandedEntityRef, MatchedDiscussion, THREAD_MAX_NOTES, THREAD_NOTE_MAX_CHARS,
ThreadNote, TimelineEvent, TimelineEventType, TimelineResult, UnresolvedRef,
resolve_entity_by_iid, resolve_entity_ref,
};

View File

@@ -3,11 +3,11 @@ use std::collections::HashSet;
use rusqlite::Connection;
use tracing::debug;
use crate::core::error::Result;
use crate::core::timeline::{
use super::types::{
EntityRef, MatchedDiscussion, TimelineEvent, TimelineEventType, resolve_entity_by_iid,
resolve_entity_ref, truncate_to_chars,
};
use crate::core::error::Result;
use crate::embedding::ollama::OllamaClient;
use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_fts_query};

View File

@@ -554,7 +554,7 @@ fn test_collect_discussion_thread_body_truncation() {
if let TimelineEventType::DiscussionThread { notes, .. } = &thread.event_type {
assert!(
notes[0].body.chars().count() <= crate::core::timeline::THREAD_NOTE_MAX_CHARS,
notes[0].body.chars().count() <= crate::timeline::THREAD_NOTE_MAX_CHARS,
"Body should be truncated to THREAD_NOTE_MAX_CHARS"
);
} else {
@@ -598,7 +598,7 @@ fn test_collect_discussion_thread_note_cap() {
// 50 notes + 1 synthetic summary = 51
assert_eq!(
notes.len(),
crate::core::timeline::THREAD_MAX_NOTES + 1,
crate::timeline::THREAD_MAX_NOTES + 1,
"Should cap at THREAD_MAX_NOTES + synthetic summary"
);
let last = notes.last().unwrap();

View File

@@ -0,0 +1,553 @@
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
/// Opens an in-memory database connection and runs the migrations on it.
fn setup_test_db() -> Connection {
    let db = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();
    db
}
/// Inserts the fixture project `group/project` and returns the new row id.
fn insert_test_project(conn: &Connection) -> i64 {
    let sql = "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (1, 'group/project', 'https://gitlab.com/group/project')";
    conn.execute(sql, []).unwrap();
    conn.last_insert_rowid()
}
/// Inserts an opened test issue with the given `iid` into `project_id` and
/// returns the new row id. The `gitlab_id` column is set to `iid * 100`.
fn insert_test_issue(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    let gitlab_id = iid * 100;
    let sql = "INSERT INTO issues (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test issue', 'opened', 'alice', 1000, 2000, 3000)";
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid])
        .unwrap();
    conn.last_insert_rowid()
}
/// Inserts an opened test merge request with the given `iid` into `project_id`
/// and returns the new row id. The `gitlab_id` column is set to `iid * 100`.
fn insert_test_mr(conn: &Connection, project_id: i64, iid: i64) -> i64 {
    let gitlab_id = iid * 100;
    let sql = "INSERT INTO merge_requests (gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, 'Test MR', 'opened', 'bob', 1000, 2000, 3000)";
    conn.execute(sql, rusqlite::params![gitlab_id, project_id, iid])
        .unwrap();
    conn.last_insert_rowid()
}
/// Inserts a row into `documents` for the given source and returns its row id.
///
/// The content hash is derived from `source_id` as `hash_{source_id}`.
fn insert_document(
    conn: &Connection,
    source_type: &str,
    source_id: i64,
    project_id: i64,
    content: &str,
) -> i64 {
    let content_hash = format!("hash_{source_id}");
    let sql = "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) VALUES (?1, ?2, ?3, ?4, ?5)";
    conn.execute(
        sql,
        rusqlite::params![source_type, source_id, project_id, content, content_hash],
    )
    .unwrap();
    conn.last_insert_rowid()
}
fn insert_discussion(
conn: &Connection,
project_id: i64,
issue_id: Option<i64>,
mr_id: Option<i64>,
) -> i64 {
let noteable_type = if issue_id.is_some() {
"Issue"
} else {
"MergeRequest"
};
conn.execute(
"INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, ?3, ?4, ?5, 0)",
rusqlite::params![format!("disc_{}", rand::random::<u32>()), project_id, issue_id, mr_id, noteable_type],
)
.unwrap();
conn.last_insert_rowid()
}
/// Inserts a note authored by `alice` into `discussion_id` and returns its
/// row id. The note's `gitlab_id` is a random `u32` widened to `i64`.
fn insert_note(
    conn: &Connection,
    discussion_id: i64,
    project_id: i64,
    body: &str,
    is_system: bool,
) -> i64 {
    let gitlab_id = i64::from(rand::random::<u32>());
    // The flag is bound as an integer (0 or 1).
    let system_flag = i32::from(is_system);
    let sql = "INSERT INTO notes (gitlab_id, discussion_id, project_id, is_system, author_username, body, created_at, updated_at, last_seen_at) VALUES (?1, ?2, ?3, ?4, 'alice', ?5, 5000, 5000, 5000)";
    conn.execute(
        sql,
        rusqlite::params![gitlab_id, discussion_id, project_id, system_flag, body],
    )
    .unwrap();
    conn.last_insert_rowid()
}
/// An empty query yields neither seed entities nor evidence notes.
#[test]
fn test_seed_empty_query_returns_empty() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let seeded = seed_timeline(&db, None, "", None, None, 50, 10)
            .await
            .unwrap();
        assert!(seeded.seed_entities.is_empty());
        assert!(seeded.evidence_notes.is_empty());
    });
}
/// A query that matches no document content yields no seed entities.
#[test]
fn test_seed_no_matches_returns_empty() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let issue = insert_test_issue(&db, project, 1);
        insert_document(&db, "issue", issue, project, "unrelated content here");

        let seeded = seed_timeline(&db, None, "nonexistent_xyzzy_query", None, None, 50, 10)
            .await
            .unwrap();
        assert!(seeded.seed_entities.is_empty());
    });
}
/// A matching issue document seeds exactly that issue.
#[test]
fn test_seed_finds_issue() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let issue = insert_test_issue(&db, project, 42);
        insert_document(
            &db,
            "issue",
            issue,
            project,
            "authentication error in login flow",
        );

        let seeded = seed_timeline(&db, None, "authentication", None, None, 50, 10)
            .await
            .unwrap();
        assert_eq!(seeded.seed_entities.len(), 1);
        let entity = &seeded.seed_entities[0];
        assert_eq!(entity.entity_type, "issue");
        assert_eq!(entity.entity_iid, 42);
        assert_eq!(entity.project_path, "group/project");
    });
}
/// A matching merge-request document seeds exactly that merge request.
#[test]
fn test_seed_finds_mr() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let mr = insert_test_mr(&db, project, 99);
        insert_document(&db, "merge_request", mr, project, "fix authentication bug");

        let seeded = seed_timeline(&db, None, "authentication", None, None, 50, 10)
            .await
            .unwrap();
        assert_eq!(seeded.seed_entities.len(), 1);
        let entity = &seeded.seed_entities[0];
        assert_eq!(entity.entity_type, "merge_request");
        assert_eq!(entity.entity_iid, 99);
    });
}
/// Two matching documents that map to the same issue produce one seed entity.
#[test]
fn test_seed_deduplicates_entities() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let issue = insert_test_issue(&db, project, 10);

        // First document: the issue itself.
        insert_document(
            &db,
            "issue",
            issue,
            project,
            "authentication error first doc",
        );
        // Second document: a discussion attached to the same issue.
        let discussion = insert_discussion(&db, project, Some(issue), None);
        insert_document(
            &db,
            "discussion",
            discussion,
            project,
            "authentication error second doc",
        );

        let seeded = seed_timeline(&db, None, "authentication", None, None, 50, 10)
            .await
            .unwrap();
        // Both documents resolve to the same issue, so only one entity remains.
        assert_eq!(seeded.seed_entities.len(), 1);
        assert_eq!(seeded.seed_entities[0].entity_iid, 10);
    });
}
/// A matching discussion document seeds the issue the discussion belongs to.
#[test]
fn test_seed_resolves_discussion_to_parent() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let issue = insert_test_issue(&db, project, 7);
        let discussion = insert_discussion(&db, project, Some(issue), None);
        insert_document(
            &db,
            "discussion",
            discussion,
            project,
            "deployment pipeline failed",
        );

        let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 10)
            .await
            .unwrap();
        assert_eq!(seeded.seed_entities.len(), 1);
        let entity = &seeded.seed_entities[0];
        assert_eq!(entity.entity_type, "issue");
        assert_eq!(entity.entity_iid, 7);
    });
}
/// With 15 matching discussions and a limit of 5, at most 5 evidence notes
/// are returned.
#[test]
fn test_seed_evidence_capped() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let issue = insert_test_issue(&db, project, 1);

        // 15 discussions, each with one document and one note about "deployment".
        for n in 0..15 {
            let discussion = insert_discussion(&db, project, Some(issue), None);
            let doc_text = format!("deployment issue number {n}");
            insert_document(&db, "discussion", discussion, project, &doc_text);
            let note_text = format!("deployment note {n}");
            insert_note(&db, discussion, project, &note_text, false);
        }

        let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 5)
            .await
            .unwrap();
        assert!(seeded.evidence_notes.len() <= 5);
    });
}
/// A 500-character note body results in an evidence snippet of at most 200
/// characters.
#[test]
fn test_seed_evidence_snippet_truncated() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let issue = insert_test_issue(&db, project, 1);
        let discussion = insert_discussion(&db, project, Some(issue), None);
        insert_document(
            &db,
            "discussion",
            discussion,
            project,
            "deployment configuration",
        );
        let oversized_body = "x".repeat(500);
        insert_note(&db, discussion, project, &oversized_body, false);

        let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 10)
            .await
            .unwrap();
        assert!(!seeded.evidence_notes.is_empty());
        match &seeded.evidence_notes[0].event_type {
            TimelineEventType::NoteEvidence { snippet, .. } => {
                assert!(snippet.chars().count() <= 200);
            }
            _ => panic!("Expected NoteEvidence"),
        }
    });
}
/// When a project id is passed, only entities from that project are seeded.
#[test]
fn test_seed_respects_project_filter() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let first_project = insert_test_project(&db);

        // A second project with its own matching issue.
        db.execute(
            "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) VALUES (2, 'other/repo', 'https://gitlab.com/other/repo')",
            [],
        )
        .unwrap();
        let second_project = db.last_insert_rowid();

        let first_issue = insert_test_issue(&db, first_project, 1);
        insert_document(
            &db,
            "issue",
            first_issue,
            first_project,
            "authentication error",
        );
        let second_issue = insert_test_issue(&db, second_project, 2);
        insert_document(
            &db,
            "issue",
            second_issue,
            second_project,
            "authentication error",
        );

        // Restrict the search to the first project.
        let seeded = seed_timeline(
            &db,
            None,
            "authentication",
            Some(first_project),
            None,
            50,
            10,
        )
        .await
        .unwrap();
        assert_eq!(seeded.seed_entities.len(), 1);
        assert_eq!(seeded.seed_entities[0].project_path, "group/project");
    });
}
// ─── Matched discussion tests ───────────────────────────────────────────────
/// A matching discussion document is reported as a matched discussion that
/// points at its parent issue.
#[test]
fn test_seed_captures_matched_discussions_from_discussion_doc() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let issue = insert_test_issue(&db, project, 1);
        let discussion = insert_discussion(&db, project, Some(issue), None);
        insert_document(
            &db,
            "discussion",
            discussion,
            project,
            "deployment pipeline authentication",
        );

        let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 10)
            .await
            .unwrap();
        assert_eq!(seeded.matched_discussions.len(), 1);
        let matched = &seeded.matched_discussions[0];
        assert_eq!(matched.discussion_id, discussion);
        assert_eq!(matched.entity_type, "issue");
        assert_eq!(matched.entity_id, issue);
    });
}
/// A matching note document is reported under the discussion that contains
/// the note.
#[test]
fn test_seed_captures_matched_discussions_from_note_doc() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let issue = insert_test_issue(&db, project, 1);
        let discussion = insert_discussion(&db, project, Some(issue), None);
        let note = insert_note(&db, discussion, project, "note about deployment", false);
        insert_document(
            &db,
            "note",
            note,
            project,
            "deployment configuration details",
        );

        let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 10)
            .await
            .unwrap();
        assert_eq!(
            seeded.matched_discussions.len(),
            1,
            "Note doc should resolve to parent discussion"
        );
        let matched = &seeded.matched_discussions[0];
        assert_eq!(matched.discussion_id, discussion);
        assert_eq!(matched.entity_type, "issue");
    });
}
/// A discussion document and a note document from the same discussion yield
/// a single matched discussion.
#[test]
fn test_seed_deduplicates_matched_discussions() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let issue = insert_test_issue(&db, project, 1);
        let discussion = insert_discussion(&db, project, Some(issue), None);

        // Document 1: the discussion itself.
        insert_document(
            &db,
            "discussion",
            discussion,
            project,
            "deployment pipeline first doc",
        );
        // Document 2: a note inside that same discussion.
        let note = insert_note(&db, discussion, project, "deployment note", false);
        insert_document(
            &db,
            "note",
            note,
            project,
            "deployment pipeline second doc",
        );

        let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 10)
            .await
            .unwrap();
        assert_eq!(
            seeded.matched_discussions.len(),
            1,
            "Same discussion_id from two docs should deduplicate"
        );
    });
}
/// A matched discussion attached to a merge request reports that merge
/// request as its parent entity.
#[test]
fn test_seed_matched_discussions_have_correct_parent_entity() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(async {
        let db = setup_test_db();
        let project = insert_test_project(&db);
        let mr = insert_test_mr(&db, project, 99);
        let discussion = insert_discussion(&db, project, None, Some(mr));
        insert_document(
            &db,
            "discussion",
            discussion,
            project,
            "deployment pipeline for merge request",
        );

        let seeded = seed_timeline(&db, None, "deployment", None, None, 50, 10)
            .await
            .unwrap();
        assert_eq!(seeded.matched_discussions.len(), 1);
        let matched = &seeded.matched_discussions[0];
        assert_eq!(matched.entity_type, "merge_request");
        assert_eq!(matched.entity_id, mr);
    });
}
// ─── seed_timeline_direct tests ─────────────────────────────────────────────
/// Direct seeding by entity type and iid resolves to that single entity.
#[test]
fn test_direct_seed_resolves_entity() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    insert_test_issue(&db, project, 42);

    let seeded = seed_timeline_direct(&db, "issue", 42, None).unwrap();
    assert_eq!(seeded.seed_entities.len(), 1);
    let entity = &seeded.seed_entities[0];
    assert_eq!(entity.entity_type, "issue");
    assert_eq!(entity.entity_iid, 42);
    assert_eq!(entity.project_path, "group/project");
}
/// Direct seeding returns every discussion attached to the entity.
#[test]
fn test_direct_seed_gathers_all_discussions() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 42);

    // Three discussions on the same issue.
    let first = insert_discussion(&db, project, Some(issue), None);
    let second = insert_discussion(&db, project, Some(issue), None);
    let third = insert_discussion(&db, project, Some(issue), None);

    let seeded = seed_timeline_direct(&db, "issue", 42, None).unwrap();
    assert_eq!(seeded.matched_discussions.len(), 3);

    let found: Vec<i64> = seeded
        .matched_discussions
        .iter()
        .map(|matched| matched.discussion_id)
        .collect();
    for expected in [first, second, third] {
        assert!(found.contains(&expected));
    }
}
/// Direct seeding produces no evidence notes even when the entity has notes.
#[test]
fn test_direct_seed_no_evidence_notes() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let issue = insert_test_issue(&db, project, 42);
    let discussion = insert_discussion(&db, project, Some(issue), None);
    insert_note(&db, discussion, project, "some note body", false);

    let seeded = seed_timeline_direct(&db, "issue", 42, None).unwrap();
    assert!(
        seeded.evidence_notes.is_empty(),
        "Direct seeding should not produce evidence notes"
    );
}
/// Direct seeding reports its search mode as `"direct"`.
#[test]
fn test_direct_seed_search_mode_is_direct() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    insert_test_issue(&db, project, 42);

    let seeded = seed_timeline_direct(&db, "issue", 42, None).unwrap();
    assert_eq!(seeded.search_mode, "direct");
}
/// Direct seeding for an iid that does not exist returns an error.
#[test]
fn test_direct_seed_not_found() {
    let db = setup_test_db();
    insert_test_project(&db);

    let outcome = seed_timeline_direct(&db, "issue", 999, None);
    assert!(outcome.is_err());
}
/// Direct seeding of a merge request returns the merge request and its
/// discussion.
#[test]
fn test_direct_seed_mr() {
    let db = setup_test_db();
    let project = insert_test_project(&db);
    let mr = insert_test_mr(&db, project, 99);
    let discussion = insert_discussion(&db, project, None, Some(mr));

    let seeded = seed_timeline_direct(&db, "merge_request", 99, None).unwrap();

    assert_eq!(seeded.seed_entities.len(), 1);
    let entity = &seeded.seed_entities[0];
    assert_eq!(entity.entity_type, "merge_request");
    assert_eq!(entity.entity_iid, 99);

    assert_eq!(seeded.matched_discussions.len(), 1);
    assert_eq!(seeded.matched_discussions[0].discussion_id, discussion);
}

View File

@@ -3,7 +3,7 @@ use std::cmp::Ordering;
use rusqlite::Connection;
use serde::Serialize;
use super::error::Result;
use crate::core::error::{LoreError, Result};
/// The core timeline event. All pipeline stages produce or consume these.
/// Spec ref: Section 3.3 "Event Model"
@@ -232,7 +232,7 @@ pub fn resolve_entity_by_iid(
"issue" => "issues",
"merge_request" => "merge_requests",
_ => {
return Err(super::error::LoreError::NotFound(format!(
return Err(LoreError::NotFound(format!(
"Unknown entity type: {entity_type}"
)));
}
@@ -259,7 +259,7 @@ pub fn resolve_entity_by_iid(
match rows.len() {
0 => {
let sigil = if entity_type == "issue" { "#" } else { "!" };
Err(super::error::LoreError::NotFound(format!(
Err(LoreError::NotFound(format!(
"{entity_type} {sigil}{iid} not found"
)))
}
@@ -275,7 +275,7 @@ pub fn resolve_entity_by_iid(
_ => {
let projects: Vec<&str> = rows.iter().map(|(_, _, p)| p.as_str()).collect();
let sigil = if entity_type == "issue" { "#" } else { "!" };
Err(super::error::LoreError::Ambiguous(format!(
Err(LoreError::Ambiguous(format!(
"{entity_type} {sigil}{iid} exists in multiple projects: {}. Use --project to specify.",
projects.join(", ")
)))

2
src/xref/mod.rs Normal file
View File

@@ -0,0 +1,2 @@
//! Module root that exposes the `note_parser` and `references` submodules.
//! NOTE(review): `xref` presumably stands for "cross-reference" — confirm
//! against the submodule contents.
pub mod note_parser;
pub mod references;

View File

@@ -4,8 +4,8 @@ use regex::Regex;
use rusqlite::Connection;
use tracing::debug;
use super::error::Result;
use super::time::now_ms;
use crate::core::error::Result;
use crate::core::time::now_ms;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedCrossRef {

View File

@@ -1,8 +1,8 @@
use rusqlite::{Connection, OptionalExtension};
use tracing::info;
use super::error::Result;
use super::time::now_ms;
use crate::core::error::Result;
use crate::core::time::now_ms;
pub fn extract_refs_from_state_events(conn: &Connection, project_id: i64) -> Result<usize> {
let changes = conn.execute(

851
tests/asupersync_e2e.rs Normal file
View File

@@ -0,0 +1,851 @@
//! E2E runtime acceptance tests for the asupersync migration.
//!
//! Proves the full runtime lifecycle works end-to-end:
//! 1. RuntimeBuilder creates a working runtime with IO and timers
//! 2. ShutdownSignal cancellation mid-flow stops processing cleanly
//! 3. Resume: second run picks up where the cancelled first run left off
//! 4. Structured tracing events fire with expected fields
//! 5. No DB corruption across cancel + resume cycle
//! 6. Real HTTP ingestion pipeline (GitLabClient -> pagination -> DB writes)
use std::io::{Read, Write};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use futures::future::join_all;
use rusqlite::Connection;
use serde_json::json;
use lore::core::config::{
Config, EmbeddingConfig, GitLabConfig, LoggingConfig, ProjectConfig, ScoringConfig,
StorageConfig, SyncConfig,
};
use lore::core::db::{create_connection, run_migrations};
use lore::core::shutdown::ShutdownSignal;
use lore::gitlab::GitLabClient;
use lore::ingestion::ingest_issues;
/// Builds a fresh asupersync runtime and drives `f` to completion on it,
/// returning the future's output. Panics if the runtime cannot be built.
fn run<F: std::future::Future<Output = T>, T>(f: F) -> T {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    runtime.block_on(f)
}
/// Creates a migrated in-memory database containing one project row
/// (`id = 1`, `gitlab_project_id = 100`, path `test/repo`).
fn setup_db() -> Connection {
    let db = create_connection(std::path::Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();
    let seed_project_sql = "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url)
VALUES (1, 100, 'test/repo', 'https://git.example.com/test/repo')";
    db.execute(seed_project_sql, []).unwrap();
    db
}
/// Simulated sync pipeline used by the cancel/resume tests.
///
/// `all_iids` is processed in chunks of `batch_size`. For every chunk the
/// function first "fetches" the IIDs concurrently (each fetch is a 1 ms timer
/// sleep), then writes them to `issues` inside one transaction using
/// `INSERT OR IGNORE`. The shutdown `signal` is checked before the fetch and
/// again before the write; if it is cancelled the loop stops.
///
/// When `cancel_after_batches` is `Some(n)`, the function cancels `signal`
/// itself once `n` batches have been committed, so no later chunk is processed.
///
/// Returns `(batches_committed, iids_written)`.
async fn run_sync_pipeline(
    conn: &Connection,
    signal: &ShutdownSignal,
    project_id: i64,
    all_iids: &[i64],
    batch_size: usize,
    cancel_after_batches: Option<usize>,
) -> (usize, Vec<i64>) {
    let mut batches_done = 0usize;
    let mut written: Vec<i64> = Vec::new();

    for batch in all_iids.chunks(batch_size) {
        if signal.is_cancelled() {
            break;
        }

        // Phase: concurrent fetch (simulated with a short timer per IID).
        let fetches: Vec<_> = batch
            .iter()
            .map(|&iid| async move {
                asupersync::time::sleep(asupersync::time::wall_now(), Duration::from_millis(1))
                    .await;
                iid
            })
            .collect();
        let fetched: Vec<i64> = join_all(fetches).await;

        if signal.is_cancelled() {
            break;
        }

        // Phase: DB write; the whole batch commits together.
        let tx = conn.unchecked_transaction().unwrap();
        for &iid in &fetched {
            tx.execute(
                "INSERT OR IGNORE INTO issues (
gitlab_id, project_id, iid, title, state,
author_username, created_at, updated_at, last_seen_at
) VALUES (?1, ?2, ?3, 'issue', 'opened', 'bot', 1000, 2000, 3000)",
                rusqlite::params![iid + 10000, project_id, iid],
            )
            .unwrap();
        }
        tx.commit().unwrap();

        written.extend(&fetched);
        batches_done += 1;

        // Deterministic stand-in for Ctrl+C: cancel once the batch limit is hit.
        if let Some(limit) = cancel_after_batches {
            if batches_done >= limit {
                signal.cancel();
            }
        }
    }

    (batches_done, written)
}
// ───────────────────────────────────────────────────────────────────
// Test 1: Full runtime lifecycle — build, spawn, block_on, drop
// ───────────────────────────────────────────────────────────────────
/// Builds a runtime, spawns a background task through its handle, drives a
/// future with `block_on`, and lets the runtime drop at the end of the test.
#[test]
fn runtime_lifecycle_build_run_drop() {
    let runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    let spawner = runtime.handle();

    let completions = Arc::new(AtomicUsize::new(0));
    let task_completions = Arc::clone(&completions);

    // Background task: sleeps 5 ms, then bumps the counter.
    spawner.spawn(async move {
        asupersync::time::sleep(asupersync::time::wall_now(), Duration::from_millis(5)).await;
        task_completions.fetch_add(1, Ordering::Relaxed);
    });

    // Foreground future: sleeps 20 ms so the spawned task has time to finish.
    let answer = runtime.block_on(async {
        asupersync::time::sleep(asupersync::time::wall_now(), Duration::from_millis(20)).await;
        42
    });

    assert_eq!(answer, 42, "block_on should return the async result");
    assert_eq!(
        completions.load(Ordering::Relaxed),
        1,
        "spawned task should complete during block_on"
    );
    // `runtime` is dropped when the test function returns.
}
// ───────────────────────────────────────────────────────────────────
// Test 2: Cancel mid-flow, then resume — proves idempotent sync
// ───────────────────────────────────────────────────────────────────
/// Cancels the simulated pipeline after its first batch, then resumes with the
/// IIDs that are not yet in the database and checks that all 15 end up stored.
#[test]
fn cancel_then_resume_completes_all_work() {
    let conn = setup_db();
    let all_iids: Vec<i64> = (1..=15).collect();
    let batch_size = 5; // 15 IIDs -> 3 batches of 5

    // Counts the issue rows currently stored for project 1.
    let issue_count = || -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM issues WHERE project_id = 1",
            [],
            |row| row.get(0),
        )
        .unwrap()
    };

    // Run 1: the pipeline cancels itself after one committed batch.
    let (first_phases, first_synced) = run(async {
        let signal = ShutdownSignal::new();
        run_sync_pipeline(&conn, &signal, 1, &all_iids, batch_size, Some(1)).await
    });
    assert_eq!(
        first_phases, 1,
        "exactly one batch should complete before cancel"
    );
    assert_eq!(
        first_synced.len(),
        batch_size,
        "only first batch should be synced"
    );

    // The database must agree with what run 1 reported.
    assert_eq!(
        issue_count(),
        first_synced.len() as i64,
        "DB count should match synced IIDs"
    );

    // Run 2: work out which IIDs are still missing and sync only those.
    let stored_iids: Vec<i64> = {
        let mut stmt = conn
            .prepare("SELECT iid FROM issues WHERE project_id = 1 ORDER BY iid")
            .unwrap();
        stmt.query_map([], |row| row.get(0))
            .unwrap()
            .map(|row| row.unwrap())
            .collect()
    };
    let pending: Vec<i64> = all_iids
        .iter()
        .copied()
        .filter(|iid| !stored_iids.contains(iid))
        .collect();
    assert!(
        !pending.is_empty(),
        "there should be remaining work after cancellation"
    );

    let (second_phases, second_synced) = run(async {
        // Fresh signal that is never cancelled.
        let signal = ShutdownSignal::new();
        run_sync_pipeline(&conn, &signal, 1, &pending, batch_size, None).await
    });
    assert!(
        second_phases >= 1,
        "resume run should process remaining batches"
    );

    // Every one of the 15 issues must now be present.
    assert_eq!(
        issue_count(),
        15,
        "all 15 issues should be in DB after cancel + resume"
    );

    // The two runs together must account for each issue exactly once.
    let combined = first_synced.len() + second_synced.len();
    assert_eq!(
        combined, 15,
        "combined synced count should equal total issues"
    );
}
// ───────────────────────────────────────────────────────────────────
// Test 3: Structured tracing events fire with expected fields
// ───────────────────────────────────────────────────────────────────
/// Emits three structured `info` events inside a `sync_pipeline` span while
/// running on the asupersync runtime, and checks that the JSON-formatted
/// output captured by `EventWriter` contains the span name and the expected
/// field values.
#[test]
fn structured_tracing_captures_phase_transitions() {
    use tracing_subscriber::layer::SubscriberExt;

    // Shared buffer that receives every formatted event.
    let events: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
    let events_clone = Arc::clone(&events);
    let layer = tracing_subscriber::fmt::layer()
        .json()
        .with_writer(move || EventWriter(Arc::clone(&events_clone)))
        .with_target(false);
    let subscriber = tracing_subscriber::registry().with(layer);
    // Install the subscriber for this thread until `_guard` is dropped.
    let _guard = tracing::subscriber::set_default(subscriber);

    let span = tracing::info_span!(
        "sync_pipeline",
        run_id = "test-run-001",
        project = "test/repo"
    );

    // Attach the span to the future instead of holding a `Span::enter()` guard
    // across the `.await` below: the tracing documentation warns that an enter
    // guard kept alive over an await point yields incorrect traces, whereas an
    // instrumented future enters and exits the span on every poll.
    run(tracing::Instrument::instrument(
        async {
            tracing::info!(phase = "fetch_issues", entity_count = 10, "phase started");
            asupersync::time::sleep(asupersync::time::wall_now(), Duration::from_millis(1)).await;
            tracing::info!(
                phase = "fetch_issues",
                entity_count = 10,
                success = 8,
                skipped = 2,
                "phase completed"
            );
            tracing::info!(phase = "cancelled", reason = "ctrl_c", "pipeline stopped");
        },
        span,
    ));

    let captured = events.lock().unwrap();
    assert!(
        captured.len() >= 3,
        "should capture at least 3 tracing events, got {}",
        captured.len()
    );

    // Verify structured fields are present in the JSON output.
    let all_text = captured.join("\n");
    assert!(
        all_text.contains("sync_pipeline"),
        "span name should appear in output"
    );
    assert!(
        all_text.contains("test-run-001"),
        "run_id field should appear in output"
    );
    assert!(
        all_text.contains("fetch_issues"),
        "phase field should appear in output"
    );
    assert!(
        all_text.contains("ctrl_c"),
        "cancellation reason should appear in output"
    );
}
/// An in-memory `Write` sink backed by a shared `Vec<String>`.
///
/// Each `write` call whose bytes are valid UTF-8 and are not blank after
/// trimming is stored as one trimmed entry. Invalid UTF-8 and whitespace-only
/// writes are discarded, but the full buffer length is still reported as
/// written.
struct EventWriter(Arc<Mutex<Vec<String>>>);

impl std::io::Write for EventWriter {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        // Non-UTF-8 input collapses to "" and is skipped like a blank write.
        let entry = std::str::from_utf8(buf).map(str::trim).unwrap_or("");
        if !entry.is_empty() {
            self.0.lock().unwrap().push(entry.to_string());
        }
        Ok(buf.len())
    }

    /// Nothing is buffered, so flushing always succeeds.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
// ───────────────────────────────────────────────────────────────────
// Test 4: Concurrent fan-out under asupersync with timing proof
// ───────────────────────────────────────────────────────────────────
/// Awaits ten 50 ms timer sleeps together and requires the whole batch to
/// finish in under 200 ms, which would be impossible if they ran one after
/// another (500 ms or more).
#[test]
fn concurrent_fanout_runs_in_parallel() {
    run(async {
        let started = Instant::now();

        let sleepers: Vec<_> = (0..10)
            .map(|_| async {
                asupersync::time::sleep(asupersync::time::wall_now(), Duration::from_millis(50))
                    .await;
            })
            .collect();
        join_all(sleepers).await;

        let elapsed = started.elapsed();
        let budget = Duration::from_millis(200);
        assert!(
            elapsed < budget,
            "fan-out should run concurrently, took {:?}",
            elapsed
        );
    });
}
// ───────────────────────────────────────────────────────────────────
// Test 5: DB integrity after multiple runtime instantiations
// ───────────────────────────────────────────────────────────────────
/// Writes issues 1-5 under one runtime, drops it, writes issues 6-10 under a
/// second runtime, and checks the row counts after each stage using the same
/// database connection throughout.
#[test]
fn db_integrity_across_runtime_restarts() {
    let conn = setup_db();

    // Run 1: first runtime inserts issues 1-5 in a single transaction.
    {
        let first_runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
        first_runtime.block_on(async {
            let tx = conn.unchecked_transaction().unwrap();
            for iid in 1..=5 {
                tx.execute(
                    "INSERT INTO issues (
gitlab_id, project_id, iid, title, state,
author_username, created_at, updated_at, last_seen_at
) VALUES (?1, 1, ?2, 'issue', 'opened', 'bot', 1000, 2000, 3000)",
                    rusqlite::params![iid + 10000, iid],
                )
                .unwrap();
            }
            tx.commit().unwrap();
        });
        // `first_runtime` is dropped at the end of this scope.
    }

    // Run 2: a new runtime sees the earlier rows and adds issues 6-10.
    {
        let second_runtime = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
        second_runtime.block_on(async {
            let persisted: i64 = conn
                .query_row(
                    "SELECT COUNT(*) FROM issues WHERE project_id = 1",
                    [],
                    |row| row.get(0),
                )
                .unwrap();
            assert_eq!(persisted, 5, "run 1 data should persist");

            let tx = conn.unchecked_transaction().unwrap();
            for iid in 6..=10 {
                tx.execute(
                    "INSERT INTO issues (
gitlab_id, project_id, iid, title, state,
author_username, created_at, updated_at, last_seen_at
) VALUES (?1, 1, ?2, 'issue', 'opened', 'bot', 1000, 2000, 3000)",
                    rusqlite::params![iid + 10000, iid],
                )
                .unwrap();
            }
            tx.commit().unwrap();
        });
    }

    // Final check runs as a plain synchronous query, outside any runtime.
    let total: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM issues WHERE project_id = 1",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(total, 10, "both runs should have persisted correctly");
}
// ═══════════════════════════════════════════════════════════════════
// Real HTTP pipeline tests — exercise GitLabClient -> ingest_issues
// ═══════════════════════════════════════════════════════════════════
/// Builds the `Config` used by the HTTP pipeline tests: a fixed GitLab
/// section, one project (`group/project`), no default project, and default
/// values for every other section.
fn test_config() -> Config {
    let gitlab = GitLabConfig {
        base_url: "https://gitlab.example.com".to_string(),
        token_env_var: "GITLAB_TOKEN".to_string(),
        token: None,
        username: None,
    };
    let project = ProjectConfig {
        path: "group/project".to_string(),
    };

    Config {
        gitlab,
        projects: vec![project],
        default_project: None,
        sync: SyncConfig::default(),
        storage: StorageConfig::default(),
        embedding: EmbeddingConfig::default(),
        logging: LoggingConfig::default(),
        scoring: ScoringConfig::default(),
    }
}
/// Builds a GitLab-style issue JSON object.
///
/// `id`, `iid` and `updated_at` are taken from the arguments; the title,
/// description and web URL are derived from `iid`; every other field is a
/// fixed value.
fn make_issue_json(id: i64, iid: i64, updated_at: &str) -> serde_json::Value {
    let title = format!("Issue {iid}");
    let description = format!("Description for issue {iid}");
    let web_url = format!("https://git.example.com/test/repo/-/issues/{iid}");

    json!({
        "id": id,
        "iid": iid,
        "project_id": 100,
        "title": title,
        "description": description,
        "state": "opened",
        "created_at": "2024-01-01T00:00:00.000Z",
        "updated_at": updated_at,
        "closed_at": null,
        "author": {
            "id": 1,
            "username": "testbot",
            "name": "Test Bot"
        },
        "assignees": [],
        "labels": ["backend"],
        "milestone": null,
        "due_date": null,
        "web_url": web_url
    })
}
/// Reads from `stream` until the accumulated bytes contain the blank line that
/// ends an HTTP header block (`\r\n\r\n`) or the peer closes the connection.
///
/// The bytes are discarded. Panics if a read fails.
fn drain_http_request(stream: &mut std::net::TcpStream) {
    const END_OF_HEADERS: &[u8] = b"\r\n\r\n";

    let mut chunk = [0u8; 8192];
    let mut received: Vec<u8> = Vec::new();
    loop {
        let read = stream.read(&mut chunk).unwrap();
        if read == 0 {
            // Peer closed the connection before finishing the headers.
            return;
        }
        received.extend_from_slice(&chunk[..read]);
        let headers_complete = received
            .windows(END_OF_HEADERS.len())
            .any(|window| window == END_OF_HEADERS);
        if headers_complete {
            return;
        }
    }
}
/// Writes a complete HTTP/1.1 response to `stream`.
///
/// The header block carries the status line, `Content-Type: application/json`,
/// a `Content-Length` equal to the byte length of `body`, every pair from
/// `extra_headers`, and `Connection: close`. The body follows, and the stream
/// is flushed. Panics if any write fails.
fn write_http_response(
    stream: &mut std::net::TcpStream,
    status: u16,
    reason: &str,
    extra_headers: &[(&str, &str)],
    body: &str,
) {
    let mut head = format!(
        "HTTP/1.1 {status} {reason}\r\n\
         Content-Type: application/json\r\n\
         Content-Length: {}\r\n",
        body.len()
    );
    head.extend(
        extra_headers
            .iter()
            .map(|(k, v)| format!("{k}: {v}\r\n")),
    );
    head.push_str("Connection: close\r\n\r\n");

    stream.write_all(head.as_bytes()).unwrap();
    stream.write_all(body.as_bytes()).unwrap();
    stream.flush().unwrap();
}
/// Starts a mock GitLab server on an OS-assigned local port.
///
/// A background thread accepts up to `request_count` connections one at a
/// time and passes each to `handler_fn(connection_index, stream)`. The thread
/// stops early if accepting a connection fails. Returns the base URL in the
/// form `http://127.0.0.1:{port}`.
fn mock_gitlab_server<F>(request_count: usize, handler_fn: F) -> String
where
    F: Fn(usize, &mut std::net::TcpStream) + Send + 'static,
{
    let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
    let port = listener.local_addr().unwrap().port();

    std::thread::spawn(move || {
        let connections = listener.incoming().take(request_count).enumerate();
        for (index, incoming) in connections {
            match incoming {
                Ok(mut stream) => handler_fn(index, &mut stream),
                Err(_) => break,
            }
        }
    });

    format!("http://127.0.0.1:{port}")
}
// ───────────────────────────────────────────────────────────────────
// Test 6: Full HTTP ingestion pipeline — paginated issues via mock
// ───────────────────────────────────────────────────────────────────
/// Runs `ingest_issues` against a mock server that serves two pages (three
/// issues, then two) and checks the reported counts, the stored rows, the sync
/// cursor and the `backend` label.
#[test]
fn http_pipeline_ingest_issues_paginated() {
    // First connection gets page 1 with an `x-next-page` header; every later
    // connection gets page 2 without one, which ends pagination.
    let base = mock_gitlab_server(2, |connection, stream| {
        drain_http_request(stream);
        if connection == 0 {
            let page_one = serde_json::to_string(&vec![
                make_issue_json(1001, 1, "2024-06-01T00:00:00.000Z"),
                make_issue_json(1002, 2, "2024-06-02T00:00:00.000Z"),
                make_issue_json(1003, 3, "2024-06-03T00:00:00.000Z"),
            ])
            .unwrap();
            write_http_response(stream, 200, "OK", &[("x-next-page", "2")], &page_one);
        } else {
            let page_two = serde_json::to_string(&vec![
                make_issue_json(1004, 4, "2024-06-04T00:00:00.000Z"),
                make_issue_json(1005, 5, "2024-06-05T00:00:00.000Z"),
            ])
            .unwrap();
            write_http_response(stream, 200, "OK", &[], &page_two);
        }
    });

    let conn = setup_db();
    let config = test_config();

    run(async {
        let client = GitLabClient::new(&base, "test-token", Some(1000.0));
        let signal = ShutdownSignal::new();
        let outcome = ingest_issues(&conn, &client, &config, 1, 100, &signal)
            .await
            .unwrap();

        assert_eq!(
            outcome.fetched, 5,
            "should fetch all 5 issues across 2 pages"
        );
        assert_eq!(outcome.upserted, 5, "should upsert all 5 issues");

        let stored_issues: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM issues WHERE project_id = 1",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(stored_issues, 5, "DB should contain 5 issues");

        // The sync cursor for this project's issues must have been written.
        let cursor: i64 = conn
            .query_row(
                "SELECT updated_at_cursor FROM sync_cursors
WHERE project_id = 1 AND resource_type = 'issues'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert!(cursor > 0, "sync cursor should be set after ingestion");

        // The shared `backend` label must exist exactly once.
        let backend_labels: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM labels WHERE project_id = 1 AND name = 'backend'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(backend_labels, 1, "backend label should exist");

        let summary = json!({
            "test": "http_pipeline_ingest_issues_paginated",
            "fetched": outcome.fetched,
            "upserted": outcome.upserted,
            "labels_created": outcome.labels_created,
            "db_count": stored_issues,
            "cursor_set": cursor > 0,
        });
        eprintln!("E2E_SUMMARY: {}", serde_json::to_string(&summary).unwrap());
    });
}
// ───────────────────────────────────────────────────────────────────
// Test 7: Cancellation preserves DB integrity during real ingestion
// ───────────────────────────────────────────────────────────────────
#[test]
fn http_pipeline_cancel_preserves_integrity() {
    // Counts how many requests the mock server actually received. It is
    // reported in the summary so a run shows how far ingestion got before it
    // observed the cancelled signal.
    let requests_served = Arc::new(AtomicUsize::new(0));
    let requests_clone = Arc::clone(&requests_served);

    // The mock can serve up to 2 pages. The signal is cancelled before
    // ingestion starts, so ingestion is expected to stop early; whatever was
    // committed must still be consistent.
    let base = mock_gitlab_server(2, move |i, stream| {
        drain_http_request(stream);
        requests_clone.fetch_add(1, Ordering::Relaxed);
        match i {
            0 => {
                let body = serde_json::to_string(&vec![
                    make_issue_json(2001, 10, "2024-07-01T00:00:00.000Z"),
                    make_issue_json(2002, 11, "2024-07-02T00:00:00.000Z"),
                    make_issue_json(2003, 12, "2024-07-03T00:00:00.000Z"),
                ])
                .unwrap();
                write_http_response(stream, 200, "OK", &[("x-next-page", "2")], &body);
            }
            _ => {
                let body = serde_json::to_string(&vec![
                    make_issue_json(2004, 13, "2024-07-04T00:00:00.000Z"),
                    make_issue_json(2005, 14, "2024-07-05T00:00:00.000Z"),
                ])
                .unwrap();
                write_http_response(stream, 200, "OK", &[], &body);
            }
        }
    });

    let conn = setup_db();
    let config = test_config();

    run(async {
        let client = GitLabClient::new(&base, "test-token", Some(1000.0));
        let signal = ShutdownSignal::new();

        // Cancel up front. NOTE(review): presumably ingest_issues checks the
        // signal between streamed issues — confirm against its implementation.
        signal.cancel();

        let result = ingest_issues(&conn, &client, &config, 1, 100, &signal)
            .await
            .unwrap();

        // Key invariant: the number of rows in the DB equals the reported
        // upsert count, however early ingestion stopped.
        let db_count: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM issues WHERE project_id = 1",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(
            db_count as usize, result.upserted,
            "DB count must equal upserted count (no partial data)"
        );

        // If anything was upserted, a cursor row must exist as well.
        if result.upserted > 0 {
            let cursor_exists: bool = conn
                .query_row(
                    "SELECT COUNT(*) > 0 FROM sync_cursors
WHERE project_id = 1 AND resource_type = 'issues'",
                    [],
                    |row| row.get(0),
                )
                .unwrap();
            assert!(
                cursor_exists,
                "cursor should exist when items were upserted"
            );
        }

        let summary = json!({
            "test": "http_pipeline_cancel_preserves_integrity",
            "fetched": result.fetched,
            "upserted": result.upserted,
            "db_count": db_count,
            "integrity_ok": db_count as usize == result.upserted,
            // Previously tracked but never read; surfaced for diagnostics.
            "requests_served": requests_served.load(Ordering::Relaxed),
        });
        eprintln!("E2E_SUMMARY: {}", serde_json::to_string(&summary).unwrap());
    });
}
// ───────────────────────────────────────────────────────────────────
// Test 8: Resume via cursor — second run deduplicates correctly
// ───────────────────────────────────────────────────────────────────
#[test]
fn http_pipeline_resume_via_cursor() {
    // One database is shared by both ingestion passes.
    let conn = setup_db();
    let config = test_config();

    // Pass 1: a single-page listing containing three issues.
    let first_server = mock_gitlab_server(1, |_connection, stream| {
        drain_http_request(stream);
        let issues = vec![
            make_issue_json(3001, 20, "2024-08-01T00:00:00.000Z"),
            make_issue_json(3002, 21, "2024-08-02T00:00:00.000Z"),
            make_issue_json(3003, 22, "2024-08-03T00:00:00.000Z"),
        ];
        let body = serde_json::to_string(&issues).unwrap();
        write_http_response(stream, 200, "OK", &[], &body);
    });
    let run1 = run(async {
        let client = GitLabClient::new(&first_server, "test-token", Some(1000.0));
        let signal = ShutdownSignal::new();
        ingest_issues(&conn, &client, &config, 1, 100, &signal)
            .await
            .unwrap()
    });
    assert_eq!(run1.upserted, 3, "run 1 should upsert 3 issues");

    // Pass 2: the same three issues plus two newer ones. Only the two new
    // issues are expected to be upserted.
    let second_server = mock_gitlab_server(1, |_connection, stream| {
        drain_http_request(stream);
        let issues = vec![
            make_issue_json(3001, 20, "2024-08-01T00:00:00.000Z"),
            make_issue_json(3002, 21, "2024-08-02T00:00:00.000Z"),
            make_issue_json(3003, 22, "2024-08-03T00:00:00.000Z"),
            make_issue_json(3004, 23, "2024-08-04T00:00:00.000Z"),
            make_issue_json(3005, 24, "2024-08-05T00:00:00.000Z"),
        ];
        let body = serde_json::to_string(&issues).unwrap();
        write_http_response(stream, 200, "OK", &[], &body);
    });
    let run2 = run(async {
        let client = GitLabClient::new(&second_server, "test-token", Some(1000.0));
        let signal = ShutdownSignal::new();
        ingest_issues(&conn, &client, &config, 1, 100, &signal)
            .await
            .unwrap()
    });
    assert_eq!(run2.fetched, 5, "run 2 should fetch all 5 from API");
    assert_eq!(run2.upserted, 2, "run 2 should only upsert 2 new issues");

    // The table must hold five rows with five distinct IIDs.
    let total: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM issues WHERE project_id = 1",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(total, 5, "DB should have 5 total issues after resume");

    let distinct_iids: i64 = conn
        .query_row(
            "SELECT COUNT(DISTINCT iid) FROM issues WHERE project_id = 1",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(distinct_iids, 5, "no duplicate IIDs");

    let summary = json!({
        "test": "http_pipeline_resume_via_cursor",
        "run1_upserted": run1.upserted,
        "run2_fetched": run2.fetched,
        "run2_upserted": run2.upserted,
        "total": total,
        "no_duplicates": distinct_iids == 5,
    });
    eprintln!("E2E_SUMMARY: {}", serde_json::to_string(&summary).unwrap());
}
// ───────────────────────────────────────────────────────────────────
// Test 9: Runtime quiescence — no leaked tasks after real ingestion
// ───────────────────────────────────────────────────────────────────
#[test]
fn http_pipeline_runtime_quiescence() {
    // Single-page listing with two issues.
    let base = mock_gitlab_server(1, |_connection, stream| {
        drain_http_request(stream);
        let issues = vec![
            make_issue_json(4001, 30, "2024-09-01T00:00:00.000Z"),
            make_issue_json(4002, 31, "2024-09-02T00:00:00.000Z"),
        ];
        let body = serde_json::to_string(&issues).unwrap();
        write_http_response(stream, 200, "OK", &[], &body);
    });

    let conn = setup_db();
    let config = test_config();

    // Build the runtime by hand (instead of the `run` helper) so that it can
    // be dropped explicitly below.
    let rt = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    let outcome = rt.block_on(async {
        let client = GitLabClient::new(&base, "test-token", Some(1000.0));
        let signal = ShutdownSignal::new();
        ingest_issues(&conn, &client, &config, 1, 100, &signal)
            .await
            .unwrap()
    });

    // The test relies on this drop returning promptly; leaked tasks are
    // expected to show up as a hang or a panic here.
    drop(rt);

    assert_eq!(outcome.upserted, 2);

    let stored: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM issues WHERE project_id = 1",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(stored, 2, "data committed before runtime drop");

    let summary = json!({
        "test": "http_pipeline_runtime_quiescence",
        "upserted": outcome.upserted,
        "db_count": stored,
        "runtime_dropped_cleanly": true,
    });
    eprintln!("E2E_SUMMARY: {}", serde_json::to_string(&summary).unwrap());
}

369
tests/cancellation_tests.rs Normal file
View File

@@ -0,0 +1,369 @@
//! Cancellation integration tests for asupersync runtime migration.
//!
//! Verifies:
//! 1. ShutdownSignal stops fan-out loops cleanly (no task leaks)
//! 2. After runtime completes, no background tasks remain (quiescence)
//! 3. Transaction integrity: cancel during fetch-before-write yields zero partial data
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use futures::future::join_all;
use rusqlite::Connection;
use lore::core::db::{create_connection, run_migrations};
use lore::core::shutdown::ShutdownSignal;
/// Drive the future `f` to completion on a freshly built asupersync runtime
/// and return its output.
///
/// # Panics
///
/// Panics if the runtime cannot be built.
fn run<F: std::future::Future<Output = T>, T>(f: F) -> T {
    let runtime = asupersync::runtime::RuntimeBuilder::new()
        .build()
        .unwrap();
    runtime.block_on(f)
}
/// Create an in-memory database with all migrations applied and a single
/// seeded project row (`gitlab_project_id` 100, path `test/repo`).
///
/// # Panics
///
/// Panics if opening the database, migrating, or seeding fails.
fn setup_db() -> Connection {
    let db = create_connection(std::path::Path::new(":memory:")).unwrap();
    run_migrations(&db).unwrap();

    let seed_project = "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url)
VALUES (100, 'test/repo', 'https://git.example.com/test/repo')";
    db.execute(seed_project, []).unwrap();

    db
}
// ───────────────────────────────────────────────────────────────────
// Test 1: ShutdownSignal cancels fan-out mid-batch
// ───────────────────────────────────────────────────────────────────
#[test]
fn cancel_mid_fanout_drains_all_inflight_tasks() {
    run(async {
        let signal = ShutdownSignal::new();
        let finished = Arc::new(AtomicUsize::new(0));

        // Build a batch of 10 futures that are later awaited together
        // (mirrors a join_all prefetch).
        let mut batch = Vec::with_capacity(10);
        for index in 0..10 {
            let finished = Arc::clone(&finished);
            batch.push(async move {
                // Stand-in for real async work.
                asupersync::time::sleep(
                    asupersync::time::wall_now(),
                    std::time::Duration::from_millis(1),
                )
                .await;
                finished.fetch_add(1, Ordering::Relaxed);
                index
            });
        }

        // Flip the signal before the batch is awaited.
        signal.cancel();

        // Nothing in the futures consults the signal, so every one of them
        // still runs to completion.
        let outputs = join_all(batch).await;
        assert_eq!(outputs.len(), 10, "all futures should resolve");
        assert_eq!(
            finished.load(Ordering::Relaxed),
            10,
            "all tasks should have completed"
        );

        // The cancellation itself stays observable afterwards.
        assert!(signal.is_cancelled());
    });
}
// ───────────────────────────────────────────────────────────────────
// Test 2: Cancellation loop pattern — signal checked between batches
// ───────────────────────────────────────────────────────────────────
#[test]
fn cancel_between_batches_stops_processing() {
    run(async {
        let signal = ShutdownSignal::new();
        let batches_done = Arc::new(AtomicUsize::new(0));

        // Ingestion-loop pattern: the signal is consulted before each batch,
        // never in the middle of one.
        let mut batch_num = 0;
        while batch_num < 5 && !signal.is_cancelled() {
            // One batch = three short sleeps awaited together.
            let work = std::iter::repeat_with(|| async {
                asupersync::time::sleep(
                    asupersync::time::wall_now(),
                    std::time::Duration::from_millis(1),
                )
                .await;
            })
            .take(3)
            .collect::<Vec<_>>();
            join_all(work).await;
            batches_done.fetch_add(1, Ordering::Relaxed);

            // Request cancellation once the first batch has finished.
            if batch_num == 0 {
                signal.cancel();
            }
            batch_num += 1;
        }

        assert_eq!(
            batches_done.load(Ordering::Relaxed),
            1,
            "only first batch should complete before cancellation stops the loop"
        );
    });
}
// ───────────────────────────────────────────────────────────────────
// Test 3: RuntimeHandle::spawn tasks complete before runtime drops
// ───────────────────────────────────────────────────────────────────
#[test]
fn spawned_tasks_complete_before_runtime_drop() {
    let completed = Arc::new(AtomicUsize::new(0));

    let rt = asupersync::runtime::RuntimeBuilder::new().build().unwrap();
    let handle = rt.handle();

    // Five background tasks spawned through the runtime handle (the same
    // mechanism the signal handler uses); each bumps the shared counter after
    // a 1 ms sleep.
    for _ in 0..5 {
        let counter = Arc::clone(&completed);
        handle.spawn(async move {
            asupersync::time::sleep(
                asupersync::time::wall_now(),
                std::time::Duration::from_millis(1),
            )
            .await;
            counter.fetch_add(1, Ordering::Relaxed);
        });
    }

    // Keep the runtime busy for 50 ms so the spawned tasks get time to run.
    rt.block_on(async {
        asupersync::time::sleep(
            asupersync::time::wall_now(),
            std::time::Duration::from_millis(50),
        )
        .await;
    });

    // By the time block_on returns, every task should have incremented.
    assert_eq!(
        completed.load(Ordering::Relaxed),
        5,
        "all spawned tasks should complete"
    );
}
// ───────────────────────────────────────────────────────────────────
// Test 4: Transaction integrity — cancel during fetch, before write
// ───────────────────────────────────────────────────────────────────
#[test]
fn cancel_during_fetch_commits_zero_partial_data() {
    let conn = setup_db();
    run(async {
        let signal = ShutdownSignal::new();

        // Fetch-then-write loop modelled on the orchestrator: up to three
        // batches, each fetched concurrently and then written in a single
        // transaction.
        let mut items_written = 0usize;
        for batch_num in 0..3 {
            // The signal is only consulted at the top of each batch.
            if signal.is_cancelled() {
                break;
            }

            // Phase 1: five simulated concurrent fetches producing IIDs.
            let fetch_futures: Vec<_> = (0..5)
                .map(|i| async move {
                    asupersync::time::sleep(
                        asupersync::time::wall_now(),
                        std::time::Duration::from_millis(1),
                    )
                    .await;
                    (batch_num * 5 + i) as i64
                })
                .collect();
            let fetched: Vec<i64> = join_all(fetch_futures).await;

            // Cancel once the first fetch is done. Batch 0 is still written
            // below; the guard above then prevents batch 1 from starting.
            if batch_num == 0 {
                signal.cancel();
            }

            // Phase 2: serial writes inside one transaction.
            let tx = conn.unchecked_transaction().unwrap();
            for iid in &fetched {
                tx.execute(
                    "INSERT INTO issues (
gitlab_id, project_id, iid, title, state,
author_username, created_at, updated_at, last_seen_at
) VALUES (?1, 1, ?2, 'test', 'opened', 'bot', 1000, 2000, 3000)",
                    rusqlite::params![iid + 1000, iid],
                )
                .unwrap();
            }
            tx.commit().unwrap();
            items_written += fetched.len();
        }

        // Only batch 0 should have been written.
        assert_eq!(items_written, 5, "only one batch should have been written");

        let stored: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM issues WHERE project_id = 1",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(stored, 5, "exactly one batch of 5 issues in DB");
    });
}
// ───────────────────────────────────────────────────────────────────
// Test 5: SAVEPOINT rollback on cancellation (embedding pattern)
// ───────────────────────────────────────────────────────────────────
#[test]
fn savepoint_rollback_on_cancel_preserves_prior_data() {
    let conn = setup_db();
    run(async {
        let signal = ShutdownSignal::new();

        // Page 1: savepoint, insert one issue, release (keeps the row).
        conn.execute_batch("SAVEPOINT embed_page").unwrap();
        conn.execute(
            "INSERT INTO issues (
gitlab_id, project_id, iid, title, state,
author_username, created_at, updated_at, last_seen_at
) VALUES (2001, 1, 1, 'page1-issue', 'opened', 'bot', 1000, 2000, 3000)",
            [],
        )
        .unwrap();
        conn.execute_batch("RELEASE embed_page").unwrap();

        // Page 2: savepoint and a partial write ...
        conn.execute_batch("SAVEPOINT embed_page").unwrap();
        conn.execute(
            "INSERT INTO issues (
gitlab_id, project_id, iid, title, state,
author_username, created_at, updated_at, last_seen_at
) VALUES (2002, 1, 2, 'page2-issue', 'opened', 'bot', 1000, 2000, 3000)",
            [],
        )
        .unwrap();

        // ... then cancellation arrives while the page is still open.
        signal.cancel();

        // On cancellation the unfinished page is undone (the pattern used by
        // embed_documents, per the original test).
        if signal.is_cancelled() {
            conn.execute_batch("ROLLBACK TO embed_page; RELEASE embed_page")
                .unwrap();
        }

        // Only the page-1 row may remain.
        let remaining: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM issues WHERE project_id = 1",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(remaining, 1, "only page 1 issue should survive");

        let surviving_title: String = conn
            .query_row("SELECT title FROM issues WHERE project_id = 1", [], |row| {
                row.get(0)
            })
            .unwrap();
        assert_eq!(
            surviving_title, "page1-issue",
            "surviving issue should be from page 1"
        );
    });
}
// ───────────────────────────────────────────────────────────────────
// Test 6: Transaction drop (implicit rollback) on error
// ───────────────────────────────────────────────────────────────────
#[test]
fn transaction_drop_without_commit_rolls_back() {
    let conn = setup_db();

    // Open a transaction and write one row, but never commit.
    let tx = conn.unchecked_transaction().unwrap();
    tx.execute(
        "INSERT INTO issues (
gitlab_id, project_id, iid, title, state,
author_username, created_at, updated_at, last_seen_at
) VALUES (3001, 1, 1, 'dropped', 'opened', 'bot', 1000, 2000, 3000)",
        [],
    )
    .unwrap();
    // Dropping the uncommitted transaction is what this test exercises.
    drop(tx);

    let remaining: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM issues WHERE project_id = 1",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(remaining, 0, "dropped transaction should roll back all writes");
}
// ───────────────────────────────────────────────────────────────────
// Test 7: Signal propagates across clones (thread safety)
// ───────────────────────────────────────────────────────────────────
#[test]
fn signal_propagates_across_async_tasks() {
    run(async {
        let signal = ShutdownSignal::new();

        // Five futures, each owning its own clone of the signal. Each sleeps
        // for 10 ms and then reports what its clone observes.
        let mut observers = Vec::with_capacity(5);
        for _ in 0..5 {
            let observed = signal.clone();
            observers.push(async move {
                asupersync::time::sleep(
                    asupersync::time::wall_now(),
                    std::time::Duration::from_millis(10),
                )
                .await;
                observed.is_cancelled()
            });
        }

        // Cancel through the original handle before any future is polled.
        signal.cancel();

        let observations = join_all(observers).await;
        assert!(
            observations.iter().all(|&cancelled| cancelled),
            "all cloned signals should observe cancellation"
        );
    });
}

241
tests/http_integration.rs Normal file
View File

@@ -0,0 +1,241 @@
use std::io::{Read, Write};
use std::net::TcpListener;
use std::time::Duration;
use lore::http::Client;
/// Spin up a one-shot TCP server that replies with `response_bytes` to the first
/// connection, then shuts down. Returns the `http://127.0.0.1:{port}` base URL.
///
/// The server thread first drains the request: it reads until the bytes
/// received so far contain the end-of-headers marker (`\r\n\r\n`) or the peer
/// stops sending. Request bodies are not parsed; test requests are expected
/// to have tiny or no bodies.
///
/// # Panics
///
/// Panics if the listener cannot be bound. The server thread panics if
/// accepting, reading, or writing fails.
fn oneshot_server(response_bytes: Vec<u8>) -> String {
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let port = listener.local_addr().unwrap().port();
    std::thread::spawn(move || {
        let (mut stream, _) = listener.accept().unwrap();
        // Drain the request so the client doesn't get a broken pipe.
        let mut request = Vec::new();
        let mut buf = [0u8; 4096];
        loop {
            let n = stream.read(&mut buf).unwrap();
            // A zero-length read means the peer closed its sending side;
            // without this check the loop would spin forever.
            if n == 0 {
                break;
            }
            // Search the accumulated bytes, not just this chunk, so a marker
            // split across two reads is still detected.
            request.extend_from_slice(&buf[..n]);
            if request.windows(4).any(|w| w == b"\r\n\r\n") {
                break;
            }
        }
        stream.write_all(&response_bytes).unwrap();
        stream.flush().unwrap();
        // Drop closes the connection.
    });
    format!("http://127.0.0.1:{port}")
}
/// Build the raw bytes of an HTTP/1.1 response with a JSON content type.
///
/// The result is the status line, `Content-Type: application/json`, a
/// `Content-Length` equal to `body.len()`, a blank line, and then `body`.
fn json_response(status: u16, reason: &str, body: &str) -> Vec<u8> {
    let head = format!(
        "HTTP/1.1 {status} {reason}\r\n\
         Content-Type: application/json\r\n\
         Content-Length: {}\r\n\
         \r\n",
        body.len()
    );
    [head.as_bytes(), body.as_bytes()].concat()
}
/// Build a new asupersync runtime, block on `f` until it finishes, and hand
/// back the value it produced.
///
/// # Panics
///
/// Panics if building the runtime fails.
fn run<F: std::future::Future<Output = T>, T>(f: F) -> T {
    let runtime = asupersync::runtime::RuntimeBuilder::new()
        .build()
        .unwrap();
    runtime.block_on(f)
}
// -------------------------------------------------------------------
// Test 1: GET with headers + JSON response
// -------------------------------------------------------------------
#[test]
fn get_with_headers_and_json_response() {
    // Canned response: 200 with a custom header and a small JSON body.
    let body = r#"{"ok":true,"data":"hello"}"#;
    let canned = format!(
        "HTTP/1.1 200 OK\r\n\
         Content-Type: application/json\r\n\
         X-Custom: test-value\r\n\
         Content-Length: {}\r\n\
         \r\n\
         {body}",
        body.len()
    );
    let base = oneshot_server(canned.into_bytes());

    run(async {
        let client = Client::with_timeout(Duration::from_secs(5));
        let url = format!("{base}/api/test");
        let resp = client
            .get(&url, &[("Accept", "application/json")])
            .await
            .unwrap();

        assert!(resp.is_success());
        assert_eq!(resp.status, 200);
        // The header was sent as `X-Custom` and is looked up in lowercase.
        assert_eq!(resp.header("x-custom"), Some("test-value"));

        let json: serde_json::Value = resp.json().unwrap();
        assert_eq!(json["ok"], true);
        assert_eq!(json["data"], "hello");
    });
}
// -------------------------------------------------------------------
// Test 2: POST with JSON body
// -------------------------------------------------------------------
#[test]
fn post_json_body_round_trip() {
    /// Request body sent to the mock endpoint.
    #[derive(serde::Serialize)]
    struct Payload {
        model: String,
        input: Vec<String>,
    }

    let base = oneshot_server(json_response(200, "OK", r#"{"received":true}"#));

    run(async {
        let client = Client::with_timeout(Duration::from_secs(5));
        let payload = Payload {
            model: "test-model".into(),
            input: vec!["hello".into(), "world".into()],
        };
        let url = format!("{base}/api/embed");

        let resp = client.post_json(&url, &[], &payload).await.unwrap();

        assert!(resp.is_success());
        let json: serde_json::Value = resp.json().unwrap();
        assert_eq!(json["received"], true);
    });
}
// -------------------------------------------------------------------
// Test 3: Non-success status code (429) with Retry-After header
// -------------------------------------------------------------------
#[test]
fn non_success_status_with_retry_after() {
    // Canned 429 response carrying a Retry-After header and a JSON error body.
    let body = r#"{"error":"rate limited"}"#;
    let canned = format!(
        "HTTP/1.1 429 Too Many Requests\r\n\
         Retry-After: 30\r\n\
         Content-Type: application/json\r\n\
         Content-Length: {}\r\n\
         \r\n\
         {body}",
        body.len()
    );
    let base = oneshot_server(canned.into_bytes());

    run(async {
        let client = Client::with_timeout(Duration::from_secs(5));
        let url = format!("{base}/api/data");
        // A non-2xx status is returned as a response, not as an error.
        let resp = client.get(&url, &[]).await.unwrap();

        assert!(!resp.is_success());
        assert_eq!(resp.status, 429);
        assert_eq!(resp.header("retry-after"), Some("30"));

        let json: serde_json::Value = resp.json().unwrap();
        assert_eq!(json["error"], "rate limited");
    });
}
// -------------------------------------------------------------------
// Test 4: Timeout fires correctly
// -------------------------------------------------------------------
#[test]
fn timeout_fires_on_slow_server() {
    // The server accepts the connection and then stays silent for 30 s, far
    // longer than the client's 200 ms timeout.
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let port = listener.local_addr().unwrap().port();
    std::thread::spawn(move || {
        // Binding the stream keeps the connection open for the whole sleep.
        let (_held_open, _) = listener.accept().unwrap();
        std::thread::sleep(Duration::from_secs(30));
    });

    let base = format!("http://127.0.0.1:{port}");
    run(async {
        let client = Client::with_timeout(Duration::from_millis(200));
        let url = format!("{base}/api/slow");
        let result = client.get(&url, &[]).await;
        assert!(result.is_err(), "expected timeout error");

        // The debug rendering of the error has to mention a timeout.
        let err_str = format!("{:?}", result.unwrap_err());
        let lowered = err_str.to_lowercase();
        assert!(
            lowered.contains("timeout") || lowered.contains("timed out"),
            "error should mention timeout, got: {err_str}"
        );
    });
}
// -------------------------------------------------------------------
// Test 5: Large response rejection (64 MiB guard)
// -------------------------------------------------------------------
#[test]
fn large_response_rejected() {
    // The server advertises a Content-Length of 64 MiB + 1 byte and then
    // streams zero-filled 1 MiB chunks (up to 65 MiB) so the test never has
    // to allocate the whole body at once. No chunked transfer encoding is
    // involved. The client is expected to reject the oversized body.
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let port = listener.local_addr().unwrap().port();
    std::thread::spawn(move || {
        let (mut stream, _) = listener.accept().unwrap();
        // Drain request headers. The accumulated bytes are searched so a
        // terminator split across two reads is still found, and a zero-length
        // read (peer stopped sending) ends the loop instead of spinning.
        let mut request = Vec::new();
        let mut buf = [0u8; 4096];
        loop {
            let n = stream.read(&mut buf).unwrap();
            if n == 0 {
                break;
            }
            request.extend_from_slice(&buf[..n]);
            if request.windows(4).any(|w| w == b"\r\n\r\n") {
                break;
            }
        }
        let oversized = 64 * 1024 * 1024 + 1; // 64 MiB + 1
        let header = format!(
            "HTTP/1.1 200 OK\r\n\
             Content-Length: {oversized}\r\n\
             \r\n"
        );
        stream.write_all(header.as_bytes()).unwrap();
        // Stream zeros in chunks to avoid a huge allocation.
        let chunk = vec![0u8; 1024 * 1024]; // 1 MiB chunks
        for _ in 0..65 {
            if stream.write_all(&chunk).is_err() {
                break; // Client may close early.
            }
        }
    });
    let base = format!("http://127.0.0.1:{port}");
    run(async {
        let client = Client::with_timeout(Duration::from_secs(30));
        let result = client.get(&format!("{base}/api/huge"), &[]).await;
        assert!(result.is_err(), "expected large-response rejection");
        // Accept any of the known spellings of the size-limit error.
        let err_str = format!("{:?}", result.unwrap_err());
        assert!(
            err_str.contains("too large")
                || err_str.contains("Response body")
                || err_str.contains("BodyTooLarge"),
            "error should mention body size, got: {err_str}"
        );
    });
}

392
tests/http_parity_tests.rs Normal file
View File

@@ -0,0 +1,392 @@
//! HTTP behavior parity tests: verify asupersync h1 matches reqwest semantics
//! that `lore` depends on.
//!
//! These tests confirm six critical behaviors:
//! 1. Auto redirect: 301 -> follows Location header
//! 2. Proxy: HTTP_PROXY not supported (documented)
//! 3. Connection keep-alive: sequential requests to one host succeed (connections are not pooled yet, so reuse is not required)
//! 4. System DNS: hostname resolution works
//! 5. Content-Length on POST: header is auto-added
//! 6. TLS cert validation: invalid certs are rejected
use std::io::{Read, Write};
use std::net::TcpListener;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Duration;
use lore::http::Client;
/// Execute the future `f` on a newly constructed asupersync runtime and
/// return whatever it resolves to.
///
/// # Panics
///
/// Panics if the runtime cannot be constructed.
fn run<F: std::future::Future<Output = T>, T>(f: F) -> T {
    let runtime = asupersync::runtime::RuntimeBuilder::new()
        .build()
        .unwrap();
    runtime.block_on(f)
}
/// Read the header section of one HTTP request from `stream`.
///
/// Bytes are consumed one at a time, so nothing past the terminating
/// `\r\n\r\n` is taken off the stream. Reading stops after that marker or
/// when the peer closes the connection. The bytes read, including the marker
/// if it was seen, are returned.
///
/// # Panics
///
/// Panics if reading from `stream` fails.
fn drain_request(stream: &mut std::net::TcpStream) -> Vec<u8> {
    let mut request = Vec::new();
    let mut byte = [0u8; 1];
    while !request.ends_with(b"\r\n\r\n") {
        // A zero-length read means the peer closed the connection.
        if stream.read(&mut byte).unwrap() == 0 {
            break;
        }
        request.push(byte[0]);
    }
    request
}
// =========================================================================
// Test 1: Auto redirect — 301 is followed transparently
// =========================================================================
#[test]
fn redirect_301_is_followed() {
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let port = listener.local_addr().unwrap().port();

    std::thread::spawn(move || {
        // Connection 1: answer with a 301 whose Location points at /final on
        // this same server. The scope closes the connection afterwards.
        {
            let (mut first, _) = listener.accept().unwrap();
            drain_request(&mut first);
            let redirect = format!(
                "HTTP/1.1 301 Moved Permanently\r\n\
                 Location: http://127.0.0.1:{port}/final\r\n\
                 Content-Length: 0\r\n\
                 \r\n"
            );
            first.write_all(redirect.as_bytes()).unwrap();
            first.flush().unwrap();
        }

        // Connection 2: the follow-up request receives a 200 with JSON.
        let (mut second, _) = listener.accept().unwrap();
        drain_request(&mut second);
        let body = r#"{"redirected":true}"#;
        let ok_response = format!(
            "HTTP/1.1 200 OK\r\n\
             Content-Type: application/json\r\n\
             Content-Length: {}\r\n\
             \r\n\
             {body}",
            body.len()
        );
        second.write_all(ok_response.as_bytes()).unwrap();
        second.flush().unwrap();
    });

    let base = format!("http://127.0.0.1:{port}");
    run(async {
        let client = Client::with_timeout(Duration::from_secs(5));
        let url = format!("{base}/original");
        let resp = client.get(&url, &[]).await.unwrap();

        // The caller must see the final 200, not the intermediate 301.
        assert!(
            resp.is_success(),
            "expected 200 after redirect, got {}",
            resp.status
        );
        assert_eq!(resp.status, 200);

        let json: serde_json::Value = resp.json().unwrap();
        assert_eq!(json["redirected"], true);
    });
}
// =========================================================================
// Test 2: Proxy — HTTP_PROXY is NOT auto-detected (documented difference)
// =========================================================================
#[test]
fn proxy_env_not_auto_detected() {
    // HTTP_PROXY is pointed at a bogus address for the duration of the
    // request. A client that honoured the variable would fail to reach the
    // proxy; the request is expected to go straight to the local server
    // instead.
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let port = listener.local_addr().unwrap().port();
    let body = r#"{"direct":true}"#;

    std::thread::spawn(move || {
        let (mut stream, _) = listener.accept().unwrap();
        drain_request(&mut stream);
        let reply = format!(
            "HTTP/1.1 200 OK\r\n\
             Content-Type: application/json\r\n\
             Content-Length: {}\r\n\
             \r\n\
             {body}",
            body.len()
        );
        stream.write_all(reply.as_bytes()).unwrap();
        stream.flush().unwrap();
    });

    // Install the bogus proxy.
    // SAFETY: test-only; no other thread reads HTTP_PROXY concurrently.
    // NOTE(review): tests in one binary share the process environment —
    // confirm nothing else touches the environment while this runs.
    unsafe { std::env::set_var("HTTP_PROXY", "http://192.0.2.1:9999") };

    let base = format!("http://127.0.0.1:{port}");
    run(async {
        let client = Client::with_timeout(Duration::from_secs(5));
        let url = format!("{base}/api/test");
        let resp = client.get(&url, &[]).await.unwrap();
        assert!(resp.is_success());

        let json: serde_json::Value = resp.json().unwrap();
        assert_eq!(json["direct"], true);
    });

    // Remove the variable again (only reached when the assertions pass).
    // SAFETY: test-only; no other thread reads HTTP_PROXY concurrently.
    unsafe { std::env::remove_var("HTTP_PROXY") };
}
// =========================================================================
// Test 3: Connection keep-alive — sequential requests to same host
// =========================================================================
#[test]
fn sequential_requests_connect_separately() {
    // Counts accepted TCP connections. Per the original note, each request
    // currently opens its own connection because pooling is not wired yet.
    let connection_count = Arc::new(AtomicUsize::new(0));
    let accepted = Arc::clone(&connection_count);

    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let port = listener.local_addr().unwrap().port();

    std::thread::spawn(move || {
        // The same keep-alive response is sent on every connection.
        let body = r#"{"ok":true}"#;
        let reply = format!(
            "HTTP/1.1 200 OK\r\n\
             Content-Type: application/json\r\n\
             Content-Length: {}\r\n\
             Connection: keep-alive\r\n\
             \r\n\
             {body}",
            body.len()
        );
        for _ in 0..3 {
            let (mut stream, _) = listener.accept().unwrap();
            accepted.fetch_add(1, Ordering::SeqCst);
            drain_request(&mut stream);
            stream.write_all(reply.as_bytes()).unwrap();
            stream.flush().unwrap();
        }
    });

    let base = format!("http://127.0.0.1:{port}");
    run(async {
        let client = Client::with_timeout(Duration::from_secs(5));
        let url = format!("{base}/api/data");
        for _ in 0..3 {
            let resp = client.get(&url, &[]).await.unwrap();
            assert!(resp.is_success());
        }
    });

    // Documents current behaviour: a new connection per request. Once
    // connection pooling is wired, tighten this to `total <= 2` to verify
    // keep-alive.
    let total = connection_count.load(Ordering::SeqCst);
    assert!(
        (1..=3).contains(&total),
        "expected 1-3 connections (got {total}); \
         3 = no pooling, 1 = full keep-alive"
    );
}
// =========================================================================
// Test 4: System DNS — localhost resolves and connects
// =========================================================================
#[test]
fn system_dns_resolves_localhost() {
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let port = listener.local_addr().unwrap().port();
    let body = r#"{"dns":"ok"}"#;

    std::thread::spawn(move || {
        let (mut stream, _) = listener.accept().unwrap();
        drain_request(&mut stream);
        let reply = format!(
            "HTTP/1.1 200 OK\r\n\
             Content-Type: application/json\r\n\
             Content-Length: {}\r\n\
             \r\n\
             {body}",
            body.len()
        );
        stream.write_all(reply.as_bytes()).unwrap();
        stream.flush().unwrap();
    });

    run(async {
        let client = Client::with_timeout(Duration::from_secs(5));
        // The URL uses the name `localhost` rather than the literal
        // 127.0.0.1 so that hostname resolution is exercised.
        let url = format!("http://localhost:{port}/api/dns-test");
        let resp = client
            .get(&url, &[("Accept", "application/json")])
            .await
            .unwrap();
        assert!(resp.is_success());

        let json: serde_json::Value = resp.json().unwrap();
        assert_eq!(json["dns"], "ok");
    });
}
// =========================================================================
// Test 5: Content-Length on POST — header is auto-added by codec
// =========================================================================
#[test]
fn post_includes_content_length_header() {
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let port = listener.local_addr().unwrap().port();
    // The server thread sends the captured request head back over this channel.
    let (tx, rx) = std::sync::mpsc::channel::<String>();

    std::thread::spawn(move || {
        let (mut stream, _) = listener.accept().unwrap();
        let head = drain_request(&mut stream);
        tx.send(String::from_utf8_lossy(&head).to_string()).unwrap();

        let body = r#"{"received":true}"#;
        let reply = format!(
            "HTTP/1.1 200 OK\r\n\
             Content-Type: application/json\r\n\
             Content-Length: {}\r\n\
             \r\n\
             {body}",
            body.len()
        );
        stream.write_all(reply.as_bytes()).unwrap();
        stream.flush().unwrap();
    });

    let base = format!("http://127.0.0.1:{port}");
    run(async {
        let client = Client::with_timeout(Duration::from_secs(5));
        #[derive(serde::Serialize)]
        struct Payload {
            model: String,
            input: Vec<String>,
        }
        let payload = Payload {
            model: "test-model".into(),
            input: vec!["hello".into()],
        };
        let url = format!("{base}/api/embed");
        let resp = client.post_json(&url, &[], &payload).await.unwrap();
        assert!(resp.is_success());
    });

    let captured = rx.recv_timeout(Duration::from_secs(5)).unwrap();

    // The request head must contain a Content-Length header (any casing).
    let content_length_line = captured
        .lines()
        .find(|line| line.to_lowercase().starts_with("content-length:"));
    assert!(
        content_length_line.is_some(),
        "POST request should include Content-Length header.\n\
         Captured request:\n{captured}"
    );

    // Its value must parse as a number and be positive. Only the request
    // head is captured, so the value is not compared with the body length.
    let cl_value: usize = content_length_line
        .and_then(|line| line.split(':').nth(1))
        .and_then(|value| value.trim().parse().ok())
        .expect("Content-Length should be a valid number");
    assert!(
        cl_value > 0,
        "Content-Length should be > 0 for non-empty POST body"
    );
}
// =========================================================================
// Test 6: TLS handshake — https:// request to a plain-TCP server is rejected
// =========================================================================
/// Points an `https://` request at a server that only speaks plain TCP and
/// checks that the call fails with an error whose debug text mentions TLS.
#[test]
fn tls_rejects_plain_tcp_as_https() {
    let plain_server = TcpListener::bind("127.0.0.1:0").unwrap();
    let port = plain_server.local_addr().unwrap().port();

    std::thread::spawn(move || {
        let Ok((mut conn, _)) = plain_server.accept() else {
            return;
        };
        // Reply with bytes that are not a TLS record, then keep the socket
        // open for a while so the client gets to attempt its handshake.
        let _ = conn.write_all(b"NOT TLS\r\n");
        std::thread::sleep(Duration::from_secs(2));
    });

    run(async {
        let url = format!("https://127.0.0.1:{port}/api/test");
        let client = Client::with_timeout(Duration::from_secs(5));
        let outcome = client.get(&url, &[]).await;

        let err_str = match outcome {
            Ok(_) => panic!("expected TLS error when connecting to plain TCP"),
            Err(err) => format!("{:?}", err),
        };

        // Case-insensitive search for any TLS-related marker; a generic
        // connection error would contain none of these.
        let lowered = err_str.to_lowercase();
        let mentions_tls = ["tls", "ssl", "handshake", "certificate"]
            .iter()
            .any(|marker| lowered.contains(*marker));
        assert!(
            mentions_tls,
            "error should mention TLS/SSL, got: {err_str}"
        );
    });
}
// =========================================================================
// Test 6b: HTTPS connection to an unroutable host fails
// =========================================================================
/// Issues an HTTPS GET against an address from a documentation-only range
/// and checks that the client returns an error.
#[test]
fn tls_connection_to_nonexistent_host_fails() {
    run(async {
        // 192.0.2.1 belongs to TEST-NET-1 (RFC 5737), a block reserved for
        // documentation, so no real host should answer there.
        let target = "https://192.0.2.1:443/api/test";
        let client = Client::with_timeout(Duration::from_secs(3));

        let outcome = client.get(target, &[]).await;
        let failed = outcome.is_err();
        assert!(failed, "expected error connecting to unroutable host");
    });
}

View File

@@ -1,8 +1,8 @@
use lore::core::db::{create_connection, run_migrations};
use lore::core::timeline::{TimelineEventType, resolve_entity_ref};
use lore::core::timeline_collect::collect_events;
use lore::core::timeline_expand::expand_timeline;
use lore::core::timeline_seed::seed_timeline;
use lore::timeline::collect::collect_events;
use lore::timeline::expand::expand_timeline;
use lore::timeline::seed::seed_timeline;
use lore::timeline::{TimelineEventType, resolve_entity_ref};
use rusqlite::Connection;
use std::path::Path;