From 55b895a2ebbef64067b1cbf5647fe9c17b541ab1 Mon Sep 17 00:00:00 2001 From: teernisse Date: Wed, 28 Jan 2026 15:49:10 -0500 Subject: [PATCH] Update name to gitlore instead of gitlab-inbox --- .gitignore | 2 +- AGENTS.md | 32 +- Cargo.lock | 66 +- Cargo.toml | 6 +- PRD.md | 6 +- README.md | 208 +++--- SPEC-REVISIONS-2.md | 2 + SPEC-REVISIONS-3.md | 2 + SPEC-REVISIONS.md | 2 + SPEC.md | 4 +- docs/prd/checkpoint-0.md | 2 + docs/prd/checkpoint-1.md | 2 + docs/prd/checkpoint-2.md | 2 + docs/prd/checkpoint-3.md | 947 +++++++++++++++++++++++----- docs/prd/cp1-cp2-alignment-audit.md | 2 + docs/robot-mode-design.md | 26 +- src/cli/commands/doctor.rs | 8 +- src/cli/commands/ingest.rs | 2 +- src/cli/commands/init.rs | 2 +- src/cli/commands/sync_status.rs | 2 +- src/cli/mod.rs | 6 +- src/core/db.rs | 2 +- src/core/error.rs | 16 +- src/core/paths.rs | 18 +- src/lib.rs | 6 +- src/main.rs | 30 +- tests/diffnote_position_tests.rs | 4 +- tests/fixture_tests.rs | 2 +- tests/gitlab_types_tests.rs | 2 +- tests/mr_discussion_tests.rs | 4 +- tests/mr_transformer_tests.rs | 4 +- 31 files changed, 1046 insertions(+), 373 deletions(-) diff --git a/.gitignore b/.gitignore index 011d09b..6ee6054 100644 --- a/.gitignore +++ b/.gitignore @@ -30,7 +30,7 @@ yarn-debug.log* yarn-error.log* # Local config files -gi.config.json +lore.config.json # beads .bv/ diff --git a/AGENTS.md b/AGENTS.md index c7af02a..0e4181b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -25,46 +25,46 @@ If you aren't 100% sure how to use a third-party library, **SEARCH ONLINE** to f --- -## GitLab Inbox Robot Mode +## Gitlore Robot Mode -The `gi` CLI has a robot mode optimized for AI agent consumption with structured JSON output, meaningful exit codes, and TTY auto-detection. +The `lore` CLI has a robot mode optimized for AI agent consumption with structured JSON output, meaningful exit codes, and TTY auto-detection. 
### Activation ```bash # Explicit flag -gi --robot list issues +lore --robot list issues # Auto-detection (when stdout is not a TTY) -gi list issues | jq . +lore list issues | jq . # Environment variable -GI_ROBOT=1 gi list issues +LORE_ROBOT=true lore list issues ``` ### Robot Mode Commands ```bash # List issues/MRs with JSON output -gi --robot list issues --limit=10 -gi --robot list mrs --state=opened +lore --robot list issues --limit=10 +lore --robot list mrs --state=opened # Count entities -gi --robot count issues -gi --robot count discussions --type=mr +lore --robot count issues +lore --robot count discussions --type=mr # Show detailed entity info -gi --robot show issue 123 -gi --robot show mr 456 --project=group/repo +lore --robot show issue 123 +lore --robot show mr 456 --project=group/repo # Check sync status -gi --robot sync-status +lore --robot sync-status # Run ingestion (quiet, JSON summary) -gi --robot ingest --type=issues +lore --robot ingest --type=issues # Check environment health -gi --robot doctor +lore --robot doctor ``` ### Response Format @@ -78,7 +78,7 @@ All commands return consistent JSON: Errors return structured JSON to stderr: ```json -{"error":{"code":"CONFIG_NOT_FOUND","message":"...","suggestion":"Run 'gi init'"}} +{"error":{"code":"CONFIG_NOT_FOUND","message":"...","suggestion":"Run 'lore init'"}} ``` ### Exit Codes @@ -102,7 +102,7 @@ Errors return structured JSON to stderr: ### Best Practices -- Use `gi --robot` for all agent interactions +- Use `lore --robot` for all agent interactions - Check exit codes for error handling - Parse JSON errors from stderr - Use `--limit` to control response size diff --git a/Cargo.lock b/Cargo.lock index d4aa348..67a784f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -630,39 +630,6 @@ dependencies = [ "wasip2", ] -[[package]] -name = "gi" -version = "0.1.0" -dependencies = [ - "async-stream", - "chrono", - "clap", - "comfy-table", - "console", - "dialoguer", - "dirs", - "flate2", - "futures", - 
"indicatif", - "open", - "reqwest", - "rusqlite", - "serde", - "serde_json", - "sha2", - "sqlite-vec", - "tempfile", - "thiserror", - "tokio", - "tracing", - "tracing-indicatif", - "tracing-subscriber", - "url", - "urlencoding", - "uuid", - "wiremock", -] - [[package]] name = "h2" version = "0.4.13" @@ -1111,6 +1078,39 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lore" +version = "0.1.0" +dependencies = [ + "async-stream", + "chrono", + "clap", + "comfy-table", + "console", + "dialoguer", + "dirs", + "flate2", + "futures", + "indicatif", + "open", + "reqwest", + "rusqlite", + "serde", + "serde_json", + "sha2", + "sqlite-vec", + "tempfile", + "thiserror", + "tokio", + "tracing", + "tracing-indicatif", + "tracing-subscriber", + "url", + "urlencoding", + "uuid", + "wiremock", +] + [[package]] name = "matchers" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 90df70b..ef59656 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,13 +1,13 @@ [package] -name = "gi" +name = "lore" version = "0.1.0" edition = "2024" -description = "GitLab Knowledge Engine - semantic search for GitLab issues, MRs, and discussions" +description = "Gitlore - Local GitLab data management with semantic search" authors = ["Taylor Eernisse"] license = "MIT" [[bin]] -name = "gi" +name = "lore" path = "src/main.rs" [dependencies] diff --git a/PRD.md b/PRD.md index 1239cf8..5bce46a 100644 --- a/PRD.md +++ b/PRD.md @@ -1,8 +1,10 @@ -# GitLab Inbox - Product Requirements Document +# Gitlore - Product Requirements Document + +> **Note:** The project was renamed from "gitlab-inbox" to "gitlore" and the CLI from "gi" to "lore". 
## Overview -**Product Name**: GitLab Inbox +**Product Name**: Gitlore (formerly GitLab Inbox) **Version**: 1.0 **Author**: Taylor Eernisse **Date**: January 16, 2026 diff --git a/README.md b/README.md index 2e67c69..742182b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# gi - GitLab Inbox +# Gitlore -A command-line tool for managing GitLab issues and merge requests locally. Syncs issues, MRs, discussions, and notes from GitLab to a local SQLite database for fast, offline-capable querying and filtering. +Local GitLab data management with semantic search. Syncs issues, MRs, discussions, and notes from GitLab to a local SQLite database for fast, offline-capable querying and filtering. ## Features @@ -22,40 +22,40 @@ Or build from source: ```bash cargo build --release -./target/release/gi --help +./target/release/lore --help ``` ## Quick Start ```bash # Initialize configuration (interactive) -gi init +lore init # Verify authentication -gi auth-test +lore auth-test # Sync issues from GitLab -gi ingest --type issues +lore ingest --type issues # Sync merge requests from GitLab -gi ingest --type mrs +lore ingest --type mrs # List recent issues -gi list issues --limit 10 +lore list issues --limit 10 # List open merge requests -gi list mrs --state opened +lore list mrs --state opened # Show issue details -gi show issue 123 --project group/repo +lore show issue 123 --project group/repo # Show MR details with discussions -gi show mr 456 --project group/repo +lore show mr 456 --project group/repo ``` ## Configuration -Configuration is stored in `~/.config/gi/config.json` (or `$XDG_CONFIG_HOME/gi/config.json`). +Configuration is stored in `~/.config/lore/config.json` (or `$XDG_CONFIG_HOME/lore/config.json`). 
### Example Configuration @@ -96,8 +96,8 @@ Configuration is stored in `~/.config/gi/config.json` (or `$XDG_CONFIG_HOME/gi/c | `sync` | `cursorRewindSeconds` | `2` | Seconds to rewind cursor for overlap safety | | `sync` | `primaryConcurrency` | `4` | Concurrent GitLab requests for primary resources | | `sync` | `dependentConcurrency` | `2` | Concurrent requests for dependent resources | -| `storage` | `dbPath` | `~/.local/share/gi/gi.db` | Database file path | -| `storage` | `backupDir` | `~/.local/share/gi/backups` | Backup directory | +| `storage` | `dbPath` | `~/.local/share/lore/lore.db` | Database file path | +| `storage` | `backupDir` | `~/.local/share/lore/backups` | Backup directory | | `storage` | `compressRawPayloads` | `true` | Compress stored API responses with gzip | | `embedding` | `provider` | `ollama` | Embedding provider | | `embedding` | `model` | `nomic-embed-text` | Model name for embeddings | @@ -108,9 +108,9 @@ Configuration is stored in `~/.config/gi/config.json` (or `$XDG_CONFIG_HOME/gi/c The config file is resolved in this order: 1. `--config` CLI flag -2. `GI_CONFIG_PATH` environment variable -3. `~/.config/gi/config.json` (XDG default) -4. `./gi.config.json` (local fallback for development) +2. `LORE_CONFIG_PATH` environment variable +3. `~/.config/lore/config.json` (XDG default) +4. 
`./lore.config.json` (local fallback for development) ### GitLab Token @@ -125,40 +125,40 @@ Create a personal access token with `read_api` scope: | Variable | Purpose | Required | |----------|---------|----------| | `GITLAB_TOKEN` | GitLab API authentication token (name configurable via `gitlab.tokenEnvVar`) | Yes | -| `GI_CONFIG_PATH` | Override config file location | No | +| `LORE_CONFIG_PATH` | Override config file location | No | | `XDG_CONFIG_HOME` | XDG Base Directory for config (fallback: `~/.config`) | No | | `XDG_DATA_HOME` | XDG Base Directory for data (fallback: `~/.local/share`) | No | -| `RUST_LOG` | Logging level filter (e.g., `gi=debug`) | No | +| `RUST_LOG` | Logging level filter (e.g., `lore=debug`) | No | ## Commands -### `gi init` +### `lore init` Initialize configuration and database interactively. ```bash -gi init # Interactive setup -gi init --force # Overwrite existing config -gi init --non-interactive # Fail if prompts needed +lore init # Interactive setup +lore init --force # Overwrite existing config +lore init --non-interactive # Fail if prompts needed ``` -### `gi auth-test` +### `lore auth-test` Verify GitLab authentication is working. ```bash -gi auth-test +lore auth-test # Authenticated as @username (Full Name) # GitLab: https://gitlab.com ``` -### `gi doctor` +### `lore doctor` Check environment health and configuration. ```bash -gi doctor # Human-readable output -gi doctor --json # JSON output for scripting +lore doctor # Human-readable output +lore doctor --json # JSON output for scripting ``` Checks performed: @@ -168,21 +168,21 @@ Checks performed: - Project accessibility - Ollama connectivity (optional) -### `gi ingest` +### `lore ingest` Sync data from GitLab to local database. 
```bash # Issues -gi ingest --type issues # Sync all projects -gi ingest --type issues --project group/repo # Single project -gi ingest --type issues --force # Override stale lock -gi ingest --type issues --full # Full re-sync (reset cursors) +lore ingest --type issues # Sync all projects +lore ingest --type issues --project group/repo # Single project +lore ingest --type issues --force # Override stale lock +lore ingest --type issues --full # Full re-sync (reset cursors) # Merge Requests -gi ingest --type mrs # Sync all projects -gi ingest --type mrs --project group/repo # Single project -gi ingest --type mrs --full # Full re-sync (reset cursors) +lore ingest --type mrs # Sync all projects +lore ingest --type mrs --project group/repo # Single project +lore ingest --type mrs --full # Full re-sync (reset cursors) ``` The `--full` flag resets sync cursors and discussion watermarks, then fetches all data from scratch. Useful when: @@ -190,103 +190,103 @@ The `--full` flag resets sync cursors and discussion watermarks, then fetches al - You want to ensure complete data after schema changes - Troubleshooting sync issues -### `gi list issues` +### `lore list issues` Query issues from local database. 
```bash -gi list issues # Recent issues (default 50) -gi list issues --limit 100 # More results -gi list issues --state opened # Only open issues -gi list issues --state closed # Only closed issues -gi list issues --author username # By author (@ prefix optional) -gi list issues --assignee username # By assignee (@ prefix optional) -gi list issues --label bug # By label (AND logic) -gi list issues --label bug --label urgent # Multiple labels -gi list issues --milestone "v1.0" # By milestone title -gi list issues --since 7d # Updated in last 7 days -gi list issues --since 2w # Updated in last 2 weeks -gi list issues --since 2024-01-01 # Updated since date -gi list issues --due-before 2024-12-31 # Due before date -gi list issues --has-due-date # Only issues with due dates -gi list issues --project group/repo # Filter by project -gi list issues --sort created --order asc # Sort options -gi list issues --open # Open first result in browser -gi list issues --json # JSON output +lore list issues # Recent issues (default 50) +lore list issues --limit 100 # More results +lore list issues --state opened # Only open issues +lore list issues --state closed # Only closed issues +lore list issues --author username # By author (@ prefix optional) +lore list issues --assignee username # By assignee (@ prefix optional) +lore list issues --label bug # By label (AND logic) +lore list issues --label bug --label urgent # Multiple labels +lore list issues --milestone "v1.0" # By milestone title +lore list issues --since 7d # Updated in last 7 days +lore list issues --since 2w # Updated in last 2 weeks +lore list issues --since 2024-01-01 # Updated since date +lore list issues --due-before 2024-12-31 # Due before date +lore list issues --has-due-date # Only issues with due dates +lore list issues --project group/repo # Filter by project +lore list issues --sort created --order asc # Sort options +lore list issues --open # Open first result in browser +lore list issues --json # JSON 
output ``` Output includes: IID, title, state, author, assignee, labels, and update time. -### `gi list mrs` +### `lore list mrs` Query merge requests from local database. ```bash -gi list mrs # Recent MRs (default 50) -gi list mrs --limit 100 # More results -gi list mrs --state opened # Only open MRs -gi list mrs --state merged # Only merged MRs -gi list mrs --state closed # Only closed MRs -gi list mrs --state locked # Only locked MRs -gi list mrs --state all # All states -gi list mrs --author username # By author (@ prefix optional) -gi list mrs --assignee username # By assignee (@ prefix optional) -gi list mrs --reviewer username # By reviewer (@ prefix optional) -gi list mrs --draft # Only draft/WIP MRs -gi list mrs --no-draft # Exclude draft MRs -gi list mrs --target-branch main # By target branch -gi list mrs --source-branch feature/foo # By source branch -gi list mrs --label needs-review # By label (AND logic) -gi list mrs --since 7d # Updated in last 7 days -gi list mrs --project group/repo # Filter by project -gi list mrs --sort created --order asc # Sort options -gi list mrs --open # Open first result in browser -gi list mrs --json # JSON output +lore list mrs # Recent MRs (default 50) +lore list mrs --limit 100 # More results +lore list mrs --state opened # Only open MRs +lore list mrs --state merged # Only merged MRs +lore list mrs --state closed # Only closed MRs +lore list mrs --state locked # Only locked MRs +lore list mrs --state all # All states +lore list mrs --author username # By author (@ prefix optional) +lore list mrs --assignee username # By assignee (@ prefix optional) +lore list mrs --reviewer username # By reviewer (@ prefix optional) +lore list mrs --draft # Only draft/WIP MRs +lore list mrs --no-draft # Exclude draft MRs +lore list mrs --target-branch main # By target branch +lore list mrs --source-branch feature/foo # By source branch +lore list mrs --label needs-review # By label (AND logic) +lore list mrs --since 7d # Updated in 
last 7 days +lore list mrs --project group/repo # Filter by project +lore list mrs --sort created --order asc # Sort options +lore list mrs --open # Open first result in browser +lore list mrs --json # JSON output ``` Output includes: IID, title (with [DRAFT] prefix if applicable), state, author, assignee, labels, and update time. -### `gi show issue` +### `lore show issue` Display detailed issue information. ```bash -gi show issue 123 # Show issue #123 -gi show issue 123 --project group/repo # Disambiguate if needed +lore show issue 123 # Show issue #123 +lore show issue 123 --project group/repo # Disambiguate if needed ``` Shows: title, description, state, author, assignees, labels, milestone, due date, web URL, and threaded discussions. -### `gi show mr` +### `lore show mr` Display detailed merge request information. ```bash -gi show mr 456 # Show MR !456 -gi show mr 456 --project group/repo # Disambiguate if needed +lore show mr 456 # Show MR !456 +lore show mr 456 --project group/repo # Disambiguate if needed ``` Shows: title, description, state, draft status, author, assignees, reviewers, labels, source/target branches, merge status, web URL, and threaded discussions. Inline code review comments (DiffNotes) display file context in the format `[src/file.ts:45]`. -### `gi count` +### `lore count` Count entities in local database. 
```bash -gi count issues # Total issues -gi count mrs # Total MRs (with state breakdown) -gi count discussions # Total discussions -gi count discussions --type issue # Issue discussions only -gi count discussions --type mr # MR discussions only -gi count notes # Total notes (shows system vs user breakdown) +lore count issues # Total issues +lore count mrs # Total MRs (with state breakdown) +lore count discussions # Total discussions +lore count discussions --type issue # Issue discussions only +lore count discussions --type mr # MR discussions only +lore count notes # Total notes (shows system vs user breakdown) ``` -### `gi sync-status` +### `lore sync-status` Show current sync state and watermarks. ```bash -gi sync-status +lore sync-status ``` Displays: @@ -294,40 +294,40 @@ Displays: - Cursor positions per project and resource type (issues and MRs) - Data summary counts -### `gi migrate` +### `lore migrate` Run pending database migrations. ```bash -gi migrate +lore migrate ``` Shows current schema version and applies any pending migrations. -### `gi version` +### `lore version` Show version information. ```bash -gi version +lore version ``` -### `gi backup` +### `lore backup` Create timestamped database backup. ```bash -gi backup +lore backup ``` *Note: Not yet implemented.* -### `gi reset` +### `lore reset` Delete database and reset all state. ```bash -gi reset --confirm +lore reset --confirm ``` *Note: Not yet implemented.* @@ -356,12 +356,12 @@ Data is stored in SQLite with WAL mode and foreign keys enabled. Main tables: | `raw_payloads` | Compressed original API responses | | `schema_version` | Migration version tracking | -The database is stored at `~/.local/share/gi/gi.db` by default (XDG compliant). +The database is stored at `~/.local/share/lore/lore.db` by default (XDG compliant). 
## Global Options ```bash -gi --config /path/to/config.json # Use alternate config +lore --config /path/to/config.json # Use alternate config ``` ## Development @@ -371,10 +371,10 @@ gi --config /path/to/config.json # Use alternate config cargo test # Run with debug logging -RUST_LOG=gi=debug gi list issues +RUST_LOG=lore=debug lore list issues # Run with trace logging -RUST_LOG=gi=trace gi ingest --type issues +RUST_LOG=lore=trace lore ingest --type issues # Check formatting cargo fmt --check @@ -396,7 +396,7 @@ cargo clippy ## Current Status -This is Checkpoint 2 (CP2) of the GitLab Knowledge Engine project. Currently implemented: +This is Checkpoint 2 (CP2) of the Gitlore project. Currently implemented: - Issue ingestion with cursor-based incremental sync - Merge request ingestion with cursor-based incremental sync diff --git a/SPEC-REVISIONS-2.md b/SPEC-REVISIONS-2.md index b134f13..afb46af 100644 --- a/SPEC-REVISIONS-2.md +++ b/SPEC-REVISIONS-2.md @@ -1,5 +1,7 @@ # SPEC.md Revision Document - Round 2 +> **Note:** The project was renamed from "gitlab-inbox" to "gitlore" and the CLI from "gi" to "lore". References to "gi" in this document should be read as "lore". + This document provides git-diff style changes for the second round of improvements from ChatGPT's review. These are primarily correctness fixes and optimizations. --- diff --git a/SPEC-REVISIONS-3.md b/SPEC-REVISIONS-3.md index 818ac87..9d61d56 100644 --- a/SPEC-REVISIONS-3.md +++ b/SPEC-REVISIONS-3.md @@ -1,5 +1,7 @@ # SPEC.md Revisions - First-Time User Experience +> **Note:** The project was renamed from "gitlab-inbox" to "gitlore" and the CLI from "gi" to "lore". References to "gi" in this document should be read as "lore". 
+ **Date:** 2026-01-21 **Purpose:** Document all changes adding installation, setup, and user flow documentation to SPEC.md diff --git a/SPEC-REVISIONS.md b/SPEC-REVISIONS.md index caa9573..f3e9108 100644 --- a/SPEC-REVISIONS.md +++ b/SPEC-REVISIONS.md @@ -1,5 +1,7 @@ # SPEC.md Revision Document +> **Note:** The project was renamed from "gitlab-inbox" to "gitlore" and the CLI from "gi" to "lore". References to "gi" in this document should be read as "lore". + This document provides git-diff style changes to integrate improvements from ChatGPT's review into the original SPEC.md. The goal is a "best of all worlds" hybrid that maintains the original architecture while adding production-grade hardening. --- diff --git a/SPEC.md b/SPEC.md index 44fc3b2..84e68e2 100644 --- a/SPEC.md +++ b/SPEC.md @@ -1,6 +1,6 @@ -# GitLab Knowledge Engine - Spec Document +# Gitlore - Spec Document -> **Note:** This is a historical planning document. The actual implementation uses Rust instead of TypeScript/Node.js. See [README.md](README.md) for current documentation. +> **Note:** This is a historical planning document. The actual implementation uses Rust instead of TypeScript/Node.js. See [README.md](README.md) for current documentation. The project was renamed from "gitlab-inbox" to "gitlore" and the CLI from "gi" to "lore". ## Executive Summary diff --git a/docs/prd/checkpoint-0.md b/docs/prd/checkpoint-0.md index 12f99d1..afc642e 100644 --- a/docs/prd/checkpoint-0.md +++ b/docs/prd/checkpoint-0.md @@ -1,5 +1,7 @@ # Checkpoint 0: Project Setup - PRD +> **Note:** The project was renamed from "gitlab-inbox" to "gitlore" and the CLI from "gi" to "lore". References to "gi" in this document should be read as "lore". 
+ **Version:** 1.0 **Status:** Ready for Implementation **Depends On:** None (first checkpoint) diff --git a/docs/prd/checkpoint-1.md b/docs/prd/checkpoint-1.md index 6f59080..f883c36 100644 --- a/docs/prd/checkpoint-1.md +++ b/docs/prd/checkpoint-1.md @@ -1,5 +1,7 @@ # Checkpoint 1: Issue Ingestion - PRD +> **Note:** The project was renamed from "gitlab-inbox" to "gitlore" and the CLI from "gi" to "lore". References to "gi" in this document should be read as "lore". + **Version:** 2.0 **Status:** Ready for Implementation **Depends On:** Checkpoint 0 (Project Setup) diff --git a/docs/prd/checkpoint-2.md b/docs/prd/checkpoint-2.md index c89e909..fc03b26 100644 --- a/docs/prd/checkpoint-2.md +++ b/docs/prd/checkpoint-2.md @@ -1,5 +1,7 @@ # Checkpoint 2: MR Ingestion - PRD +> **Note:** The project was renamed from "gitlab-inbox" to "gitlore" and the CLI from "gi" to "lore". References to "gi" in this document should be read as "lore". + **Version:** 1.3 **Status:** Ready for Implementation **Depends On:** Checkpoint 1 (Issue Ingestion) diff --git a/docs/prd/checkpoint-3.md b/docs/prd/checkpoint-3.md index 826edc1..2457096 100644 --- a/docs/prd/checkpoint-3.md +++ b/docs/prd/checkpoint-3.md @@ -1,12 +1,14 @@ # Checkpoint 3: Search & Sync MVP +> **Note:** The project was renamed from "gitlab-inbox" to "gitlore" and the CLI from "gi" to "lore". References to "gi" in this document should be read as "lore". + > **Status:** Planning > **Prerequisite:** Checkpoints 0, 1, 2 complete (issues, MRs, discussions ingested) > **Goal:** Deliver working semantic + lexical hybrid search with efficient incremental sync This checkpoint consolidates SPEC.md checkpoints 3A, 3B, 4, and 5 into a unified implementation plan. The work is structured for parallel agent execution where dependencies allow. 
-All code integrates with existing `gitlab-inbox` infrastructure: +All code integrates with existing `gitlore` infrastructure: - Error handling via `GiError` and `ErrorCode` in `src/core/error.rs` - CLI patterns matching `src/cli/commands/*.rs` (run functions, JSON/human output) - Database via `rusqlite::Connection` with migrations in `migrations/` @@ -29,6 +31,9 @@ All code integrates with existing `gitlab-inbox` infrastructure: - sqlite-vec `rowid = documents.id` for simple joins - RRF ranking avoids score normalization complexity - Queue-based discussion fetching isolates failures +- FTS5 query sanitization prevents syntax errors from user input +- Exponential backoff on all queues prevents hot-loop retries +- Transient embed failures trigger graceful degradation (not hard errors) --- @@ -53,6 +58,8 @@ CREATE TABLE documents ( title TEXT, -- null for discussions content_text TEXT NOT NULL, -- canonical text for embedding/search content_hash TEXT NOT NULL, -- SHA-256 for change detection + labels_hash TEXT NOT NULL DEFAULT '', -- SHA-256 over sorted labels (write optimization) + paths_hash TEXT NOT NULL DEFAULT '', -- SHA-256 over sorted paths (write optimization) is_truncated INTEGER NOT NULL DEFAULT 0, truncated_reason TEXT CHECK ( truncated_reason IN ('token_limit_middle_drop','single_note_oversized','first_last_oversized') @@ -83,6 +90,7 @@ CREATE TABLE document_paths ( CREATE INDEX idx_document_paths_path ON document_paths(path); -- Queue for incremental document regeneration (with retry tracking) +-- Uses next_attempt_at for index-friendly backoff queries CREATE TABLE dirty_sources ( source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion')), source_id INTEGER NOT NULL, @@ -90,13 +98,13 @@ CREATE TABLE dirty_sources ( attempt_count INTEGER NOT NULL DEFAULT 0, last_attempt_at INTEGER, last_error TEXT, + next_attempt_at INTEGER, -- ms epoch UTC; NULL means ready immediately PRIMARY KEY(source_type, source_id) ); -CREATE INDEX 
idx_dirty_sources_retry - ON dirty_sources(attempt_count, last_attempt_at) - WHERE last_error IS NOT NULL; +CREATE INDEX idx_dirty_sources_next_attempt ON dirty_sources(next_attempt_at); -- Resumable queue for dependent discussion fetching +-- Uses next_attempt_at for index-friendly backoff queries CREATE TABLE pending_discussion_fetches ( project_id INTEGER NOT NULL REFERENCES projects(id), noteable_type TEXT NOT NULL, -- 'Issue' | 'MergeRequest' @@ -105,11 +113,10 @@ CREATE TABLE pending_discussion_fetches ( attempt_count INTEGER NOT NULL DEFAULT 0, last_attempt_at INTEGER, last_error TEXT, + next_attempt_at INTEGER, -- ms epoch UTC; NULL means ready immediately PRIMARY KEY(project_id, noteable_type, noteable_iid) ); -CREATE INDEX idx_pending_discussions_retry - ON pending_discussion_fetches(attempt_count, last_attempt_at) - WHERE last_error IS NOT NULL; +CREATE INDEX idx_pending_discussions_next_attempt ON pending_discussion_fetches(next_attempt_at); ``` **Acceptance Criteria:** @@ -117,6 +124,8 @@ CREATE INDEX idx_pending_discussions_retry - [ ] Migration applies cleanly after CP2 schema - [ ] All foreign keys enforced - [ ] Indexes created +- [ ] `labels_hash` and `paths_hash` columns present for write optimization +- [ ] `next_attempt_at` indexed for efficient backoff queries --- @@ -285,6 +294,18 @@ impl SourceType { Self::Discussion => "discussion", } } + + /// Parse from CLI input, accepting common aliases. 
+ /// + /// Accepts: "issue", "mr", "merge_request", "discussion" + pub fn parse(s: &str) -> Option { + match s.to_lowercase().as_str() { + "issue" | "issues" => Some(Self::Issue), + "mr" | "mrs" | "merge_request" | "merge_requests" => Some(Self::MergeRequest), + "discussion" | "discussions" => Some(Self::Discussion), + _ => None, + } + } } impl std::fmt::Display for SourceType { @@ -302,6 +323,8 @@ pub struct DocumentData { pub author_username: Option, pub labels: Vec, pub paths: Vec, // DiffNote file paths + pub labels_hash: String, // SHA-256 over sorted labels (write optimization) + pub paths_hash: String, // SHA-256 over sorted paths (write optimization) pub created_at: i64, pub updated_at: i64, pub url: Option, @@ -318,16 +341,56 @@ pub fn compute_content_hash(content: &str) -> String { hasher.update(content.as_bytes()); format!("{:x}", hasher.finalize()) } + +/// Compute SHA-256 hash over a sorted list of strings. +/// Used for labels_hash and paths_hash to detect changes efficiently. +pub fn compute_list_hash(items: &[String]) -> String { + let mut sorted = items.to_vec(); + sorted.sort(); + let joined = sorted.join("\n"); + compute_content_hash(&joined) +} ``` **Document Formats:** +All document types use consistent header format for better search relevance and context: + | Source | content_text | |--------|-------------| -| Issue | `{title}\n\n{description}` | -| MR | `{title}\n\n{description}` | +| Issue | Structured header + description (see below) | +| MR | Structured header + description (see below) | | Discussion | Full thread with header (see below) | +**Issue Document Format:** +``` +[[Issue]] #234: Authentication redesign +Project: group/project-one +URL: https://gitlab.example.com/group/project-one/-/issues/234 +Labels: ["bug", "auth"] +State: opened +Author: @johndoe + +--- Description --- + +We need to modernize our authentication system... 
+``` + +**MR Document Format:** +``` +[[MergeRequest]] !456: Implement JWT authentication +Project: group/project-one +URL: https://gitlab.example.com/group/project-one/-/merge_requests/456 +Labels: ["feature", "auth"] +State: opened +Author: @johndoe +Source: feature/jwt-auth -> main + +--- Description --- + +This MR implements JWT-based authentication as discussed in #234... +``` + **Discussion Document Format:** ``` [[Discussion]] Issue #234: Authentication redesign @@ -346,13 +409,14 @@ Agreed. What about refresh token strategy? ``` **Acceptance Criteria:** -- [ ] Issue document: title + description concatenated -- [ ] MR document: title + description concatenated -- [ ] Discussion document: includes parent title, project, URL, labels, files, thread +- [ ] Issue document: structured header with `[[Issue]]` prefix, project, URL, labels, state, author, then description +- [ ] MR document: structured header with `[[MergeRequest]]` prefix, project, URL, labels, state, author, branches, then description +- [ ] Discussion document: includes parent type+title, project, URL, labels, files, then thread - [ ] System notes (is_system=1) excluded from discussion content - [ ] DiffNote file paths extracted to paths vector - [ ] Labels extracted to labels vector - [ ] SHA-256 hash computed from content_text +- [ ] Headers use consistent separator lines (`--- Description ---`, `--- Thread ---`) --- @@ -454,6 +518,10 @@ pub struct GenerateDocsResult { pub skipped: usize, // Unchanged documents } +/// Chunk size for --full mode transactions. +/// Balances throughput against WAL file growth and memory pressure. +const FULL_MODE_CHUNK_SIZE: usize = 2000; + /// Run document generation (incremental by default). 
/// /// Incremental mode (default): @@ -462,6 +530,7 @@ pub struct GenerateDocsResult { /// /// Full mode (--full): /// - Regenerates ALL documents from scratch +/// - Uses chunked transactions (2k docs/tx) to bound WAL growth /// - Use when schema changes or after migration pub fn run_generate_docs( config: &Config, @@ -469,19 +538,58 @@ pub fn run_generate_docs( project_filter: Option<&str>, ) -> Result { if full { - // Full mode: regenerate everything inside a single transaction + // Full mode: regenerate everything using chunked transactions + // + // Using chunked transactions instead of a single giant transaction: + // - Bounds WAL file growth (single 50k-doc tx could balloon WAL) + // - Reduces memory pressure from statement caches + // - Allows progress reporting between chunks + // - Crash partway through leaves partial but consistent state + // + // Steps per chunk: // 1. BEGIN IMMEDIATE transaction - // 2. Query all issues, MRs, discussions + // 2. Query next batch of sources (issues/MRs/discussions) // 3. For each: generate document, compute hash // 4. Upsert into `documents` table (FTS triggers auto-fire) // 5. Populate `document_labels` and `document_paths` - // 6. Rebuild FTS: INSERT INTO documents_fts(documents_fts) VALUES('rebuild') - // 7. COMMIT - // 8. Return counts + // 6. COMMIT + // 7. Report progress, loop to next chunk // - // The FTS rebuild at step 6 ensures the index is consistent - // after bulk operations. Wrapping in a transaction avoids - // partial state if the process is interrupted. + // After all chunks: + // 8. 
Single final transaction for FTS rebuild: + // INSERT INTO documents_fts(documents_fts) VALUES('rebuild') + // + // Example implementation: + let conn = open_db(config)?; + let mut result = GenerateDocsResult::default(); + let mut offset = 0; + + loop { + // Process issues in chunks + let issues: Vec = query_issues(&conn, project_filter, FULL_MODE_CHUNK_SIZE, offset)?; + if issues.is_empty() { break; } + + let tx = conn.transaction()?; + for issue in &issues { + let doc = generate_issue_document(issue)?; + upsert_document(&tx, &doc)?; + result.issues += 1; + } + tx.commit()?; + + offset += issues.len(); + // Report progress here if using indicatif + } + + // Similar chunked loops for MRs and discussions... + + // Final FTS rebuild in its own transaction + let tx = conn.transaction()?; + tx.execute( + "INSERT INTO documents_fts(documents_fts) VALUES('rebuild')", + [], + )?; + tx.commit()?; } else { // Incremental mode: process dirty_sources only // 1. Query dirty_sources (bounded by LIMIT) @@ -538,6 +646,8 @@ pub struct GenerateDocsArgs { - [ ] Creates document for each discussion - [ ] Default mode processes dirty_sources queue only (incremental) - [ ] `--full` regenerates all documents from scratch +- [ ] `--full` uses chunked transactions (2k docs/tx) to bound WAL growth +- [ ] Final FTS rebuild after all chunks complete - [ ] Progress bar in human mode (via `indicatif`) - [ ] JSON output in robot mode @@ -555,7 +665,6 @@ src/search/ ├── fts.rs # FTS5 search ├── vector.rs # Vector search (sqlite-vec) ├── hybrid.rs # Combined hybrid search -├── rrf.rs # RRF ranking algorithm └── filters.rs # Filter parsing and application ``` @@ -572,8 +681,8 @@ mod hybrid; mod rrf; mod vector; -pub use filters::{SearchFilters, apply_filters}; -pub use fts::{search_fts, FtsResult}; +pub use filters::{SearchFilters, PathFilter, apply_filters}; +pub use fts::{search_fts, to_fts_query, FtsResult, FtsQueryMode, generate_fallback_snippet, get_result_snippet}; pub use 
hybrid::{search_hybrid, HybridResult, SearchMode}; pub use rrf::{rank_rrf, RrfResult}; pub use vector::{search_vector, VectorResult}; @@ -597,20 +706,138 @@ pub struct FtsResult { pub snippet: String, // Context snippet around match } +/// Generate fallback snippet for semantic-only results. +/// +/// When FTS snippets aren't available (semantic-only mode), this generates +/// a context snippet by truncating the document content. Useful for displaying +/// search results without FTS hits. +/// +/// Args: +/// content_text: Full document content +/// max_chars: Maximum snippet length (default 200) +/// +/// Returns a truncated string with ellipsis if truncated. +pub fn generate_fallback_snippet(content_text: &str, max_chars: usize) -> String { + let trimmed = content_text.trim(); + if trimmed.len() <= max_chars { + return trimmed.to_string(); + } + + // Find word boundary near max_chars to avoid cutting mid-word + let truncation_point = trimmed[..max_chars] + .rfind(|c: char| c.is_whitespace()) + .unwrap_or(max_chars); + + format!("{}...", &trimmed[..truncation_point]) +} + +/// Get snippet for search result, preferring FTS when available. +/// +/// Priority: +/// 1. FTS snippet (if document matched FTS query) +/// 2. Fallback: truncated content_text +pub fn get_result_snippet( + fts_snippet: Option<&str>, + content_text: &str, +) -> String { + match fts_snippet { + Some(snippet) if !snippet.is_empty() => snippet.to_string(), + _ => generate_fallback_snippet(content_text, 200), + } +} + +/// FTS query parsing mode. +#[derive(Debug, Clone, Copy, Default)] +pub enum FtsQueryMode { + /// Safe parsing (default): escapes dangerous syntax but preserves + /// trailing `*` for obvious prefix queries (type-ahead UX). + #[default] + Safe, + /// Raw mode: passes user MATCH syntax through unchanged. + /// Use with caution - invalid syntax will cause FTS5 errors. + Raw, +} + +/// Convert user query to FTS5-safe MATCH expression. 
+/// +/// FTS5 MATCH syntax has special characters that cause errors if passed raw: +/// - `-` (NOT operator) +/// - `"` (phrase quotes) +/// - `:` (column filter) +/// - `*` (prefix) +/// - `AND`, `OR`, `NOT` (operators) +/// +/// Strategy for Safe mode: +/// - Wrap each whitespace-delimited token in double quotes +/// - Escape internal quotes by doubling them +/// - PRESERVE trailing `*` for simple prefix queries (alphanumeric tokens) +/// - This forces FTS5 to treat tokens as literals while allowing type-ahead +/// +/// Raw mode passes the query through unchanged for power users who want +/// full FTS5 syntax (phrase queries, column scopes, boolean operators). +/// +/// Examples (Safe mode): +/// - "auth error" -> `"auth" "error"` (implicit AND) +/// - "auth*" -> `"auth"*` (prefix preserved!) +/// - "jwt_token*" -> `"jwt_token"*` (prefix preserved!) +/// - "C++" -> `"C++"` (special chars preserved, no prefix) +/// - "don't panic" -> `"don't" "panic"` (apostrophe preserved) +/// - "-DWITH_SSL" -> `"-DWITH_SSL"` (leading dash neutralized) +pub fn to_fts_query(raw: &str, mode: FtsQueryMode) -> String { + if matches!(mode, FtsQueryMode::Raw) { + return raw.trim().to_string(); + } + + raw.split_whitespace() + .map(|token| { + let t = token.trim(); + if t.is_empty() { + return "\"\"".to_string(); + } + + // Detect simple prefix queries: alphanumeric/underscore followed by * + // e.g., "auth*", "jwt_token*", "user123*" + let is_prefix = t.ends_with('*') + && t.len() > 1 + && t[..t.len() - 1] + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_'); + + // Escape internal double quotes by doubling them + let escaped = t.replace('"', "\"\""); + + if is_prefix { + // Strip trailing *, quote the core, then re-add * + let core = &escaped[..escaped.len() - 1]; + format!("\"{}\"*", core) + } else { + format!("\"{}\"", escaped) + } + }) + .collect::>() + .join(" ") +} + /// Search documents using FTS5. 
/// /// Returns matching document IDs with BM25 rank scores and snippets. /// Lower rank values indicate better matches. /// Uses bm25() explicitly (not the `rank` alias) and snippet() for context. +/// +/// IMPORTANT: User input is sanitized via `to_fts_query()` to prevent +/// FTS5 syntax errors from special characters while preserving prefix search. pub fn search_fts( conn: &Connection, query: &str, limit: usize, + mode: FtsQueryMode, ) -> Result> { if query.trim().is_empty() { return Ok(Vec::new()); } + let safe_query = to_fts_query(query, mode); + let mut stmt = conn.prepare( "SELECT rowid, bm25(documents_fts), @@ -622,7 +849,7 @@ pub fn search_fts( )?; let results = stmt - .query_map([query, &limit.to_string()], |row| { + .query_map([&safe_query, &limit.to_string()], |row| { Ok(FtsResult { document_id: row.get(0)?, rank: row.get(1)?, @@ -638,9 +865,14 @@ pub fn search_fts( **Acceptance Criteria:** - [ ] Returns matching document IDs with BM25 rank - [ ] Porter stemming works (search/searching match) -- [ ] Prefix search works (type-ahead UX) +- [ ] Prefix search works (type-ahead UX): `auth*` returns results starting with "auth" - [ ] Empty query returns empty results - [ ] Nonsense query returns empty results +- [ ] Special characters in query don't cause FTS5 syntax errors (`-`, `"`, `:`, `*`) +- [ ] Query `"-DWITH_SSL"` returns results (not treated as NOT operator) +- [ ] Query `C++` returns results (special chars preserved) +- [ ] Safe mode preserves trailing `*` on alphanumeric tokens +- [ ] Raw mode (`--fts-mode=raw`) passes query through unchanged --- @@ -653,6 +885,12 @@ use rusqlite::Connection; use crate::core::error::Result; use crate::documents::SourceType; +/// Maximum allowed limit for search results. +const MAX_SEARCH_LIMIT: usize = 100; + +/// Default limit for search results. +const DEFAULT_SEARCH_LIMIT: usize = 20; + /// Search filters applied post-retrieval. 
#[derive(Debug, Clone, Default)] pub struct SearchFilters { @@ -665,6 +903,27 @@ pub struct SearchFilters { pub limit: usize, // Default 20, max 100 } +impl SearchFilters { + /// Check if any filter is set (used for adaptive recall). + pub fn has_any_filter(&self) -> bool { + self.source_type.is_some() + || self.author.is_some() + || self.project_id.is_some() + || self.after.is_some() + || !self.labels.is_empty() + || self.path.is_some() + } + + /// Clamp limit to valid range [1, MAX_SEARCH_LIMIT]. + pub fn clamp_limit(&self) -> usize { + if self.limit == 0 { + DEFAULT_SEARCH_LIMIT + } else { + self.limit.min(MAX_SEARCH_LIMIT) + } + } +} + /// Path filter with prefix or exact match. #[derive(Debug, Clone)] pub enum PathFilter { @@ -686,39 +945,115 @@ impl PathFilter { /// /// IMPORTANT: Preserves ranking order from input document_ids. /// Filters must not reorder results - maintain the RRF/search ranking. +/// +/// Uses JSON1 extension for efficient ordered ID passing: +/// - Passes document_ids as JSON array: `[1,2,3,...]` +/// - Uses `json_each()` to expand into rows with `key` as position +/// - JOINs with documents table and applies filters +/// - Orders by original position to preserve ranking pub fn apply_filters( conn: &Connection, document_ids: &[i64], filters: &SearchFilters, ) -> Result> { - // Build dynamic WHERE clause based on filters - // Multiple --label flags use AND logic - // Path prefix vs exact match per PathFilter variant - // - // Implementation strategy to preserve ranking order: - // 1. Accept document_ids as ordered list - // 2. Build CTE with position - // 3. JOIN with filters - // 4. ORDER BY original position - // - // Example SQL pattern: - // ```sql - // WITH ranked_docs(doc_id, pos) AS ( - // SELECT column1, ROW_NUMBER() OVER() as pos - // FROM (VALUES (?),(?),(?),...) - // ) - // SELECT d.id - // FROM documents d - // JOIN ranked_docs rd ON d.id = rd.doc_id - // WHERE d.source_type = ? 
- // AND EXISTS ( - // SELECT 1 FROM document_labels dl - // WHERE dl.document_id = d.id AND dl.label_name = ? - // ) - // ORDER BY rd.pos - // LIMIT ? - // ``` - todo!() + if document_ids.is_empty() { + return Ok(Vec::new()); + } + + // Build JSON array of document IDs + let ids_json = serde_json::to_string(document_ids)?; + + // Build dynamic WHERE clauses + let mut conditions: Vec = Vec::new(); + let mut params: Vec> = Vec::new(); + + // Always bind the JSON array first + params.push(Box::new(ids_json)); + + if let Some(ref source_type) = filters.source_type { + conditions.push("d.source_type = ?".into()); + params.push(Box::new(source_type.as_str().to_string())); + } + + if let Some(ref author) = filters.author { + conditions.push("d.author_username = ?".into()); + params.push(Box::new(author.clone())); + } + + if let Some(project_id) = filters.project_id { + conditions.push("d.project_id = ?".into()); + params.push(Box::new(project_id)); + } + + if let Some(after) = filters.after { + conditions.push("d.created_at >= ?".into()); + params.push(Box::new(after)); + } + + // Labels: AND logic - all labels must be present + for label in &filters.labels { + conditions.push( + "EXISTS (SELECT 1 FROM document_labels dl WHERE dl.document_id = d.id AND dl.label_name = ?)".into() + ); + params.push(Box::new(label.clone())); + } + + // Path filter + if let Some(ref path_filter) = filters.path { + match path_filter { + PathFilter::Exact(path) => { + conditions.push( + "EXISTS (SELECT 1 FROM document_paths dp WHERE dp.document_id = d.id AND dp.path = ?)".into() + ); + params.push(Box::new(path.clone())); + } + PathFilter::Prefix(prefix) => { + // IMPORTANT: Must use ESCAPE clause for backslash escaping to work in SQLite LIKE + conditions.push( + "EXISTS (SELECT 1 FROM document_paths dp WHERE dp.document_id = d.id AND dp.path LIKE ? 
ESCAPE '\\')".into() + ); + // Escape LIKE wildcards and add trailing % + let like_pattern = format!( + "{}%", + prefix.replace('%', "\\%").replace('_', "\\_") + ); + params.push(Box::new(like_pattern)); + } + } + } + + let where_clause = if conditions.is_empty() { + String::new() + } else { + format!("AND {}", conditions.join(" AND ")) + }; + + let limit = filters.clamp_limit(); + + // SQL using JSON1 for ordered ID passing + // json_each() returns rows with `key` (0-indexed position) and `value` (the ID) + let sql = format!( + r#" + SELECT d.id + FROM json_each(?) AS j + JOIN documents d ON d.id = j.value + WHERE 1=1 {} + ORDER BY j.key + LIMIT ? + "#, + where_clause + ); + + params.push(Box::new(limit as i64)); + + let mut stmt = conn.prepare(&sql)?; + let params_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect(); + + let results = stmt + .query_map(params_refs.as_slice(), |row| row.get(0))? + .collect::, _>>()?; + + Ok(results) } ``` @@ -738,7 +1073,10 @@ pub fn apply_filters( - [ ] Multiple `--label` flags use AND logic - [ ] Path prefix vs exact match works correctly - [ ] Filters compose (all applied together) -- [ ] Ranking order preserved after filtering +- [ ] Ranking order preserved after filtering (ORDER BY position) +- [ ] Limit clamped to valid range [1, 100] +- [ ] Default limit is 20 when not specified +- [ ] JSON1 `json_each()` correctly expands document IDs --- @@ -754,7 +1092,7 @@ use serde::Serialize; use crate::core::error::Result; use crate::core::time::ms_to_iso; -use crate::search::{SearchFilters, SearchMode, search_hybrid, HybridResult}; +use crate::search::{SearchFilters, SearchMode, search_fts, search_vector, rank_rrf, RrfResult}; use crate::Config; /// Search result for display. 
@@ -917,6 +1255,12 @@ pub struct SearchArgs { /// Show ranking breakdown #[arg(long)] explain: bool, + + /// FTS query mode: "safe" (default) or "raw" + /// - safe: Escapes special chars but preserves `*` for prefix queries + /// - raw: Pass FTS5 MATCH syntax through unchanged (advanced) + #[arg(long, default_value = "safe")] + fts_mode: String, // "safe" | "raw" } ``` @@ -924,9 +1268,12 @@ pub struct SearchArgs { - [ ] Works without Ollama running - [ ] All filters functional - [ ] Human-readable output with snippets +- [ ] Semantic-only results get fallback snippets from content_text - [ ] JSON output matches schema - [ ] Empty results show helpful message - [ ] "No data indexed" message if documents table empty +- [ ] `--fts-mode=safe` (default) preserves prefix `*` while escaping special chars +- [ ] `--fts-mode=raw` passes FTS5 MATCH syntax through unchanged --- @@ -1213,6 +1560,10 @@ const BATCH_SIZE: usize = 32; /// SQLite page size for paging through pending documents. const DB_PAGE_SIZE: usize = 500; +/// Expected embedding dimensions for nomic-embed-text model. +/// IMPORTANT: Validates against this to prevent silent corruption. +const EXPECTED_DIMS: usize = 768; + /// Which documents to embed. #[derive(Debug, Clone, Copy)] pub enum EmbedSelection { @@ -1233,24 +1584,22 @@ pub struct EmbedResult { /// Embed documents that need embedding. /// /// Process: -/// 1. Page through documents needing embedding (DB_PAGE_SIZE at a time) -/// 2. Batch documents (32 per Ollama request) -/// 3. Fire concurrent HTTP requests via FuturesUnordered (capped by concurrency) -/// 4. Collect results and write to SQLite sequentially (rusqlite is !Send) -/// 5. On failure: record error with actual content_hash, continue with next batch -/// -/// Architecture note: rusqlite::Connection is !Send, so all DB reads/writes -/// happen on the main thread. Only HTTP calls are concurrent. +/// 1. Query dirty_sources ordered by queued_at +/// 2. 
For each: regenerate document, compute new hash +/// 3. ALWAYS upsert document (labels/paths may change even if content_hash unchanged) +/// 4. Track whether content_hash changed (for stats) +/// 5. Delete from dirty_sources (or record error on failure) pub async fn embed_documents( conn: &Connection, client: &OllamaClient, + selection: EmbedSelection, concurrency: usize, progress_callback: Option>, ) -> Result { use futures::stream::{FuturesUnordered, StreamExt}; let mut result = EmbedResult::default(); - let mut total_pending = count_pending_documents(conn)?; + let total_pending = count_pending_documents(conn, selection)?; if total_pending == 0 { return Ok(result); @@ -1258,7 +1607,7 @@ pub async fn embed_documents( // Page through pending documents to avoid loading all into memory loop { - let pending = find_pending_documents(conn, DB_PAGE_SIZE)?; + let pending = find_pending_documents(conn, DB_PAGE_SIZE, selection)?; if pending.is_empty() { break; } @@ -1300,6 +1649,10 @@ pub async fn embed_documents( } /// Collect embedding results and write to DB (sequential, on main thread). +/// +/// IMPORTANT: Validates embedding dimensions to prevent silent corruption. +/// If model returns wrong dimensions (e.g., different model configured), +/// the document is marked as failed rather than storing corrupt data. 
 fn collect_writes(
     conn: &Connection,
     batch_meta: &[(i64, String)],
@@ -1310,6 +1663,21 @@
     match embed_result {
         Ok(embeddings) => {
             for ((doc_id, hash), embedding) in batch_meta.iter().zip(embeddings.iter()) {
+                // Validate dimensions to prevent silent corruption
+                if embedding.len() != EXPECTED_DIMS {
+                    record_embedding_error(
+                        &tx,
+                        *doc_id,
+                        hash,
+                        &format!(
+                            "embedding dimension mismatch: got {}, expected {}",
+                            embedding.len(),
+                            EXPECTED_DIMS
+                        ),
+                    )?;
+                    result.failed += 1;
+                    continue;
+                }
                 store_embedding(&tx, *doc_id, embedding, hash)?;
                 result.embedded += 1;
             }
@@ -1332,19 +1700,29 @@ struct PendingDocument {
 }
 
 /// Count total pending documents (for progress reporting).
-fn count_pending_documents(conn: &Connection) -> Result<usize> {
-    let count: usize = conn.query_row(
-        "SELECT COUNT(*)
-         FROM documents d
-         LEFT JOIN embedding_metadata em ON d.id = em.document_id
-         WHERE em.document_id IS NULL
-            OR em.content_hash != d.content_hash",
-        [],
-        |row| row.get(0),
-    )?;
+fn count_pending_documents(conn: &Connection, selection: EmbedSelection) -> Result<usize> {
+    let sql = match selection {
+        EmbedSelection::Pending =>
+            "SELECT COUNT(*)
+             FROM documents d
+             LEFT JOIN embedding_metadata em ON d.id = em.document_id
+             WHERE em.document_id IS NULL
+                OR em.content_hash != d.content_hash",
+        EmbedSelection::RetryFailed =>
+            "SELECT COUNT(*)
+             FROM documents d
+             JOIN embedding_metadata em ON d.id = em.document_id
+             WHERE em.last_error IS NOT NULL",
+    };
+    let count: usize = conn.query_row(sql, [], |row| row.get(0))?;
     Ok(count)
 }
 
+/// Find pending documents for embedding.
+///
+/// IMPORTANT: Uses deterministic ORDER BY d.id to ensure consistent
+/// paging behavior. Without ordering, SQLite may return rows in
+/// different orders across calls, causing missed or duplicate documents.
fn find_pending_documents( conn: &Connection, limit: usize, @@ -1357,12 +1735,14 @@ fn find_pending_documents( LEFT JOIN embedding_metadata em ON d.id = em.document_id WHERE em.document_id IS NULL OR em.content_hash != d.content_hash + ORDER BY d.id LIMIT ?", EmbedSelection::RetryFailed => "SELECT d.id, d.content_text, d.content_hash FROM documents d JOIN embedding_metadata em ON d.id = em.document_id WHERE em.last_error IS NOT NULL + ORDER BY d.id LIMIT ?", }; let mut stmt = conn.prepare(sql)?; @@ -1381,7 +1761,7 @@ fn find_pending_documents( } fn store_embedding( - conn: &Connection, + tx: &rusqlite::Transaction, document_id: i64, embedding: &[f32], content_hash: &str, @@ -1394,14 +1774,14 @@ fn store_embedding( .collect(); // Store in sqlite-vec (rowid = document_id) - conn.execute( + tx.execute( "INSERT OR REPLACE INTO embeddings(rowid, embedding) VALUES (?, ?)", rusqlite::params![document_id, embedding_bytes], )?; // Update metadata let now = crate::core::time::now_ms(); - conn.execute( + tx.execute( "INSERT OR REPLACE INTO embedding_metadata (document_id, model, dims, content_hash, created_at, last_error, attempt_count, last_attempt_at) VALUES (?, 'nomic-embed-text', 768, ?, ?, NULL, 0, ?)", @@ -1412,13 +1792,13 @@ fn store_embedding( } fn record_embedding_error( - conn: &Connection, + tx: &rusqlite::Transaction, document_id: i64, content_hash: &str, error: &str, ) -> Result<()> { let now = crate::core::time::now_ms(); - conn.execute( + tx.execute( "INSERT INTO embedding_metadata (document_id, model, dims, content_hash, created_at, last_error, attempt_count, last_attempt_at) VALUES (?, 'nomic-embed-text', 768, ?, ?, ?, 1, ?) 
@@ -1442,6 +1822,8 @@ fn record_embedding_error( - [ ] Writes batched in transactions for performance - [ ] Concurrency parameter respected - [ ] Progress reported during embedding +- [ ] Deterministic `ORDER BY d.id` ensures consistent paging +- [ ] `EmbedSelection` parameter controls pending vs retry-failed mode --- @@ -1464,6 +1846,9 @@ pub async fn run_embed( config: &Config, retry_failed: bool, ) -> Result { + use crate::core::db::open_database; + use crate::embedding::pipeline::EmbedSelection; + let ollama_config = OllamaConfig { base_url: config.embedding.base_url.clone(), model: config.embedding.model.clone(), @@ -1475,10 +1860,21 @@ pub async fn run_embed( // Health check client.health_check().await?; + // Open database connection + let conn = open_database(config)?; + + // Determine selection mode + let selection = if retry_failed { + EmbedSelection::RetryFailed + } else { + EmbedSelection::Pending + }; + // Run embedding let result = embed_documents( &conn, &client, + selection, config.embedding.concurrency as usize, None, ).await?; @@ -1492,7 +1888,7 @@ pub fn print_embed(result: &EmbedResult, elapsed_secs: u64) { println!(" Embedded: {:>6} documents", result.embedded); println!(" Failed: {:>6} documents", result.failed); println!(" Skipped: {:>6} documents", result.skipped); - println!(" Elapsed: {}m {}s", elapsed_secs / 60, elapsed_secs % 60); + println!(" Elapsed: {}m {}s", elapsed_secs / 60, elapsed_secs % 60); } /// Print JSON output for robot mode. @@ -1551,6 +1947,7 @@ pub struct Stats { pub documents: DocumentStats, pub embeddings: EmbeddingStats, pub fts: FtsStats, + pub queues: QueueStats, } #[derive(Debug, Serialize)] @@ -1575,6 +1972,22 @@ pub struct FtsStats { pub indexed: usize, } +/// Queue statistics for observability. +/// +/// Exposes internal queue depths so operators can detect backlogs +/// and failing items that need manual intervention. 
+#[derive(Debug, Serialize)] +pub struct QueueStats { + /// Items in dirty_sources queue (pending document regeneration) + pub dirty_sources: usize, + /// Items in dirty_sources with last_error set (failing regeneration) + pub dirty_sources_failed: usize, + /// Items in pending_discussion_fetches queue + pub pending_discussion_fetches: usize, + /// Items in pending_discussion_fetches with last_error set + pub pending_discussion_fetches_failed: usize, +} + /// Integrity check result. #[derive(Debug, Serialize)] pub struct IntegrityCheck { @@ -1608,6 +2021,62 @@ pub fn run_integrity_check(config: &Config) -> Result { todo!() } +/// Repair result from --repair flag. +#[derive(Debug, Serialize)] +pub struct RepairResult { + pub orphaned_embeddings_deleted: usize, + pub stale_embeddings_cleared: usize, + pub missing_fts_repopulated: usize, +} + +/// Repair issues found by integrity check (--repair flag). +/// +/// Fixes: +/// - Deletes orphaned embeddings (embedding_metadata rows with no matching document) +/// - Clears stale embedding_metadata (hash mismatch) so they get re-embedded +/// - Repopulates FTS for documents missing from documents_fts +pub fn run_repair(config: &Config) -> Result { + let conn = open_db(config)?; + + // Delete orphaned embeddings (no matching document) + let orphaned_deleted = conn.execute( + "DELETE FROM embedding_metadata + WHERE document_id NOT IN (SELECT id FROM documents)", + [], + )?; + + // Also delete from embeddings virtual table (sqlite-vec) + conn.execute( + "DELETE FROM embeddings + WHERE rowid NOT IN (SELECT id FROM documents)", + [], + )?; + + // Clear stale embedding_metadata (hash mismatch) - will be re-embedded + let stale_cleared = conn.execute( + "DELETE FROM embedding_metadata + WHERE (document_id, content_hash) NOT IN ( + SELECT id, content_hash FROM documents + )", + [], + )?; + + // Repopulate FTS for missing documents + let fts_repopulated = conn.execute( + "INSERT INTO documents_fts(rowid, title, content_text) + 
SELECT id, COALESCE(title, ''), content_text + FROM documents + WHERE id NOT IN (SELECT rowid FROM documents_fts)", + [], + )?; + + Ok(RepairResult { + orphaned_embeddings_deleted: orphaned_deleted, + stale_embeddings_cleared: stale_cleared, + missing_fts_repopulated: fts_repopulated, + }) +} + /// Print human-readable stats. pub fn print_stats(stats: &Stats) { println!("Document Statistics:"); @@ -1626,6 +2095,16 @@ pub fn print_stats(stats: &Stats) { println!(); println!("FTS Index:"); println!(" Indexed: {:>6} documents", stats.fts.indexed); + println!(); + println!("Queue Depths:"); + println!(" Dirty sources: {:>6} ({} failed)", + stats.queues.dirty_sources, + stats.queues.dirty_sources_failed + ); + println!(" Discussion fetches:{:>6} ({} failed)", + stats.queues.pending_discussion_fetches, + stats.queues.pending_discussion_fetches_failed + ); } /// Print integrity check results. @@ -1655,6 +2134,24 @@ pub fn print_stats_json(stats: &Stats) { } ``` +/// Print repair results. +pub fn print_repair_result(result: &RepairResult) { + println!("Repair Results:"); + println!(" Orphaned embeddings deleted: {}", result.orphaned_embeddings_deleted); + println!(" Stale embeddings cleared: {}", result.stale_embeddings_cleared); + println!(" Missing FTS repopulated: {}", result.missing_fts_repopulated); + println!(); + let total = result.orphaned_embeddings_deleted + + result.stale_embeddings_cleared + + result.missing_fts_repopulated; + if total == 0 { + println!(" No issues found to repair."); + } else { + println!(" Fixed {} issues.", total); + } +} +``` + **CLI integration:** ```rust /// Stats subcommand arguments. 
@@ -1663,6 +2160,10 @@ pub struct StatsArgs { /// Run integrity checks (document/FTS/embedding consistency) #[arg(long)] check: bool, + + /// Repair issues found by --check (deletes orphaned embeddings, clears stale metadata) + #[arg(long, requires = "check")] + repair: bool, } ``` @@ -1671,7 +2172,10 @@ pub struct StatsArgs { - [ ] Shows embedding coverage - [ ] Shows FTS index count - [ ] Identifies truncated documents +- [ ] Shows queue depths (dirty_sources, pending_discussion_fetches) +- [ ] Shows failed item counts for each queue - [ ] `--check` verifies document/FTS/embedding consistency +- [ ] `--repair` fixes orphaned embeddings, stale metadata, missing FTS entries - [ ] JSON output for scripting --- @@ -1837,13 +2341,16 @@ use rusqlite::Connection; use crate::core::error::Result; use crate::embedding::OllamaClient; -use crate::search::{SearchFilters, search_fts, search_vector, rank_rrf, RrfResult}; +use crate::search::{SearchFilters, SearchMode, search_fts, search_vector, rank_rrf, RrfResult, FtsQueryMode}; -/// Base recall for unfiltered search. -const BASE_RECALL: usize = 50; +/// Minimum base recall for unfiltered search. +const BASE_RECALL_MIN: usize = 50; -/// Expanded recall when filters are applied. -const FILTERED_RECALL: usize = 200; +/// Minimum recall when filters are applied. +const FILTERED_RECALL_MIN: usize = 200; + +/// Maximum recall to prevent excessive resource usage. +const RECALL_CAP: usize = 1500; /// Search mode. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -1884,30 +2391,38 @@ pub struct HybridResult { /// Execute hybrid search. /// -/// Adaptive recall: expands topK when filters are applied to prevent -/// "no results" when relevant docs exist but would be filtered out. +/// Adaptive recall: expands topK proportionally to requested limit and filter +/// restrictiveness to prevent "no results" when relevant docs would be filtered out. 
+/// +/// Formula: +/// - Unfiltered: max(50, limit * 10), capped at 1500 +/// - Filtered: max(200, limit * 50), capped at 1500 /// /// IMPORTANT: All modes use RRF consistently to ensure rank fields /// are populated correctly for --explain output. pub async fn search_hybrid( conn: &Connection, client: Option<&OllamaClient>, + ollama_base_url: Option<&str>, // For actionable error messages query: &str, mode: SearchMode, filters: &SearchFilters, + fts_mode: FtsQueryMode, ) -> Result<(Vec, Vec)> { let mut warnings: Vec = Vec::new(); - // Determine recall based on filters + + // Adaptive recall: proportional to requested limit and filter count + let requested = filters.clamp_limit(); let top_k = if filters.has_any_filter() { - FILTERED_RECALL + (requested * 50).max(FILTERED_RECALL_MIN).min(RECALL_CAP) } else { - BASE_RECALL + (requested * 10).max(BASE_RECALL_MIN).min(RECALL_CAP) }; match mode { SearchMode::Lexical => { // FTS only - use RRF with empty vector results for consistent ranking - let fts_results = search_fts(conn, query, top_k)?; + let fts_results = search_fts(conn, query, top_k, fts_mode)?; let fts_tuples: Vec<_> = fts_results.iter().map(|r| (r.document_id, r.rank)).collect(); let ranked = rank_rrf(&[], &fts_tuples); @@ -1927,7 +2442,7 @@ pub async fn search_hybrid( SearchMode::Semantic => { // Vector only - requires client let client = client.ok_or_else(|| crate::core::error::GiError::OllamaUnavailable { - base_url: "unknown".into(), + base_url: ollama_base_url.unwrap_or("http://localhost:11434").into(), source: None, })?; @@ -1954,16 +2469,34 @@ pub async fn search_hybrid( } SearchMode::Hybrid => { // Both retrievers with RRF fusion - let fts_results = search_fts(conn, query, top_k)?; + let fts_results = search_fts(conn, query, top_k, fts_mode)?; - let vec_results = if let Some(client) = client { - let query_embedding = client.embed_batch(vec![query.to_string()]).await?; - let embedding = query_embedding.into_iter().next().unwrap(); - 
search_vector(conn, &embedding, top_k)? - } else { - // Graceful degradation: use FTS only - warnings.push("Embedding service unavailable, using lexical search only".into()); - Vec::new() + // Attempt vector search with graceful degradation on any failure + let vec_results = match client { + Some(client) => { + // Try to embed query; gracefully degrade on transient failures + match client.embed_batch(vec![query.to_string()]).await { + Ok(embeddings) => { + let embedding = embeddings.into_iter().next().unwrap(); + search_vector(conn, &embedding, top_k)? + } + Err(e) => { + // Transient failure (network, timeout, rate limit, etc.) + // Log and fall back to FTS-only rather than failing the search + tracing::warn!("Vector search failed, falling back to lexical: {}", e); + warnings.push(format!( + "Vector search unavailable ({}), using lexical search only", + e + )); + Vec::new() + } + } + } + None => { + // No client configured + warnings.push("Embedding service unavailable, using lexical search only".into()); + Vec::new() + } }; // RRF fusion @@ -1989,9 +2522,10 @@ pub async fn search_hybrid( ``` **Acceptance Criteria:** -- [ ] Unfiltered search uses topK=50 -- [ ] Any filter triggers topK=200 +- [ ] Unfiltered search uses topK=max(50, limit*10), capped at 1500 +- [ ] Filtered search uses topK=max(200, limit*50), capped at 1500 - [ ] Final results still limited by `--limit` +- [ ] Adaptive recall prevents "no results" under heavy filtering --- @@ -2044,16 +2578,32 @@ pub fn mark_dirty( Ok(()) } -/// Get dirty sources ordered by queue time (bounded). +/// Get dirty sources ready for processing. /// -/// Limits results to prevent unbounded processing during large syncs. +/// Uses `next_attempt_at` for efficient, index-friendly backoff queries. +/// Items with NULL `next_attempt_at` are ready immediately (first attempt). +/// Items with `next_attempt_at <= now` have waited long enough after failure. 
+/// +/// Benefits over SQL bitshift calculation: +/// - No overflow risk from large attempt_count values +/// - Index-friendly: `WHERE next_attempt_at <= ?` +/// - Jitter can be added in Rust when computing next_attempt_at +/// +/// This prevents hot-loop retries when a source consistently fails +/// to generate a document (e.g., malformed data, missing references). pub fn get_dirty_sources(conn: &Connection) -> Result> { + let now = now_ms(); + let mut stmt = conn.prepare( - "SELECT source_type, source_id FROM dirty_sources ORDER BY queued_at LIMIT ?" + "SELECT source_type, source_id + FROM dirty_sources + WHERE next_attempt_at IS NULL OR next_attempt_at <= ? + ORDER BY attempt_count ASC, queued_at ASC + LIMIT ?" )?; let results = stmt - .query_map([MAX_DIRTY_SOURCES_PER_RUN], |row| { + .query_map(rusqlite::params![now, MAX_DIRTY_SOURCES_PER_RUN], |row| { let type_str: String = row.get(0)?; let source_type = match type_str.as_str() { "issue" => SourceType::Issue, @@ -2094,6 +2644,9 @@ pub fn clear_dirty( - [ ] Duplicates ignored - [ ] Queue cleared after document regeneration - [ ] Processing bounded per run (max 500) +- [ ] Exponential backoff uses `next_attempt_at` (index-friendly, no overflow) +- [ ] Backoff computed with jitter to prevent thundering herd +- [ ] Failed items prioritized lower than fresh items (ORDER BY attempt_count ASC) --- @@ -2146,10 +2699,16 @@ pub fn queue_discussion_fetch( Ok(()) } -/// Get pending fetches with exponential backoff. +/// Get pending fetches ready for processing. /// -/// Only returns items that have waited long enough based on attempt_count. -/// Backoff formula: min_wait_ms = 1000 * 2^attempt_count (capped at 1 hour) +/// Uses `next_attempt_at` for efficient, index-friendly backoff queries. +/// Items with NULL `next_attempt_at` are ready immediately (first attempt). +/// Items with `next_attempt_at <= now` have waited long enough after failure. 
+/// +/// Benefits over SQL bitshift calculation: +/// - No overflow risk from large attempt_count values +/// - Index-friendly: `WHERE next_attempt_at <= ?` +/// - Jitter can be added in Rust when computing next_attempt_at /// /// Limited to `max_items` to bound API calls per sync run. pub fn get_pending_fetches(conn: &Connection, max_items: usize) -> Result> { @@ -2158,8 +2717,7 @@ pub fn get_pending_fetches(conn: &Connection, max_items: usize) -> Result MIN(3600000, 1000 * (1 << attempt_count)) + WHERE next_attempt_at IS NULL OR next_attempt_at <= ? ORDER BY attempt_count ASC, queued_at ASC LIMIT ?" )?; @@ -2199,32 +2757,63 @@ pub fn complete_fetch( Ok(()) } -/// Record fetch failure. +/// Record fetch failure and compute next retry time. +/// +/// Computes `next_attempt_at` using exponential backoff with jitter: +/// - Base delay: 1000ms * 2^attempt_count +/// - Cap: 1 hour (3600000ms) +/// - Jitter: ±10% to prevent thundering herd pub fn record_fetch_error( conn: &Connection, project_id: i64, noteable_type: NoteableType, noteable_iid: i64, error: &str, + current_attempt: i64, ) -> Result<()> { + let now = now_ms(); + let next_attempt = compute_next_attempt_at(now, current_attempt + 1); + conn.execute( "UPDATE pending_discussion_fetches SET attempt_count = attempt_count + 1, last_attempt_at = ?, - last_error = ? + last_error = ?, + next_attempt_at = ? WHERE project_id = ? AND noteable_type = ? AND noteable_iid = ?", - rusqlite::params![now_ms(), error, project_id, noteable_type.as_str(), noteable_iid], + rusqlite::params![now, error, next_attempt, project_id, noteable_type.as_str(), noteable_iid], )?; Ok(()) } + +/// Compute next_attempt_at with exponential backoff and jitter. 
+/// +/// Formula: now + min(3600000, 1000 * 2^attempt_count) * (0.9 to 1.1) +/// - Capped at 1 hour to prevent runaway delays +/// - ±10% jitter prevents synchronized retries after outages +pub fn compute_next_attempt_at(now: i64, attempt_count: i64) -> i64 { + use rand::Rng; + + // Cap attempt_count to prevent overflow (2^30 > 1 hour anyway) + let capped_attempts = attempt_count.min(30) as u32; + let base_delay_ms = 1000_i64.saturating_mul(1 << capped_attempts); + let capped_delay_ms = base_delay_ms.min(3_600_000); // 1 hour cap + + // Add ±10% jitter + let jitter_factor = rand::thread_rng().gen_range(0.9..=1.1); + let delay_with_jitter = (capped_delay_ms as f64 * jitter_factor) as i64; + + now + delay_with_jitter +} ``` **Acceptance Criteria:** - [ ] Updated entities queued for discussion fetch - [ ] Success removes from queue -- [ ] Failure increments attempt_count +- [ ] Failure increments attempt_count and sets next_attempt_at - [ ] Processing bounded per run (max 100) -- [ ] Exponential backoff respects attempt_count +- [ ] Exponential backoff uses `next_attempt_at` (index-friendly, no overflow) +- [ ] Backoff computed with jitter to prevent thundering herd --- @@ -2340,38 +2929,64 @@ fn record_dirty_error( last_attempt_at = ?, last_error = ? WHERE source_type = ? AND source_id = ?", - rusqlite::params![crate::core::time::now_ms(), error, source_type.as_str(), source_id], + rusqlite::params![now_ms(), error, source_type.as_str(), source_id], )?; Ok(()) } +/// Get existing content hash for a document, if it exists. +/// +/// IMPORTANT: Uses `optional()` to distinguish between: +/// - No row found -> Ok(None) +/// - Row found -> Ok(Some(hash)) +/// - DB error -> Err(...) +/// +/// Using `.ok()` would hide real DB errors (disk I/O, corruption, etc.) +/// which should propagate up for proper error handling. 
fn get_existing_hash( conn: &Connection, source_type: SourceType, source_id: i64, ) -> Result> { + use rusqlite::OptionalExtension; + let mut stmt = conn.prepare( "SELECT content_hash FROM documents WHERE source_type = ? AND source_id = ?" )?; let hash: Option = stmt .query_row(rusqlite::params![source_type.as_str(), source_id], |row| row.get(0)) - .ok(); + .optional()?; Ok(hash) } fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> { - // Upsert main document + use rusqlite::OptionalExtension; + + // Check existing hashes before upserting (for write optimization) + let existing: Option<(i64, String, String)> = conn + .query_row( + "SELECT id, labels_hash, paths_hash FROM documents + WHERE source_type = ? AND source_id = ?", + rusqlite::params![doc.source_type.as_str(), doc.source_id], + |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)), + ) + .optional()?; + + // Upsert main document (includes labels_hash, paths_hash) conn.execute( "INSERT INTO documents (source_type, source_id, project_id, author_username, label_names, + labels_hash, paths_hash, created_at, updated_at, url, title, content_text, content_hash, is_truncated, truncated_reason) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
ON CONFLICT(source_type, source_id) DO UPDATE SET author_username = excluded.author_username, label_names = excluded.label_names, + labels_hash = excluded.labels_hash, + paths_hash = excluded.paths_hash, updated_at = excluded.updated_at, url = excluded.url, title = excluded.title, @@ -2385,6 +3000,8 @@ fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> { doc.project_id, doc.author_username, serde_json::to_string(&doc.labels)?, + doc.labels_hash, + doc.paths_hash, doc.created_at, doc.updated_at, doc.url, @@ -2396,31 +3013,46 @@ fn upsert_document(conn: &Connection, doc: &DocumentData) -> Result<()> { ], )?; - // Get inserted/updated document ID - let doc_id = get_document_id(conn, doc.source_type, doc.source_id)?; + // Get document ID (either existing or newly inserted) + let doc_id = match existing { + Some((id, _, _)) => id, + None => get_document_id(conn, doc.source_type, doc.source_id)?, + }; - // Update labels - conn.execute( - "DELETE FROM document_labels WHERE document_id = ?", - [doc_id], - )?; - for label in &doc.labels { + // Only update labels if hash changed (reduces write amplification) + let labels_changed = match &existing { + Some((_, old_hash, _)) => old_hash != &doc.labels_hash, + None => true, // New document, must insert + }; + if labels_changed { conn.execute( - "INSERT INTO document_labels (document_id, label_name) VALUES (?, ?)", - rusqlite::params![doc_id, label], + "DELETE FROM document_labels WHERE document_id = ?", + [doc_id], )?; + for label in &doc.labels { + conn.execute( + "INSERT INTO document_labels (document_id, label_name) VALUES (?, ?)", + rusqlite::params![doc_id, label], + )?; + } } - // Update paths - conn.execute( - "DELETE FROM document_paths WHERE document_id = ?", - [doc_id], - )?; - for path in &doc.paths { + // Only update paths if hash changed (reduces write amplification) + let paths_changed = match &existing { + Some((_, _, old_hash)) => old_hash != &doc.paths_hash, + None => true, // New document, 
must insert + }; + if paths_changed { conn.execute( - "INSERT INTO document_paths (document_id, path) VALUES (?, ?)", - rusqlite::params![doc_id, path], + "DELETE FROM document_paths WHERE document_id = ?", + [doc_id], )?; + for path in &doc.paths { + conn.execute( + "INSERT INTO document_paths (document_id, path) VALUES (?, ?)", + rusqlite::params![doc_id, path], + )?; + } } Ok(()) @@ -2506,7 +3138,7 @@ pub fn print_sync(result: &SyncResult, elapsed_secs: u64) { println!(" Elapsed: {}m {}s", elapsed_secs / 60, elapsed_secs % 60); } -/// Print JSON sync output for robot mode. +/// Print JSON output for robot mode. pub fn print_sync_json(result: &SyncResult, elapsed_ms: u64) { let output = serde_json::json!({ "ok": true, @@ -2559,10 +3191,14 @@ pub struct SyncArgs { | Module | Test File | Coverage | |--------|-----------|----------| -| Document extractor | `src/documents/extractor.rs` (mod tests) | Issue/MR/discussion extraction | +| Document extractor | `src/documents/extractor.rs` (mod tests) | Issue/MR/discussion extraction, consistent headers | | Truncation | `src/documents/truncation.rs` (mod tests) | All edge cases | | RRF ranking | `src/search/rrf.rs` (mod tests) | Score computation, merging | | Content hash | `src/documents/extractor.rs` (mod tests) | Deterministic hashing | +| FTS query sanitization | `src/search/fts.rs` (mod tests) | `to_fts_query()` edge cases: `-`, `"`, `:`, `*`, `C++` | +| SourceType parsing | `src/documents/extractor.rs` (mod tests) | `parse()` accepts aliases: `mr`, `mrs`, `issue`, etc. | +| SearchFilters | `src/search/filters.rs` (mod tests) | `has_any_filter()`, `clamp_limit()` | +| Backoff logic | `src/ingestion/dirty_tracker.rs` (mod tests) | Exponential backoff query timing | ### Integration Tests @@ -2600,12 +3236,18 @@ Each query must have at least one expected URL in top 10 results. 
| `gi generate-docs` (re-run) | 0 regenerated | Hash comparison works | | `gi embed` | Progress, count | Completes, count matches docs | | `gi embed` (re-run) | 0 embedded | Skips unchanged | +| `gi embed --retry-failed` | Processes failed | Only failed docs processed | | `gi stats` | Coverage stats | Shows 100% after embed | +| `gi stats` | Queue depths | Shows dirty_sources and pending_discussion_fetches counts | | `gi search "auth" --mode=lexical` | Results | Works without Ollama | | `gi search "auth"` | Hybrid results | Vector + FTS combined | +| `gi search "auth"` (Ollama down) | FTS results + warning | Graceful degradation, warning in response | | `gi search "auth" --explain` | Rank breakdown | Shows vector/FTS/RRF | | `gi search "auth" --type=mr` | Filtered results | Only MRs | +| `gi search "auth" --type=mrs` | Filtered results | Alias works | | `gi search "auth" --label=bug` | Filtered results | Only labeled docs | +| `gi search "-DWITH_SSL"` | Results | Leading dash doesn't cause FTS error | +| `gi search 'C++'` | Results | Special chars in query work | | `gi search "nonexistent123"` | No results | Graceful empty state | | `gi sync` | Full pipeline | All steps complete | | `gi sync --no-embed` | Skip embedding | Docs generated, not embedded | @@ -2623,6 +3265,10 @@ Each query must have at least one expected URL in top 10 results. - [ ] No orphaned embeddings (embeddings.rowid without matching documents.id) - [ ] Discussion documents exclude system notes - [ ] Discussion documents include parent title +- [ ] All `dirty_sources` entries reference existing source entities +- [ ] All `pending_discussion_fetches` entries reference existing projects +- [ ] `attempt_count` >= 0 for all queue entries (never negative) +- [ ] `last_attempt_at` is NULL when `attempt_count` = 0 --- @@ -2633,6 +3279,8 @@ Checkpoint 3 is complete when: 1. 
**Lexical search works without Ollama** - `gi search "query" --mode=lexical` returns relevant results - All filters functional + - FTS5 syntax errors prevented by query sanitization + - Special characters in queries work correctly (`-DWITH_SSL`, `C++`) 2. **Semantic search works with Ollama** - `gi embed` completes successfully @@ -2641,19 +3289,28 @@ Checkpoint 3 is complete when: 3. **Hybrid search combines both** - Documents appearing in both retrievers rank higher - - Graceful degradation when Ollama unavailable + - Graceful degradation when Ollama unavailable (falls back to FTS) + - Transient embed failures don't fail the entire search + - Warning message included in response on degradation 4. **Incremental sync is efficient** - `gi sync` only processes changed entities - Re-embedding only happens for changed documents - Progress visible during long syncs + - Queue backoff prevents hot-loop retries on persistent failures 5. **Data integrity maintained** - All counts match between tables - No orphaned records - Hashes consistent + - `get_existing_hash()` properly distinguishes "not found" from DB errors -6. **Tests pass** - - Unit tests for core algorithms +6. **Observability** + - `gi stats` shows queue depths and failed item counts + - Failed items visible for operator intervention + - Deterministic ordering ensures consistent paging + +7. **Tests pass** + - Unit tests for core algorithms (including FTS sanitization, backoff) - Integration tests for pipelines - Golden queries return expected results diff --git a/docs/prd/cp1-cp2-alignment-audit.md b/docs/prd/cp1-cp2-alignment-audit.md index 9160e89..1870862 100644 --- a/docs/prd/cp1-cp2-alignment-audit.md +++ b/docs/prd/cp1-cp2-alignment-audit.md @@ -1,5 +1,7 @@ # CP1 ↔ CP2 Alignment Audit +> **Note:** The project was renamed from "gitlab-inbox" to "gitlore" and the CLI from "gi" to "lore". References to "gi" in this document should be read as "lore". 
+ **Created:** 2026-01-26 **Purpose:** Document deviations between CP1 (Issue Ingestion) and CP2 (MR Ingestion) PRDs that could cause implementation drift. Use this checklist to verify alignment before CP2 implementation. diff --git a/docs/robot-mode-design.md b/docs/robot-mode-design.md index 836a396..792cbc3 100644 --- a/docs/robot-mode-design.md +++ b/docs/robot-mode-design.md @@ -2,19 +2,19 @@ ## Overview -Robot mode optimizes the `gi` CLI for AI agent consumption with structured JSON output, meaningful exit codes, and token-efficient responses. +Robot mode optimizes the `lore` CLI for AI agent consumption with structured JSON output, meaningful exit codes, and token-efficient responses. ## Activation ```bash # Explicit flag -gi --robot list issues +lore --robot list issues # Auto-detection (when stdout is not a TTY) -gi list issues | jq . +lore list issues | jq . # Environment variable -GI_ROBOT=1 gi list issues +LORE_ROBOT=true lore list issues ``` ## Global Flags @@ -51,8 +51,8 @@ When `--robot` is active, errors are JSON on stderr: { "error": { "code": "CONFIG_NOT_FOUND", - "message": "Config file not found at ~/.config/gi/config.toml", - "suggestion": "Run 'gi init' to create configuration" + "message": "Config file not found at ~/.config/lore/config.toml", + "suggestion": "Run 'lore init' to create configuration" } } ``` @@ -75,7 +75,7 @@ All commands return consistent JSON structure: ## Command-Specific Output -### gi list issues --robot +### lore list issues --robot ```json { @@ -100,7 +100,7 @@ All commands return consistent JSON structure: } ``` -### gi show issue 123 --robot +### lore show issue 123 --robot ```json { @@ -134,7 +134,7 @@ All commands return consistent JSON structure: } ``` -### gi ingest --type issues --robot +### lore ingest --type issues --robot ```json { @@ -157,7 +157,7 @@ All commands return consistent JSON structure: } ``` -### gi count issues --robot +### lore count issues --robot ```json { @@ -173,7 +173,7 @@ All commands 
return consistent JSON structure: } ``` -### gi doctor --robot +### lore doctor --robot ```json { @@ -181,7 +181,7 @@ All commands return consistent JSON structure: "data": { "success": true, "checks": { - "config": { "status": "ok", "path": "~/.config/gi/config.toml" }, + "config": { "status": "ok", "path": "~/.config/lore/config.toml" }, "database": { "status": "ok", "version": 6 }, "gitlab": { "status": "ok", "user": "username" }, "projects": [ @@ -192,7 +192,7 @@ All commands return consistent JSON structure: } ``` -### gi sync-status --robot +### lore sync-status --robot ```json { diff --git a/src/cli/commands/doctor.rs b/src/cli/commands/doctor.rs index 3d44f9b..db1c949 100644 --- a/src/cli/commands/doctor.rs +++ b/src/cli/commands/doctor.rs @@ -178,7 +178,7 @@ fn check_database(config: Option<&Config>) -> DatabaseCheck { return DatabaseCheck { result: CheckResult { status: CheckStatus::Error, - message: Some("Database file not found. Run \"gi init\" first.".to_string()), + message: Some("Database file not found. Run \"lore init\" first.".to_string()), }, path: Some(db_path.display().to_string()), schema_version: None, @@ -302,7 +302,7 @@ fn check_projects(config: Option<&Config>) -> ProjectsCheck { return ProjectsCheck { result: CheckResult { status: CheckStatus::Error, - message: Some("Database not found. Run \"gi init\" first.".to_string()), + message: Some("Database not found. Run \"lore init\" first.".to_string()), }, configured: Some(configured), resolved: Some(0), @@ -320,7 +320,7 @@ fn check_projects(config: Option<&Config>) -> ProjectsCheck { result: CheckResult { status: CheckStatus::Error, message: Some(format!( - "{configured} configured, 0 resolved. Run \"gi init\" to resolve projects." + "{configured} configured, 0 resolved. Run \"lore init\" to resolve projects." )), }, configured: Some(configured), @@ -459,7 +459,7 @@ async fn check_ollama(config: Option<&Config>) -> OllamaCheck { /// Format and print doctor results to console. 
pub fn print_doctor_results(result: &DoctorResult) { - println!("\ngi doctor\n"); + println!("\nlore doctor\n"); print_check("Config", &result.checks.config.result); print_check("Database", &result.checks.database.result); diff --git a/src/cli/commands/ingest.rs b/src/cli/commands/ingest.rs index 5481390..6b08ee8 100644 --- a/src/cli/commands/ingest.rs +++ b/src/cli/commands/ingest.rs @@ -124,7 +124,7 @@ pub async fn run_ingest( ))); } return Err(GiError::Other( - "No projects configured. Run 'gi init' first.".to_string(), + "No projects configured. Run 'lore init' first.".to_string(), )); } diff --git a/src/cli/commands/init.rs b/src/cli/commands/init.rs index a142b9b..92a6a31 100644 --- a/src/cli/commands/init.rs +++ b/src/cli/commands/init.rs @@ -143,7 +143,7 @@ pub async fn run_init(inputs: InitInputs, options: InitOptions) -> Result, /// Machine-readable JSON output (auto-enabled when piped) - #[arg(long, global = true, env = "GI_ROBOT")] + #[arg(long, global = true, env = "LORE_ROBOT")] pub robot: bool, #[command(subcommand)] diff --git a/src/core/db.rs b/src/core/db.rs index 7b8ca51..25ec0fd 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -159,7 +159,7 @@ pub fn run_migrations_from_dir(conn: &Connection, migrations_dir: &Path) -> Resu } /// Verify database pragmas are set correctly. -/// Used by gi doctor command. +/// Used by lore doctor command. pub fn verify_pragmas(conn: &Connection) -> (bool, Vec) { let mut issues = Vec::new(); diff --git a/src/core/error.rs b/src/core/error.rs index 6133523..dc88892 100644 --- a/src/core/error.rs +++ b/src/core/error.rs @@ -1,4 +1,4 @@ -//! Custom error types for gitlab-inbox. +//! Custom error types for gitlore. //! //! Uses thiserror for ergonomic error definitions with structured error codes. @@ -65,10 +65,10 @@ impl ErrorCode { } } -/// Main error type for gitlab-inbox. +/// Main error type for gitlore. #[derive(Error, Debug)] pub enum GiError { - #[error("Config file not found at {path}. 
Run \"gi init\" first.")] + #[error("Config file not found at {path}. Run \"lore init\" first.")] ConfigNotFound { path: String }, #[error("Invalid config: {details}")] @@ -158,18 +158,18 @@ impl GiError { /// Get a suggestion for how to fix this error. pub fn suggestion(&self) -> Option<&'static str> { match self { - Self::ConfigNotFound { .. } => Some("Run 'gi init' to create configuration"), - Self::ConfigInvalid { .. } => Some("Check config file syntax or run 'gi init' to recreate"), + Self::ConfigNotFound { .. } => Some("Run 'lore init' to create configuration"), + Self::ConfigInvalid { .. } => Some("Check config file syntax or run 'lore init' to recreate"), Self::GitLabAuthFailed => Some("Verify token has read_api scope and is not expired"), Self::GitLabNotFound { .. } => Some("Check the resource path exists and you have access"), Self::GitLabRateLimited { .. } => Some("Wait and retry, or reduce request frequency"), Self::GitLabNetworkError { .. } => Some("Check network connection and GitLab URL"), Self::DatabaseLocked { .. } => Some("Wait for other sync to complete or use --force"), - Self::MigrationFailed { .. } => Some("Check database file permissions or reset with 'gi reset'"), + Self::MigrationFailed { .. } => Some("Check database file permissions or reset with 'lore reset'"), Self::TokenNotSet { .. 
} => Some("Export the token environment variable"), - Self::Database(_) => Some("Check database file permissions or reset with 'gi reset'"), + Self::Database(_) => Some("Check database file permissions or reset with 'lore reset'"), Self::Http(_) => Some("Check network connection"), - Self::NotFound(_) => Some("Verify the entity exists using 'gi list'"), + Self::NotFound(_) => Some("Verify the entity exists using 'lore list'"), Self::Ambiguous(_) => Some("Use --project flag to disambiguate"), _ => None, } diff --git a/src/core/paths.rs b/src/core/paths.rs index d8e6064..b2db216 100644 --- a/src/core/paths.rs +++ b/src/core/paths.rs @@ -6,9 +6,9 @@ use std::path::PathBuf; /// /// Resolution order: /// 1. CLI flag override (if provided) -/// 2. GI_CONFIG_PATH environment variable -/// 3. XDG default (~/.config/gi/config.json) -/// 4. Local fallback (./gi.config.json) if exists +/// 2. LORE_CONFIG_PATH environment variable +/// 3. XDG default (~/.config/lore/config.json) +/// 4. Local fallback (./lore.config.json) if exists /// 5. Returns XDG default even if not exists pub fn get_config_path(cli_override: Option<&str>) -> PathBuf { // 1. CLI flag override @@ -17,18 +17,18 @@ pub fn get_config_path(cli_override: Option<&str>) -> PathBuf { } // 2. Environment variable - if let Ok(path) = std::env::var("GI_CONFIG_PATH") { + if let Ok(path) = std::env::var("LORE_CONFIG_PATH") { return PathBuf::from(path); } // 3. XDG default - let xdg_path = get_xdg_config_dir().join("gi").join("config.json"); + let xdg_path = get_xdg_config_dir().join("lore").join("config.json"); if xdg_path.exists() { return xdg_path; } // 4. Local fallback (for development) - let local_path = PathBuf::from("gi.config.json"); + let local_path = PathBuf::from("lore.config.json"); if local_path.exists() { return local_path; } @@ -38,9 +38,9 @@ pub fn get_config_path(cli_override: Option<&str>) -> PathBuf { } /// Get the data directory path. 
-/// Uses XDG_DATA_HOME or defaults to ~/.local/share/gi +/// Uses XDG_DATA_HOME or defaults to ~/.local/share/lore pub fn get_data_dir() -> PathBuf { - get_xdg_data_dir().join("gi") + get_xdg_data_dir().join("lore") } /// Get the database file path. @@ -49,7 +49,7 @@ pub fn get_db_path(config_override: Option<&str>) -> PathBuf { if let Some(path) = config_override { return PathBuf::from(path); } - get_data_dir().join("gi.db") + get_data_dir().join("lore.db") } /// Get the backup directory path. diff --git a/src/lib.rs b/src/lib.rs index 2538c96..7d7ae5d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ -//! GitLab Inbox - Semantic search for GitLab issues, MRs, and discussions. +//! Gitlore - Semantic search for GitLab issues, MRs, and discussions. //! -//! A self-hosted CLI tool that consolidates GitLab notifications into a unified inbox -//! with semantic search capabilities. +//! A self-hosted CLI tool that syncs GitLab data to a local SQLite database +//! with fast querying and semantic search capabilities. pub mod cli; pub mod core; diff --git a/src/main.rs b/src/main.rs index 6a6889d..600ffa8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -//! GitLab Inbox CLI entry point. +//! Gitlore CLI entry point. 
use clap::Parser; use console::style; @@ -8,8 +8,8 @@ use tracing_subscriber::EnvFilter; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; -use gi::Config; -use gi::cli::commands::{ +use lore::Config; +use lore::cli::commands::{ InitInputs, InitOptions, ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_count, print_count_json, print_doctor_results, print_ingest_summary, print_ingest_summary_json, print_list_issues, print_list_issues_json, print_list_mrs, @@ -18,11 +18,11 @@ use gi::cli::commands::{ run_doctor, run_ingest, run_init, run_list_issues, run_list_mrs, run_show_issue, run_show_mr, run_sync_status, }; -use gi::cli::{Cli, Commands}; -use gi::core::db::{create_connection, get_schema_version, run_migrations}; -use gi::core::error::{GiError, RobotErrorOutput}; -use gi::core::paths::get_config_path; -use gi::core::paths::get_db_path; +use lore::cli::{Cli, Commands}; +use lore::core::db::{create_connection, get_schema_version, run_migrations}; +use lore::core::error::{GiError, RobotErrorOutput}; +use lore::core::paths::get_config_path; +use lore::core::paths::get_db_path; #[tokio::main] async fn main() { @@ -37,7 +37,7 @@ async fn main() { ) .with( EnvFilter::from_default_env() - .add_directive("gi=info".parse().unwrap()) + .add_directive("lore=info".parse().unwrap()) .add_directive("warn".parse().unwrap()), ) .with(indicatif_layer) @@ -326,7 +326,7 @@ async fn handle_init( ); println!( "{}", - style("\nSetup complete! Run 'gi doctor' to verify.").blue() + style("\nSetup complete! 
Run 'lore doctor' to verify.").blue() ); Ok(()) @@ -612,7 +612,7 @@ fn handle_version(robot_mode: bool) -> Result<(), Box> { }; println!("{}", serde_json::to_string(&output)?); } else { - println!("gi version {}", version); + println!("lore version {}", version); } Ok(()) } @@ -641,7 +641,7 @@ fn handle_backup(robot_mode: bool) -> Result<(), Box> { }; println!("{}", serde_json::to_string(&output)?); } else { - println!("gi backup - not yet implemented"); + println!("lore backup - not yet implemented"); } Ok(()) } @@ -657,7 +657,7 @@ fn handle_reset(robot_mode: bool) -> Result<(), Box> { }; println!("{}", serde_json::to_string(&output)?); } else { - println!("gi reset - not yet implemented"); + println!("lore reset - not yet implemented"); } Ok(()) } @@ -702,7 +702,7 @@ async fn handle_migrate( error: RobotErrorSuggestionData { code: "DB_ERROR".to_string(), message: format!("Database not found at {}", db_path.display()), - suggestion: "Run 'gi init' first".to_string(), + suggestion: "Run 'lore init' first".to_string(), }, }; eprintln!("{}", serde_json::to_string(&output)?); @@ -713,7 +713,7 @@ async fn handle_migrate( ); eprintln!( "{}", - style("Run 'gi init' first to create the database.").yellow() + style("Run 'lore init' first to create the database.").yellow() ); } std::process::exit(10); // DB_ERROR exit code diff --git a/tests/diffnote_position_tests.rs b/tests/diffnote_position_tests.rs index ddc50b9..c5cca1c 100644 --- a/tests/diffnote_position_tests.rs +++ b/tests/diffnote_position_tests.rs @@ -1,7 +1,7 @@ //! Tests for DiffNote position extraction in note transformer. 
-use gi::gitlab::transformers::discussion::transform_notes_with_diff_position; -use gi::gitlab::types::{ +use lore::gitlab::transformers::discussion::transform_notes_with_diff_position; +use lore::gitlab::types::{ GitLabAuthor, GitLabDiscussion, GitLabLineRange, GitLabLineRangePoint, GitLabNote, GitLabNotePosition, }; diff --git a/tests/fixture_tests.rs b/tests/fixture_tests.rs index a8e2256..9f07ddd 100644 --- a/tests/fixture_tests.rs +++ b/tests/fixture_tests.rs @@ -1,6 +1,6 @@ //! Tests for test fixtures - verifies they deserialize correctly. -use gi::gitlab::types::{GitLabDiscussion, GitLabIssue}; +use lore::gitlab::types::{GitLabDiscussion, GitLabIssue}; use serde::de::DeserializeOwned; use std::path::PathBuf; diff --git a/tests/gitlab_types_tests.rs b/tests/gitlab_types_tests.rs index 133c56a..8248dad 100644 --- a/tests/gitlab_types_tests.rs +++ b/tests/gitlab_types_tests.rs @@ -1,6 +1,6 @@ //! Tests for GitLab API response type deserialization. -use gi::gitlab::types::{ +use lore::gitlab::types::{ GitLabAuthor, GitLabDiscussion, GitLabIssue, GitLabMergeRequest, GitLabMilestone, GitLabNote, GitLabNotePosition, GitLabReferences, GitLabReviewer, }; diff --git a/tests/mr_discussion_tests.rs b/tests/mr_discussion_tests.rs index 6dd2722..f37fc00 100644 --- a/tests/mr_discussion_tests.rs +++ b/tests/mr_discussion_tests.rs @@ -1,7 +1,7 @@ //! Tests for MR discussion transformer. -use gi::gitlab::transformers::discussion::transform_mr_discussion; -use gi::gitlab::types::{GitLabAuthor, GitLabDiscussion, GitLabNote}; +use lore::gitlab::transformers::discussion::transform_mr_discussion; +use lore::gitlab::types::{GitLabAuthor, GitLabDiscussion, GitLabNote}; fn make_author() -> GitLabAuthor { GitLabAuthor { diff --git a/tests/mr_transformer_tests.rs b/tests/mr_transformer_tests.rs index 4372233..44c414d 100644 --- a/tests/mr_transformer_tests.rs +++ b/tests/mr_transformer_tests.rs @@ -1,7 +1,7 @@ //! Tests for MR transformer module. 
-use gi::gitlab::transformers::merge_request::transform_merge_request; -use gi::gitlab::types::{GitLabAuthor, GitLabMergeRequest, GitLabReferences, GitLabReviewer}; +use lore::gitlab::transformers::merge_request::transform_merge_request; +use lore::gitlab::types::{GitLabAuthor, GitLabMergeRequest, GitLabReferences, GitLabReviewer}; fn make_test_mr() -> GitLabMergeRequest { GitLabMergeRequest {