Wave 4: Full CLI command implementations - fetch, list, show, search, tags, aliases, doctor, cache lifecycle (bd-16o, bd-3km, bd-1dj, bd-acf, bd-3bl, bd-30a, bd-2s6, bd-1d4)

2026-02-12 12:54:10 -05:00
parent 9b29490f5f
commit faa6281790
13 changed files with 5157 additions and 110 deletions
--- a/src/core/search.rs
+++ b/src/core/search.rs
@@ -0,0 +1,634 @@
+use serde::Serialize;
+
+use super::indexer::method_rank;
+use super::spec::SpecIndex;
+
+// ---------------------------------------------------------------------------
+// Public types
+// ---------------------------------------------------------------------------
+
+/// A single ranked hit produced by [`SearchEngine::search`].
+#[derive(Debug, Clone, Serialize)]
+pub struct SearchResult {
+    /// Whether this hit refers to an endpoint or a schema.
+    pub result_type: SearchResultType,
+    /// Endpoint path for endpoint hits; schema name for schema hits.
+    pub name: String,
+    /// Method of the matched endpoint (e.g. `GET`); `None` for schema hits.
+    pub method: Option<String>,
+    /// Endpoint summary copied from the index; always `None` for schema hits.
+    pub summary: Option<String>,
+    /// 1-based position in the returned list, assigned after sorting and
+    /// truncating to the limit.
+    pub rank: usize,
+    /// Weighted relevance score, multiplied by 100 and rounded to an integer.
+    pub score: u32,
+    /// One entry per (term, field) pair that matched.
+    pub matches: Vec<Match>,
+}
+
+/// Kind of indexed item a [`SearchResult`] refers to.
+///
+/// Serialized with `snake_case` variant names.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum SearchResultType {
+    /// The hit is an endpoint from the index.
+    Endpoint,
+    /// The hit is a named schema from the index.
+    Schema,
+}
+
+impl SearchResultType {
+    /// Sort key used to break score ties: endpoints (0) order before
+    /// schemas (1).
+    fn ordinal(self) -> u8 {
+        // Spelled out per variant so adding a variant forces an explicit
+        // decision about where it sorts.
+        match self {
+            SearchResultType::Endpoint => 0,
+            SearchResultType::Schema => 1,
+        }
+    }
+}
+
+/// Describes where a single search term matched within a result.
+#[derive(Debug, Clone, Serialize)]
+pub struct Match {
+    /// Name of the field that matched: `path`, `summary`, `description`
+    /// or `schema_name`.
+    pub field: String,
+    /// Excerpt of the field's text built around the matching term.
+    pub snippet: String,
+}
+
+/// Knobs controlling which fields [`SearchEngine::search`] inspects and how
+/// terms are matched.
+#[derive(Debug, Clone)]
+pub struct SearchOptions {
+    /// Match terms against endpoint paths (endpoint summaries are also
+    /// checked when this is set).
+    pub search_paths: bool,
+    /// Match terms against endpoint descriptions (endpoint summaries are also
+    /// checked when this is set).
+    pub search_descriptions: bool,
+    /// Match terms against schema names.
+    pub search_schemas: bool,
+    /// Compare text as-is instead of lowercasing both sides first.
+    pub case_sensitive: bool,
+    /// Treat the whole query as one term instead of splitting on whitespace.
+    pub exact: bool,
+    /// Maximum number of results returned.
+    pub limit: usize,
+}
+
+impl Default for SearchOptions {
+    /// Defaults: every scope searched, case-insensitive matching,
+    /// whitespace-tokenized queries, and at most 20 results.
+    fn default() -> Self {
+        SearchOptions {
+            // Scopes: everything is searched unless the caller opts out.
+            search_paths: true,
+            search_descriptions: true,
+            search_schemas: true,
+            // Matching: forgiving by default.
+            exact: false,
+            case_sensitive: false,
+            // Result cap.
+            limit: 20,
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Field weights
+// ---------------------------------------------------------------------------
+
+/// Score added each time a term matches an endpoint path.
+const WEIGHT_PATH: f64 = 10.0;
+/// Score added each time a term matches an endpoint summary.
+const WEIGHT_SUMMARY: f64 = 5.0;
+/// Score added each time a term matches an endpoint description.
+const WEIGHT_DESCRIPTION: f64 = 2.0;
+/// Score added each time a term matches a schema name.
+const WEIGHT_SCHEMA_NAME: f64 = 8.0;
+
+// ---------------------------------------------------------------------------
+// Search engine
+// ---------------------------------------------------------------------------
+
+/// Substring-based search over the endpoints and schemas of a borrowed
+/// [`SpecIndex`].
+pub struct SearchEngine<'a> {
+    /// Index being searched; borrowed for the lifetime of the engine.
+    index: &'a SpecIndex,
+}
+
+impl<'a> SearchEngine<'a> {
+    /// Creates an engine that searches `index`.
+    pub fn new(index: &'a SpecIndex) -> Self {
+        SearchEngine { index }
+    }
+
+    /// Runs `query` against the index and returns ranked results.
+    ///
+    /// The query is trimmed first; a blank query yields an empty list. It is
+    /// then split into terms (see `tokenize`). For every term, each searched
+    /// field containing it adds that field's weight to the item's raw score.
+    /// The raw score is multiplied by `1 + matched_terms / total_terms` and
+    /// stored as `round(score * 100)`.
+    ///
+    /// Endpoints are scanned when `search_paths` or `search_descriptions` is
+    /// set: paths require `search_paths`, descriptions require
+    /// `search_descriptions`, and summaries are checked under either flag.
+    /// Schemas are matched by name when `search_schemas` is set.
+    ///
+    /// Results are ordered by score (descending), endpoints before schemas,
+    /// name (ascending), then `method_rank` (items without a method last).
+    /// The list is cut to `opts.limit` and ranks are numbered from 1.
+    pub fn search(&self, query: &str, opts: &SearchOptions) -> Vec<SearchResult> {
+        let trimmed = query.trim();
+        if trimmed.is_empty() {
+            return Vec::new();
+        }
+
+        let terms = tokenize(trimmed, opts.exact);
+        let term_count = terms.len();
+        let case_sensitive = opts.case_sensitive;
+
+        // Applies the coverage boost and converts the float score to a
+        // fixed-point integer so ordering is stable across runs.
+        let quantize = |raw: f64, covered: usize| -> u32 {
+            let boost = 1.0 + (covered as f64 / term_count.max(1) as f64);
+            let boosted = raw * boost;
+            (boosted * 100.0).round() as u32
+        };
+
+        let mut hits: Vec<SearchResult> = Vec::new();
+
+        // Endpoints: path, summary and description fields.
+        if opts.search_paths || opts.search_descriptions {
+            hits.extend(self.index.endpoints.iter().filter_map(|ep| {
+                let mut raw = 0.0_f64;
+                let mut covered = 0_usize;
+                let mut found: Vec<Match> = Vec::new();
+
+                for term in &terms {
+                    // A term counts as covered if it produced any match below.
+                    let seen_before = found.len();
+
+                    if opts.search_paths && contains_term(&ep.path, term, case_sensitive) {
+                        raw += WEIGHT_PATH;
+                        found.push(Match {
+                            field: "path".into(),
+                            snippet: safe_snippet(&ep.path, term, case_sensitive),
+                        });
+                    }
+
+                    // Summaries are searched whenever either endpoint scope is
+                    // enabled, which the enclosing `if` already guarantees.
+                    if let Some(summary) = ep.summary.as_ref()
+                        && contains_term(summary, term, case_sensitive)
+                    {
+                        raw += WEIGHT_SUMMARY;
+                        found.push(Match {
+                            field: "summary".into(),
+                            snippet: safe_snippet(summary, term, case_sensitive),
+                        });
+                    }
+
+                    if opts.search_descriptions
+                        && let Some(desc) = ep.description.as_ref()
+                        && contains_term(desc, term, case_sensitive)
+                    {
+                        raw += WEIGHT_DESCRIPTION;
+                        found.push(Match {
+                            field: "description".into(),
+                            snippet: safe_snippet(desc, term, case_sensitive),
+                        });
+                    }
+
+                    if found.len() > seen_before {
+                        covered += 1;
+                    }
+                }
+
+                (raw > 0.0).then(|| SearchResult {
+                    result_type: SearchResultType::Endpoint,
+                    name: ep.path.clone(),
+                    method: Some(ep.method.clone()),
+                    summary: ep.summary.clone(),
+                    rank: 0, // filled in once the final order is known
+                    score: quantize(raw, covered),
+                    matches: found,
+                })
+            }));
+        }
+
+        // Schemas: matched by name only.
+        if opts.search_schemas {
+            hits.extend(self.index.schemas.iter().filter_map(|schema| {
+                let mut raw = 0.0_f64;
+                let mut found: Vec<Match> = Vec::new();
+
+                for term in &terms {
+                    if !contains_term(&schema.name, term, case_sensitive) {
+                        continue;
+                    }
+                    raw += WEIGHT_SCHEMA_NAME;
+                    found.push(Match {
+                        field: "schema_name".into(),
+                        snippet: safe_snippet(&schema.name, term, case_sensitive),
+                    });
+                }
+
+                // Each matching term contributes exactly one entry, so the
+                // number of matches is also the number of covered terms.
+                let covered = found.len();
+
+                (raw > 0.0).then(|| SearchResult {
+                    result_type: SearchResultType::Schema,
+                    name: schema.name.clone(),
+                    method: None,
+                    summary: None,
+                    rank: 0, // filled in once the final order is known
+                    score: quantize(raw, covered),
+                    matches: found,
+                })
+            }));
+        }
+
+        // Deterministic order: score DESC, type ordinal ASC, name ASC,
+        // method_rank ASC (items without a method sort last).
+        let method_order =
+            |hit: &SearchResult| hit.method.as_deref().map(method_rank).unwrap_or(u8::MAX);
+        hits.sort_by(|lhs, rhs| {
+            rhs.score
+                .cmp(&lhs.score)
+                .then_with(|| lhs.result_type.ordinal().cmp(&rhs.result_type.ordinal()))
+                .then_with(|| lhs.name.cmp(&rhs.name))
+                .then_with(|| method_order(lhs).cmp(&method_order(rhs)))
+        });
+
+        // Apply the limit, then number the survivors starting at 1.
+        hits.truncate(opts.limit);
+        hits.iter_mut()
+            .zip(1..)
+            .for_each(|(hit, position)| hit.rank = position);
+
+        hits
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Splits `query` into search terms.
+///
+/// In exact mode the whole query is returned as a single term; otherwise it
+/// is split on runs of whitespace.
+fn tokenize(query: &str, exact: bool) -> Vec<String> {
+    if !exact {
+        return query.split_whitespace().map(str::to_owned).collect();
+    }
+    vec![query.to_owned()]
+}
+
+/// Reports whether `needle` occurs in `haystack` as a substring.
+///
+/// When `case_sensitive` is false both strings are lowercased before the
+/// comparison.
+fn contains_term(haystack: &str, needle: &str, case_sensitive: bool) -> bool {
+    if case_sensitive {
+        return haystack.contains(needle);
+    }
+    let folded_haystack = haystack.to_lowercase();
+    let folded_needle = needle.to_lowercase();
+    folded_haystack.contains(folded_needle.as_str())
+}
+
+/// Build a Unicode-safe snippet around the first occurrence of `needle` in
+/// `haystack`.
+///
+/// The snippet body is at most 50 characters, roughly centred on the match.
+/// `...` is prepended and/or appended when text was cut off on that side. If
+/// `needle` does not occur, the first 50 characters of `haystack` are
+/// returned without ellipses.
+///
+/// When `case_sensitive` is false the search runs on lowercased copies of
+/// both strings, but the snippet is always cut from the original `haystack`.
+fn safe_snippet(haystack: &str, needle: &str, case_sensitive: bool) -> String {
+    const WINDOW: usize = 50;
+
+    // Locate the match as a *character* index into the original haystack.
+    let match_start = if case_sensitive {
+        // The byte offset comes from `haystack` itself, so slicing is safe.
+        haystack
+            .find(needle)
+            .map(|byte_pos| haystack[..byte_pos].chars().count())
+    } else {
+        let folded = haystack.to_lowercase();
+        folded.find(&needle.to_lowercase()).map(|byte_pos| {
+            // Lowercasing can change a character's byte length (e.g. U+0130
+            // grows from 2 to 3 bytes, U+212A shrinks from 3 to 1), so an
+            // offset into `folded` must not be used to slice `haystack`.
+            // Walk the original characters and count how many end at or
+            // before the folded offset.
+            let mut folded_bytes = 0_usize;
+            let mut char_index = 0_usize;
+            for ch in haystack.chars() {
+                folded_bytes += ch.to_lowercase().map(char::len_utf8).sum::<usize>();
+                if folded_bytes > byte_pos {
+                    break;
+                }
+                char_index += 1;
+            }
+            char_index
+        })
+    };
+
+    let char_start = match match_start {
+        Some(index) => index,
+        None => return haystack.chars().take(WINDOW).collect(),
+    };
+
+    let total_chars = haystack.chars().count();
+    let needle_char_len = needle.chars().count();
+
+    // Centre the window around the match: half of the space not used by the
+    // needle goes to the left, the rest follows the match.
+    let left_context = WINDOW.saturating_sub(needle_char_len) / 2;
+    let snippet_start = char_start.saturating_sub(left_context);
+    let snippet_end = (snippet_start + WINDOW).min(total_chars);
+
+    let prefix = if snippet_start > 0 { "..." } else { "" };
+    let suffix = if snippet_end < total_chars { "..." } else { "" };
+
+    // Slice by characters, never by bytes, so multi-byte text cannot be split.
+    let snippet_body: String = haystack
+        .chars()
+        .skip(snippet_start)
+        .take(snippet_end - snippet_start)
+        .collect();
+
+    format!("{prefix}{snippet_body}{suffix}")
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+// Unit tests for the search engine and its helpers, run against a small
+// hand-built petstore index.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::core::spec::{
+        IndexInfo, IndexedEndpoint, IndexedParam, IndexedSchema, IndexedTag, SpecIndex,
+    };
+
+    /// Builds the fixture shared by all tests: four endpoints (`GET /pets`,
+    /// `POST /pets`, `GET /pets/{petId}`, `GET /store/inventory`), three
+    /// schemas (`Pet`, `Error`, `PetList`) and two tags.
+    fn petstore_index() -> SpecIndex {
+        SpecIndex {
+            index_version: 1,
+            generation: 1,
+            content_hash: "sha256:test".into(),
+            openapi: "3.0.3".into(),
+            info: IndexInfo {
+                title: "Petstore".into(),
+                version: "1.0.0".into(),
+            },
+            endpoints: vec![
+                IndexedEndpoint {
+                    path: "/pets".into(),
+                    method: "GET".into(),
+                    summary: Some("List all pets".into()),
+                    description: Some("Returns a list of pets from the store".into()),
+                    operation_id: Some("listPets".into()),
+                    tags: vec!["pets".into()],
+                    deprecated: false,
+                    parameters: vec![IndexedParam {
+                        name: "limit".into(),
+                        location: "query".into(),
+                        required: false,
+                        description: Some("Max items".into()),
+                    }],
+                    request_body_required: false,
+                    request_body_content_types: vec![],
+                    security_schemes: vec![],
+                    security_required: false,
+                    operation_ptr: "/paths/~1pets/get".into(),
+                },
+                IndexedEndpoint {
+                    path: "/pets".into(),
+                    method: "POST".into(),
+                    summary: Some("Create a pet".into()),
+                    description: None,
+                    operation_id: Some("createPet".into()),
+                    tags: vec!["pets".into()],
+                    deprecated: false,
+                    parameters: vec![],
+                    request_body_required: true,
+                    request_body_content_types: vec!["application/json".into()],
+                    security_schemes: vec![],
+                    security_required: false,
+                    operation_ptr: "/paths/~1pets/post".into(),
+                },
+                IndexedEndpoint {
+                    path: "/pets/{petId}".into(),
+                    method: "GET".into(),
+                    summary: Some("Info for a specific pet".into()),
+                    description: Some("Detailed information about a single pet".into()),
+                    operation_id: Some("showPetById".into()),
+                    tags: vec!["pets".into()],
+                    deprecated: false,
+                    parameters: vec![IndexedParam {
+                        name: "petId".into(),
+                        location: "path".into(),
+                        required: true,
+                        description: Some("The id of the pet".into()),
+                    }],
+                    request_body_required: false,
+                    request_body_content_types: vec![],
+                    security_schemes: vec![],
+                    security_required: false,
+                    operation_ptr: "/paths/~1pets~1{petId}/get".into(),
+                },
+                IndexedEndpoint {
+                    path: "/store/inventory".into(),
+                    method: "GET".into(),
+                    summary: Some("Returns store inventory".into()),
+                    description: None,
+                    operation_id: Some("getInventory".into()),
+                    tags: vec!["store".into()],
+                    deprecated: false,
+                    parameters: vec![],
+                    request_body_required: false,
+                    request_body_content_types: vec![],
+                    security_schemes: vec![],
+                    security_required: false,
+                    operation_ptr: "/paths/~1store~1inventory/get".into(),
+                },
+            ],
+            schemas: vec![
+                IndexedSchema {
+                    name: "Pet".into(),
+                    schema_ptr: "/components/schemas/Pet".into(),
+                },
+                IndexedSchema {
+                    name: "Error".into(),
+                    schema_ptr: "/components/schemas/Error".into(),
+                },
+                IndexedSchema {
+                    name: "PetList".into(),
+                    schema_ptr: "/components/schemas/PetList".into(),
+                },
+            ],
+            tags: vec![
+                IndexedTag {
+                    name: "pets".into(),
+                    description: Some("Pet operations".into()),
+                    endpoint_count: 3,
+                },
+                IndexedTag {
+                    name: "store".into(),
+                    description: Some("Store operations".into()),
+                    endpoint_count: 1,
+                },
+            ],
+        }
+    }
+
+    /// A simple one-term query finds results, each with a snippet containing
+    /// the term, and ranks are numbered 1..=n.
+    #[test]
+    fn test_search_basic() {
+        let index = petstore_index();
+        let engine = SearchEngine::new(&index);
+        let opts = SearchOptions::default();
+
+        let results = engine.search("pet", &opts);
+        assert!(
+            !results.is_empty(),
+            "should find 'pet' in petstore endpoints"
+        );
+
+        // All results should mention pet somewhere
+        for r in &results {
+            let has_pet = r
+                .matches
+                .iter()
+                .any(|m| m.snippet.to_lowercase().contains("pet"));
+            assert!(has_pet, "result {:?} should match 'pet'", r.name);
+        }
+
+        // Ranks should be sequential 1-based
+        for (i, r) in results.iter().enumerate() {
+            assert_eq!(r.rank, i + 1, "rank should be 1-based sequential");
+        }
+    }
+
+    /// Running the same query twice yields identical scores, ranks and order.
+    #[test]
+    fn test_search_scores_deterministic() {
+        let index = petstore_index();
+        let engine = SearchEngine::new(&index);
+        let opts = SearchOptions::default();
+
+        let run1 = engine.search("pet", &opts);
+        let run2 = engine.search("pet", &opts);
+
+        assert_eq!(run1.len(), run2.len());
+        for (a, b) in run1.iter().zip(run2.iter()) {
+            assert_eq!(a.score, b.score, "scores should be identical across runs");
+            assert_eq!(a.rank, b.rank, "ranks should be identical across runs");
+            assert_eq!(a.name, b.name, "names should be identical across runs");
+            assert_eq!(
+                a.method, b.method,
+                "methods should be identical across runs"
+            );
+        }
+    }
+
+    /// Exact mode treats the query as one phrase and so never matches more
+    /// items than the tokenized (loose) mode.
+    #[test]
+    fn test_search_exact_mode() {
+        let index = petstore_index();
+        let engine = SearchEngine::new(&index);
+
+        // "list all" as two tokens: should match broadly
+        let loose_opts = SearchOptions {
+            exact: false,
+            ..SearchOptions::default()
+        };
+        let loose = engine.search("list all", &loose_opts);
+
+        // "list all" as exact phrase: only matches if that exact phrase appears
+        let exact_opts = SearchOptions {
+            exact: true,
+            ..SearchOptions::default()
+        };
+        let exact = engine.search("list all", &exact_opts);
+
+        // Exact should be a subset of (or equal to) loose results
+        assert!(
+            exact.len() <= loose.len(),
+            "exact mode should return fewer or equal results"
+        );
+
+        // The exact match should find "List all pets" summary
+        assert!(
+            !exact.is_empty(),
+            "exact 'list all' should match 'List all pets'"
+        );
+    }
+
+    /// Upper-case "PET" matches strictly fewer items when case matters.
+    #[test]
+    fn test_search_case_sensitive() {
+        let index = petstore_index();
+        let engine = SearchEngine::new(&index);
+
+        // Case-insensitive (default): "PET" matches "pet", "/pets", etc.
+        let insensitive = SearchOptions {
+            case_sensitive: false,
+            ..SearchOptions::default()
+        };
+        let results_insensitive = engine.search("PET", &insensitive);
+
+        // Case-sensitive: "PET" should NOT match lowercase "pet" or "/pets"
+        let sensitive = SearchOptions {
+            case_sensitive: true,
+            ..SearchOptions::default()
+        };
+        let results_sensitive = engine.search("PET", &sensitive);
+
+        assert!(
+            results_sensitive.len() < results_insensitive.len(),
+            "case-sensitive 'PET' should match fewer results than case-insensitive"
+        );
+    }
+
+    /// Snippet extraction must not panic on multi-byte text and must keep the
+    /// matched term.
+    #[test]
+    fn test_safe_snippet_unicode() {
+        // Emoji and multi-byte characters
+        let haystack = "Hello \u{1F600} world of pets and \u{1F431} cats everywhere";
+        let snippet = safe_snippet(haystack, "pets", false);
+        assert!(
+            snippet.contains("pets"),
+            "snippet should contain the search term"
+        );
+        // Must not panic on multi-byte boundaries
+    }
+
+    /// A match in the middle of a long string is truncated on both sides and
+    /// marked with ellipses.
+    #[test]
+    fn test_safe_snippet_truncation() {
+        let long = "a".repeat(200);
+        let haystack = format!("{long}needle{long}");
+        let snippet = safe_snippet(&haystack, "needle", false);
+        assert!(snippet.contains("needle"));
+        assert!(
+            snippet.contains("..."),
+            "should have ellipsis for truncation"
+        );
+        // Snippet should be around 50 chars + ellipsis markers
+        let body_len = snippet.replace("...", "").chars().count();
+        assert!(body_len <= 50, "snippet body should be at most 50 chars");
+    }
+
+    /// Empty and whitespace-only queries return no results.
+    #[test]
+    fn test_empty_query_returns_empty() {
+        let index = petstore_index();
+        let engine = SearchEngine::new(&index);
+        let opts = SearchOptions::default();
+
+        assert!(engine.search("", &opts).is_empty());
+        assert!(engine.search("   ", &opts).is_empty());
+    }
+
+    /// The result list never exceeds `limit`.
+    #[test]
+    fn test_search_limit() {
+        let index = petstore_index();
+        let engine = SearchEngine::new(&index);
+        let opts = SearchOptions {
+            limit: 2,
+            ..SearchOptions::default()
+        };
+
+        let results = engine.search("pet", &opts);
+        assert!(results.len() <= 2, "should respect limit");
+    }
+
+    /// With both endpoint scopes off, only schema hits are returned.
+    #[test]
+    fn test_search_schemas_only() {
+        let index = petstore_index();
+        let engine = SearchEngine::new(&index);
+        let opts = SearchOptions {
+            search_paths: false,
+            search_descriptions: false,
+            search_schemas: true,
+            ..SearchOptions::default()
+        };
+
+        let results = engine.search("Pet", &opts);
+        assert!(!results.is_empty());
+        for r in &results {
+            assert_eq!(
+                r.result_type,
+                SearchResultType::Schema,
+                "should only return schemas"
+            );
+        }
+    }
+
+    /// With only the path scope on, only endpoint hits are returned.
+    #[test]
+    fn test_search_paths_only() {
+        let index = petstore_index();
+        let engine = SearchEngine::new(&index);
+        let opts = SearchOptions {
+            search_paths: true,
+            search_descriptions: false,
+            search_schemas: false,
+            ..SearchOptions::default()
+        };
+
+        let results = engine.search("store", &opts);
+        assert!(!results.is_empty());
+        for r in &results {
+            assert_eq!(
+                r.result_type,
+                SearchResultType::Endpoint,
+                "should only return endpoints"
+            );
+        }
+    }
+
+    /// For a two-term query the first two results are in non-increasing
+    /// score order.
+    #[test]
+    fn test_multi_term_coverage_boost() {
+        let index = petstore_index();
+        let engine = SearchEngine::new(&index);
+        let opts = SearchOptions::default();
+
+        // "pets list" has two terms; an endpoint matching both gets higher coverage
+        let results = engine.search("pets list", &opts);
+        if results.len() >= 2 {
+            // Only the ordering of the top two results is checked here, not
+            // the size of the coverage boost itself.
+            assert!(
+                results[0].score >= results[1].score,
+                "results should be sorted by score descending"
+            );
+        }
+    }
+
+    /// A term that appears nowhere in the index yields an empty result list.
+    #[test]
+    fn test_no_match_returns_empty() {
+        let index = petstore_index();
+        let engine = SearchEngine::new(&index);
+        let opts = SearchOptions::default();
+
+        let results = engine.search("zzzznotfound", &opts);
+        assert!(
+            results.is_empty(),
+            "gibberish query should return no results"
+        );
+    }
+}