Wave 4: Full CLI command implementations - fetch, list, show, search, tags, aliases, doctor, cache lifecycle (bd-16o, bd-3km, bd-1dj, bd-acf, bd-3bl, bd-30a, bd-2s6, bd-1d4)
This commit is contained in:
@@ -236,10 +236,7 @@ impl CacheManager {
|
||||
/// Validates that index_version, generation, and index_hash all match
|
||||
/// between meta and the on-disk index. Returns `AliasNotFound` if
|
||||
/// meta.json is missing, `CacheIntegrity` on any mismatch.
|
||||
pub fn load_index(
|
||||
&self,
|
||||
alias: &str,
|
||||
) -> Result<(SpecIndex, CacheMetadata), SwaggerCliError> {
|
||||
pub fn load_index(&self, alias: &str) -> Result<(SpecIndex, CacheMetadata), SwaggerCliError> {
|
||||
validate_alias(alias)?;
|
||||
let dir = self.alias_dir(alias);
|
||||
|
||||
@@ -248,24 +245,16 @@ impl CacheManager {
|
||||
if e.kind() == std::io::ErrorKind::NotFound {
|
||||
SwaggerCliError::AliasNotFound(alias.to_string())
|
||||
} else {
|
||||
SwaggerCliError::Cache(format!(
|
||||
"Failed to read {}: {e}",
|
||||
meta_path.display()
|
||||
))
|
||||
SwaggerCliError::Cache(format!("Failed to read {}: {e}", meta_path.display()))
|
||||
}
|
||||
})?;
|
||||
let meta: CacheMetadata = serde_json::from_slice(&meta_bytes).map_err(|e| {
|
||||
SwaggerCliError::CacheIntegrity(format!(
|
||||
"Corrupt meta.json for alias '{alias}': {e}"
|
||||
))
|
||||
SwaggerCliError::CacheIntegrity(format!("Corrupt meta.json for alias '{alias}': {e}"))
|
||||
})?;
|
||||
|
||||
let index_path = dir.join("index.json");
|
||||
let index_bytes = fs::read(&index_path).map_err(|e| {
|
||||
SwaggerCliError::Cache(format!(
|
||||
"Failed to read {}: {e}",
|
||||
index_path.display()
|
||||
))
|
||||
SwaggerCliError::Cache(format!("Failed to read {}: {e}", index_path.display()))
|
||||
})?;
|
||||
|
||||
let actual_hash = compute_hash(&index_bytes);
|
||||
@@ -277,9 +266,7 @@ impl CacheManager {
|
||||
}
|
||||
|
||||
let index: SpecIndex = serde_json::from_slice(&index_bytes).map_err(|e| {
|
||||
SwaggerCliError::CacheIntegrity(format!(
|
||||
"Corrupt index.json for alias '{alias}': {e}"
|
||||
))
|
||||
SwaggerCliError::CacheIntegrity(format!("Corrupt index.json for alias '{alias}': {e}"))
|
||||
})?;
|
||||
|
||||
if meta.index_version != index.index_version {
|
||||
@@ -317,10 +304,7 @@ impl CacheManager {
|
||||
) -> Result<serde_json::Value, SwaggerCliError> {
|
||||
let raw_path = self.alias_dir(alias).join("raw.json");
|
||||
let raw_bytes = fs::read(&raw_path).map_err(|e| {
|
||||
SwaggerCliError::Cache(format!(
|
||||
"Failed to read {}: {e}",
|
||||
raw_path.display()
|
||||
))
|
||||
SwaggerCliError::Cache(format!("Failed to read {}: {e}", raw_path.display()))
|
||||
})?;
|
||||
|
||||
let actual_hash = compute_hash(&raw_bytes);
|
||||
@@ -331,13 +315,9 @@ impl CacheManager {
|
||||
)));
|
||||
}
|
||||
|
||||
let value: serde_json::Value =
|
||||
serde_json::from_slice(&raw_bytes).map_err(|e| {
|
||||
SwaggerCliError::Cache(format!(
|
||||
"Failed to parse raw.json for '{}': {e}",
|
||||
alias
|
||||
))
|
||||
})?;
|
||||
let value: serde_json::Value = serde_json::from_slice(&raw_bytes).map_err(|e| {
|
||||
SwaggerCliError::Cache(format!("Failed to parse raw.json for '{}': {e}", alias))
|
||||
})?;
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
@@ -697,14 +677,12 @@ mod tests {
|
||||
|
||||
manager
|
||||
.write_cache(
|
||||
"api1", b"src1", b"{}", &index, None, "1.0", "API 1", "json",
|
||||
None, None, None,
|
||||
"api1", b"src1", b"{}", &index, None, "1.0", "API 1", "json", None, None, None,
|
||||
)
|
||||
.unwrap();
|
||||
manager
|
||||
.write_cache(
|
||||
"api2", b"src2", b"{}", &index, None, "2.0", "API 2", "yaml",
|
||||
None, None, None,
|
||||
"api2", b"src2", b"{}", &index, None, "2.0", "API 2", "yaml", None, None, None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -72,19 +72,18 @@ fn is_blocked_mapped_v4(v6: &std::net::Ipv6Addr) -> bool {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn validate_url(url: &str, allow_insecure_http: bool) -> Result<Url, SwaggerCliError> {
|
||||
let parsed = Url::parse(url).map_err(|e| {
|
||||
SwaggerCliError::InvalidSpec(format!("invalid URL '{url}': {e}"))
|
||||
})?;
|
||||
let parsed = Url::parse(url)
|
||||
.map_err(|e| SwaggerCliError::InvalidSpec(format!("invalid URL '{url}': {e}")))?;
|
||||
|
||||
match parsed.scheme() {
|
||||
"https" => Ok(parsed),
|
||||
"http" if allow_insecure_http => Ok(parsed),
|
||||
"http" => Err(SwaggerCliError::PolicyBlocked(
|
||||
format!("HTTP is not allowed for '{url}'. Use --allow-insecure-http to override."),
|
||||
)),
|
||||
other => Err(SwaggerCliError::InvalidSpec(
|
||||
format!("unsupported scheme '{other}' in URL '{url}'"),
|
||||
)),
|
||||
"http" => Err(SwaggerCliError::PolicyBlocked(format!(
|
||||
"HTTP is not allowed for '{url}'. Use --allow-insecure-http to override."
|
||||
))),
|
||||
other => Err(SwaggerCliError::InvalidSpec(format!(
|
||||
"unsupported scheme '{other}' in URL '{url}'"
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,16 +104,16 @@ async fn resolve_and_check(
|
||||
let addrs: Vec<_> = match lookup_host(&addr).await {
|
||||
Ok(iter) => iter.collect(),
|
||||
Err(e) => {
|
||||
return Err(SwaggerCliError::InvalidSpec(
|
||||
format!("DNS resolution failed for '{host}': {e}"),
|
||||
));
|
||||
return Err(SwaggerCliError::InvalidSpec(format!(
|
||||
"DNS resolution failed for '{host}': {e}"
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
if addrs.is_empty() {
|
||||
return Err(SwaggerCliError::InvalidSpec(
|
||||
format!("DNS resolution returned no addresses for '{host}'"),
|
||||
));
|
||||
return Err(SwaggerCliError::InvalidSpec(format!(
|
||||
"DNS resolution returned no addresses for '{host}'"
|
||||
)));
|
||||
}
|
||||
|
||||
for socket_addr in &addrs {
|
||||
@@ -178,9 +177,9 @@ impl AsyncHttpClient {
|
||||
pub async fn fetch_spec(&self, url: &str) -> Result<FetchResult, SwaggerCliError> {
|
||||
let parsed = validate_url(url, self.allow_insecure_http)?;
|
||||
|
||||
let host = parsed.host_str().ok_or_else(|| {
|
||||
SwaggerCliError::InvalidSpec(format!("URL '{url}' has no host"))
|
||||
})?;
|
||||
let host = parsed
|
||||
.host_str()
|
||||
.ok_or_else(|| SwaggerCliError::InvalidSpec(format!("URL '{url}' has no host")))?;
|
||||
let port = parsed.port_or_known_default().unwrap_or(443);
|
||||
|
||||
resolve_and_check(host, port, &self.allowed_private_hosts).await?;
|
||||
@@ -215,11 +214,7 @@ impl AsyncHttpClient {
|
||||
attempts += 1;
|
||||
if attempts > self.max_retries {
|
||||
return Err(SwaggerCliError::Network(
|
||||
client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.unwrap_err(),
|
||||
client.get(url).send().await.unwrap_err(),
|
||||
));
|
||||
}
|
||||
let delay = self.retry_delay(&response, attempts);
|
||||
@@ -370,7 +365,9 @@ mod tests {
|
||||
#[test]
|
||||
fn test_ssrf_blocks_loopback() {
|
||||
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))));
|
||||
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(127, 255, 255, 254))));
|
||||
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(
|
||||
127, 255, 255, 254
|
||||
))));
|
||||
assert!(is_ip_blocked(&IpAddr::V6(Ipv6Addr::LOCALHOST)));
|
||||
}
|
||||
|
||||
@@ -392,7 +389,9 @@ mod tests {
|
||||
#[test]
|
||||
fn test_ssrf_blocks_link_local() {
|
||||
// IPv4 link-local (169.254.x.x) -- includes the AWS metadata endpoint
|
||||
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(169, 254, 169, 254))));
|
||||
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(
|
||||
169, 254, 169, 254
|
||||
))));
|
||||
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(169, 254, 0, 1))));
|
||||
|
||||
// IPv6 link-local (fe80::/10)
|
||||
@@ -441,10 +440,7 @@ mod tests {
|
||||
fn test_url_allows_https() {
|
||||
let result = validate_url("https://example.com/spec.json", false);
|
||||
assert!(result.is_ok());
|
||||
assert_eq!(
|
||||
result.unwrap().as_str(),
|
||||
"https://example.com/spec.json"
|
||||
);
|
||||
assert_eq!(result.unwrap().as_str(), "https://example.com/spec.json");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -457,7 +453,10 @@ mod tests {
|
||||
fn test_url_rejects_unsupported_scheme() {
|
||||
let result = validate_url("ftp://example.com/spec.json", false);
|
||||
assert!(result.is_err());
|
||||
assert!(matches!(result.unwrap_err(), SwaggerCliError::InvalidSpec(_)));
|
||||
assert!(matches!(
|
||||
result.unwrap_err(),
|
||||
SwaggerCliError::InvalidSpec(_)
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -505,8 +504,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_resolve_and_check_skips_allowed_host() {
|
||||
let result =
|
||||
resolve_and_check("localhost", 80, &["localhost".into()]).await;
|
||||
let result = resolve_and_check("localhost", 80, &["localhost".into()]).await;
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,4 +2,6 @@ pub mod cache;
|
||||
pub mod config;
|
||||
pub mod http;
|
||||
pub mod indexer;
|
||||
pub mod refs;
|
||||
pub mod search;
|
||||
pub mod spec;
|
||||
|
||||
295
src/core/refs.rs
Normal file
295
src/core/refs.rs
Normal file
@@ -0,0 +1,295 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
/// Resolve a JSON Pointer (RFC 6901) against a root value.
|
||||
///
|
||||
/// Unescapes `~1` -> `/` and `~0` -> `~` (in that order per spec).
|
||||
/// Returns `None` if the pointer is empty, malformed, or the path does not exist.
|
||||
pub fn resolve_json_pointer<'a>(root: &'a Value, pointer: &str) -> Option<&'a Value> {
|
||||
if pointer.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let stripped = pointer.strip_prefix('/')?;
|
||||
|
||||
let mut current = root;
|
||||
for token in stripped.split('/') {
|
||||
let unescaped = token.replace("~1", "/").replace("~0", "~");
|
||||
match current {
|
||||
Value::Object(map) => {
|
||||
current = map.get(&unescaped)?;
|
||||
}
|
||||
Value::Array(arr) => {
|
||||
let idx: usize = unescaped.parse().ok()?;
|
||||
current = arr.get(idx)?;
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
|
||||
Some(current)
|
||||
}
|
||||
|
||||
/// Expand all `$ref` entries in `value` by inlining the referenced content from `root`.
|
||||
///
|
||||
/// - Internal refs (starting with `#/`) are resolved via JSON pointer navigation.
|
||||
/// - External refs (not starting with `#/`) are replaced with `{"$external_ref": "..."}`.
|
||||
/// - Circular refs (already visited in the current path) are replaced with `{"$circular_ref": "..."}`.
|
||||
/// - Expansion stops at `max_depth` to prevent unbounded recursion.
|
||||
pub fn expand_refs(value: &mut Value, root: &Value, max_depth: u32) {
|
||||
let mut visited = HashSet::new();
|
||||
expand_recursive(value, root, max_depth, 0, &mut visited);
|
||||
}
|
||||
|
||||
fn expand_recursive(
|
||||
value: &mut Value,
|
||||
root: &Value,
|
||||
max_depth: u32,
|
||||
depth: u32,
|
||||
visited: &mut HashSet<String>,
|
||||
) {
|
||||
if let Some(ref_str) = extract_ref_if_present(value) {
|
||||
if !ref_str.starts_with("#/") {
|
||||
*value = serde_json::json!({ "$external_ref": ref_str });
|
||||
return;
|
||||
}
|
||||
|
||||
if depth >= max_depth || visited.contains(&ref_str) {
|
||||
*value = serde_json::json!({ "$circular_ref": ref_str });
|
||||
return;
|
||||
}
|
||||
|
||||
let pointer = &ref_str[1..]; // strip leading '#'
|
||||
if let Some(resolved) = resolve_json_pointer(root, pointer) {
|
||||
let mut expanded = resolved.clone();
|
||||
visited.insert(ref_str);
|
||||
expand_recursive(&mut expanded, root, max_depth, depth + 1, visited);
|
||||
// Do not remove from visited: keep it for sibling detection within the same
|
||||
// subtree path. The caller manages the visited set across siblings.
|
||||
*value = expanded;
|
||||
}
|
||||
// If pointer doesn't resolve, leave the $ref as-is (broken ref)
|
||||
return;
|
||||
}
|
||||
|
||||
match value {
|
||||
Value::Object(map) => {
|
||||
for val in map.values_mut() {
|
||||
expand_recursive(val, root, max_depth, depth, visited);
|
||||
}
|
||||
}
|
||||
Value::Array(arr) => {
|
||||
for item in arr.iter_mut() {
|
||||
expand_recursive(item, root, max_depth, depth, visited);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_ref_if_present(value: &Value) -> Option<String> {
|
||||
let map = value.as_object()?;
|
||||
let ref_val = map.get("$ref")?;
|
||||
Some(ref_val.as_str()?.to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
|
||||
#[test]
|
||||
fn test_resolve_json_pointer() {
|
||||
let root = json!({
|
||||
"components": {
|
||||
"schemas": {
|
||||
"Pet": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": { "type": "string" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let result = resolve_json_pointer(&root, "/components/schemas/Pet");
|
||||
assert!(result.is_some());
|
||||
let pet = result.unwrap();
|
||||
assert_eq!(pet["type"], "object");
|
||||
assert_eq!(pet["properties"]["name"]["type"], "string");
|
||||
|
||||
// Empty pointer
|
||||
assert!(resolve_json_pointer(&root, "").is_none());
|
||||
|
||||
// Missing path
|
||||
assert!(resolve_json_pointer(&root, "/components/schemas/Dog").is_none());
|
||||
|
||||
// No leading slash
|
||||
assert!(resolve_json_pointer(&root, "components").is_none());
|
||||
|
||||
// Escaped path segments: ~1 -> /
|
||||
let root_with_slash = json!({
|
||||
"paths": {
|
||||
"/pets/{petId}": {
|
||||
"get": { "summary": "Get pet" }
|
||||
}
|
||||
}
|
||||
});
|
||||
let result = resolve_json_pointer(&root_with_slash, "/paths/~1pets~1{petId}/get");
|
||||
assert!(result.is_some());
|
||||
assert_eq!(result.unwrap()["summary"], "Get pet");
|
||||
|
||||
// Escaped: ~0 -> ~
|
||||
let root_with_tilde = json!({
|
||||
"x~y": "found"
|
||||
});
|
||||
let result = resolve_json_pointer(&root_with_tilde, "/x~0y");
|
||||
assert!(result.is_some());
|
||||
assert_eq!(result.unwrap(), "found");
|
||||
|
||||
// Array indexing
|
||||
let root_with_array = json!({
|
||||
"items": ["a", "b", "c"]
|
||||
});
|
||||
let result = resolve_json_pointer(&root_with_array, "/items/1");
|
||||
assert!(result.is_some());
|
||||
assert_eq!(result.unwrap(), "b");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_expand_basic_ref() {
|
||||
let root = json!({
|
||||
"components": {
|
||||
"schemas": {
|
||||
"Pet": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": { "type": "string" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut value = json!({
|
||||
"schema": { "$ref": "#/components/schemas/Pet" }
|
||||
});
|
||||
|
||||
expand_refs(&mut value, &root, 10);
|
||||
|
||||
assert_eq!(value["schema"]["type"], "object");
|
||||
assert_eq!(value["schema"]["properties"]["name"]["type"], "string");
|
||||
// $ref key should be gone (replaced with inlined content)
|
||||
assert!(value["schema"]["$ref"].is_null());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_expand_circular_ref() {
|
||||
let root = json!({
|
||||
"components": {
|
||||
"schemas": {
|
||||
"Node": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"child": { "$ref": "#/components/schemas/Node" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut value = json!({
|
||||
"schema": { "$ref": "#/components/schemas/Node" }
|
||||
});
|
||||
|
||||
expand_refs(&mut value, &root, 5);
|
||||
|
||||
// The first expansion should succeed
|
||||
assert_eq!(value["schema"]["type"], "object");
|
||||
|
||||
// The recursive child ref should be replaced with $circular_ref
|
||||
let child = &value["schema"]["properties"]["child"];
|
||||
assert_eq!(child["$circular_ref"], "#/components/schemas/Node");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_expand_external_ref() {
|
||||
let root = json!({});
|
||||
|
||||
let mut value = json!({
|
||||
"schema": { "$ref": "https://example.com/schemas/Pet.json" }
|
||||
});
|
||||
|
||||
expand_refs(&mut value, &root, 5);
|
||||
|
||||
assert_eq!(
|
||||
value["schema"]["$external_ref"],
|
||||
"https://example.com/schemas/Pet.json"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_expand_max_depth() {
|
||||
let root = json!({
|
||||
"components": {
|
||||
"schemas": {
|
||||
"A": {
|
||||
"nested": { "$ref": "#/components/schemas/B" }
|
||||
},
|
||||
"B": {
|
||||
"value": "deep"
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// With max_depth=1, the first ref resolves but nested ref hits depth limit
|
||||
let mut value = json!({ "$ref": "#/components/schemas/A" });
|
||||
expand_refs(&mut value, &root, 1);
|
||||
|
||||
// A should be expanded
|
||||
assert!(value.get("nested").is_some());
|
||||
// B ref should be left as $circular_ref due to max_depth
|
||||
assert_eq!(value["nested"]["$circular_ref"], "#/components/schemas/B");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_expand_array_refs() {
|
||||
let root = json!({
|
||||
"components": {
|
||||
"schemas": {
|
||||
"Tag": { "type": "string" }
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let mut value = json!({
|
||||
"items": [
|
||||
{ "$ref": "#/components/schemas/Tag" },
|
||||
{ "type": "integer" }
|
||||
]
|
||||
});
|
||||
|
||||
expand_refs(&mut value, &root, 5);
|
||||
|
||||
assert_eq!(value["items"][0]["type"], "string");
|
||||
assert_eq!(value["items"][1]["type"], "integer");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_expand_broken_ref_left_as_is() {
|
||||
let root = json!({});
|
||||
|
||||
let mut value = json!({
|
||||
"schema": { "$ref": "#/components/schemas/Missing" }
|
||||
});
|
||||
|
||||
let original = value.clone();
|
||||
expand_refs(&mut value, &root, 5);
|
||||
|
||||
// Broken internal ref left untouched
|
||||
assert_eq!(value, original);
|
||||
}
|
||||
}
|
||||
634
src/core/search.rs
Normal file
634
src/core/search.rs
Normal file
@@ -0,0 +1,634 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use super::indexer::method_rank;
|
||||
use super::spec::SpecIndex;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct SearchResult {
|
||||
pub result_type: SearchResultType,
|
||||
pub name: String,
|
||||
pub method: Option<String>,
|
||||
pub summary: Option<String>,
|
||||
pub rank: usize,
|
||||
pub score: u32,
|
||||
pub matches: Vec<Match>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SearchResultType {
|
||||
Endpoint,
|
||||
Schema,
|
||||
}
|
||||
|
||||
impl SearchResultType {
|
||||
fn ordinal(self) -> u8 {
|
||||
match self {
|
||||
Self::Endpoint => 0,
|
||||
Self::Schema => 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct Match {
|
||||
pub field: String,
|
||||
pub snippet: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SearchOptions {
|
||||
pub search_paths: bool,
|
||||
pub search_descriptions: bool,
|
||||
pub search_schemas: bool,
|
||||
pub case_sensitive: bool,
|
||||
pub exact: bool,
|
||||
pub limit: usize,
|
||||
}
|
||||
|
||||
impl Default for SearchOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
search_paths: true,
|
||||
search_descriptions: true,
|
||||
search_schemas: true,
|
||||
case_sensitive: false,
|
||||
exact: false,
|
||||
limit: 20,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Field weights
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const WEIGHT_PATH: f64 = 10.0;
|
||||
const WEIGHT_SUMMARY: f64 = 5.0;
|
||||
const WEIGHT_DESCRIPTION: f64 = 2.0;
|
||||
const WEIGHT_SCHEMA_NAME: f64 = 8.0;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Search engine
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub struct SearchEngine<'a> {
|
||||
index: &'a SpecIndex,
|
||||
}
|
||||
|
||||
impl<'a> SearchEngine<'a> {
|
||||
pub fn new(index: &'a SpecIndex) -> Self {
|
||||
Self { index }
|
||||
}
|
||||
|
||||
pub fn search(&self, query: &str, opts: &SearchOptions) -> Vec<SearchResult> {
|
||||
let query = query.trim();
|
||||
if query.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let terms = tokenize(query, opts.exact);
|
||||
let total_terms = terms.len();
|
||||
|
||||
let mut results: Vec<SearchResult> = Vec::new();
|
||||
|
||||
// Search endpoints
|
||||
if opts.search_paths || opts.search_descriptions {
|
||||
for ep in &self.index.endpoints {
|
||||
let mut raw_score: f64 = 0.0;
|
||||
let mut matched_terms: usize = 0;
|
||||
let mut matches: Vec<Match> = Vec::new();
|
||||
|
||||
for term in &terms {
|
||||
let mut term_matched = false;
|
||||
|
||||
if opts.search_paths && contains_term(&ep.path, term, opts.case_sensitive) {
|
||||
raw_score += WEIGHT_PATH;
|
||||
matches.push(Match {
|
||||
field: "path".into(),
|
||||
snippet: safe_snippet(&ep.path, term, opts.case_sensitive),
|
||||
});
|
||||
term_matched = true;
|
||||
}
|
||||
|
||||
if (opts.search_descriptions || opts.search_paths)
|
||||
&& let Some(ref summary) = ep.summary
|
||||
&& contains_term(summary, term, opts.case_sensitive)
|
||||
{
|
||||
raw_score += WEIGHT_SUMMARY;
|
||||
matches.push(Match {
|
||||
field: "summary".into(),
|
||||
snippet: safe_snippet(summary, term, opts.case_sensitive),
|
||||
});
|
||||
term_matched = true;
|
||||
}
|
||||
|
||||
if opts.search_descriptions
|
||||
&& let Some(ref desc) = ep.description
|
||||
&& contains_term(desc, term, opts.case_sensitive)
|
||||
{
|
||||
raw_score += WEIGHT_DESCRIPTION;
|
||||
matches.push(Match {
|
||||
field: "description".into(),
|
||||
snippet: safe_snippet(desc, term, opts.case_sensitive),
|
||||
});
|
||||
term_matched = true;
|
||||
}
|
||||
|
||||
if term_matched {
|
||||
matched_terms += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if raw_score > 0.0 {
|
||||
let coverage_boost = 1.0 + (matched_terms as f64 / total_terms.max(1) as f64);
|
||||
let final_score = raw_score * coverage_boost;
|
||||
let quantized = (final_score * 100.0).round() as u32;
|
||||
|
||||
results.push(SearchResult {
|
||||
result_type: SearchResultType::Endpoint,
|
||||
name: ep.path.clone(),
|
||||
method: Some(ep.method.clone()),
|
||||
summary: ep.summary.clone(),
|
||||
rank: 0, // assigned after sort
|
||||
score: quantized,
|
||||
matches,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Search schemas
|
||||
if opts.search_schemas {
|
||||
for schema in &self.index.schemas {
|
||||
let mut raw_score: f64 = 0.0;
|
||||
let mut matched_terms: usize = 0;
|
||||
let mut matches: Vec<Match> = Vec::new();
|
||||
|
||||
for term in &terms {
|
||||
if contains_term(&schema.name, term, opts.case_sensitive) {
|
||||
raw_score += WEIGHT_SCHEMA_NAME;
|
||||
matches.push(Match {
|
||||
field: "schema_name".into(),
|
||||
snippet: safe_snippet(&schema.name, term, opts.case_sensitive),
|
||||
});
|
||||
matched_terms += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if raw_score > 0.0 {
|
||||
let coverage_boost = 1.0 + (matched_terms as f64 / total_terms.max(1) as f64);
|
||||
let final_score = raw_score * coverage_boost;
|
||||
let quantized = (final_score * 100.0).round() as u32;
|
||||
|
||||
results.push(SearchResult {
|
||||
result_type: SearchResultType::Schema,
|
||||
name: schema.name.clone(),
|
||||
method: None,
|
||||
summary: None,
|
||||
rank: 0,
|
||||
score: quantized,
|
||||
matches,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Deterministic sort: score DESC, type ordinal ASC, name ASC, method_rank ASC
|
||||
results.sort_by(|a, b| {
|
||||
b.score
|
||||
.cmp(&a.score)
|
||||
.then_with(|| a.result_type.ordinal().cmp(&b.result_type.ordinal()))
|
||||
.then_with(|| a.name.cmp(&b.name))
|
||||
.then_with(|| {
|
||||
let a_rank = a.method.as_deref().map(method_rank).unwrap_or(u8::MAX);
|
||||
let b_rank = b.method.as_deref().map(method_rank).unwrap_or(u8::MAX);
|
||||
a_rank.cmp(&b_rank)
|
||||
})
|
||||
});
|
||||
|
||||
// Assign 1-based ranks and apply limit
|
||||
results.truncate(opts.limit);
|
||||
for (i, result) in results.iter_mut().enumerate() {
|
||||
result.rank = i + 1;
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn tokenize(query: &str, exact: bool) -> Vec<String> {
|
||||
if exact {
|
||||
vec![query.to_string()]
|
||||
} else {
|
||||
query.split_whitespace().map(String::from).collect()
|
||||
}
|
||||
}
|
||||
|
||||
fn contains_term(haystack: &str, needle: &str, case_sensitive: bool) -> bool {
|
||||
if case_sensitive {
|
||||
haystack.contains(needle)
|
||||
} else {
|
||||
let h = haystack.to_lowercase();
|
||||
let n = needle.to_lowercase();
|
||||
h.contains(&n)
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a Unicode-safe snippet around the first occurrence of `needle` in
|
||||
/// `haystack`. The context window is 50 characters. Ellipses are added when
|
||||
/// the snippet is truncated.
|
||||
fn safe_snippet(haystack: &str, needle: &str, case_sensitive: bool) -> String {
|
||||
let (h_search, n_search) = if case_sensitive {
|
||||
(haystack.to_string(), needle.to_string())
|
||||
} else {
|
||||
(haystack.to_lowercase(), needle.to_lowercase())
|
||||
};
|
||||
|
||||
let byte_pos = match h_search.find(&n_search) {
|
||||
Some(pos) => pos,
|
||||
None => return haystack.chars().take(50).collect(),
|
||||
};
|
||||
|
||||
// Convert byte position to char index.
|
||||
let char_start = haystack[..byte_pos].chars().count();
|
||||
let needle_char_len = needle.chars().count();
|
||||
let haystack_chars: Vec<char> = haystack.chars().collect();
|
||||
let total_chars = haystack_chars.len();
|
||||
|
||||
const WINDOW: usize = 50;
|
||||
|
||||
// Centre the window around the match.
|
||||
let context_budget = WINDOW.saturating_sub(needle_char_len);
|
||||
let left_context = context_budget / 2;
|
||||
|
||||
let snippet_start = char_start.saturating_sub(left_context);
|
||||
let snippet_end = (snippet_start + WINDOW).min(total_chars);
|
||||
|
||||
let prefix = if snippet_start > 0 { "..." } else { "" };
|
||||
let suffix = if snippet_end < total_chars { "..." } else { "" };
|
||||
|
||||
let snippet_body: String = haystack_chars[snippet_start..snippet_end].iter().collect();
|
||||
|
||||
format!("{prefix}{snippet_body}{suffix}")
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::core::spec::{
|
||||
IndexInfo, IndexedEndpoint, IndexedParam, IndexedSchema, IndexedTag, SpecIndex,
|
||||
};
|
||||
|
||||
fn petstore_index() -> SpecIndex {
|
||||
SpecIndex {
|
||||
index_version: 1,
|
||||
generation: 1,
|
||||
content_hash: "sha256:test".into(),
|
||||
openapi: "3.0.3".into(),
|
||||
info: IndexInfo {
|
||||
title: "Petstore".into(),
|
||||
version: "1.0.0".into(),
|
||||
},
|
||||
endpoints: vec![
|
||||
IndexedEndpoint {
|
||||
path: "/pets".into(),
|
||||
method: "GET".into(),
|
||||
summary: Some("List all pets".into()),
|
||||
description: Some("Returns a list of pets from the store".into()),
|
||||
operation_id: Some("listPets".into()),
|
||||
tags: vec!["pets".into()],
|
||||
deprecated: false,
|
||||
parameters: vec![IndexedParam {
|
||||
name: "limit".into(),
|
||||
location: "query".into(),
|
||||
required: false,
|
||||
description: Some("Max items".into()),
|
||||
}],
|
||||
request_body_required: false,
|
||||
request_body_content_types: vec![],
|
||||
security_schemes: vec![],
|
||||
security_required: false,
|
||||
operation_ptr: "/paths/~1pets/get".into(),
|
||||
},
|
||||
IndexedEndpoint {
|
||||
path: "/pets".into(),
|
||||
method: "POST".into(),
|
||||
summary: Some("Create a pet".into()),
|
||||
description: None,
|
||||
operation_id: Some("createPet".into()),
|
||||
tags: vec!["pets".into()],
|
||||
deprecated: false,
|
||||
parameters: vec![],
|
||||
request_body_required: true,
|
||||
request_body_content_types: vec!["application/json".into()],
|
||||
security_schemes: vec![],
|
||||
security_required: false,
|
||||
operation_ptr: "/paths/~1pets/post".into(),
|
||||
},
|
||||
IndexedEndpoint {
|
||||
path: "/pets/{petId}".into(),
|
||||
method: "GET".into(),
|
||||
summary: Some("Info for a specific pet".into()),
|
||||
description: Some("Detailed information about a single pet".into()),
|
||||
operation_id: Some("showPetById".into()),
|
||||
tags: vec!["pets".into()],
|
||||
deprecated: false,
|
||||
parameters: vec![IndexedParam {
|
||||
name: "petId".into(),
|
||||
location: "path".into(),
|
||||
required: true,
|
||||
description: Some("The id of the pet".into()),
|
||||
}],
|
||||
request_body_required: false,
|
||||
request_body_content_types: vec![],
|
||||
security_schemes: vec![],
|
||||
security_required: false,
|
||||
operation_ptr: "/paths/~1pets~1{petId}/get".into(),
|
||||
},
|
||||
IndexedEndpoint {
|
||||
path: "/store/inventory".into(),
|
||||
method: "GET".into(),
|
||||
summary: Some("Returns store inventory".into()),
|
||||
description: None,
|
||||
operation_id: Some("getInventory".into()),
|
||||
tags: vec!["store".into()],
|
||||
deprecated: false,
|
||||
parameters: vec![],
|
||||
request_body_required: false,
|
||||
request_body_content_types: vec![],
|
||||
security_schemes: vec![],
|
||||
security_required: false,
|
||||
operation_ptr: "/paths/~1store~1inventory/get".into(),
|
||||
},
|
||||
],
|
||||
schemas: vec![
|
||||
IndexedSchema {
|
||||
name: "Pet".into(),
|
||||
schema_ptr: "/components/schemas/Pet".into(),
|
||||
},
|
||||
IndexedSchema {
|
||||
name: "Error".into(),
|
||||
schema_ptr: "/components/schemas/Error".into(),
|
||||
},
|
||||
IndexedSchema {
|
||||
name: "PetList".into(),
|
||||
schema_ptr: "/components/schemas/PetList".into(),
|
||||
},
|
||||
],
|
||||
tags: vec![
|
||||
IndexedTag {
|
||||
name: "pets".into(),
|
||||
description: Some("Pet operations".into()),
|
||||
endpoint_count: 3,
|
||||
},
|
||||
IndexedTag {
|
||||
name: "store".into(),
|
||||
description: Some("Store operations".into()),
|
||||
endpoint_count: 1,
|
||||
},
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_search_basic() {
|
||||
let index = petstore_index();
|
||||
let engine = SearchEngine::new(&index);
|
||||
let opts = SearchOptions::default();
|
||||
|
||||
let results = engine.search("pet", &opts);
|
||||
assert!(
|
||||
!results.is_empty(),
|
||||
"should find 'pet' in petstore endpoints"
|
||||
);
|
||||
|
||||
// All results should mention pet somewhere
|
||||
for r in &results {
|
||||
let has_pet = r
|
||||
.matches
|
||||
.iter()
|
||||
.any(|m| m.snippet.to_lowercase().contains("pet"));
|
||||
assert!(has_pet, "result {:?} should match 'pet'", r.name);
|
||||
}
|
||||
|
||||
// Ranks should be sequential 1-based
|
||||
for (i, r) in results.iter().enumerate() {
|
||||
assert_eq!(r.rank, i + 1, "rank should be 1-based sequential");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn test_search_scores_deterministic() {
    // The same query issued twice must produce byte-for-byte identical
    // rankings — no hidden randomness or hash-order dependence.
    let idx = petstore_index();
    let eng = SearchEngine::new(&idx);
    let options = SearchOptions::default();

    let first = eng.search("pet", &options);
    let second = eng.search("pet", &options);

    assert_eq!(first.len(), second.len());
    for (lhs, rhs) in first.iter().zip(&second) {
        assert_eq!(lhs.score, rhs.score, "scores should be identical across runs");
        assert_eq!(lhs.rank, rhs.rank, "ranks should be identical across runs");
        assert_eq!(lhs.name, rhs.name, "names should be identical across runs");
        assert_eq!(
            lhs.method, rhs.method,
            "methods should be identical across runs"
        );
    }
}
|
||||
|
||||
#[test]
fn test_search_exact_mode() {
    let idx = petstore_index();
    let eng = SearchEngine::new(&idx);

    // Tokenized mode: "list all" matches entries containing either term.
    let loose_hits = eng.search(
        "list all",
        &SearchOptions {
            exact: false,
            ..SearchOptions::default()
        },
    );

    // Phrase mode: only entries containing the literal phrase survive.
    let exact_hits = eng.search(
        "list all",
        &SearchOptions {
            exact: true,
            ..SearchOptions::default()
        },
    );

    // Phrase matches are necessarily a subset of token matches.
    assert!(
        exact_hits.len() <= loose_hits.len(),
        "exact mode should return fewer or equal results"
    );

    // The fixture contains a "List all pets" summary the phrase should hit.
    assert!(
        !exact_hits.is_empty(),
        "exact 'list all' should match 'List all pets'"
    );
}
|
||||
|
||||
#[test]
fn test_search_case_sensitive() {
    let idx = petstore_index();
    let eng = SearchEngine::new(&idx);

    // With case folding (the default), the uppercase query still hits
    // lowercase content like "pet" and "/pets".
    let folded = eng.search(
        "PET",
        &SearchOptions {
            case_sensitive: false,
            ..SearchOptions::default()
        },
    );

    // Without folding, "PET" cannot hit that lowercase content.
    let strict = eng.search(
        "PET",
        &SearchOptions {
            case_sensitive: true,
            ..SearchOptions::default()
        },
    );

    assert!(
        strict.len() < folded.len(),
        "case-sensitive 'PET' should match fewer results than case-insensitive"
    );
}
|
||||
|
||||
#[test]
fn test_safe_snippet_unicode() {
    // The haystack mixes ASCII with multi-byte emoji; slicing on a byte
    // offset inside an emoji would panic, so this doubles as a no-panic
    // regression check for snippet windowing.
    let haystack = "Hello \u{1F600} world of pets and \u{1F431} cats everywhere";
    let snippet = safe_snippet(haystack, "pets", false);
    assert!(
        snippet.contains("pets"),
        "snippet should contain the search term"
    );
}
|
||||
|
||||
#[test]
fn test_safe_snippet_truncation() {
    // Bury the needle between 200-char runs of padding so the snippet
    // must truncate on both sides.
    let long = "a".repeat(200);
    let haystack = format!("{long}needle{long}");
    let snippet = safe_snippet(&haystack, "needle", false);

    assert!(snippet.contains("needle"));
    assert!(
        snippet.contains("..."),
        "should have ellipsis for truncation"
    );

    // After stripping ellipsis markers, at most 50 chars of body remain.
    let body_len = snippet.replace("...", "").chars().count();
    assert!(body_len <= 50, "snippet body should be at most 50 chars");
}
|
||||
|
||||
#[test]
fn test_empty_query_returns_empty() {
    let idx = petstore_index();
    let eng = SearchEngine::new(&idx);
    let options = SearchOptions::default();

    // Neither an empty string nor pure whitespace should produce hits.
    for query in ["", " "] {
        assert!(eng.search(query, &options).is_empty());
    }
}
|
||||
|
||||
#[test]
fn test_search_limit() {
    let idx = petstore_index();
    let eng = SearchEngine::new(&idx);

    // Cap the result set at two entries regardless of how many match.
    let options = SearchOptions {
        limit: 2,
        ..SearchOptions::default()
    };

    let hits = eng.search("pet", &options);
    assert!(hits.len() <= 2, "should respect limit");
}
|
||||
|
||||
#[test]
fn test_search_schemas_only() {
    let idx = petstore_index();
    let eng = SearchEngine::new(&idx);

    // Disable path and description search so only schema names remain
    // as match candidates.
    let options = SearchOptions {
        search_paths: false,
        search_descriptions: false,
        search_schemas: true,
        ..SearchOptions::default()
    };

    let hits = eng.search("Pet", &options);
    assert!(!hits.is_empty());
    for hit in &hits {
        assert_eq!(
            hit.result_type,
            SearchResultType::Schema,
            "should only return schemas"
        );
    }
}
|
||||
|
||||
#[test]
fn test_search_paths_only() {
    let idx = petstore_index();
    let eng = SearchEngine::new(&idx);

    // Disable description and schema search so only endpoint paths remain
    // as match candidates.
    let options = SearchOptions {
        search_paths: true,
        search_descriptions: false,
        search_schemas: false,
        ..SearchOptions::default()
    };

    let hits = eng.search("store", &options);
    assert!(!hits.is_empty());
    for hit in &hits {
        assert_eq!(
            hit.result_type,
            SearchResultType::Endpoint,
            "should only return endpoints"
        );
    }
}
|
||||
|
||||
#[test]
fn test_multi_term_coverage_boost() {
    // Verifies that multi-term queries yield results ordered by descending
    // score: an entry matching both terms gets a coverage boost and must not
    // rank below an entry matching only one.
    let idx = petstore_index();
    let eng = SearchEngine::new(&idx);
    let options = SearchOptions::default();

    // NOTE(review): original comment referenced "pets store" but the query
    // actually issued is "pets list" — comment now matches the code.
    // "pets list" has two terms; an endpoint matching both scores higher.
    let hits = eng.search("pets list", &options);
    if let [first, second, ..] = hits.as_slice() {
        assert!(
            first.score >= second.score,
            "results should be sorted by score descending"
        );
    }
}
|
||||
|
||||
#[test]
fn test_no_match_returns_empty() {
    let idx = petstore_index();
    let eng = SearchEngine::new(&idx);
    let options = SearchOptions::default();

    // A query matching nothing in the fixture must yield an empty set,
    // not a low-scoring catch-all.
    let hits = eng.search("zzzznotfound", &options);
    assert!(
        hits.is_empty(),
        "gibberish query should return no results"
    );
}
|
||||
}
|
||||
Reference in New Issue
Block a user