Files
swagger-cli/src/core/indexer.rs
teernisse cc04772792 Core: eliminate double-parse in normalize_to_json, harden SSRF, optimize search
normalize_to_json now returns (Vec<u8>, serde_json::Value) — callers get
the parsed Value for free instead of re-parsing the bytes they just
produced. Eliminates a redundant serde_json::from_slice on every fetch,
sync, and external-ref resolution path.

Format detection switches from trial JSON parse to first-byte inspection
({/[ = JSON, else YAML) — roughly 300x faster for the common case.

SSRF protection expanded: block CGNAT range 100.64.0.0/10 (RFC 6598,
common cloud-internal SSRF target) and IPv6 unique-local fc00::/7.

Alias validation simplified: the regex ^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$
already rejects path separators, traversal, and leading dots — remove
redundant explicit checks.

Search performance: pre-lowercase query terms once and pre-lowercase each
field once per endpoint (not once per term x field). Removes the
contains_term helper entirely. safe_snippet rewritten with char-based
search to avoid byte-position mismatches on multi-byte Unicode characters
(e.g. U+0130 which expands during lowercasing).
2026-02-12 16:56:12 -05:00

660 lines
22 KiB
Rust

use std::collections::HashMap;
use crate::core::spec::{
IndexInfo, IndexedEndpoint, IndexedParam, IndexedSchema, IndexedTag, SpecIndex,
};
use crate::errors::SwaggerCliError;
/// Serialization format of a raw OpenAPI document.
///
/// `Eq` is derived alongside `PartialEq` because equality on a fieldless enum
/// is total; this lets the type be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// The document is JSON.
    Json,
    /// The document is YAML.
    Yaml,
}
/// Decide whether a raw document should be treated as JSON or YAML.
///
/// Sources are consulted in order; the first conclusive one wins:
///
/// 1. `content_type_hint` — case-insensitively contains `json`, or `yaml`/`yml`.
/// 2. `filename_hint` — case-insensitively ends with `.json`, `.yaml` or `.yml`.
/// 3. The first byte of `bytes` that is not ASCII whitespace: `{` or `[`
///    means JSON, anything else (including empty input) means YAML.
pub fn detect_format(
    bytes: &[u8],
    filename_hint: Option<&str>,
    content_type_hint: Option<&str>,
) -> Format {
    let from_content_type = content_type_hint.and_then(|raw| {
        let media_type = raw.to_ascii_lowercase();
        if media_type.contains("json") {
            Some(Format::Json)
        } else if media_type.contains("yaml") || media_type.contains("yml") {
            Some(Format::Yaml)
        } else {
            None
        }
    });

    let from_filename = || {
        filename_hint.and_then(|raw| {
            let file_name = raw.to_ascii_lowercase();
            if file_name.ends_with(".json") {
                Some(Format::Json)
            } else if [".yaml", ".yml"].iter().any(|ext| file_name.ends_with(ext)) {
                Some(Format::Yaml)
            } else {
                None
            }
        })
    };

    // Sniffing inspects a single byte instead of attempting a full JSON parse.
    let from_content = || {
        let first_meaningful = bytes
            .iter()
            .copied()
            .find(|byte| !byte.is_ascii_whitespace());
        if matches!(first_meaningful, Some(b'{' | b'[')) {
            Format::Json
        } else {
            Format::Yaml
        }
    };

    from_content_type
        .or_else(from_filename)
        .unwrap_or_else(from_content)
}
/// Normalize raw bytes to canonical JSON, returning both the bytes and parsed value.
///
/// For JSON input: parses once and returns the original bytes + parsed value.
/// For YAML input: parses YAML into a Value, serializes to JSON bytes.
///
/// This eliminates the common double-parse pattern where callers would
/// call `normalize_to_json()` then immediately `serde_json::from_slice()`.
pub fn normalize_to_json(
bytes: &[u8],
format: Format,
) -> Result<(Vec<u8>, serde_json::Value), SwaggerCliError> {
match format {
Format::Json => {
let value: serde_json::Value = serde_json::from_slice(bytes)?;
Ok((bytes.to_vec(), value))
}
Format::Yaml => {
let value: serde_json::Value = serde_yaml::from_slice(bytes)
.map_err(|e| SwaggerCliError::InvalidSpec(format!("YAML parse error: {e}")))?;
let json_bytes = serde_json::to_vec(&value)?;
Ok((json_bytes, value))
}
}
}
/// Build a `SpecIndex` from a parsed JSON OpenAPI document.
///
/// The index records:
///
/// - `openapi`, `info.title` and `info.version` (empty string when the field
///   is missing or not a string);
/// - one `IndexedEndpoint` per HTTP-method key found under each `paths` entry,
///   carrying merged path-level and operation-level parameters, tags,
///   request-body facts and the effective security scheme names;
/// - one `IndexedSchema` per key of `components.schemas`;
/// - one `IndexedTag` per tag referenced by at least one operation, with its
///   description taken from the top-level `tags` array when present.
///
/// Endpoints are sorted by path and then by `method_rank`; schemas and tags
/// are sorted by name.
///
/// Method keys are recognised case-insensitively. The stored `method` is
/// upper-cased, while `operation_ptr` uses the key exactly as written in the
/// document so that it always addresses the real node.
///
/// # Errors
///
/// Returns `SwaggerCliError::InvalidSpec` if a generated JSON pointer does not
/// resolve inside `raw_json`.
pub fn build_index(
    raw_json: &serde_json::Value,
    content_hash: &str,
    generation: u64,
) -> Result<SpecIndex, SwaggerCliError> {
    let openapi = raw_json
        .get("openapi")
        .and_then(|v| v.as_str())
        .unwrap_or("")
        .to_string();

    let info_obj = raw_json.get("info");
    let title = info_obj
        .and_then(|i| i.get("title"))
        .and_then(|v| v.as_str())
        .unwrap_or("")
        .to_string();
    let version = info_obj
        .and_then(|i| i.get("version"))
        .and_then(|v| v.as_str())
        .unwrap_or("")
        .to_string();

    // Root-level security schemes (names only).
    let root_security = extract_security_scheme_names(raw_json.get("security"));

    let mut endpoints = Vec::new();
    let mut tag_counts: HashMap<String, usize> = HashMap::new();

    if let Some(paths) = raw_json.get("paths").and_then(|p| p.as_object()) {
        for (path, path_item) in paths {
            let Some(path_obj) = path_item.as_object() else {
                continue;
            };

            // Path-level parameters apply to all operations under this path.
            let path_params = path_obj
                .get("parameters")
                .and_then(|v| v.as_array())
                .map(|arr| extract_params(arr))
                .unwrap_or_default();

            // The encoded path segment is the same for every operation below.
            let path_encoded = json_pointer_encode(path);

            for (method, operation) in path_obj {
                if !is_http_method(method) {
                    continue;
                }
                let Some(op) = operation.as_object() else {
                    continue;
                };

                let method_upper = method.to_ascii_uppercase();

                // Build the pointer from the key as it appears in the document.
                // `is_http_method` is case-insensitive, so lowercasing here would
                // yield a pointer that does not resolve for a key such as "GET"
                // and would abort indexing of the whole document. Keys accepted
                // by `is_http_method` are plain ASCII letters, so they need no
                // RFC 6901 escaping.
                let operation_ptr = format!("/paths/{path_encoded}/{method}");

                // Merge path-level + operation-level parameters (operation wins on conflict).
                let op_params = op
                    .get("parameters")
                    .and_then(|v| v.as_array())
                    .map(|arr| extract_params(arr))
                    .unwrap_or_default();
                let parameters = merge_params(&path_params, &op_params);

                let tags: Vec<String> = op
                    .get("tags")
                    .and_then(|v| v.as_array())
                    .map(|arr| {
                        arr.iter()
                            .filter_map(|t| t.as_str().map(String::from))
                            .collect()
                    })
                    .unwrap_or_default();
                for tag in &tags {
                    *tag_counts.entry(tag.clone()).or_insert(0) += 1;
                }

                let deprecated = op
                    .get("deprecated")
                    .and_then(|v| v.as_bool())
                    .unwrap_or(false);
                let summary = op.get("summary").and_then(|v| v.as_str()).map(String::from);
                let description = op
                    .get("description")
                    .and_then(|v| v.as_str())
                    .map(String::from);
                let operation_id = op
                    .get("operationId")
                    .and_then(|v| v.as_str())
                    .map(String::from);

                let (request_body_required, request_body_content_types) =
                    extract_request_body(op.get("requestBody"));

                // Security: operation-level overrides root. An explicit empty array
                // means "no auth required".
                let (security_schemes, security_required) =
                    if let Some(op_sec) = op.get("security") {
                        let schemes = extract_security_scheme_names(Some(op_sec));
                        let required = !schemes.is_empty();
                        (schemes, required)
                    } else {
                        let required = !root_security.is_empty();
                        (root_security.clone(), required)
                    };

                // Defensive invariant: every stored pointer must address a real node.
                if !resolve_pointer(raw_json, &operation_ptr) {
                    return Err(SwaggerCliError::InvalidSpec(format!(
                        "JSON pointer does not resolve: {operation_ptr}"
                    )));
                }

                endpoints.push(IndexedEndpoint {
                    path: path.clone(),
                    method: method_upper,
                    summary,
                    description,
                    operation_id,
                    tags,
                    deprecated,
                    parameters,
                    request_body_required,
                    request_body_content_types,
                    security_schemes,
                    security_required,
                    operation_ptr,
                });
            }
        }
    }

    // Sort endpoints: path ASC then method rank ASC.
    endpoints.sort_by(|a, b| {
        a.path
            .cmp(&b.path)
            .then_with(|| method_rank(&a.method).cmp(&method_rank(&b.method)))
    });

    // Schemas from components.schemas.
    let mut schemas: Vec<IndexedSchema> = Vec::new();
    if let Some(components_schemas) = raw_json
        .pointer("/components/schemas")
        .and_then(|v| v.as_object())
    {
        for name in components_schemas.keys() {
            let schema_ptr = format!("/components/schemas/{}", json_pointer_encode(name));
            if !resolve_pointer(raw_json, &schema_ptr) {
                return Err(SwaggerCliError::InvalidSpec(format!(
                    "JSON pointer does not resolve: {schema_ptr}"
                )));
            }
            schemas.push(IndexedSchema {
                name: name.clone(),
                schema_ptr,
            });
        }
    }
    schemas.sort_by(|a, b| a.name.cmp(&b.name));

    // Collect tag descriptions from the top-level `tags` array (if present).
    let mut tag_descriptions: HashMap<String, Option<String>> = HashMap::new();
    if let Some(tags_arr) = raw_json.get("tags").and_then(|v| v.as_array()) {
        for tag_obj in tags_arr {
            if let Some(name) = tag_obj.get("name").and_then(|v| v.as_str()) {
                let desc = tag_obj
                    .get("description")
                    .and_then(|v| v.as_str())
                    .map(String::from);
                tag_descriptions.insert(name.to_string(), desc);
            }
        }
    }

    // Only tags referenced by at least one operation are emitted. Each name
    // occurs once in `tag_counts`, so its description can be moved out of the
    // lookup map instead of cloned.
    let mut tags: Vec<IndexedTag> = tag_counts
        .into_iter()
        .map(|(name, count)| {
            let description = tag_descriptions.remove(&name).flatten();
            IndexedTag {
                name,
                description,
                endpoint_count: count,
            }
        })
        .collect();
    tags.sort_by(|a, b| a.name.cmp(&b.name));

    Ok(SpecIndex {
        index_version: 1,
        generation,
        content_hash: content_hash.to_string(),
        openapi,
        info: IndexInfo { title, version },
        endpoints,
        schemas,
        tags,
    })
}
/// Sort rank of an HTTP method name, compared ASCII-case-insensitively.
///
/// Known methods rank 0 through 7 in the order GET, POST, PUT, PATCH, DELETE,
/// OPTIONS, HEAD, TRACE. Anything else ranks 99 and therefore sorts last.
pub fn method_rank(method: &str) -> u8 {
    const RANKED_METHODS: [(&str, u8); 8] = [
        ("GET", 0),
        ("POST", 1),
        ("PUT", 2),
        ("PATCH", 3),
        ("DELETE", 4),
        ("OPTIONS", 5),
        ("HEAD", 6),
        ("TRACE", 7),
    ];
    const UNKNOWN_RANK: u8 = 99;

    RANKED_METHODS
        .iter()
        .find(|(name, _)| name.eq_ignore_ascii_case(method))
        .map_or(UNKNOWN_RANK, |&(_, rank)| rank)
}
/// Escape one JSON pointer reference token per RFC 6901.
///
/// `~` becomes `~0` and `/` becomes `~1`; every other character is copied
/// unchanged.
pub fn json_pointer_encode(segment: &str) -> String {
    let mut encoded = String::with_capacity(segment.len());
    for ch in segment.chars() {
        match ch {
            '~' => encoded.push_str("~0"),
            '/' => encoded.push_str("~1"),
            other => encoded.push(other),
        }
    }
    encoded
}
/// Report whether `pointer` addresses an existing node inside `value`.
///
/// This delegates to `serde_json::Value::pointer` and returns `true` exactly
/// when that lookup yields a node.
pub fn resolve_pointer(value: &serde_json::Value, pointer: &str) -> bool {
    let target = value.pointer(pointer);
    target.is_some()
}
// ---------------------------------------------------------------------------
// Private helpers
// ---------------------------------------------------------------------------
/// Return `true` when `key` names one of the eight HTTP operations indexed
/// under a path item, compared ASCII-case-insensitively.
fn is_http_method(key: &str) -> bool {
    const OPERATION_KEYS: [&str; 8] = [
        "get", "post", "put", "patch", "delete", "options", "head", "trace",
    ];
    OPERATION_KEYS
        .iter()
        .any(|verb| verb.eq_ignore_ascii_case(key))
}
/// Convert a JSON `parameters` array into `IndexedParam` entries.
///
/// An entry is skipped unless both `name` and `in` are present as strings.
/// `required` defaults to `false`; `description` is kept only when it is a
/// string.
fn extract_params(arr: &[serde_json::Value]) -> Vec<IndexedParam> {
    let mut params = Vec::new();
    for entry in arr {
        let Some(name) = entry.get("name").and_then(|v| v.as_str()) else {
            continue;
        };
        let Some(location) = entry.get("in").and_then(|v| v.as_str()) else {
            continue;
        };
        let required = entry
            .get("required")
            .and_then(|v| v.as_bool())
            .unwrap_or(false);
        let description = entry
            .get("description")
            .and_then(|v| v.as_str())
            .map(String::from);
        params.push(IndexedParam {
            name: name.to_string(),
            location: location.to_string(),
            required,
            description,
        });
    }
    params
}
/// Combine path-level and operation-level parameters into one list.
///
/// The result starts as a copy of `path_params`. Each operation parameter then
/// replaces, in place, the first entry with the same `(name, location)` pair,
/// or is appended when no such entry exists.
fn merge_params(path_params: &[IndexedParam], op_params: &[IndexedParam]) -> Vec<IndexedParam> {
    let mut combined: Vec<IndexedParam> = path_params.to_vec();
    for candidate in op_params {
        let slot = combined
            .iter()
            .position(|known| known.name == candidate.name && known.location == candidate.location);
        match slot {
            Some(idx) => combined[idx] = candidate.clone(),
            None => combined.push(candidate.clone()),
        }
    }
    combined
}
fn extract_request_body(rb: Option<&serde_json::Value>) -> (bool, Vec<String>) {
let Some(rb) = rb else {
return (false, Vec::new());
};
let required = rb
.get("required")
.and_then(|v| v.as_bool())
.unwrap_or(false);
let content_types = rb
.get("content")
.and_then(|v| v.as_object())
.map(|obj| obj.keys().cloned().collect())
.unwrap_or_default();
(required, content_types)
}
/// Collect the distinct scheme names used by a `security` requirement array.
///
/// Returns an empty list when `security` is absent or not an array. Array
/// items that are not objects are ignored. The names are the keys of each
/// requirement object, returned sorted and without duplicates.
fn extract_security_scheme_names(security: Option<&serde_json::Value>) -> Vec<String> {
    let requirements = match security.and_then(|v| v.as_array()) {
        Some(list) => list,
        None => return Vec::new(),
    };

    let mut schemes: Vec<String> = requirements
        .iter()
        .filter_map(|requirement| requirement.as_object())
        .flat_map(|requirement| requirement.keys().cloned())
        .collect();

    // Sorting first puts equal names next to each other, so `dedup` removes
    // every duplicate.
    schemes.sort();
    schemes.dedup();
    schemes
}
/// Unit tests for format detection, normalization, pointer helpers and
/// index construction.
#[cfg(test)]
mod tests {
    use super::*;

    /// Content-type and filename hints both select JSON.
    #[test]
    fn test_detect_format_json() {
        let bytes = b"{}";
        assert_eq!(
            detect_format(bytes, None, Some("application/json")),
            Format::Json,
        );
        assert_eq!(detect_format(bytes, Some("spec.json"), None), Format::Json);
    }

    /// Content-type and filename hints both select YAML.
    #[test]
    fn test_detect_format_yaml() {
        let bytes = b"openapi: '3.0.0'";
        assert_eq!(
            detect_format(bytes, None, Some("application/x-yaml")),
            Format::Yaml,
        );
        assert_eq!(detect_format(bytes, Some("spec.yaml"), None), Format::Yaml);
        assert_eq!(detect_format(bytes, Some("spec.yml"), None), Format::Yaml);
    }

    /// Without hints the first non-whitespace byte decides the format.
    #[test]
    fn test_detect_format_sniffing() {
        // Starts with '{' -> detected as JSON even without hints.
        let json_bytes = br#"{"openapi":"3.0.0"}"#;
        assert_eq!(detect_format(json_bytes, None, None), Format::Json);
        // Leading whitespace is skipped, and '[' also counts as JSON.
        assert_eq!(detect_format(b"  \n[1, 2]", None, None), Format::Json);
        // First meaningful byte is neither '{' nor '[' -> treated as YAML.
        let yaml_bytes = b"openapi: '3.0.0'\ninfo:\n title: Test";
        assert_eq!(detect_format(yaml_bytes, None, None), Format::Yaml);
        // Empty input has no meaningful byte and defaults to YAML.
        assert_eq!(detect_format(b"", None, None), Format::Yaml);
    }

    /// YAML input is converted to JSON bytes plus an equivalent parsed value.
    #[test]
    fn test_yaml_normalization_roundtrip() {
        // `title` and `version` must be nested under `info`; YAML nesting is
        // indentation-sensitive, so the mapping below is indented on purpose.
        let yaml = br#"
openapi: "3.0.0"
info:
  title: Test API
  version: "1.0"
paths: {}
"#;
        let (json_bytes, parsed) = normalize_to_json(yaml, Format::Yaml).unwrap();
        // Verify the bytes are also valid JSON
        let _: serde_json::Value = serde_json::from_slice(&json_bytes).unwrap();
        assert_eq!(parsed["openapi"], "3.0.0");
        assert_eq!(parsed["info"]["title"], "Test API");
    }

    /// `~` and `/` are escaped as `~0` and `~1`.
    #[test]
    fn test_json_pointer_encoding() {
        assert_eq!(json_pointer_encode("/pet/{petId}"), "~1pet~1{petId}");
        assert_eq!(json_pointer_encode("simple"), "simple");
        assert_eq!(json_pointer_encode("a~b/c"), "a~0b~1c");
    }

    /// Known methods have fixed ranks; unknown methods rank last.
    #[test]
    fn test_method_rank_ordering() {
        assert_eq!(method_rank("GET"), 0);
        assert_eq!(method_rank("POST"), 1);
        assert_eq!(method_rank("PUT"), 2);
        assert_eq!(method_rank("PATCH"), 3);
        assert_eq!(method_rank("DELETE"), 4);
        assert_eq!(method_rank("OPTIONS"), 5);
        assert_eq!(method_rank("HEAD"), 6);
        assert_eq!(method_rank("TRACE"), 7);
        assert_eq!(method_rank("CUSTOM"), 99);
        // Case-insensitive.
        assert_eq!(method_rank("get"), 0);
        assert_eq!(method_rank("Post"), 1);
    }

    /// A small spec is indexed with sorted endpoints, schemas and tag counts.
    #[test]
    fn test_build_index_basic() {
        let spec: serde_json::Value = serde_json::json!({
            "openapi": "3.0.3",
            "info": { "title": "Pet Store", "version": "1.0.0" },
            "paths": {
                "/pets": {
                    "get": {
                        "operationId": "listPets",
                        "summary": "List all pets",
                        "tags": ["pets"],
                        "parameters": [
                            { "name": "limit", "in": "query", "required": false }
                        ],
                        "responses": { "200": { "description": "OK" } }
                    },
                    "post": {
                        "operationId": "createPet",
                        "summary": "Create a pet",
                        "tags": ["pets"],
                        "requestBody": {
                            "required": true,
                            "content": { "application/json": {} }
                        },
                        "responses": { "201": { "description": "Created" } }
                    }
                },
                "/pets/{petId}": {
                    "get": {
                        "operationId": "showPetById",
                        "summary": "Get a pet",
                        "tags": ["pets"],
                        "parameters": [
                            { "name": "petId", "in": "path", "required": true }
                        ],
                        "responses": { "200": { "description": "OK" } }
                    }
                }
            },
            "components": {
                "schemas": {
                    "Pet": { "type": "object" },
                    "Error": { "type": "object" }
                }
            }
        });
        let index = build_index(&spec, "sha256:abc", 42).unwrap();
        assert_eq!(index.index_version, 1);
        assert_eq!(index.generation, 42);
        assert_eq!(index.content_hash, "sha256:abc");
        assert_eq!(index.openapi, "3.0.3");
        assert_eq!(index.info.title, "Pet Store");
        assert_eq!(index.info.version, "1.0.0");
        // 3 endpoints total.
        assert_eq!(index.endpoints.len(), 3);
        // Sorted: /pets GET < /pets POST < /pets/{petId} GET.
        assert_eq!(index.endpoints[0].path, "/pets");
        assert_eq!(index.endpoints[0].method, "GET");
        assert_eq!(index.endpoints[1].path, "/pets");
        assert_eq!(index.endpoints[1].method, "POST");
        assert_eq!(index.endpoints[2].path, "/pets/{petId}");
        // POST /pets has request body.
        assert!(index.endpoints[1].request_body_required);
        assert_eq!(
            index.endpoints[1].request_body_content_types,
            vec!["application/json"]
        );
        // Schemas sorted: Error < Pet.
        assert_eq!(index.schemas.len(), 2);
        assert_eq!(index.schemas[0].name, "Error");
        assert_eq!(index.schemas[1].name, "Pet");
        // Single tag with count 3.
        assert_eq!(index.tags.len(), 1);
        assert_eq!(index.tags[0].name, "pets");
        assert_eq!(index.tags[0].endpoint_count, 3);
        // Verify pointers resolve.
        for ep in &index.endpoints {
            assert!(
                resolve_pointer(&spec, &ep.operation_ptr),
                "Pointer should resolve: {}",
                ep.operation_ptr,
            );
        }
        for schema in &index.schemas {
            assert!(
                resolve_pointer(&spec, &schema.schema_ptr),
                "Pointer should resolve: {}",
                schema.schema_ptr,
            );
        }
    }

    /// Operation-level `security` overrides the root; absence inherits it.
    #[test]
    fn test_security_inheritance() {
        let spec: serde_json::Value = serde_json::json!({
            "openapi": "3.0.3",
            "info": { "title": "Auth Test", "version": "1.0.0" },
            "security": [{ "api_key": [] }],
            "paths": {
                "/secured": {
                    "get": {
                        "summary": "Inherits root security",
                        "responses": { "200": { "description": "OK" } }
                    }
                },
                "/public": {
                    "get": {
                        "summary": "Explicitly no auth",
                        "security": [],
                        "responses": { "200": { "description": "OK" } }
                    }
                },
                "/custom": {
                    "get": {
                        "summary": "Custom auth",
                        "security": [{ "bearer": [] }],
                        "responses": { "200": { "description": "OK" } }
                    }
                }
            }
        });
        let index = build_index(&spec, "sha256:test", 1).unwrap();
        // /custom -> custom security.
        let custom = index
            .endpoints
            .iter()
            .find(|e| e.path == "/custom")
            .unwrap();
        assert_eq!(custom.security_schemes, vec!["bearer"]);
        assert!(custom.security_required);
        // /public -> empty security array means no auth.
        let public = index
            .endpoints
            .iter()
            .find(|e| e.path == "/public")
            .unwrap();
        assert!(public.security_schemes.is_empty());
        assert!(!public.security_required);
        // /secured -> inherits root security.
        let secured = index
            .endpoints
            .iter()
            .find(|e| e.path == "/secured")
            .unwrap();
        assert_eq!(secured.security_schemes, vec!["api_key"]);
        assert!(secured.security_required);
    }

    /// Pointers to existing nodes resolve; pointers to missing nodes do not.
    #[test]
    fn test_resolve_pointer_valid_and_invalid() {
        let val: serde_json::Value = serde_json::json!({
            "a": { "b": { "c": 1 } }
        });
        assert!(resolve_pointer(&val, "/a/b/c"));
        assert!(resolve_pointer(&val, "/a/b"));
        assert!(!resolve_pointer(&val, "/a/b/d"));
        assert!(!resolve_pointer(&val, "/x"));
    }

    /// The bundled petstore fixture indexes cleanly and comes out sorted.
    #[test]
    fn test_build_index_from_fixture() {
        let fixture = include_str!("../../tests/fixtures/petstore.json");
        let spec: serde_json::Value = serde_json::from_str(fixture).unwrap();
        let index = build_index(&spec, "sha256:fixture", 1).unwrap();
        assert_eq!(index.openapi, "3.0.3");
        assert_eq!(index.info.title, "Petstore");
        assert!(!index.endpoints.is_empty());
        assert!(!index.schemas.is_empty());
        // Verify sort order: endpoints sorted by path then method rank.
        for window in index.endpoints.windows(2) {
            let ordering = window[0]
                .path
                .cmp(&window[1].path)
                .then_with(|| method_rank(&window[0].method).cmp(&method_rank(&window[1].method)));
            assert!(
                ordering.is_le(),
                "Endpoints not sorted: {} {} > {} {}",
                window[0].path,
                window[0].method,
                window[1].path,
                window[1].method,
            );
        }
    }
}