"""APIClient - Agent - Claude AI integration."""

import json
import re
from html import unescape as _unescape_entities

import httpx

from app.core import storage

# Upper bound on the number of characters of fetched documentation that
# fetch_url_content() keeps (original author's estimate: roughly 60k tokens).
_MAX_CONTENT_CHARS = 80_000

# Compiled once at import time; used by _strip_html().
# The back-reference makes the match run through the matching closing tag so
# the script/style *content* is removed, not just the opening tag.
_SCRIPT_STYLE_RE = re.compile(r"<(script|style)\b[^>]*>.*?</\1\s*>", re.S | re.I)
_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RUN_RE = re.compile(r"\s{3,}")


def _strip_html(html: str) -> str:
    """Strip HTML tags and collapse whitespace for cleaner AI input.

    Args:
        html: Raw HTML markup.

    Returns:
        The text with script/style blocks and tags removed, character
        references decoded, runs of three or more whitespace characters
        collapsed to a blank line, and surrounding whitespace stripped.
    """
    # Remove script/style blocks entirely (opening tag, content, closing tag).
    text = _SCRIPT_STYLE_RE.sub(" ", html)
    # Remove the remaining HTML tags.
    text = _TAG_RE.sub(" ", text)
    # Decode entities only after tag removal so that escaped markup such as
    # "&lt;b&gt;" survives as literal text. Non-breaking spaces become plain
    # spaces.
    text = _unescape_entities(text).replace("\xa0", " ")
    # Collapse whitespace.
    text = _WHITESPACE_RUN_RE.sub("\n\n", text)
    return text.strip()


# System prompt sent with every analyze_docs() request.
# NOTE(review): the empty example values for "body" and "token" in the schema
# below look like placeholders that were lost at some point - confirm against
# the intended prompt before relying on them.
_SYSTEM_PROMPT = """\
You are an expert API documentation analyzer for APIClient - Agent.
Given API documentation (which may be a spec, a web page, framework docs, or raw text),
extract or infer all useful API endpoints and return structured JSON.

Return ONLY valid JSON - no markdown, no commentary, just the JSON object.

Schema:
{
  "collection_name": "API Name",
  "base_url": "https://api.example.com",
  "auth_type": "bearer|basic|apikey|none",
  "doc_type": "openapi|rest|framework|graphql|unknown",
  "endpoints": [
    {
      "name": "Human readable name",
      "method": "GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS",
      "path": "/v1/resource",
      "description": "What this endpoint does",
      "headers": {"Header-Name": "value or {{variable}}"},
      "params": {"param_name": "example_value"},
      "body": "",
      "body_type": "raw|form-urlencoded|form-data",
      "content_type": "application/json",
      "test_script": "pm.test('Status 200', lambda: pm.response.to_have_status(200))"
    }
  ],
  "environment_variables": {
    "base_url": "https://api.example.com",
    "token": ""
  },
  "notes": "Any important setup notes for the user"
}

Rules:
- Use {{variable_name}} for ALL dynamic values (tokens, IDs, model names, etc.)
- Always output realistic example values for query params and bodies
- Generate a test_script for every endpoint
- Detect auth pattern and add the correct header to every endpoint
- If the documentation is a FRAMEWORK (e.g. it documents URL patterns like
  {domain}/{endpoint}/{model} rather than fixed paths), do the following:
    * Set doc_type to "framework"
    * Use {{base_url}} as the domain placeholder
    * Use {{model}} as a placeholder for the resource/model name
    * Generate one endpoint per HTTP method the framework supports
      (GET list, GET single, POST create, PATCH update, DELETE delete, plus any special ops)
    * Set notes explaining that the user must replace {{model}} with actual model names
      e.g. "res.partner", "sale.order", "product.template" etc.
- If it is a GRAPHQL API, generate a POST /graphql endpoint with example query body
- If auth options are shown (API key, OAuth, Basic), include ALL variants as
  separate environment variables so the user can choose
- Keep paths clean - strip trailing slashes, normalise to lowercase
"""


class AIError(Exception):
    """Raised for every failure surfaced by this module's AI and fetch helpers."""


def get_api_key() -> str:
    """Return the stored Anthropic API key, or an empty string if none is set."""
    return storage.get_setting("anthropic_api_key", "")


def set_api_key(key: str) -> None:
    """Store the Anthropic API key with surrounding whitespace removed."""
    storage.set_setting("anthropic_api_key", key.strip())


def analyze_docs(content: str, progress_cb=None) -> dict:
    """Send API documentation content to Claude and return the parsed collection dict.

    Args:
        content: Documentation text to analyze; sent as the user message.
        progress_cb: Optional callable taking one ``str``; called with status
            updates before sending, while the response streams in, and before
            parsing.

    Returns:
        The JSON object produced by the model, as validated by
        ``_parse_ai_response``.

    Raises:
        AIError: If no API key is configured, the API returns a non-200
            status or an in-stream error event, the request times out or
            fails at the network level, or the response cannot be parsed.
    """
    api_key = get_api_key()
    if not api_key:
        raise AIError(
            "No Anthropic API key configured. "
            "Go to Tools → AI Assistant → Settings."
        )

    if progress_cb:
        progress_cb("Sending to Claude AI…")

    headers = {
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    payload = {
        "model": "claude-opus-4-6",
        "max_tokens": 8192,
        "system": _SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": content}],
        # The loop below parses server-sent events; without this flag the API
        # answers with a single JSON body and no "data:" lines would be seen.
        "stream": True,
    }

    pieces = []  # text chunks, joined once at the end
    received = 0  # running length of the accumulated text
    try:
        with httpx.stream(
            "POST",
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=payload,
            timeout=120.0,
        ) as resp:
            if resp.status_code != 200:
                # A streamed body must be read explicitly before it can be shown.
                body = resp.read().decode("utf-8", errors="replace")
                raise AIError(f"API error {resp.status_code}: {body[:300]}")

            for line in resp.iter_lines():
                if not line.startswith("data:"):
                    continue
                data_str = line[5:].strip()
                if data_str == "[DONE]":
                    break
                try:
                    event = json.loads(data_str)
                except json.JSONDecodeError:
                    # Best effort: skip malformed event payloads.
                    continue
                if not isinstance(event, dict):
                    continue

                # Errors can also arrive mid-stream, after a 200 status.
                if event.get("type") == "error":
                    detail = event.get("error")
                    message = detail.get("message") if isinstance(detail, dict) else None
                    raise AIError(f"API error: {message or data_str[:300]}")

                delta = event.get("delta")
                if not isinstance(delta, dict) or delta.get("type") != "text_delta":
                    continue
                chunk = delta.get("text", "")
                pieces.append(chunk)
                received += len(chunk)
                # Report roughly every 500 characters: true only when this
                # chunk carried the total across a multiple of 500.
                if progress_cb and received % 500 < len(chunk):
                    progress_cb(f"Receiving response… ({received} chars)")
    except httpx.TimeoutException as e:
        raise AIError("Request timed out. The documentation may be too large.") from e
    except httpx.RequestError as e:
        raise AIError(f"Network error: {e}") from e

    if progress_cb:
        progress_cb("Parsing AI response…")

    return _parse_ai_response("".join(pieces))


def _parse_ai_response(text: str) -> dict:
    """Extract and validate the JSON object from the AI response.

    Args:
        text: Raw text returned by the model; may be wrapped in a markdown
            code fence or surrounded by commentary.

    Returns:
        The decoded JSON object, guaranteed to contain an ``"endpoints"`` key.

    Raises:
        AIError: If no JSON object can be decoded, or the object lacks the
            ``"endpoints"`` key.
    """
    text = text.strip()

    # Strip markdown code fences if present.
    if text.startswith("```"):
        lines = text.split("\n")
        # Drop the opening fence line, and the closing one when it is there.
        inner = lines[1:-1] if lines[-1].strip() == "```" else lines[1:]
        text = "\n".join(inner)

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span embedded in the text.
        start = text.find("{")
        end = text.rfind("}") + 1
        if start < 0 or end <= start:
            raise AIError("AI response did not contain a JSON object.") from None
        try:
            data = json.loads(text[start:end])
        except json.JSONDecodeError as e:
            raise AIError(
                "AI returned invalid JSON. "
                "Try again or simplify the documentation."
            ) from e

    # Valid JSON that is not an object (e.g. a list) is not a usable collection.
    if not isinstance(data, dict):
        raise AIError("AI response did not contain a JSON object.")

    # Validate required keys.
    if "endpoints" not in data:
        raise AIError("AI response missing 'endpoints' key.")

    return data


def fetch_url_content(url: str) -> str:
    """Fetch content from a URL, strip HTML if needed, and truncate if too large.

    Args:
        url: Address of the documentation to download.

    Returns:
        The response text. HTML responses that do not look like a spec are
        reduced to plain text, and anything longer than
        ``_MAX_CONTENT_CHARS`` is cut with a truncation marker appended.

    Raises:
        AIError: If the server answers with an error status or the request
            fails.
    """
    try:
        resp = httpx.get(
            url,
            follow_redirects=True,
            timeout=30.0,
            headers={
                "User-Agent": "EKIKA-API-Client/2.0 (documentation-fetcher)",
                "Accept": "application/json, text/yaml, text/html, */*",
            },
        )
        resp.raise_for_status()
    except httpx.HTTPStatusError as e:
        raise AIError(f"HTTP {e.response.status_code} fetching URL.") from e
    except httpx.RequestError as e:
        raise AIError(f"Could not fetch URL: {e}") from e

    ct = resp.headers.get("content-type", "")
    text = resp.text

    # If HTML page - strip tags for cleaner AI input. A spec served with an
    # HTML content type is left untouched.
    if "html" in ct and not _looks_like_spec(text):
        text = _strip_html(text)

    # Truncate if too large.
    if len(text) > _MAX_CONTENT_CHARS:
        text = text[:_MAX_CONTENT_CHARS] + "\n\n[Content truncated for length]"

    return text


def _looks_like_spec(text: str) -> bool:
    """Quick check: is this likely a JSON/YAML OpenAPI spec?

    Returns True when the text, ignoring leading whitespace, starts with
    ``{``, ``openapi:`` or ``swagger:``.
    """
    return text.lstrip().startswith(("{", "openapi:", "swagger:"))