"""APIClient - Agent - Claude AI integration."""
import json
import re
import httpx

from app.core import storage

# Upper bound, in characters, on fetched documentation text. fetch_url_content()
# cuts anything longer down to this length and appends a truncation marker.
# NOTE(review): this was originally described as "roughly 60k tokens"; at the
# common ~4 characters-per-token rule of thumb 80k characters is closer to
# 20k tokens - confirm which figure is intended.
_MAX_CONTENT_CHARS = 80_000


def _strip_html(html: str) -> str:
    """Strip HTML tags and collapse whitespace for cleaner AI input."""
    # Remove script/style blocks entirely
    html = re.sub(r"<(script|style)[^>]*>.*?</(script|style)>", " ", html, flags=re.S | re.I)
    # Remove HTML tags
    html = re.sub(r"<[^>]+>", " ", html)
    # Decode common entities
    html = (html
            .replace("&amp;", "&").replace("&lt;", "<").replace("&gt;", ">")
            .replace("&quot;", '"').replace("&#39;", "'").replace("&nbsp;", " "))
    # Collapse whitespace
    html = re.sub(r"\s{3,}", "\n\n", html)
    return html.strip()

# System prompt passed as the "system" field of the request built in
# analyze_docs(). It instructs the model to answer with one JSON object that
# follows the schema below. Within this module only the presence of the
# "endpoints" key is enforced (see _parse_ai_response()); the other fields are
# a convention between this prompt and whatever consumes the returned dict.
# The text is sent verbatim, so any edit here changes model behaviour.
_SYSTEM_PROMPT = """\
You are an expert API documentation analyzer for APIClient - Agent.
Given API documentation (which may be a spec, a web page, framework docs, or raw text),
extract or infer all useful API endpoints and return structured JSON.

Return ONLY valid JSON - no markdown, no commentary, just the JSON object.

Schema:
{
  "collection_name": "API Name",
  "base_url": "https://api.example.com",
  "auth_type": "bearer|basic|apikey|none",
  "doc_type": "openapi|rest|framework|graphql|unknown",
  "endpoints": [
    {
      "name": "Human readable name",
      "method": "GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS",
      "path": "/v1/resource",
      "description": "What this endpoint does",
      "headers": {"Header-Name": "value or {{variable}}"},
      "params": {"param_name": "example_value"},
      "body": "",
      "body_type": "raw|form-urlencoded|form-data",
      "content_type": "application/json",
      "test_script": "pm.test('Status 200', lambda: pm.response.to_have_status(200))"
    }
  ],
  "environment_variables": {
    "base_url": "https://api.example.com",
    "token": ""
  },
  "notes": "Any important setup notes for the user"
}

Rules:
- Use {{variable_name}} for ALL dynamic values (tokens, IDs, model names, etc.)
- Always output realistic example values for query params and bodies
- Generate a test_script for every endpoint
- Detect auth pattern and add the correct header to every endpoint
- If the documentation is a FRAMEWORK (e.g. it documents URL patterns like
  {domain}/{endpoint}/{model} rather than fixed paths), do the following:
    * Set doc_type to "framework"
    * Use {{base_url}} as the domain placeholder
    * Use {{model}} as a placeholder for the resource/model name
    * Generate one endpoint per HTTP method the framework supports (GET list,
      GET single, POST create, PATCH update, DELETE delete, plus any special ops)
    * Set notes explaining that the user must replace {{model}} with actual model names
      e.g. "res.partner", "sale.order", "product.template" etc.
- If it is a GRAPHQL API, generate a POST /graphql endpoint with example query body
- If auth options are shown (API key, OAuth, Basic), include ALL variants as separate
  environment variables so the user can choose
- Keep paths clean - strip trailing slashes, normalise to lowercase
"""


class AIError(Exception):
    """Error raised by this module's AI helpers.

    Used for every user-reportable failure here: a missing API key, HTTP or
    network problems, and AI responses that cannot be parsed. The message is
    written so it can be shown to the user as-is.
    """


def get_api_key() -> str:
    """Return the value stored under the ``anthropic_api_key`` setting.

    An empty string is passed to ``storage.get_setting`` as its second
    argument (presumably the fallback when nothing is stored - confirm
    against the storage module).
    """
    stored_key = storage.get_setting("anthropic_api_key", "")
    return stored_key


def set_api_key(key: str):
    """Store *key* under the ``anthropic_api_key`` setting.

    Leading and trailing whitespace is trimmed before the value is handed to
    ``storage.set_setting``.
    """
    cleaned_key = key.strip()
    storage.set_setting("anthropic_api_key", cleaned_key)


def analyze_docs(content: str, progress_cb=None) -> dict:
    """
    Send API documentation content to Claude and return parsed collection dict.

    The request goes to the Anthropic Messages endpoint with streaming enabled.
    Text deltas from the event stream are accumulated and the complete text is
    handed to _parse_ai_response().

    Args:
        content: Documentation text; sent unchanged as the single user message.
        progress_cb: Optional callable taking one ``str``. It is called before
            sending, roughly every 500 received characters while streaming,
            and once more before parsing.

    Returns:
        The dict produced by _parse_ai_response().

    Raises:
        AIError: if no API key is configured, the API answers with a non-200
            status, the stream reports an error event, the request times out
            or fails at the network level, or the response cannot be parsed.
    """
    api_key = get_api_key()
    if not api_key:
        raise AIError("No Anthropic API key configured. Go to Tools → AI Assistant → Settings.")

    if progress_cb:
        progress_cb("Sending to Claude AI…")

    headers = {
        "x-api-key":         api_key,
        "anthropic-version": "2023-06-01",
        "content-type":      "application/json",
    }
    payload = {
        "model":      "claude-opus-4-6",
        "max_tokens": 8192,
        "system":     _SYSTEM_PROMPT,
        "messages":   [{"role": "user", "content": content}],
        # Required for the API to answer with server-sent events. Without it
        # the body is one JSON document, no line starts with "data:", and the
        # loop below would collect nothing.
        "stream":     True,
    }

    chunks = []
    received = 0  # running character count, used only for progress reporting
    try:
        with httpx.stream(
            "POST",
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=payload,
            timeout=120.0,
        ) as resp:
            if resp.status_code != 200:
                # errors="replace": a non-UTF-8 error body must not mask the
                # actual API failure with a UnicodeDecodeError.
                body = resp.read().decode(errors="replace")
                raise AIError(f"API error {resp.status_code}: {body[:300]}")

            for line in resp.iter_lines():
                if not line.startswith("data:"):
                    continue
                data_str = line[5:].strip()
                if data_str == "[DONE]":
                    break
                try:
                    event = json.loads(data_str)
                except json.JSONDecodeError:
                    # Skip malformed event payloads, best effort.
                    continue
                if not isinstance(event, dict):
                    continue

                # Failures can also be reported mid-stream, after the 200
                # status line; surface them instead of returning partial text.
                if event.get("type") == "error":
                    err = event.get("error")
                    message = err.get("message") if isinstance(err, dict) else None
                    raise AIError(f"API stream error: {str(message or data_str)[:300]}")

                delta = event.get("delta")
                if not isinstance(delta, dict) or delta.get("type") != "text_delta":
                    continue
                chunk = delta.get("text", "")
                chunks.append(chunk)
                received += len(chunk)
                # True when this chunk carried the total across a multiple of 500.
                if progress_cb and received % 500 < len(chunk):
                    progress_cb(f"Receiving response… ({received} chars)")

    except httpx.TimeoutException as e:
        raise AIError("Request timed out. The documentation may be too large.") from e
    except httpx.RequestError as e:
        raise AIError(f"Network error: {e}") from e

    if progress_cb:
        progress_cb("Parsing AI response…")

    return _parse_ai_response("".join(chunks))


def _parse_ai_response(text: str) -> dict:
    """Extract and validate the JSON from the AI response."""
    text = text.strip()

    # Strip markdown code fences if present
    if text.startswith("```"):
        lines = text.split("\n")
        text = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:])

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        # Try to find JSON object in the text
        start = text.find("{")
        end   = text.rfind("}") + 1
        if start >= 0 and end > start:
            try:
                data = json.loads(text[start:end])
            except json.JSONDecodeError:
                raise AIError("AI returned invalid JSON. Try again or simplify the documentation.")
        else:
            raise AIError("AI response did not contain a JSON object.")

    # Validate required keys
    if "endpoints" not in data:
        raise AIError("AI response missing 'endpoints' key.")

    return data


def fetch_url_content(url: str) -> str:
    """Fetch content from a URL, strip HTML if needed, and truncate if too large.

    Args:
        url: Address of the documentation page or spec to download.

    Returns:
        The response text. HTML pages that do not look like a JSON/YAML spec
        are reduced to plain text via _strip_html(); anything longer than
        _MAX_CONTENT_CHARS is cut and marked as truncated.

    Raises:
        AIError: if the URL is malformed, the request fails, or the server
            answers with an error status.
    """
    try:
        resp = httpx.get(url, follow_redirects=True, timeout=30.0, headers={
            "User-Agent": "EKIKA-API-Client/2.0 (documentation-fetcher)",
            "Accept":     "application/json, text/yaml, text/html, */*",
        })
        resp.raise_for_status()
    except httpx.HTTPStatusError as e:
        raise AIError(f"HTTP {e.response.status_code} fetching URL.") from e
    except (httpx.RequestError, httpx.InvalidURL) as e:
        # InvalidURL is not a RequestError subclass; without it a malformed,
        # user-supplied URL would escape as a non-AIError exception.
        raise AIError(f"Could not fetch URL: {e}") from e

    # Media types are case-insensitive, so normalise before matching.
    ct = resp.headers.get("content-type", "").lower()
    text = resp.text

    # If HTML page - strip tags for cleaner AI input
    if "html" in ct and not _looks_like_spec(text):
        text = _strip_html(text)

    # Truncate if too large
    if len(text) > _MAX_CONTENT_CHARS:
        text = text[:_MAX_CONTENT_CHARS] + "\n\n[Content truncated for length]"

    return text


def _looks_like_spec(text: str) -> bool:
    """Quick check: is this likely a JSON/YAML OpenAPI spec?"""
    t = text.lstrip()
    return t.startswith("{") or t.startswith("openapi:") or t.startswith("swagger:")