{"service":"Article Extraction API","version":"1.0.0","description":"Extract structured article data from HTML using Google Gemini AI","endpoints":{"POST /extract":{"description":"Extract article content from URL or HTML","body":{"url":"URL to fetch and extract (optional if 'html' is provided)","html":"Raw HTML string to extract (optional if 'url' is provided)"},"note":"Provide either 'url' or 'html', not both","response":{"title":"Article title","author":"Author name","date":"Publication date (ISO format if possible)","content":"Article body as plain text","images":["Array of image URLs"],"canonical_url":"Canonical URL","tags":["Array of tags/keywords"],"source":"gemini or cache","raw_html_length":"Original HTML length","tokens_input":"Gemini API input tokens used","tokens_output":"Gemini API output tokens used"}}},"example":{"request":{"url":"https://example.com/article.html"},"curl":"curl -X POST http://localhost:8000/extract -H \"Content-Type: application/json\" -d '{\"url\": \"https://example.com/article.html\"}'"},"features":["Automatic HTML fetching with requests","HTML cleaning (removes scripts, styles, iframes)","AI-powered content extraction with Gemini 2.5 Flash","In-memory caching (last 100 requests)","Robust JSON parsing with retry logic","CORS enabled for cross-origin requests"]}