# robots.txt - Block unusual bots and non-search crawlers
# Last updated: 2025

# Allow major search engines and social preview fetchers
User-agent: Googlebot
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: WhatsApp
User-agent: Applebot
Allow: /

# Block aggressive SEO tools and bandwidth-heavy scrapers
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: SiteAuditBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: MegaIndex
User-agent: BLEXBot
User-agent: PetalBot
User-agent: YisouSpider
User-agent: Screaming Frog SEO Spider
Disallow: /

# Block AI crawlers and training bots
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: PerplexityBot
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: CCBot
User-agent: Google-Extended
Disallow: /

# Block known bandwidth-heavy crawlers
User-agent: linkdexbot
User-agent: spbot
User-agent: twengabot
User-agent: postrank
User-agent: turnitinbot
User-agent: sindice
User-agent: Aboundex
User-agent: AspiegelBot
User-agent: binlar
User-agent: BUbiNG
User-agent: Curious George
User-agent: eCatch
User-agent: efcrawler
User-agent: FlipboardProxy
User-agent: GSLFbot
User-agent: HTTrack
User-agent: panscient.com
User-agent: pmafind
User-agent: PySocial
User-agent: SearchmetricsBot
User-agent: TwikleBot
User-agent: VoilaBot
User-agent: ZyBorg
Disallow: /

# Block download tools and HTTP client libraries.
# Only clients that actually read robots.txt are affected: wget honors
# it by default, but curl and most HTTP libraries never request it.
User-agent: wget
User-agent: curl
User-agent: libwww
User-agent: libwww-perl
User-agent: lwp-trivial
User-agent: Python-urllib
User-agent: Python-requests
User-agent: httpx
User-agent: Go-http-client
User-agent: Indy Library
Disallow: /

# Security scanners, listed for completeness. These tools ignore
# robots.txt entirely, so the entries below are documentation, not
# protection; block them at the server or firewall level instead.
User-agent: sqlmap
User-agent: nikto
User-agent: nessus
User-agent: w3af
User-agent: skipfish
User-agent: OpenVAS
User-agent: ZmEu
Disallow: /

# Note: the robots.txt standard (RFC 9309) does not support wildcards
# in User-agent values, so catch-all patterns such as "*bot*" or full
# browser UA strings like "Mozilla/5.0 (compatible;)" never match a
# crawler's product token. Unnamed bots fall through to the default
# group below.

# Default policy for all other bots: allowed, but throttled.
# Crawl-delay is nonstandard: Bing and Yandex honor it, Google ignores it.
User-agent: *
Crawl-delay: 10

# Sitemap location (optional - add your sitemap URL)
# Sitemap: https://yourwebsite.com/sitemap.xml
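
# Enforcement note: robots.txt is advisory only, so the Disallow rules
# above stop well-behaved crawlers and nothing else. For bots that
# ignore it, filter on the User-Agent header at the server level. A
# minimal sketch for an nginx server block (hypothetical pattern list;
# extend it to cover the agents named above) might look like:
#
#   if ($http_user_agent ~* "(AhrefsBot|MJ12bot|SemrushBot|HTTrack|sqlmap)") {
#       return 403;
#   }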