# robots.txt for IntelligencePro Knowledge Platform
# Policy: intentionally permissive. The platform is built for AI
# agents — being indexed by training crawlers is a "memory" surface
# that compounds; being fetched by retrieval bots is a real-time
# surface that compounds further. We name the major LLM agents
# explicitly so anyone reading this file knows the welcome is
# intentional, not a default oversight.

User-agent: *
Allow: /
# Fallback group: it applies only to crawlers that match no named
# User-agent group in this file. A crawler that does match a named
# group obeys that group alone, so none of the rules here (including
# the Disallow rules at the bottom) carry over to it.
#
# Customer-acquisition L10 (cycle 344, PRB-1 finding): explicit
# Allow rules for the surfaces an AI agent or training crawler
# needs to index. They are redundant with "Allow: /" today, but the
# longest matching path wins, so they defend against any future
# "Disallow: /api" accidentally cutting off the cryptographic
# substrate.
# These surfaces are load-bearing for an agent that wants to:
#   • Verify an IntelligencePro attestation (W3C VC 2.0 envelope)
#   • Discover the 21-schema attestation catalog
#   • Read the JSON-LD @contexts that travel HF/Kaggle/OpenML
#   • Probe the BitstringStatusList revocation endpoint
#   • Walk the agent-onboarding docs
Allow: /credentials/
Allow: /api/credentials/judgment/
Allow: /.well-known/
Allow: /openapi.json
Allow: /llms.txt
Allow: /agent-docs
# Endpoints kept out of crawling. Each path is longer than "/", so
# these rules take precedence over "Allow: /" above.
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# Anthropic ────────────────────────────────────────────────────────
# One shared group for all Anthropic agents. A crawler that matches a
# named group ignores the "User-agent: *" group entirely (RFC 9309),
# so the private-endpoint Disallow rules must be repeated here; being
# longer than "/", they take precedence over "Allow: /".
User-agent: ClaudeBot
User-agent: Claude-User
User-agent: Claude-SearchBot
User-agent: anthropic-ai
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# OpenAI ───────────────────────────────────────────────────────────
# One shared group for all OpenAI agents. A crawler that matches a
# named group ignores the "User-agent: *" group entirely (RFC 9309),
# so the private-endpoint Disallow rules must be repeated here; being
# longer than "/", they take precedence over "Allow: /".
User-agent: GPTBot
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# Perplexity ───────────────────────────────────────────────────────
# One shared group for both Perplexity agents. A crawler that matches
# a named group ignores the "User-agent: *" group entirely (RFC 9309),
# so the private-endpoint Disallow rules must be repeated here; being
# longer than "/", they take precedence over "Allow: /".
User-agent: PerplexityBot
User-agent: Perplexity-User
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# Common Crawl (powers many open-source training datasets) ────────
# A crawler that matches a named group ignores the "User-agent: *"
# group entirely (RFC 9309), so the private-endpoint Disallow rules
# must be repeated here; being longer than "/", they take precedence
# over "Allow: /".
User-agent: CCBot
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# Google / Apple / Meta opt-out tokens — we explicitly opt IN ─────
# One shared group for the three opt-in tokens. A token that matches a
# named group ignores the "User-agent: *" group entirely (RFC 9309),
# so the private-endpoint Disallow rules must be repeated here; being
# longer than "/", they take precedence over "Allow: /".
User-agent: Google-Extended
User-agent: Applebot-Extended
User-agent: meta-externalagent
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# DuckDuckGo Assist + ByteDance ────────────────────────────────────
# One shared group for both agents. A crawler that matches a named
# group ignores the "User-agent: *" group entirely (RFC 9309), so the
# private-endpoint Disallow rules must be repeated here; being longer
# than "/", they take precedence over "Allow: /".
User-agent: DuckAssistBot
User-agent: Bytespider
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# 2026-era additions (cycle 344, customer-acquisition L10) ─────────
# As of 2026-Q2 these bots crawl actively but were missing from
# the pre-cycle-344 stanza set. Adding them explicitly makes the
# welcome intentional rather than implicit-via-default-Allow.

# Cohere ───────────────────────────────────────────────────────────
# One shared group for both Cohere agents. A crawler that matches a
# named group ignores the "User-agent: *" group entirely (RFC 9309),
# so the private-endpoint Disallow rules must be repeated here; being
# longer than "/", they take precedence over "Allow: /".
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# Mistral / Le Chat ────────────────────────────────────────────────
# A crawler that matches a named group ignores the "User-agent: *"
# group entirely (RFC 9309), so the private-endpoint Disallow rules
# must be repeated here; being longer than "/", they take precedence
# over "Allow: /".
User-agent: MistralAI-User
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# xAI / Grok ───────────────────────────────────────────────────────
# A crawler that matches a named group ignores the "User-agent: *"
# group entirely (RFC 9309), so the private-endpoint Disallow rules
# must be repeated here; being longer than "/", they take precedence
# over "Allow: /".
User-agent: xAI-Bot
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# AI2 (Allen Institute) ────────────────────────────────────────────
# A crawler that matches a named group ignores the "User-agent: *"
# group entirely (RFC 9309), so the private-endpoint Disallow rules
# must be repeated here; being longer than "/", they take precedence
# over "Allow: /".
User-agent: AI2Bot
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# Diffbot (powers many enterprise AI search products) ─────────────
# A crawler that matches a named group ignores the "User-agent: *"
# group entirely (RFC 9309), so the private-endpoint Disallow rules
# must be repeated here; being longer than "/", they take precedence
# over "Allow: /".
User-agent: Diffbot
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

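# You.com + Kagi (search-side AI surfaces) ─────────────────────────
# NOTE(review): Brave Search was named in this heading but has no
# stanza below — add one if Brave publishes a crawler token.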
# One shared group for both agents. A crawler that matches a named
# group ignores the "User-agent: *" group entirely (RFC 9309), so the
# private-endpoint Disallow rules must be repeated here; being longer
# than "/", they take precedence over "Allow: /".
User-agent: YouBot
User-agent: kagibot
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# LinkedIn / Meta AI knowledge ingestion ──────────────────────────
# One shared group for both agents. A crawler that matches a named
# group ignores the "User-agent: *" group entirely (RFC 9309), so the
# private-endpoint Disallow rules must be repeated here; being longer
# than "/", they take precedence over "Allow: /".
User-agent: LinkedInBot
User-agent: FacebookBot
Allow: /
Disallow: /api/admin
Disallow: /api/agent/v1/use-tool
Disallow: /api/agent/v1/contribute

# Sitemap pointer for traditional crawlers + an AI-readable
# sitemap pointer (Mintlify convention; cycle 264 surface)
Sitemap: https://ip.tekton.cc/sitemap.xml