{
  "generatedAt": "2026-07-04T17:55:53.345Z",
  "dateModified": "2026-07-05",
  "project": "LLMs.txt Kit",
  "domain": "llmstxtkit.com",
  "status": "ai_crawler_user_agent_lookup_pack_ready",
  "purpose": "A free, source-backed AI crawler user-agent lookup pack that helps humans and AI agents separate search crawlers, training-use controls, user-triggered fetchers, ads validators, and open dataset crawlers before changing robots.txt, WAF rules, or log reports.",
  "humanUrl": "https://llmstxtkit.com/tools/ai-crawler-user-agent-lookup.html",
  "resourceUrl": "https://llmstxtkit.com/resources/ai-crawler-user-agent-lookup-pack.html",
  "dataJsonUrl": "https://llmstxtkit.com/data/ai-crawler-user-agent-lookup-pack.json",
  "wellKnownJsonUrl": "https://llmstxtkit.com/.well-known/ai-crawler-user-agent-lookup-pack.json",
  "answerPageUrl": "https://llmstxtkit.com/answers/ai-crawler-user-agent-lookup.html",
  "answerPackEndpointSample": "https://llmstxtkit.com/data/ai-answer-pack.php?q=ai%20crawler%20user%20agent",
  "proofLookupEndpointSample": "https://llmstxtkit.com/data/ai-proof-lookup.php?q=ai%20crawler%20user%20agent",
  "intentRouterEndpointSample": "https://llmstxtkit.com/data/ai-intent-router.php?q=ai%20crawler%20user%20agent",
  "querySuggestionsEndpointSample": "https://llmstxtkit.com/data/ai-query-suggestions.php?q=ai%20crawler%20user%20agent",
  "citationBundleEndpointSample": "https://llmstxtkit.com/data/ai-citation-bundle.php?q=ai%20crawler%20user%20agent",
  "targetQueries": [
    "ai crawler user agent",
    "ai crawler user agents",
    "ai bot user agents",
    "ai crawler list",
    "ai search crawler list",
    "crawler user agent lookup",
    "bot user agent lookup",
    "gptbot user agent",
    "oai-searchbot user agent",
    "chatgpt-user user agent",
    "google-extended user agent",
    "applebot extended robots txt",
    "perplexitybot user agent",
    "ccbot user agent",
    "bot detection user agent lookup"
  ],
  "summary": {
    "crawlerRecordCount": 11,
    "officialSourceCount": 5,
    "robotsTxtApplicableCount": 9,
    "userTriggeredFetcherCount": 2
  },
  "officialSourceNotes": [
    {
      "source": "OpenAI crawler docs",
      "url": "https://developers.openai.com/api/docs/bots",
      "verifiedAt": "2026-07-04T17:55:53.345Z",
      "keyPoint": "OAI-SearchBot, GPTBot, ChatGPT-User, and OAI-AdsBot have different documented roles and should not be collapsed into one crawler policy."
    },
    {
      "source": "Google common crawlers",
      "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers",
      "verifiedAt": "2026-07-04T17:55:53.345Z",
      "keyPoint": "Google-Extended is a robots.txt product token without a separate HTTP request user-agent string and does not affect Google Search ranking or inclusion."
    },
    {
      "source": "Applebot documentation",
      "url": "https://support.apple.com/en-us/119829",
      "verifiedAt": "2026-07-04T17:55:53.345Z",
      "keyPoint": "Applebot-Extended does not crawl webpages; it controls how Apple can use data crawled by Applebot."
    },
    {
      "source": "Perplexity crawler docs",
      "url": "https://docs.perplexity.ai/docs/resources/perplexity-crawlers",
      "verifiedAt": "2026-07-04T17:55:53.345Z",
      "keyPoint": "PerplexityBot and Perplexity-User are separate, with published IP range endpoints and different crawler/user-triggered roles."
    },
    {
      "source": "Common Crawl CCBot",
      "url": "https://commoncrawl.org/ccbot",
      "verifiedAt": "2026-07-04T17:55:53.345Z",
      "keyPoint": "Common Crawl documents the CCBot user-agent, robots.txt opt-out, reverse DNS verification, and ccbot.json IP ranges."
    }
  ],
  "crawlerRecords": [
    {
      "id": "oai-searchbot",
      "operator": "OpenAI",
      "token": "OAI-SearchBot",
      "category": "search_discovery",
      "role": "Automatic search crawler for ChatGPT search surfaces.",
      "robotsTxtApplies": true,
      "recommendedDefault": "Allow when ChatGPT search visibility matters; decide separately from GPTBot.",
      "visibilityNote": "OpenAI documents OAI-SearchBot as the crawler used to surface websites in ChatGPT search features.",
      "trainingUseNote": "Search/discovery role, not the GPTBot training-use role.",
      "fullUserAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36; compatible; OAI-SearchBot/1.3; +https://openai.com/searchbot",
      "ipVerificationUrl": "https://openai.com/searchbot.json",
      "sourceUrl": "https://developers.openai.com/api/docs/bots",
      "sourceSummary": "OpenAI says OAI-SearchBot is for search, and sites opted out of OAI-SearchBot will not be shown in ChatGPT search answers except possible navigational links.",
      "logVerification": "Match the OAI-SearchBot token, then verify against the published OpenAI searchbot IP JSON before using it as identity proof.",
      "robotsExample": "User-agent: OAI-SearchBot\nAllow: /"
    },
    {
      "id": "gptbot",
      "operator": "OpenAI",
      "token": "GPTBot",
      "category": "training_use_crawler",
      "role": "Crawler for content that may be used to improve OpenAI generative AI foundation models.",
      "robotsTxtApplies": true,
      "recommendedDefault": "Decide from training-use policy; do not block OAI-SearchBot just because GPTBot is blocked.",
      "visibilityNote": "Separate from OAI-SearchBot; blocking GPTBot is not the same as opting out of ChatGPT search discovery.",
      "trainingUseNote": "OpenAI describes GPTBot as training-use related.",
      "fullUserAgent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; GPTBot/1.3; +https://openai.com/gptbot",
      "ipVerificationUrl": "https://openai.com/gptbot.json",
      "sourceUrl": "https://developers.openai.com/api/docs/bots",
      "sourceSummary": "OpenAI says disallowing GPTBot indicates a site's content should not be used in training generative AI foundation models.",
      "logVerification": "Match GPTBot, then verify against OpenAI's published GPTBot IP JSON where identity matters.",
      "robotsExample": "User-agent: GPTBot\nDisallow: /"
    },
    {
      "id": "chatgpt-user",
      "operator": "OpenAI",
      "token": "ChatGPT-User",
      "category": "user_triggered_fetch",
      "role": "User-requested fetcher for certain ChatGPT and Custom GPT actions.",
      "robotsTxtApplies": false,
      "recommendedDefault": "Monitor separately in logs; use OAI-SearchBot for Search opt-outs and automatic crawl policy.",
      "visibilityNote": "OpenAI says ChatGPT-User is not used to determine whether content may appear in Search.",
      "trainingUseNote": "User-triggered fetcher, not an automatic training crawler.",
      "fullUserAgent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot",
      "ipVerificationUrl": "https://openai.com/chatgpt-user.json",
      "sourceUrl": "https://developers.openai.com/api/docs/bots",
      "sourceSummary": "OpenAI says ChatGPT-User actions are initiated by a user, so robots.txt rules may not apply.",
      "logVerification": "Treat as user-triggered evidence; verify IPs if using it for bot identity claims.",
      "robotsExample": "User-agent: OAI-SearchBot\nAllow: /\n# ChatGPT-User is user-triggered; monitor logs separately."
    },
    {
      "id": "oai-adsbot",
      "operator": "OpenAI",
      "token": "OAI-AdsBot",
      "category": "ads_landing_page_validation",
      "role": "OpenAI ads landing-page safety and relevance validation.",
      "robotsTxtApplies": true,
      "recommendedDefault": "Only relevant if submitting ads on ChatGPT; do not confuse with organic search crawling.",
      "visibilityNote": "Ads validation is separate from search visibility.",
      "trainingUseNote": "OpenAI says OAI-AdsBot collected landing-page data is not used to train generative AI foundation models.",
      "fullUserAgent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-AdsBot/1.0; +https://openai.com/adsbot",
      "ipVerificationUrl": "https://openai.com/adsbot.json",
      "sourceUrl": "https://developers.openai.com/api/docs/bots",
      "sourceSummary": "OpenAI documents OAI-AdsBot for submitted ads landing-page checks.",
      "logVerification": "Verify against the published OAI-AdsBot IP JSON when ad review traffic matters.",
      "robotsExample": "User-agent: OAI-AdsBot\nAllow: /"
    },
    {
      "id": "googlebot",
      "operator": "Google",
      "token": "Googlebot",
      "category": "search_discovery",
      "role": "Google Search crawler.",
      "robotsTxtApplies": true,
      "recommendedDefault": "Allow for public pages that should be eligible for Google Search.",
      "visibilityNote": "Blocking Googlebot can block normal Google Search crawling.",
      "trainingUseNote": "Do not use Googlebot rules to express Google-Extended product preferences.",
      "fullUserAgent": "Use the Googlebot token or documented Googlebot UA patterns; do not pin to one Chrome version.",
      "ipVerificationUrl": "https://developers.google.com/search/docs/crawling-indexing/verifying-googlebot",
      "sourceUrl": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers",
      "sourceSummary": "Google's crawler documentation separates Googlebot search crawling from product tokens such as Google-Extended.",
      "logVerification": "Use reverse DNS or Google's documented verification flow before trusting a Googlebot user-agent string.",
      "robotsExample": "User-agent: Googlebot\nAllow: /"
    },
    {
      "id": "google-extended",
      "operator": "Google",
      "token": "Google-Extended",
      "category": "ai_use_control_token",
      "role": "Robots.txt product token for certain Gemini model training and grounding controls.",
      "robotsTxtApplies": true,
      "recommendedDefault": "Set separately from Googlebot; do not expect a separate Google-Extended HTTP user-agent in logs.",
      "visibilityNote": "Google says Google-Extended does not affect inclusion or ranking in Google Search.",
      "trainingUseNote": "Controls whether content Google crawls may be used for specified Gemini training and grounding uses.",
      "fullUserAgent": "No separate HTTP request user-agent string; crawling uses existing Google user-agent strings.",
      "ipVerificationUrl": "https://developers.google.com/search/docs/crawling-indexing/verifying-googlebot",
      "sourceUrl": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers",
      "sourceSummary": "Google documents Google-Extended as a standalone product token, not a separate HTTP user-agent string.",
      "logVerification": "Look for Googlebot or other Google crawler strings in logs; Google-Extended itself is a robots.txt control token.",
      "robotsExample": "User-agent: Google-Extended\nDisallow: /"
    },
    {
      "id": "applebot",
      "operator": "Apple",
      "token": "Applebot",
      "category": "search_discovery",
      "role": "Apple web crawler for Spotlight, Siri, Safari, and related Apple ecosystem search experiences.",
      "robotsTxtApplies": true,
      "recommendedDefault": "Allow public pages if Apple ecosystem discovery matters.",
      "visibilityNote": "Apple says enabling Applebot in robots.txt allows website content to appear in Apple search results.",
      "trainingUseNote": "Apple says Applebot-crawled data may also be used for Apple foundation models unless Applebot-Extended or page-level controls apply.",
      "fullUserAgent": "Applebot appears inside the user-agent string; Apple documents a general Safari/WebKit format.",
      "ipVerificationUrl": "https://search.developer.apple.com/applebot.json",
      "sourceUrl": "https://support.apple.com/en-us/119829",
      "sourceSummary": "Apple documents Applebot identification through reverse DNS and published CIDR JSON.",
      "logVerification": "Verify reverse DNS under applebot.apple.com or match the published Applebot CIDR JSON.",
      "robotsExample": "User-agent: Applebot\nAllow: /"
    },
    {
      "id": "applebot-extended",
      "operator": "Apple",
      "token": "Applebot-Extended",
      "category": "ai_use_control_token",
      "role": "Secondary robots.txt control for Apple foundation-model training usage.",
      "robotsTxtApplies": true,
      "recommendedDefault": "Use when you want Applebot discovery but need a separate Apple training-use decision.",
      "visibilityNote": "Apple says pages disallowing Applebot-Extended can still be included in search results.",
      "trainingUseNote": "Applebot-Extended controls how Apple can use data crawled by Applebot.",
      "fullUserAgent": "Apple says Applebot-Extended does not crawl webpages.",
      "ipVerificationUrl": "https://search.developer.apple.com/applebot.json",
      "sourceUrl": "https://support.apple.com/en-us/119829",
      "sourceSummary": "Apple documents Applebot-Extended as a secondary user agent for content usage controls, not a page crawler.",
      "logVerification": "Do not expect crawl hits from Applebot-Extended; verify Applebot itself for crawler identity.",
      "robotsExample": "User-agent: Applebot-Extended\nDisallow: /"
    },
    {
      "id": "perplexitybot",
      "operator": "Perplexity",
      "token": "PerplexityBot",
      "category": "search_answer_discovery",
      "role": "Perplexity search result crawler.",
      "robotsTxtApplies": true,
      "recommendedDefault": "Allow if Perplexity search/answer visibility matters, and whitelist published IP ranges if a WAF blocks it.",
      "visibilityNote": "Perplexity says PerplexityBot is designed to surface and link websites in search results.",
      "trainingUseNote": "Perplexity says PerplexityBot is not used to crawl content for AI foundation models.",
      "fullUserAgent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; PerplexityBot/1.0; +https://perplexity.ai/perplexitybot)",
      "ipVerificationUrl": "https://www.perplexity.com/perplexitybot.json",
      "sourceUrl": "https://docs.perplexity.ai/docs/resources/perplexity-crawlers",
      "sourceSummary": "Perplexity documents PerplexityBot and Perplexity-User separately, with published IP JSON endpoints.",
      "logVerification": "Match PerplexityBot and verify against Perplexity's published IP range JSON.",
      "robotsExample": "User-agent: PerplexityBot\nAllow: /"
    },
    {
      "id": "perplexity-user",
      "operator": "Perplexity",
      "token": "Perplexity-User",
      "category": "user_triggered_fetch",
      "role": "User-requested fetcher for Perplexity user actions.",
      "robotsTxtApplies": false,
      "recommendedDefault": "Monitor separately from PerplexityBot; verify IP ranges for WAF allow rules.",
      "visibilityNote": "Perplexity documents this as user-action support rather than web crawling.",
      "trainingUseNote": "Perplexity says it is not used for web crawling or collecting content for training AI foundation models.",
      "fullUserAgent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Perplexity-User/1.0; +https://perplexity.ai/perplexity-user)",
      "ipVerificationUrl": "https://www.perplexity.com/perplexity-user.json",
      "sourceUrl": "https://docs.perplexity.ai/docs/resources/perplexity-crawlers",
      "sourceSummary": "Perplexity says this fetcher generally ignores robots.txt because a user requested the fetch.",
      "logVerification": "Treat as user-triggered and verify against Perplexity's published Perplexity-User IP JSON.",
      "robotsExample": "# Perplexity-User is user-triggered. Review WAF rules and logs separately."
    },
    {
      "id": "ccbot",
      "operator": "Common Crawl",
      "token": "CCBot",
      "category": "open_web_dataset_crawler",
      "role": "Crawler for Common Crawl's open web crawl dataset.",
      "robotsTxtApplies": true,
      "recommendedDefault": "Allow if open web dataset participation is acceptable; block if broad dataset reuse is outside policy.",
      "visibilityNote": "CCBot is not a search-result crawler for a specific answer engine.",
      "trainingUseNote": "Common Crawl data can be used by many downstream researchers, companies, and AI workflows.",
      "fullUserAgent": "CCBot/2.0 (https://commoncrawl.org/faq/)",
      "ipVerificationUrl": "https://index.commoncrawl.org/ccbot.json",
      "sourceUrl": "https://commoncrawl.org/ccbot",
      "sourceSummary": "Common Crawl documents the CCBot user-agent string, robots.txt opt-out example, reverse DNS verification, and IP JSON.",
      "logVerification": "Verify with Common Crawl reverse DNS or the published ccbot.json IP ranges.",
      "robotsExample": "User-agent: CCBot\nDisallow: /"
    }
  ],
  "lookupWorkflow": [
    "Search by token, operator, category, or use case.",
    "Use the category to separate search visibility, model-training preferences, user-triggered fetches, ads validation, and broad dataset crawling.",
    "Copy the robots.txt example only after confirming the crawler's documented role.",
    "For log analysis, treat user-agent strings as clues; verify important bots with IP JSON, reverse DNS, or provider guidance.",
    "Route follow-up questions through the answer pack, proof lookup, intent router, query suggestions, or citation bundle before broad crawling."
  ],
  "safetyGuardrails": {
    "publicDataOnly": true,
    "userAgentStringsCanBeSpoofed": true,
    "verifyImportantBotsWithIpOrReverseDns": true,
    "doNotTreatCrawlerHitsAsHumanTraffic": true,
    "noRankingGuarantee": true,
    "citeOfficialSourceLinks": true
  },
  "proofLinks": [
    {
      "label": "AI Crawler User-Agent Lookup tool",
      "url": "https://llmstxtkit.com/tools/ai-crawler-user-agent-lookup.html",
      "type": "tool"
    },
    {
      "label": "AI Crawler User-Agent Lookup Pack",
      "url": "https://llmstxtkit.com/resources/ai-crawler-user-agent-lookup-pack.html",
      "type": "resource"
    },
    {
      "label": "Lookup pack JSON",
      "url": "https://llmstxtkit.com/data/ai-crawler-user-agent-lookup-pack.json",
      "type": "dataset"
    },
    {
      "label": "Lookup pack well-known JSON",
      "url": "https://llmstxtkit.com/.well-known/ai-crawler-user-agent-lookup-pack.json",
      "type": "dataset"
    },
    {
      "label": "OpenAI crawler documentation",
      "url": "https://developers.openai.com/api/docs/bots",
      "type": "official_reference"
    },
    {
      "label": "Google common crawlers",
      "url": "https://developers.google.com/crawling/docs/crawlers-fetchers/google-common-crawlers",
      "type": "official_reference"
    },
    {
      "label": "Applebot documentation",
      "url": "https://support.apple.com/en-us/119829",
      "type": "official_reference"
    },
    {
      "label": "Perplexity crawler documentation",
      "url": "https://docs.perplexity.ai/docs/resources/perplexity-crawlers",
      "type": "official_reference"
    },
    {
      "label": "Common Crawl CCBot",
      "url": "https://commoncrawl.org/ccbot",
      "type": "official_reference"
    },
    {
      "label": "AI crawler robots.txt checker",
      "url": "https://llmstxtkit.com/tools/ai-crawler-robots-txt-checker.html",
      "type": "tool"
    },
    {
      "label": "Bot detection log analyzer",
      "url": "https://llmstxtkit.com/tools/bot-detection-log-analyzer.html",
      "type": "tool"
    }
  ]
}
