{
  "generatedAt": "2026-06-28T20:58:52.380Z",
  "project": "LLMs.txt Kit",
  "domain": "llmstxtkit.com",
  "origin": "https://llmstxtkit.com",
  "status": "live_opportunity_scan_has_manual_review_candidates",
  "inputPath": "dist/live-opportunity-search-input.json",
  "summary": {
    "priorityActions": 5,
    "readyPriorityActions": 5,
    "searchVisibilityObserved": true,
    "organicProofObserved": false,
    "searchAttempts": 4,
    "candidates": 5,
    "actionableCandidates": 2,
    "postNowCandidates": 0,
    "skippedCandidates": 3
  },
  "rules": [
    "Do not post to external platforms from this scan automatically.",
    "Open every manual-review candidate in the browser before posting.",
    "Answer the question first; add a tracked link only if it materially helps.",
    "Disclose affiliation for owned tools and pages.",
    "Record public evidence immediately after posting, acceptance, skip, or removal.",
    "Do not count this scan itself as real distribution proof."
  ],
  "searchAttempts": [
    {
      "id": "reddit-json-search",
      "method": "reddit-search-json",
      "status": "blocked_by_platform_403",
      "queries": [
        "llms.txt validator",
        "llms.txt checker",
        "Googlebot Google-Extended robots.txt",
        "create llms.txt",
        "GPTBot access logs"
      ],
      "decision": "Do not infer candidate threads from a blocked search. Use a manual, logged-in browser search before posting on Reddit."
    },
    {
      "id": "duckduckgo-llms-validator",
      "method": "duckduckgo-html",
      "status": "results_reviewed",
      "query": "site:reddit.com/r/SEO llms.txt validator OR checker",
      "notes": "Results were mostly third-party validator tools or irrelevant AI detector posts, not current community questions."
    },
    {
      "id": "duckduckgo-google-robots",
      "method": "duckduckgo-html",
      "status": "results_reviewed",
      "query": "site:reddit.com/r/SEO Googlebot Google-Extended robots.txt",
      "notes": "Results were older, generic robots.txt threads on Reddit, not a clear, current Google-Extended/GPTBot question."
    },
    {
      "id": "duckduckgo-stackoverflow-meta",
      "method": "duckduckgo-html",
      "status": "candidate_found",
      "query": "Stack Overflow Do LLM crawlers respect the robots meta tag",
      "notes": "Found a directly relevant Stack Overflow question about LLM crawlers and robots meta tags."
    }
  ],
  "candidates": [
    {
      "id": "stackoverflow-llm-robots-meta-tag-live",
      "source": "stackoverflow",
      "url": "https://stackoverflow.com/questions/77648557/do-llm-crawlers-respect-the-robots-meta-tag",
      "title": "Do LLM crawlers respect the robots meta tag?",
      "mappedPlacementId": "stackoverflow-llm-robots-meta-tag",
      "mappedPriorityId": null,
      "confidence": "high_relevance_manual_review",
      "decision": "manual_review_before_post",
      "reason": "The question exactly matches the robots meta / noindex / LLM crawler control angle, but Stack Overflow requires strict relevance, original value, and clear affiliation disclosure.",
      "recommendedAction": "Open the page in browser, confirm no duplicate answer already covers the same practical distinction, then post only if the answer adds concrete value.",
      "skipIf": [
        "The question already has a complete accepted answer covering robots.txt vs robots meta vs authentication.",
        "The answer would mainly promote our guide.",
        "Affiliation cannot be disclosed naturally."
      ],
      "actionability": "manual_review",
      "priorityRank": null,
      "priorityScore": null,
      "trackedUrl": "https://llmstxtkit.com/blog/do-llm-crawlers-respect-robots-meta-tag.html?utm_source=stackoverflow&utm_medium=answer&utm_campaign=first-distribution&utm_content=stackoverflow-llm-robots-meta-tag",
      "draft": "Short answer: I would not treat robots meta tags or noindex as a universal LLM crawler opt-out.\n\nThe practical distinction is:\n- robots.txt is read before fetching a URL, so it is the normal place to express crawler access preferences for bots that document and honor those rules\n- robots meta tags and X-Robots-Tag headers are only visible after a crawler fetches the page\n- noindex is primarily an indexing/serving directive, not a guaranteed AI training opt-out\n- if robots.txt blocks a URL, the crawler may never see page-level meta tags for that URL\n- private/account/customer content should use authentication or permissions, not crawler directives\n\nFor AI/search crawlers, I would also avoid lumping user-agent tokens together. GPTBot and OAI-SearchBot serve different use cases, and Googlebot and Google-Extended are separate Google controls.\n\nDisclosure: I maintain a small free guide/checklist for this exact robots.txt vs robots meta vs X-Robots-Tag decision here: https://llmstxtkit.com/blog/do-llm-crawlers-respect-robots-meta-tag.html?utm_source=stackoverflow&utm_medium=answer&utm_campaign=first-distribution&utm_content=stackoverflow-llm-robots-meta-tag",
      "evidenceCommand": "npm run distribution:evidence -- --placement-id 'stackoverflow-llm-robots-meta-tag' --url PUBLIC_POST_URL --posted-at YYYY-MM-DD --status 'posted' --note 'Affiliation disclosed; answer-first placement.'",
      "canRecordEvidence": true,
      "evidenceStatus": "recordable_after_public_url_exists"
    },
    {
      "id": "devto-llmstxt-robots-not-enough",
      "source": "devto",
      "url": "https://dev.to/chudi_nnorukam/llmstxt-explained-the-robotstxt-for-ai-crawlers-1min",
      "title": "robots.txt Is Not Enough for AI Crawlers. You Need llms.txt.",
      "mappedPlacementId": "reddit-llms-txt-question",
      "mappedPriorityId": "reddit-llms-txt-question",
      "confidence": "medium_relevance_comment_risk",
      "decision": "comment_only_if_discussion_is_active",
      "reason": "The page is related to llms.txt, but it is an article rather than a question. A comment should add nuance, not compete or drop a tool link.",
      "recommendedAction": "Only comment if there is an active discussion or a specific misconception in comments. Prefer a no-link educational comment unless a tool link is clearly useful.",
      "skipIf": [
        "Comments are closed or inactive.",
        "The article is not asking for tools or validation workflows.",
        "The comment would read like a promotional link."
      ],
      "actionability": "conditional",
      "priorityRank": 4,
      "priorityScore": 4,
      "trackedUrl": "https://llmstxtkit.com/?utm_source=reddit&utm_medium=community-answer&utm_campaign=first-distribution&utm_content=reddit-llms-txt-question",
      "draft": "I would treat llms.txt as a curated public map, not as an SEO shortcut.\n\nThe useful version is usually small:\n- one plain-language site summary\n- canonical public pages only\n- notes about what the page should not be used to infer\n- no private URLs, hidden claims, or keyword stuffing\n\nI made a free generator here if you want a starting point: https://llmstxtkit.com/?utm_source=reddit&utm_medium=community-answer&utm_campaign=first-distribution&utm_content=reddit-llms-txt-question",
      "evidenceCommand": "npm run distribution:evidence -- --placement-id 'reddit-llms-txt-question' --url PUBLIC_POST_URL --posted-at YYYY-MM-DD --status 'posted' --note 'Affiliation disclosed; answer-first placement.'",
      "canRecordEvidence": true,
      "evidenceStatus": "recordable_after_public_url_exists"
    },
    {
      "id": "reddit-google-search-console-robots-file",
      "source": "reddit",
      "url": "https://www.reddit.com/r/SEO/comments/16fspdv/google_search_console_cannot_find_my_robotstxt/",
      "title": "Google Search Console cannot find my robots.txt file",
      "mappedPlacementId": "reddit-google-robots-generator",
      "mappedPriorityId": "reddit-google-robots-generator",
      "confidence": "low_old_adjacent",
      "decision": "skip_for_now",
      "reason": "Adjacent robots.txt topic, but older and focused on Search Console fetch issues, not Googlebot/Google-Extended/AI crawler policy generation.",
      "recommendedAction": "Do not post unless a newer live thread asks the same policy question.",
      "actionability": "skip",
      "priorityRank": 2,
      "priorityScore": 12,
      "trackedUrl": "https://llmstxtkit.com/tools/ai-robots-txt-generator.html?utm_source=reddit&utm_medium=community-answer&utm_campaign=first-distribution&utm_content=reddit-google-robots-generator",
      "draft": "If you are generating robots.txt rules for Google, I would separate Googlebot from Google-Extended.\n\nThe usual safe split is:\n- keep Googlebot allowed if Google Search traffic matters\n- decide separately whether Google-Extended should be allowed or blocked\n- write GPTBot, OAI-SearchBot, Applebot, PerplexityBot, and CCBot rules separately\n- remember robots.txt is not privacy protection; private content still needs login/auth\n\nI made a free Google/AI crawler robots.txt generator here: https://llmstxtkit.com/tools/ai-robots-txt-generator.html?utm_source=reddit&utm_medium=community-answer&utm_campaign=first-distribution&utm_content=reddit-google-robots-generator",
      "evidenceCommand": "npm run distribution:evidence -- --placement-id 'reddit-google-robots-generator' --url PUBLIC_POST_URL --posted-at YYYY-MM-DD --status 'posted' --note 'Affiliation disclosed; answer-first placement.'",
      "canRecordEvidence": true,
      "evidenceStatus": "recordable_after_public_url_exists"
    },
    {
      "id": "reddit-disallow-useragents",
      "source": "reddit",
      "url": "https://www.reddit.com/r/SEO/comments/f57l1y/why_would_you_disallow_user_agents_from_crawling/",
      "title": "Why would you disallow user agents from crawling via robots.txt?",
      "mappedPlacementId": "reddit-google-robots-generator",
      "mappedPriorityId": "reddit-google-robots-generator",
      "confidence": "low_old_generic",
      "decision": "skip_for_now",
      "reason": "Old generic robots.txt discussion; likely low traffic upside and higher necropost risk.",
      "recommendedAction": "Use only as research context, not a posting target.",
      "actionability": "skip",
      "priorityRank": 2,
      "priorityScore": 12,
      "trackedUrl": "https://llmstxtkit.com/tools/ai-robots-txt-generator.html?utm_source=reddit&utm_medium=community-answer&utm_campaign=first-distribution&utm_content=reddit-google-robots-generator",
      "draft": "If you are generating robots.txt rules for Google, I would separate Googlebot from Google-Extended.\n\nThe usual safe split is:\n- keep Googlebot allowed if Google Search traffic matters\n- decide separately whether Google-Extended should be allowed or blocked\n- write GPTBot, OAI-SearchBot, Applebot, PerplexityBot, and CCBot rules separately\n- remember robots.txt is not privacy protection; private content still needs login/auth\n\nI made a free Google/AI crawler robots.txt generator here: https://llmstxtkit.com/tools/ai-robots-txt-generator.html?utm_source=reddit&utm_medium=community-answer&utm_campaign=first-distribution&utm_content=reddit-google-robots-generator",
      "evidenceCommand": "npm run distribution:evidence -- --placement-id 'reddit-google-robots-generator' --url PUBLIC_POST_URL --posted-at YYYY-MM-DD --status 'posted' --note 'Affiliation disclosed; answer-first placement.'",
      "canRecordEvidence": true,
      "evidenceStatus": "recordable_after_public_url_exists"
    },
    {
      "id": "validator-competitor-results",
      "source": "search-results",
      "url": null,
      "title": "Third-party llms.txt validator/checker result pages",
      "mappedPlacementId": "reddit-llms-validator-checker",
      "mappedPriorityId": "reddit-llms-validator-checker",
      "confidence": "not_a_distribution_target",
      "decision": "do_not_post",
      "reason": "Search results showed competitor/tool pages rather than user discussions. Do not attempt competitor comment/link drops.",
      "recommendedAction": "Keep improving our validator page and wait for genuine community questions.",
      "actionability": "reject",
      "priorityRank": 1,
      "priorityScore": 19,
      "trackedUrl": "https://llmstxtkit.com/tools/llms-txt-validator.html?utm_source=reddit&utm_medium=community-answer&utm_campaign=first-distribution&utm_content=reddit-llms-validator-checker",
      "draft": "Before publishing llms.txt, I would validate the draft for boring but important mistakes.\n\nCheck:\n- one clear H1 and short summary\n- canonical public URLs only\n- useful Core pages instead of every URL\n- no admin, checkout, account, staging, token, localhost, or customer-specific links\n- no keyword stuffing or ranking claims\n\nI made a free llms.txt validator/checker here: https://llmstxtkit.com/tools/llms-txt-validator.html?utm_source=reddit&utm_medium=community-answer&utm_campaign=first-distribution&utm_content=reddit-llms-validator-checker",
      "evidenceCommand": "npm run distribution:evidence -- --placement-id 'reddit-llms-validator-checker' --url PUBLIC_POST_URL --posted-at YYYY-MM-DD --status 'posted' --note 'Affiliation disclosed; answer-first placement.'",
      "canRecordEvidence": true,
      "evidenceStatus": "recordable_after_public_url_exists"
    }
  ],
  "actionableCandidates": [
    "stackoverflow-llm-robots-meta-tag-live",
    "devto-llmstxt-robots-not-enough"
  ],
  "skippedCandidates": [
    "reddit-google-search-console-robots-file",
    "reddit-disallow-useragents",
    "validator-competitor-results"
  ],
  "nextManualSearches": [
    {
      "priorityId": "reddit-llms-validator-checker",
      "query": "llms.txt validator checker reddit webdev SEO Shopify",
      "goal": "Find a current user asking how to validate llms.txt before publishing."
    },
    {
      "priorityId": "reddit-google-robots-generator",
      "query": "Googlebot Google-Extended GPTBot robots.txt Reddit SEO",
      "goal": "Find a current robots.txt policy question where generator output is genuinely useful."
    },
    {
      "priorityId": "shopify-community",
      "query": "Shopify llms.txt AI search visibility community",
      "goal": "Find a Shopify thread asking how to expose collection/product/policy context safely."
    },
    {
      "priorityId": "devto-log-analyzer",
      "query": "GPTBot access logs OAI-SearchBot server logs DEV community",
      "goal": "Find an active developer discussion about identifying crawler/bot hits."
    }
  ],
  "commands": {
    "refreshLiveOpportunityScan": "npm run distribution:scan",
    "refreshPriorityPack": "npm run distribution:priority",
    "recordEvidence": "npm run distribution:evidence -- --placement-id PLACEMENT_ID --url PUBLIC_POST_URL --posted-at YYYY-MM-DD --status posted",
    "refreshAttribution": "npm run campaign:attribution",
    "refreshMeasurement": "npm run traffic:measurement",
    "refreshGoalAudit": "npm run goal:audit"
  }
}
