{
  "version": "2026-v1",
  "testWindow": {
    "plannedStart": "2026-05-05",
    "plannedEnd": "2026-05-19",
    "actualStart": null,
    "actualEnd": null
  },
  "provider": "openrouter.ai",
  "providerNote": "All models accessed via OpenRouter API for unified billing and SDK. Each call records the upstream provider, the resolved model version, and the timestamp. The OpenRouter routing layer adds <100ms and a small cost premium versus direct API access; the trade-off is operational simplicity and reproducibility from a single key.",
  "callParameters": {
    "temperature": 0.7,
    "topP": 1.0,
    "maxTokens": 2048,
    "systemPrompt": null,
    "stopSequences": null,
    "runsPerPrompt": 3,
    "scoredRun": "best-of-three (highest scoring run after both human scorers complete; ties broken by earliest run)"
  },
  "models": [
    {
      "id": "claude-opus-4-7",
      "displayName": "Claude Opus 4.7",
      "provider": "Anthropic",
      "openrouterId": "anthropic/claude-opus-4.7",
      "tier": "frontier",
      "notes": "Anthropic's frontier model as of April 2026. Highest cost per call; tested for ceiling capability."
    },
    {
      "id": "claude-sonnet-4-6",
      "displayName": "Claude Sonnet 4.6",
      "provider": "Anthropic",
      "openrouterId": "anthropic/claude-sonnet-4.6",
      "tier": "workhorse",
      "notes": "Anthropic's workhorse tier. Tested because this is the model Christian leaders use far more often than Opus in chat UIs and integrations."
    },
    {
      "id": "gpt-5",
      "displayName": "GPT-5",
      "provider": "OpenAI",
      "openrouterId": "openai/gpt-5",
      "tier": "frontier",
      "notes": "OpenAI's current frontier model. Most-used AI by Christian leaders generally."
    },
    {
      "id": "gemini-2-5-pro",
      "displayName": "Gemini 2.5 Pro",
      "provider": "Google",
      "openrouterId": "google/gemini-2.5-pro",
      "tier": "frontier",
      "notes": "Google's current frontier model."
    },
    {
      "id": "deepseek-v3",
      "displayName": "DeepSeek V3",
      "provider": "DeepSeek",
      "openrouterId": "deepseek/deepseek-chat",
      "tier": "frontier",
      "notes": "Included because the Gospel Coalition AI Christian Benchmark (2025) found DeepSeek R1 unexpectedly strong on theological prompts. Editorial honesty: do not exclude a strong performer because it is non-American."
    }
  ],
  "deferredToFutureEditions": [
    {
      "id": "llama-4-70b",
      "reason": "Open-source baseline. Defer to 2027 to constrain v1 scope. Adds testing time and a sixth scoring column without proportional narrative value for the inaugural edition."
    },
    {
      "id": "grok",
      "reason": "Defer to 2027 pending stable API access via OpenRouter and clearer pinned versioning."
    },
    {
      "id": "mistral-large",
      "reason": "Defer to 2027. Strong European model; not a top-of-mind tool for the target audience yet."
    }
  ],
  "consumerUxValidation": {
    "description": "Five representative prompts will be manually run through each model's official chat UI (claude.ai, chatgpt.com, gemini.google.com, deepseek.com) on the same date as the API run. Differences between API and chat-UI responses will be reported as a sidebar in the report. This is editorial color, not part of the scored dataset.",
    "selectedPrompts": ["MP-01", "TL-01", "TL-02", "SF-01", "SF-02"]
  }
}