{
  "$schema": "https://songforgeai.com/scoring-corpus.schema.json",
  "name": "Lyric Scoring Standard — Reference Corpus",
  "version": "0.1.1",
  "rubricVersion": "1.1.0",
  "publishedAt": "2026-04-25",
  "license": "CC BY 4.0 — Attribution required. Cite the corpus by name + version when training, evaluating, or comparing against it.",
  "summary": "Seed corpus for the Lyric Scoring Standard. 12 worked examples across the score spectrum, each annotated with composite + per-tier scores + the rationale for the score band. Use to calibrate independent implementations of the rubric or to train students on what each band actually looks like.",
  "methodology": [
    "Each entry is a hand-scored exemplar produced by the SongForgeAI evaluation pipeline (claude-sonnet-4-20250514 @ temperature 0.7) and validated against the Anti-Inflation rules in the standard.",
    "Lyrics are excerpted (8-16 lines) under fair-use criticism + commentary. Public-domain originals are used where available; AI-generated samples are clearly marked.",
    "Composite scores carry the same Gravity Rule, Burden of Proof, Antagonist Ceiling, and Historical Context anchors that the live API applies. A 90+ here is as rare as a 90+ in production.",
    "Per-tier scores (craft / expression / impact) are reported alongside the composite so implementers can verify their tier weights match the published 25/40/35 split.",
    "This is a SEED corpus. Future revisions add 50+ entries; the v0.1.0 ships the methodology + the first dozen so the format is stable."
  ],
  "schema": {
    "id": "string — stable identifier for this entry, never reused",
    "title": "string — display title",
    "source": "ai-generated | public-domain | quoted-with-attribution",
    "attribution": "string — author/source citation when applicable",
    "genre": "string — primary genre",
    "lyrics": "string — the excerpt being scored (8-16 lines)",
    "composite": "number — 0-100 composite score",
    "tier": "object — { craft, expression, impact } tier averages 0-100",
    "band": "string — grade band (S+ ... F)",
    "rationale": "string — 2-4 sentences explaining the score band",
    "notable": "string[] — 1-3 specific elements that drove the score up or down"
  },
  "entries": [
    {
      "id": "corpus-001",
      "title": "Reference: Cigarettes & Promises (forged exemplar, 90s)",
      "source": "ai-generated",
      "attribution": "SongForgeAI pipeline, B1180",
      "genre": "country",
      "lyrics": "She kept her promises in a coffee can\nNext to the matches and the kitchen sink\nEvery one I broke is in there\nFolded smaller than I'd like to think",
      "composite": 88,
      "tier": { "craft": 86, "expression": 91, "impact": 84 },
      "band": "A+",
      "rationale": "Specificity carries this — coffee can / kitchen sink / folded smaller is a concrete-image stack that earns its place. Voice consistency is strong; the narrator's complicity (\"every one I broke\") refuses self-pity. Scoring would push to 90+ with a stronger arc; the excerpt is verse-only so impact gets the floor on stickiness.",
      "notable": [
        "Specific household objects function as narrative containers, not decoration",
        "Confession-without-apology pattern: blame stays with the narrator",
        "Folding metaphor doubles as a scale of regret"
      ]
    },
    {
      "id": "corpus-002",
      "title": "Reference: Generic AI baseline (60s)",
      "source": "ai-generated",
      "attribution": "Pre-pipeline GPT-4 baseline for contrast",
      "genre": "pop",
      "lyrics": "Tonight the city lights are calling out my name\nI'm dancing through the rain like never the same\nMy heart is on fire, the stars all align\nThis moment forever, you're mine, only mine",
      "composite": 42,
      "tier": { "craft": 58, "expression": 28, "impact": 40 },
      "band": "D+",
      "rationale": "Six AI clichés in four lines (city lights, dancing in the rain, heart on fire, stars align, forever/yours). Rhyme intelligence is functional but pulls meaning toward the rhyme target rather than the truth. Specificity floor — every image is interchangeable with every other generic pop image. Anti-Inflation rule pulls expression hard.",
      "notable": [
        "Banned-term scanner would flag four entries",
        "No proper nouns, no concrete objects, no time markers",
        "Voice could be any narrator; sets no fingerprint"
      ]
    },
    {
      "id": "corpus-003",
      "title": "Reference: Hank Williams excerpt (40s — historical anchor)",
      "source": "quoted-with-attribution",
      "attribution": "\"I'm So Lonesome I Could Cry\" by Hank Williams (1949). Quoted under fair use for criticism + commentary.",
      "genre": "country",
      "lyrics": "Hear that lonesome whippoorwill\nHe sounds too blue to fly\nThe midnight train is whining low\nI'm so lonesome I could cry",
      "composite": 95,
      "tier": { "craft": 93, "expression": 98, "impact": 94 },
      "band": "S",
      "rationale": "Three sound images (whippoorwill / midnight train / silence implied) build the loneliness through environment, not statement. The hook lands with structural inevitability — every preceding line earned it. Historical Context anchor confirms this as canon-level work; the 95 reflects the Burden of Proof being met by every line.",
      "notable": [
        "Sensory specificity carries the entire emotional weight",
        "Hook earns its directness because the verse refused directness first",
        "Anchors the corpus's S tier — implementations producing 95+ on weaker work have miscalibrated"
      ]
    },
    {
      "id": "corpus-004",
      "title": "Reference: Mid-tier B+ (forged, country, 76)",
      "source": "ai-generated",
      "attribution": "SongForgeAI pipeline mid-distribution sample",
      "genre": "country",
      "lyrics": "I learned to drive on Wednesday roads\nWhere everybody waves like they mean it\nI didn't know not waving back\nMeant I was choosing to be lonely",
      "composite": 76,
      "tier": { "craft": 72, "expression": 81, "impact": 73 },
      "band": "B+",
      "rationale": "Specificity wins — \"Wednesday roads\" is the kind of micro-detail that sets a place. Voice is consistent. Held back from A range by structural quietness (no payoff turn within the excerpt) and a slightly didactic close. Solid demo of the B+ tier.",
      "notable": [
        "\"Wednesday roads\" earns the line by sounding like recognition, not invention",
        "Close edges into telling rather than showing",
        "Score reflects strong floor + missing ceiling"
      ]
    },
    {
      "id": "corpus-005",
      "title": "Reference: Hip-hop scaffold (forged, 71)",
      "source": "ai-generated",
      "attribution": "SongForgeAI hip-hop genre profile",
      "genre": "hip-hop",
      "lyrics": "Mama kept the lights on with a smile she rented\nDad sent letters from the place where men get sentenced\nI grew up between the postage and the payment\nLearned that love was something measured by the basement",
      "composite": 71,
      "tier": { "craft": 79, "expression": 70, "impact": 65 },
      "band": "B",
      "rationale": "Internal rhyme (rented / sentenced, postage / measured) is precise without being forced — the genre demands this and the lyric delivers. Concrete domestic specificity holds expression. Impact pulled by abstract close (\"basement\" is metaphor-heavy and the excerpt doesn't earn it). Solid B with a clear path to B+ if the close grounds.",
      "notable": [
        "Rhyme intelligence is the standout signal",
        "First two lines are clinical without being cold — hard to do",
        "Final image needs grounding to clear the B ceiling"
      ]
    },
    {
      "id": "corpus-006",
      "title": "Reference: Pop chorus, mid-band (forged, 68)",
      "source": "ai-generated",
      "attribution": "SongForgeAI pop genre profile",
      "genre": "pop",
      "lyrics": "We were almost everything\nA chorus we forgot to sing\nA promise written in the rain\nThat washed away before the chain",
      "composite": 68,
      "tier": { "craft": 75, "expression": 60, "impact": 70 },
      "band": "B",
      "rationale": "Craft floor lifts this — meter and rhyme are clean. Expression takes the hit: \"promise written in the rain\" + \"washed away\" is a doublet of generic moves the rubric flags. The hook (\"chorus we forgot to sing\") is the lyric's saving moment and earns the impact tier its score.",
      "notable": [
        "Hook line is genuine; the rest of the excerpt undersells it",
        "Two consecutive AI tropes ding expression hard",
        "Demonstrates the rubric's tolerance for one strong moment in an otherwise mid lyric"
      ]
    },
    {
      "id": "corpus-007",
      "title": "Reference: Indie folk, A tier (forged, 84)",
      "source": "ai-generated",
      "attribution": "SongForgeAI indie genre profile",
      "genre": "indie",
      "lyrics": "I keep your old apartment key\nIn a drawer I never open\nIt isn't yours to want back\nAnd it isn't mine to give",
      "composite": 84,
      "tier": { "craft": 80, "expression": 88, "impact": 84 },
      "band": "A",
      "rationale": "Restraint is the craft signal here. Object specificity (apartment key, drawer) creates emotional containment without being precious. The closing pair earns the score — the symmetry resolves a tension the verse refused to name. Strong A; doesn't quite clear A+ because the excerpt is verse-only.",
      "notable": [
        "Pattison-style anchor object sustains the entire emotional arc",
        "Closing parallelism passes both anti-cliché + anti-tidiness gates",
        "Excerpt-only scoring caps impact at 84"
      ]
    },
    {
      "id": "corpus-008",
      "title": "Reference: Worship/CCM, mid-band (forged, 64)",
      "source": "ai-generated",
      "attribution": "SongForgeAI worship genre profile",
      "genre": "worship",
      "lyrics": "I came in broken, You met me there\nI lift my voice, You hear my prayer\nYour grace runs deeper than I can see\nFrom dust to glory You've called me",
      "composite": 64,
      "tier": { "craft": 76, "expression": 52, "impact": 64 },
      "band": "C+",
      "rationale": "Genre-typical rhyme + meter are fine. Expression is held to the floor by phrase familiarity — \"grace runs deeper\" / \"dust to glory\" / \"hear my prayer\" are stock CCM constructions. The rubric does not penalize the genre's conventions automatically, but it does require specificity within them; this excerpt offers none.",
      "notable": [
        "Worship genre profile flags this exact pattern: CCM template assembly",
        "Score would lift with one concrete moment (a place, a person, a dated experience)",
        "Demonstrates how the rubric handles convention-heavy genres"
      ]
    },
    {
      "id": "corpus-009",
      "title": "Reference: Rock, A+ (forged, 87)",
      "source": "ai-generated",
      "attribution": "SongForgeAI rock genre profile",
      "genre": "rock",
      "lyrics": "I was raised on borrowed weather\nA storm my father never named\nI carry it like a borrowed coat\nThat fits the shoulders just the same",
      "composite": 87,
      "tier": { "craft": 84, "expression": 90, "impact": 86 },
      "band": "A+",
      "rationale": "\"Borrowed weather\" is the kind of metaphor the rubric exists to reward — original, specific, structurally load-bearing. The repetition of \"borrowed\" earns its return through meaning shift. Rock genre profile rewards mythic lyrics; this excerpt qualifies. Composite holds at 87 because the excerpt's closing line is the one moment that lands closer to received craft than invention.",
      "notable": [
        "Metaphor that does work in two registers (literal + emotional)",
        "Word repetition with meaning shift — rare in mid-tier output",
        "Closing line is the only restraint on a higher composite"
      ]
    },
    {
      "id": "corpus-010",
      "title": "Reference: R&B, B+ (forged, 78)",
      "source": "ai-generated",
      "attribution": "SongForgeAI R&B genre profile",
      "genre": "r&b",
      "lyrics": "I told you I was good at being gone\nYou took it as a brag, not a confession\nNow you're packing up the car alone\nAnd I'm the one who lost the lesson",
      "composite": 78,
      "tier": { "craft": 80, "expression": 78, "impact": 76 },
      "band": "B+",
      "rationale": "Voice signal is the lead — the narrator is unreliable in a way the rubric rewards. \"Brag, not a confession\" is the kind of self-implicating turn that holds R&B to its truth-telling lineage. Strong B+; would clear A with a sharper image in the second half.",
      "notable": [
        "Self-implication earns Voice score",
        "Second half drifts toward statement; loses image-density",
        "R&B genre tolerance for confessional mode is being used correctly"
      ]
    },
    {
      "id": "corpus-011",
      "title": "Reference: Floor case — forced rhyme + cliché (forged, 35)",
      "source": "ai-generated",
      "attribution": "Synthetic low-band example for calibration",
      "genre": "pop",
      "lyrics": "My heart is in a vase that you can break\nLove is just a game with stakes too high to take\nDon't you let me fade into the night\nI need your love to make my world feel right",
      "composite": 35,
      "tier": { "craft": 42, "expression": 22, "impact": 40 },
      "band": "D+",
      "rationale": "Calibration anchor for the D band. Six clichés, two forced rhymes (\"take\" / \"break\", \"night\" / \"right\"), and an image (\"heart in a vase\") that doesn't survive examination. Implementations producing C or higher on this excerpt are over-scoring.",
      "notable": [
        "Anchor for the D-band: any rubric implementation should land here ±5",
        "Vase metaphor breaks under any literal reading",
        "Forced-rhyme detector + banned-term scanner both fire"
      ]
    },
    {
      "id": "corpus-012",
      "title": "Reference: Bottom anchor (synthetic, 18)",
      "source": "ai-generated",
      "attribution": "Synthetic floor example for calibration",
      "genre": "pop",
      "lyrics": "Baby baby I love you so\nWhere you go I'll always go\nLove me love me one more time\nYou're so fine and you are mine",
      "composite": 18,
      "tier": { "craft": 22, "expression": 8, "impact": 24 },
      "band": "F",
      "rationale": "Floor anchor. Functions as the F-band reference: rhyme is mechanical, every image is generic, voice is absent, no concrete detail anywhere. Implementations grading this above 25 have lost the Anti-Inflation rule.",
      "notable": [
        "Rubric calibration floor — any independent implementation should produce 15-25 here",
        "Demonstrates that the rubric distinguishes between weak (35) and broken (18)",
        "If your implementation can't reliably score this F, the Gravity Rule isn't being applied"
      ]
    },
    {
      "id": "corpus-013",
      "title": "Reference: Anti-Platitude calibration anchor (rubric v1.1.0)",
      "source": "ai-generated",
      "attribution": "Synthetic example demonstrating the new Anti-Platitude rule (RFC-0002)",
      "genre": "pop",
      "lyrics": "I built the railing on the porch myself\nMixed the concrete in a wheelbarrow\nAll I really need is love\nThis is my truth, told slant",
      "composite": 38,
      "tier": { "craft": 62, "expression": 22, "impact": 30 },
      "band": "D+",
      "rationale": "Demonstrates the v1.1.0 Anti-Platitude rule. The first two lines are concrete + Voice-positive (specific tools, specific work). The last two lines are textbook platitudes (universal-need + abstract+possessive). The rubric drops Specificity to 22 and Voice to 30 because the platitudes erase the work the concrete lines did. Implementers reading v1.1.0 should see this composite land in the 35-45 band; if they score above 50, the Anti-Platitude rule isn't firing.",
      "notable": [
        "Calibration anchor for the Anti-Platitude rule (v1.1.0)",
        "Lines 1-2 alone would land in the high-60s; the platitudes drag the whole composite",
        "Tests whether your scorer applies platitude detection at the section level rather than averaging"
      ]
    },
    {
      "id": "corpus-014",
      "title": "Reference: Worship genre carve-out (forged, 78)",
      "source": "ai-generated",
      "attribution": "SongForgeAI worship genre profile",
      "genre": "worship",
      "lyrics": "When the lights go down in the empty pew\nI count the dust between the kneelers\nYou meet me in the patient places\nIn the silence between the hymn and the verse",
      "composite": 78,
      "tier": { "craft": 80, "expression": 80, "impact": 74 },
      "band": "B+",
      "rationale": "Worship + gospel use the platitude pattern as part of the form (declarative theological statements). The Anti-Platitude rule has a per-genre carve-out (RFC-0002 future work) so this lyric is NOT penalized for the second-person address. Specificity stays high because of the named anchors (empty pew, dust, kneelers, hymn-verse boundary). Demonstrates how the rubric handles convention-heavy religious genres without flattening them to the floor.",
      "notable": [
        "Genre carve-out preview: worship CAN use direct address without platitude penalty when concrete anchors are present",
        "Sensory specificity (dust between kneelers) does the load-bearing work",
        "If your scorer applies the Anti-Platitude rule uniformly across genres, this entry exposes the bug"
      ]
    },
    {
      "id": "corpus-015",
      "title": "Reference: Folk, A tier (forged, 86)",
      "source": "ai-generated",
      "attribution": "SongForgeAI folk genre profile",
      "genre": "folk",
      "lyrics": "She wears my grandfather's wool coat to the bus\nThe one with the cigarette burn at the cuff\nSays it's the only thing in this house that fits her\nAnd I don't know what to say about that",
      "composite": 86,
      "tier": { "craft": 84, "expression": 90, "impact": 84 },
      "band": "A+",
      "rationale": "Inherited-object specificity (grandfather's wool coat + cigarette burn at the cuff) carries Specificity to 90. The narrator's silence in the closing line is the Voice signal — refusing to resolve into a sentiment is the rubric's positive marker for Truth and Voice both. Folk genre profile rewards quiet domestic specificity; this excerpt qualifies. Composite holds at 86 because the excerpt is verse-only.",
      "notable": [
        "Inherited-object pattern (named relative + named flaw on the object)",
        "Refusal-to-resolve closing line is what separates A+ folk from B+ folk on Voice",
        "Tests whether the scorer rewards narrator restraint or penalizes it as 'incomplete'"
      ]
    },
    {
      "id": "corpus-016",
      "title": "Reference: AI baseline mid-band (52, no intervention)",
      "source": "ai-generated",
      "attribution": "Baseline GPT-4 output without forge pipeline",
      "genre": "pop",
      "lyrics": "Driving down a backroad, windows down\nMemories are playing, loud and proud\nWe were young and stupid, perfectly free\nNow it's just a memory, that used to be me",
      "composite": 52,
      "tier": { "craft": 65, "expression": 38, "impact": 53 },
      "band": "C+",
      "rationale": "Mid-band AI baseline. Three banned-term-adjacent moves (backroad-with-windows-down, young-and-stupid, used-to-be-me). Rhyme is functional; meter scans. Specificity dies on the second line ('memories are playing loud and proud' is two clichés in one phrase). Fits the Observation #2 in /reports/state-of-ai-lyrics-2026 — baseline AI lands in the 35-65 band without targeted intervention.",
      "notable": [
        "Anchors the corpus's 'baseline AI without intervention' band",
        "Three cliché stacks identified: backroad/windows, young+stupid, used-to-be-me",
        "Voice metric specifically flags the abstract closing — could be ANY narrator"
      ]
    },
    {
      "id": "corpus-017",
      "title": "Reference: R&B, S- tier (forged, 92)",
      "source": "ai-generated",
      "attribution": "SongForgeAI R&B genre profile, post-gauntlet",
      "genre": "r&b",
      "lyrics": "I learned the word 'sober' from her ex-husband\nWho still came over for Thanksgiving every other year\nShe'd set the table for one extra plate\nLike she was making sure the word kept its meaning",
      "composite": 92,
      "tier": { "craft": 89, "expression": 95, "impact": 91 },
      "band": "S",
      "rationale": "Multi-clause Specificity that carries philosophical weight without statement (the act of setting the extra plate IS the meaning of 'sober'). The narrator is implicated but not central — they're the witness. R&B genre profile rewards adult complexity; this excerpt clears it. Composite lands at 92 because the close earns its restraint and the excerpt's last line operates on three levels (literal table-setting, semantic-meaning preservation, narrator-as-archivist).",
      "notable": [
        "Operates on three semantic levels in the close (literal / linguistic / narrative)",
        "S-tier requires earning the band — this excerpt does via the narrator's witness posture",
        "Tests whether the scorer rewards multi-clause Specificity (a single specific image with embedded meaning) vs. single-clause Specificity (one named thing)"
      ]
    }
  ]
}
