Cookbook

Prompting Claude for "JSON mode"

Get reliable JSON output from Claude using effective prompting techniques without constrained sampling.

Alex Albert
Alex Albert
@alexalbertt
Published on March 3, 2024

Prompting Claude for "JSON Mode"

Claude doesn't have a formal "JSON Mode" with constrained sampling. But not to worry -- you can still get reliable JSON from Claude! This recipe will show you how.

First, let's look at Claude's default behavior.

python
%pip install anthropic
python
import json
import re
from pprint import pprint
 
from anthropic import Anthropic
python
client = Anthropic()
MODEL_NAME = "claude-opus-4-1"
python
message = (
    client.messages.create(
        model=MODEL_NAME,
        max_tokens=1024,
        messages=[
            {
                "role": "user",
                "content": "Give me a JSON dict with names of famous athletes & their sports.",
            },
        ],
    )
    .content[0]
    .text
)
print(message)
Here is a JSON dictionary with names of famous athletes and their respective sports:

{
  "athletes": [
    {
      "name": "Usain Bolt",
      "sport": "Track and Field"
    },
    {
      "name": "Michael Phelps",
      "sport": "Swimming"
    },
    {
      "name": "Serena Williams",
      "sport": "Tennis"
    },
    {
      "name": "LeBron James",
      "sport": "Basketball"
    },
    {
      "name": "Lionel Messi",
      "sport": "Soccer"
    },
    {
      "name": "Simone Biles",
      "sport": "Gymnastics"
    },
    {
      "name": "Tom Brady",
      "sport": "American Football"
    },
    {
      "name": "Muhammad Ali",
      "sport": "Boxing"
    },
    {
      "name": "Nadia Comaneci",
      "sport": "Gymnastics"
    },
    {
      "name": "Michael Jordan",
      "sport": "Basketball"
    },
    {
      "name": "Pelé",
      "sport": "Soccer"
    },
    {
      "name": "Roger Federer",
      "sport": "Tennis"
    }
  ]
}

Claude followed instructions and outputted a nice dictionary, which we can extract with code:

python
def extract_json(response):
    json_start = response.index("{")
    json_end = response.rfind("}")
    return json.loads(response[json_start : json_end + 1])
 
 
extract_json(message)
{'athletes': [{'name': 'Usain Bolt', 'sport': 'Track and Field'},
  {'name': 'Michael Phelps', 'sport': 'Swimming'},
  {'name': 'Serena Williams', 'sport': 'Tennis'},
  {'name': 'LeBron James', 'sport': 'Basketball'},
  {'name': 'Lionel Messi', 'sport': 'Soccer'},
  {'name': 'Simone Biles', 'sport': 'Gymnastics'},
  {'name': 'Tom Brady', 'sport': 'American Football'},
  {'name': 'Muhammad Ali', 'sport': 'Boxing'},
  {'name': 'Nadia Comaneci', 'sport': 'Gymnastics'},
  {'name': 'Michael Jordan', 'sport': 'Basketball'},
  {'name': 'Pelé', 'sport': 'Soccer'},
  {'name': 'Roger Federer', 'sport': 'Tennis'}]}

But what if we want Claude to skip the preamble and go straight to the JSON? One simple way is to prefill Claude's response and include a "{" character.

python
message = (
    client.messages.create(
        model=MODEL_NAME,
        max_tokens=1024,
        messages=[
            {
                "role": "user",
                "content": "Give me a JSON dict with names of famous athletes & their sports.",
            },
            {"role": "assistant", "content": "Here is the JSON requested:\n{"},
        ],
    )
    .content[0]
    .text
)
print(message)
"athletes":[
      {
         "name":"Michael Jordan",
         "sport":"Basketball"
      },
      {
         "name":"Babe Ruth",
         "sport":"Baseball"
      },
      {
         "name":"Muhammad Ali",
         "sport":"Boxing"
      },
      {
         "name":"Serena Williams",
         "sport":"Tennis"
      },
      {
         "name":"Wayne Gretzky",
         "sport":"Hockey"
      },
      {
         "name":"Michael Phelps",
         "sport":"Swimming"
      },
      {
         "name":"Usain Bolt",
         "sport":"Track and Field"
      },
      {
         "name":"Mia Hamm",
         "sport":"Soccer"
      },
      {
         "name":"Michael Schumacher",
         "sport":"Formula 1 Racing"
      },
      {
         "name":"Simone Biles",
         "sport":"Gymnastics"
      }
   ]
}

Now all we have to do is add back the "{" that we prefilled and we can extract the JSON.

python
output_json = json.loads("{" + message[: message.rfind("}") + 1])
output_json
{'athletes': [{'name': 'Michael Jordan', 'sport': 'Basketball'},
  {'name': 'Babe Ruth', 'sport': 'Baseball'},
  {'name': 'Muhammad Ali', 'sport': 'Boxing'},
  {'name': 'Serena Williams', 'sport': 'Tennis'},
  {'name': 'Wayne Gretzky', 'sport': 'Hockey'},
  {'name': 'Michael Phelps', 'sport': 'Swimming'},
  {'name': 'Usain Bolt', 'sport': 'Track and Field'},
  {'name': 'Mia Hamm', 'sport': 'Soccer'},
  {'name': 'Michael Schumacher', 'sport': 'Formula 1 Racing'},
  {'name': 'Simone Biles', 'sport': 'Gymnastics'}]}

For very long and complicated prompts, which contain multiple JSON outputs so that a string search for "{" and "}" don't do the trick, you can also have Claude output each JSON item in specified tags for future extraction.

python
message = (
    client.messages.create(
        model=MODEL_NAME,
        max_tokens=1024,
        messages=[
            {
                "role": "user",
                "content": """Give me a JSON dict with the names of 5 famous athletes & their sports.
Put this dictionary in <athlete_sports> tags.
 
Then, for each athlete, output an additional JSON dictionary. In each of these additional dictionaries:
- Include two keys: the athlete's first name and last name.
- For the values, list three words that start with the same letter as that name.
Put each of these additional dictionaries in separate <athlete_name> tags.""",
            },
            {"role": "assistant", "content": "Here is the JSON requested:"},
        ],
    )
    .content[0]
    .text
)
print(message)

{
  "Michael Jordan": "Basketball",
  "Serena Williams": "Tennis",
  "Lionel Messi": "Soccer", 
  "Usain Bolt": "Track and Field",
  "Michael Phelps": "Swimming"
}



{
  "first": ["Magnificent", "Motivating", "Memorable"],
  "last": ["Joyful", "Jumping", "Jocular"]
}



{
  "first": ["Skillful", "Strong", "Superstar"],
  "last": ["Winning", "Willful", "Wise"]
}



{
  "first": ["Legendary", "Lively", "Leaping"],
  "last": ["Magical", "Marvelous", "Masterful"]  
}



{
  "first": ["Unbeatable", "Unbelievable", "Unstoppable"],
  "last": ["Brave", "Bold", "Brilliant"]
}



{
  "first": ["Marvelous", "Methodical", "Medalist"],
  "last": ["Powerful", "Persevering", "Precise"]
}

Now, we can use an extraction regex to get all the dictionaries.

python
import re
 
 
def extract_between_tags(tag: str, string: str, strip: bool = False) -> list[str]:
    ext_list = re.findall(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL)
    if strip:
        ext_list = [e.strip() for e in ext_list]
    return ext_list
 
 
athlete_sports_dict = json.loads(extract_between_tags("athlete_sports", message)[0])
athlete_name_dicts = [json.loads(d) for d in extract_between_tags("athlete_name", message)]
python
pprint(athlete_sports_dict)
{'Lionel Messi': 'Soccer',
 'Michael Jordan': 'Basketball',
 'Michael Phelps': 'Swimming',
 'Serena Williams': 'Tennis',
 'Usain Bolt': 'Track and Field'}
python
pprint(athlete_name_dicts, width=1)
[{'first': ['Magnificent',
            'Motivating',
            'Memorable'],
  'last': ['Joyful',
           'Jumping',
           'Jocular']},
 {'first': ['Skillful',
            'Strong',
            'Superstar'],
  'last': ['Winning',
           'Willful',
           'Wise']},
 {'first': ['Legendary',
            'Lively',
            'Leaping'],
  'last': ['Magical',
           'Marvelous',
           'Masterful']},
 {'first': ['Unbeatable',
            'Unbelievable',
            'Unstoppable'],
  'last': ['Brave',
           'Bold',
           'Brilliant']},
 {'first': ['Marvelous',
            'Methodical',
            'Medalist'],
  'last': ['Powerful',
           'Persevering',
           'Precise']}]

So to recap:

  • You can use string parsing to extract the text between "json" and "" to get the JSON.
  • You can remove preambles before the JSON via a partial Assistant message. (However, this removes the possibility of having Claude do "Chain of Thought" for increased intelligence before beginning to output the JSON.)
  • You can get rid of text that comes after the JSON by using a stop sequence.
  • You can instruct Claude to output JSON in XML tags to make it easy to collect afterward for more complex prompts.