The biggest challenge with LLMs in production isn’t getting good responses—it’s getting parseable responses. When you need JSON for your pipeline, “Here’s the data you requested:” followed by markdown-wrapped output breaks everything. Here’s how to reliably extract structured data.
The Problem#
1
2
3
4
5
6
7
8
| response = client.chat.completions.create(
model="gpt-4",
messages=[{"role": "user", "content": "Extract the person's name and age from: 'John Smith is 34 years old'"}]
)
print(response.choices[0].message.content)
# "The person's name is John Smith and their age is 34."
# ... not what we needed
|
You wanted {"name": "John Smith", "age": 34}. You got prose.
Solution 1: JSON Mode#
OpenAI and Anthropic both support JSON mode, which guarantees valid JSON output:
1
2
3
4
5
6
7
8
9
10
11
12
| # OpenAI
response = client.chat.completions.create(
model="gpt-4-turbo-preview",
response_format={"type": "json_object"},
messages=[
{"role": "system", "content": "Extract data as JSON with 'name' and 'age' fields."},
{"role": "user", "content": "John Smith is 34 years old"}
]
)
data = json.loads(response.choices[0].message.content)
# {"name": "John Smith", "age": 34}
|
1
2
3
4
5
6
7
8
9
| # Anthropic Claude
response = client.messages.create(
model="claude-3-opus-20240229",
max_tokens=1024,
messages=[
{"role": "user", "content": "Extract as JSON with 'name' and 'age' fields: John Smith is 34 years old"}
],
# Claude doesn't have json_mode, but follows instructions well
)
|
Caveat: JSON mode guarantees valid JSON syntax, not valid schema. You still need to validate the structure.
Solution 2: Function Calling#
More reliable than JSON mode—you define the exact schema:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
| # OpenAI Function Calling
tools = [{
"type": "function",
"function": {
"name": "extract_person",
"description": "Extract person information",
"parameters": {
"type": "object",
"properties": {
"name": {"type": "string", "description": "Full name"},
"age": {"type": "integer", "description": "Age in years"}
},
"required": ["name", "age"]
}
}
}]
response = client.chat.completions.create(
model="gpt-4-turbo-preview",
messages=[{"role": "user", "content": "John Smith is 34 years old"}],
tools=tools,
tool_choice={"type": "function", "function": {"name": "extract_person"}}
)
args = json.loads(response.choices[0].message.tool_calls[0].function.arguments)
# {"name": "John Smith", "age": 34}
|
The model is constrained to output arguments matching your schema.
Solution 3: Pydantic + Instructor#
The cleanest approach—define Python classes, get Python objects:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| import instructor
from pydantic import BaseModel
from openai import OpenAI
client = instructor.patch(OpenAI())
class Person(BaseModel):
name: str
age: int
person = client.chat.completions.create(
model="gpt-4-turbo-preview",
response_model=Person,
messages=[{"role": "user", "content": "John Smith is 34 years old"}]
)
print(person.name) # "John Smith"
print(person.age) # 34
|
Instructor handles retries, validation, and even streaming. It works with OpenAI, Anthropic, and local models.
Complex Nested Structures#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
| from typing import List, Optional
from pydantic import BaseModel, Field
class Address(BaseModel):
street: str
city: str
country: str
postal_code: Optional[str] = None
class Person(BaseModel):
name: str
age: int
email: Optional[str] = None
addresses: List[Address] = Field(default_factory=list)
class ExtractedData(BaseModel):
people: List[Person]
document_date: Optional[str] = None
data = client.chat.completions.create(
model="gpt-4-turbo-preview",
response_model=ExtractedData,
messages=[{"role": "user", "content": complex_document}]
)
for person in data.people:
print(f"{person.name}: {len(person.addresses)} addresses")
|
Solution 4: Outlines (Local Models)#
For local models via vLLM or llama.cpp, Outlines provides grammar-constrained generation:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| import outlines
model = outlines.models.transformers("mistralai/Mistral-7B-v0.1")
schema = '''{
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "integer"}
},
"required": ["name", "age"]
}'''
generator = outlines.generate.json(model, schema)
result = generator("John Smith is 34 years old")
# {"name": "John Smith", "age": 34}
|
Outlines constrains token generation at inference time—the model literally cannot output invalid JSON.
Handling Failures#
Even with these tools, extraction can fail. Build in retries and fallbacks:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
| from tenacity import retry, stop_after_attempt, retry_if_exception_type
from pydantic import ValidationError
@retry(
stop=stop_after_attempt(3),
retry=retry_if_exception_type((ValidationError, json.JSONDecodeError))
)
def extract_with_retry(text: str) -> Person:
return client.chat.completions.create(
model="gpt-4-turbo-preview",
response_model=Person,
messages=[{"role": "user", "content": text}]
)
# With fallback
def extract_safe(text: str) -> Optional[Person]:
try:
return extract_with_retry(text)
except Exception as e:
logger.warning(f"Extraction failed: {e}")
return None
|
Validation Patterns#
Don’t trust LLM output blindly:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
| from pydantic import BaseModel, Field, field_validator
class Person(BaseModel):
name: str = Field(min_length=1, max_length=100)
age: int = Field(ge=0, le=150)
email: Optional[str] = None
@field_validator('name')
@classmethod
def name_not_empty(cls, v):
if not v.strip():
raise ValueError('Name cannot be empty')
return v.strip()
@field_validator('email')
@classmethod
def valid_email(cls, v):
if v and '@' not in v:
raise ValueError('Invalid email format')
return v
|
Batch Processing#
For high volume, batch your requests:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| import asyncio
from openai import AsyncOpenAI
client = instructor.patch(AsyncOpenAI())
async def extract_person(text: str) -> Person:
return await client.chat.completions.create(
model="gpt-4-turbo-preview",
response_model=Person,
messages=[{"role": "user", "content": text}]
)
async def process_batch(texts: List[str]) -> List[Person]:
tasks = [extract_person(text) for text in texts]
return await asyncio.gather(*tasks, return_exceptions=True)
# Process 100 texts concurrently
results = asyncio.run(process_batch(texts))
|
Cost Optimization#
Structured extraction doesn’t need the biggest model:
1
2
3
4
5
6
7
8
9
10
11
12
13
| # Use cheaper models for simple extraction
simple_extraction = client.chat.completions.create(
model="gpt-3.5-turbo", # 10x cheaper than GPT-4
response_model=Person,
messages=[{"role": "user", "content": text}]
)
# Reserve GPT-4 for complex reasoning
complex_extraction = client.chat.completions.create(
model="gpt-4-turbo-preview",
response_model=ComplexAnalysis,
messages=[{"role": "user", "content": complex_document}]
)
|
Quick Reference#
| Method | Pros | Cons |
|---|---|---|
| JSON Mode | Simple, built-in | No schema enforcement |
| Function Calling | Schema enforced | More verbose setup |
| Instructor | Pythonic, validated | Extra dependency |
| Outlines | Works locally, guaranteed valid | Only for local models |
The Pattern That Works#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
| # 1. Define your schema with Pydantic
class MyData(BaseModel):
field1: str
field2: int
# 2. Use Instructor for extraction
client = instructor.patch(OpenAI())
# 3. Wrap in retry logic
@retry(stop=stop_after_attempt(3))
def extract(text: str) -> MyData:
return client.chat.completions.create(
model="gpt-3.5-turbo",
response_model=MyData,
messages=[{"role": "user", "content": text}]
)
# 4. Validate and handle failures
result = extract(text)
|
Structured output turns LLMs from chat toys into data processing engines. The key is constraining the output space—through JSON mode, function calling, or grammar constraints—so the model can’t give you unparseable results. Pick the method that fits your stack, add validation, and build reliable pipelines.