Multimodal Evaluators
Example 1: Document Extraction Verification
1
import base64
import json
from pathlib import Path
import requests
from fiddler_evals import init
from fiddler_evals.evaluators import CustomJudge
# Connection settings for the Fiddler deployment. Both values are
# placeholders: substitute your own organisation URL and access token.
URL = 'https://your-org.fiddler.ai'
TOKEN = 'your-access-token'
# Hand the URL and token to fiddler_evals; this runs before the evaluator
# below is constructed.
init(url=URL, token=TOKEN)
def load_document(source: str) -> tuple[str, str]:
    """
    Load a document from a file path or URL.

    The MIME type is chosen from the file extension (case-insensitive).
    For URLs the extension is taken from the path component only, so a
    query string or fragment (``.../table.png?raw=true``) does not hide it.
    Unknown or missing extensions map to ``application/octet-stream``.

    :param source: Local file path or HTTP(S) URL
    :returns: Tuple of (base64_data, mime_type)
    :raises requests.RequestException: if the download fails or the server
        answers with an error status
    :raises OSError: if the local file cannot be read
    """
    # Function-scope import keeps the snippet copy-paste friendly without
    # touching the import list at the top of the example.
    from urllib.parse import urlparse

    mime_types = {
        '.pdf': 'application/pdf',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
    }

    if source.startswith(('http://', 'https://')):
        headers = {'User-Agent': 'FiddlerEvals/1.0'}
        response = requests.get(source, headers=headers, timeout=10)
        # Fail loudly on 4xx/5xx instead of encoding an error page.
        response.raise_for_status()
        content = response.content
        # Only the URL path carries the file name; ignore ?query and #fragment.
        ext = Path(urlparse(source).path).suffix.lower()
    else:
        path = Path(source)
        ext = path.suffix.lower()
        content = path.read_bytes()

    mime_type = mime_types.get(ext, 'application/octet-stream')
    b64_data = base64.b64encode(content).decode('utf-8')
    return b64_data, mime_type
# Judge that compares extracted fields against a source document.
# The prompt exposes two template variables, ``document`` and
# ``extracted_data``, and the judge is asked to return two fields:
# a boolean ``extraction_accurate`` and a string ``errors_found``.
extraction_judge = CustomJudge(
    prompt_template="""
You are verifying data extraction accuracy. Compare the extracted data
against the source document and determine if the extraction is correct.
Verify fields "metric" and "outputType" accurately match the source document.
Respond with:
- extraction_accurate: True if all extracted fields match the source document
- errors_found: Briefly list any extraction errors, or "None" if accurate
Source Document:
{{ document }}
Extracted Data:
{{ extracted_data }}
""",
    output_fields={
        'extraction_accurate': {'type': 'boolean'},
        'errors_found': {'type': 'string'},
    },
    model='fiddler/ministral3-8b',
)

# Fetch the sample table image and get it as base64 plus its MIME type.
b64_data, mime_type = load_document(
    'https://media.githubusercontent.com/media/fiddler-labs/fiddler-examples/main/cookbooks/assets/multimodal-text-statistics-table.png'
)

# Deliberately incomplete extraction: just one of the table's four rows,
# so the judge has something to flag.
extracted_json = [{'metric': 'Textstat', 'outputType': 'float'}]

# The document is passed as a list holding one base64-encoded media part;
# the extracted rows are passed as a JSON string.
document_part = {
    'media_type': mime_type,
    'encoding': 'base64',
    'data': b64_data,
}
judge_inputs = {
    'document': [document_part],
    'extracted_data': json.dumps(extracted_json),
}
scores = extraction_judge.score(inputs=judge_inputs)

# Index the returned scores by name so each output field can be looked up.
scores_dict = {score.name: score for score in scores}
accuracy_score = scores_dict["extraction_accurate"]
errors_score = scores_dict["errors_found"]
print(f'Extraction accurate: {accuracy_score.value}')
print(f'Errors found: {errors_score.label}')
# Example output:
# Extraction accurate: 0.0
# Errors found: Incomplete extraction: Missing 'Evaluate', 'Sentiment', and 'Token Count' metrics. The 'outputType' for 'Textstat' is correct, but the extraction only includes one entry instead of all four metrics listed in the source document.

Example 2: Document Summarization Faithfulness
1
2
Tips
Stay Within Size Limits
Context
Limit