Reducto Classify API
Classify documents into a defined set of categories and run citation lookups against parsed content. Billed at 0.5 credits per page of context (default 5 pages = 2.5 credits per document).
# OpenAPI 3.1 document header: API metadata, server, default auth, and tags.
openapi: 3.1.0
info:
  title: Reducto Classify API
  version: 1.0.0
  description: Classify documents into a defined set of categories and run citation lookups against parsed content.
  contact:
    name: Reducto Support
    # Quoted on purpose: an unquoted value that starts with "[" is parsed as a
    # YAML flow sequence, but the OpenAPI Contact.email field must be a string.
    # TODO(review): this value looks like an e-mail-obfuscation placeholder,
    # not a real address — restore the actual support e-mail.
    email: '[email protected]'
    url: https://reducto.ai/contact
  license:
    name: Reducto Terms of Service
    url: https://reducto.ai/terms
servers:
  - url: https://platform.reducto.ai
    description: Reducto production platform
# Document-level security requirement (bearer scheme declared under components).
security:
  - SkippableHTTPBearer: []
tags:
  - name: Classify
paths:
# POST /classify — takes a ClassifyConfig body and returns a ClassifyResponse.
/classify:
  post:
    tags:
      - Classify
    summary: Classify
    # Added so this operation is documented the same way as its sibling /cite.
    description: 'Classify a document against the categories supplied in classification_schema
      and return the selected category.'
    operationId: classify_classify_post
    security:
      - SkippableHTTPBearer: []
    parameters:
      # Optional request header; its schema admits a string or null.
      - name: user-id
        in: header
        required: false
        schema:
          title: User-Id
          anyOf:
            - type: string
            - type: 'null'
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ClassifyConfig'
    responses:
      '200':
        description: Successful Response
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ClassifyResponse'
      '422':
        description: Validation Error
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/HTTPValidationError'
# POST /cite — takes a CiteRequest body and returns a CiteResponse.
/cite:
  post:
    tags:
      - Classify
    summary: Cite
    # Folded scalar: the two lines below join with a single space.
    description: >-
      Locate exact bounding boxes for text citations in a parsed document.
      Requires the document to have been parsed with return_ocr_data=true.
    operationId: cite_cite_post
    security:
      - SkippableHTTPBearer: []
    parameters:
      # Optional request header; its schema admits a string or null.
      - name: user-id
        in: header
        required: false
        schema:
          title: User-Id
          anyOf:
            - type: string
            - type: 'null'
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CiteRequest'
    responses:
      '200':
        description: Successful Response
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CiteResponse'
      '422':
        description: Validation Error
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/HTTPValidationError'
components:
schemas:
# Classification result payload; the 200 response body of POST /classify.
ClassifyResponse:
  type: object
  title: ClassifyResponse
  description: Response from classify job - returned when polling /job/{job_id}
  required: [job_id, result]
  properties:
    # Constant discriminator; always the literal "classify".
    response_type:
      title: Response Type
      type: string
      const: classify
      default: classify
    job_id:
      title: Job Id
      type: string
    result:
      $ref: '#/components/schemas/ClassifyResponseCategory'
    # Nullable confidence breakdown.
    response_confidence:
      anyOf:
        - $ref: '#/components/schemas/ResponseConfidence'
        - type: 'null'
    duration:
      title: Duration
      description: The duration of the classify request in seconds.
      anyOf:
        - type: number
        - type: 'null'
# One citation lookup: the text to find plus an optional region filter.
CitationQuery:
  type: object
  title: CitationQuery
  description: A text to locate in the document.
  required: [text]
  properties:
    text:
      title: Text
      description: Text to locate. Whitespace is normalized for matching.
      type: string
      minLength: 1
    bbox_filter:
      description: Optional region to limit search.
      anyOf:
        - $ref: '#/components/schemas/BoundingBox'
        - type: 'null'
# A single validation failure; items of HTTPValidationError.detail.
ValidationError:
  type: object
  title: ValidationError
  required: [loc, msg, type]
  properties:
    # Each array element is either a string or an integer.
    loc:
      title: Location
      type: array
      items:
        anyOf:
          - type: string
          - type: integer
    msg:
      title: Message
      type: string
    type:
      title: Error Type
      type: string
    # No type constraint is declared for this property.
    input:
      title: Input
    ctx:
      title: Context
      type: object
# A chunk of parsed document content; items of FullResult-Input.chunks.
ParseChunk-Input:
  type: object
  title: ParseChunk
  required: [content, embed, enriched, blocks]
  properties:
    content:
      title: Content
      description: The content of the chunk extracted from the document.
      type: string
    embed:
      title: Embed
      description: Chunk content optimized for embedding and retrieval.
      type: string
    # Listed as required, but the value itself may be null.
    enriched:
      title: Enriched
      description: The enriched content of the chunk extracted from the document.
      anyOf:
        - type: string
        - type: 'null'
    enrichment_success:
      title: Enrichment Success
      description: Whether the enrichment was successful.
      type: boolean
      default: false
    blocks:
      title: Blocks
      type: array
      items:
        $ref: '#/components/schemas/ParseBlock-Input'
# The 200 response body of POST /cite.
CiteResponse:
  type: object
  title: CiteResponse
  description: Citation location results.
  required: [results, duration]
  properties:
    results:
      title: Results
      description: Results in same order as input queries (1:1 correspondence).
      type: array
      items:
        $ref: '#/components/schemas/CitationQueryResult'
    duration:
      title: Duration
      description: Processing time in seconds.
      type: number
# Wrapper around the per-category confidence list of a classify response.
ResponseConfidence:
  type: object
  title: ResponseConfidence
  description: Overall confidence breakdown for classification response.
  required: [categories]
  properties:
    categories:
      title: Categories
      type: array
      items:
        $ref: '#/components/schemas/CategoryConfidence'
# Inline parse result; one of the two accepted forms of CiteRequest.source.
FullResult-Input:
  type: object
  title: FullResult
  required: [type, chunks]
  properties:
    # Constant discriminator; always the literal "full".
    type:
      title: Type
      description: type = 'full'
      type: string
      const: full
    chunks:
      title: Chunks
      type: array
      items:
        $ref: '#/components/schemas/ParseChunk-Input'
    ocr:
      anyOf:
        - $ref: '#/components/schemas/OCRResult-Input'
        - type: 'null'
    # The empty schema ({}) places no constraint on the value.
    custom:
      title: Custom
      anyOf:
        - {}
        - type: 'null'
# Holds the category string carried in ClassifyResponse.result.
ClassifyResponseCategory:
  type: object
  title: ClassifyResponseCategory
  required: [category]
  properties:
    category:
      title: Category
      type: string
# Pairs one criterion with a two-level (high/low) confidence label.
CriteriaConfidence:
  type: object
  title: CriteriaConfidence
  description: Confidence result for a single criterion.
  required: [criterion, confidence]
  properties:
    criterion:
      title: Criterion
      type: string
    confidence:
      title: Confidence
      type: string
      enum: [high, low]
# Rectangle on a page, given as left/top/width/height plus page number(s).
BoundingBox:
  type: object
  title: BoundingBox
  required: [left, top, width, height, page]
  properties:
    left:
      title: Left
      type: number
    top:
      title: Top
      type: number
    width:
      title: Width
      type: number
    height:
      title: Height
      type: number
    page:
      title: Page
      description: The page number of the bounding box (1-indexed).
      type: integer
    # Optional: not listed under `required`.
    original_page:
      title: Original Page
      description: The page number in the original document of the bounding box (1-indexed).
      type: integer
# Numeric confidence for one category, plus its per-criterion labels.
CategoryConfidence:
  type: object
  title: CategoryConfidence
  description: Confidence result for a category.
  required: [category, confidence, criteria_confidence]
  properties:
    category:
      title: Category
      type: string
    confidence:
      title: Confidence
      type: number
    criteria_confidence:
      title: Criteria Confidence
      type: array
      items:
        $ref: '#/components/schemas/CriteriaConfidence'
# One hit for a citation query: a page number and the boxes covering the text.
CitationMatch:
  type: object
  title: CitationMatch
  description: A location where the citation text was found.
  required: [page, bboxes]
  properties:
    page:
      title: Page
      description: Page number (1-indexed).
      type: integer
    bboxes:
      title: Bboxes
      description: Bounding boxes for the match. Multiple boxes for multi-line text.
      type: array
      items:
        $ref: '#/components/schemas/BoundingBox'
# Optional numeric confidence scores; both fields are nullable and none is required.
GranularConfidence:
  type: object
  title: GranularConfidence
  properties:
    extract_confidence:
      title: Extract Confidence
      anyOf:
        - type: number
        - type: 'null'
    parse_confidence:
      title: Parse Confidence
      anyOf:
        - type: number
        - type: 'null'
# Page interval with nullable, optional start and end (both 1-indexed).
PageRange:
  type: object
  title: PageRange
  properties:
    start:
      title: Start
      description: The page number to start processing from (1-indexed).
      anyOf:
        - type: integer
        - type: 'null'
    end:
      title: End
      description: The page number to stop processing at (1-indexed).
      anyOf:
        - type: integer
        - type: 'null'
# A single OCR word: its text and bounding box, plus optional nullable metadata.
OCRWord:
  type: object
  title: OCRWord
  required: [text, bbox]
  properties:
    text:
      title: Text
      type: string
    bbox:
      $ref: '#/components/schemas/BoundingBox'
    confidence:
      title: Confidence
      description: OCR confidence score between 0 and 1, where 1 indicates highest confidence
      anyOf:
        - type: number
        - type: 'null'
    chunk_index:
      title: Chunk Index
      description: The index of the chunk that the word belongs to.
      anyOf:
        - type: integer
        - type: 'null'
    rotation:
      title: Rotation
      description: The rotation angle in degrees, from 0 to 360, counterclockwise.
      anyOf:
        - type: integer
        - type: 'null'
# OCR output as two parallel lists: words and lines.
OCRResult-Input:
  type: object
  title: OCRResult
  required: [words, lines]
  properties:
    words:
      title: Words
      type: array
      items:
        $ref: '#/components/schemas/OCRWord'
    lines:
      title: Lines
      type: array
      items:
        $ref: '#/components/schemas/OCRLine'
# Upload handle; accepted as one form of ClassifyConfig.input.
UploadResponse:
  type: object
  title: UploadResponse
  required: [file_id]
  properties:
    file_id:
      title: File Id
      type: string
    presigned_url:
      title: Presigned Url
      anyOf:
        - type: string
        - type: 'null'
# Request body for POST /cite: a parse source plus at least one query.
CiteRequest:
  type: object
  title: CiteRequest
  description: Request to locate text citations in a parsed document.
  required: [source, queries]
  properties:
    # Either a plain string or an inline FullResult-Input object.
    source:
      title: Source
      description: Either 'jobid://<job_id>' string or full parse result object. The parse must have been run with return_ocr_data=true.
      anyOf:
        - type: string
        - $ref: '#/components/schemas/FullResult-Input'
    queries:
      title: Queries
      description: List of text citations to locate.
      type: array
      minItems: 1
      items:
        $ref: '#/components/schemas/CitationQuery'
# One entry of ClassifyConfig.classification_schema: a label and its criteria.
ClassificationCategory:
  type: object
  title: ClassificationCategory
  description: A single classification category with its matching criteria.
  required: [category, criteria]
  properties:
    category:
      title: Category
      description: The category name/label that documents will be classified into (e.g., 'invoice', 'contract', 'receipt').
      type: string
    criteria:
      title: Criteria
      # Folded scalar: the two lines below join with a single space.
      description: >-
        A list of criteria, keywords, or descriptions that define what characteristics a document must have
        to be classified into this category (e.g., ['contains billing information', 'has itemized charges']).
      type: array
      items:
        type: string
# A typed block of parsed content with its bounding box; items of ParseChunk-Input.blocks.
ParseBlock-Input:
  type: object
  title: ParseBlock
  required: [type, bbox, content]
  properties:
    type:
      title: Type
      description: The type of block extracted from the document.
      type: string
      enum:
        - Header
        - Footer
        - Title
        - Section Header
        - Page Number
        - List Item
        - Figure
        - Table
        - Key Value
        - Text
        - Comment
        - Signature
    bbox:
      $ref: '#/components/schemas/BoundingBox'
      description: The bounding box of the block extracted from the document.
    content:
      title: Content
      description: The content of the block extracted from the document.
      type: string
    image_url:
      title: Image Url
      description: (Experimental) The URL of the image associated with the block.
      anyOf:
        - type: string
        - type: 'null'
    chart_data:
      title: Chart Data
      description: (Experimental) The URL/link to chart data JSON for figure blocks processed by chart agent.
      anyOf:
        - type: array
          items:
            type: string
        - type: 'null'
    # Nullable string; defaults to "low" when omitted.
    confidence:
      title: Confidence
      description: >-
        The confidence for the block. It is either low or high and takes into account factors like OCR and
        table structure
      anyOf:
        - type: string
        - type: 'null'
      default: low
    granular_confidence:
      description: >-
        Granular confidence scores for the block. It is a dictionary of confidence scores for the block. The
        confidence scores will not be None if the user has enabled numeric confidence scores.
      anyOf:
        - $ref: '#/components/schemas/GranularConfidence'
        - type: 'null'
    # Free-form object (additionalProperties: true) or null.
    extra:
      title: Extra
      description: Extra metadata fields for the block. Fields like 'is_chart' will only appear when set to True.
      anyOf:
        - type: object
          additionalProperties: true
        - type: 'null'
# Request body for POST /classify; only `input` is required.
ClassifyConfig:
  type: object
  title: ClassifyConfig
  required: [input]
  properties:
    persist_results:
      title: Persist Results
      description: If True, persist the results indefinitely. Defaults to False.
      type: boolean
      default: false
    # Accepts a string, a list of strings, or an UploadResponse object.
    input:
      title: Input
      # Kept double-quoted: the text relies on \n escapes and backslash
      # line continuations, which only this scalar style supports.
      description: "For parse/split/extract pipelines, the URL of the document to be processed. You can provide one of\
        \ the following:\n 1. A publicly available URL\n 2. A presigned S3 URL\n 3. A\
        \ reducto:// prefixed URL obtained from the /upload endpoint after directly uploading a document\n \
        \ 4. A jobid:// prefixed URL obtained from a previous /parse invocation\n 5. A list of URLs (for multi-document\
        \ pipelines, V3 API only)\n\n For edit pipelines, this should be a string containing the edit instructions "
      anyOf:
        - type: string
        - type: array
          items:
            type: string
        - $ref: '#/components/schemas/UploadResponse'
    classification_schema:
      title: Classification Schema
      description: A list of classification categories and their matching criteria.
      type: array
      items:
        $ref: '#/components/schemas/ClassificationCategory'
      default: []
    # Accepts one PageRange, a list of PageRange, a list of integers, or null.
    page_range:
      title: Page Range
      description: >-
        The page range to process (1-indexed). By default, the first 5 pages are used. If more than 25 pages
        are selected, only the first 25 (after sorting) are used. Only applies to PDFs; ignored for other document types.
      anyOf:
        - $ref: '#/components/schemas/PageRange'
        - type: array
          items:
            $ref: '#/components/schemas/PageRange'
        - type: array
          items:
            type: integer
        - type: 'null'
    document_metadata:
      title: Document Metadata
      description: Optional document-level metadata to include in classification prompts.
      anyOf:
        - type: string
        - type: 'null'
# Match list for a single query; items of CiteResponse.results.
CitationQueryResult:
  type: object
  title: CitationQueryResult
  description: Results for one query.
  required: [matches]
  properties:
    matches:
      title: Matches
      description: All locations where the text was found. Empty if no matches.
      type: array
      items:
        $ref: '#/components/schemas/CitationMatch'
# A single OCR line: its text and bounding box, plus optional nullable metadata.
OCRLine:
  type: object
  title: OCRLine
  required: [text, bbox]
  properties:
    text:
      title: Text
      type: string
    bbox:
      $ref: '#/components/schemas/BoundingBox'
    confidence:
      title: Confidence
      description: OCR confidence score between 0 and 1, where 1 indicates highest confidence
      anyOf:
        - type: number
        - type: 'null'
    chunk_index:
      title: Chunk Index
      description: The index of the chunk that the line belongs to.
      anyOf:
        - type: integer
        - type: 'null'
    rotation:
      title: Rotation
      description: The rotation angle in degrees, from 0 to 360, counterclockwise.
      anyOf:
        - type: integer
        - type: 'null'
# Body of the 422 responses: a list of ValidationError entries.
HTTPValidationError:
  type: object
  title: HTTPValidationError
  properties:
    detail:
      title: Detail
      type: array
      items:
        $ref: '#/components/schemas/ValidationError'
# HTTP bearer authentication, referenced by the security requirements above.
securitySchemes:
  SkippableHTTPBearer:
    scheme: bearer
    type: http