openapi: 3.0.3
info:
title: Affinda Documents API
version: 3.0.0
description: "Affinda Documents API \u2014 subset of the Affinda v3 Document Processing API."
servers:
- url: https://{region}.affinda.com
description: 'Select the correct server for your instance: api (AUS/Global), api.us1 (US), or api.eu1 (EU).'
variables:
region:
default: api
description: The instance region. Use 'api' for AUS/Global, 'api.us1' for US, or 'api.eu1' for EU. You can find your
region in the Affinda web app URL.
enum:
- api
- api.eu1
- api.us1
x-ms-parameter-location: client
security:
- ApiKeyAuth: []
paths:
/v3/documents:
get:
tags:
- Documents
summary: Get list of all documents
operationId: getAllDocuments
description: Returns all the document summaries for that user, limited to 300 per page.
parameters:
- $ref: '#/components/parameters/offsetParam'
- $ref: '#/components/parameters/limitParam'
- in: query
name: workspace
schema:
$ref: '#/components/schemas/identifier'
description: Filter by workspace.
- in: query
name: collection
schema:
$ref: '#/components/schemas/properties-identifier'
description: Filter by collection.
- in: query
name: state
schema:
$ref: '#/components/schemas/DocumentState'
description: Filter by the document's state.
- in: query
name: tags
schema:
type: array
items:
$ref: '#/components/schemas/id'
description: Filter by tag IDs.
- in: query
name: created_dt
schema:
$ref: '#/components/schemas/DateRange'
description: Filter by created datetime.
- in: query
name: search
schema:
type: string
description: Partial, case-insensitive match with file name or tag name.
- in: query
name: ordering
schema:
type: array
items:
type: string
description: 'Sort the result set. A "-" at the beginning denotes DESC sort, e.g. -created_dt. Sort by multiple fields
is supported. Supported values include: ''file_name'', ''extractor'', ''created_dt'', ''validated_dt'', ''archived_dt''
and ''parsed__<dataPointSlug>''.'
- in: query
name: include_data
schema:
type: boolean
description: By default, this endpoint returns only the metadata of the documents. Setting this to `true` will return
a summary of the data that was parsed. If you want to retrieve the full set of data for a document, use the `GET
/v3/documents/{identifier}` endpoint.
- in: query
name: exclude
schema:
type: array
items:
$ref: '#/components/schemas/DocumentMeta_properties-identifier'
description: Exclude some documents from the result.
- in: query
name: in_review
schema:
type: boolean
description: Exclude documents that are currently being reviewed.
- in: query
name: failed
schema:
type: boolean
description: Filter by failed status.
- in: query
name: ready
schema:
type: boolean
description: Filter by ready status.
- in: query
name: validatable
schema:
type: boolean
description: Filter for validatable documents.
- in: query
name: has_challenges
schema:
type: boolean
description: Filter for documents with challenges.
- in: query
name: custom_identifier
schema:
type: string
description: Filter for documents with this custom identifier.
- in: query
name: compact
required: false
description: If "true", the response is compacted to annotations' parsed data. Annotations' meta data are excluded.
Default is "false".
schema:
type: boolean
- in: query
name: count
required: false
description: If "false", the documents count is not computed, thus saving time for large collections. Default is "true".
schema:
type: boolean
- in: query
name: snake_case
required: false
description: Whether to return the response in snake_case instead of camelCase. Default is false.
schema:
type: boolean
responses:
'200':
description: All documents for user
content:
application/json:
schema:
type: object
required:
- results
- count
allOf:
- $ref: '#/components/schemas/PaginatedResponse'
- type: object
properties:
results:
type: array
items:
$ref: '#/components/schemas/Document'
'400':
$ref: '#/components/responses/400Error'
'401':
$ref: '#/components/responses/401Error'
'403':
$ref: '#/components/responses/403Error'
default:
$ref: '#/components/responses/DefaultError'
post:
tags:
- Documents
summary: Upload a document for parsing
operationId: createDocument
description: '
Uploads a document for parsing via file upload or URL.
When successful, returns an `identifier` in the response for subsequent use with the [/documents/{identifier}](#get-/v3/documents/-identifier-)
endpoint to check processing status and retrieve results.<br/>'
responses:
'200':
description: Only returned when wait=True, will return the created document
content:
application/json:
schema:
$ref: '#/components/schemas/Document'
'201':
description: Only returned when wait=False, will return document uploaded and identifier created
x-summary: Document uploaded and created, use document.meta.identifier to poll for processing status
content:
application/json:
schema:
$ref: '#/components/schemas/Document'
'400':
$ref: '#/components/responses/400Error'
'401':
$ref: '#/components/responses/401Error'
'403':
$ref: '#/components/responses/403Error'
default:
$ref: '#/components/responses/DefaultError'
parameters:
- in: query
name: snake_case
required: false
description: Whether to return the response in snake_case instead of camelCase. Default is false.
schema:
type: boolean
requestBody:
description: Document to upload, either via file upload or URL to a file
required: true
content:
multipart/form-data:
schema:
$ref: '#/components/schemas/DocumentCreate'
/v3/documents/{identifier}:
get:
tags:
- Documents
summary: Get specific document
operationId: getDocument
description: Return a specific document.
parameters:
- in: path
required: true
name: identifier
description: Document's identifier
schema:
$ref: '#/components/schemas/DocumentMeta_properties-identifier'
- in: query
required: false
name: format
description: Specify which format you want the response to be. Default is "json"
schema:
$ref: '#/components/schemas/DocumentFormat'
- in: query
name: compact
required: false
description: If "true", the response is compacted to annotations' parsed data. Annotations' meta data are excluded.
Default is "false".
schema:
type: boolean
- in: query
name: snake_case
required: false
description: Whether to return the response in snake_case instead of camelCase. Default is false.
schema:
type: boolean
responses:
'200':
description: Successfully retrieved document.
content:
application/json:
schema:
$ref: '#/components/schemas/Document'
application/xml:
schema:
$ref: '#/components/schemas/Document'
'400':
$ref: '#/components/responses/400ErrorMaybeXML'
'401':
$ref: '#/components/responses/401ErrorMaybeXML'
default:
$ref: '#/components/responses/DefaultError'
patch:
tags:
- Documents
summary: Update a document
operationId: updateDocument
description: Update file name, expiry time, or move to another collection, etc.
parameters:
- in: path
required: true
name: identifier
description: Document's identifier
schema:
$ref: '#/components/schemas/DocumentMeta_properties-identifier'
- in: query
name: compact
required: false
description: If "true", the response is compacted to annotations' parsed data. Annotations' meta data are excluded.
Default is "false".
schema:
type: boolean
- in: query
name: snake_case
required: false
description: Whether to return the response in snake_case instead of camelCase. Default is false.
schema:
type: boolean
requestBody:
description: Document data to update
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/DocumentUpdate'
responses:
'200':
description: Successfully updated document data.
content:
application/json:
schema:
$ref: '#/components/schemas/Document'
'400':
$ref: '#/components/responses/400Error'
'401':
$ref: '#/components/responses/401Error'
default:
$ref: '#/components/responses/DefaultError'
delete:
tags:
- Documents
summary: Delete a document
operationId: deleteDocument
description: Deletes the specified document from the database.
parameters:
- in: path
required: true
name: identifier
description: Document's identifier
schema:
$ref: '#/components/schemas/DocumentMeta_properties-identifier'
responses:
'204':
$ref: '#/components/responses/204NoContent'
'400':
$ref: '#/components/responses/400Error'
'401':
$ref: '#/components/responses/401Error'
default:
$ref: '#/components/responses/DefaultError'
/v3/documents/{identifier}/redacted:
get:
tags:
- Documents
summary: Get redacted document
operationId: getRedactedDocument
description: Get the redacted version of a document. The original document is not modified.
parameters:
- in: path
required: true
name: identifier
description: Document identifier
schema:
$ref: '#/components/schemas/Identifier'
responses:
'200':
content:
application/pdf:
schema:
type: string
format: binary
description: Successfully retrieved redacted document
'400':
$ref: '#/components/responses/400Error'
'401':
$ref: '#/components/responses/401Error'
default:
$ref: '#/components/responses/DefaultError'
components:
securitySchemes:
ApiKeyAuth:
type: http
scheme: bearer
description: 'Bearer authentication using an API key, e.g. `{Authorization: Bearer aff_0bb4fbdf97b7e4111ff6c0015471094155f91}`.
You can find your API key within the Settings page of the [Affinda web app](https://app.affinda.com/). You can obtain
an API key by [signing up for a free trial](https://app.affinda.com/auth/register).'
schemas:
Resume:
allOf:
- $ref: '#/components/schemas/Document'
- type: object
properties:
data:
$ref: '#/components/schemas/ResumeData'
x-ms-discriminator-value: resume
Invoice:
allOf:
- $ref: '#/components/schemas/Document'
- type: object
properties:
data:
$ref: '#/components/schemas/InvoiceData'
x-ms-discriminator-value: invoice
JobDescription:
allOf:
- $ref: '#/components/schemas/Document'
- type: object
properties:
data:
$ref: '#/components/schemas/JobDescriptionData'
x-ms-discriminator-value: job-description
ResumeRedact:
allOf:
- $ref: '#/components/schemas/Document'
- type: object
properties:
data:
type: object
properties:
redactedPdf:
type: string
description: URL to download the redacted resume.
x-ms-discriminator-value: resume-redact
DateRangeAnnotation:
type: object
nullable: true
allOf:
- $ref: '#/components/schemas/Annotation'
- type: object
properties:
parsed:
type: object
properties:
start:
$ref: '#/components/schemas/DateRangeValue'
end:
$ref: '#/components/schemas/DateRangeValue'
PhoneNumberAnnotation:
type: object
nullable: true
allOf:
- $ref: '#/components/schemas/Annotation'
- type: object
properties:
parsed:
type: object
properties:
rawText:
type: string
nullable: false
example: '+61412632636'
deprecated: true
formattedNumber:
type: string
nullable: true
example: +61 412 632 636
countryCode:
type: string
nullable: true
example: AU
internationalCountryCode:
type: integer
nullable: true
example: 61
minimum: 1
nationalNumber:
type: string
nullable: true
example: 0412 632 636
UrlAnnotation:
type: object
nullable: true
allOf:
- $ref: '#/components/schemas/Annotation'
- type: object
properties:
parsed:
type: object
properties:
url:
type: string
nullable: true
example: https://2nb9s.com/hello/world?foo=bar#baz
domain:
type: string
nullable: true
example: 2nb9s.com
identifier:
type: string
description: Uniquely identifies a workspace.
example: mEFayXdO
properties-identifier:
type: string
description: Uniquely identifies a collection.
example: mEFayXdO
DocumentState:
type: string
enum:
- uploaded
- review
- validated
- archived
- rejected
id:
type: integer
description: Uniquely identifies a tag.
example: 1
minimum: 1
DateRange:
type: string
enum:
- today
- yesterday
- week
- month
- year
DocumentMeta_properties-identifier:
type: string
description: Unique identifier for the document
RequestError:
type: object
additionalProperties: false
required:
- type
- errors
properties:
type:
type: string
example: validation_error
errors:
type: array
items:
type: object
required:
- attr
- code
- detail
properties:
attr:
type: string
nullable: true
example: non_field_errors
code:
type: string
example: unique
detail:
type: string
example: This index name has already been used
PaginatedResponse:
type: object
required:
- count
properties:
count:
type: integer
example: 10
description: Number of items in results.
minimum: 0
next:
type: string
nullable: true
description: URL to request next page of results.
previous:
type: string
nullable: true
description: URL to request previous page of results.
FileName:
type: string
nullable: true
description: Optional filename of the file
example: Document.pdf
ExpiryTime:
type: string
nullable: true
format: date-time
description: The date/time in ISO-8601 format when the document will be automatically deleted. Defaults to no expiry.
customIdentifier:
type: string
nullable: true
description: Optional identifier for the document that you can set to track the document in the Affinda system. Is
not required to be unique.
example: 46ab8b02-0e5b-420c-877c-8b678d46a834
PageMeta:
type: object
required:
- id
- pageIndex
- image
- width
- height
- rotation
properties:
id:
type: integer
minimum: 1
pageIndex:
type: integer
example: 0
minimum: 0
description: Page number within the document, starts from 0.
image:
type: string
nullable: true
example: https://affinda-api.s3.amazonaws.com/media/pages/Page.png?AWSAccessKeyId=KEY&Signature=SIG&Expires=1663302062
description: The URL to the image of the page.
imageTranslated:
type: string
nullable: true
example: https://affinda-api.s3.amazonaws.com/media/pages/PageTranslated.png?AWSAccessKeyId=KEY&Signature=SIG&Expires=1663302062
description: The URL to the translated image of the page.
height:
type: number
example: 700
description: Height of the page's image in px.
width:
type: number
example: 500
description: Width of the page's image in px.
rotation:
type: integer
example: 90
minimum: -360
maximum: 360
description: The degree of rotation applied to the page. Greater than 0 indicates clockwise rotation. Less than
0 indicates counter-clockwise rotation.
name:
type: string
Extractor_properties-identifier:
type: string
description: Uniquely identifies an extractor.
example: resume
validatable:
type: boolean
Identifier:
type: string
description: A random string that uniquely identifies the resource.
ValidationRule:
type: object
additionalProperties: false
nullable: false
description: A validation rule for a collection
required:
- slug
- dataPoints
properties:
slug:
type: string
description: The slug of the validation rule, in lowercase snake_case
pattern: ^[a-z0-9_]+$
example: supplier_name_is_alphanumeric
dataPoints:
type: array
description: The data point identifiers that this validation rule applies to, can be an empty list if the rule doesn't
use any data points as sources
items:
$ref: '#/components/schemas/Identifier'
UserNullable:
type: object
nullable: true
properties:
id:
type: integer
description: Uniquely identifies a user.
example: 1
minimum: 1
name:
type: string
example: Carl Johnson
username:
type: string
example: carljohnson
email:
type: string
example: carljohnson@example.com
avatar:
type: string
nullable: true
description: URL of the user's avatar.
example: https://affinda-api.s3.amazonaws.com/media/user-avatar.png?AWSAccessKeyId=KEY&Signature=SIG
errorCode:
type: string
nullable: true
example: document_conversion_failed
errorDetail:
type: string
nullable: true
example: Unable to convert word document
Tag:
type: object
required:
- id
- name
- workspace
- documentCount
properties:
id:
type: integer
description: Uniquely identifies a tag.
example: 1
minimum: 1
name:
type: string
workspace:
$ref: '#/components/schemas/identifier'
documentCount:
type: integer
minimum: 0
description: Number of documents tagged with this.
User:
type: object
properties:
id:
type: integer
description: Uniquely identifies a user.
example: 1
minimum: 1
name:
type: string
example: Carl Johnson
username:
type: string
example: carljohnson
email:
type: string
example: carljohnson@example.com
avatar:
type: string
nullable: true
description: URL of the user's avatar.
example: https://affinda-api.s3.amazonaws.com/media/user-avatar.png?AWSAccessKeyId=KEY&Signature=SIG
RegionBias:
type: object
nullable: true
properties:
country:
type: string
nullable: true
description: A single alpha-2 country code (e.g. AU) used by the Google geocoding service
countries:
type: array
items:
type: string
nullable: true
description: A list of alpha-2 country codes used by Pelias
squareCoordinates:
type: array
items:
type: number
nullable: true
description: A list of coordinates used by Pelias in the shape of [min_lon, min_lat, max_lon, max_lat]
strict:
type: boolean
description: 'If true, the location must be within the region, as opposed to preferring locations within the region.
Defaults to false.
'
example: true
DocumentMeta:
type: object
required:
- identifier
- pages
- workspace
properties:
identifier:
type: string
description: Unique identifier for the document
customIdentifier:
type: string
nullable: true
description: Optional identifier for the document that you can set to track the document in the Affinda system. Is
not required to be unique.
example: 46ab8b02-0e5b-420c-877c-8b678d46a834
fileName:
$ref: '#/components/schemas/FileName'
ready:
type: boolean
nullable: false
example: true
description: If true, the document has finished processing. Particularly useful if an endpoint request specified
wait=False, when polling use this variable to determine when to stop polling
readyDt:
type: string
format: date-time
example: '2020-12-10T01:43:32.276724Z'
nullable: true
description: The datetime when the document was ready
failed:
type: boolean
nullable: false
example: false
description: If true, some exception was raised during processing. Check the 'error' field of the main return object.
expiryTime:
$ref: '#/components/schemas/ExpiryTime'
language:
type: string
nullable: true
example: en
description: The document's language.
pdf:
type: string
nullable: true
example: https://affinda-api.s3.amazonaws.com/media/documents/Document.pdf?AWSAccessKeyId=KEY&Signature=SIG&Expires=1663302062
description: The URL to the document's pdf (if the uploaded document is not already pdf, it's converted to pdf as
part of the parsing process).
parentDocument:
type: object
nullable: true
description: If this document is part of a split document, this attribute points to the original document that
this document was split from.
properties:
identifier:
$ref: '#/components/schemas/DocumentMeta_properties-identifier'
customIdentifier:
$ref: '#/components/schemas/customIdentifier'
childDocuments:
type: array
description: If this document has been split into a number of child documents, this attribute points to those
child documents.
items:
type: object
properties:
identifier:
$ref: '#/components/schemas/DocumentMeta_properties-identifier'
customIdentifier:
$ref: '#/components/schemas/customIdentifier'
pages:
type: array
items:
$ref: '#/components/schemas/PageMeta'
description: The document's pages.
isOcrd:
type: boolean
ocrConfidence:
type: number
nullable: true
reviewUrl:
type: string
nullable: true
documentType:
type: string
description: The document type's identifier. Provide if you already know the document type.
nullable: true
collection:
type: object
nullable: true
required:
- identifier
properties:
identifier:
$ref: '#/components/schemas/properties-identifier'
name:
$ref: '#/components/schemas/name'
extractor:
type: object
nullable: true
properties:
identifier:
$ref: '#/components/schemas/Extractor_properties-identifier'
name:
$ref: '#/components/schemas/name'
baseExtractor:
type: string
description: Base extractor's identifier.
nullable: true
validatable:
$ref: '#/components/schemas/validatable'
validationRules:
type: array
items:
$ref: '#/components/schemas/ValidationRule'
autoRefreshValidationResults:
type: boolean
description: If True, validation results are refreshed whenever annotations are changed.
workspace:
type: object
required:
- identifier
properties:
identifier:
$ref: '#/components/schemas/identifier'
name:
$ref: '#/components/schemas/name'
archivedDt:
type: string
format: date-time
nullable: true
isArchived:
type: boolean
skipParse:
type: boolean
confirmedDt:
type: string
format: date-time
nullable: true
confirmedBy:
$ref: '#/components/schemas/UserNullable'
isConfirmed:
type: boolean
rejectedDt:
type: string
format: date-time
nullable: true
rejectedBy:
$ref: '#/components/schemas/UserNullable'
archivedBy:
$ref: '#/components/schemas/UserNullable'
isRejected:
type: boolean
createdDt:
type: string
format: date-time
errorCode:
$ref: '#/components/schemas/errorCode'
errorDetail:
$ref: '#/components/schemas/errorDetail'
file:
type: string
nullable: true
description: URL to view the file.
html:
type: string
nullable: true
description: URL to view the file converted to HTML.
llmHint:
type: string
nullable: true
description: Optional hint inserted into the LLM prompt when processing this document.
tags:
type: array
items:
$ref: '#/components/schemas/Tag'
createdBy:
$ref: '#/components/schemas/User'
sourceEmail:
type: string
nullable: true
description: If the document is created via email ingestion, this field stores the email file's URL.
sourceEmailAddress:
type: string
nullable: true
description: If the document is created via email ingestion, this field stores the email's From address.
regionBias:
$ref: '#/components/schemas/RegionBias'
DocumentError:
type: object
additionalProperties: false
properties:
errorCode:
type: string
nullable: true
example: document_conversion_failed
errorDetail:
type: string
nullable: true
example: Unable to convert word document
DocumentWarning:
type: object
additionalProperties: false
properties:
warningCode:
type: string
nullable: true
example: too_many_pages
warningDetail:
type: string
nullable: true
example: File exceeds maximum number of pages allowed, parsing the first 10 pages only.
Document:
type: object
required:
- meta
- extractor
properties:
data:
type: object
extractor:
type: string
meta:
$ref: '#/components/schemas/DocumentMeta'
error:
$ref: '#/components/schemas/DocumentError'
warnings:
type: array
items:
$ref: '#/components/schemas/DocumentWarning'
discriminator:
propertyName: extractor
x-csharp-usage: input,output
File:
type: string
format: binary
description: 'File as binary data blob. Supported formats: PDF, DOC, DOCX, TXT, RTF, HTML, PNG, JPG, TIFF, ODT, XLS,
XLSX'
Wait:
type: boolean
description: If "true" (default), will return a response only after processing has completed. If "false", will return
an empty data object which can be polled at the GET endpoint until processing is complete.
example: true
default: true
nullable: false
Language:
type: string
nullable: true
description: Language code in ISO 639-1 format. Must specify zh-cn or zh-tw for Chinese.
example: en
RejectDuplicatesMaybeString:
type: boolean
description: If "true", parsing will fail when the uploaded document is a duplicate of an existing document, and no credits
will be consumed. If "false", will parse the document normally whether it's a duplicate or not. If not provided, will
fall back to the workspace settings.
example: true
nullable: true
DocumentCreate:
type: object
properties:
file:
$ref: '#/components/schemas/File'
url:
type: string
nullable: true
description: URL to download the document.
example: https://api.affinda.com/static/sample_resumes/example.docx
collection:
$ref: '#/components/schemas/properties-identifier'
documentType:
type: string
description: The document type's identifier. Provide if you already know the document type.
nullable: true
workspace:
$ref: '#/components/schemas/identifier'
wait:
$ref: '#/components/schemas/Wait'
identifier:
type: string
description: Deprecated in favor of `customIdentifier`.
deprecated: true
customIdentifier:
type: string
description: Specify a custom identifier for the document if you need one, not required to be unique.
fileName:
$ref: '#/components/schemas/FileName'
expiryTime:
# --- truncated at 32 KB (212 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/affinda/refs/heads/main/openapi/affinda-documents-api-openapi.yml