# OpenAPI document header: specification version plus API metadata.
openapi: 3.0.1
info:
  title: PDF.co API
  # Double quotes are kept so the \u2014 escape (em dash) stays ASCII-only in the file;
  # line breaks inside the quotes fold to single spaces.
  description: "PDF.co Web API \u2014 programmatic PDF and document automation: AI invoice parsing,
    document parsing, PDF conversion (to/from PDF, Excel, CSV, JSON, XML, HTML, images), OCR,
    merging, splitting, compression, barcodes, e-signature workflows, and more.
    Authenticated via the x-api-key header."
  # Quoted so the version stays a string instead of being read as the float 1.0.
  version: '1.0'
  contact:
    name: PDF.co Support
    url: https://support.pdf.co/en
    # The previous value was an e-mail-obfuscation placeholder ("[email protected]") that YAML
    # parses as a flow sequence rather than a string.
    # NOTE(review): restored to the public support address - confirm with the API owner.
    email: support@pdf.co
  license:
    name: Proprietary
    url: https://pdf.co/terms
  termsOfService: https://pdf.co/terms
# Base URL that every path in this document is resolved against.
servers:
  - url: 'https://api.pdf.co'
# Default security requirement: the ApiKeyAuth scheme, with no scopes.
security:
  - ApiKeyAuth: []
# Tag names used to group operations.
tags:
  - name: 'Extraction'
  - name: 'Editing'
  - name: 'PDF Conversion'
  - name: 'Excel Conversion'
  - name: 'PDF Merging & Splitting'
  - name: 'Forms'
  - name: 'Find & Search'
  - name: 'Document, File & System'
  - name: 'Pages'
  - name: 'Barcodes'
components:
securitySchemes:
ApiKeyAuth:
  # API key passed in the `x-api-key` request header.
  type: apiKey
  name: x-api-key
  in: header
schemas:
url:
  # URI string pointing at the source file.
  description: 'URL to the source file [`url` attribute](/api-reference/url-input-and-request-limits).'
  type: string
  format: uri
httpusername:
  # User name half of the HTTP auth credentials for the source URL.
  description: 'HTTP auth user name if required to access source URL.'
  type: string
httppassword:
  # Password half of the HTTP auth credentials for the source URL.
  description: 'HTTP auth password if required to access source URL.'
  type: string
template:
  # Inline document-parser template, passed as a raw string.
  description: >-
    The raw format of the document parser template to be used directly.
    see [Template](/api-reference/documentparser/parser)
  type: string
inline:
  # Boolean switch between an inline result and a URL to the output file.
  type: boolean
  description: >-
    Set to true to return results inside the response. Otherwise, the endpoint will return
    a URL to the output file generated.
  default: false
password:
  # Password string for the PDF file.
  description: 'Password for the PDF file.'
  type: string
async:
  # Background-processing flag; off by default.
  type: boolean
  description: >-
    Set `async` to `true` for long processes to run in the background, API will then return
    a `jobId` which you can use with the
    [Background Job Check endpoint](/api-reference/job-check).
    Also see [Webhooks & Callbacks](/api-reference/webhooks)
  default: false
callback:
  # Webhook URI that receives the POST data for async jobs.
  type: string
  format: uri
  description: >-
    The callback URL (or Webhook) used to receive the POST data.
    see [Webhooks & Callbacks](/api-reference/webhooks).
    This is only applicable when `async` is set to `true`.
pages0:
  # Zero-based page selector: a comma-separated list of indices and ranges.
  type: string
  description: >-
    Page indices/ranges (0-based). Items are comma-separated. Each item is one of: N (e.g., 0),
    N-M (e.g., 3-7), N- (open-ended, e.g., 10-), or !N (reverse index; !0 is last page, !1 is
    second-to-last). Whitespace is allowed. If not specified, the default configuration
    processes all pages.
  # Single quotes keep the backslashes literal.
  pattern: '^\s*(?:!?\d+\s*-\s*!?\d+|!?\d+\s*-\s*|!?\d+)\s*(?:,\s*(?:!?\d+\s*-\s*!?\d+|!?\d+\s*-\s*|!?\d+)\s*)*$'
  example: '0,2,5-10, !0, !5-!2'
pages1:
  # One-based page selector. The description is self-contained instead of pointing at
  # "PagesBase0", a name that is not defined anywhere in this document.
  type: string
  description: >-
    Page indices/ranges (1-based). Items are comma-separated. Each item is one of: N (e.g., 1),
    N-M (e.g., 3-7), N- (open-ended, e.g., 10-), or !N (reverse index; !1 is last page).
    The first page is 1.
  # Single quotes keep the backslashes literal.
  pattern: '^\s*(?:!?\d+\s*-\s*!?\d+|!?\d+\s*-\s*|!?\d+)\s*(?:,\s*(?:!?\d+\s*-\s*!?\d+|!?\d+\s*-\s*|!?\d+)\s*)*$'
  example: '1,2,3-7, !1, !6-!2'
replacementlimit:
  # Cap on search-and-replace hits per item; 0 removes the cap.
  type: number
  description: >-
    Limit the number of searches & replacements for every item. The value 0 means every found
    occurrence will be replaced.
  default: 0
casesensitive:
  # Case-sensitivity toggle for searches; matching is case-sensitive by default.
  type: boolean
  default: true
  # Description reworded: the previous text ("to don't use") was ungrammatical.
  description: 'Set to `false` to perform a case-insensitive search.'
regex:
  # Toggles regular-expression search; off by default.
  type: boolean
  description: 'Set to `true` to use regular expression for search string(s).'
  default: false
unwrap:
  # Line-unwrapping flag for table cells; off by default.
  type: boolean
  description: >-
    Unwrap lines into a single line within table cells in provided PDF documents.
    This is only applicable when `lineGrouping` is set to `1`.
  default: false
rect:
  # Extraction rectangle, encoded as four space-separated values.
  type: string
  format: '{x} {y} {width} {height}'
  # A space was added before the Markdown link ("Use [PDF ..."); without it the link
  # text ran into the preceding word.
  description: >-
    Defines coordinates for extraction. Use
    [PDF Edit Add Helper](https://app.pdf.co/pdf-edit-add-helper) to get or measure PDF
    coordinates. The format is `{x} {y} {width} {height}`.
  example: '10 20 300 400'
lang:
  # OCR language code(s); several codes can be joined with "+".
  type: string
  description: >-
    Set the language for OCR (text from image) to use for scanned PDF, PNG, and JPG documents
    input when extracting text. see [Language Support](/api-reference/language-support).
    You can also use 2 languages simultaneously like this: `eng+deu` (any combination).
  pattern: '^[a-z]{3}(\+[a-z]{3})*$'
  default: eng
  example: eng+deu
linegrouping:
  # Line-grouping mode; the values are the strings '1', '2' and '3', not integers.
  type: string
  description: >-
    Controls how lines of text are grouped when extracting data from a PDF. Line grouping
    within table cells. The available modes are: `1`, `2`, `3`. For more information, see
    [Line Grouping](#line-grouping-options).
  enum: ['1', '2', '3']
  pattern: '^[123]$'
name:
  # File name given to the generated output.
  description: 'File name for generated output.'
  type: string
templateid:
  # Numeric ID of a stored HTML template.
  description: >-
    Set ID of HTML template to be used. View and manage your templates at HTML to PDF
    Templates.
  type: number
expiration:
  # Lifetime of the output link, in minutes; 60 by default.
  type: number
  default: 60
  # Double quotes are kept for the \u2011 (non-breaking hyphen) escape; line breaks inside
  # the quotes fold to single spaces.
  description: "Sets the expiration time for the output link, in minutes. After this period,
    generated output file(s) are automatically deleted from
    [PDF.co Temporary Files Storage](/api-reference/file-upload/overview). The maximum allowed
    duration depends on your subscription plan. For permanent storage of input files (e.g.,
    reusable images, PDF templates, documents), use `PDF.co Built\u2011In Files Storage`."
margins:
  # Page margins as one string: four values, or a single value for all sides.
  type: string
  format: '{topMargin} {rightMargin} {bottomMargin} {leftMargin}'
  # Spaces were restored around the inline-code unit names; they were previously run
  # together ("use`px`,`mm`,`cm`or`in`units").
  description: >-
    Set custom margins, overriding CSS default margins. Specify the margins in the format
    `{top} {right} {bottom} {left}`. You can use `px`, `mm`, `cm` or `in` units. Also, you can
    set margins for all sides at once using a single value.
  example: '10px 10px 10px 10px'
papersize:
  # Either a named paper size or a custom "<width> <height>" string with optional units.
  # Double quotes are kept for the \u2013 (en dash) escape; line breaks inside the quotes
  # fold to single spaces.
  description: "Specifies the paper size. Accepts standard sizes like 'Letter', 'Legal',
    'Tabloid', 'Ledger', 'A0'\u2013'A6'. You can also set a custom size by providing width and
    height separated by a space, with optional units: `px` (pixels), `mm` (millimeters), `cm`
    (centimeters), or `in` (inches). Examples: '200 300', '200px 300px', '200mm 300mm',
    '20cm 30cm', '6in 8in'."
  default: A4
  example: A4
  oneOf:
    # Alternative 1: one of the named sizes.
    - type: string
      enum:
        - Letter
        - Legal
        - Tabloid
        - Ledger
        - A0
        - A1
        - A2
        - A3
        - A4
        - A5
        - A6
    # Alternative 2: two numbers, each with an optional unit, separated by whitespace.
    - type: string
      pattern: '^\s*\d+(?:\.\d+)?(?:px|mm|cm|in)?\s+\d+(?:\.\d+)?(?:px|mm|cm|in)?\s*$'
orientation:
  # Page orientation; Portrait by default.
  type: string
  description: >-
    Sets the document orientation. Options: `Portrait` for vertical layout, and `Landscape`
    for horizontal layout.
  enum: [Portrait, Landscape]
  default: Portrait
printbackground:
  # Controls background colors and images in the generated PDF; on by default.
  type: boolean
  default: true
  # Description reworded: the previous sentence was garbled ("to disable ... are included").
  description: >-
    Set to `false` to exclude background colors and images when generating PDFs from HTML/URL.
mediatype:
  # Media type applied while rendering; `print` by default.
  type: string
  description: >-
    Controls how content is rendered when converting to PDF. Options: `print` (uses print
    styles), `screen` (uses screen styles), `none` (no media type applied).
  enum:
    - print
    - screen
    - none
  default: print
donotwaitfullload:
  # Trades a full page load for faster conversion; off by default.
  type: boolean
  description: >-
    Controls how thoroughly the converter waits for a page to load before converting HTML to
    PDF --- false waits for full page load, while true speeds up conversion by waiting only
    for minimal loading.
  default: false
header:
  # HTML fragment used as the page header.
  type: string
  format: html
  # Description reworded: the previous text ("Set this to can add ...") was ungrammatical.
  description: >-
    User-definable HTML for the header, applied at the top of every page.
    The format is HTML.
footer:
  # HTML fragment used as the page footer.
  type: string
  format: html
  # Description reworded: the previous text ("Set this to can add ...") was ungrammatical.
  description: >-
    User-definable HTML for the footer, applied at the bottom of every page.
    The format is HTML.
searchstrings:
  # List of search strings.
  description: 'The array of strings to search.'
  type: array
  items:
    type: string
searchstring:
  # Single search string.
  description: 'The string to search.'
  type: string
regexsearch:
  # Regular-expression toggle for the search string parameter(s); off by default.
  type: boolean
  description: >-
    Set to true to enable regular expression search for the `searchString(s)` parameter.
  default: false
autosize:
  # Automatic page sizing toggle; no default is declared.
  type: boolean
  # Description reworded: the previous sentence ("to page dimensions adjust to content")
  # was garbled.
  description: >-
    Set to `true` to automatically adjust page dimensions to fit the content.
    If `false`, the worksheet's page setup is used.
type:
  # Barcode type selector; QRCode by default.
  # The outer key `type` is the schema name, the inner `type` is its JSON type.
  type: string
  description: 'Set the barcode type to be used. See available barcode types in the [Supported Barcode Types](/api-reference/barcode/overview#supported-barcode-types) '
  default: QRCode
  enum:
    - AustralianPostCode
    - Aztec
    - Codabar
    - CodablockF
    - Code128
    - Code16K
    - Code39
    - Code39Extended
    - Code39Mod43
    - Code39Mod43Extended
    - Code93
    - DataMatrix
    - DPMDataMatrix
    - EAN13
    - EAN2
    - EAN5
    - EAN8
    - GS1DataBarExpanded
    - GS1DataBarExpandedStacked
    - GS1DataBarLimited
    - GS1DataBarOmnidirectional
    - GS1DataBarStacked
    - GTIN12
    - GTIN13
    - GTIN14
    - GTIN8
    - IntelligentMail
    - Interleaved2of5
    - ITF14
    - MaxiCode
    - MICR
    - MicroPDF
    - MSI
    - PatchCode
    - PDF417
    - Pharmacode
    - PostNet
    - PZN
    - QRCode
    - RoyalMail
    - RoyalMailKIX
    - Trioptic
    - UPCA
    - UPCE
    - UPU
worksheetindex:
  # Worksheet index carried as a digits-only string; numbering starts at 1.
  type: string
  format: number
  description: 'Set the index of the worksheet to be used. The first worksheet has index 1.'
  pattern: '^\d+$'
file:
  # Local file path to upload; flagged readOnly.
  type: string
  format: path to a local file
  readOnly: true
  description: 'The path to the local file to be uploaded.'
x:
  # Horizontal coordinate in points; must be non-negative.
  type: number
  description: >-
    X coordinate (zero point is in the top left corner). You can use
    [PDF Edit Add Helper](https://app.pdf.co/pdf-edit-add-helper) to measure coordinates.
    The unit is `pt`.
  minimum: 0
# The key is quoted because a bare `y` is a boolean in the YAML 1.1 type set, so
# spec-strict 1.1 parsers can load it as `true` instead of the string "y".
'y':
  # Vertical coordinate in points; must be non-negative.
  type: number
  minimum: 0
  description: >-
    Y coordinate (zero point is in the top left corner). You can use
    [PDF Edit Add Helper](https://app.pdf.co/pdf-edit-add-helper) to measure coordinates.
    The unit is `pt`.
width:
  # Text box width in points; must be non-negative.
  type: number
  description: >-
    Width of the text box. You can use
    [PDF Edit Add Helper](https://app.pdf.co/pdf-edit-add-helper) to measure pdf coordinates.
    The unit is `pt`.
  minimum: 0
height:
  # Text box height in points; must be non-negative.
  type: number
  description: >-
    Height of the text box. You can use
    [PDF Edit Add Helper](https://app.pdf.co/pdf-edit-add-helper) to measure pdf coordinates.
    The unit is `pt`.
  minimum: 0
timeout:
  # Output-link timeout in seconds; flagged readOnly.
  type: number
  readOnly: true
  description: 'Timeout for output links in seconds'
profiles:
  # Free-form string carrying endpoint-specific extra options.
  type: string
  # Description fixed: the missing "to" was added ("are used to configure").
  description: >-
    Profiles are used to configure extra options for specific API endpoints and may be unique
    to an API. For more information, see [Profiles](/api-reference/profiles) and the
    documentation of each endpoint for profiles specific to it.
templatedata:
  # JSON-encoded data string for a Mustache template; flagged readOnly.
  type: string
  readOnly: true
  description: >-
    please set to string converted from JSON with data for Mustache template or leave empty
    if no template is used
makeunsearchable:
  # Boolean flag; the spec gives it no description.
  # NOTE(review): presumably requests a non-searchable output PDF - confirm and add a description.
  type: boolean
customscript:
  # Script text executed on page load; flagged readOnly.
  type: string
  readOnly: true
  description: 'custom script to execute on page load'
legacyrendition:
  # Optional switch for the legacy rendition.
  description: 'use legacy rendition (optional)'
  type: boolean
rendertimeout:
  # Optional rendering timeout in milliseconds; flagged readOnly.
  type: number
  readOnly: true
  description: 'timeout for rendering in milliseconds (optional)'
responses:
Success:
  # JSON body returned on success: status and message, output URL, job ID, credit counters
  # and request duration.
  description: 'Success.'
  content:
    application/json:
      schema:
        type: object
        properties:
          status:
            type: string
            description: 'Status of the API response.'
            example: success
          message:
            type: string
            description: 'Descriptive message for the response status.'
            example: Success
          url:
            type: string
            format: uri
            description: 'URL to the output file.'
            example: 'https://pdf-temp-files.s3.us-west-2.amazonaws.com/output.pdf'
          jobId:
            type: string
            description: 'Unique identifier for the job.'
            example: 6YSZD3U872ZYYFEDMQCQSGEEO8YSF5WA
          credits:
            type: integer
            description: 'Credits used for this operation.'
            example: 2
          remainingCredits:
            type: integer
            description: 'Credits remaining after this operation.'
            example: 1480582
          duration:
            type: integer
            description: 'Time taken to complete the request, in milliseconds.'
            example: 33
BadRequest:
  # Error body whose example status code is 400.
  description: >-
    Bad request. Typically due to bad input parameters or unreachable input URLs (e.g., access
    restrictions like login or password).
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 400}
          message:
            type: string
            example: >-
              Bad request. Typically due to bad input parameters or unreachable input URLs
              (e.g., access restrictions like login or password).
Unauthorized:
  # Error body whose example status code is 401.
  description: >-
    Unauthorized. Authentication is required and has failed or has not yet been provided.
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 401}
          message:
            type: string
            example: >-
              Unauthorized. Authentication is required and has failed or has not yet been
              provided.
NotEnoughCredits:
  # Error body whose example status code is 402.
  description: 'Not enough credits.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 402}
          message:
            type: string
            example: 'Not enough credits.'
Forbidden:
  # Error body whose example status code is 403.
  description: 'Access forbidden for input URL.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 403}
          message:
            type: string
            example: 'Access forbidden for input URL.'
NotFound:
  # Error body whose example status code is 404.
  description: 'The requested resource could not be found.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 404}
          message:
            type: string
            example: 'The requested resource could not be found.'
Timeout:
  # Error body whose example status code is 408.
  description: 'The server timed out waiting for the request.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 408}
          message:
            type: string
            example: 'The server timed out waiting for the request.'
TooManyRequests:
  # Error body whose example status code is 429.
  description: 'Too many requests in a given time period.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 429}
          message:
            type: string
            example: 'Too many requests in a given time period.'
InvalidPassword:
  # Error body whose example status code is 441.
  description: 'Invalid Password. Password protected document.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 441}
          message:
            type: string
            example: 'Invalid Password. Password protected document.'
DamagedDocument:
  # Error body whose example status code is 442.
  description: 'Input document is damaged or of incorrect type.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 442}
          message:
            type: string
            example: 'Input document is damaged or of incorrect type.'
PermissionsError:
  # Error body whose example status code is 443. The description also names the
  # `profiles` override; the example message is the shorter first sentences only.
  description: >-
    Permissions. The operation is prohibited by document security settings. You can turn off
    this check by setting the `profiles` param to `{CheckPermissions: false}`. Important: only
    use this if you are the owner or have legal permission.
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 443}
          message:
            type: string
            example: 'Permissions. The operation is prohibited by document security settings.'
ProfilesParsingError:
  # Error body whose example status code is 444.
  description: >-
    Profiles parsing error. Please ensure that the configuration is supported.
    See `/profiles` samples.
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 444}
          message:
            type: string
            example: 'Profiles parsing error. Please ensure that the configuration is supported.'
TimeoutError:
  # Error body whose example status code is 445 (a separate response from `Timeout`, 408).
  description: >-
    Timeout error. For large documents, use asynchronous mode (`async=true`) and check status
    via `/job/check`. For many-page files, use the `pages` parameter.
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 445}
          message:
            type: string
            example: >-
              Timeout error. For large documents, use asynchronous mode (async=true) and check
              status via /job/check.
MissingFiles:
  # Error body whose example status code is 446.
  description: 'Some files required for conversion are missing.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 446}
          message:
            type: string
            example: 'Some files required for conversion are missing.'
InvalidTemplate:
  # Error body whose example status code is 447.
  description: 'Invalid template.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 447}
          message:
            type: string
            example: 'Invalid template.'
InvalidUrlOrHtml:
  # Error body whose example status code is 448.
  description: 'Invalid URL or HTML. Ensure the provided URL is valid and accessible.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 448}
          message:
            type: string
            example: 'Invalid URL or HTML. Ensure the provided URL is valid and accessible.'
InvalidIndexRange:
  # Error body whose example status code is 449.
  description: >-
    Invalid index range. Page index is out of range. Use `/pdf/info` to get page count.
    First page is `0`.
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 449}
          message:
            type: string
            example: 'Invalid index range. Page index is out of range.'
InvalidPageRange:
  # Error body whose example status code is 450.
  description: 'Invalid page range specified.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 450}
          message:
            type: string
            example: 'Invalid page range specified.'
InvalidUrl:
  # Error body whose example status code is 452.
  description: 'Invalid URL.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 452}
          message:
            type: string
            example: 'Invalid URL.'
InvalidParameters:
  # Error body whose example status code is 454.
  description: 'Invalid parameters.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 454}
          message:
            type: string
            example: 'Invalid parameters.'
InternalServerError:
  # Error body whose example status code is 500.
  description: 'Something went wrong. Please try again or contact support.'
  content:
    application/json:
      schema:
        type: object
        properties:
          error: {type: boolean, example: true}
          status: {type: integer, example: 500}
          message:
            type: string
            example: 'Something went wrong. Please try again or contact support.'
paths:
/v1/ai-invoice-parser:
  # AI Invoice Parser operation: POST a JSON body with the invoice URL and, optionally,
  # custom field names and a callback URL.
  post:
    tags:
      - Extraction
    x-mint:
      href: /api-tester/ai-invoice-parser
      # Literal block scalar so the Markdown keeps its line breaks; as a folded quoted
      # scalar the heading, bullets and <Note> block collapse onto a single line.
      # NOTE(review): blank-line placement was reconstructed - confirm against rendered docs.
      content: |-
        ## Prerequisites

        Before using the AI Invoice Parser API, please note:

        - **Invoices only**: The API processes invoices exclusively to ensure accurate parsing.
        - **Asynchronous processing**: When you make a request, you get a JobID immediately while processing happens in the background.

        <Note>
        To get your results, you can either:
        - Poll the [**job/check**](/api-tester/job-check) endpoint using your `JobID`, or
        - Provide a callback URL to get results automatically via webhook.
        </Note>
    operationId: post_v1_ai-invoice-parser
    summary: AI Invoice Parser
    description: >-
      Process invoices faster than ever by extracting data and structuring it automatically
      with our advanced AI. Get quick and accurate data from any invoice, no matter the layout.
    requestBody:
      # The schema below lists `url` as required, so an absent body can never validate;
      # the body itself is therefore marked required (it was `false`).
      required: true
      content:
        application/json:
          schema:
            type: object
            additionalProperties: false
            required:
              - url
            properties:
              url:
                # allOf wraps the shared schema so a sample default can sit beside the $ref.
                allOf:
                  - $ref: '#/components/schemas/url'
                default: https://pdfco-test-files.s3.us-west-2.amazonaws.com/document-parser/sample-invoice.pdf
              customfield:
                type: string
                description: >-
                  JSON string containing
                  [custom field](/api-reference/ai-invoice-parser/#custom-fields) names to
                  extract. Use `camelCase` for field names (e.g., `storeNumber`,
                  `deliveryDate`). Multiple fields should be comma-separated.
              callback:
                allOf:
                  - $ref: '#/components/schemas/callback'
                default: https://example.com/callback/url/you/provided
    # Status codes are quoted so they stay string keys.
    responses:
      '200':
        $ref: '#/components/responses/Success'
      '400':
        $ref: '#/components/responses/BadRequest'
      '401':
        $ref: '#/components/responses/Unauthorized'
      '402':
        $ref: '#/components/responses/NotEnoughCredits'
      '403':
        $ref: '#/components/responses/Forbidden'
      '404':
        $ref: '#/components/responses/NotFound'
      '408':
        $ref: '#/components/responses/Timeout'
      '429':
        $ref: '#/components/responses/TooManyRequests'
      '441':
        $ref: '#/components/responses/InvalidPassword'
      '442':
        $ref: '#/components/responses/DamagedDocument'
      '443':
        $ref: '#/components/responses/PermissionsError'
      '444':
        $ref: '#/components/responses/ProfilesParsingError'
      '445':
        $ref: '#/components/responses/TimeoutError'
      '446':
        $ref: '#/components/responses/MissingFiles'
      '447':
        $ref: '#/components/responses/InvalidTemplate'
      '448':
        $ref: '#/components/responses/InvalidUrlOrHtml'
      '449':
        $ref: '#/components/responses/InvalidIndexRange'
      '450':
        $ref: '#/components/responses/InvalidPageRange'
      '452':
        $ref: '#/components/responses/InvalidUrl'
      '454':
        $ref: '#/components/responses/InvalidParameters'
      '500':
        $ref: '#/components/responses/InternalServerError'
    x-codegen-request-body-name: body
/v1/pdf/documentparser:
post:
tags:
- Extraction
x-mint:
href: /api-tester/documentparser
summary: Parse Document
operationId: post_v1_pdf_documentparser
requestBody:
content:
application/json:
schema:
type: object
properties:
file:
$ref: '#/components/schemas/file'
url:
allOf:
- $ref: '#/components/schemas/url'
default: https://pdfco-test-files.s3.us-west-2.amazonaws.com/document-parser/MultiPageTable.pdf
templateid:
$ref: '#/components/schemas/templateid'
template:
allOf:
- $ref: '#/components/schemas/template'
default: "{\r\n \"templateVersion\": 3,\r\n \"templatePriority\": 0,\r\n \"sourceId\": \"Multipage Table\
\ Test\",\r\n \"detectionRules\": {\r\n \"keywords\": [\r\n \"Sample document with multi-page\
\ table\"\r\n ]\r\n },\r\n \"fields\": {\r\n \"total\": {\r\n \"type\": \"regex\",\r\n \
\ \"expression\": \"TOTAL {{DECIMAL}}\",\r\n \"dataType\": \"decimal\"\r\n }\r\n },\r\n \"\
tables\": [\r\n {\r\n \"name\": \"table1\",\r\n \"start\": {\r\n \"expression\": \"\
Item\\\\s+Description\\\\s+Price\\\\s+Qty\\\\s+Extended Price\"\r\n },\r\n \"end\": {\r\n \
\ \"expression\": \"TOTAL\\\\s+\\\\d+\\\\.\\\\d\\\\d\"\r\n },\r\n \"row\": {\r\n \
\ \"expression\": \"^\\\\s*(?<itemNo>\\\\d+)\\\\s+(?<description>.+?)\\\\s+(?<price>\\\\d+\\\\.\\\\d\\\
\\d)\\\\s+(?<qty>\\\\d+)\\\\s+(?<extPrice>\\\\d+\\\\.\\\\d\\\\d)\"\r\n },\r\n \"columns\": [\r\
\n {\r\n \"name\": \"itemNo\",\r\n \"type\": \"integer\"\r\n },\r\n \
\ {\r\n \"name\": \"description\",\r\n \"type\": \"string\"\r\n },\r\n \
\ {\r\n \"name\": \"price\",\r\n \"type\": \"decimal\"\r\n },\r\n \
\ {\r\n \"name\": \"qty\",\r\n \"type\": \"integer\"\r\n },\r\n {\r\n\
\ \"name\": \"extPrice\",\r\n \"type\": \"decimal\"\r\n }\r\n ],\r\n \
\ \"multipage\": true\r\n }\r\n ]\r\n}"
inline:
allOf:
- $ref: '#/components/schemas/inline'
default: true
outputformat:
type: string
description: 'Format of output File. Valid values: JSON, YAML, XML, CSV.'
enum:
- JSON
- YAML
- XML
- CSV
default: JSON
generatecsvheaders:
type: boolean
name:
$ref: '#/components/schemas/name'
pages:
$ref: '#/components/schemas/pages0'
async:
allOf:
- $ref: '#/components/schemas/async'
default: false
password:
$ref: '#/components/schemas/password'
expiration:
$ref: '#/components/schemas/expiration'
timeout:
$ref: '#/components/schemas/timeout'
httpusername:
$ref: '#/components/schemas/httpusername'
httppassword:
$ref: '#/components/schemas/httppassword'
additionalProperties: false
required:
- url
required: false
responses:
'200':
$ref: '#/components/responses/Success'
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'402':
$ref: '#/components/responses/NotEnoughCredits'
'403':
$ref: '#/components/responses/Forbidden'
'404':
$ref: '#/components/responses/NotFound'
'408':
$ref: '#/components/responses/Timeout'
'429':
$ref: '#/components/responses/TooManyRequests'
'441':
$ref: '#/components/responses/InvalidPassword'
'442':
$ref: '#/components/responses/DamagedDocument'
'443':
$ref: '#/components/responses/PermissionsError'
'444':
$ref: '#/components/responses/ProfilesParsingError'
'445':
$ref: '#/components/responses/TimeoutError'
'446':
$ref: '#/components/responses/MissingFiles'
'447':
$ref: '#/components/responses/InvalidTemplate'
'448':
$ref: '#/components/responses/InvalidUrlOrHtml'
'449':
$ref: '#/components/responses/InvalidIndexRange'
'450':
$ref: '#/components/responses/InvalidPageRange'
'452':
$ref: '#/components/responses/InvalidUrl'
'454':
$ref: '#/components/responses/InvalidParameters'
'500':
$ref: '#/components/responses/InternalServerError'
x-codegen-request-body-name: body
description: This API method extracts data from documents based on a document parser extraction template. With this
API method, you can extract data from custom areas by searching form fields, tables, multiple
# --- truncated at 32 KB (269 KB total) ---
# Full source: https://raw.githubusercontent.com/api-evangelist/pdf-co/refs/heads/main/openapi/pdf-co-openapi.yml