All Products
Search
Document Center

Energy Expert: API additional information

Last Updated: Apr 25, 2025

GetDocParsingResult.Content structure

{
    "doc_info": {       # File basic information
        "languages": [  # Language
            "zh",
            "en"
        ],
        "doc_type": "pdf",  # File type, including 'pdf', 'csv', 'xlsx', 'doc', 'docx', and 'txt'.
        "pdf_toc": [{          # File outline: heading levels and page numbers
                "title": "xxx",  # Title of the outline entry, for example, the cover
                "level": 0,     #Level
                "page": 0       #Page number
            }],
        "pages": 366, # Number of pages
        "page_list": [{ # Page size (width and height) and other page information
                "imageWidth": 596.0, #Page width
                "imageHeight": 842.0, #Page height
                "pageIdAllDocs": 0, # Page ID across all files
                "fileIndex": 0,  # File index
                "pageIdCurDoc": 0,  #Page ID of the current file
                "angle": 0   #Page angle
            }],
       "doc_data":   #File content
      [{ "uniqueId":"about_us_para",   #Content block ID
        "page_num":"01",    #Page number
        "index": "xxx",    #index
        "name": "xxx",    #Content block name
        "type": "xxxx",  # Content block type, including ['Title', 'Text', 'Caption', 'Section-header', 'Footnote', 'Page-header', 'Formula', 'Page-footer', 'Table', 'Figure', and 'Picture']
        "subType":"xxx",  # Content block subtype
        "text": "xxx",   # Text of content block
        "before_text": "xxx", # Text before content block
        "after_text": "xxx",  # Text after content block
         "extInfo":[     # Coordinates and other information of content block
          {"uniqueId": "b0x1x0",  # ID of content subblock
            "pos": [{"x": 229.0, "y": 208.0}, {"x": 421.0, "y": 208.0}, {"x": 421.0, "y": 242.0}, {"x": 229.0, "y": 242.0}],  # Coordinates of the content subblock: four corner points, where x and y are the positions on the x-axis and y-axis
            "text": "Kurt Götze",   #Text of content subblock
            "type": "Text",     # Type of content subblock
            "subType": "Text",  # Subtype of content subblock
            "pageNum": [0], # Page number of content subblock
            "index": 0    #index
          }]
        }]
      }