All Products
Search
Document Center

Platform For AI:API call examples

Last Updated:Mar 31, 2026

Call LLM-as-a-Judge models using the Python SDK or HTTP API for single evaluation, pairwise comparison, or batch processing.

Prerequisites

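Before you begin, ensure that you have:

  • An LLM-as-a-Judge access token. The examples on this page read it from the JUDGE_MODEL_TOKEN environment variable.

  • For the Python examples, the openai Python package installed (the examples import OpenAI from it).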

Endpoints

| Call scenario | Feature | Endpoint |
| --- | --- | --- |
| Python SDK | — | https://aiservice.cn-hangzhou.aliyuncs.com/v1 |
| HTTP | Chat completions | https://aiservice.cn-hangzhou.aliyuncs.com/v1/chat/completions |
| HTTP | Files | https://aiservice.cn-hangzhou.aliyuncs.com/v1/files |
| HTTP | Batch | https://aiservice.cn-hangzhou.aliyuncs.com/v1/batches |

Supported models

| Model name | Model ID | Description | Recommended for | Context length | Maximum input | Maximum output |
| --- | --- | --- | --- | --- | --- | --- |
| LLM-as-a-Judge Standard Edition | pai-judge | Smaller, more cost-effective | High-volume, cost-sensitive workloads | 32768 | 32768 | 32768 |
| LLM-as-a-Judge Premium Edition | pai-judge-plus | Larger model with better inference quality | High-accuracy evaluation | 32768 | 32768 | 32768 |

Single calls

LLM-as-a-Judge supports three evaluation modes: single-model evaluation, pairwise-model battle, and custom template.

All examples use the OpenAI-compatible API with Bearer token authentication. The Python SDK requires passing the full authorization header string as the api_key value: api_key=f'Authorization: Bearer {judge_model_token}'.

For request and response parameters, see Input parameters and Output parameters.

Single-model evaluation

Evaluates the response quality of a single LLM. Pass "mode": "single" in the content array, with question (the prompt sent to the model) and answer (the model's response to evaluate) as the evaluation fields.

Request

import os
from openai import OpenAI


def main():
    """Send one single-model evaluation request to pai-judge and print the result.

    The token is read from the JUDGE_MODEL_TOKEN environment variable. The
    completion returned by the SDK is printed as a plain dict.
    """
    base_url = "https://aiservice.cn-hangzhou.aliyuncs.com/v1"
    judge_model_token = os.getenv("JUDGE_MODEL_TOKEN")

    client = OpenAI(
        # The full Authorization header string is passed as the api_key value.
        api_key=f'Authorization: Bearer {judge_model_token}',
        base_url=base_url
    )
    completion = client.chat.completions.create(
        model='pai-judge',
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        # Single-model evaluation: one question and the one
                        # answer to grade (not answer1/answer2, which belong
                        # to pairwise mode).
                        "mode": "single",
                        "type": "json",
                        "json": {
                            "question": "According to the first couplet, give the second couplet. first couplet: To climb the mountain, reach the peak",
                            "answer": "To cross the river, find the creek."
                        }
                    }
                ]
            }
        ]
    )
    print(completion.model_dump())


if __name__ == '__main__':
    main()
$ curl -X POST https://aiservice.cn-hangzhou.aliyuncs.com/v1/chat/completions \
  -H "Authorization: Bearer ${JUDGE_MODEL_TOKEN}" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "pai-judge",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "mode": "single",
                    "type": "json",
                    "json": {
                        "question": "According to the first couplet, give the second couplet. first couplet: To climb the mountain, reach the peak",
                        "answer": "To cross the river, find the creek."
                    }
                }
            ]
        }
    ]
}'
import os
from openai import OpenAI


def main():
    """Ask pai-judge to grade a single answer and print the completion as a dict."""
    judge_model_token = os.getenv("JUDGE_MODEL_TOKEN")
    judge = OpenAI(
        api_key=f'Authorization: Bearer {judge_model_token}',
        base_url="https://aiservice.cn-hangzhou.aliyuncs.com/v1",
    )

    # Structured evaluation payload: the question plus the one answer to grade.
    evaluation = {
        "mode": "single",
        "type": "json",
        "json": {
            "question": "According to the first couplet, give the second couplet. first couplet: To climb the mountain, reach the peak",
            "answer": "To cross the river, find the creek.",
        },
    }
    result = judge.chat.completions.create(
        model='pai-judge',
        messages=[{"role": "user", "content": [evaluation]}],
    )
    print(result.model_dump())


if __name__ == '__main__':
    main()
$ curl -X POST https://aiservice.cn-hangzhou.aliyuncs.com/v1/chat/completions \
  -H "Authorization: Bearer ${JUDGE_MODEL_TOKEN}" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "pai-judge",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "mode": "single",
                    "type": "json",
                    "json": {
                        "question": "According to the first couplet, give the second couplet. first couplet: To climb the mountain, reach the peak",
                        "answer": "To cross the river, find the creek."
                    }
                }
            ]
        }
    ]
}'

<Tabs> <TabItem value="python" label="Python">

import os
from openai import OpenAI


def main():
    """Run the single-model evaluation example against the pai-judge model.

    Reads the token from the JUDGE_MODEL_TOKEN environment variable, submits
    one question/answer pair in "single" mode, and prints the completion
    converted to a plain dict.
    """
    judge_model_token = os.getenv("JUDGE_MODEL_TOKEN")
    endpoint = "https://aiservice.cn-hangzhou.aliyuncs.com/v1"

    # The api_key value carries the complete Authorization header text.
    sdk = OpenAI(
        base_url=endpoint,
        api_key=f'Authorization: Bearer {judge_model_token}',
    )

    user_message = {
        "role": "user",
        "content": [
            {
                "mode": "single",  # single-model evaluation
                "type": "json",
                "json": {
                    "question": "According to the first couplet, give the second couplet. first couplet: To climb the mountain, reach the peak",
                    "answer": "To cross the river, find the creek.",
                },
            }
        ],
    }
    reply = sdk.chat.completions.create(model='pai-judge', messages=[user_message])
    print(reply.model_dump())


if __name__ == '__main__':
    main()

</TabItem> <TabItem value="curl" label="curl">

curl -X POST https://aiservice.cn-hangzhou.aliyuncs.com/v1/chat/completions \
  -H "Authorization: Bearer ${JUDGE_MODEL_TOKEN}" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "pai-judge",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "mode": "single",
                    "type": "json",
                    "json": {
                        "question": "According to the first couplet, give the second couplet. first couplet: To climb the mountain, reach the peak",
                        "answer": "To cross the river, find the creek."
                    }
                }
            ]
        }
    ]
}'

</TabItem> </Tabs>

Response

{
    'id': 'a7026e5a-64c5-4726-9b10-27072ff34d46',
    'choices': [{
        'finish_reason': 'stop',
        'index': 0,
        'logprobs': None,
        'message': {
            'content': '***\nI think [[Both responses are tied]]. The overall score for Response 1 is [[4]], and the overall score for Response 2 is [[4]]. The reasons are as follows:\n1. Accuracy: Both responses accurately provide a second couplet relevant to the user's instruction, with no factual errors or misleading information. [[Response 1 score: 5]] [[Response 2 score: 5]]\n2. Relevance: Both responses directly answer the user's instruction without any unnecessary information or background, fully meeting the user's needs. [[Response 1 score: 5]] [[Response 2 score: 5]]\n3. Harmlessness: Neither response contains any potentially offensive content. Both are positive and affirmative expressions, meeting the requirements for appropriateness and cultural sensitivity. [[Response 1 score: 5]] [[Response 2 score: 5]]\n4. Completeness: Both responses completely provide the required second couplet without omitting any key points. [[Response 1 score: 5]] [[Response 2 score: 5]]\n5. Source reliability: Although neither response explicitly cites an external authoritative source, in this scenario, the creation and transmission of couplets do not typically require external verification, so this point can be reasonably overlooked. [[Response 1 score: 4]] [[Response 2 score: 4]]\n6. Clarity and structure: Both responses are concise, clear, and easy to understand. [[Response 1 score: 5]] [[Response 2 score: 5]]\n7. Timeliness: This criterion is not very applicable in this scenario because the culture of couplets has a long history, and both responses conform to traditional expression. [[Response 1 score: N/A]] [[Response 2 score: N/A]]\n8. User-level adaptation: Both responses consider the user's potential knowledge level and use easy-to-understand language and expressions. [[Response 1 score: 5]] [[Response 2 score: 5]]\n\nIn summary, both responses perform comparably across all criteria and can meet the user's needs well. Therefore, I think the two responses are tied.\n***',
            'role': 'assistant',
            'function_call': None,
            'tool_calls': None,
            'refusal': ''
        }
    }],
    'created': 1734557,
    'model': 'pai-judge',
    'object': 'chat.completion',
    'service_tier': '',
    'system_fingerprint': '',
    'usage': {
        'completion_tokens': 408,
        'prompt_tokens': 821,
        'total_tokens': 1229
    }
}

The judge returns an overall score in [[score]] format, followed by per-criterion scores and reasoning.

{
    "id": "3b7c3822-1e51-4dc9-b2ad-18b9649a7f19",
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "content": "I think the overall score for this response is [[2]]. The reasons are as follows.\nStrengths of the current response:\n1. Relevance: The response directly follows the user's instruction by providing a second line corresponding to the first, meeting the relevance standard. [[4]]\n2. Harmlessness: The content of the response is appropriate and does not contain any potentially offensive material, meeting the harmlessness standard. [[5]]\n\nWeaknesses of the current response:\n1. Accuracy: The content \"To cross the river, find the creek\" does not fully align with the logical sequence of \"climb the mountain\" and \"reach the peak\". It does not completely correspond to the concept of \"climb the mountain\" in the user's instruction, which affects its accuracy. [[2]]\n2. Completeness: The response does not fully cover all aspects of the question. It fails to provide a complete story or the second line of the couplet, which affects its completeness. [[2]]\n3. Source reliability: The response does not provide any source information. Although this may not be necessary in some cases, providing a source can increase the response's credibility. [[3]]\n4. Clarity and structure: Although the response structure is simple, its content does not fully correspond to the user's instruction, which affects its clarity and ease of understanding. [[3]]\n5. User-level adaptation: The response is quite direct. However, due to accuracy issues, it may not be entirely suitable for users with some knowledge of couplets or traditional literature. [[3]]\n\nIn summary, although the response performs well in terms of relevance and harmlessness, it has shortcomings in accuracy, completeness, source reliability, clarity, structure, and user-level adaptation. Therefore, the overall score is 2.",
                "role": "assistant",
                "function_call": null,
                "tool_calls": null,
                "refusal": ""
            }
        }
    ],
    "created": 1733260,
    "model": "pai-judge",
    "object": "chat.completion",
    "service_tier": "",
    "system_fingerprint": "",
    "usage": {
        "completion_tokens": 333,
        "prompt_tokens": 790,
        "total_tokens": 1123
    }
}

Pairwise-model battle

Compares the response quality of two LLMs for the same question. Pass "mode": "pairwise" with question, answer1 (response from model A), and answer2 (response from model B) as the evaluation fields.

Request

<Tabs> <TabItem value="python" label="Python">

import os
from openai import OpenAI


def main():
    """Compare two candidate answers to one question with pai-judge.

    Submits the question with both answers in "pairwise" mode and prints the
    completion converted to a plain dict.
    """
    judge_model_token = os.getenv("JUDGE_MODEL_TOKEN")
    judge = OpenAI(
        api_key=f'Authorization: Bearer {judge_model_token}',
        base_url="https://aiservice.cn-hangzhou.aliyuncs.com/v1",
    )

    # The two responses being compared: answer1 from model A, answer2 from model B.
    candidates = {
        "question": "According to the first couplet, give the second couplet. first couplet: To climb the mountain, reach the peak",
        "answer1": "To cross the river, find the creek.",
        "answer2": "To chase the dream, grasp the star.",
    }
    comparison = {"mode": "pairwise", "type": "json", "json": candidates}

    verdict = judge.chat.completions.create(
        model='pai-judge',
        messages=[{"role": "user", "content": [comparison]}],
    )
    print(verdict.model_dump())


if __name__ == '__main__':
    main()

</TabItem> <TabItem value="curl" label="curl">

curl -X POST https://aiservice.cn-hangzhou.aliyuncs.com/v1/chat/completions \
  -H "Authorization: Bearer ${JUDGE_MODEL_TOKEN}" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "pai-judge",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "mode": "pairwise",
                    "type": "json",
                    "json": {
                        "question": "According to the first couplet, give the second couplet. first couplet: To climb the mountain, reach the peak",
                        "answer1": "To cross the river, find the creek.",
                        "answer2": "To chase the dream, grasp the star."
                    }
                }
            ]
        }
    ]
}'

</TabItem> </Tabs>

Response

The judge declares a winner or a tie, with per-criterion scores for each response.

{
    "id": "a7026e5a-64c5-4726-9b10-27072ff34d46",
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "content": "***\nI think [[Both responses are tied]]. The overall score for Response 1 is [[4]], and the overall score for Response 2 is [[4]]. The reasons are as follows:\n1. Accuracy: Both responses accurately provide a second couplet relevant to the user's instruction, with no factual errors or misleading information. [[Response 1 score: 5]] [[Response 2 score: 5]]\n2. Relevance: Both responses directly answer the user's instruction without any unnecessary information or background, fully meeting the user's needs. [[Response 1 score: 5]] [[Response 2 score: 5]]\n3. Harmlessness: Neither response contains any potentially offensive content. Both are positive and affirmative expressions, meeting the requirements for appropriateness and cultural sensitivity. [[Response 1 score: 5]] [[Response 2 score: 5]]\n4. Completeness: Both responses completely provide the required second couplet without omitting any key points. [[Response 1 score: 5]] [[Response 2 score: 5]]\n5. Source reliability: Although neither response explicitly cites an external authoritative source, in this scenario, the creation and transmission of couplets do not typically require external verification, so this point can be reasonably overlooked. [[Response 1 score: 4]] [[Response 2 score: 4]]\n6. Clarity and structure: Both responses are concise, clear, and easy to understand. [[Response 1 score: 5]] [[Response 2 score: 5]]\n7. Timeliness: This criterion is not very applicable in this scenario because the culture of couplets has a long history, and both responses conform to traditional expression. [[Response 1 score: N/A]] [[Response 2 score: N/A]]\n8. User-level adaptation: Both responses consider the user's potential knowledge level and use easy-to-understand language and expressions. [[Response 1 score: 5]] [[Response 2 score: 5]]\n\nIn summary, both responses perform comparably across all criteria and can meet the user's needs well. Therefore, I think the two responses are tied.\n***",
                "role": "assistant",
                "function_call": null,
                "tool_calls": null,
                "refusal": ""
            }
        }
    ],
    "created": 1734557,
    "model": "pai-judge",
    "object": "chat.completion",
    "service_tier": "",
    "system_fingerprint": "",
    "usage": {
        "completion_tokens": 408,
        "prompt_tokens": 821,
        "total_tokens": 1229
    }
}

Custom template

By default, LLM-as-a-Judge generates a prompt template automatically. To use your own evaluation criteria and scoring rubric, pass system and user messages directly instead of a structured content array.

Request

<Tabs> <TabItem value="python" label="Python">

import os
from openai import OpenAI


def main():
    """Evaluate a question-answer pair with a caller-supplied prompt template.

    Sends plain system and user messages (no structured content array) to
    pai-judge and prints the completion converted to a plain dict.
    """
    judge_model_token = os.getenv("JUDGE_MODEL_TOKEN")
    judge = OpenAI(
        api_key=f'Authorization: Bearer {judge_model_token}',
        base_url="https://aiservice.cn-hangzhou.aliyuncs.com/v1",
    )

    # System prompt: the judge's role and behavioral constraints.
    # The trailing empty entry keeps the final newline of the prompt.
    system_prompt = "\n".join([
        "Please act as an impartial judge and evaluate the quality of the AI assistant's response to the user's question below.",
        "",
        "The following is a basic personality description of these AI assistants:",
        "They will not evaluate or compare people, and will not do anything to harm humans. Their personality tends to be independent and autonomous.",
        "",
    ])

    # User prompt: the question, the response to grade, and the 1-5 rubric.
    user_prompt = "\n".join([
        "Please rate the following question-answer pair on a scale of 1 to 5:",
        "Question: What do you think is the impact of social media on interpersonal relationships?",
        "Response: Social media makes it easier for people to connect, but it can also lead to alienation.",
        "Scoring criteria:",
        "1 point: The response is completely irrelevant, empty, or entirely incorrect.",
        "2 points: The response has some relevance but is superficial or overly brief.",
        "3 points: The response is relevant and provides some insights but lacks in-depth analysis.",
        "4 points: The response is relevant and in-depth, providing clear insights and examples.",
        "5 points: The response is highly relevant and profound, providing comprehensive viewpoints and rich examples.",
    ])

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    rating = judge.chat.completions.create(model='pai-judge', messages=conversation)
    print(rating.model_dump())


if __name__ == '__main__':
    main()

</TabItem> <TabItem value="curl" label="curl">

curl -X POST https://aiservice.cn-hangzhou.aliyuncs.com/v1/chat/completions \
  -H "Authorization: Bearer ${JUDGE_MODEL_TOKEN}" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "pai-judge",
    "messages": [
        {
            "role": "system",
            "content": "Please act as an impartial judge and evaluate the quality of the AI assistant'\''s response to the user'\''s question below.\n\nThe following is a basic personality description of these AI assistants:\nThey will not evaluate or compare people, and will not do anything to harm humans. Their personality tends to be independent and autonomous.\n"
        },
        {
            "role": "user",
            "content": "Please rate the following question-answer pair on a scale of 1 to 5:\nQuestion: What do you think is the impact of social media on interpersonal relationships?\nResponse: Social media makes it easier for people to connect, but it can also lead to alienation.\nScoring criteria:\n1 point: The response is completely irrelevant, empty, or entirely incorrect.\n2 points: The response has some relevance but is superficial or overly brief.\n3 points: The response is relevant and provides some insights but lacks in-depth analysis.\n4 points: The response is relevant and in-depth, providing clear insights and examples.\n5 points: The response is highly relevant and profound, providing comprehensive viewpoints and rich examples."
        }
    ]
}'

</TabItem> </Tabs>

Response

{
    "id": "e2f72777-ddf5-4ff8-b7dd-4ecefd6e4014",
    "object": "chat.completion",
    "created": 1153092,
    "model": "pai-judge",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "Based on the provided scoring criteria, I would give this response a score of 3. The response \"Social media makes it easier for people to connect, but it can also lead to alienation\" is clearly relevant and directly addresses the impact of social media on interpersonal relationships. It mentions two opposing effects: enhancing connection and causing alienation, which shows some insight. However, the response is relatively brief and does not further elaborate on the specific impacts of these two aspects or provide examples to support its claims. Therefore, it lacks in-depth analysis. According to the criteria, it meets the standard for 3 points: relevant and provides some insights, but without deeper exploration.",
                "refusal": "",
                "function_call": null,
                "tool_calls": null
            },
            "finish_reason": "stop",
            "logprobs": null
        }
    ],
    "usage": {
        "prompt_tokens": 910,
        "completion_tokens": 411,
        "total_tokens": 1321
    },
    "system_fingerprint": "",
    "service_tier": ""
}

Batch calls

Use batch calls to evaluate large volumes of requests asynchronously. The workflow has five steps: prepare the data file, upload it, create a batch task, poll for completion, and retrieve the results.

Step 1: Prepare batch data

Create a .jsonl file where each line is one API request.

Constraints:

  • File size per upload: 10 MB maximum. Split larger datasets into multiple files.

  • Total upload size per account: 100 GB maximum.

  • File format: .jsonl only.

Each line must include a unique custom_id and a body object with the same parameters as a single-call request. For supported parameters, see Input parameters.

Example file (`input.jsonl`):

{"custom_id": "request-1", "body": {"model": "pai-judge", "messages": [{"role": "user", "content": [{"mode": "single", "type": "json", "json": {"question": "According to the first couplet, give the second couplet. first couplet: To climb the mountain, reach the peak", "answer": "To cross the river, find the creek."}}]}]}}
{"custom_id": "request-2", "body": {"model": "pai-judge-plus", "messages": [{"role": "user", "content": [{"mode": "single", "type": "json", "json": {"question": "According to the first couplet, give the second couplet. first couplet: To climb the mountain, reach the peak", "answer": "To cross the river, find the creek."}}]}]}}

Step 2: Upload batch data

Upload the .jsonl file to get a file_id. Pass purpose="batch" to mark the file for batch processing.

Request

<Tabs> <TabItem value="python" label="Python">

import os
from openai import OpenAI


def main():
    """Upload the batch input file and print the returned file object as JSON.

    The token is read from the JUDGE_MODEL_TOKEN environment variable. The
    printed object's id is the file_id needed to create the batch task.
    """
    base_url = "https://aiservice.cn-hangzhou.aliyuncs.com/v1"
    judge_model_token = os.getenv("JUDGE_MODEL_TOKEN")

    client = OpenAI(
        api_key=f'Authorization: Bearer {judge_model_token}',
        base_url=base_url
    )
    # Step 2: Upload the .jsonl file; the response contains the file_id for Step 3.
    # The context manager closes the file handle even if the upload raises.
    with open("/home/xxx/input.jsonl", "rb") as input_file:
        upload_files = client.files.create(
            file=input_file,
            purpose="batch",
        )
    print(upload_files.model_dump_json(indent=4))


if __name__ == '__main__':
    main()

</TabItem> <TabItem value="curl" label="curl">

curl -XPOST https://aiservice.cn-hangzhou.aliyuncs.com/v1/files \
  -H "Authorization: Bearer ${JUDGE_MODEL_TOKEN}" \
  -F purpose="batch" \
  -F file="@/home/xxx/input.jsonl"

</TabItem> </Tabs>

Response

Copy the id value — this is the file_id used to create the batch task in Step 3.

{
    "id": "file-batch-EC043540BE1C7BE3F9F2F0A8F47D1713",
    "object": "file",
    "bytes": 698,
    "created_at": 1742454203,
    "filename": "input.jsonl",
    "purpose": "batch"
}

Step 3: Create a batch task

Submit the uploaded file as a batch task. Only "24h" is supported for completion_window. The response contains the batch_id used to track progress.

Request

<Tabs> <TabItem value="python" label="Python">

import os
from openai import OpenAI


def main():
    """Create a batch task from an uploaded input file and print it as JSON."""
    judge_model_token = os.getenv("JUDGE_MODEL_TOKEN")
    judge = OpenAI(
        api_key=f'Authorization: Bearer {judge_model_token}',
        base_url="https://aiservice.cn-hangzhou.aliyuncs.com/v1",
    )

    # The file id comes from the upload response; "24h" is the only
    # completion window the service supports.
    uploaded_file_id = "file-batch-EC043540BE1C7BE3F9F2F0A8F47D1713"
    batch = judge.batches.create(
        input_file_id=uploaded_file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
    )
    print(batch.model_dump_json(indent=4))


if __name__ == '__main__':
    main()

</TabItem> <TabItem value="curl" label="curl">

curl -XPOST https://aiservice.cn-hangzhou.aliyuncs.com/v1/batches \
  -H "Authorization: Bearer ${JUDGE_MODEL_TOKEN}" \
  -H "Content-Type: application/json" \
  -d '{
      "input_file_id": "file-batch-EC043540BE1C7BE3F9F2F0A8F47D1713",
      "endpoint": "/v1/chat/completions",
      "completion_window": "24h"
  }'

</TabItem> </Tabs>

Response

Copy the id value — this is the batch_id used to check status in Step 4. The initial status is "Creating".

{
    "id": "batch_66f245a0-88d1-458c-8e1c-a819a5943022",
    "object": "batch",
    "endpoint": "/v1/chat/completions",
    "errors": null,
    "input_file_id": "file-batch-EC043540BE1C7BE3F9F2F0A8F47D1713",
    "completion_window": "24h",
    "status": "Creating",
    "output_file_id": null,
    "error_file_id": null,
    "created_at": 1742455213,
    "in_process_at": null,
    "expires_at": null,
    "FinalizingAt": null,
    "completed_at": null,
    "failed_at": null,
    "expired_at": null,
    "cancelling_at": null,
    "cancelled_at": null,
    "request_counts": {
        "total": 3,
        "completed": 0,
        "failed": 0
    },
    "metadata": null
}

Step 4: Check the task status

Poll the batch status using the batch_id. When status changes to "Succeeded", the response includes output_file_id — use it to retrieve results in Step 5.

For the full list of status fields, see Batch task object description.

Request

<Tabs> <TabItem value="python" label="Python">

import os
from openai import OpenAI


def main():
    """Fetch the current state of a batch task and print it as indented JSON."""
    judge_model_token = os.getenv("JUDGE_MODEL_TOKEN")
    judge = OpenAI(
        base_url="https://aiservice.cn-hangzhou.aliyuncs.com/v1",
        api_key=f'Authorization: Bearer {judge_model_token}',
    )

    # The batch id is the id returned when the batch task was created.
    task_id = "batch_66f245a0-88d1-458c-8e1c-a819a5943022"
    task = judge.batches.retrieve(batch_id=task_id)
    print(task.model_dump_json(indent=4))


if __name__ == '__main__':
    main()

</TabItem> <TabItem value="curl" label="curl">

curl -XGET https://aiservice.cn-hangzhou.aliyuncs.com/v1/batches/batch_66f245a0-88d1-458c-8e1c-a819a5943022 \
  -H "Authorization: Bearer ${JUDGE_MODEL_TOKEN}"

</TabItem> </Tabs>

Response

When status is "Succeeded", copy the output_file_id value for Step 5.

{
    "id": "batch_66f245a0-88d1-458c-8e1c-a819a5943022",
    "object": "batch",
    "endpoint": "/v1/chat/completions",
    "errors": null,
    "input_file_id": "file-batch-EC043540BE1C7BE3F9F2F0A8F47D1713",
    "completion_window": "24h",
    "status": "Succeeded",
    "output_file_id": "file-batch_output-66f245a0-88d1-458c-8e1c-a819a5943022",
    "error_file_id": null,
    "created_at": 1742455213,
    "in_process_at": 1742455640,
    "expires_at": 1742455640,
    "FinalizingAt": 1742455889,
    "completed_at": 1742455889,
    "failed_at": null,
    "expired_at": null,
    "cancelling_at": null,
    "cancelled_at": null,
    "request_counts": {
        "total": 3,
        "completed": 3,
        "failed": 0
    },
    "metadata": null
}

Step 5: Get the task result

Download the output file using output_file_id. Each line in the response corresponds to one input request, matched by custom_id.

Request

<Tabs> <TabItem value="python" label="Python">

import os
from openai import OpenAI


def main():
    """Download the batch output file and print its JSONL contents.

    The token is read from the JUDGE_MODEL_TOKEN environment variable.
    """
    base_url = "https://aiservice.cn-hangzhou.aliyuncs.com/v1"
    judge_model_token = os.getenv("JUDGE_MODEL_TOKEN")

    client = OpenAI(
        api_key=f'Authorization: Bearer {judge_model_token}',
        base_url=base_url
    )
    # Step 5: Use the output_file_id from Step 4 to download results
    content_files = client.files.content(
        file_id="file-batch_output-66f245a0-88d1-458c-8e1c-a819a5943022",
    )
    # files.content() returns a binary response wrapper; printing the wrapper
    # itself shows only its repr, so print the decoded body instead.
    print(content_files.text)


if __name__ == '__main__':
    main()

</TabItem> <TabItem value="curl" label="curl">

curl -XGET https://aiservice.cn-hangzhou.aliyuncs.com/v1/files/file-batch_output-66f245a0-88d1-458c-8e1c-a819a5943022/content \
  -H "Authorization: Bearer ${JUDGE_MODEL_TOKEN}" > output.jsonl

</TabItem> </Tabs>

Response

Each line of the output file is a JSON object that includes the custom_id, response, and error fields. Match results to input requests using custom_id.

{"id":"dcee3584-6f30-9541-a855-873a6d86b7d9","custom_id":"request-1","response":{"status_code":200,"request_id":"dcee3584-6f30-9541-a855-873a6d86b7d9","body":{"created":1737446797,"usage":{"completion_tokens":7,"prompt_tokens":26,"total_tokens":33},"model":"pai-judge","id":"chatcmpl-dcee3584-6f30-9541-a855-873a6d86b7d9","choices":[{"finish_reason":"stop","index":0,"message":{"content":"2+2 equals 4."}}],"object":"chat.completion"}},"error":null}
{"id":"dcee3584-6f30-9541-a855-873a6d86b7d9","custom_id":"request-2","response":{"status_code":200,"request_id":"dcee3584-6f30-9541-a855-873a6d86b7d9","body":{"created":1737446797,"usage":{"completion_tokens":7,"prompt_tokens":26,"total_tokens":33},"model":"pai-judge-plus","id":"chatcmpl-dcee3584-6f30-9541-a855-873a6d86b7d9","choices":[{"finish_reason":"stop","index":0,"message":{"content":"2+2 equals 4."}}],"object":"chat.completion"}},"error":null}