The performance I got was:

```
request taken: 229745.32ms
model_construct taken: 33.59ms
model_validate_json taken: 13.34ms
Output tokens: 2707
```
The script:

```python
import time

from openai import AsyncClient
from openai.types.chat.chat_completion import ChatCompletion


async def main():
    client = AsyncClient()

    # Time the full round trip, using the raw response so the body
    # can be parsed manually below.
    start = time.perf_counter()
    res = await client.chat.completions.with_raw_response.create(
        model="gpt-4.1",
        logprobs=True,
        messages=[
            {
                "role": "system",
                "content": "Be extremely verbose and detailed in your response.",
            },
            {
                "role": "user",
                "content": "write me a very very long story about a short horse with an ego problem",
            },
        ],
        extra_body={"max_completion_tokens": 32768},
    )
    end = time.perf_counter()
    print(f"request taken: {1000 * (end - start):.2f}ms")

    # model_construct: build the model without validation.
    start = time.perf_counter()
    completion = ChatCompletion.model_construct(**res.http_response.json())
    end = time.perf_counter()
    print(f"model_construct taken: {1000 * (end - start):.2f}ms")

    # model_validate_json: parse and validate the raw bytes in one pass.
    start = time.perf_counter()
    ChatCompletion.model_validate_json(res.http_response.content)
    end = time.perf_counter()
    print(f"model_validate_json taken: {1000 * (end - start):.2f}ms")

    assert completion.usage is not None
    print(f"\nOutput tokens: {completion.usage.completion_tokens}")


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
```
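For what it's worth, the two parse paths aren't doing equivalent work: the openai SDK overrides pydantic's `model_construct` with its own lenient, recursive constructor that runs in Python, while `model_validate_json` parses and validates the bytes in a single pass inside compiled pydantic-core, which plausibly explains why skipping validation is the slower of the two here. Below is a minimal sketch that isolates parsing cost from network time by timing both calls against a canned body; the payload is a hypothetical minimal `chat.completion` object (assuming pydantic v2 semantics, i.e. openai>=1.x), not a captured API response:

```python
# Minimal sketch: time both parse paths against a canned body so the
# comparison excludes network time. PAYLOAD is a hypothetical minimal
# chat.completion object, not a real API response.
import json
import time

from openai.types.chat.chat_completion import ChatCompletion

PAYLOAD = b"""{
  "id": "chatcmpl-123",
  "object": "chat.completion",
  "created": 1700000000,
  "model": "gpt-4.1",
  "choices": [{
    "index": 0,
    "finish_reason": "stop",
    "logprobs": null,
    "message": {"role": "assistant", "content": "hello"}
  }],
  "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}
}"""


def bench(label: str, fn, n: int = 1000) -> None:
    # Average over n calls to smooth out timer noise on a tiny payload.
    start = time.perf_counter()
    for _ in range(n):
        fn()
    end = time.perf_counter()
    print(f"{label}: {1000 * (end - start) / n:.4f}ms per call")


data = json.loads(PAYLOAD)
# model_construct: the SDK's lenient recursive constructor, run in Python
# (plus the json.loads above when replicating the original script).
bench("model_construct", lambda: ChatCompletion.model_construct(**data))
# model_validate_json: parsing + validation in one pass in pydantic-core (Rust).
bench("model_validate_json", lambda: ChatCompletion.model_validate_json(PAYLOAD))
```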