Skip to content

Instantly share code, notes, and snippets.

@samuelcolvin
Created October 30, 2025 18:55
Show Gist options
  • Select an option

  • Save samuelcolvin/b432a00393849cadddde440697c23c73 to your computer and use it in GitHub Desktop.

Select an option

Save samuelcolvin/b432a00393849cadddde440697c23c73 to your computer and use it in GitHub Desktop.

The performance I got was:

request taken:  229745.32ms
model_construct taken:  33.59ms
model_validate_json taken:  13.34ms

Output tokens: 2707
import json
import time
from openai import AsyncClient
from openai.types.chat.chat_completion import ChatCompletion
async def main() -> None:
    """Time one chat completion request and two ways of parsing its response.

    Sends a single chat completion request through the raw-response API, then
    prints three wall-clock timings in milliseconds:

    * the request itself,
    * ``ChatCompletion.model_construct`` on the decoded JSON body (the JSON
      decoding is part of the measured span),
    * ``ChatCompletion.model_validate_json`` on the raw response bytes.

    Finally prints the completion token count reported in the response usage.

    Raises:
        AssertionError: if the constructed completion carries no usage data.
    """
    # Use the client as an async context manager so its underlying HTTP
    # resources are released before the event loop shuts down.
    async with AsyncClient() as client:
        start = time.perf_counter()
        res = await client.chat.completions.with_raw_response.create(
            model="gpt-4.1",
            logprobs=True,
            messages=[
                {
                    "role": "system",
                    "content": "Be extremely verbose and detailed in your response.",
                },
                {
                    "role": "user",
                    "content": "write me a very very long story about a short horse with an ego problem",
                },
            ],
            extra_body={"max_completion_tokens": 32768},
        )
        end = time.perf_counter()
        print(f"request taken: {1000 * (end - start): .2f}ms")

        # Path 1: decode the body to Python objects, then build the model from them.
        start = time.perf_counter()
        completion = ChatCompletion.model_construct(**res.http_response.json())
        end = time.perf_counter()
        print(f"model_construct taken: {1000 * (end - start): .2f}ms")

        # Path 2: hand the raw bytes straight to the model; the result is
        # discarded because only the elapsed time is of interest.
        start = time.perf_counter()
        ChatCompletion.model_validate_json(res.http_response.content)
        end = time.perf_counter()
        print(f"model_validate_json taken: {1000 * (end - start): .2f}ms")

        assert completion.usage is not None
        print(f"\nOutput tokens: {completion.usage.completion_tokens}")
if __name__ == "__main__":
    # Script entry point: asyncio is needed only here, so it is imported
    # locally, then used to drive the coroutine to completion.
    import asyncio

    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment