58 lines
1.9 KiB
Python
58 lines
1.9 KiB
Python
import os
|
||
import time
|
||
from openai import OpenAI
|
||
from dotenv import load_dotenv
|
||
|
||
load_dotenv()

# Configure your environment: the endpoint URL and API key are read from the
# OLLAMA_BASE_URL / OLLAMA_API_KEY environment variables (load_dotenv() is
# called first so a .env file can supply them). The fallback strings are
# placeholders to be replaced with real values.
client = OpenAI(
    api_key=os.environ.get("OLLAMA_API_KEY", "你的APIKEY"),
    base_url=os.environ.get("OLLAMA_BASE_URL", "你的URL"),
)
|
||
|
||
def test_model_speed(model_name, prompt="請寫一篇關於未來AI發展的500字文章。"):
    """Stream one chat completion and print latency / throughput figures.

    Sends ``prompt`` as a single user message through the module-level
    ``client`` with ``stream=True`` and prints the time to first token
    (TTFT), an approximate generation speed, the total elapsed time and the
    length of the response in characters.

    The "token" count is the number of streamed chunks that carried content.
    This is an approximation: a chunk is not guaranteed to hold exactly one
    token, so the reported tokens/s is a rough estimate.

    Args:
        model_name: Model identifier passed to the chat-completions API.
        prompt: Text of the user message to send.

    Returns:
        None. All results are printed. Any exception raised while creating
        or consuming the stream is caught and reported on stdout rather
        than propagated.
    """
    print(f"🚀 正在測試模型: {model_name} ...")

    # perf_counter() is monotonic and high-resolution, so the measured
    # intervals cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()
    first_token_time = None
    tokens_count = 0
    pieces = []

    try:
        stream = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        )

        for chunk in stream:
            # A chunk may arrive with an empty `choices` list (for example a
            # usage-only chunk); indexing [0] on it would abort the whole
            # measurement with an IndexError.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if not content:
                continue
            if first_token_time is None:
                # Record the time to first token (TTFT).
                first_token_time = time.perf_counter() - start_time
            pieces.append(content)
            # Rough estimate: every content-bearing chunk counts as one
            # token. (Chinese is roughly 0.6-1 token per character and
            # English roughly 1.3 tokens per word, so a character count
            # would not be exact either.)
            tokens_count += 1

        total_time = time.perf_counter() - start_time
    except Exception as e:
        print(f"❌ 測試出錯: {e}")
        return

    if first_token_time is None:
        # Nothing was streamed back: there is no TTFT to subtract, so report
        # that plainly instead of failing with a TypeError below.
        print("❌ 測試出錯: 模型沒有回傳任何內容")
        return

    full_response = "".join(pieces)
    generation_time = total_time - first_token_time
    tps = tokens_count / generation_time if generation_time > 0 else 0

    print("-" * 30)
    print("📊 測試結果:")
    print(f"⏱️ 首字延遲 (TTFT): {first_token_time:.2f} 秒")
    print(f"⚡ 生成速度 (TPS): {tps:.2f} tokens/s")
    print(f"🕒 總耗時: {total_time:.2f} 秒")
    print(f"📝 總字數: {len(full_response)} 字")
    print("-" * 30)
|
||
|
||
if __name__ == "__main__":
    # Replace with the name of the model you actually want to benchmark.
    target_model = "deepseek-v3.1:671b-cloud"
    test_model_speed(target_model)