当前位置: 首页 > news >正文

golang测试模型的token输出速度

golang测试模型的token输出速度

// Command main prints basic host information (CPU, memory, NVIDIA GPU), the
// version of the local Ollama server, and then measures the token generation
// speed of one model through Ollama's /api/generate endpoint.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os/exec"
	"runtime"
	"strconv"
	"strings"
	"time"
)

const (
	// ollamaBaseURL is the address of the local Ollama HTTP API.
	ollamaBaseURL = "http://localhost:11434"

	// unknown is printed whenever a piece of system information cannot be
	// determined.
	unknown = "unknown"
)

// Request is the JSON body sent to /api/generate.
type Request struct {
	Model   string         `json:"model"`
	Prompt  string         `json:"prompt"`
	Stream  bool           `json:"stream"`
	Options map[string]any `json:"options,omitempty"`

	// Think is a top-level request field, not a model option. It is a pointer
	// so that an explicit false is still serialized.
	Think *bool `json:"think,omitempty"`
}

// Response holds the fields of the /api/generate reply used by the benchmark.
type Response struct {
	EvalCount    int    `json:"eval_count"`
	EvalDuration int64  `json:"eval_duration"`
	Response     string `json:"response"`
}

// VersionResp is the reply of /api/version.
type VersionResp struct {
	Version string `json:"version"`
}

func main() {
	printSystemInfo()
	fmt.Println()
	printOllamaVersion()
	fmt.Println()
	runBenchmark()
}

// printSystemInfo prints the CPU, memory and GPU summary.
func printSystemInfo() {
	fmt.Println("=== System Info ===")
	printCPU()
	printMemory()
	printGPU()
	fmt.Println("===================")
}

// printCPU prints the CPU model and core counts.
func printCPU() {
	model, cores := getCPUInfo()
	fmt.Printf("%-8s: %s\n", "CPU", model)
	fmt.Printf("%-8s: %s\n", "Cores", cores)
}

// getCPUInfo returns the CPU model name and a core-count summary for the
// current operating system, or "unknown" for unsupported systems.
func getCPUInfo() (model, cores string) {
	switch runtime.GOOS {
	case "linux":
		return getCPUInfoLinux()
	case "windows":
		return getCPUInfoWindows()
	case "darwin":
		return getCPUInfoDarwin()
	default:
		return unknown, unknown
	}
}

// getCPUInfoLinux reads the CPU model from /proc/cpuinfo and the core counts
// from nproc and lscpu.
func getCPUInfoLinux() (model, cores string) {
	// CPU model.
	model = unknown
	if out, err := exec.Command("grep", "-m1", "model name", "/proc/cpuinfo").Output(); err == nil {
		if _, value, ok := strings.Cut(string(out), ":"); ok {
			model = strings.TrimSpace(value)
		}
	}

	// Core counts.
	cores = formatCores(physicalCoresLinux(), logicalCoresLinux())
	return model, cores
}

// logicalCoresLinux returns the logical CPU count reported by nproc, falling
// back to runtime.NumCPU when nproc is missing or prints something unparsable.
func logicalCoresLinux() int {
	if out, err := exec.Command("nproc").Output(); err == nil {
		if n, err := strconv.Atoi(strings.TrimSpace(string(out))); err == nil && n > 0 {
			return n
		}
	}
	return runtime.NumCPU()
}

// physicalCoresLinux returns cores-per-socket times sockets as reported by
// lscpu, or 0 when that information is unavailable.
func physicalCoresLinux() int {
	out, err := exec.Command("lscpu").Output()
	if err != nil {
		return 0
	}
	// Atoi errors are deliberately ignored: an unparsable field yields 0,
	// which is reported as "physical count unavailable" below.
	perSocket, _ := strconv.Atoi(lscpuValue(string(out), "Core(s) per socket"))
	sockets, _ := strconv.Atoi(lscpuValue(string(out), "Socket(s)"))
	if perSocket <= 0 || sockets <= 0 {
		return 0
	}
	return perSocket * sockets
}

// lscpuValue returns the trimmed value of the "key: value" line in lscpu
// output whose key equals key, or "" when there is no such line.
func lscpuValue(output, key string) string {
	for _, line := range strings.Split(output, "\n") {
		name, value, ok := strings.Cut(line, ":")
		if ok && strings.TrimSpace(name) == key {
			return strings.TrimSpace(value)
		}
	}
	return ""
}

// formatCores renders core counts as "<physical>P / <logical>L", or just
// "<logical>L" when the physical count is not known (physical <= 0).
func formatCores(physical, logical int) string {
	if physical > 0 {
		return fmt.Sprintf("%dP / %dL", physical, logical)
	}
	return fmt.Sprintf("%dL", logical)
}

// getCPUInfoWindows queries wmic for the CPU name and core counts.
func getCPUInfoWindows() (model, cores string) {
	model, cores = unknown, unknown

	// CPU model: the first line is the column header, the second the value.
	if out, err := exec.Command("wmic", "cpu", "get", "name").Output(); err == nil {
		if line, ok := secondLine(out); ok && line != "" {
			model = line
		}
	}

	// Core counts: the columns are NumberOfCores and NumberOfLogicalProcessors.
	out, err := exec.Command("wmic", "cpu", "get", "NumberOfCores,NumberOfLogicalProcessors").Output()
	if err != nil {
		return model, cores
	}
	if line, ok := secondLine(out); ok {
		if fields := strings.Fields(line); len(fields) >= 2 {
			cores = fmt.Sprintf("%sP / %sL", fields[0], fields[1])
		}
	}
	return model, cores
}

// secondLine returns the trimmed second line of out, which is where wmic puts
// the first data row. ok is false when out has fewer than two lines.
func secondLine(out []byte) (line string, ok bool) {
	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
	if len(lines) < 2 {
		return "", false
	}
	return strings.TrimSpace(lines[1]), true
}

// getCPUInfoDarwin queries sysctl for the CPU brand string and core counts.
func getCPUInfoDarwin() (model, cores string) {
	// CPU model.
	model, err := sysctlValue("machdep.cpu.brand_string")
	if err != nil {
		model = unknown
	}

	// Core counts.
	physical, pErr := sysctlValue("hw.physicalcpu")
	logical, lErr := sysctlValue("hw.logicalcpu")
	if pErr != nil || lErr != nil {
		return model, unknown
	}
	return model, fmt.Sprintf("%sP / %sL", physical, logical)
}

// sysctlValue returns the trimmed output of `sysctl -n name`.
func sysctlValue(name string) (string, error) {
	out, err := exec.Command("sysctl", "-n", name).Output()
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(out)), nil
}

// printMemory prints the total physical memory.
func printMemory() {
	totalGB := getMemoryTotal()
	fmt.Printf("%-8s: %s\n", "Memory", totalGB)
}

// getMemoryTotal returns the total memory formatted as "<n> GB" for the
// current operating system, or "unknown".
func getMemoryTotal() string {
	switch runtime.GOOS {
	case "linux":
		return getMemoryTotalLinux()
	case "windows":
		return getMemoryTotalWindows()
	case "darwin":
		return getMemoryTotalDarwin()
	default:
		return unknown
	}
}

// getMemoryTotalLinux reads the MemTotal line of /proc/meminfo; its second
// field is parsed as a KiB count.
func getMemoryTotalLinux() string {
	out, err := exec.Command("grep", "MemTotal", "/proc/meminfo").Output()
	if err != nil {
		return unknown
	}
	fields := strings.Fields(string(out))
	if len(fields) < 2 {
		return unknown
	}
	return formatKBAsGB(fields[1])
}

// getMemoryTotalWindows reads TotalVisibleMemorySize from wmic and parses it
// as a KiB count.
func getMemoryTotalWindows() string {
	out, err := exec.Command("wmic", "os", "get", "TotalVisibleMemorySize").Output()
	if err != nil {
		return unknown
	}
	line, ok := secondLine(out)
	if !ok {
		return unknown
	}
	return formatKBAsGB(line)
}

// getMemoryTotalDarwin reads hw.memsize from sysctl and parses it as a byte
// count.
func getMemoryTotalDarwin() string {
	value, err := sysctlValue("hw.memsize")
	if err != nil {
		return unknown
	}
	memBytes, err := strconv.ParseFloat(value, 64)
	if err != nil {
		return unknown
	}
	return fmt.Sprintf("%.2f GB", memBytes/1024/1024/1024)
}

// formatKBAsGB parses s as a number of KiB and formats it as "<n> GB" with
// two decimals. It returns "unknown" when s is not a number.
func formatKBAsGB(s string) string {
	kb, err := strconv.ParseFloat(strings.TrimSpace(s), 64)
	if err != nil {
		return unknown
	}
	return fmt.Sprintf("%.2f GB", kb/1024/1024)
}

// printGPU lists the NVIDIA GPUs reported by nvidia-smi together with their
// total memory, or a notice when nvidia-smi cannot be run.
func printGPU() {
	out, err := exec.Command("nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader").Output()
	if err != nil {
		fmt.Printf("%-8s: No NVIDIA GPU detected or nvidia-smi not available\n", "GPU")
		return
	}
	for i, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
		name, memMB, ok := parseGPULine(line)
		if !ok {
			continue
		}
		if i == 0 {
			fmt.Printf("%-8s[%d]: %s\n", "GPU", i, name)
			fmt.Printf("%-8s: %s MB\n", "VRAM", memMB)
			continue
		}
		fmt.Printf("%-8s[%d]: %s | VRAM: %s MB\n", "GPU", i, name, memMB)
	}
}

// parseGPULine splits one "name, <n> MiB" CSV line from nvidia-smi into the
// GPU name and the memory figure without its unit. ok is false when the line
// has fewer than two comma-separated fields.
func parseGPULine(line string) (name, memMB string, ok bool) {
	parts := strings.Split(line, ",")
	if len(parts) < 2 {
		return "", "", false
	}
	name = strings.TrimSpace(parts[0])
	// Trim whitespace before removing the unit: with CRLF output every line
	// but the last still ends in '\r', which would otherwise keep the suffix
	// from matching.
	memMB = strings.TrimSpace(strings.TrimSuffix(strings.TrimSpace(parts[1]), "MiB"))
	return name, memMB, true
}

// printOllamaVersion prints the version reported by the local Ollama server.
func printOllamaVersion() {
	fmt.Println("=== Ollama Info ===")
	// Deferred so the section is closed on the error paths as well.
	defer fmt.Println("===================")

	// The version endpoint should answer immediately; a short timeout keeps
	// the tool from hanging when the server is unresponsive.
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Get(ollamaBaseURL + "/api/version")
	if err != nil {
		fmt.Printf("%-8s: failed to connect (%v)\n", "Version", err)
		return
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Printf("%-8s: failed to read response (%v)\n", "Version", err)
		return
	}

	var v VersionResp
	if err := json.Unmarshal(body, &v); err != nil || v.Version == "" {
		fmt.Printf("%-8s: unknown (%s)\n", "Version", string(body))
		return
	}
	fmt.Printf("%-8s: %s\n", "Version", v.Version)
}

// marshalGenerateRequest builds the non-streaming /api/generate body for model
// and prompt with thinking explicitly disabled.
func marshalGenerateRequest(model, prompt string) ([]byte, error) {
	think := false
	return json.Marshal(Request{
		Model:  model,
		Prompt: prompt,
		Stream: false,
		// NOTE(review): "think" is sent at the top level of the request rather
		// than inside "options" — confirm against the API reference of the
		// Ollama version in use.
		Think: &think,
	})
}

// tokensPerSecond converts the server-reported token count and evaluation
// duration (in nanoseconds) into tokens per second. It returns 0 when the
// duration is not positive.
func tokensPerSecond(evalCount int, evalDuration int64) float64 {
	if evalDuration <= 0 {
		return 0
	}
	return float64(evalCount) / time.Duration(evalDuration).Seconds()
}

// runBenchmark sends one non-streaming generate request and prints the
// client-side wall-clock time next to the server-reported generation figures.
func runBenchmark() {
	const (
		model  = "qwen3.5:9b"
		prompt = "你好,请介绍一下你自己"
	)

	fmt.Println("=== Benchmark ===")
	fmt.Printf("%-12s: %s\n", "Model", model)
	fmt.Printf("%-12s: %s\n", "Prompt", prompt)
	fmt.Println("-------------------")

	data, err := marshalGenerateRequest(model, prompt)
	if err != nil {
		fmt.Println("Marshal error:", err)
		return
	}

	start := time.Now()
	// Deliberately no client timeout: a large model on CPU can take many
	// minutes to answer a single non-streaming request.
	resp, err := http.Post(ollamaBaseURL+"/api/generate", "application/json", bytes.NewReader(data))
	if err != nil {
		fmt.Println("Request error:", err)
		return
	}
	defer resp.Body.Close()

	// Without this check an error reply (for example an unknown model) would
	// decode into a zero Response and be reported as a 0-token run.
	if resp.StatusCode != http.StatusOK {
		// Best-effort read of the error body; a read failure only shortens
		// the message.
		msg, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		fmt.Printf("Request error: unexpected status %s: %s\n", resp.Status, strings.TrimSpace(string(msg)))
		return
	}

	var res Response
	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		fmt.Println("Decode error:", err)
		return
	}
	duration := time.Since(start)

	fmt.Printf("%-12s: %v\n", "Total Time", duration.Round(time.Millisecond))
	fmt.Printf("%-12s: %d\n", "Eval Tokens", res.EvalCount)
	fmt.Printf("%-12s: %.2f tokens/s\n", "Eval Speed", tokensPerSecond(res.EvalCount, res.EvalDuration))
	fmt.Printf("%-12s: %v\n", "Server Eval", time.Duration(res.EvalDuration))
	fmt.Println("=================")
}

测试结果

=== System Info ===
CPU     : AMD Ryzen 5 5600G with Radeon Graphics
Cores   : 6P / 12L
Memory  : 27.30 GB
GPU     : No NVIDIA GPU detected or nvidia-smi not available
===================

=== Ollama Info ===
Version : 0.18.2
===================

=== Benchmark ===
Model       : qwen3.5:9b
Prompt      : 你好,请介绍一下你自己
-------------------
Total Time  : 1m33.855s
Eval Tokens : 400
Eval Speed  : 4.32 tokens/s
Server Eval : 1m32.676335318s
=================

=== System Info ===
CPU     : AMD Ryzen 5 5600G with Radeon Graphics
Cores   : 6P / 12L
Memory  : 27.30 GB
GPU     : No NVIDIA GPU detected or nvidia-smi not available
===================

=== Ollama Info ===
Version : 0.18.2
===================

=== Benchmark ===
Model       : qwen3.5:0.8b
Prompt      : 你好,请介绍一下你自己
-------------------
Total Time  : 33.877s
Eval Tokens : 484
Eval Speed  : 15.66 tokens/s
Server Eval : 30.914075743s
=================

=== System Info ===
CPU     : AMD Ryzen 5 5600G with Radeon Graphics
Cores   : 6P / 12L
Memory  : 27.30 GB
GPU     : No NVIDIA GPU detected or nvidia-smi not available
===================

=== Ollama Info ===
Version : 0.18.2
===================

=== Benchmark ===
Model       : qwen3:0.6b
Prompt      : 你好,请介绍一下你自己
-------------------
Total Time  : 3.721s
Eval Tokens : 165
Eval Speed  : 58.01 tokens/s
Server Eval : 2.844187074s
=================

=== System Info ===
CPU     : AMD Ryzen 5 5600G with Radeon Graphics
Cores   : 6P / 12L
Memory  : 27.30 GB
GPU     : No NVIDIA GPU detected or nvidia-smi not available
===================

=== Ollama Info ===
Version : 0.18.2
===================

=== Benchmark ===
Model       : llama3.1:8b
Prompt      : 你好,请介绍一下你自己
-------------------
Total Time  : 6.548s
Eval Tokens : 47
Eval Speed  : 7.53 tokens/s
Server Eval : 6.24072249s
=================

模型性能对比

以下是将新提供的数据整合到原有对比表格中的结果:

| 指标 | qwen3.5:9b | qwen3.5:0.8b | qwen3:0.6b | llama3.1:8b | qwen3.5:27b | qwen2.5:0.5b |
| --- | --- | --- | --- | --- | --- | --- |
| Model | qwen3.5:9b | qwen3.5:0.8b | qwen3:0.6b | llama3.1:8b | qwen3.5:27b | qwen2.5:0.5b |
| Prompt | 你好,请介绍一下你自己 | 你好,请介绍一下你自己 | 你好,请介绍一下你自己 | 你好,请介绍一下你自己 | 你好,请介绍一下你自己 | 你好,请介绍一下你自己 |
| Total Time | 1m33.855s | 33.877s | 3.721s | 6.548s | 24m16.474s | 2.969s |
| Eval Tokens | 400 | 484 | 165 | 47 | 1622 | 70 |
| Eval Speed (tokens/s) | 4.32 | 15.66 | 58.01 | 7.53 | 1.13 | 55.88 |
| Server Eval | 1m32.676335318s | 30.914075743s | 2.844187074s | 6.24072249s | 23m59.117758659s | 1.252627949s |

表格说明:

  • Model:测试的模型名称。
  • Prompt:使用的提示文本。
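  • Total Time:客户端测得的总耗时,即从发出请求到收到完整响应为止的时间(除生成本身外,还包含如模型加载、提示词处理等开销)。
  • Eval Tokens:模型生成的 token 数量(对应响应中的 eval_count 字段)。
  • Eval Speed:生成速度,即 Eval Tokens 除以 Server Eval,单位为 tokens/s。
  • Server Eval:服务器端生成这些 token 所花费的时间(对应响应中的 eval_duration 字段)。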
http://www.jsqmd.com/news/530364/

相关文章:

  • MaaYuan游戏自动化助手:智能游戏管理的技术解决方案
  • The Riemannian Geometry of Conceptual Spaces: Behavioral Evidence for Cognitive Manifolds
  • 长尾关键词在SEO中优化关键词策略的应用与效果分析
  • AI 时代,我只关注这四件事
  • AI编程IDE大乱斗:Cursor、Trae、Claude Code、Augment Code横向评测(附国内用户避坑指南)
  • 水浸超声扫描显微镜市场剖析:至2032年这一规模将接近26.89亿元
  • BotMan附件处理终极指南:5种类型附件接收与处理技巧
  • Elastic UI Framework测试策略:从单元到组件的完整测试指南
  • 无锡半导体行业展会相关信息推荐,本地专业展会详情与观展攻略 - 品牌2026
  • 如何用Go语言打造你的专属小米手环心率监控系统?
  • Token:解决 Cookie+Session 痛点的新一代「身份凭证」
  • 优质wordpress主题网站推荐
  • TLV320音频编解码器WAV播放库设计与嵌入式实现
  • 3-24午夜盘思
  • 掌握Jasmine跨平台漫画浏览器的用户认证系统:从零到专业的三步进阶
  • mvn install:install-file
  • 如何用Brython构建跨平台响应式Web应用:面向移动端开发的完整指南
  • dds
  • OpCore Simplify革新:从配置困境到5分钟部署的突破指南
  • 3个核心优势:asmr-downloader如何解决ASMR资源管理难题
  • 终极代码质量指南:js-yaml 从代码规范到自动化测试的完整流程
  • 无锡半导体行业展会推荐,高规格产业展会一览与参展价值分析 - 品牌2026
  • 终极Bounce.js实战指南:5个顶级网站如何用CSS3动画提升用户体验
  • Bounce.js 插件开发终极指南:3步打造自定义CSS3动画扩展
  • 3步颠覆ComfyUI插件管理:让AI绘画效率提升10倍的开源工具
  • 当敏捷开发遇上硬件制造:复盘波音737MAX项目管理的‘死亡档期’与教训
  • MGSwipeTableCell代码重构终极指南:如何优化现有滑动代码结构
  • 智能税务系统:PDF-Parser-1.0处理增值税发票
  • RimSort:《边缘世界》模组高效管理工具全攻略
  • 鸽姆智库(GG3M)天使轮融资BP:全球首个C2文明元决策操作系统,千倍回报锚定星际永续