四博皮克斯苹果 AI 台灯技术架构方案
四博皮克斯苹果 AI 台灯技术架构方案
——机械臂 + 视觉识别 + AI 大模型 + 全开源量产底座
四博皮克斯苹果 AI 台灯再升级:在原有 AI 语音台灯基础上,新增机械臂控制、视觉识别能力和 AI 大模型交互能力。设备不仅可以听懂用户说话,还可以“看见”桌面环境,识别人、人手、书本、作业、物体位置,并通过机械臂完成角度调整、跟随照明、学习陪伴、姿态反馈和智能互动。
该方案适合做成:
AI 学习台灯
AI 桌面机器人
AI 陪伴灯
AI 机械臂灯
儿童学习陪伴设备
桌面智能助手
智能家居语音控制终端
品牌定制 AI 硬件

1. 整体技术架构
┌────────────────────────────────────────────┐
│             AI 云端 / 本地服务             │
│ LLM 大模型 / 视觉识别 / OCR / ASR / TTS / 知识库 │
└──────────────────▲─────────────────────────┘
                   │ WebSocket / HTTPS / MQTT
┌──────────────────┴─────────────────────────┐
│        四博皮克斯苹果 AI 台灯设备端        │
│                                            │
│  ┌──────────────────────────────────────┐  │
│  │ ESP32-S3 主控层                      │  │
│  │ - FreeRTOS 多任务                    │  │
│  │ - 摄像头采集                         │  │
│  │ - 语音采集 / 播放                    │  │
│  │ - 机械臂控制                         │  │
│  │ - 灯光 PWM / 色温 / 亮度             │  │
│  │ - 屏幕 / 表情 UI                     │  │
│  │ - WebSocket AI 会话                  │  │
│  │ - OTA 升级                           │  │
│  │ - MCP / UART 扩展                    │  │
│  └──────────────────────────────────────┘  │
│                                            │
│  ┌──────────┐ ┌──────────┐ ┌────────────┐  │
│  │  摄像头  │ │  麦克风  │ │ 喇叭 / 功放│  │
│  └──────────┘ └──────────┘ └────────────┘  │
│                                            │
│  ┌──────────┐ ┌──────────┐ ┌────────────┐  │
│  │  机械臂  │ │ LED 灯光 │ │ 屏幕 / 表情│  │
│  └──────────┘ └──────────┘ └────────────┘  │
│                                            │
│  ┌──────────┐ ┌──────────┐ ┌────────────┐  │
│  │  Wi-Fi   │ │ 蓝牙配网 │ │  4G 可选   │  │
│  └──────────┘ └──────────┘ └────────────┘  │
└────────────────────────────────────────────┘

2. 核心功能定义
2.1 视觉功能
1. 人体 / 人脸 / 手势检测
2. 桌面书本 / 作业区域识别
3. OCR 拍照识字 / 拍题扩展
4. 用户坐姿 / 学习状态检测
5. 光照区域识别
6. 机械臂自动跟随目标
7. 摄像头画面上传 AI 大模型做多模态理解

2.2 机械臂功能
1. 上下抬头
2. 左右转向
3. 灯头俯仰
4. 自动回中
5. 跟随人脸 / 书本 / 手部位置
6. 根据语音指令调整角度
7. 根据视觉识别结果自动补光

2.3 AI 大模型功能
1. 语音问答
2. 桌面环境理解
3. 学习陪伴
4. 拍照讲解
5. 灯光控制
6. 机械臂动作规划
7. 情绪化回复
8. 客户知识库接入

3. 硬件组成建议
主控:ESP32-S3 / ESP32-S3R8
摄像头:OV2640 / GC0308 / 其他 DVP 摄像头
语音输入:I2S 麦克风 / 模拟麦克风 + Codec
语音输出:I2S Codec + 功放 + 喇叭
机械臂:2~4 路舵机 / 步进电机 / 减速电机
灯光:冷白 LED + 暖白 LED / RGB LED
显示:可选 1.28 寸圆屏 / 2 寸方屏 / 双屏
联网:Wi-Fi / BLE 配网 / 4G 可选
供电:DC 5V / Type-C / 电池可选
扩展:UART / I2C / SPI / GPIO / MCP

推荐第一版技术原型:
ESP32-S3 + 摄像头 + 2 路舵机 + 冷暖双色 LED + 麦克风 + 喇叭 + Wi-Fi

量产增强版:
ESP32-S3R8 + 摄像头 + 3~4 轴机械臂 + 双麦降噪 + 4G 可选 + 屏幕 + OTA

4. 软件工程目录结构
sibo_ai_pixar_lamp/
├── CMakeLists.txt
├── sdkconfig.defaults
├── main/
│   ├── app_main.c
│   ├── board/
│   │   ├── board.h
│   │   ├── board.c
│   │   ├── pin_config.h
│   ├── audio/
│   │   ├── audio_capture.c
│   │   ├── audio_player.c
│   │   ├── wakeup_engine.c
│   ├── vision/
│   │   ├── camera_driver.c
│   │   ├── vision_client.c
│   │   ├── image_upload.c
│   ├── arm/
│   │   ├── arm_controller.c
│   │   ├── servo_driver.c
│   │   ├── motion_planner.c
│   ├── light/
│   │   ├── light_controller.c
│   │   ├── pwm_led.c
│   ├── ai/
│   │   ├── ai_session.c
│   │   ├── ai_ws_client.c
│   │   ├── ai_protocol.c
│   ├── display/
│   │   ├── display_ui.c
│   │   ├── expression_ui.c
│   ├── mcp/
│   │   ├── mcp_uart.c
│   │   ├── mcp_parser.c
│   ├── ota/
│   │   ├── ota_service.c
│   └── utils/
│       ├── app_event.c
│       ├── json_helper.c

5. 设备端状态机
typedef enum { LAMP_STATE_BOOTING = 0, LAMP_STATE_NET_CONFIG, LAMP_STATE_IDLE, LAMP_STATE_LISTENING, LAMP_STATE_THINKING, LAMP_STATE_SPEAKING, LAMP_STATE_VISION_DETECTING, LAMP_STATE_TRACKING, LAMP_STATE_MOVING_ARM, LAMP_STATE_UPGRADING, } lamp_state_t; typedef enum { APP_EVT_NONE = 0, APP_EVT_NET_CONNECTED, APP_EVT_WAKEUP, APP_EVT_SPEECH_END, APP_EVT_TTS_START, APP_EVT_TTS_DONE, APP_EVT_VISION_TRIGGER, APP_EVT_VISION_RESULT, APP_EVT_ARM_MOVE_DONE, APP_EVT_LIGHT_UPDATE, APP_EVT_MCP_COMMAND, } app_event_t; static lamp_state_t g_lamp_state = LAMP_STATE_BOOTING; static QueueHandle_t g_app_evt_queue = NULL; static void app_post_event(app_event_t evt) { if (g_app_evt_queue) { xQueueSend(g_app_evt_queue, &evt, 0); } } static void lamp_set_state(lamp_state_t state) { g_lamp_state = state; switch (state) { case LAMP_STATE_IDLE: display_show_status("待机中"); expression_set("idle"); break; case LAMP_STATE_LISTENING: display_show_status("正在聆听"); expression_set("listening"); break; case LAMP_STATE_THINKING: display_show_status("AI 思考中"); expression_set("thinking"); break; case LAMP_STATE_VISION_DETECTING: display_show_status("正在识别画面"); expression_set("looking"); break; case LAMP_STATE_TRACKING: display_show_status("目标跟随中"); expression_set("tracking"); break; case LAMP_STATE_MOVING_ARM: display_show_status("机械臂调整中"); expression_set("moving"); break; default: break; } }6. app_main 主程序骨架
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_log.h"
#include "nvs_flash.h"
#include "board.h"
#include "camera_driver.h"
#include "audio_capture.h"
#include "audio_player.h"
#include "ai_session.h"
#include "arm_controller.h"
#include "light_controller.h"
#include "display_ui.h"
#include "mcp_uart.h"
#include "ota_service.h"

static const char *TAG = "SIBO_PIXAR_LAMP";

void app_event_task(void *arg);
void vision_task(void *arg);
void arm_task(void *arg);
void ai_task(void *arg);
void light_task(void *arg);

/*
 * Firmware entry point: initialises NVS, creates the application event
 * queue, brings up every peripheral module, starts the worker tasks and
 * finally switches the lamp to the idle state.
 */
void app_main(void)
{
    /*
     * Only the two documented recoverable NVS errors justify wiping the
     * partition; any other failure is reported instead of silently erasing
     * stored settings.
     */
    esp_err_t ret = nvs_flash_init();
    if (ret == ESP_ERR_NVS_NO_FREE_PAGES || ret == ESP_ERR_NVS_NEW_VERSION_FOUND) {
        ESP_LOGW(TAG, "NVS init failed, erase and retry");
        ESP_ERROR_CHECK(nvs_flash_erase());
        ret = nvs_flash_init();
    }
    ESP_ERROR_CHECK(ret);

    ESP_LOGI(TAG, "四博皮克斯苹果 AI 台灯启动");

    /* Every task relies on this queue, so a failed allocation is fatal. */
    g_app_evt_queue = xQueueCreate(16, sizeof(app_event_t));
    ESP_ERROR_CHECK(g_app_evt_queue != NULL ? ESP_OK : ESP_ERR_NO_MEM);

    board_init();
    display_init();
    expression_set("boot");
    light_controller_init();
    arm_controller_init();

    /* The lamp is still usable without vision, so only warn on failure. */
    esp_err_t cam_ret = camera_driver_init();
    if (cam_ret != ESP_OK) {
        ESP_LOGW(TAG, "camera init failed (%s), vision features unavailable",
                 esp_err_to_name(cam_ret));
    }

    audio_capture_init();
    audio_player_init();
    ai_session_init();
    mcp_uart_init();
    ota_service_init();

    xTaskCreate(app_event_task, "app_event", 4096, NULL, 8, NULL);
    xTaskCreate(vision_task, "vision_task", 8192, NULL, 6, NULL);
    xTaskCreate(arm_task, "arm_task", 4096, NULL, 5, NULL);
    xTaskCreate(ai_task, "ai_task", 8192, NULL, 6, NULL);
    xTaskCreate(light_task, "light_task", 4096, NULL, 4, NULL);

    lamp_set_state(LAMP_STATE_IDLE);
    ESP_LOGI(TAG, "系统初始化完成");
}

7. 摄像头驱动代码
以 ESP32-S3 + DVP 摄像头为例:
#include "esp_camera.h"
#include "esp_log.h"

static const char *TAG = "CAMERA";

#define CAM_PIN_PWDN  -1
#define CAM_PIN_RESET -1
#define CAM_PIN_XCLK  15
#define CAM_PIN_SIOD  4
#define CAM_PIN_SIOC  5
#define CAM_PIN_D7    16
#define CAM_PIN_D6    17
#define CAM_PIN_D5    18
#define CAM_PIN_D4    12
#define CAM_PIN_D3    10
#define CAM_PIN_D2    8
#define CAM_PIN_D1    9
#define CAM_PIN_D0    11
#define CAM_PIN_VSYNC 6
#define CAM_PIN_HREF  7
#define CAM_PIN_PCLK  13

/*
 * Initialise the DVP camera for VGA JPEG capture with two frame buffers.
 * XCLK is generated with LEDC timer 0 / channel 0, so no other PWM user
 * may claim that timer or channel.
 *
 * Returns ESP_OK on success, otherwise the error from esp_camera_init().
 */
esp_err_t camera_driver_init(void)
{
    camera_config_t config = {
        .pin_pwdn = CAM_PIN_PWDN,
        .pin_reset = CAM_PIN_RESET,
        .pin_xclk = CAM_PIN_XCLK,
        .pin_sccb_sda = CAM_PIN_SIOD,
        .pin_sccb_scl = CAM_PIN_SIOC,
        .pin_d7 = CAM_PIN_D7,
        .pin_d6 = CAM_PIN_D6,
        .pin_d5 = CAM_PIN_D5,
        .pin_d4 = CAM_PIN_D4,
        .pin_d3 = CAM_PIN_D3,
        .pin_d2 = CAM_PIN_D2,
        .pin_d1 = CAM_PIN_D1,
        .pin_d0 = CAM_PIN_D0,
        .pin_vsync = CAM_PIN_VSYNC,
        .pin_href = CAM_PIN_HREF,
        .pin_pclk = CAM_PIN_PCLK,
        .xclk_freq_hz = 20000000,
        .ledc_timer = LEDC_TIMER_0,
        .ledc_channel = LEDC_CHANNEL_0,
        .pixel_format = PIXFORMAT_JPEG,
        .frame_size = FRAMESIZE_VGA,
        .jpeg_quality = 12,
        .fb_count = 2,
        .grab_mode = CAMERA_GRAB_LATEST,
    };

    esp_err_t err = esp_camera_init(&config);
    if (err != ESP_OK) {
        ESP_LOGE(TAG, "摄像头初始化失败: 0x%x", err);
        return err;
    }

    ESP_LOGI(TAG, "摄像头初始化成功");
    return ESP_OK;
}

/*
 * Grab one frame from the camera driver.
 * Returns NULL on failure. A non-NULL frame must be handed back with
 * camera_release_frame() once the caller is done with it.
 */
camera_fb_t *camera_capture_frame(void)
{
    camera_fb_t *fb = esp_camera_fb_get();
    if (!fb) {
        ESP_LOGE(TAG, "摄像头采集失败");
        return NULL;
    }

    /* fb->len is a size_t; cast so the argument matches the conversion. */
    ESP_LOGI(TAG, "采集图片成功, size=%u", (unsigned int)fb->len);
    return fb;
}

/* Return a frame obtained from camera_capture_frame(); NULL is ignored. */
void camera_release_frame(camera_fb_t *fb)
{
    if (fb) {
        esp_camera_fb_return(fb);
    }
}

8. 视觉识别任务
视觉任务可以支持三种触发方式:
1. 用户语音触发:“看一下桌面” 2. 用户按键触发:拍照识别 3. 自动触发:定时检测用户位置 / 书本位置typedef enum { VISION_MODE_NONE = 0, VISION_MODE_DESCRIBE_SCENE, VISION_MODE_TRACK_FACE, VISION_MODE_FIND_BOOK, VISION_MODE_OCR, VISION_MODE_HAND_GESTURE, } vision_mode_t; typedef struct { vision_mode_t mode; int target_x; int target_y; int confidence; char text[256]; } vision_result_t; static QueueHandle_t g_vision_result_queue; static void vision_upload_to_ai(camera_fb_t *fb, vision_mode_t mode) { lamp_set_state(LAMP_STATE_VISION_DETECTING); /* * 实际项目中: * 1. 将 JPEG 图片通过 HTTP POST 上传到 AI 视觉服务 * 2. 请求参数带上 mode * 3. 服务端返回 JSON * * 示例返回: * { * "type": "face", * "target_x": 320, * "target_y": 180, * "confidence": 91, * "text": "检测到用户在画面偏左位置" * } */ } void vision_task(void *arg) { app_event_t evt; while (1) { if (xQueueReceive(g_app_evt_queue, &evt, pdMS_TO_TICKS(100))) { if (evt == APP_EVT_VISION_TRIGGER) { ESP_LOGI("VISION", "触发视觉识别"); camera_fb_t *fb = camera_capture_frame(); if (!fb) { continue; } vision_upload_to_ai(fb, VISION_MODE_DESCRIBE_SCENE); camera_release_frame(fb); } } vTaskDelay(pdMS_TO_TICKS(20)); } }9. 图片上传到 AI 视觉服务
#include <stdlib.h>
#include "esp_http_client.h"
#include "esp_crt_bundle.h"

#define VISION_SERVER_URL "https://your-ai-server.com/api/vision/analyze"

/* Upper bound for the JSON answer; longer bodies are truncated. */
#define VISION_RESPONSE_MAX_LEN 1024

/* Implemented by the vision result parser. */
void vision_parse_result(const char *json, int len);

/*
 * Map a vision mode to the string sent in the X-Vision-Mode header.
 * The strings "track_face", "find_book" and "ocr" match what the sample
 * vision server compares against; every other value makes that server
 * fall back to scene description.
 */
static const char *vision_mode_name(vision_mode_t mode)
{
    switch (mode) {
    case VISION_MODE_DESCRIBE_SCENE:
        return "describe";
    case VISION_MODE_TRACK_FACE:
        return "track_face";
    case VISION_MODE_FIND_BOOK:
        return "find_book";
    case VISION_MODE_OCR:
        return "ocr";
    case VISION_MODE_HAND_GESTURE:
        return "hand_gesture";
    default:
        return "none";
    }
}

/*
 * HTTP event callback: only logs received body chunks. A chunk may hold
 * just part of the JSON document, so parsing happens in
 * vision_upload_jpeg() once the body has been read.
 */
static esp_err_t vision_http_event_handler(esp_http_client_event_t *evt)
{
    switch (evt->event_id) {
    case HTTP_EVENT_ON_DATA:
        ESP_LOGI("VISION_HTTP", "收到视觉结果: %.*s", evt->data_len, (char *)evt->data);
        break;
    default:
        break;
    }
    return ESP_OK;
}

/*
 * POST one JPEG frame to the vision server and feed the JSON answer to
 * vision_parse_result().
 *
 * fb   - captured frame; ownership stays with the caller.
 * mode - requested analysis, sent as the X-Vision-Mode header.
 *
 * Returns ESP_OK only when the image was sent completely, the server
 * answered with HTTP 200 and a non-empty body was read. Returns
 * ESP_ERR_INVALID_ARG for an empty frame, ESP_ERR_NO_MEM when the response
 * buffer cannot be allocated, otherwise ESP_FAIL or the error from
 * esp_http_client_open().
 */
esp_err_t vision_upload_jpeg(camera_fb_t *fb, vision_mode_t mode)
{
    if (fb == NULL || fb->buf == NULL || fb->len == 0) {
        return ESP_ERR_INVALID_ARG;
    }

    esp_http_client_config_t config = {
        .url = VISION_SERVER_URL,
        .method = HTTP_METHOD_POST,
        .event_handler = vision_http_event_handler,
        .timeout_ms = 15000,
        /* HTTPS needs a server verification source for the handshake. */
        .crt_bundle_attach = esp_crt_bundle_attach,
    };

    esp_http_client_handle_t client = esp_http_client_init(&config);
    if (!client) {
        return ESP_FAIL;
    }

    /* Heap buffer keeps the task stack free for the TLS handshake. */
    char *body = malloc(VISION_RESPONSE_MAX_LEN);
    if (body == NULL) {
        esp_http_client_cleanup(client);
        return ESP_ERR_NO_MEM;
    }

    int written = 0;
    int status = 0;
    int body_len = 0;

    esp_http_client_set_header(client, "Content-Type", "image/jpeg");
    esp_http_client_set_header(client, "X-Vision-Mode", vision_mode_name(mode));

    esp_err_t err = esp_http_client_open(client, fb->len);
    if (err != ESP_OK) {
        goto out;
    }

    written = esp_http_client_write(client, (const char *)fb->buf, fb->len);
    if (written < 0 || (size_t)written != fb->len) {
        ESP_LOGW("VISION_HTTP", "图片上传不完整");
        err = ESP_FAIL;
        goto out_close;
    }

    if (esp_http_client_fetch_headers(client) < 0) {
        ESP_LOGW("VISION_HTTP", "failed to read response headers");
        err = ESP_FAIL;
        goto out_close;
    }

    status = esp_http_client_get_status_code(client);
    body_len = esp_http_client_read_response(client, body, VISION_RESPONSE_MAX_LEN - 1);
    if (status != 200 || body_len <= 0) {
        ESP_LOGW("VISION_HTTP", "unexpected response: status=%d, body_len=%d", status, body_len);
        err = ESP_FAIL;
        goto out_close;
    }

    body[body_len] = '\0';
    vision_parse_result(body, body_len);

out_close:
    esp_http_client_close(client);
out:
    esp_http_client_cleanup(client);
    free(body);
    return err;
}

10. 解析视觉结果并驱动机械臂
服务端返回示例:
{
  "object": "face",
  "target_x": 420,
  "target_y": 210,
  "frame_w": 640,
  "frame_h": 480,
  "confidence": 92,
  "action": "track",
  "reply": "我看到你在画面右侧,我把灯头转过去。"
}

设备端解析:
#include <string.h>
#include "cJSON.h"

/* Return the integer value of a JSON number, or fallback for anything else. */
static int vision_json_int(const cJSON *item, int fallback)
{
    return cJSON_IsNumber(item) ? item->valueint : fallback;
}

/*
 * Parse the JSON answer of the vision service.
 *
 * A string "reply" is shown on the display and spoken. When "action" is
 * the string "track", the arm is steered towards (target_x, target_y)
 * inside a frame of frame_w x frame_h; missing or non-numeric fields fall
 * back to the centre of a 640x480 frame.
 *
 * json - response text, not required to be NUL-terminated.
 * len  - number of bytes in json.
 */
void vision_parse_result(const char *json, int len)
{
    if (json == NULL || len <= 0) {
        ESP_LOGE("VISION", "视觉 JSON 解析失败");
        return;
    }

    cJSON *root = cJSON_ParseWithLength(json, (size_t)len);
    if (!root) {
        ESP_LOGE("VISION", "视觉 JSON 解析失败");
        return;
    }

    const cJSON *reply = cJSON_GetObjectItem(root, "reply");
    const cJSON *action = cJSON_GetObjectItem(root, "action");

    if (cJSON_IsString(reply) && reply->valuestring != NULL) {
        display_show_ai_text(reply->valuestring);
        ai_tts_speak(reply->valuestring);
    }

    /* "action" may be missing or of another type; never strcmp a NULL. */
    if (cJSON_IsString(action) && action->valuestring != NULL &&
        strcmp(action->valuestring, "track") == 0) {
        int w = vision_json_int(cJSON_GetObjectItem(root, "frame_w"), 640);
        int h = vision_json_int(cJSON_GetObjectItem(root, "frame_h"), 480);

        /* A non-positive frame size cannot describe a real image. */
        if (w <= 0) {
            w = 640;
        }
        if (h <= 0) {
            h = 480;
        }

        int x = vision_json_int(cJSON_GetObjectItem(root, "target_x"), 320);
        int y = vision_json_int(cJSON_GetObjectItem(root, "target_y"), 240);

        arm_track_target(x, y, w, h);
    }

    cJSON_Delete(root);
}

11. 机械臂控制架构
11.1 机械臂自由度定义
第一版建议 3 轴:
Axis 0:底座左右旋转 pan
Axis 1:灯臂上下俯仰 tilt
Axis 2:灯头角度 head

/* Axes of the lamp arm; ARM_AXIS_MAX is the number of axes. */
typedef enum {
    ARM_AXIS_PAN = 0,
    ARM_AXIS_TILT,
    ARM_AXIS_HEAD,
    ARM_AXIS_MAX,
} arm_axis_t;

/* Motion state of one axis. */
typedef struct {
    int current_angle;
    int target_angle;
    int min_angle;
    int max_angle;
    int speed;
} arm_axis_state_t;

/* Every axis starts at 90 with a step of 3; only the limits differ. */
static arm_axis_state_t g_arm[ARM_AXIS_MAX] = {
    [ARM_AXIS_PAN] = {
        .current_angle = 90,
        .target_angle = 90,
        .min_angle = 20,
        .max_angle = 160,
        .speed = 3,
    },
    [ARM_AXIS_TILT] = {
        .current_angle = 90,
        .target_angle = 90,
        .min_angle = 30,
        .max_angle = 150,
        .speed = 3,
    },
    [ARM_AXIS_HEAD] = {
        .current_angle = 90,
        .target_angle = 90,
        .min_angle = 40,
        .max_angle = 140,
        .speed = 3,
    },
};

12. 舵机 PWM 驱动
ESP32-S3 使用 LEDC 输出 PWM:
#include "driver/ledc.h"

#define SERVO_FREQ_HZ    50
#define SERVO_TIMER      LEDC_TIMER_1
#define SERVO_MODE       LEDC_LOW_SPEED_MODE
#define SERVO_RESOLUTION LEDC_TIMER_13_BIT

#define SERVO_PAN_GPIO   1
#define SERVO_TILT_GPIO  2
#define SERVO_HEAD_GPIO  3

/*
 * LEDC channel per axis. Channel 0 is used by the camera for XCLK and
 * channels 3/4 by the cold/warm LEDs, so the servos get channels 5..7.
 */
static const ledc_channel_t s_servo_channel[ARM_AXIS_MAX] = {
    [ARM_AXIS_PAN]  = LEDC_CHANNEL_5,
    [ARM_AXIS_TILT] = LEDC_CHANNEL_6,
    [ARM_AXIS_HEAD] = LEDC_CHANNEL_7,
};

/* Output pin per axis. */
static const int s_servo_gpio[ARM_AXIS_MAX] = {
    [ARM_AXIS_PAN]  = SERVO_PAN_GPIO,
    [ARM_AXIS_TILT] = SERVO_TILT_GPIO,
    [ARM_AXIS_HEAD] = SERVO_HEAD_GPIO,
};

/*
 * Convert an angle in degrees to a 13-bit LEDC duty value.
 *
 * At 50 Hz the period is 20 ms. Typical servos map a 0.5 ms .. 2.5 ms
 * pulse to 0 .. 180 degrees, and a 13-bit duty has a maximum of 8191.
 * Angles outside 0..180 are clamped.
 */
static int servo_angle_to_duty(int angle)
{
    const int duty_min = 205;   /* 0.5 ms */
    const int duty_max = 1024;  /* 2.5 ms */

    if (angle < 0) {
        angle = 0;
    }
    if (angle > 180) {
        angle = 180;
    }

    return duty_min + (duty_max - duty_min) * angle / 180;
}

/* Bind one LEDC channel to a GPIO, starting at the 90 degree position. */
static void servo_channel_init(ledc_channel_t channel, int gpio)
{
    ledc_channel_config_t cfg = {
        .gpio_num = gpio,
        .speed_mode = SERVO_MODE,
        .channel = channel,
        .timer_sel = SERVO_TIMER,
        .duty = servo_angle_to_duty(90),
        .hpoint = 0,
    };
    ledc_channel_config(&cfg);
}

/* Configure the shared 50 Hz servo timer and one channel per arm axis. */
void servo_driver_init(void)
{
    ledc_timer_config_t timer_cfg = {
        .speed_mode = SERVO_MODE,
        .timer_num = SERVO_TIMER,
        .duty_resolution = SERVO_RESOLUTION,
        .freq_hz = SERVO_FREQ_HZ,
        .clk_cfg = LEDC_AUTO_CLK,
    };
    ledc_timer_config(&timer_cfg);

    for (int axis = 0; axis < ARM_AXIS_MAX; axis++) {
        servo_channel_init(s_servo_channel[axis], s_servo_gpio[axis]);
    }
}

/*
 * Drive one axis to the given angle (clamped to 0..180 degrees).
 * An axis outside the known range is ignored instead of moving pan.
 */
void servo_set_angle(arm_axis_t axis, int angle)
{
    if ((unsigned)axis >= (unsigned)ARM_AXIS_MAX) {
        return;
    }

    const ledc_channel_t ch = s_servo_channel[axis];
    const int duty = servo_angle_to_duty(angle);

    ledc_set_duty(SERVO_MODE, ch, duty);
    ledc_update_duty(SERVO_MODE, ch);
}

13. 机械臂平滑运动控制
static int clamp_angle(int value, int min, int max) { if (value < min) return min; if (value > max) return max; return value; } void arm_set_target(arm_axis_t axis, int angle) { if (axis >= ARM_AXIS_MAX) { return; } g_arm[axis].target_angle = clamp_angle( angle, g_arm[axis].min_angle, g_arm[axis].max_angle ); } void arm_controller_init(void) { servo_driver_init(); for (int i = 0; i < ARM_AXIS_MAX; i++) { servo_set_angle(i, g_arm[i].current_angle); } } void arm_task(void *arg) { while (1) { for (int i = 0; i < ARM_AXIS_MAX; i++) { arm_axis_state_t *axis = &g_arm[i]; if (axis->current_angle < axis->target_angle) { axis->current_angle += axis->speed; if (axis->current_angle > axis->target_angle) { axis->current_angle = axis->target_angle; } servo_set_angle(i, axis->current_angle); } else if (axis->current_angle > axis->target_angle) { axis->current_angle -= axis->speed; if (axis->current_angle < axis->target_angle) { axis->current_angle = axis->target_angle; } servo_set_angle(i, axis->current_angle); } } vTaskDelay(pdMS_TO_TICKS(30)); } }14. 根据视觉目标跟随
摄像头画面坐标转换为机械臂角度:
void arm_track_target(int target_x, int target_y, int frame_w, int frame_h) { int center_x = frame_w / 2; int center_y = frame_h / 2; int offset_x = target_x - center_x; int offset_y = target_y - center_y; /* * 简单 P 控制: * offset_x > 0 说明目标在右侧,pan 增大 * offset_y > 0 说明目标偏下,tilt 调整 */ float kp_pan = 0.04f; float kp_tilt = 0.03f; int delta_pan = (int)(offset_x * kp_pan); int delta_tilt = (int)(offset_y * kp_tilt); int new_pan = g_arm[ARM_AXIS_PAN].target_angle + delta_pan; int new_tilt = g_arm[ARM_AXIS_TILT].target_angle - delta_tilt; arm_set_target(ARM_AXIS_PAN, new_pan); arm_set_target(ARM_AXIS_TILT, new_tilt); lamp_set_state(LAMP_STATE_TRACKING); ESP_LOGI("ARM", "视觉跟随 target=(%d,%d), pan=%d, tilt=%d", target_x, target_y, new_pan, new_tilt); }15. 灯光控制
15.1 冷暖双色 LED PWM
#define LED_COLD_GPIO 21 #define LED_WARM_GPIO 47 #define LED_PWM_TIMER LEDC_TIMER_2 #define LED_PWM_MODE LEDC_LOW_SPEED_MODE #define LED_PWM_FREQ 5000 #define LED_PWM_RES LEDC_TIMER_10_BIT static int g_brightness = 80; static int g_color_temp = 50; static void led_pwm_channel_init(ledc_channel_t ch, int gpio) { ledc_channel_config_t cfg = { .gpio_num = gpio, .speed_mode = LED_PWM_MODE, .channel = ch, .timer_sel = LED_PWM_TIMER, .duty = 0, .hpoint = 0, }; ledc_channel_config(&cfg); } void light_controller_init(void) { ledc_timer_config_t timer_cfg = { .speed_mode = LED_PWM_MODE, .timer_num = LED_PWM_TIMER, .duty_resolution = LED_PWM_RES, .freq_hz = LED_PWM_FREQ, .clk_cfg = LEDC_AUTO_CLK, }; ledc_timer_config(&timer_cfg); led_pwm_channel_init(LEDC_CHANNEL_3, LED_COLD_GPIO); led_pwm_channel_init(LEDC_CHANNEL_4, LED_WARM_GPIO); light_set_brightness_temp(80, 50); } void light_set_brightness_temp(int brightness, int color_temp) { if (brightness < 0) brightness = 0; if (brightness > 100) brightness = 100; if (color_temp < 0) color_temp = 0; if (color_temp > 100) color_temp = 100; g_brightness = brightness; g_color_temp = color_temp; /* * color_temp = 0 偏暖 * color_temp = 100 偏冷 */ int total = brightness * 1023 / 100; int cold = total * color_temp / 100; int warm = total - cold; ledc_set_duty(LED_PWM_MODE, LEDC_CHANNEL_3, cold); ledc_update_duty(LED_PWM_MODE, LEDC_CHANNEL_3); ledc_set_duty(LED_PWM_MODE, LEDC_CHANNEL_4, warm); ledc_update_duty(LED_PWM_MODE, LEDC_CHANNEL_4); ESP_LOGI("LIGHT", "亮度=%d 色温=%d cold=%d warm=%d", brightness, color_temp, cold, warm); }16. 语音控制灯光和机械臂
AI 大模型返回控制 JSON:
{
  "type": "device_control",
  "action": "set_lamp",
  "brightness": 70,
  "color_temp": 30,
  "arm": {
    "pan": 90,
    "tilt": 120,
    "head": 100
  },
  "reply": "我已经把灯光调暖,并把灯头向下调整。"
}

设备端解析:
void ai_parse_device_control(const char *json) { cJSON *root = cJSON_Parse(json); if (!root) { return; } cJSON *type = cJSON_GetObjectItem(root, "type"); if (!type || strcmp(type->valuestring, "device_control") != 0) { cJSON_Delete(root); return; } cJSON *brightness = cJSON_GetObjectItem(root, "brightness"); cJSON *color_temp = cJSON_GetObjectItem(root, "color_temp"); if (brightness && color_temp) { light_set_brightness_temp( brightness->valueint, color_temp->valueint ); } cJSON *arm = cJSON_GetObjectItem(root, "arm"); if (arm) { cJSON *pan = cJSON_GetObjectItem(arm, "pan"); cJSON *tilt = cJSON_GetObjectItem(arm, "tilt"); cJSON *head = cJSON_GetObjectItem(arm, "head"); if (pan) { arm_set_target(ARM_AXIS_PAN, pan->valueint); } if (tilt) { arm_set_target(ARM_AXIS_TILT, tilt->valueint); } if (head) { arm_set_target(ARM_AXIS_HEAD, head->valueint); } } cJSON *reply = cJSON_GetObjectItem(root, "reply"); if (reply && cJSON_IsString(reply)) { display_show_ai_text(reply->valuestring); ai_tts_speak(reply->valuestring); } cJSON_Delete(root); }17. AI 会话 WebSocket 协议
17.1 设备发送语音事件
/* Tell the AI session that the wake word was detected. */
void ai_ws_send_wakeup(void)
{
    static const char wakeup_msg[] =
        "{\"type\":\"event\",\"event\":\"wakeup\",\"device\":\"sibo_pixar_lamp\"}";

    ai_ws_send_json(wakeup_msg);
}

/* Tell the AI session that the user interrupted playback (barge-in). */
void ai_ws_send_interrupt(void)
{
    static const char interrupt_msg[] =
        "{\"type\":\"event\",\"event\":\"interrupt\",\"reason\":\"barge_in\"}";

    ai_ws_send_json(interrupt_msg);
}

17.2 设备发送视觉请求
/*
 * Send a "vision_request" message for the given mode over the AI session.
 *
 * mode - NUL-terminated mode name inserted into the JSON as-is; it is
 *        not escaped, so it must not contain quotes or backslashes.
 *
 * Nothing is sent when mode is NULL or the message does not fit the
 * buffer, so the peer never receives truncated JSON.
 */
void ai_ws_send_vision_request(const char *mode)
{
    if (mode == NULL) {
        ESP_LOGW("AI_WS", "vision request without mode ignored");
        return;
    }

    char json[256];
    int n = snprintf(json, sizeof(json),
        "{"
        "\"type\":\"vision_request\","
        "\"mode\":\"%s\","
        "\"device\":\"sibo_pixar_lamp\""
        "}",
        mode
    );
    if (n < 0 || (size_t)n >= sizeof(json)) {
        ESP_LOGW("AI_WS", "vision request too long, not sent");
        return;
    }

    ai_ws_send_json(json);
}

17.3 设备发送状态上报
/*
 * Report the lamp state, light settings and the current angle of every
 * arm axis to the AI session as one "device_state" JSON message.
 */
void ai_ws_report_device_state(void)
{
    static const char fmt[] =
        "{\"type\":\"device_state\",\"state\":%d,"
        "\"brightness\":%d,\"color_temp\":%d,"
        "\"arm\":{\"pan\":%d,\"tilt\":%d,\"head\":%d}}";

    const int pan = g_arm[ARM_AXIS_PAN].current_angle;
    const int tilt = g_arm[ARM_AXIS_TILT].current_angle;
    const int head = g_arm[ARM_AXIS_HEAD].current_angle;

    char msg[512];
    snprintf(msg, sizeof(msg), fmt,
             g_lamp_state, g_brightness, g_color_temp, pan, tilt, head);

    ai_ws_send_json(msg);
}

18. MCP 扩展协议设计
四博方案可以保留 MCP / UART 扩展,方便客户 MCU 或其他控制系统接入。
18.1 MCP 功能表
set_light_brightness    设置亮度
set_light_color_temp    设置色温
set_arm_pose            设置机械臂姿态
look_at_user            看向用户
look_at_book            看向书本
start_vision            开始视觉识别
start_ocr               开始 OCR
start_config            进入配网
set_expression          设置表情

18.2 MCP 注册命令示例
/*
 * Send the AT probe followed by one AT+ADDMCP line per lamp command,
 * in a fixed order, over the MCP link.
 */
static void mcp_register_lamp_commands(void)
{
    static const char *const lines[] = {
        "AT\r\n",
        "AT+ADDMCP=1,set_light_brightness,设置台灯亮度,F1,1,B\r\n",
        "AT+ADDMCP=1,set_light_color_temp,设置台灯色温,F2,1,T\r\n",
        "AT+ADDMCP=1,set_arm_pan,设置机械臂左右角度,F3,1,P\r\n",
        "AT+ADDMCP=1,set_arm_tilt,设置机械臂上下角度,F4,1,T\r\n",
        "AT+ADDMCP=0,look_at_user,看向用户,2,F5,01\r\n",
        "AT+ADDMCP=0,look_at_book,看向书本,2,F6,01\r\n",
        "AT+ADDMCP=0,start_vision,开始视觉识别,2,F7,01\r\n",
        "AT+ADDMCP=0,start_config,开始配网,2,F8,01\r\n",
    };

    for (unsigned i = 0; i < sizeof(lines) / sizeof(lines[0]); i++) {
        mcp_send_line(lines[i]);
    }
}

18.3 MCP 指令处理
static void mcp_handle_lamp_cmd(uint8_t cmd, uint8_t *data, int len) { switch (cmd) { case 0xF1: light_set_brightness_temp(data[0], g_color_temp); break; case 0xF2: light_set_brightness_temp(g_brightness, data[0]); break; case 0xF3: arm_set_target(ARM_AXIS_PAN, data[0]); break; case 0xF4: arm_set_target(ARM_AXIS_TILT, data[0]); break; case 0xF5: app_post_event(APP_EVT_VISION_TRIGGER); display_show_status("正在寻找用户"); break; case 0xF6: app_post_event(APP_EVT_VISION_TRIGGER); display_show_status("正在寻找书本"); break; case 0xF7: app_post_event(APP_EVT_VISION_TRIGGER); break; case 0xF8: start_blufi_config(); break; default: ESP_LOGW("MCP", "未知台灯控制命令: 0x%02X", cmd); break; } }19. 云端视觉 + 大模型服务示例
下面给一个 Python FastAPI 原型,用于接收图片、调用视觉模型和大模型,然后返回机械臂动作建议。
from fastapi import FastAPI, File, UploadFile, Header from pydantic import BaseModel import uvicorn import cv2 import numpy as np app = FastAPI(title="Sibo Pixar AI Lamp Vision Server") class VisionResult(BaseModel): object: str target_x: int target_y: int frame_w: int frame_h: int confidence: int action: str reply: str @app.post("/api/vision/analyze", response_model=VisionResult) async def analyze_image( file: UploadFile = File(...), x_vision_mode: str = Header(default="describe") ): image_bytes = await file.read() np_arr = np.frombuffer(image_bytes, np.uint8) img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) h, w = img.shape[:2] # 这里先做技术原型:默认目标在画面中心偏右 # 后续可替换为人脸检测、手势检测、书本检测、OCR 或多模态模型 target_x = int(w * 0.62) target_y = int(h * 0.45) if x_vision_mode == "track_face": reply = "我看到你在画面右侧,我会把灯头转过去。" obj = "face" action = "track" elif x_vision_mode == "find_book": reply = "我正在寻找书本区域,并准备调整照明角度。" obj = "book" action = "track" elif x_vision_mode == "ocr": reply = "我已经拍下画面,可以开始识别文字。" obj = "text" action = "ocr" else: reply = "我已经看到了桌面环境,可以根据你的需要调整灯光。" obj = "scene" action = "describe" return VisionResult( object=obj, target_x=target_x, target_y=target_y, frame_w=w, frame_h=h, confidence=90, action=action, reply=reply ) if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=8080)20. 大模型动作规划 Prompt 示例
服务端可以把设备能力注册给大模型:
你是四博皮克斯苹果 AI 台灯的设备控制大脑。

设备能力:
1. 可以设置灯光亮度,范围 0-100。
2. 可以设置色温,范围 0-100,0 表示暖光,100 表示冷光。
3. 可以控制机械臂 pan,范围 20-160。
4. 可以控制机械臂 tilt,范围 30-150。
5. 可以控制灯头 head,范围 40-140。
6. 可以触发视觉识别。
7. 可以看向用户、看向书本、回到中间。
8. 回答必须输出 JSON,不要输出多余文本。

输出格式:
{
  "type": "device_control",
  "reply": "给用户的自然语言回复",
  "brightness": 80,
  "color_temp": 50,
  "arm": {
    "pan": 90,
    "tilt": 90,
    "head": 90
  },
  "vision": {
    "enable": false,
    "mode": "none"
  }
}

用户说:“灯太暗了,帮我照一下书本。”
大模型应输出:
{
  "type": "device_control",
  "reply": "好的,我把亮度调高,并准备识别书本位置来调整灯头。",
  "brightness": 90,
  "color_temp": 45,
  "arm": {
    "pan": 90,
    "tilt": 120,
    "head": 110
  },
  "vision": {
    "enable": true,
    "mode": "find_book"
  }
}

21. 量产 OTA 与开源策略
21.1 开源内容建议
1. ESP32-S3 设备端源码
2. 摄像头采集模块
3. 机械臂控制模块
4. LED 灯光控制模块
5. AI WebSocket 协议模块
6. MCP 扩展模块
7. 屏幕 UI 示例
8. Python 视觉服务 Demo
9. 硬件原理图
10. BOM
11. 结构件接口说明
12. 烧录工具和量产说明

21.2 量产固件分层
bootloader
partition table
factory app
ota_0
ota_1
nvs
spiffs / littlefs
model config
factory calibration

分区表示例:
# Name,   Type, SubType, Offset,  Size
nvs,      data, nvs,     0x9000,  0x6000
otadata,  data, ota,     0xf000,  0x2000
phy_init, data, phy,     0x11000, 0x1000
factory,  app,  factory, 0x20000, 0x300000
ota_0,    app,  ota_0,   ,        0x300000
ota_1,    app,  ota_1,   ,        0x300000
storage,  data, spiffs,  ,        0x200000

22. 对外推广版技术描述
四博皮克斯苹果 AI 台灯再升级,在原有 AI 语音交互基础上,新增机械臂和视觉功能。设备基于 ESP32-S3 架构打造,集成摄像头、麦克风、喇叭、机械臂、LED 灯光和屏幕显示,支持 Wi-Fi / 蓝牙 / 4G 扩展联网,可接入 AI 大模型、视觉识别服务、OCR、知识库和客户自有业务系统。
通过视觉识别,台灯可以判断用户位置、书本位置和桌面环境;通过机械臂控制,台灯可以自动调整照明方向;通过 AI 大模型,设备可以理解用户意图,把“帮我照一下书本”“看一下我在写什么”“灯光调暖一点”“转过来看我”等自然语言转换成灯光、机械臂和视觉动作。
该方案全面开源,包含设备端源码、视觉服务 Demo、机械臂控制、灯光控制、AI 协议和 MCP 扩展接口,方便客户基于四博方案快速完成二次开发和量产落地。
四博提供的不只是一个 AI 台灯,而是一套集语音、视觉、机械臂、灯光、屏幕、AI 大模型和客户系统接入于一体的智能硬件开发底座。
