【FireBeetle 2 ESP32 P4 开发套件】物体识别的板端推理- Makelog(造物记)

本文介绍了 DFRobot FireBeetle 2 ESP32 P4 开发套件调用固件中预训练的模型实现物体识别的板端推理的项目设计。

项目介绍

固件更新：清除 Flash，上传支持 CocoDetector 物体识别的固件；

板端推理：流程图、MicroPython 代码、效果展示；

固件更新

为了实现物体识别，需要调用 espdl 模块提供的深度学习模型 CocoDetector，因此需重新上传集成了该模型的固件。

下载并安装乐鑫官方提供的 Flash Download Tool 软件；

连接开发板，加载固件，起始地址设为 0x002000，并选择合适的波特率；

点击 ERASE 清空 Flash 以避免固件更新报错；

点击 Start 按钮开始并完成固件上传；

固件 firmware_dsi.bin 见附件。

板端推理

包括流程图、工程代码、测试效果等。

流程图

工程代码

运行 Thonny IDE 软件，配置解释器和设备端口，新建文件并添加如下代码：

import os

from espdl import CocoDetector
from jpeg import Decoder, Encoder

# Source image to analyse and the directory that receives the annotated copy
input_image = "desktop.jpg"
output_dir = "out"
# Build "<output_dir>/<stem>_out.jpg". Only the directory part and the final
# extension are stripped, so "pics/my.photo.jpg" maps to "out/my.photo_out.jpg"
# rather than "out/pics/my_out.jpg".
_image_stem = input_image.rsplit("/", 1)[-1].rsplit(".", 1)[0]
output_path = f"{output_dir}/{_image_stem}_out.jpg"

# 5x7 bitmap font. Every glyph is a list of 7 row bitmasks, top row first;
# within a row, bit 4 is the leftmost column and bit 0 the rightmost
# (a set bit means the pixel is drawn).
FONT = {
    "0": [0b01110, 0b10001, 0b10001, 0b10001, 0b10001, 0b10001, 0b01110],
    "1": [0b00100, 0b01100, 0b00100, 0b00100, 0b00100, 0b00100, 0b01110],
    "2": [0b01110, 0b10001, 0b00001, 0b00010, 0b00100, 0b01000, 0b11111],
    "3": [0b01110, 0b10001, 0b00001, 0b00110, 0b00001, 0b10001, 0b01110],
    "4": [0b00010, 0b00110, 0b01010, 0b10010, 0b11111, 0b00010, 0b00010],
    "5": [0b11111, 0b10000, 0b11110, 0b00001, 0b00001, 0b10001, 0b01110],
    "6": [0b00110, 0b01000, 0b10000, 0b11110, 0b10001, 0b10001, 0b01110],
    "7": [0b11111, 0b00001, 0b00010, 0b00100, 0b01000, 0b01000, 0b01000],
    "8": [0b01110, 0b10001, 0b10001, 0b01110, 0b10001, 0b10001, 0b01110],
    "9": [0b01110, 0b10001, 0b10001, 0b01111, 0b00001, 0b00010, 0b01100],
    ":": [0b00000, 0b00000, 0b00100, 0b00000, 0b00100, 0b00000, 0b00000],
    " ": [0b00000, 0b00000, 0b00000, 0b00000, 0b00000, 0b00000, 0b00000],
    "%": [0b11001, 0b11010, 0b00100, 0b01011, 0b10011, 0b00000, 0b00000],
    "a": [0b00000, 0b00000, 0b01110, 0b00001, 0b01111, 0b10001, 0b01111],
    "b": [0b10000, 0b10000, 0b10110, 0b11001, 0b10001, 0b11001, 0b10110],
    "c": [0b00000, 0b00000, 0b01110, 0b10000, 0b10000, 0b10001, 0b01110],
    "d": [0b00001, 0b00001, 0b01101, 0b10011, 0b10001, 0b10011, 0b01101],
    "e": [0b00000, 0b00000, 0b01110, 0b10001, 0b11111, 0b10000, 0b01110],
    "f": [0b00110, 0b01001, 0b01000, 0b11100, 0b01000, 0b01000, 0b01000],
    "g": [0b00000, 0b01111, 0b10001, 0b10001, 0b01111, 0b00001, 0b01110],
    "h": [0b10000, 0b10000, 0b10110, 0b11001, 0b10001, 0b10001, 0b10001],
    "i": [0b00100, 0b00000, 0b01100, 0b00100, 0b00100, 0b00100, 0b01110],
    "j": [0b00010, 0b00000, 0b00110, 0b00010, 0b00010, 0b10010, 0b01100],
    "k": [0b10000, 0b10000, 0b10010, 0b10100, 0b11000, 0b10100, 0b10010],
    "l": [0b01100, 0b00100, 0b00100, 0b00100, 0b00100, 0b00100, 0b01110],
    "m": [0b00000, 0b00000, 0b11010, 0b10101, 0b10101, 0b10101, 0b10101],
    "n": [0b00000, 0b00000, 0b10110, 0b11001, 0b10001, 0b10001, 0b10001],
    "o": [0b00000, 0b00000, 0b01110, 0b10001, 0b10001, 0b10001, 0b01110],
    "p": [0b00000, 0b00000, 0b10110, 0b11001, 0b11001, 0b10110, 0b10000],
    "q": [0b00000, 0b00000, 0b01101, 0b10011, 0b10011, 0b01101, 0b00001],
    "r": [0b00000, 0b00000, 0b10110, 0b11000, 0b10000, 0b10000, 0b10000],
    "s": [0b00000, 0b00000, 0b01110, 0b10000, 0b01110, 0b00001, 0b11110],
    "t": [0b01000, 0b01000, 0b11100, 0b01000, 0b01000, 0b01001, 0b00110],
    "u": [0b00000, 0b00000, 0b10001, 0b10001, 0b10001, 0b10011, 0b01101],
    "v": [0b00000, 0b00000, 0b10001, 0b10001, 0b10001, 0b01010, 0b00100],
    "w": [0b00000, 0b00000, 0b10001, 0b10101, 0b10101, 0b10101, 0b01010],
    "x": [0b00000, 0b00000, 0b10001, 0b01010, 0b00100, 0b01010, 0b10001],
    "y": [0b00000, 0b00000, 0b10001, 0b10001, 0b01111, 0b00001, 0b01110],
    "z": [0b00000, 0b00000, 0b11111, 0b00010, 0b00100, 0b01000, 0b11111],
}

# JPEG decoder; used further down for get_img_info() and decode().
decoder = Decoder()
# NOTE(review): despite its name, this variable holds a COCO object detector.
# This instance is rebound to CocoDetector(width=..., height=...) once the
# image size is known, before any inference runs — confirm whether this first
# construction is actually required by the firmware.
face_detector = CocoDetector() # Initialize without dimensions first
# Class names indexed by the integer 'category' value of a detection result.
# Five names per row; the trailing comment gives the index range of the row.
MSCOCO_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane",                 # 0-4
    "bus", "train", "truck", "boat", "traffic light",                     # 5-9
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",        # 10-14
    "cat", "dog", "horse", "sheep", "cow",                                # 15-19
    "elephant", "bear", "zebra", "giraffe", "backpack",                   # 20-24
    "umbrella", "handbag", "tie", "suitcase", "frisbee",                  # 25-29
    "skis", "snowboard", "sports ball", "kite", "baseball bat",           # 30-34
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",  # 35-39
    "wine glass", "cup", "fork", "knife", "spoon",                        # 40-44
    "bowl", "banana", "apple", "sandwich", "orange",                      # 45-49
    "broccoli", "carrot", "hot dog", "pizza", "donut",                    # 50-54
    "cake", "chair", "couch", "potted plant", "bed",                      # 55-59
    "dining table", "toilet", "tv", "laptop", "mouse",                    # 60-64
    "remote", "keyboard", "cell phone", "microwave", "oven",              # 65-69
    "toaster", "sink", "refrigerator", "book", "clock",                   # 70-74
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",         # 75-79
]

# Load the encoded source image (usually JPEG) from the filesystem.
# The context manager closes the file handle as soon as the bytes are read,
# instead of leaving it open until the garbage collector gets to it.
with open(input_image, "rb") as image_file:
    img = image_file.read()

# Ask the decoder for the image dimensions of the encoded data
orig_width, orig_height = decoder.get_img_info(img)
print(f"Image size: {orig_width}x{orig_height}")

# Decode to raw pixels (RGB888, matching the encoder's pixel_format below) and
# copy into a bytearray so the drawing helpers can modify the pixels in place
framebuffer = bytearray(decoder.decode(img))

# Initialize the encoder and the detector with the detected dimensions
encoder = Encoder(width=orig_width, height=orig_height, pixel_format="RGB888")
face_detector = CocoDetector(width=orig_width, height=orig_height)

# Run object detection on the decoded frame
results = face_detector.run(framebuffer)

# Draw rectangle function
def draw_rectangle(buffer, width, height, x, y, w, h, color=(0, 0, 255), thickness=2):
    """
    Draw a rectangle outline on an RGB888 image buffer, in place.

    The outline starts on the box edges and grows outwards by one pixel per
    unit of thickness. Pixels that fall outside the image, or beyond the end
    of the buffer, are silently skipped.

    :param buffer: mutable image buffer (e.g. bytearray), 3 bytes per pixel, row-major
    :param width: image width in pixels
    :param height: image height in pixels
    :param x: top-left x coordinate of the box
    :param y: top-left y coordinate of the box
    :param w: box width; the right edge is drawn at column x + w
    :param h: box height; the bottom edge is drawn at row y + h
    :param color: border color (RGB format); the first three components are used
    :param thickness: border thickness in pixels
    """
    pixel = bytes(color[:3])
    # Number of whole pixels addressable in both the image and the buffer
    pixel_count = min(width * height, len(buffer) // 3)

    def fill_row(first_col, last_col, row):
        """Paint columns first_col..last_col (inclusive) of one row, clipped."""
        if not 0 <= row < height:
            return
        first = row * width + max(first_col, 0)
        last = min(row * width + min(last_col, width - 1), pixel_count - 1)
        if first > last:
            return
        # One slice write per row instead of one call per pixel
        buffer[first * 3:(last + 1) * 3] = pixel * (last - first + 1)

    def set_pixel(col, row):
        """Paint a single pixel if it lies inside the image and the buffer."""
        if 0 <= col < width and 0 <= row < height:
            index = row * width + col
            if index < pixel_count:
                buffer[index * 3:index * 3 + 3] = pixel

    # Draw bounding box, one concentric 1-pixel outline per unit of thickness
    for t in range(thickness):
        left, right = x - t, x + w + t
        top, bottom = y - t, y + h + t
        # Top and bottom edges
        fill_row(left, right, top)
        fill_row(left, right, bottom)
        # Left and right edges
        for row in range(top, bottom + 1):
            set_pixel(left, row)
            set_pixel(right, row)

def draw_string(buffer, width, height, x0, y0, text, color=(0, 0, 255), scale=1, font=None):
    """
    Draw a string with a 5x7 bitmap font on an RGB888 image buffer, in place.

    The text is lower-cased before glyph lookup and characters without a
    glyph are rendered as blanks. After each character the pen advances by
    5 * scale + 1 pixels. Pixels that fall outside the image, or beyond the
    end of the buffer, are silently skipped (the buffer is never resized).

    :param buffer: mutable image buffer (e.g. bytearray), 3 bytes per pixel, row-major
    :param width: image width in pixels
    :param height: image height in pixels
    :param x0: x coordinate of the top-left corner of the first character
    :param y0: y coordinate of the top-left corner of the first character
    :param text: value to render; converted with str()
    :param color: text color (RGB format); the first three components are used
    :param scale: integer magnification; each font pixel becomes a scale x scale square
    :param font: optional mapping of character -> 7 row bitmasks (bit 4 is the
                 leftmost column); defaults to the module-level FONT
    """
    glyphs = FONT if font is None else font
    pixel = bytes(color[:3])
    # Number of whole pixels addressable in both the image and the buffer
    pixel_count = min(width * height, len(buffer) // 3)
    blank = (0,) * 7

    def set_pixel(col, row):
        """Paint a single pixel if it lies inside the image and the buffer."""
        if 0 <= col < width and 0 <= row < height:
            index = row * width + col
            # Without this check a slice write past the end would grow the buffer
            if index < pixel_count:
                buffer[index * 3:index * 3 + 3] = pixel

    pen_x = x0
    for ch in str(text).lower():
        rows = glyphs.get(ch, blank)
        for row in range(7):
            bits = rows[row]
            for col in range(5):
                if bits & (1 << (4 - col)):
                    left = pen_x + col * scale
                    top = y0 + row * scale
                    # Expand one font pixel into a scale x scale square
                    for py in range(top, top + scale):
                        for px in range(left, left + scale):
                            set_pixel(px, py)
        pen_x += 5 * scale + 1 # string gap

# Labels use the 5x7 font enlarged by this factor
label_scale = 2
# Vertical room reserved above a box for its label: 8 rows per scale unit plus 1 px
label_offset = 8 * label_scale + 1

# Draw detection boxes on image
for obj in results:
    x1, y1, x2, y2 = obj['box']
    draw_rectangle(framebuffer, orig_width, orig_height,
                   x1, y1, x2 - x1, y2 - y1, color=(0, 255, 0), thickness=3)

    label = f"{MSCOCO_CLASSES[obj['category']]}:{int(obj['score']*100)}%"
    # Label sits just above the box's top-left corner, clamped to the top of the image
    draw_string(framebuffer, orig_width, orig_height,
                x1, max(0, y1 - label_offset), label, color=(0, 255, 0), scale=label_scale)

    print(label)

# Make sure the output directory exists; open() below fails on a missing directory
try:
    os.mkdir(output_dir)
except OSError:
    # Typically "already exists". If the directory is genuinely unusable,
    # the open() below raises with the real cause.
    pass

# Save output image
marked_img = encoder.encode(framebuffer)
with open(output_path, "wb") as f:
    f.write(marked_img)

print(f"Processed image saved to: {output_path}")

保存代码，将目标图片上传至板端根目录，运行程序，待推理完成，可在 out 文件夹获取识别结果图片。

效果演示

办公桌

其他场景

Shell 终端

终端打印识别结果，包括图片尺寸、物体所属类别、置信度、坐标信息等。

总结

本文介绍了 DFRobot FireBeetle 2 ESP32 P4 开发套件调用固件中预训练的模型实现物体识别的板端推理的项目设计，为该开发板在人工智能和边缘 AI 领域的应用提供了参考。

附件

firmware_dsi.zip

【FireBeetle 2 ESP32 P4 开发套件】物体识别的板端推理 简单