Hi everyone,
I'm working on a project using the M5Stack Atom Echo to stream audio over Wi-Fi to a Python socket server and save it as a .wav
file. However, I’ve been consistently running into the following audio quality issues:
- Audio sounds sped up or accelerated
- Audio degrades with repeated runs, especially compared to the quality after a fresh flash of the device
- Low fidelity or choppy output
- Sometimes segments of the audio are missing intermittently and unpredictably
What I’ve Tried So Far
To rule out common causes, I’ve implemented the following:
- Added a magic header (0xAA55) for packet boundary detection
- Added a CRC32 checksum to verify packet integrity
- Tracked packet sequence numbers to detect out-of-order or lost packets
- Tuned record size to 120 samples per packet to avoid fragmentation
- Increased DMA buffer length and count to prevent underruns
- Verified no packet loss over Wi-Fi by checking sequence and CRC
- Used sample rate of 17000 Hz and 16-bit mono audio
Despite these efforts, the audio still exhibits the above problems.
My Setup
Arduino (M5Stack Atom Echo)
- Records mic input at 17000 Hz, 16-bit mono
- Sends audio in chunks of 120 samples per packet (with sequence, magic, and CRC)
- Uses a large circular buffer to avoid data loss during recording
Python Socket Server
- Receives packets with proper buffering
- Validates magic, sequence, and CRC
- Appends valid audio frames and writes to a
.wav
file
Sample Code
Arduino Code (M5Stack Atom Echo)
#include <M5Unified.h>
#include <WiFi.h>
#include <CRC32.h>
const char* ssid = "xxx";
const char* password = "xxx";
const char* server_ip = "xx.xx.xx.xx";
const uint16_t server_port = 5000;
WiFiClient client;
CRC32 crc;
// Reduce packet size to avoid WiFi fragmentation (MTU < 1400 bytes)
static constexpr size_t record_length = 120; // Reduced from 240
static constexpr size_t record_number = 256; // Larger buffer
static constexpr size_t record_samplerate = 17000;
static constexpr size_t record_size = record_number * record_length;
static size_t rec_record_idx = 0;
static uint32_t packet_seq = 0;
int16_t* rec_data;
bool is_connected = false;
// Packet with header (magic + seq + crc)
struct AudioPacket {
uint16_t magic; // 0xAA55 for boundary detection
uint32_t seq;
uint32_t crc;
int16_t data[record_length];
} __attribute__((packed));
void setup() {
M5.begin();
Serial.begin(115200);
M5.Speaker.end();
// Configure mic for 16-bit mono 17000Hz
auto mic_cfg = M5.Mic.config();
mic_cfg.sample_rate = 17000;
mic_cfg.stereo = true; // Force mono
mic_cfg.dma_buf_len = record_number; // Larger DMA buffer to avoid underflow
mic_cfg.dma_buf_count = 8;
M5.Mic.config(mic_cfg); // Apply config
M5.Mic.begin(); // Initialize mic with default config
// Allocate buffer
rec_data = (int16_t*)heap_caps_malloc(record_size * sizeof(int16_t), MALLOC_CAP_8BIT);
memset(rec_data, 0, record_size * sizeof(int16_t));
// Wi-Fi connection with retry
WiFi.begin(ssid, password);
while (WiFi.status() != WL_CONNECTED) {
delay(500);
Serial.print(".");
}
Serial.println("\nWi-Fi connected");
}
void loop() {
// Reconnect if disconnected
if (!client.connected()) {
is_connected = client.connect(server_ip, server_port);
if (is_connected) {
Serial.println("Server connected");
packet_seq = 0; // Reset sequence on new connection
} else {
delay(1000);
return;
}
}
if (client.connected() && M5.Mic.isEnabled()) {
AudioPacket packet;
auto data = &rec_data[rec_record_idx * record_length];
if (M5.Mic.record(data, record_length, record_samplerate)) {
// Fill packet with magic number and reset CRC
packet.magic = 0xAA55;
packet.seq = packet_seq++;
crc.reset(); // Critical: reset CRC for each packet
memcpy(packet.data, data, record_length * sizeof(int16_t));
packet.crc = crc.calculate((uint8_t*)&packet.data, record_length * sizeof(int16_t));
client.write((uint8_t*)&packet, sizeof(AudioPacket));
delay(10);
if (++rec_record_idx >= record_number) {
memset(rec_data, 0, record_size * sizeof(int16_t));
rec_record_idx = 0;
}
}
}
}
Python Socket Server
import socket
import wave
import struct
import zlib
import numpy as np
import sounddevice as sd
HOST = '0.0.0.0'
PORT = 5000
OUTPUT_FILE = 'recorded_audio.wav'
SAMPLE_RATE = 17000
CHANNELS = 1
SAMPLE_WIDTH = 2
RECORD_LENGTH = 120 # Must match Arduino
PACKET_SIZE = 2 + 4 + 4 + (RECORD_LENGTH * 2) # magic(2) + seq(4) + crc(4) + data
frames = []
last_seq = -1
buffer = b''
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.bind((HOST, PORT))
sock.listen(1)
conn, addr = sock.accept()
with conn:
print(f"Connected by {addr}")
while True:
# Handle fragmented data by buffering
buffer += conn.recv(1024)
while len(buffer) >= PACKET_SIZE:
# Extract complete packet
packet = buffer[:PACKET_SIZE]
buffer = buffer[PACKET_SIZE:]
# Verify magic number
magic, seq, crc_received = struct.unpack('<HII', packet[:10])
if magic != 0xAA55:
print("Invalid packet magic number")
continue
# Verify sequence
if seq != last_seq + 1:
print(f"Sequence error: expected {last_seq + 1}, got {seq}")
last_seq = seq
# Verify CRC
audio_data = packet[10:]
crc_calculated = zlib.crc32(audio_data) & 0xFFFFFFFF
if crc_received != crc_calculated:
print(f"CRC error: received {crc_received}, calculated {crc_calculated}")
continue
frames.append(audio_data)
except:
# Save WAV
with wave.open(OUTPUT_FILE, 'wb') as wf:
wf.setnchannels(CHANNELS)
wf.setsampwidth(SAMPLE_WIDTH)
wf.setframerate(SAMPLE_RATE)
wf.writeframes(b''.join(frames))
print(f"Audio saved to {OUTPUT_FILE}")
Recorded Audio
Any advice or pointers would be greatly appreciated.
Thanks in advance.