1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
|
/*********************************************************************************************
Simple example of using the ESP32-S3's LCD peripheral for general-purpose
(non-LCD) parallel data output with DMA. Connect 8 LEDs (or logic analyzer),
cycles through a pattern among them at about 1 Hz.
This code is ONLY for the ESP32-S3, NOT the S2, C3 or original ESP32.
None of this is authoritative canon, just a lot of trial error w/datasheet
and register poking. Probably more robust ways of doing this still TBD.
FULL CREDIT goes to AdaFruit and https://github.com/PaintYourDragon
https://blog.adafruit.com/2022/06/21/esp32uesday-more-s3-lcd-peripheral-hacking-with-code/
https://github.com/adafruit/Adafruit_Protomatter/blob/master/src/arch/esp32-s3.h
PLEASE SUPPORT THEM!
********************************************************************************************/
#if __has_include (<hal/lcd_ll.h>)
// Stop compile errors: /src/platforms/esp32s3/gdma_lcd_parallel16.hpp:64:10: fatal error: hal/lcd_ll.h: No such file or directory
#pragma message "Compiling for ESP32-S3"
#ifdef ARDUINO_ARCH_ESP32
#include <Arduino.h>
#endif
#include "gdma_lcd_parallel16.hpp"
#include "esp_attr.h"
#include "esp_idf_version.h"
/*
dma_descriptor_t desc; // DMA descriptor for testing
uint8_t data[8][312]; // Transmit buffer (2496 bytes total)
uint16_t* dmabuff2;
*/
DRAM_ATTR volatile bool previousBufferFree = true;
// End-of-DMA-transfer callback
IRAM_ATTR bool gdma_on_trans_eof_callback(gdma_channel_handle_t dma_chan,
gdma_event_data_t *event_data, void *user_data) {
// This DMA callback seems to trigger a moment before the last data has
// issued (buffering between DMA & LCD peripheral?), so pause a moment
// before stopping LCD data out. The ideal delay may depend on the LCD
// clock rate...this one was determined empirically by monitoring on a
// logic analyzer. YMMV.
esp_rom_delay_us(100);
// The LCD peripheral stops transmitting at the end of the DMA xfer, but
// clear the lcd_start flag anyway -- we poll it in loop() to decide when
// the transfer has finished, and the same flag is set later to trigger
// the next transfer.
//LCD_CAM.lcd_user.lcd_start = 0;
previousBufferFree = true;
return true;
}
lcd_cam_dev_t* getDev()
{
return &LCD_CAM;
}
// ------------------------------------------------------------------------------
void Bus_Parallel16::config(const config_t& cfg)
{
_cfg = cfg;
//auto port = cfg.port;
_dev = getDev();
}
//https://github.com/adafruit/Adafruit_Protomatter/blob/master/src/arch/esp32-s3.h
bool Bus_Parallel16::init(void)
{
///dmabuff2 = (uint16_t*)heap_caps_malloc(sizeof(uint16_t) * 64*32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
// LCD_CAM peripheral isn't enabled by default -- MUST begin with this:
periph_module_enable(PERIPH_LCD_CAM_MODULE);
periph_module_reset(PERIPH_LCD_CAM_MODULE);
// Reset LCD bus
LCD_CAM.lcd_user.lcd_reset = 1;
esp_rom_delay_us(1000);
// uint32_t lcd_clkm_div_num = ((160000000 + 1) / _cfg.bus_freq);
// ESP_LOGI("", "Clock divider is %d", lcd_clkm_div_num);
// Configure LCD clock. Since this program generates human-perceptible
// output and not data for LED matrices or NeoPixels, use almost the
// slowest LCD clock rate possible. The S3-mini module used on Feather
// ESP32-S3 has a 40 MHz crystal. A 2-stage clock division of 1:16000
// is applied (250*64), yielding 2,500 Hz. Still much too fast for
// human eyes, so later we set up the data to repeat each output byte
// many times over.
//LCD_CAM.lcd_clock.clk_en = 0; // Enable peripheral clock
// LCD_CAM_LCD_CLK_SEL Select LCD module source clock. 0: clock source is disabled. 1: XTAL_CLK. 2: PLL_D2_CLK. 3: PLL_F160M_CLK. (R/W)
LCD_CAM.lcd_clock.lcd_clk_sel = 3; // Use 160Mhz Clock Source -> PLL_F160M_CLK
LCD_CAM.lcd_clock.lcd_ck_out_edge = 0; // PCLK low in 1st half cycle
LCD_CAM.lcd_clock.lcd_ck_idle_edge = 0; // PCLK low idle
LCD_CAM.lcd_clock.lcd_clkcnt_n = 1; // Should never be zero
//LCD_CAM.lcd_clock.lcd_clk_equ_sysclk = 0; // PCLK = CLK / (CLKCNT_N+1)
LCD_CAM.lcd_clock.lcd_clk_equ_sysclk = 1; // PCLK = CLK / 1 (... so 160Mhz still)
// https://esp32.com/viewtopic.php?f=5&t=24459&start=80#p94487
/* Re: ESP32-S3 LCD and I2S FULL documentation
* by ESP_Sprite » Fri Mar 25, 2022 2:06 am
*
* Are you sure you are staying within the limits of the psram throughput? If GDMA can't fetch data fast
* enough it leads to corruption. Also keep in mind that worst case scenario, the gdma can only use half of
* the bandwidth of the psram peripheral (as it's round-robin shared with the CPUs).
*/
// Fastest speed I can get with Octoal PSRAM to work before nothing shows. Based on manual testing.
// If using an ESP32-S3 with slower (half the bandwidth) Q-SPI (Quad), then the divisor will need to be '20' (8Mhz) which wil be flickery!
if (_cfg.psram_clk_override)
{
ESP_LOGI("S3", "DMA buffer is on PSRAM. Limiting clockspeed....");
//LCD_CAM.lcd_clock.lcd_clkm_div_num = 10; //16mhz is the fasted the Octal PSRAM can support it seems from faptastic's testing using an N8R8 variant (Octal SPI PSRAM).
// https://github.com/mrfaptastic/ESP32-HUB75-MatrixPanel-DMA/issues/441#issuecomment-1513631890
// and
// https://github.com/mrcodetastic/ESP32-HUB75-MatrixPanel-DMA/issues/442
LCD_CAM.lcd_clock.lcd_clkm_div_num = 12; // 13Mhz is the fastest output from PSRAM if we want to be able to service other peripherals as well.
}
else
{
#if defined(S3_LCD_DIV_NUM)
auto _div_num = S3_LCD_DIV_NUM;
#else
auto freq = (_cfg.bus_freq);
auto _div_num = 16; // 10Mhz
if (freq <= 10000000L) {
} else if (freq < 20000000L) {
_div_num = 10; // 16Mhz
} else {
_div_num = 7; // 22Mhz --- likely to have noise without a good connection
}
#endif
LCD_CAM.lcd_clock.lcd_clkm_div_num = _div_num;
}
ESP_LOGI("S3", "Clock divider is %d", (int)LCD_CAM.lcd_clock.lcd_clkm_div_num);
ESP_LOGD("S3", "Resulting output clock frequency: %d Mhz", (int)(160000000L/LCD_CAM.lcd_clock.lcd_clkm_div_num));
LCD_CAM.lcd_clock.lcd_clkm_div_a = 1; // 0/1 fractional divide
LCD_CAM.lcd_clock.lcd_clkm_div_b = 0;
// See section 26.3.3.1 of the ESP32S3 Technical Reference Manual
// for information on other clock sources and dividers.
// Configure LCD frame format. This is where we fiddle the peripheral
// to provide generic 8-bit output rather than actually driving an LCD.
// There's also a 16-bit mode but that's not shown here.
LCD_CAM.lcd_ctrl.lcd_rgb_mode_en = 0; // i8080 mode (not RGB)
LCD_CAM.lcd_rgb_yuv.lcd_conv_bypass = 0; // Disable RGB/YUV converter
LCD_CAM.lcd_misc.lcd_next_frame_en = 0; // Do NOT auto-frame
LCD_CAM.lcd_misc.lcd_bk_en = 1; // https://esp32.com/viewtopic.php?t=24459&start=60#p91835
LCD_CAM.lcd_data_dout_mode.val = 0; // No data delays
LCD_CAM.lcd_user.lcd_always_out_en = 1; // Enable 'always out' mode
LCD_CAM.lcd_user.lcd_8bits_order = 0; // Do not swap bytes
LCD_CAM.lcd_user.lcd_bit_order = 0; // Do not reverse bit order
LCD_CAM.lcd_user.lcd_2byte_en = 1; // 8-bit data mode
LCD_CAM.lcd_user.lcd_dummy = 1; // Dummy phase(s) @ LCD start
LCD_CAM.lcd_user.lcd_dummy_cyclelen = 1; // 1+1 dummy phase
LCD_CAM.lcd_user.lcd_cmd = 0; // No command at LCD start
// "Dummy phases" are initial LCD peripheral clock cycles before data
// begins transmitting when requested. After much testing, determined
// that at least one dummy phase MUST be enabled for DMA to trigger
// reliably. A problem with dummy phase(s) is if we're also using the
// LCD_PCLK_IDX signal (not used in this code, but Adafruit_Protomatter
// does)...the clock signal will start a couple of pulses before data,
// which may or may not be problematic in some situations. You can
// disable the dummy phase but need to keep the LCD TX FIFO primed
// in that case, which gets complex.
// always_out_en is set above to allow aribtrary-length transfers,
// else lcd_dout_cyclelen is used...but is limited to 8K. Long (>4K)
// transfers need DMA linked lists, not used here but mentioned later.
// Route 8 LCD data signals to GPIO pins
int8_t* pins = _cfg.pin_data;
for(int i = 0; i < 16; i++)
{
if (pins[i] >= 0) { // -1 value will CRASH the ESP32!
esp_rom_gpio_connect_out_signal(pins[i], LCD_DATA_OUT0_IDX + i, false, false);
gpio_hal_iomux_func_sel(GPIO_PIN_MUX_REG[pins[i]], PIN_FUNC_GPIO);
gpio_set_drive_capability((gpio_num_t)pins[i], (gpio_drive_cap_t)3);
}
}
/*
const struct {
int8_t pin;
uint8_t signal;
} mux[] = {
{ 43, LCD_DATA_OUT0_IDX }, // These are 8 consecutive pins down one
{ 42, LCD_DATA_OUT1_IDX }, // side of the ESP32-S3 Feather. The ESP32
{ 2, LCD_DATA_OUT2_IDX }, // has super flexible pin MUX capabilities,
{ 9, LCD_DATA_OUT3_IDX }, // so any signal can go to any pin!
{ 10, LCD_DATA_OUT4_IDX },
{ 11, LCD_DATA_OUT5_IDX },
{ 12, LCD_DATA_OUT6_IDX },
{ 13, LCD_DATA_OUT7_IDX },
};
for (int i = 0; i < 8; i++) {
esp_rom_gpio_connect_out_signal(mux[i].pin, LCD_DATA_OUT0_IDX + i, false, false);
gpio_hal_iomux_func_sel(GPIO_PIN_MUX_REG[mux[i].pin], PIN_FUNC_GPIO);
gpio_set_drive_capability((gpio_num_t)mux[i].pin, (gpio_drive_cap_t)3);
}
*/
// Clock
esp_rom_gpio_connect_out_signal(_cfg.pin_wr, LCD_PCLK_IDX, _cfg.invert_pclk, false);
gpio_hal_iomux_func_sel(GPIO_PIN_MUX_REG[_cfg.pin_wr], PIN_FUNC_GPIO);
gpio_set_drive_capability((gpio_num_t)_cfg.pin_wr, (gpio_drive_cap_t)3);
// This program has a known fixed-size data buffer (2496 bytes) that fits
// in a single DMA descriptor (max 4095 bytes). Large transfers would
// require a linked list of descriptors, but here it's just one...
/*
desc.dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
desc.dw0.suc_eof = 0; // Last descriptor
desc.next = &desc; // No linked list
*/
// Remaining descriptor elements are initialized before each DMA transfer.
// Allocate DMA channel and connect it to the LCD peripheral
static gdma_channel_alloc_config_t dma_chan_config = {
.sibling_chan = NULL,
.direction = GDMA_CHANNEL_DIRECTION_TX,
.flags = {
.reserve_sibling = 0
}
};
gdma_new_channel(&dma_chan_config, &dma_chan);
gdma_connect(dma_chan, GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_LCD, 0));
static gdma_strategy_config_t strategy_config = {
.owner_check = false,
.auto_update_desc = false
};
gdma_apply_strategy(dma_chan, &strategy_config);
#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 1, 0)
gdma_transfer_config_t transfer_config = {
#ifdef SPIRAM_DMA_BUFFER
.max_data_burst_size = 64,
.access_ext_mem = true
#else
.max_data_burst_size = 32,
.access_ext_mem = false
#endif
};
gdma_config_transfer(dma_chan, &transfer_config);
#else
gdma_transfer_ability_t ability = {
.sram_trans_align = 32,
.psram_trans_align = 64,
};
gdma_set_transfer_ability(dma_chan, &ability);
#endif
// Enable DMA transfer callback
static gdma_tx_event_callbacks_t tx_cbs = {
// .on_trans_eof is literally the only gdma tx event type available
.on_trans_eof = gdma_on_trans_eof_callback
};
gdma_register_tx_event_callbacks(dma_chan, &tx_cbs, NULL);
// This uses a busy loop to wait for each DMA transfer to complete...
// but the whole point of DMA is that one's code can do other work in
// the interim. The CPU is totally free while the transfer runs!
while (LCD_CAM.lcd_user.lcd_start); // Wait for DMA completion callback
// After much experimentation, each of these steps is required to get
// a clean start on the next LCD transfer:
gdma_reset(dma_chan); // Reset DMA to known state
esp_rom_delay_us(1000);
LCD_CAM.lcd_user.lcd_dout = 1; // Enable data out
LCD_CAM.lcd_user.lcd_update = 1; // Update registers
LCD_CAM.lcd_misc.lcd_afifo_reset = 1; // Reset LCD TX FIFO
return true; // no return val = illegal instruction
}
void Bus_Parallel16::release(void)
{
if (_i80_bus)
{
esp_lcd_del_i80_bus(_i80_bus);
_i80_bus = nullptr;
}
if (_dmadesc_a)
{
heap_caps_free(_dmadesc_a);
_dmadesc_a = nullptr;
_dmadesc_count = 0;
}
}
void Bus_Parallel16::enable_double_dma_desc(void)
{
ESP_LOGI("S3", "Enabled support for secondary DMA buffer.");
_double_dma_buffer = true;
}
// Need this to work for double buffers etc.
bool Bus_Parallel16::allocate_dma_desc_memory(size_t len)
{
if (_dmadesc_a) heap_caps_free(_dmadesc_a); // free all dma descrptios previously
_dmadesc_count = len;
ESP_LOGD("S3", "Allocating %d bytes memory for DMA descriptors.", (int)sizeof(HUB75_DMA_DESCRIPTOR_T) * len);
_dmadesc_a= (HUB75_DMA_DESCRIPTOR_T*)heap_caps_malloc(sizeof(HUB75_DMA_DESCRIPTOR_T) * len, MALLOC_CAP_DMA);
if (_dmadesc_a == nullptr)
{
ESP_LOGE("S3", "ERROR: Couldn't malloc _dmadesc_a. Not enough memory.");
return false;
}
if (_double_dma_buffer)
{
_dmadesc_b= (HUB75_DMA_DESCRIPTOR_T*)heap_caps_malloc(sizeof(HUB75_DMA_DESCRIPTOR_T) * len, MALLOC_CAP_DMA);
if (_dmadesc_b == nullptr)
{
ESP_LOGE("S3", "ERROR: Couldn't malloc _dmadesc_b. Not enough memory.");
_double_dma_buffer = false;
}
}
/// override static
_dmadesc_a_idx = 0;
_dmadesc_b_idx = 0;
return true;
}
void Bus_Parallel16::create_dma_desc_link(void *data, size_t size, bool dmadesc_b)
{
static constexpr size_t MAX_DMA_LEN = (4096-4);
if (size > MAX_DMA_LEN) {
size = MAX_DMA_LEN;
ESP_LOGW("S3", "Creating DMA descriptor which links to payload with size greater than MAX_DMA_LEN!");
}
if ( dmadesc_b == true)
{
_dmadesc_b[_dmadesc_b_idx].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
//_dmadesc_b[_dmadesc_b_idx].dw0.suc_eof = 0;
_dmadesc_b[_dmadesc_b_idx].dw0.suc_eof = (_dmadesc_b_idx == (_dmadesc_count-1));
_dmadesc_b[_dmadesc_b_idx].dw0.size = _dmadesc_b[_dmadesc_b_idx].dw0.length = size; //sizeof(data);
_dmadesc_b[_dmadesc_b_idx].buffer = data; //data;
if (_dmadesc_b_idx == _dmadesc_count-1) {
_dmadesc_b[_dmadesc_b_idx].next = (dma_descriptor_t *) &_dmadesc_b[0];
}
else {
_dmadesc_b[_dmadesc_b_idx].next = (dma_descriptor_t *) &_dmadesc_b[_dmadesc_b_idx+1];
}
_dmadesc_b_idx++;
}
else
{
if ( _dmadesc_a_idx >= _dmadesc_count)
{
ESP_LOGE("S3", "Attempted to create more DMA descriptors than allocated. Expecting max %u descriptors.", (unsigned int)_dmadesc_count);
return;
}
_dmadesc_a[_dmadesc_a_idx].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
//_dmadesc_a[_dmadesc_a_idx].dw0.suc_eof = 0;
_dmadesc_a[_dmadesc_a_idx].dw0.suc_eof = (_dmadesc_a_idx == (_dmadesc_count-1));
_dmadesc_a[_dmadesc_a_idx].dw0.size = _dmadesc_a[_dmadesc_a_idx].dw0.length = size; //sizeof(data);
_dmadesc_a[_dmadesc_a_idx].buffer = data; //data;
if (_dmadesc_a_idx == _dmadesc_count-1) {
_dmadesc_a[_dmadesc_a_idx].next = (dma_descriptor_t *) &_dmadesc_a[0];
}
else {
_dmadesc_a[_dmadesc_a_idx].next = (dma_descriptor_t *) &_dmadesc_a[_dmadesc_a_idx+1];
}
_dmadesc_a_idx++;
}
} // end create_dma_desc_link
void Bus_Parallel16::dma_transfer_start()
{
gdma_start(dma_chan, (intptr_t)&_dmadesc_a[0]); // Start DMA w/updated descriptor(s)
esp_rom_delay_us(100); // Must 'bake' a moment before...
LCD_CAM.lcd_user.lcd_start = 1; // Trigger LCD DMA transfer
} // end
void Bus_Parallel16::dma_transfer_stop()
{
LCD_CAM.lcd_user.lcd_reset = 1; // Trigger LCD DMA transfer
LCD_CAM.lcd_user.lcd_update = 1; // Trigger LCD DMA transfer
gdma_stop(dma_chan);
} // end
void Bus_Parallel16::flip_dma_output_buffer(int back_buffer_id)
{
// if ( _double_dma_buffer == false) return;
if ( back_buffer_id == 1) // change across to everything 'b''
{
_dmadesc_a[_dmadesc_count-1].next = (dma_descriptor_t *) &_dmadesc_b[0];
_dmadesc_b[_dmadesc_count-1].next = (dma_descriptor_t *) &_dmadesc_b[0];
}
else
{
_dmadesc_b[_dmadesc_count-1].next = (dma_descriptor_t *) &_dmadesc_a[0];
_dmadesc_a[_dmadesc_count-1].next = (dma_descriptor_t *) &_dmadesc_a[0];
}
//current_back_buffer_id ^= 1;
previousBufferFree = false;
//while (i2s_parallel_is_previous_buffer_free() == false) {}
while (!previousBufferFree);
} // end flip
#endif
|