RetroGame
write dma framebuffer
多數掌機使用的LCD輸出模組都會搭配DMA使用,藉此減輕CPU負擔,而加速顯示最快的方式,就是由軟體程式直接操作DMA暫存器。最典型的例子是Notaz於Wiz掌機實作的PCSX ReARMed模擬器:Notaz將Wiz的硬體DMA暫存器所指向的記憶體映射成一塊顯示Buffer,SDL最後輸出畫面時直接更新到該Buffer,接著Toggle DMA旗標輸出,成功讓PS模擬器加速運行。
RS97掌機的CPU速度較低,因此司徒目前想到最快速的方式也就是Notaz這招,藉此提升PS模擬器的效能。不過,要在RS97上實作DMA操作,需要先用一個小程式測試,證明可以直接操作DMA暫存器並輸出LCD畫面。比較大的問題在於DMA暫存器:由於目前Kernel還是只能使用官方的版本,無法直接從Kernel配置DMA Buffer並設定給DMA暫存器,所以司徒改為直接讀取DMA指標,並將該Buffer重新映射給軟體程式使用。另外,在畫面更新時不能呼叫SDL_Flip()。測試程式碼如下:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <SDL.h>
#include <SDL_image.h>
#include <SDL_ttf.h>
/* Physical base address that main() passes to map_it() for the DMA register window. */
#define DMA_BASE 0x13420000
/*
 * DMA register byte offsets, shifted right by 2 so they can be used directly
 * as indices into mem[] (i.e. they are word indices, not byte offsets).
 * NOTE(review): this assumes mem[] elements are 4 bytes wide, which holds only
 * where unsigned long is 32-bit -- confirm for the target toolchain.
 * The "0"/"1" suffixes presumably denote DMA channel 0 and 1 -- confirm
 * against the SoC datasheet.
 */
#define DSA0 (0x00 >> 2)
#define DTA0 (0x04 >> 2)
#define DTC0 (0x08 >> 2)
#define DRT0 (0x0C >> 2)
#define DCS0 (0x10 >> 2)
#define DCM0 (0x14 >> 2)
#define DDA0 (0x18 >> 2)
#define DSD0 (0x1C >> 2)
#define DSA1 (0x20 >> 2)
#define DTA1 (0x24 >> 2)
#define DTC1 (0x28 >> 2)
#define DRT1 (0x2C >> 2)
#define DCS1 (0x30 >> 2)
#define DCM1 (0x34 >> 2)
#define DDA1 (0x38 >> 2)
#define DSD1 (0x3C >> 2)
/* Physical base address that main() passes to map_it() for the LCD controller window. */
#define LCD_BASE 0x13050000
/*
 * LCD controller register word indices (byte offset >> 2), same convention as
 * the DMA indices above. main() reads LCDSA0 and uses that value as the
 * physical address of the framebuffer it then maps and draws into.
 */
#define LCDDA0 (0x40 >> 2)
#define LCDSA0 (0x44 >> 2)
#define LCDFID0 (0x48 >> 2)
#define LCDCMD0 (0x4C >> 2)
#define LCDDA1 (0x50 >> 2)
#define LCDSA1 (0x54 >> 2)
#define LCDFID1 (0x58 >> 2)
#define LCDCMD1 (0x5C >> 2)
/*
 * Length in bytes used when mapping/unmapping the register windows.
 * NOTE(review): despite the name this is probably not the kernel page size;
 * it only needs to cover the highest register offset used (0x5C) -- confirm.
 */
#define PAGE_SIZE 1024
/* Descriptor for /dev/mem; -1 until map_it() opens it. */
int fd=-1;
/* Base of the mapping most recently created by map_it(); volatile because it is used for register/framebuffer access. */
volatile unsigned long *mem;
/*
 * Map `size` bytes of physical memory starting at `addr` via /dev/mem.
 *
 * On success the file-level `fd` holds the open /dev/mem descriptor and
 * `mem` points at the start of the mapping; release both with unmap_it().
 * On failure nothing stays open: `fd` is -1 and `mem` is NULL.
 *
 * addr: physical address to map; mmap() requires the file offset to be a
 *       multiple of the system page size, otherwise the call fails.
 * size: number of bytes to map.
 *
 * Returns 0 on success, -1 if /dev/mem cannot be opened or mapped.
 */
int map_it(unsigned long addr, unsigned long size)
{
    /* O_SYNC is kept from the original so the mapping is opened synchronously. */
    fd = open("/dev/mem", O_RDWR | O_SYNC);
    if (fd < 0) {
        printf("failed to open /dev/mem\n");
        mem = NULL;
        return -1;
    }

    void *region = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, (off_t)addr);
    if (region == MAP_FAILED) {
        /* mmap reports failure with MAP_FAILED, not NULL; do not leak the descriptor. */
        perror("failed to mmap /dev/mem");
        close(fd);
        fd = -1;
        mem = NULL;
        return -1;
    }

    mem = region;
    return 0;
}
/*
 * Release the mapping and descriptor created by map_it().
 *
 * size: the same byte count that was passed to map_it().
 *
 * Safe to call when no mapping exists (mem is NULL or MAP_FAILED) or when
 * /dev/mem is not open; in that case the corresponding step is skipped.
 * Afterwards `mem` is NULL and `fd` is -1 so a stale pointer or descriptor
 * cannot be reused or released twice.
 */
void unmap_it(unsigned long size)
{
    if (mem != NULL && mem != MAP_FAILED) {
        munmap((void *)mem, size);
    }
    mem = NULL;

    if (fd >= 0) {
        close(fd);
    }
    fd = -1;
}
/* Name and word index of one 32-bit register to print. */
struct reg_entry {
    const char *name;
    unsigned int index;
};

/*
 * Map a physical window with map_it() and verify the result is usable.
 * Returns 0 on success, -1 (after printing a message) otherwise.
 */
static int map_checked(unsigned long addr, unsigned long size)
{
    if (map_it(addr, size) != 0 || mem == NULL || mem == MAP_FAILED) {
        printf("failed to map 0x%lx\n", addr);
        return -1;
    }
    return 0;
}

/* Print each listed register of the current mapping as "NAME: 0xVALUE". */
static void dump_regs(const struct reg_entry *regs, size_t count)
{
    /* The register indices are 32-bit word indices, so read through uint32_t. */
    volatile uint32_t *words = (volatile uint32_t *)mem;

    for (size_t i = 0; i < count; i++) {
        printf("%s: 0x%x\n", regs[i].name, (unsigned int)words[regs[i].index]);
    }
}

/*
 * Test program: dump the LCD and DMA registers, take the framebuffer
 * physical address from LCDSA0, map it, and cycle the whole screen through
 * red/green/blue by writing RGB565 pixels straight into that buffer
 * (SDL_Flip() is intentionally never called).
 *
 * The loop ends on SDL_QUIT or any key press so that the mapping is released
 * and SDL is shut down. Returns 0 on normal exit, EXIT_FAILURE if a mapping
 * or SDL initialisation step fails.
 */
int main(int argc, char* argv[])
{
    static const struct reg_entry lcd_regs[] = {
        {"LCDDA0", LCDDA0}, {"LCDSA0", LCDSA0}, {"LCDFID0", LCDFID0}, {"LCDCMD0", LCDCMD0},
        {"LCDDA1", LCDDA1}, {"LCDSA1", LCDSA1}, {"LCDFID1", LCDFID1}, {"LCDCMD1", LCDCMD1},
    };
    static const struct reg_entry dma_regs[] = {
        {"DSA0", DSA0}, {"DTA0", DTA0}, {"DTC0", DTC0}, {"DRT0", DRT0},
        {"DCS0", DCS0}, {"DCM0", DCM0}, {"DDA0", DDA0}, {"DSD0", DSD0},
        {"DSA1", DSA1}, {"DTA1", DTA1}, {"DTC1", DTC1}, {"DRT1", DRT1},
        {"DCS1", DCS1}, {"DCM1", DCM1}, {"DDA1", DDA1}, {"DSD1", DSD1},
    };
    /* RGB565 red, green, blue. */
    static const uint16_t color[] = {0xf800, 0x7e0, 0x1f};
    enum { SCREEN_W = 320, SCREEN_H = 480, BYTES_PER_PIXEL = 2 };

    (void)argc;
    (void)argv;

    /* LCD controller: dump registers and remember the framebuffer address. */
    if (map_checked(LCD_BASE, PAGE_SIZE) != 0) {
        return EXIT_FAILURE;
    }
    dump_regs(lcd_regs, sizeof lcd_regs / sizeof lcd_regs[0]);
    const uint32_t dma_addr = ((volatile uint32_t *)mem)[LCDSA0];
    unmap_it(PAGE_SIZE);

    /* DMA controller: dump registers only. */
    if (map_checked(DMA_BASE, PAGE_SIZE) != 0) {
        return EXIT_FAILURE;
    }
    dump_regs(dma_regs, sizeof dma_regs / sizeof dma_regs[0]);
    unmap_it(PAGE_SIZE);

    if (SDL_Init(SDL_INIT_VIDEO) != 0) {
        printf("SDL_Init failed: %s\n", SDL_GetError());
        return EXIT_FAILURE;
    }
    /* The surface itself is never drawn to; the call only sets up the video mode. */
    SDL_Surface *screen = SDL_SetVideoMode(SCREEN_W, SCREEN_H, 16, SDL_HWSURFACE);
    if (screen == NULL) {
        printf("SDL_SetVideoMode failed: %s\n", SDL_GetError());
        SDL_Quit();
        return EXIT_FAILURE;
    }

    const uint32_t size = SCREEN_W * SCREEN_H * BYTES_PER_PIXEL;
    if (map_checked(dma_addr, size) != 0) {
        SDL_Quit();
        return EXIT_FAILURE;
    }
    volatile uint16_t *ptr = (volatile uint16_t *)mem;

    const size_t pixel_count = (size_t)SCREEN_W * SCREEN_H;
    const size_t color_count = sizeof color / sizeof color[0];
    size_t idx = 0;
    int running = 1;

    while (running) {
        /* Fill the whole framebuffer with the current colour. */
        for (size_t px = 0; px < pixel_count; px++) {
            ptr[px] = color[idx];
        }
        idx = (idx + 1) % color_count;
        SDL_Delay(100);

        /* Give the user a way out so the cleanup below is reachable. */
        SDL_Event ev;
        while (SDL_PollEvent(&ev)) {
            if (ev.type == SDL_QUIT || ev.type == SDL_KEYDOWN) {
                running = 0;
            }
        }
    }

    unmap_it(size);
    /* Pause kept from the original before shutting SDL down. */
    SDL_Delay(5000);
    SDL_Quit();
    return 0;
}
P.S. 開啟/dev/mem時加上O_SYNC旗標,可以讓映射出來的記憶體不經過Cache;但是,沒有Cache的缺點就是存取速度會比較慢一點。