RetroGame

write dma framebuffer


多數掌機使用的LCD輸出模組都會搭配DMA使用,藉此減輕CPU負擔,而加速顯示的最快方式則是軟體程式直接操作DMA暫存器,最典型的例子就是Notaz於Wiz掌機實作的PCSX ReARMed模擬器,Notaz將Wiz的硬體DMA暫存器映射成一塊顯示Buffer,然後SDL最後輸出畫面時,直接更新到該Buffer,接著Toggle DMA旗標輸出,成功讓PS模擬器加速運行。

RS97掌機的CPU速度較低,因此司徒目前想到最快速的方式也就是Notaz這招,藉此提升PS模擬器的效能。但是,要在RS97實作出DMA操作,需要先寫一個小程式測試,證明可以直接操作DMA暫存器並且可以輸出LCD畫面。比較大的問題在於DMA暫存器:因為目前Kernel還是只能使用官方的版本,所以無法直接從Kernel配置DMA Buffer並設定給DMA暫存器。相反的,司徒則是直接讀取DMA指標並重新映射Buffer給軟體程式使用,而在畫面更新時,則不能呼叫SDL_Flip()。測試程式碼如下:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <SDL.h>
#include <SDL_image.h>
#include <SDL_ttf.h>

/*
 * Physical base address of the DMA register block; main() maps it through
 * /dev/mem with map_it(DMA_BASE, PAGE_SIZE) and dumps the registers below.
 *
 * Every register macro is a byte offset shifted right by 2, i.e. divided by
 * four, so it can be used directly as an index into `mem`.
 * NOTE(review): this only addresses the intended register if
 * sizeof(unsigned long) == 4 on the target — confirm for the toolchain used.
 *
 * NOTE(review): the names look like per-channel source address (DSA), target
 * address (DTA), transfer count (DTC), request type (DRT), control/status
 * (DCS), command (DCM), descriptor address (DDA) and stride (DSD) registers
 * of the SoC's DMA controller — verify against the SoC datasheet.
 */
#define DMA_BASE  0x13420000
/* DMA channel 0 registers (offsets 0x00..0x1C). */
#define DSA0      (0x00 >> 2)
#define DTA0      (0x04 >> 2)
#define DTC0      (0x08 >> 2)
#define DRT0      (0x0C >> 2)
#define DCS0      (0x10 >> 2)
#define DCM0      (0x14 >> 2)
#define DDA0      (0x18 >> 2)
#define DSD0      (0x1C >> 2)
/* DMA channel 1 registers (offsets 0x20..0x3C). */
#define DSA1      (0x20 >> 2)
#define DTA1      (0x24 >> 2)
#define DTC1      (0x28 >> 2)
#define DRT1      (0x2C >> 2)
#define DCS1      (0x30 >> 2)
#define DCM1      (0x34 >> 2)
#define DDA1      (0x38 >> 2)
#define DSD1      (0x3C >> 2)

/*
 * Physical base address of the LCD controller register block, mapped by
 * main() with map_it(LCD_BASE, PAGE_SIZE).
 *
 * main() reads LCDSA0 and uses its value as the physical address of the
 * frame buffer it then maps and writes to.
 * NOTE(review): DA/SA/FID/CMD presumably stand for descriptor address,
 * source address, frame ID and command — verify against the SoC datasheet.
 */
#define LCD_BASE  0x13050000
#define LCDDA0    (0x40 >> 2)
#define LCDSA0    (0x44 >> 2)
#define LCDFID0   (0x48 >> 2)
#define LCDCMD0   (0x4C >> 2)
#define LCDDA1    (0x50 >> 2)
#define LCDSA1    (0x54 >> 2)
#define LCDFID1   (0x58 >> 2)
#define LCDCMD1   (0x5C >> 2)

/*
 * Length in bytes passed to map_it()/unmap_it() for the two register blocks.
 * NOTE(review): despite the name this is not necessarily the MMU page size;
 * the kernel rounds the mapping length up to whole pages anyway.
 */
#define PAGE_SIZE  1024

/* Descriptor of /dev/mem: opened by map_it(), closed by unmap_it(); -1 until first opened. */
int fd=-1;
/* Start of the current /dev/mem mapping, assigned by map_it(); volatile because it is used for hardware register and frame buffer access. */
volatile unsigned long *mem;

/*
 * Map `size` bytes of physical memory starting at `addr` into this process
 * through /dev/mem and publish the mapping in the global `mem`.
 *
 * addr: physical start address; it is passed to mmap() as the file offset,
 *       so it has to be page-aligned or mmap() will reject it.
 * size: length of the mapping in bytes.
 *
 * Returns 0 on success. Returns -1 if /dev/mem cannot be opened or the
 * mapping fails; in that case no descriptor is left open, `fd` is -1 and
 * `mem` is NULL.
 */
int map_it(unsigned long addr, unsigned long size)
{
  void *base;

  /* O_SYNC asks for an uncached mapping so writes reach the hardware directly. */
  fd = open("/dev/mem", O_RDWR | O_SYNC);
  if(fd < 0){
    printf("failed to open /dev/mem\n");
    mem = NULL;
    return -1;
  }

  base = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, (off_t)addr);
  if(base == MAP_FAILED){
    /* Do not leave MAP_FAILED in `mem`: callers would dereference it. */
    printf("failed to mmap 0x%lx\n", addr);
    close(fd);
    fd = -1;
    mem = NULL;
    return -1;
  }

  mem = base;
  return 0;
}

/*
 * Release the mapping created by map_it() and close /dev/mem.
 *
 * size: length in bytes that was passed to the matching map_it() call.
 *
 * Safe to call when nothing is mapped (before any map_it(), after a failed
 * map_it(), or twice in a row): the globals are reset afterwards so a stale
 * pointer is never unmapped and a stale descriptor number, which another
 * library may have reused in the meantime, is never closed.
 */
void unmap_it(unsigned long size)
{
  if(mem != NULL && mem != MAP_FAILED){
    munmap((void*)mem, size);
  }
  mem = NULL;

  if(fd >= 0){
    close(fd);
  }
  fd = -1;
}

/* Name/index pair describing one register of the currently mapped block. */
struct reg_desc {
  const char *name;
  unsigned int index;
};

/*
 * Map a physical range with map_it() and verify that `mem` is usable.
 * The extra pointer check also catches a map_it() that reports success
 * although mmap() itself failed. Returns 0 on success, -1 otherwise.
 */
static int map_checked(unsigned long addr, unsigned long size)
{
  if(map_it(addr, size) < 0){
    return -1;
  }
  if(mem == NULL || mem == MAP_FAILED){
    printf("failed to map 0x%lx\n", addr);
    return -1;
  }
  return 0;
}

/* Print every listed register of the mapped block as "NAME: 0xVALUE". */
static void dump_regs(const struct reg_desc *regs, size_t count)
{
  for(size_t i=0; i<count; i++){
    /* `mem` holds unsigned long words, so %lx is the matching conversion. */
    printf("%s: 0x%lx\n", regs[i].name, mem[regs[i].index]);
  }
}

/*
 * Test program: dump the LCD and DMA registers, then map the frame buffer
 * whose physical address is read from LCDSA0 and fill it with three cycling
 * colours, writing the pixels directly instead of calling SDL_Flip().
 *
 * Runs until an SDL quit or key-down event arrives, then unmaps the buffer,
 * waits five seconds and shuts SDL down. Returns 0 on a normal exit and 1
 * if mapping or SDL initialisation fails.
 */
int main(int argc, char* argv[])
{
  static const struct reg_desc lcd_regs[] = {
    {"LCDDA0", LCDDA0}, {"LCDSA0", LCDSA0}, {"LCDFID0", LCDFID0}, {"LCDCMD0", LCDCMD0},
    {"LCDDA1", LCDDA1}, {"LCDSA1", LCDSA1}, {"LCDFID1", LCDFID1}, {"LCDCMD1", LCDCMD1},
  };
  static const struct reg_desc dma_regs[] = {
    {"DSA0", DSA0}, {"DTA0", DTA0}, {"DTC0", DTC0}, {"DRT0", DRT0},
    {"DCS0", DCS0}, {"DCM0", DCM0}, {"DDA0", DDA0}, {"DSD0", DSD0},
    {"DSA1", DSA1}, {"DTA1", DTA1}, {"DTC1", DTC1}, {"DRT1", DRT1},
    {"DCS1", DCS1}, {"DCM1", DCM1}, {"DDA1", DDA1}, {"DSD1", DSD1},
  };
  /* Presumably RGB565 red, green and blue — the video mode below is 16 bpp. */
  static const uint16_t color[] = {0xf800, 0x7e0, 0x1f};
  enum { FB_WIDTH = 320, FB_HEIGHT = 480, FB_BPP = 16 };
  const uint32_t pixels = FB_WIDTH * FB_HEIGHT;
  const uint32_t size = pixels * (FB_BPP / 8);
  uint32_t dma_addr;

  (void)argc;
  (void)argv;

  /* LCD controller: dump the registers and remember the frame buffer address. */
  if(map_checked(LCD_BASE, PAGE_SIZE) < 0){
    return 1;
  }
  dump_regs(lcd_regs, sizeof(lcd_regs) / sizeof(lcd_regs[0]));
  dma_addr = (uint32_t)mem[LCDSA0];
  unmap_it(PAGE_SIZE);

  /* DMA controller: dump only. */
  if(map_checked(DMA_BASE, PAGE_SIZE) < 0){
    return 1;
  }
  dump_regs(dma_regs, sizeof(dma_regs) / sizeof(dma_regs[0]));
  unmap_it(PAGE_SIZE);

  if(SDL_Init(SDL_INIT_VIDEO) < 0){
    printf("failed to init SDL: %s\n", SDL_GetError());
    return 1;
  }
  SDL_Surface *screen = SDL_SetVideoMode(FB_WIDTH, FB_HEIGHT, FB_BPP, SDL_HWSURFACE);
  if(screen == NULL){
    printf("failed to set video mode: %s\n", SDL_GetError());
    SDL_Quit();
    return 1;
  }

  /*
   * Map the buffer the LCD controller is scanning out.
   * NOTE(review): mmap() needs a page-aligned offset; this assumes the
   * address read from LCDSA0 is page-aligned — confirm on the device.
   */
  if(map_checked(dma_addr, size) < 0){
    SDL_Quit();
    return 1;
  }
  volatile uint16_t *ptr = (volatile uint16_t*)mem;

  unsigned long idx = 0;
  int running = 1;
  while(running){
    /* Drain pending events so the test can be ended and the cleanup below runs. */
    SDL_Event ev;
    while(SDL_PollEvent(&ev)){
      if(ev.type == SDL_QUIT || ev.type == SDL_KEYDOWN){
        running = 0;
      }
    }

    /* Write straight into the mapped buffer; SDL_Flip() is deliberately not called. */
    for(uint32_t c=0; c<pixels; c++){
      ptr[c] = color[idx];
    }
    idx = (idx + 1) % (sizeof(color) / sizeof(color[0]));
    SDL_Delay(100);
  }

  unmap_it(size);
  SDL_Delay(5000);
  SDL_Quit();
  return 0;
}

P.S. O_SYNC可以配置Memory成沒有Cache機制的記憶體,但是,沒有Cache的缺點就是速度會比較慢一點。


返回上一頁