ぬの部屋（仮）
nu-no-he-ya

月火水木金土日

1234567

891011121314

15161718192021

22232425262728

293031

3456789

10111213141516

17181920212223

24252627282930

12345

6789101112

13141516171819

20212223242526

2728293031

1234567

891011121314

15161718192021

22232425262728

2930

123

45678910

11121314151617

18192021222324

25262728293031

123456

78910111213

14151617181920

21222324252627

28293031

2345678

9101112131415

16171819202122

23242526272829

1234

567891011

12131415161718

19202122232425

262728293031

123456

78910111213

14151617181920

21222324252627

282930

3456789

10111213141516

17181920212223

24252627282930

3456789

10111213141516

17181920212223

2425262728

12345

6789101112

13141516171819

20212223242526

2728293031

2345678

9101112131415

16171819202122

23242526272829

3031

123

45678910

11121314151617

18192021222324

252627282930

123456

78910111213

14151617181920

21222324252627

28293031

2345678

9101112131415

16171819202122

23242526272829

1234

567891011

12131415161718

19202122232425

262728293031

1234567

891011121314

15161718192021

22232425262728

293031

3456789

10111213141516

17181920212223

24252627282930

12345

6789101112

13141516171819

20212223242526

2728293031

1234567

891011121314

15161718192021

22232425262728

2930

123

45678910

11121314151617

18192021222324

25262728293031

1234

567891011

12131415161718

19202122232425

26272829

1234567

891011121314

15161718192021

22232425262728

293031

123

45678910

11121314151617

18192021222324

25262728293031

12345

6789101112

13141516171819

20212223242526

27282930

2345678

9101112131415

16171819202122

23242526272829

3031

123

45678910

11121314151617

18192021222324

252627282930

123456

78910111213

14151617181920

21222324252627

28293031

3456789

10111213141516

17181920212223

24252627282930

1234

567891011

12131415161718

19202122232425

2627282930

1234567

891011121314

15161718192021

22232425262728

293031

3456789

10111213141516

17181920212223

24252627282930

12345

6789101112

13141516171819

20212223242526

2728293031

12345

6789101112

13141516171819

20212223242526

2728

2345678

9101112131415

16171819202122

23242526272829

3031

1234

567891011

12131415161718

19202122232425

262728293031

123456

78910111213

14151617181920

21222324252627

282930

3456789

10111213141516

17181920212223

24252627282930

1234

567891011

12131415161718

19202122232425

2627282930

1234567

891011121314

15161718192021

22232425262728

293031

123

45678910

11121314151617

18192021222324

25262728293031

12345

6789101112

13141516171819

20212223242526

27282930

2345678

9101112131415

16171819202122

23242526272829

3031

123

45678910

11121314151617

18192021222324

252627282930

123456

78910111213

14151617181920

21222324252627

28293031

123456

78910111213

14151617181920

21222324252627

3456789

10111213141516

17181920212223

24252627282930

12345

6789101112

13141516171819

20212223242526

2728293031

1234567

891011121314

15161718192021

22232425262728

2930

123

45678910

11121314151617

18192021222324

25262728293031

12345

6789101112

13141516171819

20212223242526

27282930

2345678

9101112131415

16171819202122

23242526272829

3031

1234

567891011

12131415161718

19202122232425

262728293031

123456

78910111213

14151617181920

21222324252627

282930

3456789

10111213141516

17181920212223

24252627282930

1234

567891011

12131415161718

19202122232425

2627282930

1234567

891011121314

15161718192021

22232425262728

293031

1234567

891011121314

15161718192021

22232425262728

123

45678910

11121314151617

18192021222324

25262728293031

123456

78910111213

14151617181920

21222324252627

28293031

2345678

9101112131415

16171819202122

23242526272829

1234

567891011

12131415161718

19202122232425

262728293031

123456

78910111213

14151617181920

21222324252627

282930

3456789

10111213141516

17181920212223

24252627282930

12345

6789101112

13141516171819

20212223242526

2728293031

1234567

891011121314

15161718192021

22232425262728

2930

123

45678910

11121314151617

18192021222324

25262728293031

12345

6789101112

13141516171819

20212223242526

27282930

2345678

9101112131415

16171819202122

23242526272829

3031

3456789

10111213141516

17181920212223

242526272829

12345

6789101112

13141516171819

20212223242526

2728293031

2345678

9101112131415

16171819202122

23242526272829

3031

123

45678910

11121314151617

18192021222324

252627282930

123456

78910111213

14151617181920

21222324252627

28293031

2345678

9101112131415

16171819202122

23242526272829

1234

567891011

12131415161718

19202122232425

262728293031

1234567

891011121314

15161718192021

22232425262728

293031

3456789

10111213141516

17181920212223

24252627282930

12345

6789101112

13141516171819

20212223242526

2728293031

1234567

891011121314

15161718192021

22232425262728

2930

123

45678910

11121314151617

18192021222324

25262728293031

123

45678910

11121314151617

18192021222324

25262728

123456

78910111213

14151617181920

21222324252627

28293031

3456789

10111213141516

17181920212223

24252627282930

1234

567891011

12131415161718

19202122232425

2627282930

1234567

891011121314

15161718192021

22232425262728

293031

3456789

10111213141516

17181920212223

24252627282930

12345

6789101112

13141516171819

20212223242526

2728293031

2345678

9101112131415

16171819202122

23242526272829

3031

123

45678910

11121314151617

18192021222324

252627282930

123456

78910111213

14151617181920

21222324252627

28293031

2345678

9101112131415

16171819202122

23242526272829

1234

567891011

12131415161718

19202122232425

262728293031

1234567

891011121314

15161718192021

22232425262728

293031

123

45678910

11121314151617

18192021222324

25262728293031

12345

6789101112

13141516171819

20212223242526

27282930

123

45678910

11121314151617

18192021222324

252627282930

123456

78910111213

14151617181920

21222324252627

28293031

1234

567891011

12131415161718

19202122232425

2627282930

1234567

891011121314

15161718192021

22232425262728

293031

3456789

10111213141516

17181920212223

24252627282930

12345

6789101112

13141516171819

20212223242526

2728293031

12345

6789101112

13141516171819

20212223242526

2728

2345678

9101112131415

16171819202122

23242526272829

3031

123456

78910111213

14151617181920

21222324252627

282930

3456789

10111213141516

17181920212223

24252627282930

1234567

891011121314

15161718192021

22232425262728

293031

123

45678910

11121314151617

18192021222324

252627282930

123456

78910111213

14151617181920

21222324252627

28293031

123456

78910111213

14151617181920

21222324252627

28293031

1234

567891011

12131415161718

19202122232425

262728293031

123456

78910111213

14151617181920

21222324252627

282930

3456789

10111213141516

17181920212223

24252627282930

12345

6789101112

13141516171819

20212223242526

2728293031

1234567

891011121314

15161718192021

22232425262728

2930

123

45678910

11121314151617

18192021222324

25262728293031

12345

6789101112

13141516171819

20212223242526

27282930

2345678

9101112131415

16171819202122

23242526272829

3031

2345678

9101112131415

16171819202122

232425262728

1234

567891011

12131415161718

19202122232425

262728293031

1234567

891011121314

15161718192021

22232425262728

293031

3456789

10111213141516

17181920212223

24252627282930

12345

6789101112

13141516171819

20212223242526

2728293031

1234567

891011121314

15161718192021

22232425262728

2930

123

45678910

11121314151617

18192021222324

25262728293031

cudaMalloc/cudaFreeをcuda DLLの外側から呼び出す

mallocしたメモリはfreeするまで解放されない。それは知っている。

知っているが、CUDAプログラムをdll化した場合、まさかdll内関数から出た瞬間に自動解放されたりしないだろうな・・・？

という不安が頭をよぎったのでテスト。

以下、gpu_my_allocやdevice_to_hostなどは全てcuファイル内に書かれている。

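C++側から別個に呼び出してもちゃんと動作する。cudaMallocで確保したデバイスメモリはプロセス内のCUDAコンテキストに属しており、cudaFreeを呼ぶ（またはコンテキストが破棄される／プロセスが終了する）まで有効なので、DLL内の関数から戻っただけでは解放されない。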

C++側

#include <iostream>

#pragma warning(disable:4996)

#include "../CudaRuntime1/mytest.h"

#pragma comment(lib,"CudaRuntime1.lib")

void pnmP3_Write(const char* const fname, const int vmax, const int width, const int height, const unsigned char* const p);

// Test driver: builds a 100x50 solid-blue RGB image on the host, sends it
// through the DLL's GPU API (allocate -> upload -> process -> download ->
// free), and writes the downloaded pixels to "test.ppm".
//
// Returns 0 on success, 1 if no device buffer could be obtained.
int main()
{
  data_t dat;

  dat.width = 100;
  dat.height = 50;

  // Widen before multiplying so the pixel count is computed in size_t
  // instead of int.
  const size_t pixel_count =
    static_cast<size_t>(dat.width) * static_cast<size_t>(dat.height);

  // Interleaved RGB buffer, 3 bytes per pixel.
  unsigned char *c = new unsigned char[pixel_count * 3];

  // Fill every pixel with (R,G,B) = (0,0,255).
  for (size_t i = 0; i < pixel_count; i++) {
    c[i * 3 + 0] = 0;
    c[i * 3 + 1] = 0;
    c[i * 3 + 2] = 255;
  }

  dat.rgbdata = c;

  int status = 0;

  // Allocate memory on the GPU side.
  void* device = gpu_my_alloc(dat.width, dat.height);

  if (device != nullptr) {
    // Transfer the data to the GPU.
    host_to_device(&dat, device);

    // Run the processing.
    func_inverse(dat.width, dat.height, device);

    // Copy the result back to the CPU side.
    device_to_host(device, &dat);

    // Release the GPU-side memory.
    gpu_my_free(device);

    pnmP3_Write("test.ppm", 255, dat.width, dat.height, dat.rgbdata);
  }
  else {
    // Without a device buffer none of the GPU calls can do useful work.
    std::cerr << "gpu_my_alloc did not return a device buffer" << std::endl;
    status = 1;
  }

  // The host buffer was allocated with new[]; release it on every path.
  dat.rgbdata = nullptr;
  delete[] c;

  return status;
}

/////////////////////////////////////////////
// Image file output ////////////////////////
//! @brief Writes a PPM file (colour, text format "P3", one byte per RGB channel).
//! @param [in] fname  Output file name.
//! @param [in] vmax   Maximum value among all RGB components (written to the header).
//! @param [in] width  Image width in pixels.
//! @param [in] height Image height in pixels.
//! @param [in] p      Address of the image memory, laid out as RGBRGBRGB...
//! @details Given RGBRGBRGB... memory, writes it as RGB text to the file named
//!          fname. If p is null, a dimension is negative, or the file cannot be
//!          opened, a message is printed to stderr and nothing is written.
void pnmP3_Write(const char* const fname, const int vmax, const int width, const int height, const unsigned char* const p) { // PPM ASCII

  // Reject inputs that would make the loops below read out of bounds:
  // a negative size cast to size_t becomes a huge loop bound.
  if (p == nullptr || width < 0 || height < 0) {
    fprintf(stderr, "pnmP3_Write: invalid image (null data or negative size)\n");
    return;
  }

  FILE* fp = fopen(fname, "wb");
  if (fp == nullptr) {
    // fopen failed (bad path, permissions, ...); writing through a null
    // FILE* would crash.
    fprintf(stderr, "pnmP3_Write: cannot open output file\n");
    return;
  }

  // Header: magic number, dimensions, maximum channel value.
  fprintf(fp, "P3\n%d %d\n%d\n", width, height, vmax);

  // k is the running pixel index; each pixel occupies 3 consecutive bytes.
  size_t k = 0;
  for (size_t i = 0; i < (size_t)height; i++) {
    for (size_t j = 0; j < (size_t)width; j++) {
      fprintf(fp, "%d %d %d ", p[k * 3 + 0], p[k * 3 + 1], p[k * 3 + 2]);
      k++;
    }
    // One text line per image row.
    fprintf(fp, "\n");
  }

  fclose(fp);
}

CUDA側

mytest.h

// Public C interface of the CUDA DLL.
// The include guard makes it safe to include this header more than once;
// without it the definition of data_t below would be repeated.
#ifndef MYTEST_H_
#define MYTEST_H_

// DLL_PORT gives a function C linkage and marks it as exported from /
// imported into the DLL. Define __DLL_EXPORT_DO when building the DLL itself;
// leave it undefined in code that uses the DLL.
#if defined(_WIN32)
#  ifdef __DLL_EXPORT_DO
#    define DLL_PORT extern "C" __declspec(dllexport)
#  else
#    define DLL_PORT extern "C" __declspec(dllimport)
#  endif
#else
// Toolchains without __declspec: export with default symbol visibility.
#  define DLL_PORT extern "C" __attribute__((visibility("default")))
#endif

// Host-side image description passed across the DLL boundary.
struct data_t {
  int width;               // image width in pixels
  int height;              // image height in pixels
  unsigned char* rgbdata;  // host pixel buffer, 3 bytes per pixel (width * height * 3 bytes)
};

// Allocates GPU-side memory for a width x height image at 3 bytes per pixel
// and returns the device pointer. Release it with gpu_my_free.
DLL_PORT void* gpu_my_alloc(int width, int height);

// Runs the processing (colour inversion) on the width x height image stored
// in the device buffer `device`.
DLL_PORT void func_inverse(int width,int height, void* device);

// Transfers host->rgbdata (width * height * 3 bytes) to the device buffer.
DLL_PORT void host_to_device(data_t* host, void* device);

// Copies the result from the device buffer back into host->rgbdata.
DLL_PORT void device_to_host(void* device, data_t* host);

// Releases GPU-side memory obtained from gpu_my_alloc.
DLL_PORT void gpu_my_free(void* gpuptr);

#endif  // MYTEST_H_

mytest.cu

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>

#include "mytest.h"

// Argument bundle passed by value to the thread_inverse kernel.
struct gpudata {
  int width;         // image width in pixels; upper bound for a thread's x position
  int height;        // image height in pixels; upper bound for a thread's y position
  unsigned char* c;  // pixel buffer, indexed at 3 bytes per pixel; func_inverse stores its `device` argument here
};

// Inverts one pixel in place: each of the three bytes starting at `c` is
// replaced by 255 minus its current value.
// `width` and `height` are accepted but not used by this function.
__device__
void color_inverse(unsigned char* c, int width, int height) {
  for (int channel = 0; channel < 3; ++channel) {
    c[channel] = 255 - c[channel];
  }
}

// Kernel: inverts the colour of one pixel per thread.
//
// Launch layout: a 2-D grid of 2-D blocks. The thread at global position
// (x, y) handles pixel (x, y); threads whose position lies outside
// data.width x data.height do nothing, so the grid may overshoot the image.
// Uses no shared memory.
//
// data.c must point to a device buffer of at least width * height * 3 bytes.
__global__ void thread_inverse(gpudata data) {
  // Nothing to do for a missing buffer or a non-positive size. The size is
  // tested while still signed so a negative value is never converted into a
  // huge unsigned bound below.
  if (data.c == nullptr || data.width <= 0 || data.height <= 0) {
    return;
  }

  const size_t img_w = static_cast<size_t>(data.width);
  const size_t img_h = static_cast<size_t>(data.height);

  // Compute the 2-D position of the pixel this thread is responsible for.
  // blockIdx/blockDim are 32-bit, so widen before multiplying to keep the
  // product from wrapping.
  const size_t xpos = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t ypos = static_cast<size_t>(blockIdx.y) * blockDim.y + threadIdx.y;

  if (xpos < img_w && ypos < img_h) {
    // Byte offset of the pixel: row-major order, 3 bytes per pixel.
    const size_t pos = (ypos * img_w + xpos) * 3;

    unsigned char* c = data.c + pos;
    // color_inverse is the __device__ helper defined in this file. Its two
    // int parameters are unused; the pixel coordinates are passed as before.
    color_inverse(c, static_cast<int>(xpos), static_cast<int>(ypos));
  }

}

// Launches the thread_inverse kernel over a width x height image stored in
// the device buffer `device` (3 bytes per pixel).
//
// The call does nothing when `device` is null or a dimension is not positive.
// A failed launch is reported on stderr; the function itself returns void, so
// callers cannot observe the failure through the return value. The launch is
// not followed by a synchronization here, so errors raised while the kernel
// runs surface on a later synchronizing CUDA call.
void func_inverse(int width, int height, void* device){

  if (device == nullptr || width <= 0 || height <= 0) {
    return;
  }

  // 16 x 16 = 256 threads per block.
  const int blockw = 16;
  const int blockh = 16;
  const dim3 block(blockw, blockh);

  // Ceiling division: just enough blocks to cover the image. Written as
  // (n - 1) / b + 1 (valid because n > 0) so it cannot overflow int and does
  // not add a spare row/column of blocks when n is a multiple of b.
  const int gridw = (width - 1) / blockw + 1;
  const int gridh = (height - 1) / blockh + 1;

  const dim3 grid(gridw, gridh);

  gpudata gpud;
  gpud.width = width;
  gpud.height = height;
  gpud.c = (unsigned char*)device;
  thread_inverse<<<grid, block>>>(gpud);  // call the GPU-side function

  // A kernel launch returns no status; launch-configuration errors must be
  // fetched explicitly.
  const cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf(stderr, "func_inverse: kernel launch failed: %s\n", cudaGetErrorString(err));
  }

}

// Copies the host image host->rgbdata (width * height * 3 bytes) into the
// device buffer `device`.
//
// Null pointers and non-positive dimensions are rejected; a rejected call or
// a failed copy is reported on stderr (the function returns void).
void host_to_device(data_t* host, void* device) {
  if (host == nullptr || host->rgbdata == nullptr || device == nullptr) {
    fprintf(stderr, "host_to_device: null pointer argument\n");
    return;
  }
  if (host->width <= 0 || host->height <= 0) {
    return;  // empty image: nothing to transfer
  }

  // Widen before multiplying so the byte count is not computed in int.
  const size_t bytes =
    static_cast<size_t>(host->width) * static_cast<size_t>(host->height) * 3;

  // Transfer the data to be processed to the GPU side.
  const cudaError_t err = cudaMemcpy(
    device,
    host->rgbdata,
    bytes,
    cudaMemcpyHostToDevice);

  if (err != cudaSuccess) {
    fprintf(stderr, "host_to_device: cudaMemcpy failed: %s\n", cudaGetErrorString(err));
  }

}

// Copies width * height * 3 bytes from the device buffer `device` back into
// the host image host->rgbdata. The dimensions are read from `host`.
//
// Null pointers and non-positive dimensions are rejected; a rejected call or
// a failed copy is reported on stderr (the function returns void).
void device_to_host(void* device, data_t* host) {
  if (host == nullptr || host->rgbdata == nullptr || device == nullptr) {
    fprintf(stderr, "device_to_host: null pointer argument\n");
    return;
  }
  if (host->width <= 0 || host->height <= 0) {
    return;  // empty image: nothing to transfer
  }

  // Widen before multiplying so the byte count is not computed in int.
  const size_t bytes =
    static_cast<size_t>(host->width) * static_cast<size_t>(host->height) * 3;

  // Fetch the execution result from the GPU side.
  const cudaError_t err = cudaMemcpy(
    host->rgbdata,
    device,
    bytes,
    cudaMemcpyDeviceToHost);

  if (err != cudaSuccess) {
    fprintf(stderr, "device_to_host: cudaMemcpy failed: %s\n", cudaGetErrorString(err));
  }

}


// Allocates GPU-side memory for a width x height image at 3 bytes per pixel.
//
// Returns the device pointer, or a null pointer when a dimension is not
// positive or cudaMalloc fails (the failure is also reported on stderr).
// Release the buffer with gpu_my_free.
void* gpu_my_alloc(int width, int height) {
  if (width <= 0 || height <= 0) {
    return nullptr;
  }

  // Widen before multiplying so the byte count is not computed in int.
  const size_t bytes =
    static_cast<size_t>(width) * static_cast<size_t>(height) * 3;

  // Start from null so a failed allocation can never hand back an
  // uninitialised pointer.
  void* g_gpu = nullptr;
  const cudaError_t err = cudaMalloc(&g_gpu, bytes);  // allocate GPU-side memory
  if (err != cudaSuccess) {
    fprintf(stderr, "gpu_my_alloc: cudaMalloc failed: %s\n", cudaGetErrorString(err));
    return nullptr;
  }

  return g_gpu;
}

// Releases GPU-side memory through cudaFree.
// A failure reported by cudaFree is printed to stderr (the function returns
// void, so callers cannot observe it through the return value).
void gpu_my_free(void* gpuptr) {
  const cudaError_t err = cudaFree(gpuptr);  // release the GPU-side memory
  if (err != cudaSuccess) {
    fprintf(stderr, "gpu_my_free: cudaFree failed: %s\n", cudaGetErrorString(err));
  }
}