1 module grain.cuda.allocator;
2 
3 import grain.tensor : Opt;
4 import grain.allocator : CPUMallocator;
5 import grain.cuda.testing : checkCuda;
6 import grain.dpp.cuda_runtime_api;
7 
/**
Host-side allocator built on the CUDA runtime calls `cudaMallocHost` and
`cudaFreeHost` (pinned, i.e. page-locked, host memory per the CUDA runtime
documentation).
*/
struct PinnedMallocator
{
    /// Options carried by the allocator; its members are reachable through `alias this`.
    Opt opt;
    alias opt this;

    /// Device tag of the memory handed out by this allocator.
    enum deviceof = "cpu";
    /// Marks this allocator's memory as pinned.
    enum pinned = true;

    /**
    Requests `bytes` bytes from `cudaMallocHost`.

    The status returned by the runtime is passed to `checkCuda`.

    Params:
        bytes = number of bytes to allocate

    Returns:
        A slice of length `bytes` over the new allocation, or `null` when
        `bytes` is zero or the runtime left the pointer null.
    */
    @trusted @nogc nothrow
    void[] allocate()(size_t bytes)
    {
        import grain.dpp.cuda_runtime_api : cudaMallocHost;

        // Zero-sized requests never reach the CUDA runtime.
        if (bytes == 0)
            return null;

        void* hostPtr;
        checkCuda(cudaMallocHost(&hostPtr, bytes));

        if (hostPtr is null)
            return null;
        return hostPtr[0 .. bytes];
    }

    /**
    Hands the block back to the CUDA runtime through `cudaFreeHost`.

    $(D @system) because any other slice still referring to `b` dangles
    after this call.

    Params:
        b = block to release; only its pointer is used

    Returns:
        Always `true`; the runtime status is passed to `checkCuda`.
    */
    @system @nogc nothrow
    bool deallocate()(void[] b)
    {
        import grain.dpp.cuda_runtime_api : cudaFreeHost;

        auto hostPtr = b.ptr;
        checkCuda(cudaFreeHost(hostPtr));
        return true;
    }

    /// Default-constructed instance.
    enum instance = PinnedMallocator();
}
46 
47 
/**
CUDA heap allocator.

Memory is obtained with `cudaMalloc` and released with `cudaFree`. Before
either call the device given by `opt.deviceId` is selected with
`cudaSetDevice`.
*/
struct CuMallocator
{
    /// Options; `deviceId` is the device passed to `cudaSetDevice`.
    Opt opt;
    alias opt this;

    /// device indicator
    enum deviceof = "cuda";
    /// This allocator does not hand out pinned host memory.
    enum pinned = false;

    /**
    Allocates `bytes` bytes with `cudaMalloc` after selecting `opt.deviceId`.

    The statuses of both `cudaSetDevice` and `cudaMalloc` are passed to
    `checkCuda`.

    Params:
        bytes = number of bytes to allocate

    Returns:
        A slice of length `bytes` over the new allocation, or `null` when
        `bytes` is zero or the runtime left the pointer null.
    */
    @trusted @nogc nothrow
    void[] allocate()(size_t bytes)
    {
        if (!bytes) return null;

        void* p;
        // The device switch can fail (e.g. an invalid device id); report it
        // instead of silently allocating on whatever device is current.
        checkCuda(cudaSetDevice(this.opt.deviceId));
        checkCuda(cudaMalloc(&p, bytes));
        return p ? p[0 .. bytes] : null;
    }

    /**
    Releases a block with `cudaFree` after selecting `opt.deviceId`.

    $(D @system) because any other slice still referring to `b` dangles
    after this call.

    Params:
        b = block to release; only its pointer is used

    Returns:
        Always `true`; runtime statuses are passed to `checkCuda`.
    */
    @system @nogc nothrow
    bool deallocate()(void[] b)
    {
        // A null block (what allocate(0) returns) owns nothing, so skip the
        // device switch and the runtime call entirely.
        if (b.ptr is null) return true;

        // Same reasoning as in allocate: do not ignore a failed device switch.
        checkCuda(cudaSetDevice(this.opt.deviceId));
        checkCuda(cudaFree(b.ptr));
        return true;
    }

    /// Default-constructed instance.
    enum instance = CuMallocator();
}
82 
83 
// A tensor backed by RCStorage!CuMallocator can be constructed in @nogc code
// and exposes deviceof == "cuda" at compile time.
@nogc
unittest
{
    import grain.storage : RCStorage;
    import grain.tensor : Tensor;

    alias Storage = RCStorage!CuMallocator;
    auto tensor = Tensor!(2, double, Storage)(2, 3);
    static assert(tensor.deviceof == "cuda");
}
92 
93 
94 import grain.storage : RCStorage;
95 import grain.tensor : Tensor;
96 
/// Storage type used by default for CUDA tensors: RCStorage over CuMallocator.
alias DefaultCuStorage = RCStorage!CuMallocator;
/// Short name for the same storage type as DefaultCuStorage.
alias cuda = DefaultCuStorage;
/// RCStorage over PinnedMallocator.
alias pinned = RCStorage!PinnedMallocator;
/// Tensor of rank `dim` and element type `T` using DefaultCuStorage.
alias CuTensor(size_t dim, T) = Tensor!(dim, T, DefaultCuStorage);