1 module derelict.cudnn7;
2 
3 /**
4     Translation of cudnn.h
5     forked from https://github.com/henrygouk/DerelictCuDNN/blob/master/source/derelict/cudnn7.d
6 */
7 
8 import derelict.cuda.runtimeapi;
9 import derelict.util.loader;
10 
private
{
    import derelict.util.system;

    // Pick the shared-library file name(s) for the current OS/architecture at
    // compile time. Only x86_64 targets are populated; every other combination
    // stops the build with an explanatory message instead of failing at load
    // time. The Mac entry lists two candidates in one comma-separated string.
    static if(Derelict_OS_Linux)
    {
        version(X86_64)
            enum libNames = "libcudnn.so.7";
        else
            static assert(0, "Need to implement cuDNN libNames for this arch.");
    }
    else static if(Derelict_OS_Windows)
    {
        version(X86_64)
            enum libNames = "cudnn64_7.dll";
        else
            static assert(0, "There are no cuDNN libNames for this arch and operating system.");
    }
    else static if(Derelict_OS_Mac)
    {
        version(X86_64)
            enum libNames = "libcudnn.7.dylib,libcudnn.dylib";
        else
            static assert(0, "There are no cuDNN libNames for this arch and operating system.");
    }
    else
    {
        static assert(0, "Need to implement cuDNN libNames for this operating system.");
    }

    /**
        Table of every bound cuDNN entry point.

        Each row is `[symbolName, paramType0, paramType1, ...]`. The return type
        is not listed because the generators below always emit `cudnnStatus_t`.
        The parameter strings are pasted verbatim into generated D code, so they
        must be valid D types and must match the C prototype in cudnn.h exactly
        (count, order, and by-value vs. by-pointer) -- a mismatch compiles fine
        but corrupts the call at run time.
    */
    enum functionTypes = [
        ["cudnnCreate", "cudnnHandle_t *"],
        ["cudnnDestroy", "cudnnHandle_t"],
        ["cudnnSetStream", "cudnnHandle_t", "cudaStream_t"],
        ["cudnnGetStream", "cudnnHandle_t", "cudaStream_t *"],


        ["cudnnCreateTensorDescriptor", "cudnnTensorDescriptor_t *"],
        ["cudnnSetTensor4dDescriptor", "cudnnTensorDescriptor_t", "cudnnTensorFormat_t", "cudnnDataType_t", "int",
            "int", "int", "int"],
        ["cudnnSetTensor4dDescriptorEx", "cudnnTensorDescriptor_t", "cudnnDataType_t", "int", "int", "int", "int",
            "int", "int", "int", "int"],
        ["cudnnGetTensor4dDescriptor", "const cudnnTensorDescriptor_t", "cudnnDataType_t *", "int *", "int *", "int *",
            "int *", "int *", "int *", "int *", "int *"],
        ["cudnnSetTensorNdDescriptor", "cudnnTensorDescriptor_t", "cudnnDataType_t", "int", "int *", "int *"],
        // The trailing two parameters are the output dimension and stride arrays, hence pointers.
        ["cudnnGetTensorNdDescriptor", "const cudnnTensorDescriptor_t", "int", "cudnnDataType_t *", "int *", "int *",
            "int *"],
        ["cudnnDestroyTensorDescriptor", "cudnnTensorDescriptor_t"],
        ["cudnnTransformTensor", "cudnnHandle_t", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const void *", "const cudnnTensorDescriptor_t", "void *"],
        ["cudnnAddTensor", "cudnnHandle_t", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const void *", "const cudnnTensorDescriptor_t", "void *"],


        ["cudnnCreateOpTensorDescriptor", "cudnnOpTensorDescriptor_t *"],
        ["cudnnSetOpTensorDescriptor", "cudnnOpTensorDescriptor_t", "cudnnOpTensorOp_t", "cudnnDataType_t",
            "cudnnNanPropagation_t"],
        ["cudnnGetOpTensorDescriptor", "const cudnnOpTensorDescriptor_t", "cudnnOpTensorOp_t *", "cudnnDataType_t *",
            "cudnnNanPropagation_t *"],
        ["cudnnDestroyOpTensorDescriptor", "cudnnOpTensorDescriptor_t"],
        ["cudnnOpTensor", "cudnnHandle_t", "const cudnnOpTensorDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const cudnnTensorDescriptor_t",
            "const void *", "const void *", "const cudnnTensorDescriptor_t", "void *"],
        ["cudnnSetTensor", "cudnnHandle_t", "const cudnnTensorDescriptor_t", "void *", "const void *"],
        ["cudnnScaleTensor", "cudnnHandle_t", "const cudnnTensorDescriptor_t", "void *", "const void *"],

        ["cudnnCreateReduceTensorDescriptor",
         "cudnnReduceTensorDescriptor_t *"],
        ["cudnnSetReduceTensorDescriptor",
         "cudnnReduceTensorDescriptor_t", "cudnnReduceTensorOp_t",
         "cudnnDataType_t", "cudnnNanPropagation_t", "cudnnReduceTensorIndices_t", "cudnnIndicesType_t"],
        ["cudnnGetReduceTensorDescriptor",
         "const cudnnReduceTensorDescriptor_t",
         "cudnnReduceTensorOp_t *",
         "cudnnDataType_t *",
         "cudnnNanPropagation_t *",
         "cudnnReduceTensorIndices_t *",
         "cudnnIndicesType_t *"],
        ["cudnnDestroyReduceTensorDescriptor",
         "cudnnReduceTensorDescriptor_t"],
        /* Helper function to return the minimum size of the index space to be passed to the reduction given the input and output tensors */
        ["cudnnGetReductionIndicesSize",
         "cudnnHandle_t",
         "cudnnReduceTensorDescriptor_t",
         "cudnnTensorDescriptor_t",
         "cudnnTensorDescriptor_t",
         "size_t *"],
        /* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output tensors */
        ["cudnnGetReductionWorkspaceSize",
         "cudnnHandle_t",
         "const cudnnReduceTensorDescriptor_t",
         "const cudnnTensorDescriptor_t",
         "const cudnnTensorDescriptor_t",
         "size_t *"],
        /* Tensor operation : C = reduce op( alpha * A ) + beta * C */
        /* The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops propagate NaN as usual. */
        /* The indices space is ignored for reduce ops other than min or max. */
        ["cudnnReduceTensor",
         "cudnnHandle_t",
         "const cudnnReduceTensorDescriptor_t",
         "void *",                             //  indices,
         "size_t",                             //  indicesSizeInBytes,
         "void *",                             //  workspace,
         "size_t",                             //  workspaceSizeInBytes,
         "const void *",                       //  alpha,
         "const cudnnTensorDescriptor_t",      //  aDesc,
         "const void *",                       //  A,
         "const void *",                       //  beta,
         "const cudnnTensorDescriptor_t",      //  cDesc,
         "void  *"                             //  C
            ],


        ["cudnnCreateFilterDescriptor", "cudnnFilterDescriptor_t *"],
        ["cudnnSetFilter4dDescriptor", "cudnnFilterDescriptor_t", "cudnnDataType_t", "cudnnTensorFormat_t", "int",
            "int", "int", "int"],
        // The descriptor being queried is the first argument, mirroring cudnnGetFilterNdDescriptor below.
        ["cudnnGetFilter4dDescriptor", "const cudnnFilterDescriptor_t", "cudnnDataType_t *", "cudnnTensorFormat_t *",
            "int *", "int *", "int *", "int *"],
        ["cudnnSetFilterNdDescriptor", "cudnnFilterDescriptor_t", "cudnnDataType_t", "cudnnTensorFormat_t", "int",
            "const int*"],
        ["cudnnGetFilterNdDescriptor", "const cudnnFilterDescriptor_t", "int", "cudnnDataType_t *",
            "cudnnTensorFormat_t *", "int *", "int*"],
        ["cudnnDestroyFilterDescriptor", "cudnnFilterDescriptor_t"],
        ["cudnnCreateConvolutionDescriptor", "cudnnConvolutionDescriptor_t *"],
        ["cudnnSetConvolution2dDescriptor", "cudnnConvolutionDescriptor_t", "int", "int", "int", "int", "int",
            "int", "cudnnConvolutionMode_t", "cudnnDataType_t"],
        ["cudnnGetConvolution2dDescriptor", "const cudnnConvolutionDescriptor_t", "int *", "int *", "int *",
            "int *", "int *", "int *", "cudnnConvolutionMode_t *", "cudnnDataType_t *"],
        // The four trailing parameters receive the output n/c/h/w, hence pointers.
        ["cudnnGetConvolution2dForwardOutputDim", "const cudnnConvolutionDescriptor_t",
            "const cudnnTensorDescriptor_t", "const cudnnFilterDescriptor_t", "int *", "int *", "int *", "int *"],
        ["cudnnSetConvolutionNdDescriptor", "cudnnConvolutionDescriptor_t", "int", "const int*", "const int*",
            "const int*", "cudnnConvolutionMode_t", "cudnnDataType_t"],
        ["cudnnGetConvolutionNdDescriptor", "const cudnnConvolutionDescriptor_t", "int", "int *", "int*", "int*",
            "int*", "cudnnConvolutionMode_t *", "cudnnDataType_t *"],
        ["cudnnGetConvolutionNdForwardOutputDim", "const cudnnConvolutionDescriptor_t",
            "const cudnnTensorDescriptor_t", "const cudnnFilterDescriptor_t", "int", "int*"],
        ["cudnnDestroyConvolutionDescriptor", "cudnnConvolutionDescriptor_t"],

        ["cudnnSetConvolutionGroupCount",
         "cudnnConvolutionDescriptor_t", "int"],
        ["cudnnGetConvolutionGroupCount",
         "cudnnConvolutionDescriptor_t", "int*"],

        ["cudnnFindConvolutionForwardAlgorithm", "cudnnHandle_t", "const cudnnTensorDescriptor_t",
            "const cudnnFilterDescriptor_t", "const cudnnConvolutionDescriptor_t", "const cudnnTensorDescriptor_t",
            "const int", "int *", "cudnnConvolutionFwdAlgoPerf_t *"],
        ["cudnnFindConvolutionForwardAlgorithmEx", "cudnnHandle_t", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnFilterDescriptor_t", "const void *", "const cudnnConvolutionDescriptor_t",
            "const cudnnTensorDescriptor_t", "void *", "const int", "int *", "cudnnConvolutionFwdAlgoPerf_t *",
            "void *", "size_t"],
        ["cudnnGetConvolutionForwardAlgorithm", "cudnnHandle_t", "const cudnnTensorDescriptor_t",
            "const cudnnFilterDescriptor_t", "const cudnnConvolutionDescriptor_t", "const cudnnTensorDescriptor_t",
            "cudnnConvolutionFwdPreference_t", "size_t", "cudnnConvolutionFwdAlgo_t *"],
        ["cudnnGetConvolutionForwardWorkspaceSize", "cudnnHandle_t", "const cudnnTensorDescriptor_t",
            "const cudnnFilterDescriptor_t", "const cudnnConvolutionDescriptor_t", "const cudnnTensorDescriptor_t",
            "cudnnConvolutionFwdAlgo_t", "size_t *"],
        ["cudnnConvolutionForward", "cudnnHandle_t", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnFilterDescriptor_t", "const void *", "const cudnnConvolutionDescriptor_t",
            "cudnnConvolutionFwdAlgo_t", "void *", "size_t", "const void *", "const cudnnTensorDescriptor_t",
            "void *"],
        ["cudnnConvolutionBackwardBias", "cudnnHandle_t", "const void *", "const cudnnTensorDescriptor_t",
            "const void *", "const void *", "const cudnnTensorDescriptor_t", "void *"],


        ["cudnnFindConvolutionBackwardFilterAlgorithm", "cudnnHandle_t", "const cudnnTensorDescriptor_t",
            "const cudnnTensorDescriptor_t", "const cudnnConvolutionDescriptor_t", "const cudnnFilterDescriptor_t",
            "const int", "int *", "cudnnConvolutionBwdFilterAlgoPerf_t *"],
        ["cudnnFindConvolutionBackwardFilterAlgorithmEx", "cudnnHandle_t", "const cudnnTensorDescriptor_t",
            "const void *", "const cudnnTensorDescriptor_t", "const void *", "const cudnnConvolutionDescriptor_t",
            "const cudnnFilterDescriptor_t", "void *", "const int", "int *", "cudnnConvolutionBwdFilterAlgoPerf_t *",
            "void *", "size_t"],
        ["cudnnGetConvolutionBackwardFilterAlgorithm", "cudnnHandle_t", "const cudnnTensorDescriptor_t",
            "const cudnnTensorDescriptor_t", "const cudnnConvolutionDescriptor_t", "const cudnnFilterDescriptor_t",
            "cudnnConvolutionBwdFilterPreference_t", "size_t", "cudnnConvolutionBwdFilterAlgo_t *"],
        ["cudnnGetConvolutionBackwardFilterWorkspaceSize", "cudnnHandle_t", "const cudnnTensorDescriptor_t",
            "const cudnnTensorDescriptor_t", "const cudnnConvolutionDescriptor_t", "const cudnnFilterDescriptor_t",
            "cudnnConvolutionBwdFilterAlgo_t", "size_t *"],
        ["cudnnConvolutionBackwardFilter", "cudnnHandle_t", "const void *", "const cudnnTensorDescriptor_t",
            "const void *", "const cudnnTensorDescriptor_t", "const void *", "const cudnnConvolutionDescriptor_t",
            "cudnnConvolutionBwdFilterAlgo_t", "void *", "size_t", "const void *", "const cudnnFilterDescriptor_t",
            "void *"],


        ["cudnnFindConvolutionBackwardDataAlgorithm", "cudnnHandle_t", "const cudnnFilterDescriptor_t",
            "const cudnnTensorDescriptor_t", "const cudnnConvolutionDescriptor_t", "const cudnnTensorDescriptor_t",
            "const int", "int *", "cudnnConvolutionBwdDataAlgoPerf_t *"],
        ["cudnnFindConvolutionBackwardDataAlgorithmEx", "cudnnHandle_t", "const cudnnFilterDescriptor_t",
            "const void *", "const cudnnTensorDescriptor_t", "const void *", "const cudnnConvolutionDescriptor_t",
            "const cudnnTensorDescriptor_t", "void *", "const int", "int *", "cudnnConvolutionBwdDataAlgoPerf_t *",
            "void *", "size_t"],
        ["cudnnGetConvolutionBackwardDataAlgorithm", "cudnnHandle_t", "const cudnnFilterDescriptor_t",
            "const cudnnTensorDescriptor_t", "const cudnnConvolutionDescriptor_t", "const cudnnTensorDescriptor_t",
            "cudnnConvolutionBwdDataPreference_t", "size_t", "cudnnConvolutionBwdDataAlgo_t *"],
        ["cudnnGetConvolutionBackwardDataWorkspaceSize", "cudnnHandle_t", "const cudnnFilterDescriptor_t",
            "const cudnnTensorDescriptor_t", "const cudnnConvolutionDescriptor_t", "const cudnnTensorDescriptor_t",
            "cudnnConvolutionBwdDataAlgo_t", "size_t *"],
        ["cudnnConvolutionBackwardData", "cudnnHandle_t", "const void *", "const cudnnFilterDescriptor_t",
            "const void *", "const cudnnTensorDescriptor_t", "const void *", "const cudnnConvolutionDescriptor_t",
            "cudnnConvolutionBwdDataAlgo_t", "void *", "size_t", "const void *", "const cudnnTensorDescriptor_t",
            "void *"],
        ["cudnnIm2Col", "cudnnHandle_t", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnFilterDescriptor_t", "const cudnnConvolutionDescriptor_t", "void *"],


        ["cudnnSoftmaxForward", "cudnnHandle_t", "cudnnSoftmaxAlgorithm_t", "cudnnSoftmaxMode_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const cudnnTensorDescriptor_t",
            "void *"],
        ["cudnnSoftmaxBackward", "cudnnHandle_t", "cudnnSoftmaxAlgorithm_t", "cudnnSoftmaxMode_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const void *", "const cudnnTensorDescriptor_t", "void *"],


        ["cudnnCreatePoolingDescriptor", "cudnnPoolingDescriptor_t *"],
        ["cudnnSetPooling2dDescriptor", "cudnnPoolingDescriptor_t", "cudnnPoolingMode_t", "cudnnNanPropagation_t",
            "int", "int", "int", "int", "int", "int"],
        ["cudnnGetPooling2dDescriptor", "const cudnnPoolingDescriptor_t", "cudnnPoolingMode_t *",
            "cudnnNanPropagation_t *", "int *", "int *", "int *", "int *", "int *", "int *"],
        ["cudnnSetPoolingNdDescriptor", "cudnnPoolingDescriptor_t", "const cudnnPoolingMode_t",
            "const cudnnNanPropagation_t", "int", "const int*", "const int*", "const int*"],
        ["cudnnGetPoolingNdDescriptor", "cudnnPoolingDescriptor_t", "int", "cudnnPoolingMode_t *",
            "cudnnNanPropagation_t *", "int *", "int*", "int*", "int*"],
        ["cudnnGetPoolingNdForwardOutputDim", "const cudnnPoolingDescriptor_t", "const cudnnTensorDescriptor_t",
            "int", "int*"],
        ["cudnnGetPooling2dForwardOutputDim", "const cudnnPoolingDescriptor_t", "const cudnnTensorDescriptor_t",
            "int *", "int *", "int *", "int *"],
        ["cudnnDestroyPoolingDescriptor", "cudnnPoolingDescriptor_t"],
        ["cudnnPoolingForward", "cudnnHandle_t", "const cudnnPoolingDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const cudnnTensorDescriptor_t",
            "void *"],
        ["cudnnPoolingBackward", "cudnnHandle_t", "const cudnnPoolingDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const cudnnTensorDescriptor_t",
            "void *"],


        ["cudnnCreateActivationDescriptor", "cudnnActivationDescriptor_t *"],
        ["cudnnSetActivationDescriptor", "cudnnActivationDescriptor_t", "cudnnActivationMode_t",
            "cudnnNanPropagation_t", "double"],
        // All three trailing parameters are outputs, so the NaN option is a pointer like its neighbours.
        ["cudnnGetActivationDescriptor", "const cudnnActivationDescriptor_t", "cudnnActivationMode_t *",
            "cudnnNanPropagation_t *", "double *"],
        ["cudnnDestroyActivationDescriptor", "cudnnActivationDescriptor_t"],
        ["cudnnActivationForward", "cudnnHandle_t", "cudnnActivationDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const cudnnTensorDescriptor_t",
            "void *"],
        ["cudnnActivationBackward", "cudnnHandle_t", "cudnnActivationDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const cudnnTensorDescriptor_t",
            "void *"],


        ["cudnnCreateLRNDescriptor", "cudnnLRNDescriptor_t *"],
        ["cudnnSetLRNDescriptor", "cudnnLRNDescriptor_t", "uint", "double", "double", "double"],
        ["cudnnGetLRNDescriptor", "cudnnLRNDescriptor_t", "uint *", "double *", "double *", "double *"],
        ["cudnnDestroyLRNDescriptor", "cudnnLRNDescriptor_t"],
        ["cudnnLRNCrossChannelForward", "cudnnHandle_t", "cudnnLRNDescriptor_t", "cudnnLRNMode_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const cudnnTensorDescriptor_t",
            "void *"],
        ["cudnnLRNCrossChannelBackward", "cudnnHandle_t", "cudnnLRNDescriptor_t", "cudnnLRNMode_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const cudnnTensorDescriptor_t",
            "void *"],
        ["cudnnDivisiveNormalizationForward", "cudnnHandle_t", "cudnnLRNDescriptor_t", "cudnnDivNormMode_t",
            "const void *", "const cudnnTensorDescriptor_t", "const void *", "const void *", "void *", "void *",
            "const void *", "const cudnnTensorDescriptor_t", "void *"],
        ["cudnnDivisiveNormalizationBackward", "cudnnHandle_t", "cudnnLRNDescriptor_t", "cudnnDivNormMode_t",
            "const void *", "const cudnnTensorDescriptor_t", "const void *", "const void *", "const void *",
            "void *", "void *", "const void *", "const cudnnTensorDescriptor_t", "void *", "void *"],


        ["cudnnDeriveBNTensorDescriptor", "cudnnTensorDescriptor_t", "const cudnnTensorDescriptor_t",
            "cudnnBatchNormMode_t"],
        ["cudnnBatchNormalizationForwardTraining", "cudnnHandle_t", "cudnnBatchNormMode_t", "const void *",
            "const void *", "const cudnnTensorDescriptor_t", "const void *", "const cudnnTensorDescriptor_t",
            "void *", "const cudnnTensorDescriptor_t", "const void *", "const void *", "double", "void *", "void *",
            "double", "void *", "void *"],
        ["cudnnBatchNormalizationForwardInference", "cudnnHandle_t", "cudnnBatchNormMode_t", "const void *",
            "const void *", "const cudnnTensorDescriptor_t", "const void *", "const cudnnTensorDescriptor_t", "void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const void *", "const void *", "double"],
        ["cudnnBatchNormalizationBackward", "cudnnHandle_t", "cudnnBatchNormMode_t", "const void *", "const void *",
            "const void *", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const cudnnTensorDescriptor_t", "void *",
            "const cudnnTensorDescriptor_t", "const void *", "void *", "void *", "double", "const void *",
            "const void *"],


        ["cudnnCreateSpatialTransformerDescriptor", "cudnnSpatialTransformerDescriptor_t *"],
        ["cudnnSetSpatialTransformerNdDescriptor", "cudnnSpatialTransformerDescriptor_t", "cudnnSamplerType_t",
            "cudnnDataType_t", "const int", "const int*"],
        ["cudnnDestroySpatialTransformerDescriptor", "cudnnSpatialTransformerDescriptor_t"],
        ["cudnnSpatialTfGridGeneratorForward", "cudnnHandle_t", "const cudnnSpatialTransformerDescriptor_t",
            "const void *", "void *"],
        ["cudnnSpatialTfGridGeneratorBackward", "cudnnHandle_t", "const cudnnSpatialTransformerDescriptor_t",
            "const void *", "void *"],
        // Nine parameters: handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y.
        ["cudnnSpatialTfSamplerForward", "cudnnHandle_t", "cudnnSpatialTransformerDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const void *",
            "cudnnTensorDescriptor_t", "void *"],
        ["cudnnSpatialTfSamplerBackward", "cudnnHandle_t", "cudnnSpatialTransformerDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const void *", "const cudnnTensorDescriptor_t", "void *",
            "const void *", "const cudnnTensorDescriptor_t", "const void *", "const void *", "const void *", "void *"],


        ["cudnnCreateDropoutDescriptor", "cudnnDropoutDescriptor_t *"],
        ["cudnnDestroyDropoutDescriptor", "cudnnDropoutDescriptor_t"],
        ["cudnnDropoutGetStatesSize", "cudnnHandle_t", "size_t *"],
        ["cudnnDropoutGetReserveSpaceSize", "cudnnTensorDescriptor_t", "size_t *"],
        ["cudnnSetDropoutDescriptor", "cudnnDropoutDescriptor_t", "cudnnHandle_t", "float", "void *", "size_t",
            "ulong"],
        ["cudnnDropoutForward", "cudnnHandle_t", "const cudnnDropoutDescriptor_t", "const cudnnTensorDescriptor_t",
            "const void *", "const cudnnTensorDescriptor_t", "void *", "void *", "size_t"],
        ["cudnnDropoutBackward", "cudnnHandle_t", "const cudnnDropoutDescriptor_t", "const cudnnTensorDescriptor_t",
            "const void *", "const cudnnTensorDescriptor_t", "void *", "void *", "size_t"],


        ["cudnnCreateRNNDescriptor", "cudnnRNNDescriptor_t *"],
        ["cudnnDestroyRNNDescriptor", "cudnnRNNDescriptor_t"],
        // NOTE(review): this row looks like the cuDNN v5 prototype. The cuDNN 7 header appears to
        // add a leading cudnnHandle_t and a cudnnRNNAlgo_t before the data type -- confirm against
        // cudnn.h before relying on this entry point.
        ["cudnnSetRNNDescriptor", "cudnnRNNDescriptor_t", "int", "int", "cudnnDropoutDescriptor_t",
            "cudnnRNNInputMode_t", "cudnnDirectionMode_t", "cudnnRNNMode_t", "cudnnDataType_t"],
        ["cudnnGetRNNWorkspaceSize", "cudnnHandle_t", "const cudnnRNNDescriptor_t", "const int",
            "const cudnnTensorDescriptor_t *", "size_t *"],
        ["cudnnGetRNNTrainingReserveSize", "cudnnHandle_t", "const cudnnRNNDescriptor_t", "const int",
            "const cudnnTensorDescriptor_t *", "size_t *"],
        ["cudnnGetRNNParamsSize", "cudnnHandle_t", "const cudnnRNNDescriptor_t", "const cudnnTensorDescriptor_t",
            "size_t *", "cudnnDataType_t"],
        ["cudnnGetRNNLinLayerMatrixParams", "cudnnHandle_t", "const cudnnRNNDescriptor_t", "const int",
            "const cudnnTensorDescriptor_t", "const cudnnFilterDescriptor_t", "const void *", "const int",
            "cudnnFilterDescriptor_t", "void **"],
        ["cudnnGetRNNLinLayerBiasParams", "cudnnHandle_t", "const cudnnRNNDescriptor_t", "const int",
            "const cudnnTensorDescriptor_t", "const cudnnFilterDescriptor_t", "const void *", "const int",
            "cudnnFilterDescriptor_t", "void **"],
        // Same parameter list as cudnnRNNForwardTraining minus the trailing reserve-space pair.
        ["cudnnRNNForwardInference", "cudnnHandle_t", "const cudnnRNNDescriptor_t", "const int",
            "const cudnnTensorDescriptor_t *", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const cudnnFilterDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t *", "void *", "const cudnnTensorDescriptor_t", "void *",
            "const cudnnTensorDescriptor_t", "void *", "void *", "size_t"],
        ["cudnnRNNForwardTraining", "cudnnHandle_t", "const cudnnRNNDescriptor_t", "const int",
            "const cudnnTensorDescriptor_t *", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const cudnnFilterDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t *", "void *", "const cudnnTensorDescriptor_t", "void *",
            "const cudnnTensorDescriptor_t", "void *", "void *", "size_t", "void *", "size_t"],
        ["cudnnRNNBackwardData", "cudnnHandle_t", "const cudnnRNNDescriptor_t", "const int",
            "const cudnnTensorDescriptor_t *", "const void *", "const cudnnTensorDescriptor_t *", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnFilterDescriptor_t", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t", "const void *", "const cudnnTensorDescriptor_t *", "void *",
            "const cudnnTensorDescriptor_t", "void *", "const cudnnTensorDescriptor_t", "void *", "void *",
            "size_t", "const void *", "size_t"],
        ["cudnnRNNBackwardWeights", "cudnnHandle_t", "const cudnnRNNDescriptor_t", "const int",
            "const cudnnTensorDescriptor_t *", "const void *", "const cudnnTensorDescriptor_t", "const void *",
            "const cudnnTensorDescriptor_t *", "const void *", "const void *", "size_t",
            "const cudnnFilterDescriptor_t", "void *", "const void *", "size_t"],

        //New in cuDNN v6
        ["cudnnConvolutionBiasActivationForward", "cudnnHandle_t", "const void *", "const cudnnTensorDescriptor_t",
            "const void *", "const cudnnFilterDescriptor_t", "const void *", "const cudnnConvolutionDescriptor_t",
            "cudnnConvolutionFwdAlgo_t", "void *", "size_t", "const void *", "const cudnnTensorDescriptor_t",
            "const void *", "const cudnnTensorDescriptor_t", "const void *", "const cudnnActivationDescriptor_t",
            "const cudnnTensorDescriptor_t", "void *"]
    ];

    /**
        Builds D source declaring one function-pointer type alias per row of
        `functionTypes`: `alias da_<name> = cudnnStatus_t function(<params>);`.

        Returns: the concatenated declarations as a single string. The result
        is presumably mixed in elsewhere in the module (the mixin site is not
        in this block).
    */
    string generateFunctionAliases()
    {
        import std.algorithm : joiner;
        import std.conv : to;

        string ret;

        foreach(ft; functionTypes)
        {
            // ft[0] is the symbol name; ft[1 .. $] are the parameter types.
            ret ~= "alias da_" ~ ft[0] ~ " = cudnnStatus_t function(" ~ ft[1 .. $].joiner(",").to!string ~ ");";
        }

        return ret;
    }

    /**
        Builds D source declaring one function-pointer variable per row of
        `functionTypes`, typed with the matching `da_<name>` alias and named
        after the cuDNN symbol itself: `da_<name> <name>;`.

        Returns: the concatenated declarations as a single string.
    */
    string generateFunctionPointers()
    {
        string ret;

        foreach(ft; functionTypes)
        {
            ret ~= "da_" ~ ft[0] ~ " " ~ ft[0] ~ ";";
        }

        return ret;
    }

    /**
        Builds D source that binds every function-pointer variable to its
        symbol: one `bindFunc(cast(void**)&<name>, "<name>");` call per row of
        `functionTypes`.

        Returns: the concatenated statements as a single string. `bindFunc`
        must be in scope wherever the string is mixed in.
    */
    string generateFunctionBinds()
    {
        string ret;

        foreach(ft; functionTypes)
        {
            ret ~= "bindFunc(cast(void**)&" ~ ft[0] ~ ", \"" ~ ft[0] ~ "\");";
        }

        return ret;
    }
}
409 
/// Opaque library context; only ever used through a pointer.
struct cudnnContext;

/// Handle type taken as a parameter by the bound cuDNN entry points.
alias cudnnHandle_t = cudnnContext*;
412 
/**
    Status code type. Every function-pointer alias generated in this module
    uses it as the return type.

    Modelled as an `int` alias plus an anonymous enum, so the constants sit at
    module scope.
*/
alias cudnnStatus_t = int;

/// Status code values.
enum : cudnnStatus_t
{
    CUDNN_STATUS_SUCCESS = 0,
    CUDNN_STATUS_NOT_INITIALIZED = 1,
    CUDNN_STATUS_ALLOC_FAILED = 2,
    CUDNN_STATUS_BAD_PARAM = 3,
    CUDNN_STATUS_INTERNAL_ERROR = 4,
    CUDNN_STATUS_INVALID_VALUE = 5,
    CUDNN_STATUS_ARCH_MISMATCH = 6,
    CUDNN_STATUS_MAPPING_ERROR = 7,
    CUDNN_STATUS_EXECUTION_FAILED = 8,
    CUDNN_STATUS_NOT_SUPPORTED = 9,
    CUDNN_STATUS_LICENSE_ERROR = 10,
}
428 
// Opaque descriptor types. Each struct is declared without a body, so D code
// can only hold and pass pointers to it; the `_t` alias is that pointer type.

/// Tensor descriptor handle.
struct cudnnTensorStruct;
alias cudnnTensorDescriptor_t = cudnnTensorStruct*; /// ditto

/// Convolution descriptor handle.
struct cudnnConvolutionStruct;
alias cudnnConvolutionDescriptor_t = cudnnConvolutionStruct*; /// ditto

/// Pooling descriptor handle.
struct cudnnPoolingStruct;
alias cudnnPoolingDescriptor_t = cudnnPoolingStruct*; /// ditto

/// Filter descriptor handle.
struct cudnnFilterStruct;
alias cudnnFilterDescriptor_t = cudnnFilterStruct*; /// ditto

/// LRN descriptor handle.
struct cudnnLRNStruct;
alias cudnnLRNDescriptor_t = cudnnLRNStruct*; /// ditto

/// Activation descriptor handle.
struct cudnnActivationStruct;
alias cudnnActivationDescriptor_t = cudnnActivationStruct*; /// ditto

/// Spatial-transformer descriptor handle.
struct cudnnSpatialTransformerStruct;
alias cudnnSpatialTransformerDescriptor_t = cudnnSpatialTransformerStruct*; /// ditto

/// Op-tensor descriptor handle.
struct cudnnOpTensorStruct;
alias cudnnOpTensorDescriptor_t = cudnnOpTensorStruct*; /// ditto

/// Reduce-tensor descriptor handle.
struct cudnnReduceTensorStruct;
alias cudnnReduceTensorDescriptor_t = cudnnReduceTensorStruct*; /// ditto
455 
/// Element data type selector (an `int`), passed to the descriptor setters.
alias cudnnDataType_t = int;

/// Data type values.
enum : cudnnDataType_t
{
    CUDNN_DATA_FLOAT = 0,
    CUDNN_DATA_DOUBLE = 1,
    CUDNN_DATA_HALF = 2,
}
463 
/// NaN-propagation option (an `int`), taken by the op-tensor, pooling and activation descriptor setters.
alias cudnnNanPropagation_t = int;

/// NaN-propagation values.
enum : cudnnNanPropagation_t
{
    CUDNN_NOT_PROPAGATE_NAN = 0,
    CUDNN_PROPAGATE_NAN = 1,
}
470 
/// Determinism flag (an `int`).
alias cudnnDeterminism_t = int;

/// Determinism values.
enum : cudnnDeterminism_t
{
    CUDNN_NON_DETERMINISTIC = 0,
    CUDNN_DETERMINISTIC = 1,
}
477 
/// Tensor memory layout selector (an `int`).
alias cudnnTensorFormat_t = int;

/// Tensor layout values.
enum : cudnnTensorFormat_t
{
    /// Row major (wStride = 1, hStride = w).
    CUDNN_TENSOR_NCHW = 0,
    /// Feature maps interleaved (cStride = 1).
    CUDNN_TENSOR_NHWC = 1,
}
484 
/// Element-wise operation selector (an `int`), taken by cudnnSetOpTensorDescriptor.
alias cudnnOpTensorOp_t = int;

/// Op-tensor operation values.
enum : cudnnOpTensorOp_t
{
    CUDNN_OP_TENSOR_ADD = 0,
    CUDNN_OP_TENSOR_MUL = 1,
    CUDNN_OP_TENSOR_MIN = 2,
    CUDNN_OP_TENSOR_MAX = 3,
}
493 
/**
    Reduction operation selector for the ReduceTensor API (an `int`), taken by
    cudnnSetReduceTensorDescriptor.
*/
alias cudnnReduceTensorOp_t = int;

/// Reduction operation values.
enum : cudnnReduceTensorOp_t
{
    CUDNN_REDUCE_TENSOR_ADD = 0,
    CUDNN_REDUCE_TENSOR_MUL = 1,
    CUDNN_REDUCE_TENSOR_MIN = 2,
    CUDNN_REDUCE_TENSOR_MAX = 3,
    CUDNN_REDUCE_TENSOR_AMAX = 4,
    CUDNN_REDUCE_TENSOR_AVG = 5,
    CUDNN_REDUCE_TENSOR_NORM1 = 6,
    CUDNN_REDUCE_TENSOR_NORM2 = 7,
    CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
}
510 
/**
    Indices mode selector for the ReduceTensor API (an `int`), taken by
    cudnnSetReduceTensorDescriptor.
*/
alias cudnnReduceTensorIndices_t = int;

/// Indices mode values.
enum : cudnnReduceTensorIndices_t
{
    CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
    CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
}
520 
/**
    Width selector for tensor indices (all unsigned).

    Currently not supported; the default is 32-bit unsigned.
*/
alias cudnnIndicesType_t = int;

/// Index width values.
enum : cudnnIndicesType_t
{
    CUDNN_32BIT_INDICES = 0,
    CUDNN_64BIT_INDICES = 1,
    CUDNN_16BIT_INDICES = 2,
    CUDNN_8BIT_INDICES = 3,
}
533 
534 
/// Convolution mode selector (an `int`), taken by the convolution descriptor setters.
alias cudnnConvolutionMode_t = int;

/// Convolution mode values.
enum : cudnnConvolutionMode_t
{
    CUDNN_CONVOLUTION = 0,
    CUDNN_CROSS_CORRELATION = 1,
}
541 
/// Algorithm-selection preference (an `int`), taken by cudnnGetConvolutionForwardAlgorithm.
alias cudnnConvolutionFwdPreference_t = int;

/// Forward-convolution preference values.
enum : cudnnConvolutionFwdPreference_t
{
    CUDNN_CONVOLUTION_FWD_NO_WORKSPACE = 0,
    CUDNN_CONVOLUTION_FWD_PREFER_FASTEST = 1,
    CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT = 2,
}
549 
/// Forward-convolution algorithm identifier (an `int`).
alias cudnnConvolutionFwdAlgo_t = int;

/// Forward-convolution algorithm values.
enum : cudnnConvolutionFwdAlgo_t
{
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
    CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
    CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7,
}
562 
/**
   Performance record for a forward convolution algorithm.

   Field order and types follow the C struct of the same name in cudnn.h;
   do not rearrange them, the layout is shared with the native library.

   NOTE(review): the cuDNN 7 header appears to name the first two reserved
   ints (determinism, mathType) and keep only three as reserved. The overall
   size is unchanged - confirm before relying on the contents of `reserved`.
*/
struct cudnnConvolutionFwdAlgoPerf_t
{
    cudnnConvolutionFwdAlgo_t algo;
    cudnnStatus_t status;
    float time;
    size_t memory;
    int[5] reserved;
}
571 
/**
   Preference used when asking for a backward-filter convolution algorithm.

   Values follow the C enum of the same name in cudnn.h.
*/
alias cudnnConvolutionBwdFilterPreference_t = int;
enum : cudnnConvolutionBwdFilterPreference_t
{
    CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE = 0,
    CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST = 1,
    CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT = 2,
}
579 
/**
   Backward-filter convolution algorithm identifiers.

   Values mirror the C enum of the same name in cudnn.h and must not be
   renumbered. Value 4 is intentionally left without a name here (see the
   commented-out member).
*/
alias cudnnConvolutionBwdFilterAlgo_t = int;
enum : cudnnConvolutionBwdFilterAlgo_t
{
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0         = 0,  // non-deterministic
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1         = 1,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT       = 2,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3         = 3,  // non-deterministic, algo0 with workspace
    // CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD  = 4, // not implemented
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
    // The two members below exist in the cuDNN 7 header but were missing here;
    // without FFT_TILING a value of 6 coming back from the library has no name.
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT      = 7,
}
590 
/**
   Performance record for a backward-filter convolution algorithm.

   Field order and types follow the C struct of the same name in cudnn.h;
   do not rearrange them, the layout is shared with the native library.

   NOTE(review): the cuDNN 7 header appears to name the first two reserved
   ints (determinism, mathType) and keep only three as reserved. The overall
   size is unchanged - confirm before relying on the contents of `reserved`.
*/
struct cudnnConvolutionBwdFilterAlgoPerf_t
{
    cudnnConvolutionBwdFilterAlgo_t algo;
    cudnnStatus_t                   status;
    float                           time;
    size_t                          memory;
    int[5]                          reserved;
}
599 
/**
   Preference used when asking for a backward-data convolution algorithm.

   Values follow the C enum of the same name in cudnn.h.
*/
alias cudnnConvolutionBwdDataPreference_t = int;
enum : cudnnConvolutionBwdDataPreference_t
{
    CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE = 0,
    CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST = 1,
    CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT = 2,
}
607 
/**
   Backward-data convolution algorithm identifiers.

   Values mirror the C enum of the same name in cudnn.h and must not be
   renumbered.
*/
alias cudnnConvolutionBwdDataAlgo_t = int;
enum : cudnnConvolutionBwdDataAlgo_t
{
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_0                 = 0, // non-deterministic
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_1                 = 1,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT               = 2,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING        = 3,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD          = 4,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
    // Number of algorithms above; defined by the cuDNN 7 header but previously
    // missing from this binding.
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT             = 6,
}
618 
/**
   Performance record for a backward-data convolution algorithm.

   Field order and types follow the C struct of the same name in cudnn.h;
   do not rearrange them, the layout is shared with the native library.

   NOTE(review): the cuDNN 7 header appears to name the first two reserved
   ints (determinism, mathType) and keep only three as reserved. The overall
   size is unchanged - confirm before relying on the contents of `reserved`.
*/
struct cudnnConvolutionBwdDataAlgoPerf_t
{
    cudnnConvolutionBwdDataAlgo_t algo;
    cudnnStatus_t status;
    float time;
    size_t memory;
    int[5] reserved;
}
627 
/**
   Softmax algorithm selector.

   Values follow the C enum of the same name in cudnn.h.
*/
alias cudnnSoftmaxAlgorithm_t = int;
enum : cudnnSoftmaxAlgorithm_t
{
    /// Straightforward implementation.
    CUDNN_SOFTMAX_FAST = 0,
    /// Subtracts the max from every point to avoid overflow.
    CUDNN_SOFTMAX_ACCURATE = 1,
    CUDNN_SOFTMAX_LOG = 2,
}
635 
/**
   Softmax mode selector.

   Values follow the C enum of the same name in cudnn.h.
*/
alias cudnnSoftmaxMode_t = int;
enum : cudnnSoftmaxMode_t
{
    /// Compute the softmax over all C, H, W for each N.
    CUDNN_SOFTMAX_MODE_INSTANCE = 0,
    CUDNN_SOFTMAX_MODE_CHANNEL = 1,
}
642 
/**
   Pooling mode selector.

   Values follow the C enum of the same name in cudnn.h.
*/
alias cudnnPoolingMode_t = int;
enum : cudnnPoolingMode_t
{
    CUDNN_POOLING_MAX = 0,
    /// Count for average includes padded values.
    CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1,
    CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2,
    CUDNN_POOLING_MAX_DETERMINISTIC = 3
}
651 
/**
   Activation function selector.

   Values mirror the C enum of the same name in cudnn.h and must not be
   renumbered.
*/
alias cudnnActivationMode_t = int;
enum : cudnnActivationMode_t
{
    CUDNN_ACTIVATION_SIGMOID      = 0,
    CUDNN_ACTIVATION_RELU         = 1,
    CUDNN_ACTIVATION_TANH         = 2,
    CUDNN_ACTIVATION_CLIPPED_RELU = 3,
    // Present in the cuDNN 7 header but previously missing from this binding.
    CUDNN_ACTIVATION_ELU          = 4,
}
660 
/**
   LRN mode selector; this binding declares a single mode.
*/
alias cudnnLRNMode_t = int;
enum : cudnnLRNMode_t
{
    CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0,
}
666 
/**
   Divisive normalization mode selector; this binding declares a single mode.
*/
alias cudnnDivNormMode_t = int;
enum : cudnnDivNormMode_t
{
    CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
}
672 
/**
   Batch normalization mode selector.

   Values mirror the C enum of the same name in cudnn.h and must not be
   renumbered.
*/
alias cudnnBatchNormMode_t = int;
enum : cudnnBatchNormMode_t
{
    // bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice)
    CUDNN_BATCHNORM_PER_ACTIVATION = 0,

    // bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors)
    CUDNN_BATCHNORM_SPATIAL        = 1,

    // Present in the cuDNN 7 header but previously missing from this binding.
    // NOTE(review): per the cuDNN docs this is a variant of the spatial mode
    // with the same tensor dims - confirm restrictions before using it.
    CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
}
682 
/**
   Sampler type selector; this binding declares a single sampler.
*/
alias cudnnSamplerType_t = int;
enum : cudnnSamplerType_t
{
    CUDNN_SAMPLER_BILINEAR = 0,
}
688 
// Opaque dropout descriptor: the struct is only forward-declared here, so D code
// can hold and pass the pointer but cannot look inside it.
struct cudnnDropoutStruct;
alias cudnnDropoutDescriptor_t = cudnnDropoutStruct*;
691 
/**
   RNN cell type selector.

   Values follow the C enum of the same name in cudnn.h.
*/
alias cudnnRNNMode_t = int;
enum : cudnnRNNMode_t
{
    /// Stock RNN with ReLu activation.
    CUDNN_RNN_RELU = 0,
    /// Stock RNN with tanh activation.
    CUDNN_RNN_TANH = 1,
    /// LSTM with no peephole connections.
    CUDNN_LSTM = 2,
    /// Using h' = tanh(r * Uh(t-1) + Wx) and h = (1 - z) * h' + z * h(t-1).
    CUDNN_GRU = 3,
}
700 
/**
   RNN direction selector.

   Values follow the C enum of the same name in cudnn.h.
*/
alias cudnnDirectionMode_t = int;
enum : cudnnDirectionMode_t
{
    CUDNN_UNIDIRECTIONAL = 0,
    /// Using output concatenation at each step. Do we also want to support output sum?
    CUDNN_BIDIRECTIONAL = 1,
}
707 
/**
   RNN input mode selector.

   Values follow the C enum of the same name in cudnn.h.
*/
alias cudnnRNNInputMode_t = int;
enum : cudnnRNNInputMode_t
{
    CUDNN_LINEAR_INPUT = 0,
    CUDNN_SKIP_INPUT = 1,
}
714 
// Opaque RNN descriptor: the struct is only forward-declared here, so D code
// can hold and pass the pointer but cannot look inside it.
struct cudnnRNNStruct;
alias cudnnRNNDescriptor_t = cudnnRNNStruct*;
717 
// Function-pointer type aliases for the cuDNN entry points. Everything in this
// block uses the system calling convention and is marked @nogc nothrow.
extern(System) @nogc nothrow
{
    // Written out by hand; it returns a C string rather than a status code.
    alias da_cudnnGetErrorString = const char *function(cudnnStatus_t);

    // The remaining aliases come from a string mixin built at compile time.
    // NOTE(review): presumably one da_* alias per entry of the functionTypes
    // table - confirm against generateFunctionAliases.
    mixin(generateFunctionAliases());
}
724 
// Global (non thread-local) function pointers that callers invoke. They are
// filled in by DerelictCuDNN7Loader.loadSymbols.
__gshared
{
    // Hand-declared pointer matching the hand-declared alias type.
    da_cudnnGetErrorString cudnnGetErrorString;

    // The remaining pointers come from a string mixin built at compile time.
    // NOTE(review): presumably one pointer per entry of the functionTypes
    // table - confirm against generateFunctionPointers.
    mixin(generateFunctionPointers());
}
731 
/**
    Shared-library loader for cuDNN 7.

    Construction only records the candidate library names; the symbols are
    bound when loadSymbols is invoked by the SharedLibLoader base class.
*/
class DerelictCuDNN7Loader : SharedLibLoader
{
    /// Passes the platform-specific library name list to the base loader.
    public this()
    {
        super(libNames);
    }

    /// Binds the module-level cuDNN function pointers to the loaded library.
    protected override void loadSymbols()
    {
        // cudnnGetErrorString is declared by hand, so it is bound by hand too.
        bindFunc(cast(void**)&cudnnGetErrorString, "cudnnGetErrorString");

        // Bind statements for the generated pointers are mixed in at compile time.
        mixin(generateFunctionBinds());
    }
}
752 
/// Process-wide loader instance; call `DerelictCuDNN7.load()` before using any cuDNN function.
__gshared DerelictCuDNN7Loader DerelictCuDNN7;

// Creates the loader at program start-up. This only constructs the object;
// the shared library itself is not loaded here.
shared static this()
{
    DerelictCuDNN7 = new DerelictCuDNN7Loader();
}
759 
// Smoke test, compiled only with version=grain_cuda: tries to load the cuDNN 7
// shared library and reports the outcome. A load failure is reported, not
// treated as a test failure, so machines without cuDNN still pass.
version (grain_cuda) unittest
{
    import std.stdio : writeln;

    try
    {
        DerelictCuDNN7.load();
        writeln("Successfully loaded cuDNN v7");
    }
    catch(Exception e)
    {
        // Include the loader's message so the reason for the failure
        // (missing library, missing symbol, ...) is visible in the output.
        writeln("Could not load cuDNN v7: ", e.msg);
    }
}
775 
776 
/** Maximum supported number of tensor dimensions. */
enum CUDNN_DIM_MAX = 8;