4 gadi atpakaļ · 0ba017ca56
--- a/c++/lib/json
+++ b/c++/lib/json
@@ -1 +1 @@
 
															-Subproject commit dd7e25927fe7a49c81d07943c32444f0a9011665
														
 
															+Subproject commit 27f5a6e82731b5538f1063bb8e955458ea7746e7
														
--- a/c++/src/conv2D.cpp
+++ b/c++/src/conv2D.cpp
@@ -36,15 +36,15 @@ namespace tf_lib {
 
															       filter_shape, GetFilterDimIndex<num_spatial_dims>(filter_format, 'H'));
														
 
															     DimensionHandle filter_cols_dim = c->Dim(
														
 
															       filter_shape, GetFilterDimIndex<num_spatial_dims>(filter_format, 'W'));
														
 
															-      */
														
 
															+      
														
 
															     DimensionHandle filter_input_depth_dim = c->Dim(
														
 
															       filter_shape, GetFilterDimIndex<num_spatial_dims>(filter_format, 'I'));
														
 
															-
														
 
															+      */
														
 
															     DimensionHandle output_rows, output_cols, output_channels;
														
 
															     c->Subtract(input_spatial_dims[0], 4, &output_rows);
														
 
															     c->Subtract(input_spatial_dims[1], 4, &output_cols);
														
 
															-    c->Multiply(filter_input_depth_dim, output_depth_dim, &output_channels);
														
 
															+    c->Subtract(output_depth_dim, 0, &output_channels);
														
 
															     std::vector<DimensionHandle> out_dims(4);
														
 
															     out_dims[0] = batch_size_dim;
														
@@ -80,19 +80,21 @@ namespace tf_lib {
 
															     OP_REQUIRES_ASYNC(context, input_shape.dim_size(2) == 228, errors::InvalidArgument("Unsupported input width: ", input_shape.dim_size(2)), done);
														
 
															     OP_REQUIRES_ASYNC(context, kernel_shape.dim_size(0) == 5, errors::InvalidArgument("Unsupported kernel height: ", kernel_shape.dim_size(0)), done);
														
 
															     OP_REQUIRES_ASYNC(context, kernel_shape.dim_size(1) == 5, errors::InvalidArgument("Unsupported kernel width: ", kernel_shape.dim_size(1)), done);
														
 
															+    OP_REQUIRES_ASYNC(context, kernel_shape.dim_size(2) == input_shape.dim_size(3), 
														
 
															+      errors::InvalidArgument("kernel channels != input channels: ", kernel_shape.dim_size(2), " != ", input_shape.dim_size(3)), done);
														
 
															     int batchSize = input_shape.dim_size(0);
														
 
															     int channels = input_shape.dim_size(3);
														
 
															-    int filters = kernel_shape.dim_size(3);
														
 
															+    int outputChannels = kernel_shape.dim_size(3);
														
 
															     TensorShape output_shape;
														
 
															-    const int32 dims[] = {batchSize, outputSize, outputSize, channels * filters};
														
 
															+    const int32 dims[] = {batchSize, outputSize, outputSize, outputChannels};
														
 
															     TensorShapeUtils::MakeShape(dims, 4, &output_shape);
														
 
															     output_shape.set_dim(0, batchSize);
														
 
															     output_shape.set_dim(1, outputSize);
														
 
															     output_shape.set_dim(2, outputSize);
														
 
															-    output_shape.set_dim(3, channels * filters);
														
 
															+    output_shape.set_dim(3, outputChannels);
														
 
															     //printMu.lock();
														
 
															     //std::cout << output_shape.DebugString() << std::endl;
														
@@ -108,20 +110,20 @@ namespace tf_lib {
 
															     auto kernel_tensor = kernel.tensor<float, 4>();
														
 
															     auto output_tensor = output->tensor<float, 4>();
														
 
															-    auto worker = connectionManager.createWorker(Module::conv2D_5x5_Module, batchSize * channels * filters);
														
 
															+    auto worker = connectionManager.createWorker(Module::conv2D_5x5_Module, batchSize * channels * outputChannels);
														
 
															     {
														
 
															       worker->setJobTimeout(milliseconds(300));
														
 
															       worker->setRetryCount(10);
														
 
															       auto jobs = worker->getJobList();
														
 
															       for(int sample=0; sample<batchSize; sample++) {
														
 
															-        for(int channel=0; channel<channels; channel++) {
														
 
															-          for(int filter=0; filter<filters; filter++) {
														
 
															-            auto job = jobs->getJob(sample * channels * filters + channel * filters + filter);
														
 
															+        for(int outputChannel=0; outputChannel<outputChannels; outputChannel++) {
														
 
															+          for(int channel=0; channel<channels; channel++) {
														
 
															+            auto job = jobs->getJob(sample * outputChannels * channels + outputChannel * channels + channel);
														
 
															             for(int x=0; x<kernelSize; x++) {
														
 
															               for(int y=0; y<kernelSize; y++) {
														
 
															-                job->setPayload(y*kernelSize + x, *((uint32_t*)&kernel_tensor(y, x, channel, filter)));
														
 
															+                job->setPayload(y*kernelSize + x, *((uint32_t*)&kernel_tensor(y, x, channel, outputChannel)));
														
 
															               }
														
 
															             }
														
 
															             for(int x=0; x<sizeWithBorder; x++) {
														
@@ -134,16 +136,21 @@ namespace tf_lib {
 
															         }
														
 
															       }
														
 
															     }
														
 
															-    worker->setDoneCallback([output_tensor, worker, done, batchSize, channels, filters, this]{
														
 
															+    worker->setDoneCallback([output_tensor, worker, done, batchSize, channels, outputChannels, this]{
														
 
															       auto jobs = worker->getJobList();
														
 
															       for(int sample=0; sample<batchSize; sample++) {
														
 
															-        for(int channel=0; channel<channels; channel++) {
														
 
															-          for(int filter=0; filter<filters; filter++) {
														
 
															-            auto job = jobs->getJob(sample * channels * filters + channel * filters + filter);
														
 
															+        for(int outputChannel=0; outputChannel<outputChannels; outputChannel++) {
														
 
															+          for(int x=0; x<outputSize; x++) {
														
 
															+            for(int y=0; y<outputSize; y++) {
														
 
															+              output_tensor(sample, y, x, outputChannel) = 0;
														
 
															+            }
														
 
															+          }
														
 
															+          for(int channel=0; channel<channels; channel++) {
														
 
															+            auto job = jobs->getJob(sample * outputChannels * channels + outputChannel * channels + channel);
														
 
															             for(int x=0; x<outputSize; x++) {
														
 
															               for(int y=0; y<outputSize; y++) {
														
 
															-                uint32_t val = job->getResponsePayload((y+border*2)*sizeWithBorder + (x+border*2) + 1);
														
 
															-                output_tensor(sample, y, x, channel) = *((float*)&val);
														
 
															+                uint32_t val = job->getResponsePayload((y+border*2)*sizeWithBorder + (x+border*2));
														
 
															+                output_tensor(sample, y, x, outputChannel) += *((float*)&val);
														
 
															               }
														
 
															             }
														
 
															           }
														
--- a/doku/layer/conv2d.md
+++ b/doku/layer/conv2d.md
@@ -7,25 +7,24 @@ Input:
 
															 - an FPGA: `[imageY, imageX]`
														
 
															 Kernel:
														
 
															-- in TF: `[kernelY, kernelX, channels, filters]`
														
 
															+- in TF: `[kernelY, kernelX, channels, outputChannels]`
														
 
															 - an FPGA: `[kernelY, kernelX]`
														
 
															 Output:
														
 
															 - vom FPGA: `[imageY2, imageX2]`
														
 
															-- an TF: `[batchSize, imageY2, imageX2, channels * filters]`
														
 
															+- an TF: `[batchSize, imageY2, imageX2, outputChannels]`
														
 
															 ## Parallelisierung
														
 
															 1.  **Ohne FPGA-seitigem Speicher**
														
 
															-    FPGA Recheneinheiten werden verteilt `(batchSize * channels * filters)` Mal verwendet.
														
 
															+    FPGA Recheneinheiten werden verteilt `(batchSize * channels * outputChannels)` Mal verwendet.
														
 
															     ```python
														
 
															     for sample in range(batchSize):
														
 
															-      for channnel in range(channels):
														
 
															-        for filter in range(filters):
														
 
															-          output[sample][channel + filter * channels] = f(
														
 
															-            input[sample][channel], 
														
 
															-            kernel[channel][filter]
														
 
															-          )
														
 
															+      for outputChannel in range(outputChannels):
														
 
															+        output[sample][outputChannel] = sum([f(
														
 
															+          input[sample][channel], 
														
 
															+          kernel[channel][outputChannel]
														
 
															+        ) for channel in range(channels)])
														
 
															     ```
														
--- a/tests/op_test.py
+++ b/tests/op_test.py
@@ -1,4 +1,5 @@
 
															 import tensorflow as tf
														
 
															+from tensorflow import nn
														
 
															 import numpy as np
														
 
															 from IPython import embed
														
@@ -35,5 +36,37 @@ class FPGALibTest(tf.test.TestCase):
 
															         result = load_op.op_lib.MyDummyBig(input=input)
														
 
															         self.assertAllEqual(result, input)
														
 
															+  def testConv2DSingle(self):
														
 
															+    img = np.ndarray((228,228), dtype=float)
														
 
															+    img.fill(0)
														
 
															+    for a in range(228):
														
 
															+      for b in range(228):
														
 
															+        img[a][b] = (a)*228+(b)
														
 
															+    kernel = np.ndarray((5,5), dtype=float)
														
 
															+    kernel.fill(0)
														
 
															+    kernel[2][2] = 1
														
 
															+    input = tf.constant(np.expand_dims(img, (0, 3)), dtype=float)
														
 
															+    filter = tf.constant(np.expand_dims(kernel, (2, 3)), dtype=float)
														
 
															+    with self.session():
														
 
															+      ref = nn.convolution(input, filter)
														
 
															+      result = load_op.op_lib.MyConv2D(input=input, filter=filter)
														
 
															+      self.assertAllClose(result, ref)
														
 
															+
														
 
															+  def testConv2DRandom(self):
														
 
															+    input = tf.random.uniform(shape=[1,228,228,1])
														
 
															+    filter = tf.random.uniform(shape=[5,5,1,1])
														
 
															+    with self.session():
														
 
															+      ref = nn.convolution(input, filter)
														
 
															+      result = load_op.op_lib.MyConv2D(input=input, filter=filter)
														
 
															+      self.assertAllClose(result, ref)
														
 
															+
														
 
															+  def testConv2DMulti(self):
														
 
															+    input = tf.random.uniform(shape=[2,228,228,3])
														
 
															+    filter = tf.random.uniform(shape=[5,5,3,4])
														
 
															+    with self.session():
														
 
															+      ref = nn.convolution(input, filter)
														
 
															+      result = load_op.op_lib.MyConv2D(input=input, filter=filter)
														
 
															+      self.assertAllClose(result, ref)
														
 
															+
														
 
															 if __name__ == "__main__":
														
 
															   tf.test.main()
	`@@ -1 +1 @@`
	`-Subproject commit dd7e25927fe7a49c81d07943c32444f0a9011665`
			`+Subproject commit 27f5a6e82731b5538f1063bb8e955458ea7746e7`