Browse Source

Fit running

subDesTagesMitExtraKaese 5 years ago
parent
commit
f23a689158
6 changed files with 26 additions and 15 deletions
  1. BIN
      build/op_lib.so
  2. 5 3
      examples/train.py
  3. 1 1
      layers/conv2D.py
  4. BIN
      model.png
  5. 18 11
      src/conv2D.cpp
  6. 2 0
      src/conv2D.hpp

BIN
build/op_lib.so


+ 5 - 3
examples/train.py

@@ -27,8 +27,8 @@ x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
 x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
 input_shape = (img_rows, img_cols, 1)
 
-x_train = x_train.astype('int32')
-x_test = x_test.astype('int32')
+x_train = x_train.astype('float')
+x_test = x_test.astype('float')
 #x_train /= 255
 #x_test /= 255
 print('x_train shape:', x_train.shape)
@@ -51,11 +51,13 @@ print(c)
 print(d)
 print(e)
 
-x = layers.Add()([d,e])
+x = layers.Add()([c,c])
 y = layers.Flatten()(x)
 z = layers.Dense(num_classes, activation='softmax')(y)
 
 model = Model(inputs=a, outputs=z)
+print(model.output_shape)
+
 """
 model = Sequential()
 model.add(Conv2DFPGA([0,0]))

+ 1 - 1
layers/conv2D.py

@@ -37,4 +37,4 @@ class Conv2D(layers.Layer):
 
     #out = tf.Tensor(tf.int32, shape=inputs.shape)
     intKernel = tf.cast(self.kernel, dtype=tf.int32)
-    return load_op.op_lib.MyConv2D(input=inputs, filter=intKernel, delay=1000*self.filters)
+    return load_op.op_lib.MyConv2D(input=inputs, filter=intKernel, delay=100*self.filters)

BIN
model.png


+ 18 - 11
src/conv2D.cpp

@@ -22,19 +22,18 @@ void Conv2DOp::fpgaCall(const Tensor *input, const Tensor *kernel, Tensor *outpu
     auto input_tensor = input->tensor<int32, 4>();
     auto kernel_tensor = kernel->tensor<int32, 4>();
     auto output_tensor = output->tensor<int32, 4>();
-    int size = 24;
     
     printMu.lock();
-    //printf(" sample: %3d, channel: %3d, filter: %3d\n", sample, channel, filter);
-    /*
-    for(int x=0; x<size; x++) {
-      for(int y=0; y<size; y++) {
+    printf(" sample: %3d, channel: %3d, filter: %3d\n", sample, channel, filter);
+    
+    for(int x=0; x<outputSize; x++) {
+      for(int y=0; y<outputSize; y++) {
         printf("%c", input_tensor(sample, x, y, channel) > 0 ? '#' : ' ');
       }
       std::cout << std::endl;
     }
     std::cout << std::endl;
-    */
+    
     printMu.unlock();
 }
 
@@ -57,21 +56,29 @@ void Conv2DOp::ComputeAsync(OpKernelContext* context, DoneCallback done) {
 
   TensorShape kernel_shape = kernel.shape();
   TensorShape input_shape = input.shape();
-  TensorShape output_shape = input.shape();
-  
+
 
   int batchSize = input_shape.dim_size(0);
   int channels = input_shape.dim_size(3);
   int filters = kernel_shape.dim_size(3);
 
-  output_shape.set_dim(1, 24);
-  output_shape.set_dim(2, 24);
+  TensorShape output_shape;
+  const int32 dims[] = {batchSize, outputSize, outputSize, channels * filters};
+  TensorShapeUtils::MakeShape(dims, 4, &output_shape);
+
+  output_shape.set_dim(0, batchSize);
+  output_shape.set_dim(1, outputSize);
+  output_shape.set_dim(2, outputSize);
   output_shape.set_dim(3, channels * filters);
 
+  printMu.lock();
+  std::cout << output_shape.DebugString() << std::endl;
+  printMu.unlock();
+
   // Output tensor is of the following dimensions:
   // [ in_batch, out_rows, out_cols, out_depth ]
   Tensor* output = nullptr;
-  OP_REQUIRES_OK(context, context->allocate_output(0, input_shape, &output));
+  OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
 
   for(int sample=0; sample<batchSize; sample++) {
     for(int channel=0; channel<channels; channel++) {

+ 2 - 0
src/conv2D.hpp

@@ -25,6 +25,8 @@ class Conv2DOp : public AsyncOpKernel {
     int instance = -1;
     int delay = 1000;
 
+    int outputSize = 28;
+
     void fpgaCall(const Tensor *input, const Tensor *kernel, Tensor *output, int sample, int channel, int filter);
     void delayThread(DoneCallback done);