@@ -0,0 +1,165 @@
+#include <iostream>
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/math/math_util.h"
+#include "tensorflow/core/platform/logging.h"
+
+using namespace tensorflow;
+
+typedef FunctionDefHelper FDH;
+
+// MyMatMul is the op emitted by the gradient function below, so it must
+// declare the "T", "transpose_a", and "transpose_b" attrs that MatMulGrad
+// sets on the nodes it generates.
+REGISTER_OP("MyMatMul")
+    .Input("a: T")
+    .Input("b: T")
+    .Output("product: T")
+    .Attr("T: {half, float, double}")
+    .Attr("transpose_a: bool = false")
+    .Attr("transpose_b: bool = false")
+    .SetShapeFn(shape_inference::MatMulShape);
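+
+// With MatMulShape, an [m, k] by [k, n] product infers an [m, n] output
+// shape; transpose_a / transpose_b swap the corresponding factor's
+// dimensions before the inner dimensions are checked for compatibility.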
+
+REGISTER_OP("MyConv2D")
+    .Input("input: int32")
+    .Input("filter: int32")
+    .Output("output: int32")
+    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
+      // Placeholder: the kernel below allocates its output with the input's
+      // shape, so mirror that here until real Conv2D shape math is added.
+      c->set_output(0, c->input(0));
+      return Status::OK();
+    });
+
+// Earlier ZeroOut-style draft of the kernel, kept for reference:
+/*
+class Conv2DOp : public OpKernel {
+ public:
+  explicit Conv2DOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Grab the input tensor.
+    const Tensor& input_tensor = context->input(0);
+    auto input = input_tensor.flat<int32>();
+
+    printf("call n: %d\n", n++);
+
+    // Create an output tensor.
+    Tensor* output_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
+                                                     &output_tensor));
+    auto output_flat = output_tensor->flat<int32>();
+
+    // Set all but the first element of the output tensor to 0.
+    const int N = input.size();
+    for (int i = 1; i < N; i++) {
+      output_flat(i) = 0;
+    }
+    // Preserve the first input value if possible.
+    if (N > 0) output_flat(0) = input(0);
+  }
+
+  int n = 0;
+};
+*/
+
+
+class Conv2DOp : public OpKernel {
+ public:
+  explicit Conv2DOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Input tensor is of the following dimensions:
+    // [ batch, in_rows, in_cols, in_depth ]
+    const Tensor& input = context->input(0);
+
+    // Input filter is of the following dimensions:
+    // [ filter_rows, filter_cols, in_depth, out_depth ]
+    const Tensor& filter = context->input(1);
+
+    // Placeholder: reuse the input shape, matching the op's shape function.
+    TensorShape out_shape = input.shape();
+
+    // Output tensor is of the following dimensions:
+    // [ batch, out_rows, out_cols, out_depth ]
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
+
+    std::cout << "Conv2D" << std::endl;
+
+    // If there is nothing to compute, return.
+    if (out_shape.num_elements() == 0) {
+      return;
+    }
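+
+    // Minimal placeholder body (an assumption on my part: a real Conv2D
+    // would convolve `input` with `filter` here). Forwarding the input
+    // unchanged keeps the kernel runnable and ensures the output buffer is
+    // initialized rather than left as uninitialized memory.
+    output->flat<int32>() = input.flat<int32>();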
+  }
+
+ private:
+  // LaunchConv2DOp<Device, T> launcher_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(Conv2DOp);
+};
+
+REGISTER_KERNEL_BUILDER(Name("MyConv2D").Device(DEVICE_CPU), Conv2DOp);
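+
+// Usage sketch (hypothetical: assumes this file is compiled into a shared
+// library named "my_ops.so"; tf.load_op_library exposes each registered op
+// under its snake_case name):
+//
+//   library = tf.load_op_library("./my_ops.so")
+//   output = library.my_conv2d(input_tensor, filter_tensor)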
+
+// Builds the gradient function for a MatMul-style op. The outputs are "dx"
+// and "dy"; each is computed by a single `opname` node whose operands and
+// transpose flags are supplied by the caller.
+static Status MatMulGradHelper(FunctionDef* g, const string& opname,
+                               const string& attr_adj_x,
+                               const string& attr_adj_y, const string& x0,
+                               bool ax0, const string& x1, bool ax1,
+                               const string& y0, bool ay0, const string& y1,
+                               bool ay1) {
+  std::vector<FDH::Node> nodes = {
+      {{"dx"},
+       opname,
+       {x0, x1},
+       {{"T", "$T"}, {attr_adj_x, ax0}, {attr_adj_y, ax1}}},
+      {{"dy"},
+       opname,
+       {y0, y1},
+       {{"T", "$T"}, {attr_adj_x, ay0}, {attr_adj_y, ay1}}},
+  };
+
+  *g = FDH::Define(
+      // Arg defs
+      {"x: T", "y: T", "dz: T"},
+      // Ret val defs
+      {"dx: T", "dy: T"},
+      // Attr defs
+      {{"T: {half, float, double}"}},
+      // Nodes
+      nodes);
+  return Status::OK();
+}
+
+// Gradient function: reads the forward op's attrs and dispatches to the
+// appropriate transpose case.
+Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) {
+  const string opname = "MyMatMul";
+  const string attr_adj_x = "transpose_a";
+  const string attr_adj_y = "transpose_b";
+  DataType T;
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "T", &T));
+  if (T == DT_COMPLEX64 || T == DT_COMPLEX128) {
+    return errors::Unimplemented(
+        "MatMul gradient for complex is not supported yet.");
+  }
+  bool ta;
+  bool tb;
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_x, &ta));
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_y, &tb));
+
+  if (!ta && !tb) {
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "dz", false,
+                            "y", true, "x", true, "dz", false);
+  }
+  if (!ta && tb) {
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "dz", false,
+                            "y", false, "dz", true, "x", false);
+  }
+  if (ta && !tb) {
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "y", false,
+                            "dz", true, "x", false, "dz", false);
+  }
+  CHECK(ta && tb);
+  return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "y", true, "dz",
+                          true, "dz", true, "x", true);
+}
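+
+// Sanity check for the no-transpose case: with z = x * y,
+//   dx = dz * y^T  =>  MyMatMul(dz, y, transpose_a=false, transpose_b=true)
+//   dy = x^T * dz  =>  MyMatMul(x, dz, transpose_a=true,  transpose_b=false)
+// which is exactly the first MatMulGradHelper call above.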
+
+// Register MatMulGrad as the gradient function for MyConv2D. Note that
+// MatMulGrad reads the "T", "transpose_a", and "transpose_b" attrs, so the
+// forward op must define them before this gradient can be instantiated.
+REGISTER_OP_GRADIENT("MyConv2D", MatMulGrad);