subDesTagesMitExtraKaese преди 5 години
родител
ревизия
9e43327607
променени са 8 файла, в които са добавени 308 реда и са изтрити 2 реда
  1. 2 1
      .gitignore
  2. 6 1
      tf-addOp/train.py
  3. 1 0
      tf-addOp/zero_out.cc
  4. BIN
      tf-addOp/zero_out.so
  5. 10 0
      tf-matMulOp/build.sh
  6. 165 0
      tf-matMulOp/matMul.cc
  7. BIN
      tf-matMulOp/matMul.so
  8. 124 0
      tf-matMulOp/train.py

+ 2 - 1
.gitignore

@@ -1,3 +1,4 @@
 *.code-workspace
 .vs/
-.vscode/
+.vscode/
+*.tar

+ 6 - 1
tf-addOp/train.py

@@ -58,7 +58,12 @@ model.add(Linear())
 model.compile(loss=keras.losses.categorical_crossentropy,
               optimizer=keras.optimizers.Adadelta(),
               metrics=['accuracy'])
-
+              
+model.fit(x_train, y_train,
+          batch_size=batch_size,
+          epochs=epochs,
+          verbose=1,
+          validation_data=(x_test, y_test))
 
 score = model.evaluate(x_test, y_test, verbose=0)
 print('Test loss:', score[0])

+ 1 - 0
tf-addOp/zero_out.cc

@@ -1,5 +1,6 @@
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/function.h"
 
 using namespace tensorflow;
 

BIN
tf-addOp/zero_out.so


+ 10 - 0
tf-matMulOp/build.sh

@@ -0,0 +1,10 @@
+#!/bin/bash
+#
+# Builds matMul.so, the TensorFlow custom-op shared library, from matMul.cc.
+#
+# TF_CFLAGS / TF_LFLAGS may be preset in the environment as whitespace-separated
+# flag strings. When unset or empty they are queried from the tensorflow
+# package visible to python3.
+
+# Stop at the first failing command instead of running g++ with missing flags.
+set -eo pipefail
+
+# Prints the flags returned by tf.sysconfig.<$1>() on a single line.
+# stderr is discarded so that only the flag list is shown/captured.
+query_tf_flags() {
+  python3 -c "import tensorflow as tf; print(\" \".join(tf.sysconfig.$1()))" 2>/dev/null
+}
+
+if [ -z "${TF_CFLAGS:-}" ]; then
+  TF_CFLAGS=$(query_tf_flags get_compile_flags) || {
+    echo "build.sh: could not query TensorFlow compile flags via python3" >&2
+    exit 1
+  }
+fi
+if [ -z "${TF_LFLAGS:-}" ]; then
+  TF_LFLAGS=$(query_tf_flags get_link_flags) || {
+    echo "build.sh: could not query TensorFlow link flags via python3" >&2
+    exit 1
+  }
+fi
+
+# Split the flag strings into arrays so each flag reaches g++ as its own word.
+read -r -a tf_cflags <<< "${TF_CFLAGS}"
+read -r -a tf_lflags <<< "${TF_LFLAGS}"
+
+g++ -g -std=c++11 -shared matMul.cc -o matMul.so -fPIC "${tf_cflags[@]}" "${tf_lflags[@]}" -O2 -Wall

+ 165 - 0
tf-matMulOp/matMul.cc

@@ -0,0 +1,165 @@
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/function.h"
+
+#include "tensorflow/core/lib/math/math_util.h"
+
+using namespace tensorflow;
+typedef FunctionDefHelper FDH;
+
+// Op "MyMatMul": one int32 input ("to_zero") and one int32 output ("zeroed").
+// NOTE(review): no kernel for this op is registered in the visible part of
+// this file — confirm whether one is still to be added.
+REGISTER_OP("MyMatMul")
+    .Input("to_zero: int32")
+    .Output("zeroed: int32")
+    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* ctx) {
+      // The output shape is taken over unchanged from input 0.
+      ctx->set_output(0, ctx->input(0));
+      return Status::OK();
+    });
+
+// Op "MyConv2D": int32 inputs "input" and "filter", int32 output "output".
+REGISTER_OP("MyConv2D")
+    .Input("input: int32")
+    .Input("filter: int32")
+    .Output("output: int32")
+    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* ctx) {
+      // The output shape is reported as identical to the shape of "input";
+      // the filter shape does not influence it.
+      ctx->set_output(0, ctx->input(0));
+      return Status::OK();
+    });
+    
+#include "tensorflow/core/framework/op_kernel.h"
+
+using namespace tensorflow;
+/*
+class Conv2DOp : public OpKernel {
+ public:
+  explicit Conv2DOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Grab the input tensor
+    const Tensor& input_tensor = context->input(0);
+    auto input = input_tensor.flat<int32>();
+
+    printf("call n: %d\n", n++);
+
+    // Create an output tensor
+    Tensor* output_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
+                                                     &output_tensor));
+    auto output_flat = output_tensor->flat<int32>();
+
+    // Set all but the first element of the output tensor to 0.
+    const int N = input.size();
+    
+    for (int i = 1; i < N; i++) {
+      output_flat(i) = 0;
+    }
+    // Preserve the first input value if possible.
+    if (N > 0) output_flat(0) = input(0);
+  }
+
+  int n = 0;
+};
+*/
+
+
+// CPU kernel for the "MyConv2D" op.
+//
+// The convolution itself is not implemented yet: the kernel allocates an
+// output with the same shape as input 0 and fills it with zeros, so that the
+// op returns defined values instead of whatever the allocator handed back.
+class Conv2DOp : public OpKernel {
+ public:
+  explicit Conv2DOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Input tensor is expected to have the following dimensions:
+    // [ batch, in_rows, in_cols, in_depth ]
+    const Tensor& input = context->input(0);
+
+    // Input 1 is the filter, expected as
+    // [ filter_rows, filter_cols, in_depth, out_depth ].
+    // It is not read here until the convolution is implemented.
+
+    // The output currently takes over the input's shape, which matches the
+    // shape function registered for "MyConv2D".
+    const TensorShape out_shape = input.shape();
+
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
+
+    // Debug trace showing that the custom kernel was invoked.
+    std::cout << "Conv2D" << std::endl;
+
+    // If there is nothing to compute, return.
+    if (out_shape.num_elements() == 0) {
+      return;
+    }
+
+    // allocate_output() does not initialize the buffer; write zeros so the
+    // placeholder result is deterministic.
+    output->flat<int32>().setZero();
+  }
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(Conv2DOp);
+};
+
+
+REGISTER_KERNEL_BUILDER(Name("MyConv2D").Device(DEVICE_CPU), Conv2DOp);
+
+// Fills *g with a function definition that takes (x, y, dz) and returns
+// (dx, dy). Each result is produced by a single `opname` node:
+//   dx = opname(x0, x1)  with attr_adj_x = ax0 and attr_adj_y = ax1
+//   dy = opname(y0, y1)  with attr_adj_x = ay0 and attr_adj_y = ay1
+// The type attr "T" is forwarded to both nodes. Always returns Status::OK().
+static Status MatMulGradHelper(FunctionDef* g, const string& opname,
+                               const string& attr_adj_x,
+                               const string& attr_adj_y, const string& x0,
+                               bool ax0, const string& x1, bool ax1,
+                               const string& y0, bool ay0, const string& y1,
+                               bool ay1) {
+  // Node computing the gradient with respect to x.
+  const FDH::Node dx_node = {
+      {"dx"},
+      opname,
+      {x0, x1},
+      {{"T", "$T"}, {attr_adj_x, ax0}, {attr_adj_y, ax1}}};
+  // Node computing the gradient with respect to y.
+  const FDH::Node dy_node = {
+      {"dy"},
+      opname,
+      {y0, y1},
+      {{"T", "$T"}, {attr_adj_x, ay0}, {attr_adj_y, ay1}}};
+  const std::vector<FDH::Node> grad_nodes = {dx_node, dy_node};
+
+  *g = FDH::Define(
+      /*arg defs=*/{"x: T", "y: T", "dz: T"},
+      /*ret val defs=*/{"dx: T", "dy: T"},
+      /*attr defs=*/{{"T: {half, float, double}"}},
+      /*nodes=*/grad_nodes);
+  return Status::OK();
+}
+// Builds the gradient function definition for a matrix multiplication.
+//
+// Reads the attrs "T", "transpose_a" and "transpose_b" from `attrs` (any
+// lookup error is returned to the caller), rejects complex element types with
+// an Unimplemented error, and otherwise delegates to MatMulGradHelper with the
+// operand order and transpose flags that belong to the given combination of
+// transpose_a / transpose_b.
+Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) {
+  const string op_name = "MyMatMul";
+  const string adj_x_attr = "transpose_a";
+  const string adj_y_attr = "transpose_b";
+
+  DataType dtype;
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "T", &dtype));
+  if (dtype == DT_COMPLEX64 || dtype == DT_COMPLEX128) {
+    return errors::Unimplemented(
+        "MatMul gradient for complex is not supported yet.");
+  }
+
+  bool transpose_a;
+  bool transpose_b;
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, adj_x_attr, &transpose_a));
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, adj_y_attr, &transpose_b));
+
+  if (transpose_a) {
+    if (transpose_b) {
+      // Both operands transposed.
+      return MatMulGradHelper(g, op_name, adj_x_attr, adj_y_attr, "y", true,
+                              "dz", true, "dz", true, "x", true);
+    }
+    // Only the first operand transposed.
+    return MatMulGradHelper(g, op_name, adj_x_attr, adj_y_attr, "y", false,
+                            "dz", true, "x", false, "dz", false);
+  }
+  if (transpose_b) {
+    // Only the second operand transposed.
+    return MatMulGradHelper(g, op_name, adj_x_attr, adj_y_attr, "dz", false,
+                            "y", false, "dz", true, "x", false);
+  }
+  // Neither operand transposed.
+  return MatMulGradHelper(g, op_name, adj_x_attr, adj_y_attr, "dz", false, "y",
+                          true, "x", true, "dz", false);
+}
+
+// NOTE(review): the gradient is attached to "MyConv2D", but that op is
+// registered in this file without the attrs "T", "transpose_a" or
+// "transpose_b" that MatMulGrad reads, and the generated nodes call
+// "MyMatMul". Presumably the attr lookups fail as soon as a gradient is
+// requested — confirm the intended op and attr set.
+REGISTER_OP_GRADIENT("MyConv2D", MatMulGrad);

BIN
tf-matMulOp/matMul.so


+ 124 - 0
tf-matMulOp/train.py

@@ -0,0 +1,124 @@
+import tensorflow as tf
+import tensorflow.keras as keras
+from tensorflow.keras import layers
+from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, Flatten, MaxPooling2D, Conv2D
+from tensorflow.keras.models import Model, Sequential
+from tensorflow.keras.datasets import mnist
+from tensorflow.keras.utils import plot_model, to_categorical
+
+import numpy as np
+from IPython import embed
+
+my_matmul_module = tf.load_op_library('./matMul.so')
+
+batch_size = 128
+num_classes = 10
+epochs = 1 # 12
+
+# input image dimensions
+img_rows, img_cols = 28, 28
+
+# the data, split between train and test sets
+(x_train, y_train), (x_test, y_test) = mnist.load_data()
+
+x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
+x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
+input_shape = (img_rows, img_cols, 1)
+
+x_train = x_train.astype('float32')
+x_test = x_test.astype('float32')
+x_train /= 255
+x_test /= 255
+print('x_train shape:', x_train.shape)
+print(x_train.shape[0], 'train samples')
+print(x_test.shape[0], 'test samples')
+
+# convert class vectors to binary class matrices
+y_train = to_categorical(y_train, num_classes)
+y_test = to_categorical(y_test, num_classes)
+
+class Conv2DFPGA(layers.Layer):
+  def __init__(self, kernel):
+    super(Conv2DFPGA, self).__init__()
+    self.kernel = kernel
+  def call(self, inputs):
+    ints = tf.dtypes.cast(inputs, dtype=tf.int32)
+    outs = my_matmul_module.MyConv2D(input=ints, filter=ints)
+    return tf.dtypes.cast(outs, dtype=tf.float32)
+
+class MyConv2D(layers.Conv2D):
+
+  def __init__(self,
+               filters,
+               kernel_size,
+               strides=(1, 1),
+               padding='valid',
+               data_format=None,
+               dilation_rate=(1, 1),
+               activation=None,
+               use_bias=True,
+               kernel_initializer='glorot_uniform',
+               bias_initializer='zeros',
+               kernel_regularizer=None,
+               bias_regularizer=None,
+               activity_regularizer=None,
+               kernel_constraint=None,
+               bias_constraint=None,
+               **kwargs):
+    super(MyConv2D, self).__init__(
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding=padding,
+        data_format=data_format,
+        dilation_rate=dilation_rate,
+        activation=activation,
+        use_bias=use_bias,
+        kernel_initializer=kernel_initializer,
+        bias_initializer=bias_initializer,
+        kernel_regularizer=kernel_regularizer,
+        bias_regularizer=bias_regularizer,
+        activity_regularizer=activity_regularizer,
+        kernel_constraint=kernel_constraint,
+        bias_constraint=bias_constraint,
+        **kwargs)
+  def call(self, inputs):
+      #inputs.get_shape(),
+      #filter_shape=self.kernel.shape,
+      #dilation_rate=self.dilation_rate,
+      #strides=self.strides,
+      #padding=self._padding_op,
+      #data_format=self._conv_op_data_format)
+
+      #kernel.shape.ndims
+      #inputs.get_shape().ndims
+    if self.rank == 1 and inputs.get_shape(): #fpga restriction
+      return my_matmul_module.MyConv2D(inputs, self.kernel)
+    else:
+      return super(MyConv2D, self).call(inputs)
+
+model = Sequential()
+model.add(MyConv2D(32, kernel_size=(3, 3),
+                 activation='relu',
+                 input_shape=input_shape))
+model.add(Conv2DFPGA([0,0]))
+model.add(Flatten())
+model.add(Dense(128, activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(num_classes, activation='softmax'))
+
+model.compile(loss=keras.losses.categorical_crossentropy,
+              optimizer=keras.optimizers.Adadelta(),
+              metrics=['accuracy'])
+              
+model.fit(x_train, y_train,
+          batch_size=batch_size,
+          epochs=epochs,
+          verbose=1,
+          validation_data=(x_test, y_test))
+
+score = model.evaluate(x_test, y_test, verbose=0)
+print('Test loss:', score[0])
+print('Test accuracy:', score[1])
+
+plot_model(model, to_file='model.png', expand_nested=True, show_shapes=True)