
Python module and makefile

subDesTagesMitExtraKaese 5 years ago
parent commit fcc81c165f

+ 3 - 1
.gitignore

@@ -1,4 +1,6 @@
 *.code-workspace
 .vs/
 .vscode/
-*.tar
+*.tar
+__pycache__/
+*.o

+ 1 - 0
__init__.py

@@ -0,0 +1 @@
+__all__ = ["layers", "examples", "tests"]

BIN
build/op_lib.so


+ 63 - 0
examples/train.py

@@ -0,0 +1,63 @@
+import tensorflow as tf
+import tensorflow.keras as keras
+from tensorflow.keras.layers import Dense, Dropout, Flatten
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.datasets import mnist
+from tensorflow.keras.utils import plot_model, to_categorical
+
+import sys
+sys.path.append('..')  # extend the import path so the hostLib package resolves
+from hostLib.layers.conv2D import Conv2D as Conv2DFPGA
+
+batch_size = 128
+num_classes = 10
+epochs = 1  # kept at 1 for a quick smoke test; use 12 for a full training run
+
+# input image dimensions
+img_rows, img_cols = 28, 28
+
+# the data, split between train and test sets
+(x_train, y_train), (x_test, y_test) = mnist.load_data()
+
+x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
+x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
+input_shape = (img_rows, img_cols, 1)
+
+x_train = x_train.astype('float32')
+x_test = x_test.astype('float32')
+x_train /= 255
+x_test /= 255
+print('x_train shape:', x_train.shape)
+print(x_train.shape[0], 'train samples')
+print(x_test.shape[0], 'test samples')
+
+# convert class vectors to binary class matrices
+y_train = to_categorical(y_train, num_classes)
+y_test = to_categorical(y_test, num_classes)
+
+model = Sequential()
+model.add(Conv2DFPGA([0, 0]))  # the kernel argument is stored but not yet used by the layer
+model.add(Flatten())
+model.add(Dense(128, activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(num_classes, activation='softmax'))
+
+model.compile(loss=keras.losses.categorical_crossentropy,
+              optimizer=keras.optimizers.Adadelta(),
+              metrics=['accuracy'])
+
+model.fit(x_train, y_train,
+          batch_size=batch_size,
+          epochs=epochs,
+          verbose=1,
+          validation_data=(x_test, y_test))
+
+score = model.evaluate(x_test, y_test, verbose=0)
+print('Test loss:', score[0])
+print('Test accuracy:', score[1])
+
+plot_model(model, to_file='model.png', expand_nested=True, show_shapes=True)
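
A quick way to verify the preprocessing above before a full model.fit run is to check the NHWC layout and value range of the reshaped MNIST tensors. A minimal sketch, assuming the same mnist.load_data() pipeline as in this file:

    from tensorflow.keras.datasets import mnist

    (x_train, _), _ = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32') / 255

    # expected layout: 60000 samples, 28x28 pixels, 1 channel (NHWC)
    assert x_train.shape == (60000, 28, 28, 1)
    print('value range after scaling:', x_train.min(), x_train.max())  # 0.0 1.0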

+ 2 - 0
layers/__init__.py

@@ -0,0 +1,2 @@
+
+__all__ = ["conv2D"]

+ 13 - 0
layers/conv2D.py

@@ -0,0 +1,13 @@
+import tensorflow as tf
+from tensorflow.keras import layers
+
+from .. import load_op
+
+class Conv2D(layers.Layer):
+  def __init__(self, kernel):
+    super(Conv2D, self).__init__()
+    self.kernel = kernel  # stored for later use; not yet passed to the op
+
+  def call(self, inputs):
+    # the op implementation is int32-only for now, so cast in and cast back
+    ints = tf.dtypes.cast(inputs, dtype=tf.int32)
+    # the input tensor doubles as the filter until real weights are wired up;
+    # depending on the TF version the generated wrapper may be exposed in
+    # snake_case (op_lib.my_conv2d) rather than as MyConv2D
+    outs = load_op.op_lib.MyConv2D(input=ints, filter=ints)
+    return tf.dtypes.cast(outs, dtype=tf.float32)
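
The layer can be smoke-tested outside a full model by calling it directly on a random batch. A minimal sketch, assuming build/op_lib.so has been built and the hostLib package is importable:

    import sys
    sys.path.append('..')  # adjust so that hostLib resolves

    import tensorflow as tf
    from hostLib.layers.conv2D import Conv2D as Conv2DFPGA

    x = tf.random.uniform((1, 28, 28, 1), maxval=255.0)
    layer = Conv2DFPGA([0, 0])  # kernel argument is a placeholder for now
    y = layer(x)
    print(y.shape, y.dtype)  # shape matches the input; float32 after the cast back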

+ 5 - 0
load_op.py

@@ -0,0 +1,5 @@
+import os
+import tensorflow as tf
+
+# resolve the shared library relative to this file, so the import works
+# regardless of the current working directory
+dir_path = os.path.dirname(os.path.realpath(__file__))
+op_lib = tf.load_op_library(os.path.join(dir_path, 'build', 'op_lib.so'))
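
If tf.load_op_library raises here, the usual causes are a missing build/op_lib.so or a library built against a different TensorFlow version. A small sanity check; the snake_case wrapper name is an assumption, since loaded libraries typically expose their generated wrappers in snake_case:

    import tensorflow as tf

    op_lib = tf.load_op_library('./build/op_lib.so')
    # list the generated wrappers; an op registered as "MyConv2D" usually
    # appears here as my_conv2d
    print([name for name in dir(op_lib) if not name.startswith('_')])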

+ 30 - 0
makefile

@@ -0,0 +1,30 @@
+CXX=/usr/bin/g++
+
+CFLAGS=-g -Wall -pthread -std=c++11
+LFLAGS=-shared -Wl,--no-as-needed
+
+TF_CFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))' 2>/dev/null)
+TF_LFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))' 2>/dev/null)
+
+SRC_DIR=./src
+INC_DIR=./src
+BUILD_DIR=./build
+
+SRCS=$(wildcard $(SRC_DIR)/*.cpp)
+OBJS=$(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(SRCS))
+
+# the build product is a shared library, loaded from Python via
+# tf.load_op_library
+EXECUTABLE=op_lib.so
+
+.PHONY: all dir clean
+
+all: dir $(BUILD_DIR)/$(EXECUTABLE)
+
+dir:
+	mkdir -p $(BUILD_DIR)
+
+$(BUILD_DIR)/$(EXECUTABLE): $(OBJS)
+	$(CXX) $(LFLAGS) $(TF_LFLAGS) -o $@ $^
+
+$(OBJS): $(BUILD_DIR)/%.o : $(SRC_DIR)/%.cpp $(INC_DIR)/%.hpp
+	$(CXX) $(CFLAGS) -O2 -fPIC $(TF_CFLAGS) -I$(INC_DIR) -c -o $@ $<
+
+clean:
+	rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/$(EXECUTABLE)
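
The two $(shell ...) lines capture the compile and link flags from the installed TensorFlow package; since errors are silenced by 2>/dev/null, it can be worth running the same calls by hand to confirm the environment before invoking make:

    import tensorflow as tf

    # the same values the makefile stores in TF_CFLAGS / TF_LFLAGS
    print(' '.join(tf.sysconfig.get_compile_flags()))
    print(' '.join(tf.sysconfig.get_link_flags()))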

BIN
model.png


+ 96 - 0
src/conv2D.cpp

@@ -0,0 +1,96 @@
+#ifndef CONV2D_FPGA
+#define CONV2D_FPGA
+
+#include "conv2D.hpp"
+
+void Conv2DOp::Compute(OpKernelContext* context) {
+  // Input tensor is of the following dimensions:
+  // [ batch, in_rows, in_cols, in_depth ]
+  const Tensor& input = context->input(0);
+
+  // Input filter is of the following dimensions:
+  // [ filter_rows, filter_cols, in_depth, out_depth]
+  const Tensor& filter = context->input(1);
+
+  TensorShape out_shape = input.shape();
+
+  // Output tensor is of the following dimensions:
+  // [ in_batch, out_rows, out_cols, out_depth ]
+  Tensor* output = nullptr;
+  OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
+
+  // debug print to confirm the kernel is being invoked
+  std::cout << "Conv2D" << std::endl;
+
+  // If there is nothing to compute, return.
+  if (out_shape.num_elements() == 0) {
+    return;
+  }
+
+  // TODO: the convolution itself is not implemented yet; the output
+  // buffer is allocated but left unfilled.
+}
+
+
+static Status MatMulGradHelper(FunctionDef* g, const string& opname,
+                               const string& attr_adj_x,
+                               const string& attr_adj_y, const string& x0,
+                               bool ax0, const string& x1, bool ax1,
+                               const string& y0, bool ay0, const string& y1,
+                               bool ay1) {
+  // The final outputs are "dx" and "dy", each computed by a single node
+  // of the given matmul op.
+  std::vector<FDH::Node> nodes = {
+      {{("dx")},
+       opname,
+       {x0, x1},
+       {{"T", "$T"}, {attr_adj_x, ax0}, {attr_adj_y, ax1}}},
+      {{("dy")},
+       opname,
+       {y0, y1},
+       {{"T", "$T"}, {attr_adj_x, ay0}, {attr_adj_y, ay1}}},
+  };
+
+  *g = FDH::Define(
+      // Arg defs
+      {"x: T", "y: T", "dz: T"},
+      // Ret val defs
+      {"dx: T", "dy: T"},
+      // Attr defs
+      {{"T: {half, float, double}"}},
+      // Nodes
+      nodes);
+  return Status::OK();
+}
+
+Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) {
+  const string opname = "MyMatMul";
+  const string attr_adj_x = "transpose_a";
+  const string attr_adj_y = "transpose_b";
+  DataType T;
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "T", &T));
+  if (T == DT_COMPLEX64 || T == DT_COMPLEX128) {
+    return errors::Unimplemented(
+        "MatMul gradient for complex is not supported yet.");
+  }
+  bool ta;
+  bool tb;
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_x, &ta));
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_y, &tb));
+
+  if (!ta && !tb) {
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "dz", false, "y",
+                            true, "x", true, "dz", false);
+  }
+  if (!ta && tb) {
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "dz", false, "y",
+                            false, "dz", true, "x", false);
+  }
+  if (ta && !tb) {
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "y", false, "dz",
+                            true, "x", false, "dz", false);
+  }
+  CHECK(ta && tb);
+  return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "y", true, "dz",
+                          true, "dz", true, "x", true);
+}
+
+#endif
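
MatMulGradHelper encodes the standard matmul gradients: for z = x @ y with neither operand transposed, dx = dz @ y^T and dy = x^T @ dz; the other three branches are the transposed variants. A small numerical check of the no-transpose case, using plain tf.matmul as a stand-in for MyMatMul:

    import tensorflow as tf

    x = tf.random.normal((3, 4))
    y = tf.random.normal((4, 5))

    with tf.GradientTape() as tape:
        tape.watch([x, y])
        loss = tf.reduce_sum(tf.matmul(x, y))

    dz = tf.ones((3, 5))  # gradient of reduce_sum w.r.t. z
    dx_manual = tf.matmul(dz, y, transpose_b=True)  # dz @ y^T
    dy_manual = tf.matmul(x, dz, transpose_a=True)  # x^T @ dz

    dx, dy = tape.gradient(loss, [x, y])
    print(tf.reduce_max(tf.abs(dx - dx_manual)).numpy())  # ~0
    print(tf.reduce_max(tf.abs(dy - dy_manual)).numpy())  # ~0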

+ 15 - 0
src/conv2D.hpp

@@ -0,0 +1,15 @@
+#pragma once
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/function.h"
+
+using namespace tensorflow;
+typedef FunctionDefHelper FDH;
+
+
+class Conv2DOp : public OpKernel {
+ public:
+  explicit Conv2DOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override;
+
+  //TF_DISALLOW_COPY_AND_ASSIGN(Conv2DOp);
+};

+ 7 - 0
src/entrypoint.cpp

@@ -0,0 +1,7 @@
+#ifndef ENTRY_FPGA
+#define ENTRY_FPGA
+
+
+#include "entrypoint.hpp"
+
+#endif

+ 21 - 0
src/entrypoint.hpp

@@ -0,0 +1,21 @@
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/function.h"
+
+#include "tensorflow/core/lib/math/math_util.h"
+
+#include "conv2D.hpp"
+
+
+
+REGISTER_OP("MyConv2D")
+    .Input("input: int32")
+    .Input("filter: int32")
+    .Output("output: int32")
+    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
+      // shape inference: the output currently mirrors the input shape
+      c->set_output(0, c->input(0));
+      return Status::OK();
+    });
+
+REGISTER_KERNEL_BUILDER(Name("MyConv2D").Device(DEVICE_CPU), Conv2DOp);
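
Because the shape function forwards c->input(0), the op is shape-preserving from TensorFlow's point of view. A sketch of the effect from Python, assuming the library is built to build/op_lib.so and the generated wrapper is exposed as my_conv2d:

    import tensorflow as tf

    op_lib = tf.load_op_library('./build/op_lib.so')
    x = tf.zeros((2, 28, 28, 1), dtype=tf.int32)
    y = op_lib.my_conv2d(input=x, filter=x)
    assert y.shape == x.shape  # output shape mirrors the input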

+ 0 - 0
test.py → tf-addOp/test.py


+ 29 - 0
tf-matMulOp/makefile

@@ -0,0 +1,29 @@
+CXX=/usr/bin/g++
+
+FLAGS = -g -Wall -pthread -std=c++11
+
+TF_CFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))' 2>/dev/null)
+TF_LFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))' 2>/dev/null)
+
+SRC_DIR=.
+INC_DIR=.
+BUILD_DIR=.
+
+SRCS=$(wildcard $(SRC_DIR)/*.cpp)
+OBJS=$(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(SRCS))
+
+EXECUTABLE=matMul.so
+
+.PHONY: all dir clean
+
+all: dir $(BUILD_DIR)/$(EXECUTABLE)
+
+dir:
+	mkdir -p $(BUILD_DIR)
+
+$(BUILD_DIR)/$(EXECUTABLE): $(OBJS)
+	$(CXX) -shared $(TF_LFLAGS) -Wall -o $@ $^
+
+$(OBJS): $(BUILD_DIR)/%.o : $(SRC_DIR)/%.cpp $(INC_DIR)/%.hpp
+	$(CXX) $(FLAGS) -fPIC -c $(TF_CFLAGS) -I$(INC_DIR) -o $@ $< -O2
+
+clean:
+	rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/$(EXECUTABLE)

BIN
tf-matMulOp/matMul.so


+ 0 - 54
tf-matMulOp/train.py

@@ -46,61 +46,7 @@ class Conv2DFPGA(layers.Layer):
     outs = my_matmul_module.MyConv2D(input=ints, filter=ints)
     return tf.dtypes.cast(outs, dtype=tf.float32)
 
-class MyConv2D(layers.Conv2D):
-
-  def __init__(self,
-               filters,
-               kernel_size,
-               strides=(1, 1),
-               padding='valid',
-               data_format=None,
-               dilation_rate=(1, 1),
-               activation=None,
-               use_bias=True,
-               kernel_initializer='glorot_uniform',
-               bias_initializer='zeros',
-               kernel_regularizer=None,
-               bias_regularizer=None,
-               activity_regularizer=None,
-               kernel_constraint=None,
-               bias_constraint=None,
-               **kwargs):
-    super(MyConv2D, self).__init__(
-        filters=filters,
-        kernel_size=kernel_size,
-        strides=strides,
-        padding=padding,
-        data_format=data_format,
-        dilation_rate=dilation_rate,
-        activation=activation,
-        use_bias=use_bias,
-        kernel_initializer=kernel_initializer,
-        bias_initializer=bias_initializer,
-        kernel_regularizer=kernel_regularizer,
-        bias_regularizer=bias_regularizer,
-        activity_regularizer=activity_regularizer,
-        kernel_constraint=kernel_constraint,
-        bias_constraint=bias_constraint,
-        **kwargs)
-  def call(self, inputs):
-      #inputs.get_shape(),
-      #filter_shape=self.kernel.shape,
-      #dilation_rate=self.dilation_rate,
-      #strides=self.strides,
-      #padding=self._padding_op,
-      #data_format=self._conv_op_data_format)
-
-      #kernel.shape.ndims
-      #inputs.get_shape().ndims
-    if self.rank == 1 and inputs.get_shape(): #fpga restriction
-      return my_matmul_module.MyConv2D(inputs, self.kernel)
-    else:
-      return super(MyConv2D, self).call(inputs)
-
 model = Sequential()
-model.add(MyConv2D(32, kernel_size=(3, 3),
-                 activation='relu',
-                 input_shape=input_shape))
 model.add(Conv2DFPGA([0,0]))
 model.add(Flatten())
 model.add(Dense(128, activation='relu'))