
Python module and makefile

subDesTagesMitExtraKaese 5 years ago
parent commit fcc81c165f

+ 3 - 1
.gitignore

@@ -1,4 +1,6 @@
 *.code-workspace
 .vs/
 .vscode/
-*.tar
+*.tar
+__pycache__/
+*.o

+ 1 - 0
__init__.py

@@ -0,0 +1 @@
+__all__ = ["layers", "examples", "tests"]

BIN
build/op_lib.so


+ 63 - 0
examples/train.py

@@ -0,0 +1,63 @@
+import tensorflow as tf
+import tensorflow.keras as keras
+from tensorflow.keras.layers import Dense, Dropout, Flatten
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.datasets import mnist
+from tensorflow.keras.utils import plot_model, to_categorical
+
+import sys
+sys.path.append('..')  # extend the import path so the hostLib package resolves
+from hostLib.layers.conv2D import Conv2D as Conv2DFPGA
+
+batch_size = 128
+num_classes = 10
+epochs = 1  # kept at 1 for a quick smoke test; use 12 for a full training run
+
+# input image dimensions
+img_rows, img_cols = 28, 28
+
+# the data, split between train and test sets
+(x_train, y_train), (x_test, y_test) = mnist.load_data()
+
+x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
+x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
+input_shape = (img_rows, img_cols, 1)
+
+x_train = x_train.astype('float32')
+x_test = x_test.astype('float32')
+x_train /= 255
+x_test /= 255
+print('x_train shape:', x_train.shape)
+print(x_train.shape[0], 'train samples')
+print(x_test.shape[0], 'test samples')
+
+# convert class vectors to binary class matrices
+y_train = to_categorical(y_train, num_classes)
+y_test = to_categorical(y_test, num_classes)
+
+model = Sequential()
+model.add(Conv2DFPGA([0, 0]))  # the kernel argument is stored but not yet used by the layer
+model.add(Flatten())
+model.add(Dense(128, activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(num_classes, activation='softmax'))
+
+model.compile(loss=keras.losses.categorical_crossentropy,
+              optimizer=keras.optimizers.Adadelta(),
+              metrics=['accuracy'])
+
+model.fit(x_train, y_train,
+          batch_size=batch_size,
+          epochs=epochs,
+          verbose=1,
+          validation_data=(x_test, y_test))
+
+score = model.evaluate(x_test, y_test, verbose=0)
+print('Test loss:', score[0])
+print('Test accuracy:', score[1])
+
+plot_model(model, to_file='model.png', expand_nested=True, show_shapes=True)
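
A quick way to verify the preprocessing above before a full model.fit run is to check the NHWC layout and value range of the reshaped MNIST tensors. A minimal sketch, assuming the same mnist.load_data() pipeline as in this file:

    from tensorflow.keras.datasets import mnist

    (x_train, _), _ = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32') / 255

    # expected layout: 60000 samples, 28x28 pixels, 1 channel (NHWC)
    assert x_train.shape == (60000, 28, 28, 1)
    print('value range after scaling:', x_train.min(), x_train.max())  # 0.0 1.0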

+ 2 - 0
layers/__init__.py

@@ -0,0 +1,2 @@
+
+__all__ = ["conv2D"]

+ 13 - 0
layers/conv2D.py

@@ -0,0 +1,13 @@
+import tensorflow as tf
+from tensorflow.keras import layers
+
+from .. import load_op
+
+class Conv2D(layers.Layer):
+  def __init__(self, kernel):
+    super(Conv2D, self).__init__()
+    self.kernel = kernel  # stored for later use; not yet passed to the op
+
+  def call(self, inputs):
+    # the op implementation is int32-only for now, so cast in and cast back
+    ints = tf.dtypes.cast(inputs, dtype=tf.int32)
+    # the input tensor doubles as the filter until real weights are wired up;
+    # depending on the TF version the generated wrapper may be exposed in
+    # snake_case (op_lib.my_conv2d) rather than as MyConv2D
+    outs = load_op.op_lib.MyConv2D(input=ints, filter=ints)
+    return tf.dtypes.cast(outs, dtype=tf.float32)
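
The layer can be smoke-tested outside a full model by calling it directly on a random batch. A minimal sketch, assuming build/op_lib.so has been built and the hostLib package is importable:

    import sys
    sys.path.append('..')  # adjust so that hostLib resolves

    import tensorflow as tf
    from hostLib.layers.conv2D import Conv2D as Conv2DFPGA

    x = tf.random.uniform((1, 28, 28, 1), maxval=255.0)
    layer = Conv2DFPGA([0, 0])  # kernel argument is a placeholder for now
    y = layer(x)
    print(y.shape, y.dtype)  # shape matches the input; float32 after the cast back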

+ 5 - 0
load_op.py

@@ -0,0 +1,5 @@
+import os
+import tensorflow as tf
+
+# resolve the shared library relative to this file, so the import works
+# regardless of the current working directory
+dir_path = os.path.dirname(os.path.realpath(__file__))
+op_lib = tf.load_op_library(os.path.join(dir_path, 'build', 'op_lib.so'))
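
If tf.load_op_library raises here, the usual causes are a missing build/op_lib.so or a library built against a different TensorFlow version. A small sanity check; the snake_case wrapper name is an assumption, since loaded libraries typically expose their generated wrappers in snake_case:

    import tensorflow as tf

    op_lib = tf.load_op_library('./build/op_lib.so')
    # list the generated wrappers; an op registered as "MyConv2D" usually
    # appears here as my_conv2d
    print([name for name in dir(op_lib) if not name.startswith('_')])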

+ 30 - 0
makefile

@@ -0,0 +1,30 @@
+CXX=/usr/bin/g++
+
+CFLAGS=-g -Wall -pthread -std=c++11
+LFLAGS=-shared -Wl,--no-as-needed
+
+TF_CFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))' 2>/dev/null)
+TF_LFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))' 2>/dev/null)
+
+SRC_DIR=./src
+INC_DIR=./src
+BUILD_DIR=./build
+
+SRCS=$(wildcard $(SRC_DIR)/*.cpp)
+OBJS=$(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(SRCS))
+
+# the build product is a shared library, loaded from Python via
+# tf.load_op_library
+EXECUTABLE=op_lib.so
+
+.PHONY: all dir clean
+
+all: dir $(BUILD_DIR)/$(EXECUTABLE)
+
+dir:
+	mkdir -p $(BUILD_DIR)
+
+$(BUILD_DIR)/$(EXECUTABLE): $(OBJS)
+	$(CXX) $(LFLAGS) $(TF_LFLAGS) -o $@ $^
+
+$(OBJS): $(BUILD_DIR)/%.o : $(SRC_DIR)/%.cpp $(INC_DIR)/%.hpp
+	$(CXX) $(CFLAGS) -O2 -fPIC $(TF_CFLAGS) -I$(INC_DIR) -c -o $@ $<
+
+clean:
+	rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/$(EXECUTABLE)
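
The two $(shell ...) lines capture the compile and link flags from the installed TensorFlow package; since errors are silenced by 2>/dev/null, it can be worth running the same calls by hand to confirm the environment before invoking make:

    import tensorflow as tf

    # the same values the makefile stores in TF_CFLAGS / TF_LFLAGS
    print(' '.join(tf.sysconfig.get_compile_flags()))
    print(' '.join(tf.sysconfig.get_link_flags()))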

BIN
model.png


+ 96 - 0
src/conv2D.cpp

@@ -0,0 +1,96 @@
+#ifndef CONV2D_FPGA
+#define CONV2D_FPGA
+
+#include "conv2D.hpp"
+
+void Conv2DOp::Compute(OpKernelContext* context) {
+  // Input tensor is of the following dimensions:
+  // [ batch, in_rows, in_cols, in_depth ]
+  const Tensor& input = context->input(0);
+
+  // Input filter is of the following dimensions:
+  // [ filter_rows, filter_cols, in_depth, out_depth]
+  const Tensor& filter = context->input(1);
+
+  TensorShape out_shape = input.shape();
+
+  // Output tensor is of the following dimensions:
+  // [ in_batch, out_rows, out_cols, out_depth ]
+  Tensor* output = nullptr;
+  OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
+
+  // debug print to confirm the kernel is being invoked
+  std::cout << "Conv2D" << std::endl;
+
+  // If there is nothing to compute, return.
+  if (out_shape.num_elements() == 0) {
+    return;
+  }
+
+  // TODO: the convolution itself is not implemented yet; the output
+  // buffer is allocated but left unfilled.
+}
+
+
+static Status MatMulGradHelper(FunctionDef* g, const string& opname,
+                               const string& attr_adj_x,
+                               const string& attr_adj_y, const string& x0,
+                               bool ax0, const string& x1, bool ax1,
+                               const string& y0, bool ay0, const string& y1,
+                               bool ay1) {
+  // The final outputs are "dx" and "dy", each computed by a single node
+  // of the given matmul op.
+  std::vector<FDH::Node> nodes = {
+      {{("dx")},
+       opname,
+       {x0, x1},
+       {{"T", "$T"}, {attr_adj_x, ax0}, {attr_adj_y, ax1}}},
+      {{("dy")},
+       opname,
+       {y0, y1},
+       {{"T", "$T"}, {attr_adj_x, ay0}, {attr_adj_y, ay1}}},
+  };
+
+  *g = FDH::Define(
+      // Arg defs
+      {"x: T", "y: T", "dz: T"},
+      // Ret val defs
+      {"dx: T", "dy: T"},
+      // Attr defs
+      {{"T: {half, float, double}"}},
+      // Nodes
+      nodes);
+  return Status::OK();
+}
+
+Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) {
+  const string opname = "MyMatMul";
+  const string attr_adj_x = "transpose_a";
+  const string attr_adj_y = "transpose_b";
+  DataType T;
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "T", &T));
+  if (T == DT_COMPLEX64 || T == DT_COMPLEX128) {
+    return errors::Unimplemented(
+        "MatMul gradient for complex is not supported yet.");
+  }
+  bool ta;
+  bool tb;
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_x, &ta));
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_y, &tb));
+
+  if (!ta && !tb) {
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "dz", false, "y",
+                            true, "x", true, "dz", false);
+  }
+  if (!ta && tb) {
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "dz", false, "y",
+                            false, "dz", true, "x", false);
+  }
+  if (ta && !tb) {
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "y", false, "dz",
+                            true, "x", false, "dz", false);
+  }
+  CHECK(ta && tb);
+  return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "y", true, "dz",
+                          true, "dz", true, "x", true);
+}
+
+#endif
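
MatMulGradHelper encodes the standard matmul gradients: for z = x @ y with neither operand transposed, dx = dz @ y^T and dy = x^T @ dz; the other three branches are the transposed variants. A small numerical check of the no-transpose case, using plain tf.matmul as a stand-in for MyMatMul:

    import tensorflow as tf

    x = tf.random.normal((3, 4))
    y = tf.random.normal((4, 5))

    with tf.GradientTape() as tape:
        tape.watch([x, y])
        loss = tf.reduce_sum(tf.matmul(x, y))

    dz = tf.ones((3, 5))  # gradient of reduce_sum w.r.t. z
    dx_manual = tf.matmul(dz, y, transpose_b=True)  # dz @ y^T
    dy_manual = tf.matmul(x, dz, transpose_a=True)  # x^T @ dz

    dx, dy = tape.gradient(loss, [x, y])
    print(tf.reduce_max(tf.abs(dx - dx_manual)).numpy())  # ~0
    print(tf.reduce_max(tf.abs(dy - dy_manual)).numpy())  # ~0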

+ 15 - 0
src/conv2D.hpp

@@ -0,0 +1,15 @@
+#pragma once
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/function.h"
+
+using namespace tensorflow;
+typedef FunctionDefHelper FDH;
+
+
+class Conv2DOp : public OpKernel {
+ public:
+  explicit Conv2DOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override;
+
+  //TF_DISALLOW_COPY_AND_ASSIGN(Conv2DOp);
+};

+ 7 - 0
src/entrypoint.cpp

@@ -0,0 +1,7 @@
+#ifndef ENTRY_FPGA
+#define ENTRY_FPGA
+
+
+#include "entrypoint.hpp"
+
+#endif

+ 21 - 0
src/entrypoint.hpp

@@ -0,0 +1,21 @@
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/function.h"
+
+#include "tensorflow/core/lib/math/math_util.h"
+
+#include "conv2D.hpp"
+
+
+
+REGISTER_OP("MyConv2D")
+    .Input("input: int32")
+    .Input("filter: int32")
+    .Output("output: int32")
+    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
+      // shape inference: the output currently mirrors the input shape
+      c->set_output(0, c->input(0));
+      return Status::OK();
+    });
+
+REGISTER_KERNEL_BUILDER(Name("MyConv2D").Device(DEVICE_CPU), Conv2DOp);
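
Because the shape function forwards c->input(0), the op is shape-preserving from TensorFlow's point of view. A sketch of the effect from Python, assuming the library is built to build/op_lib.so and the generated wrapper is exposed as my_conv2d:

    import tensorflow as tf

    op_lib = tf.load_op_library('./build/op_lib.so')
    x = tf.zeros((2, 28, 28, 1), dtype=tf.int32)
    y = op_lib.my_conv2d(input=x, filter=x)
    assert y.shape == x.shape  # output shape mirrors the input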

+ 0 - 0
test.py → tf-addOp/test.py


+ 29 - 0
tf-matMulOp/makefile

@@ -0,0 +1,29 @@
+CXX=/usr/bin/g++
+
+FLAGS = -g -Wall -pthread -std=c++11
+
+TF_CFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))' 2>/dev/null)
+TF_LFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))' 2>/dev/null)
+
+SRC_DIR=.
+INC_DIR=.
+BUILD_DIR=.
+
+SRCS=$(wildcard $(SRC_DIR)/*.cpp)
+OBJS=$(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(SRCS))
+
+EXECUTABLE=matMul.so
+
+.PHONY: all dir clean
+
+all: dir $(BUILD_DIR)/$(EXECUTABLE)
+
+dir:
+	mkdir -p $(BUILD_DIR)
+
+$(BUILD_DIR)/$(EXECUTABLE): $(OBJS)
+	$(CXX) -shared $(TF_LFLAGS) -Wall -o $@ $^
+
+$(OBJS): $(BUILD_DIR)/%.o : $(SRC_DIR)/%.cpp $(INC_DIR)/%.hpp
+	$(CXX) $(FLAGS) -fPIC -c $(TF_CFLAGS) -I$(INC_DIR) -o $@ $< -O2
+
+clean:
+	rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/$(EXECUTABLE)

BIN
tf-matMulOp/matMul.so


+ 0 - 54
tf-matMulOp/train.py

@@ -46,61 +46,7 @@ class Conv2DFPGA(layers.Layer):
     outs = my_matmul_module.MyConv2D(input=ints, filter=ints)
     return tf.dtypes.cast(outs, dtype=tf.float32)
 
-class MyConv2D(layers.Conv2D):
-
-  def __init__(self,
-               filters,
-               kernel_size,
-               strides=(1, 1),
-               padding='valid',
-               data_format=None,
-               dilation_rate=(1, 1),
-               activation=None,
-               use_bias=True,
-               kernel_initializer='glorot_uniform',
-               bias_initializer='zeros',
-               kernel_regularizer=None,
-               bias_regularizer=None,
-               activity_regularizer=None,
-               kernel_constraint=None,
-               bias_constraint=None,
-               **kwargs):
-    super(MyConv2D, self).__init__(
-        filters=filters,
-        kernel_size=kernel_size,
-        strides=strides,
-        padding=padding,
-        data_format=data_format,
-        dilation_rate=dilation_rate,
-        activation=activation,
-        use_bias=use_bias,
-        kernel_initializer=kernel_initializer,
-        bias_initializer=bias_initializer,
-        kernel_regularizer=kernel_regularizer,
-        bias_regularizer=bias_regularizer,
-        activity_regularizer=activity_regularizer,
-        kernel_constraint=kernel_constraint,
-        bias_constraint=bias_constraint,
-        **kwargs)
-  def call(self, inputs):
-      #inputs.get_shape(),
-      #filter_shape=self.kernel.shape,
-      #dilation_rate=self.dilation_rate,
-      #strides=self.strides,
-      #padding=self._padding_op,
-      #data_format=self._conv_op_data_format)
-
-      #kernel.shape.ndims
-      #inputs.get_shape().ndims
-    if self.rank == 1 and inputs.get_shape(): #fpga restriction
-      return my_matmul_module.MyConv2D(inputs, self.kernel)
-    else:
-      return super(MyConv2D, self).call(inputs)
-
 model = Sequential()
-model.add(MyConv2D(32, kernel_size=(3, 3),
-                 activation='relu',
-                 input_shape=input_shape))
 model.add(Conv2DFPGA([0,0]))
 model.add(Flatten())
 model.add(Dense(128, activation='relu'))