subDesTagesMitExtraKaese 5 жил өмнө
parent
commit
0176979b4b
8 өөрчлөгдсөн 120 нэмэгдсэн , 26 устгасан
  1. 2 2
      .gitignore
  2. BIN
      build/op_lib.so
  3. 12 0
      configure
  4. 15 1
      examples/train.py
  5. 46 5
      layers/conv2D.py
  6. 9 7
      makefile
  7. 27 7
      src/conv2D.cpp
  8. 9 4
      src/conv2D.hpp

+ 2 - 2
.gitignore

@@ -2,5 +2,5 @@
 .vs/
 .vscode/
 *.tar
-__pycache__/
-*.o
+__pycache__
+/build/

BIN
build/op_lib.so


+ 12 - 0
configure

@@ -0,0 +1,12 @@
+#!/bin/bash
+
+BUILD_DIR=$1
+if [ "" = "$BUILD_DIR" ]; then
+    BUILD_DIR='./build'
+fi
+
+mkdir -p $BUILD_DIR
+
+python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))' > $BUILD_DIR/TF_CFLAGS
+python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))' > $BUILD_DIR/TF_LFLAGS
+

+ 15 - 1
examples/train.py

@@ -39,13 +39,27 @@ print(x_test.shape[0], 'test samples')
 y_train = to_categorical(y_train, num_classes)
 y_test = to_categorical(y_test, num_classes)
 
+a = layers.Input(shape=(28, 28, 1))
+b = Conv2DFPGA(2)(a)
+c = Conv2DFPGA(2)(a)
+d = Conv2DFPGA(2)(a)
+e = Conv2DFPGA(2)(a)
+
+x = layers.Add()([b,c,d,e])
+y = layers.Flatten()(x)
+z = layers.Dense(num_classes, activation='softmax')(y)
+
+model = Model(inputs=a, outputs=z)
+"""
 model = Sequential()
 model.add(Conv2DFPGA([0,0]))
+model.add(Conv2DFPGA([0,0]))
+model.add(Conv2DFPGA([0,0]))
 model.add(Flatten())
 model.add(Dense(128, activation='relu'))
 model.add(Dropout(0.5))
 model.add(Dense(num_classes, activation='softmax'))
-
+"""
 model.compile(loss=keras.losses.categorical_crossentropy,
               optimizer=keras.optimizers.Adadelta(),
               metrics=['accuracy'])

+ 46 - 5
layers/conv2D.py

@@ -1,13 +1,54 @@
 import tensorflow as tf
-from tensorflow.keras import layers
+from tensorflow.python.framework import tensor_shape
+from tensorflow.keras import layers, initializers, regularizers, constraints
 
 from .. import load_op
 
 class Conv2D(layers.Layer):
-  def __init__(self, kernel):
+  def __init__(self,
+    filters = 1,
+    kernel_initializer = 'glorot_uniform',
+               kernel_regularizer=None,
+               kernel_constraint=None,
+    ):
     super(Conv2D, self).__init__()
-    self.kernel = kernel
+    #int, dim of output space
+    self.filters = filters
+    self.kernel_initializer = initializers.get(kernel_initializer)
+    self.kernel_regularizer = regularizers.get(kernel_regularizer)
+    self.kernel_constraint = constraints.get(kernel_constraint)
+
+
+  def build(self, input_shape):
+    input_shape = tf.TensorShape(input_shape)
+    self.input_channel = input_shape[3]
+    kernel_shape = (5,)*2 + (self.input_channel, self.filters)
+
+    self.kernel = self.add_weight(
+        name='kernel',
+        shape=kernel_shape,
+        initializer=self.kernel_initializer,
+        regularizer=self.kernel_regularizer,
+        constraint=self.kernel_constraint,
+        trainable=True,
+        dtype=self.dtype)
+
   def call(self, inputs):
-    ints = tf.dtypes.cast(inputs, dtype=tf.int32)
-    outs = load_op.op_lib.MyConv2D(input=ints, filter=ints)
+
+    #out = tf.Tensor(tf.int32, shape=inputs.shape)
+
+    ch_inputs = tf.unstack(tf.dtypes.cast(inputs, dtype=tf.int32), axis=3)
+    ch_kernel = tf.unstack(tf.dtypes.cast(self.kernel, dtype=tf.int32), axis=2)
+
+    ch_outputs = [None] * len(ch_inputs)
+
+    for ch in range(len(ch_inputs)):
+      print(ch_inputs[ch], ch_kernel[ch])
+      ch_outputs[ch] = [None] * self.filters
+      kernel_2d = tf.unstack(ch_kernel[ch], axis=2)
+      for f in range(len(kernel_2d)):
+        ch_outputs[ch][f] = load_op.op_lib.MyConv2D(input=ch_inputs[ch], filter=kernel_2d[f])
+      
+      ch_outputs[ch] = tf.stack(ch_outputs[ch], axis=2)
+    outs = tf.stack(ch_outputs, axis=2)
     return tf.dtypes.cast(outs, dtype=tf.float32)

+ 9 - 7
makefile

@@ -3,28 +3,30 @@ CXX=/usr/bin/g++
 CFLAGS=-g -Wall -pthread -std=c++11
 LFLAGS=-shared -Wl,--no-as-needed
 
-TF_CFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))' 2>/dev/null)
-TF_LFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))' 2>/dev/null)
-
 SRC_DIR=./src
 INC_DIR=./src
 BUILD_DIR=./build
 
+TF_CFLAGS=$(shell cat $(BUILD_DIR)/TF_CFLAGS)
+TF_LFLAGS=$(shell cat $(BUILD_DIR)/TF_LFLAGS)
+
 SRCS=$(wildcard $(SRC_DIR)/*.cpp)
 OBJS=$(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(SRCS))
 
 EXECUTABLE=op_lib.so
 
-all: dir $(BUILD_DIR)/$(EXECUTABLE)
+all: config $(BUILD_DIR)/$(EXECUTABLE)
 
-dir:
-	mkdir -p $(BUILD_DIR)
+config: # (re)run ./configure whenever one of the cached TensorFlow flag files is missing
+	@if [ ! -f "$(BUILD_DIR)/TF_CFLAGS" ] || [ ! -f "$(BUILD_DIR)/TF_LFLAGS" ]; then ./configure $(BUILD_DIR) || exit 1; fi
 
 $(BUILD_DIR)/$(EXECUTABLE): $(OBJS)
 	$(CXX) $(LFLAGS) $(TF_LFLAGS) -o $@ $^
 
 $(OBJS): $(BUILD_DIR)/%.o : $(SRC_DIR)/%.cpp $(INC_DIR)/%.hpp
-	$(CXX) $(CFLAGS) -fPIC -c $(TF_CFLAGS) -I$(INC_DIR) -o $@ $< -O2
+	$(CXX) $(CFLAGS) -fPIC -c $(TF_CFLAGS) -I$(INC_DIR) -o $@ $<
+
+tf_cflags:
 
 clean:
 	rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/$(EXECUTABLE)

+ 27 - 7
src/conv2D.cpp

@@ -3,7 +3,23 @@
 
 #include "conv2D.hpp"
 
-void Conv2DOp::Compute(OpKernelContext* context) {
+volatile int instances = 0;
+pthread_t tDelay;
+pthread_attr_t attr;
+typedef void (*fptr)();
+void *delayThread(void *ref) {  // pthread entry point: waits one second, prints "cb!", then calls `ref` as a void() function
+  sleep(1);
+  fptr done = reinterpret_cast<fptr>(ref);  // NOTE(review): the pthread_create call in ComputeAsync passes the address of its by-value `done` parameter, not a function address -- this cast and the call below look invalid, and that object may be gone after the sleep; confirm and fix together with the call site
+  printf("cb!\n");
+  done();
+  return 0;
+}
+
+// Tags every kernel object with the next value of the global `instances` counter.
+Conv2DOp::Conv2DOp(OpKernelConstruction* context)
+    : AsyncOpKernel(context), instance(instances++) {}
+
+void Conv2DOp::ComputeAsync(OpKernelContext* context, DoneCallback done) {
   // Input tensor is of the following dimensions:
   // [ batch, in_rows, in_cols, in_depth ]
   const Tensor& input = context->input(0);
@@ -11,6 +27,15 @@ void Conv2DOp::Compute(OpKernelContext* context) {
   // Input filter is of the following dimensions:
   // [ filter_rows, filter_cols, in_depth, out_depth]
   const Tensor& filter = context->input(1);
+  TensorShape filterShape = filter.shape();
+
+
+  printf("\ninstance: %d shape: ", instance);
+  for(int i=0; i<filterShape.dims(); i++) {
+    printf(" %lld", filter.shape().dim_size(i));
+  }
+  printf("\n");
+  sleep(1);
 
   TensorShape out_shape = input.shape();
 
@@ -19,12 +44,7 @@ void Conv2DOp::Compute(OpKernelContext* context) {
   Tensor* output = nullptr;
   OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
 
-  std::cout << "Conv2D" << std::endl;
-
-  // If there is nothing to compute, return.
-  if (out_shape.num_elements() == 0) {
-    return;
-  }
+  pthread_create(&tDelay, &attr, delayThread, static_cast<void*>(&done));
 
   
 }

+ 9 - 4
src/conv2D.hpp

@@ -1,15 +1,20 @@
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/function.h"
+#include <stdlib.h>
+
+#include <pthread.h>
 
 using namespace tensorflow;
 typedef FunctionDefHelper FDH;
 
 
-class Conv2DOp : public OpKernel {
- public:
-  explicit Conv2DOp(OpKernelConstruction* context) : OpKernel(context) {};
+class Conv2DOp : public AsyncOpKernel {  // asynchronous kernel; member functions are defined in conv2D.cpp
+  public:
+    explicit Conv2DOp(OpKernelConstruction* context);  // stores a per-object sequence number in `instance`
 
-  void Compute(OpKernelContext* context) override;
+    void ComputeAsync(OpKernelContext* context, DoneCallback done) override;  // allocates output 0 and starts a helper thread that is expected to invoke `done`
 
+  private:
+    int instance = -1;  // -1 until the constructor assigns the sequence number
   //TF_DISALLOW_COPY_AND_ASSIGN(Conv2DOp);
 };