5 years ago · 282338bb86
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
 
				+*.code-workspace
			
 
				+.vs/
			
 
				+.vscode/
			
--- a/install.sh
+++ b/install.sh
@@ -0,0 +1,3 @@
 
				+#!/bin/bash
			
 
				+
			
 
				+python3 -m pip install tensorflow
			
--- a/model.png
+++ b/model.png
--- a/test.py
+++ b/test.py
@@ -0,0 +1,88 @@
 
				+import tensorflow as tf

			
 
				+import tensorflow.keras as keras

			
 
				+from keras import layers

			
 
				+from keras.layers import Input, Embedding, LSTM, Dense, Dropout, Flatten, MaxPooling2D, Conv2D

			
 
				+from keras.models import Model, Sequential

			
 
				+from keras.datasets import mnist

			
 
				+from keras.utils import plot_model, to_categorical

			
 
				+

			
 
				+import numpy as np

			
 
				+from IPython import embed

			
 
				+

			
 
				+

			
 
				+batch_size = 128

			
 
				+num_classes = 10

			
 
				+epochs = 1 # 12

			
 
				+

			
 
				+# input image dimensions

			
 
				+img_rows, img_cols = 28, 28

			
 
				+

			
 
				+# the data, split between train and test sets

			
 
				+(x_train, y_train), (x_test, y_test) = mnist.load_data()

			
 
				+

			
 
				+x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)

			
 
				+x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)

			
 
				+input_shape = (img_rows, img_cols, 1)

			
 
				+

			
 
				+x_train = x_train.astype('float32')

			
 
				+x_test = x_test.astype('float32')

			
 
				+x_train /= 255

			
 
				+x_test /= 255

			
 
				+print('x_train shape:', x_train.shape)

			
 
				+print(x_train.shape[0], 'train samples')

			
 
				+print(x_test.shape[0], 'test samples')

			
 
				+

			
 
				+# convert class vectors to binary class matrices

			
 
				+y_train = to_categorical(y_train, num_classes)

			
 
				+y_test = to_categorical(y_test, num_classes)

			
 
				+

			
 
				+

			
 
				+class Linear(layers.Layer):

			
 
				+

			
 
				+  def __init__(self, units=32, input_dim=32):

			
 
				+    super(Linear, self).__init__()

			
 
				+    w_init = tf.random_normal_initializer()

			
 
				+    self.w = tf.Variable(initial_value=w_init(shape=(input_dim, units),

			
 
				+                                              dtype='float32'),

			
 
				+                         trainable=True)

			
 
				+    b_init = tf.zeros_initializer()

			
 
				+    self.b = tf.Variable(initial_value=b_init(shape=(units,),

			
 
				+                                              dtype='float32'),

			
 
				+                         trainable=True)

			
 
				+

			
 
				+  def call(self, inputs):

			
 
				+    print(inputs)

			
 
				+    embed()

			
 
				+    return tf.matmul(inputs, self.w) + self.b

			
 
				+

			
 
				+

			
 
				+model = Sequential()

			
 
				+model.add(Conv2D(32, kernel_size=(3, 3),

			
 
				+                 activation='relu',

			
 
				+                 input_shape=input_shape))

			
 
				+model.add(Conv2D(64, (3, 3), activation='relu'))

			
 
				+

			
 
				+model.add(MaxPooling2D(pool_size=(2, 2)))

			
 
				+model.add(Dropout(0.25))

			
 
				+model.add(Flatten())

			
 
				+model.add(Dense(128, activation='relu'))

			
 
				+model.add(Dropout(0.5))

			
 
				+model.add(Dense(num_classes, activation='softmax'))

			
 
				+

			
 
				+model.add(Linear(10,10))

			
 
				+

			
 
				+

			
 
				+model.compile(loss=keras.losses.categorical_crossentropy,

			
 
				+              optimizer=keras.optimizers.Adadelta(),

			
 
				+              metrics=['accuracy'])

			
 
				+

			
 
				+model.fit(x_train, y_train,

			
 
				+          batch_size=batch_size,

			
 
				+          epochs=epochs,

			
 
				+          verbose=1,

			
 
				+          validation_data=(x_test, y_test))

			
 
				+score = model.evaluate(x_test, y_test, verbose=0)

			
 
				+print('Test loss:', score[0])

			
 
				+print('Test accuracy:', score[1])

			
 
				+

			
 
				+plot_model(model, to_file='model.png', expand_nested=True, show_shapes=True)

			
--- a/tf-addOp/build.sh
+++ b/tf-addOp/build.sh
@@ -0,0 +1,15 @@
 
				+#!/bin/bash
			
 
				+
			
 
				+if [ "$TF_CFLAGS" == "" ]; then
			
 
				+  export TF_CFLAGS=( $(python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
			
 
				+fi
			
 
				+if [ "$TF_LFLAGS" == "" ]; then
			
 
				+  export TF_LFLAGS=( $(python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
			
 
				+fi
			
 
				+
			
 
				+g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 -v
			
 
				+
			
 
				+#g++ -g -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC -I/usr/local/lib/python3.6/dist-packages/tensorflow_core/include -D_GLIBCXX_USE_CXX11_ABI=0 -L/usr/local/lib/python3.6/dist-packages/tensorflow_core -l:libtensorflow_framework.so.2 -O2
			
 
				+
			
 
				+#g++ -g -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 -Wall -Wl,-z,defs
			
 
				+
			
--- a/tf-addOp/model.png
+++ b/tf-addOp/model.png
--- a/tf-addOp/train.py
+++ b/tf-addOp/train.py
@@ -0,0 +1,67 @@
 
				+import tensorflow as tf
			
 
				+import tensorflow.keras as keras
			
 
				+from tensorflow.keras import layers
			
 
				+from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, Flatten, MaxPooling2D, Conv2D
			
 
				+from tensorflow.keras.models import Model, Sequential
			
 
				+from tensorflow.keras.datasets import mnist
			
 
				+from tensorflow.keras.utils import plot_model, to_categorical
			
 
				+
			
 
				+import numpy as np
			
 
				+from IPython import embed
			
 
				+
			
 
				+zero_out_module = tf.load_op_library('./zero_out.so')
			
 
				+
			
 
				+batch_size = 128
			
 
				+num_classes = 10
			
 
				+epochs = 1 # 12
			
 
				+
			
 
				+# input image dimensions
			
 
				+img_rows, img_cols = 28, 28
			
 
				+
			
 
				+# the data, split between train and test sets
			
 
				+(x_train, y_train), (x_test, y_test) = mnist.load_data()
			
 
				+
			
 
				+x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
			
 
				+x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
			
 
				+input_shape = (img_rows, img_cols, 1)
			
 
				+
			
 
				+x_train = x_train.astype('float32')
			
 
				+x_test = x_test.astype('float32')
			
 
				+x_train /= 255
			
 
				+x_test /= 255
			
 
				+print('x_train shape:', x_train.shape)
			
 
				+print(x_train.shape[0], 'train samples')
			
 
				+print(x_test.shape[0], 'test samples')
			
 
				+
			
 
				+# convert class vectors to binary class matrices
			
 
				+y_train = to_categorical(y_train, num_classes)
			
 
				+y_test = to_categorical(y_test, num_classes)
			
 
				+
			
 
				+class Linear(layers.Layer):
			
 
				+
			
 
				+  def __init__(self, units=32, input_dim=32):
			
 
				+    super(Linear, self).__init__()
			
 
				+  def call(self, inputs):
			
 
				+    ints = tf.dtypes.cast(inputs, dtype=tf.int32)
			
 
				+    print(ints)
			
 
				+    outs = zero_out_module.zero_out(ints)
			
 
				+    return tf.dtypes.cast(outs, dtype=tf.float32)
			
 
				+
			
 
				+model = Sequential()
			
 
				+model.add(Flatten())
			
 
				+model.add(Dense(128, activation='relu'))
			
 
				+model.add(Dropout(0.5))
			
 
				+model.add(Dense(num_classes, activation='softmax'))
			
 
				+
			
 
				+model.add(Linear())
			
 
				+
			
 
				+model.compile(loss=keras.losses.categorical_crossentropy,
			
 
				+              optimizer=keras.optimizers.Adadelta(),
			
 
				+              metrics=['accuracy'])
			
 
				+
			
 
				+
			
 
				+score = model.evaluate(x_test, y_test, verbose=0)
			
 
				+print('Test loss:', score[0])
			
 
				+print('Test accuracy:', score[1])
			
 
				+
			
 
				+plot_model(model, to_file='model.png', expand_nested=True, show_shapes=True)
			
--- a/tf-addOp/zero_out.cc
+++ b/tf-addOp/zero_out.cc
@@ -0,0 +1,48 @@
 
				+#include <atomic>
				+#include <cstdio>
				+
				+#include "tensorflow/core/framework/op.h"
				+#include "tensorflow/core/framework/shape_inference.h"
			
 
				+
			
 
				+using namespace tensorflow;
			
 
				+
			
 
				+// Declare the "ZeroOut" op: one int32 input ("to_zero") and one int32
				+// output ("zeroed").
				+REGISTER_OP("ZeroOut")
				+    .Input("to_zero: int32")
				+    .Output("zeroed: int32")
				+    .SetShapeFn([](shape_inference::InferenceContext* ctx) {
				+      // The output has exactly the shape of the input.
				+      ctx->set_output(0, ctx->input(0));
				+      return Status::OK();
				+    });
			
 
				+
			
 
				+#include "tensorflow/core/framework/op_kernel.h"
			
 
				+
			
 
				+using namespace tensorflow;
			
 
				+
			
 
				+// CPU kernel for the "ZeroOut" op: the output has the input's shape, keeps
				+// the first element (in flattened order) and sets every other element to 0.
				+class ZeroOutOp : public OpKernel {
				+ public:
				+  explicit ZeroOutOp(OpKernelConstruction* context) : OpKernel(context) {}
				+
				+  void Compute(OpKernelContext* context) override {
				+    // Grab the input tensor as a flat view.
				+    const Tensor& input_tensor = context->input(0);
				+    auto input = input_tensor.flat<int32>();
				+
				+    // Debug trace of how many times this kernel instance has run.
				+    // fetch_add returns the previous value (same as the old n++), but is
				+    // safe when Compute is invoked concurrently on the same instance.
				+    printf("call n: %d\n", n.fetch_add(1));
				+
				+    // Create an output tensor with the same shape as the input.
				+    Tensor* output_tensor = nullptr;
				+    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
				+                                                     &output_tensor));
				+    auto output_flat = output_tensor->flat<int32>();
				+
				+    // Zero the whole output, then restore the first input value if there
				+    // is one. This avoids narrowing the element count to int.
				+    output_flat.setZero();
				+    if (input.size() > 0) output_flat(0) = input(0);
				+  }
				+
				+  // Number of Compute calls seen so far by this kernel instance.
				+  std::atomic<int> n{0};
				+};
				+
				+// Make ZeroOutOp the CPU implementation of the "ZeroOut" op.
				+REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp);
			
--- a/tf-addOp/zero_out.py
+++ b/tf-addOp/zero_out.py
@@ -0,0 +1,7 @@
 
				+import tensorflow as tf
			
 
				+zero_out_module = tf.load_op_library('./zero_out.so')
			
 
				+
			
 
				+print(zero_out_module.zero_out([[1, 2], [3, 4]]))
			
 
				+
			
 
				+# Prints
			
 
				+# array([[1, 0], [0, 0]], dtype=int32)
			
--- a/tf-addOp/zero_out.so
+++ b/tf-addOp/zero_out.so