#include "conv2D.hpp" namespace tf_lib { volatile int instances = 0; volatile int inParallel = 0; std::mutex printMu; Conv2DOp::Conv2DOp(OpKernelConstruction* context) : AsyncOpKernel(context) { instance = instances++; OP_REQUIRES_OK(context, context->GetAttr("delay", &delay)); }; void Conv2DOp::ComputeAsync(OpKernelContext* context, DoneCallback done) { // Input tensor is of the following dimensions: // [ batch, in_rows, in_cols, in_depth ] const Tensor& input = context->input(0); ///const int32 *p = input.flat().data(); // Input filter is of the following dimensions: // [ filter_rows, filter_cols, in_depth, out_depth] const Tensor& kernel = context->input(1); TensorShape kernel_shape = kernel.shape(); TensorShape input_shape = input.shape(); int batchSize = input_shape.dim_size(0); int channels = input_shape.dim_size(3); int filters = kernel_shape.dim_size(3); TensorShape output_shape; const int32 dims[] = {batchSize, outputSize, outputSize, channels * filters}; TensorShapeUtils::MakeShape(dims, 4, &output_shape); output_shape.set_dim(0, batchSize); output_shape.set_dim(1, outputSize); output_shape.set_dim(2, outputSize); output_shape.set_dim(3, channels * filters); //printMu.lock(); //std::cout << output_shape.DebugString() << std::endl; //printMu.unlock(); // Output tensor is of the following dimensions: // [ in_batch, out_rows, out_cols, out_depth ] Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); auto input_tensor = input.tensor(); auto output_tensor = output->tensor(); std::shared_ptr jobs(new JobList(Module::dummyModule, batchSize * channels * filters)); for(int sample=0; sample &job = jobs->getJob(sample * channels * filters + channel * filters + filter); for(int x=0; xsetPayload(x*outputSize + y, input_tensor(sample, x, y, channel)); } } } } } jobs->setDoneCallback([output_tensor, &jobs, done]{ output_tensor(0) = jobs->getJob(0)->getResponsePayload(0); done(); }); connectionManager.sendJobListAsync(jobs); } static Status MatMulGradHelper(FunctionDef* g, const string& opname, const string& attr_adj_x, const string& attr_adj_y, const string& x0, bool ax0, const string& x1, bool ax1, const string& y0, bool ay0, const string& y1, bool ay1) { // The final outputs are "dx" and "dy". If we're broadcasting compute // intermediate nodes for now. 
    std::vector<FDH::Node> nodes = {
        {{"dx"}, opname, {x0, x1}, {{"T", "$T"}, {attr_adj_x, ax0}, {attr_adj_y, ax1}}},
        {{"dy"}, opname, {y0, y1}, {{"T", "$T"}, {attr_adj_x, ay0}, {attr_adj_y, ay1}}},
    };

    *g = FDH::Define(
        // Arg defs
        {"x: T", "y: T", "dz: T"},
        // Ret val defs
        {"dx: T", "dy: T"},
        // Attr defs
        {{"T: {half, float, double}"}},
        // Nodes
        nodes);
    return Status::OK();
  }

  Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) {
    const string opname = "MyMatMul";
    const string attr_adj_x = "transpose_a";
    const string attr_adj_y = "transpose_b";
    DataType T;
    TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "T", &T));
    if (T == DT_COMPLEX64 || T == DT_COMPLEX128) {
      return errors::Unimplemented(
          "MatMul gradient for complex is not supported yet.");
    }
    bool ta;
    bool tb;
    TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_x, &ta));
    TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_y, &tb));

    if (!ta && !tb) {
      // z = x * y:  dx = dz * y^T,  dy = x^T * dz
      return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y,
                              "dz", false, "y", true,
                              "x", true, "dz", false);
    }
    if (!ta && tb) {
      // z = x * y^T:  dx = dz * y,  dy = dz^T * x
      return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y,
                              "dz", false, "y", false,
                              "dz", true, "x", false);
    }
    if (ta && !tb) {
      // z = x^T * y:  dx = y * dz^T,  dy = x * dz
      return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y,
                              "y", false, "dz", true,
                              "x", false, "dz", false);
    }

    CHECK(ta && tb);
    // z = x^T * y^T:  dx = y^T * dz^T,  dy = dz^T * x^T
    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y,
                            "y", true, "dz", true,
                            "dz", true, "x", true);
  }

}  // namespace tf_lib
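// Usage sketch (an assumption, not part of this file): for TensorFlow to
// find MatMulGrad when differentiating the custom op, it would be
// registered via the REGISTER_OP_GRADIENT macro from
// tensorflow/core/framework/function.h. Whether this project registers
// "MyMatMul" this way, and in which translation unit, is an assumption.
//
//   REGISTER_OP_GRADIENT("MyMatMul", tf_lib::MatMulGrad);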