/* Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "flat/flat_parser.h" #include #include #include #include #include #include #include #include #include #include //// // MAGIC numbers // #define VARIABLES_FILE_MAGIC 0xF00DD1E0 #define VARIABLES_DATA_MAGIC 0xF00DD1E1 #define VARIABLES_EOFF_MAGIC 0xF00DD1E2 //// // NNEF to OpenVX Translator // class NNEF2OpenVX_Translator : public nnef::Parser::Callback { public: NNEF2OpenVX_Translator(std::string nnefFolder_, std::string openvxFolder_, bool useVirtual_, int verbose_) : nnefFolder(nnefFolder_), openvxFolder(openvxFolder_), useVirtual(useVirtual_), verbose(verbose_) { } protected: //// // class variables // int verbose; bool useVirtual; std::string nnefFolder; std::string openvxFolder; std::string openvxFilenameC; std::ofstream ovxC; std::vector inputList; std::vector outputList; std::vector virtualList; std::vector variableList; std::map> variableBinary; std::map inputShape; std::map outputShape; std::map virtualShape; std::map variableShape; std::map variableLabel; std::map variableRequiredDims; std::vector opsProto; std::vector> opsValues; std::vector> opsShapes; std::vector operationRemoved; std::map variableMerged; std::map virtualRename; std::map convNewBiasName; private: // utility functions

    ////
    // Build a dimension list from an NNEF shape.
    //   shape    - source shape; its extents are written to dims in reverse index order
    //   dims     - output list; cleared first, then filled
    //   num_dims - requested number of entries; 0 means "use shape.rank()"
    // When rank > 1 the missing entries are inserted as leading 1s, otherwise they
    // are appended as trailing 1s. When num_dims is smaller than the rank no padding
    // is added and dims receives all rank extents.
    // NOTE(review): the element type of dims is written as size_t; the template
    // argument is not visible in this copy of the file -- confirm.
    //
    static void getTensorDims(const nnef::Shape& shape, std::vector<size_t>& dims, size_t num_dims)
    {
        const size_t rank = shape.rank();
        if(num_dims == 0)
            num_dims = rank;
        dims.clear();
        // num_dims - rank is unsigned: without this guard it wraps around when
        // num_dims < rank and the padding loop below would never terminate sensibly
        const size_t padding = (num_dims > rank) ? (num_dims - rank) : 0;
        size_t count = 0;
        if(rank > 1) {
            for(; count < padding; count++) {
                dims.push_back(1);
            }
        }
        for(size_t i = 0; i < rank; i++, count++) {
            dims.push_back(shape[rank-1-i]);
        }
        for(; count < num_dims; count++) {
            dims.push_back(1);
        }
    }

    ////
    // Return the C statements that declare "<name>_dims", create the tensor
    // "<name>" from it and check the resulting object.
    //   name       - identifier used for the tensor and its dims array
    //   shape      - NNEF shape, converted with getTensorDims(shape, dims, num_dims)
    //   useVirtual - emit vxCreateVirtualTensor(graph, ...) instead of vxCreateTensor(context, ...)
    //   num_dims   - number of dimensions requested from getTensorDims
    //
    static std::string codeGenTensorCreate (const std::string& name, const nnef::Shape& shape, bool useVirtual, size_t num_dims)
    {
        std::stringstream ss;
        std::vector<size_t> dims;
        getTensorDims(shape, dims, num_dims);
        ss << " vx_size " << name << "_dims[" << dims.size() << "] = {";
        for(size_t i = 0; i < dims.size(); i++) {
            ss << (i == 0 ? " " : ", ") << dims[i];
        }
        ss << " };" << std::endl;
        ss << " vx_tensor " << name << " = " << (useVirtual ? "vxCreateVirtualTensor(graph, " : "vxCreateTensor(context, ") << dims.size() << ", " << name << "_dims, VX_TYPE_FLOAT32, 0);" << std::endl;
        ss << " ERROR_CHECK_OBJECT(" << name << ");" << std::endl;
        return ss.str();
    }

    ////
    // Load the tensor stored in "<nnefFolder>/<label>.dat".
    //   nnefFolder - folder that holds the .dat file
    //   label      - file name without the ".dat" extension
    //   shape      - expected shape; every dimension stored in the file must match shape[i]
    //   data       - receives a buffer allocated with new char[]; the caller owns it
    // Returns the number of bytes stored in data.
    // Only 32-bit float tensors are imported. Any failure (file cannot be opened,
    // invalid header, dimension mismatch, unsupported data type, short read) prints
    // a message and terminates the process with exit(1).
    //
    static unsigned int loadTensorFile(const std::string& nnefFolder, const std::string& label, const nnef::Shape& shape, char *& data)
    {
        std::string fileName = nnefFolder + "/" + label + ".dat";
        FILE * fp = fopen(fileName.c_str(), "rb");
        if(!fp) {
            printf("ERROR: unable to open: %s\n", fileName.c_str());
            exit(1);
        }

        enum TensorDataType : unsigned char {
            TensorDataType_Float,
            TensorDataType_Quantized,
            TensorDataType_Signed,
            TensorDataType_Unsigned
        };
        struct TensorFileHeader {
            unsigned char magic[2];
            unsigned char major;
            unsigned char minor;
            unsigned int offset;
            unsigned int rank;
            unsigned int dim[8];
            unsigned char data_type;
            unsigned char bit_width;
            unsigned short quant_alg_len;
            char quant_alg[1024];
        } h = { 0 };
        const unsigned int maxRank = sizeof(h.dim) / sizeof(h.dim[0]);

        // read the header field by field, counting the bytes actually consumed
        unsigned int offset = 0;
        offset += fread(&h.magic, 1, sizeof(h.magic), fp);
        offset += fread(&h.major, 1, sizeof(h.major), fp);
        offset += fread(&h.minor, 1, sizeof(h.minor), fp);
        offset += fread(&h.offset, 1, sizeof(h.offset), fp);
        offset += fread(&h.rank, 1, sizeof(h.rank), fp);
        // rank and quant_alg_len come from the file: only read into the fixed-size
        // arrays when the lengths fit, the validation below rejects the file otherwise
        if(h.rank > 0 && h.rank <= maxRank) {
            offset += fread(h.dim, 1, h.rank * sizeof(h.dim[0]), fp);
        }
        offset += fread(&h.data_type, 1, sizeof(h.data_type), fp);
        offset += fread(&h.bit_width, 1, sizeof(h.bit_width), fp);
        offset += fread(&h.quant_alg_len, 1, sizeof(h.quant_alg_len), fp);
        if(h.quant_alg_len > 0 && h.quant_alg_len < sizeof(h.quant_alg)) {
            offset += fread(h.quant_alg, 1, h.quant_alg_len, fp);
        }

        if(h.magic[0] != 0x4e || h.magic[1] != 0xef || h.major != 1 || h.minor != 0 || h.bit_width == 0 ||
           h.rank > maxRank || h.quant_alg_len >= 1024 ||
           (12 + h.rank * 4 + 4 + h.quant_alg_len) != offset || h.offset < offset)
        {
            printf("ERROR: invalid or unsupported tensor file: %s\n", fileName.c_str());
            printf(" [ 0x%02x, 0x%02x, %d, %d, %d, %d, {", h.magic[0], h.magic[1], h.major, h.minor, h.offset, h.rank);
            // never index h.dim beyond its capacity, even when reporting a bad rank
            const unsigned int shownRank = (h.rank < maxRank) ? h.rank : maxRank;
            for(unsigned int i = 0; i < shownRank; i++)
                printf(" %d", h.dim[i]);
            printf(" }, %d, %d, %d, '%s' ] offset = %d\n", h.data_type, h.bit_width, h.quant_alg_len, h.quant_alg, offset);
            exit(1);
        }
        if(h.offset > offset) {
            fseek(fp, h.offset, SEEK_SET);
        }

        // accumulate the size in bits using 64-bit arithmetic so that large tensors
        // cannot wrap around; the byte count must still fit the unsigned int result
        const unsigned long long maxBits = static_cast<unsigned long long>(~0u) * 8;
        unsigned long long bits = h.bit_width;
        for(unsigned int i = 0; i < h.rank; i++) {
            if(h.dim[i] != shape[i]) {
                printf("ERROR: dimension[%d] mismatch: %d in %s (must be %d)\n", i, h.dim[i], fileName.c_str(), shape[i]);
                exit(1);
            }
            if(h.dim[i] != 0 && bits > maxBits / h.dim[i]) {
                printf("ERROR: tensor in %s is too large to import\n", fileName.c_str());
                exit(1);
            }
            bits *= h.dim[i];
        }
        unsigned int size = static_cast<unsigned int>((bits + 7) >> 3);

        data = nullptr;
        if(h.data_type == TensorDataType_Float && h.bit_width == 32) {
            data = new char [size];
            if(!data) {
                printf("ERROR: memory allocation for %d bytes failed for %s\n", size, fileName.c_str());
                exit(1);
            }
            unsigned int n = fread(data, 1, size, fp);
            if(n != size) {
                printf("ERROR: unable to read %d bytes of data from %s\n", size, fileName.c_str());
                exit(1);
            }
        }
        else {
            printf("ERROR: import of Tensor DataType=%d BitWidth=%d is not yet supported\n", h.data_type, h.bit_width);
            exit(1);
        }
        fclose(fp);
        return size;
    }

    std::string virtualName(const std::string name) { auto it = virtualRename.find(name); return (it != virtualRename.end()) ? 
it->second : name; } void codeGenOperation(size_t pos, bool getVariables, bool genCode, int verbose) { //// // make sure that operation is not disabled // if(operationRemoved[pos]) { return; } //// // get operation details // const nnef::Prototype& proto = opsProto[pos]; const nnef::Dictionary& args = opsValues[pos]; const nnef::Dictionary& shapes = opsShapes[pos]; if(verbose & 1) { std::cout << '\t'; for ( size_t i = 0; i < proto.resultCount(); ++i ) { auto& result = proto.result(i); if ( i ) std::cout << ", "; std::cout << args[result.name()]; } std::cout << " = " << proto.name() << "("; for ( size_t i = 0; i < proto.paramCount(); ++i ) { auto& param = proto.param(i); if ( i ) std::cout << ", "; if ( !param.type()->isTensor() ) std::cout << param.name() << " = "; std::cout << args[param.name()]; } std::cout << ")" << std::endl; } //// // utility functions // auto getTensorOrScalar = [] (const nnef::Value& v) -> std::string { std::string value = "0"; if(v) { if(v.kind() == nnef::Value::Tensor) { value = v.tensor().id; } else if(v.kind() == nnef::Value::Scalar) { value = std::to_string(v.scalar()); } } return value; }; auto getExtentArray = [] (const nnef::Value& v) -> std::vector { std::vector value; if(v && v.kind() == nnef::Value::Array) { auto&& a = v.array(); for(auto& i : a) { value.push_back(i.integer()); } } return value; }; auto getPaddingInfo = [] (const nnef::Value& v, size_t pad[4]) { std::vector value; if(v && v.kind() == nnef::Value::Array) { auto&& a = v.array(); if(a.size() == 2) { pad[0] = a[0][0].integer(); pad[1] = a[0][1].integer(); pad[2] = a[1][0].integer(); pad[3] = a[1][1].integer(); // TODO: protection against -ve values if(pad[0] > 16384) pad[0] = 0; if(pad[1] > 16384) pad[1] = 0; if(pad[2] > 16384) pad[2] = 0; if(pad[3] > 16384) pad[3] = 0; } } }; //// // process operations // std::string opname = proto.name(); if(opname == "external") { const std::string& output = args["output"].tensor().id; const nnef::Shape& shape = shapes[output]; 
if(verbose & 2) { std::cout << opname << " " << output << " " << shape << std::endl; } if(getVariables) { inputShape[output] = shape; } } else if(opname == "variable") { const std::string& output = args["output"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& label = args["label"].string(); if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " label=" << label << std::endl; } if(getVariables) { variableList.push_back(output); variableMerged[output] = false; variableShape[output] = shape; variableLabel[output] = label; } } else if(opname == "conv") { const std::string& output = args["output"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& input = args["input"].tensor().id; const std::string& filter = args["filter"].tensor().id; std::string bias = getTensorOrScalar(args["bias"]); const std::string& border = args["border"].string(); const auto& padding = args["padding"]; const auto& stride = args["stride"]; const auto& dilation = args["dilation"]; const auto& groups = args["groups"] ? 
args["groups"].integer() : 1; if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " " << input << " " << filter << " " << bias << " border=" << border << " " << padding << " " << stride << " " << dilation << " " << groups << std::endl; } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } variableRequiredDims[filter] = 4; if(bias[0] != '0') { variableRequiredDims[bias] = 2; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << codeGenTensorCreate(output, shape, useVirtual, 4); } if(bias[0] == '0') { if(convNewBiasName.find(output) != convNewBiasName.end()) { bias = convNewBiasName.find(output)->second; } } if(shape[2] == 1 && shape[3] == 1) { ovxC << " { vx_node node = vxFullyConnectedLayer(graph, " << virtualName(input) << ", " << filter << ", " << ((bias[0] == '0') ? "NULL" : bias) << ", VX_CONVERT_POLICY_SATURATE, VX_ROUND_POLICY_TO_NEAREST_EVEN, " << output << ");" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseNode(&node));" << std::endl; ovxC << " }" << std::endl; } else { std::vector&& vDilation = getExtentArray(dilation); size_t pad[4] = { 0, 0, 0, 0 }; getPaddingInfo(padding, pad); ovxC << " { vx_nn_convolution_params_t conv_params = { 0 };" << std::endl; ovxC << " conv_params.padding_x = " << pad[1] << ";" << std::endl; ovxC << " conv_params.padding_y = " << pad[0] << ";" << std::endl; ovxC << " conv_params.dilation_x = " << (vDilation.size() > 1 ? vDilation[1] - 1 : 0) << ";" << std::endl; ovxC << " conv_params.dilation_y = " << (vDilation.size() > 0 ? 
vDilation[0] - 1 : 0) << ";" << std::endl; ovxC << " conv_params.overflow_policy = " << "VX_CONVERT_POLICY_SATURATE" << ";" << std::endl; ovxC << " conv_params.rounding_policy = " << "VX_ROUND_POLICY_TO_NEAREST_EVEN" << ";" << std::endl; ovxC << " conv_params.down_scale_size_rounding = " << "VX_NN_DS_SIZE_ROUNDING_FLOOR" << ";" << std::endl; ovxC << " vx_node node = vxConvolutionLayer(graph, " << virtualName(input) << ", " << filter << ", " << ((bias[0] == '0') ? "NULL" : bias) << ", &conv_params, sizeof(conv_params), " << output << ");" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseNode(&node));" << std::endl; ovxC << " }" << std::endl; } } } else if(opname == "relu") { const std::string& output = args["y"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& input = args["x"].tensor().id; if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " " << input << std::endl; } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << codeGenTensorCreate(output, shape, useVirtual, 4); } ovxC << " { vx_node node = vxActivationLayer(graph, " << virtualName(input) << ", VX_NN_ACTIVATION_RELU, 0.0f, 0.0f, " << output << ");" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseNode(&node));" << std::endl; ovxC << " }" << std::endl; } } else if(opname == "max_pool") { const std::string& output = args["output"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& input = args["input"].tensor().id; const auto& size = args["size"]; const std::string& border = args["border"].string(); const auto& padding = args["padding"]; const auto& stride = args["stride"]; const auto& dilation = args["dilation"]; if(verbose & 2) { std::cout << opname << " " << output << " " << 
shape << " " << input << " size=" << size << " border=" << border << " " << padding << " " << stride << " " << dilation << std::endl; } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << codeGenTensorCreate(output, shape, useVirtual, 4); } std::vector&& vSize = getExtentArray(size); size_t pad[4] = { 0, 0, 0, 0 }; getPaddingInfo(padding, pad); ovxC << " { vx_node node = vxPoolingLayer(graph, " << virtualName(input) << ", VX_NN_POOLING_MAX, " << size[3] << ", " << size[2] << ", " << pad[1] << ", " << pad[0] << ", " << "VX_ROUND_POLICY_TO_NEAREST_EVEN, " << output << ");" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseNode(&node));" << std::endl; ovxC << " }" << std::endl; } } else if(opname == "avg_pool") { const std::string& output = args["output"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& input = args["input"].tensor().id; const auto& size = args["size"]; const std::string& border = args["border"].string(); const auto& padding = args["padding"]; const auto& stride = args["stride"]; const auto& dilation = args["dilation"]; if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " " << input << " size=" << size << " border=" << border << " " << padding << " " << stride << " " << dilation << std::endl; } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << codeGenTensorCreate(output, shape, useVirtual, 4); } std::vector&& vSize = getExtentArray(size); size_t pad[4] = { 0, 0, 0, 0 }; 
getPaddingInfo(padding, pad); ovxC << " { vx_node node = vxPoolingLayer(graph, " << virtualName(input) << ", VX_NN_POOLING_AVG, " << size[3] << ", " << size[2] << ", " << pad[1] << ", " << pad[0] << ", " << "VX_ROUND_POLICY_TO_NEAREST_EVEN, " << output << ");" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseNode(&node));" << std::endl; ovxC << " }" << std::endl; } } else if(opname == "concat") { const std::string& output = args["value"].tensor().id; const nnef::Shape& shape = shapes[output]; std::vector itemList; const auto& inputpar = args["values"]; for(size_t i = 0; i < inputpar.size(); i++) { std::string name = inputpar[i].tensor().id; itemList.push_back(name); } const int axis = args["axis"].integer(); if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " ["; for(auto& v : itemList) std::cout << " " << v; std::cout << " ] axis=" << axis << std::endl; } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << codeGenTensorCreate(output, shape, useVirtual, 4); } ovxC << " { vx_node node = vxConcatLayer(graph, " << output; for(auto& v : itemList) { ovxC << ", " << virtualName(v); } for(size_t i = itemList.size(); i < 8; i++) { ovxC << ", NULL"; } ovxC << ");" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseNode(&node));" << std::endl; ovxC << " }" << std::endl; } } else if(opname == "batch_normalization") { const std::string& output = args["output"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& input = args["input"].tensor().id; const std::string& mean = args["mean"].tensor().id; const std::string& variance = args["variance"].tensor().id; std::string scale = getTensorOrScalar(args["scale"]); std::string offset = getTensorOrScalar(args["offset"]); const 
float epsilon = args["epsilon"].scalar(); if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " " << input << " " << mean << " " << variance << " " << offset << " " << scale << " " << epsilon << std::endl; } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << codeGenTensorCreate(output, shape, useVirtual, 4); } ovxC << " { vx_node node = vxBatchNormalizationLayer(graph, " << virtualName(input) << ", " << mean << ", " << variance << ", " << (scale[0] == '1' ? "NULL" : scale) << ", " << (offset[0] == '0' ? "NULL" : offset) << ", " << epsilon << ", " << output << ");" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseNode(&node));" << std::endl; ovxC << " }" << std::endl; } } else if(opname == "mul") { const std::string& output = args["z"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& input1 = args["x"].tensor().id; const std::string& input2 = args["y"].tensor().id; if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " " << input1 << " " << input2 << std::endl; } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << codeGenTensorCreate(output, shape, useVirtual, 4); } ovxC << " { float one = 1.0f;" << std::endl; ovxC << " vx_scalar scale = vxCreateScalar(context, VX_TYPE_FLOAT32, &one);" << std::endl; ovxC << " vx_node node = vxTensorMultiplyNode(graph, " << virtualName(input1) << ", " << virtualName(input2) << ", scale, VX_CONVERT_POLICY_SATURATE, VX_ROUND_POLICY_TO_NEAREST_EVEN, " << 
output << ");" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseScalar(&scale));" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseNode(&node));" << std::endl; ovxC << " }" << std::endl; } } else if(opname == "add") { const std::string& output = args["z"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& input1 = args["x"].tensor().id; const std::string& input2 = args["y"].tensor().id; if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " " << input1 << " " << input2 << std::endl; } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << codeGenTensorCreate(output, shape, useVirtual, 4); } ovxC << " { vx_node node = vxTensorAddNode(graph, " << virtualName(input1) << ", " << virtualName(input2) << ", VX_CONVERT_POLICY_SATURATE, " << output << ");" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseNode(&node));" << std::endl; ovxC << " }" << std::endl; } } else if(opname == "softmax") { const std::string& output = args["y"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& input = args["x"].tensor().id; std::vector&& axes = getExtentArray(args["axes"]); if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " " << input << " " << args["axes"] << std::endl; } if(axes.size() != 1 || axes[0] != 1) { std::cout << "ERROR: " << opname << " with " << args["axes"] << " is *** not yet supported ***" << std::endl; exit(1); } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << 
codeGenTensorCreate(output, shape, useVirtual, 4); } ovxC << " { vx_node node = vxSoftmaxLayer(graph, " << virtualName(input) << ", " << output << ");" << std::endl; ovxC << " ERROR_CHECK_STATUS(vxReleaseNode(&node));" << std::endl; ovxC << " }" << std::endl; } } else if(opname == "sum_reduce") { const std::string& output = args["output"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& input = args["input"].tensor().id; const auto& axes = args["axes"]; const bool normalize = args["normalize"].logical(); if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " " << input << " " << axes << " " << normalize << std::endl; } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << codeGenTensorCreate(output, shape, useVirtual, 4); } std::cout << opname << " *** not yet supported ***" << std::endl; exit(1); } } else if(opname == "mean_reduce") { const std::string& output = args["output"].tensor().id; const nnef::Shape& shape = shapes[output]; const std::string& input = args["input"].tensor().id; const auto& axes = args["axes"]; if(verbose & 2) { std::cout << opname << " " << output << " " << shape << " " << input << " " << axes << std::endl; } if(getVariables) { if(std::find(outputList.begin(), outputList.end(), output) == outputList.end()) { virtualList.push_back(output); virtualShape[output] = shape; } else { outputShape[output] = shape; } } if(genCode) { if(std::find(virtualList.begin(), virtualList.end(), output) != virtualList.end()) { ovxC << codeGenTensorCreate(output, shape, useVirtual, 4); } std::cout << opname << " *** not yet supported ***" << std::endl; exit(1); } } else { std::cout << opname << " *** not yet supported ***" << std::endl; exit(1); } } void 
codeGenMergeVariables() { auto getTensorOrScalar = [] (const nnef::Value& v) -> std::string { std::string value = "0"; if(v) { if(v.kind() == nnef::Value::Tensor) { value = v.tensor().id; } else if(v.kind() == nnef::Value::Scalar) { value = std::to_string(v.scalar()); } } return value; }; size_t prevPos = 0; std::string prevOpName = "", prevOutput = ""; for(size_t pos = 0; pos < opsProto.size(); pos++) { std::string opname = opsProto[pos].name(); if(prevOpName == "batch_normalization" && opname == "conv") { // get "batch_normalization" variables const nnef::Dictionary& argsBN = opsValues[prevPos]; const nnef::Dictionary& shapesBN = opsShapes[prevPos]; const std::string& inputBN = argsBN["input"].tensor().id; const std::string& mean = argsBN["mean"].tensor().id; const std::string& variance = argsBN["variance"].tensor().id; std::string scale = getTensorOrScalar(argsBN["scale"]); std::string offset = getTensorOrScalar(argsBN["offset"]); const float epsilon = argsBN["epsilon"].scalar(); const nnef::Shape& shapeMean = shapesBN[mean]; // get "conv" variables const nnef::Dictionary& argsConv = opsValues[pos]; const nnef::Dictionary& shapesConv = opsShapes[pos]; const std::string& outputConv = argsConv["output"].tensor().id; const std::string& filter = argsConv["filter"].tensor().id; const std::string& bias = getTensorOrScalar(argsConv["bias"]); const nnef::Shape& shapeFilter = shapesConv[filter]; // get filter and mean dimensions size_t filterDimsCount = shapeFilter.rank(), meanDimsCount = shapeMean.rank(); std::vector filterDims, meanDims; getTensorDims(shapeFilter, filterDims, filterDimsCount); getTensorDims(shapeMean, meanDims, meanDimsCount); // check validity of dimensions size_t K = (filterDimsCount == 4) ? filterDims[3] : filterDims[1]; size_t N = (filterDimsCount == 4) ? 
(filterDims[0] * filterDims[1] * filterDims[2]) : filterDims[0]; if((filterDimsCount == 4 || filterDimsCount == 2) && meanDimsCount == 2 && K == meanDims[0]) { // fuse batch_normalization variables into conv variables std::tuple filterBinary = variableBinary[filter]; std::tuple meanBinary = variableBinary[mean]; std::tuple varianceBinary = variableBinary[variance]; float * filterBuf = (float *)std::get<1>(filterBinary); float * biasBuf = nullptr; float * meanBuf = (float *)std::get<1>(meanBinary); float * varianceBuf = (float *)std::get<1>(varianceBinary); float * scaleBuf = nullptr; float * offsetBuf = nullptr; if(bias[0] != '0') { std::tuple biasBinary = variableBinary[bias]; biasBuf = (float *)std::get<1>(biasBinary); } else if(convNewBiasName.find(outputConv) != convNewBiasName.end()) { std::tuple biasBinary = variableBinary[convNewBiasName[outputConv]]; biasBuf = (float *)std::get<1>(biasBinary); } else { size_t size = K * sizeof(float); char * data = new char [size]; biasBuf = (float *)data; for(size_t i = 0; i < K; i++) { biasBuf[i] = 0; } std::string name = filter + "__new_bias"; std::tuple binary(size, data); variableBinary[name] = binary; convNewBiasName[outputConv] = name; variableList.push_back(name); variableMerged[name] = false; nnef::Shape shape(1); shape[0] = K; shape[1] = 1; variableShape[name] = shape; variableRequiredDims[name] = 2; } if(scale[0] != '1') { scaleBuf = (float *)std::get<1>(variableBinary[scale]); } if(offset[0] != '0') { offsetBuf = (float *)std::get<1>(variableBinary[offset]); } for(size_t k = 0; k < K; k++) { double mk = 1.0 / sqrt((double)varianceBuf[k] + epsilon); double ck = -meanBuf[k] * mk; if(scaleBuf) { mk *= scaleBuf[k]; ck *= scaleBuf[k]; } if(offsetBuf) { ck += offsetBuf[k]; } float * W = &filterBuf[k*N]; double Wsum = 0; for(size_t j = 0; j < N; j++) { Wsum += W[j]; W[j] = (float)(W[j] * mk); } if(biasBuf) { biasBuf[k] = (float)(Wsum * ck + biasBuf[k]); } } // mark that batch_normalization is disabled and rename output 
as input operationRemoved[prevPos] = true; virtualRename[argsConv["input"].tensor().id] = inputBN; // mark the merged variables variableMerged[mean] = true; variableMerged[variance] = true; if(scaleBuf) variableMerged[scale] = true; if(offsetBuf) variableMerged[offset] = true; } // use conv as previous layer prevPos = pos; prevOpName = opname; prevOutput = argsConv["output"].tensor().id; } else if(prevOpName == "conv" && opname == "batch_normalization") { // get "conv" variables const nnef::Dictionary& argsConv = opsValues[prevPos]; const nnef::Dictionary& shapesConv = opsShapes[prevPos]; const std::string& outputConv = argsConv["output"].tensor().id; const std::string& filter = argsConv["filter"].tensor().id; const std::string& bias = getTensorOrScalar(argsConv["bias"]); const nnef::Shape& shapeFilter = shapesConv[filter]; // get "batch_normalization" variables const nnef::Dictionary& argsBN = opsValues[pos]; const nnef::Dictionary& shapesBN = opsShapes[pos]; const std::string& mean = argsBN["mean"].tensor().id; const std::string& variance = argsBN["variance"].tensor().id; std::string scale = getTensorOrScalar(argsBN["scale"]); std::string offset = getTensorOrScalar(argsBN["offset"]); const float epsilon = argsBN["epsilon"].scalar(); const nnef::Shape& shapeMean = shapesBN[mean]; // get filter and mean dimensions size_t filterDimsCount = shapeFilter.rank(), meanDimsCount = shapeMean.rank(); std::vector filterDims, meanDims; getTensorDims(shapeFilter, filterDims, filterDimsCount); getTensorDims(shapeMean, meanDims, meanDimsCount); // check validity of dimensions size_t K = (filterDimsCount == 4) ? filterDims[3] : filterDims[1]; size_t N = (filterDimsCount == 4) ? 
(filterDims[0] * filterDims[1] * filterDims[2]) : filterDims[0]; if((filterDimsCount == 4 || filterDimsCount == 2) && meanDimsCount == 2 && K == meanDims[0]) { // fuse batch_normalization variables into conv variables std::tuple filterBinary = variableBinary[filter]; std::tuple meanBinary = variableBinary[mean]; std::tuple varianceBinary = variableBinary[variance]; float * filterBuf = (float *)std::get<1>(filterBinary); float * biasBuf = nullptr; float * meanBuf = (float *)std::get<1>(meanBinary); float * varianceBuf = (float *)std::get<1>(varianceBinary); float * scaleBuf = nullptr; float * offsetBuf = nullptr; if(bias[0] != '0') { std::tuple biasBinary = variableBinary[bias]; biasBuf = (float *)std::get<1>(biasBinary); } else if(convNewBiasName.find(outputConv) != convNewBiasName.end()) { std::tuple biasBinary = variableBinary[convNewBiasName[outputConv]]; biasBuf = (float *)std::get<1>(biasBinary); } else { size_t size = K * sizeof(float); char * data = new char [size]; biasBuf = (float *)data; for(size_t i = 0; i < K; i++) { biasBuf[i] = 0; } std::string name = filter + "__new_bias"; std::tuple binary(size, data); variableBinary[name] = binary; convNewBiasName[outputConv] = name; variableList.push_back(name); variableMerged[name] = false; nnef::Shape shape(1); shape[0] = K; shape[1] = 1; variableShape[name] = shape; variableRequiredDims[name] = 2; } if(scale[0] != '1') { scaleBuf = (float *)std::get<1>(variableBinary[scale]); } if(offset[0] != '0') { offsetBuf = (float *)std::get<1>(variableBinary[offset]); } for(size_t k = 0; k < K; k++) { double mk = 1.0 / sqrt((double)varianceBuf[k] + epsilon); double ck = -meanBuf[k] * mk; if(scaleBuf) { mk *= scaleBuf[k]; ck *= scaleBuf[k]; } if(offsetBuf) { ck += offsetBuf[k]; } float * W = &filterBuf[k*N]; for(size_t j = 0; j < N; j++) { W[j] = (float)(W[j] * mk); } if(biasBuf) { biasBuf[k] = (float)(mk * biasBuf[k] + ck); } } // mark that batch_normalization is disabled, rename output as input, and use conv as previous 
layer operationRemoved[pos] = true; virtualRename[argsBN["output"].tensor().id] = outputConv; prevOutput = argsBN["output"].tensor().id; // mark the merged variables variableMerged[mean] = true; variableMerged[variance] = true; if(scaleBuf) variableMerged[scale] = true; if(offsetBuf) variableMerged[offset] = true; } else { // use batch_normalization as previous layer prevPos = pos; prevOpName = opname; prevOutput = argsBN["output"].tensor().id; } } else if((prevOpName == "mul" || prevOpName == "add") && opname == "conv") { // get "mul" or "add" variables const nnef::Dictionary& argsOP = opsValues[prevPos]; const nnef::Dictionary& shapesOP = opsShapes[prevPos]; const std::string& x = argsOP["x"].tensor().id; const std::string& y = argsOP["y"].tensor().id; std::string var, inputBN; nnef::Shape shapeVar; if(std::find(variableList.begin(), variableList.end(), x) != variableList.end()) { inputBN = y; var = x; shapeVar = shapesOP[x]; } else if(std::find(variableList.begin(), variableList.end(), y) != variableList.end()) { inputBN = x; var = y; shapeVar = shapesOP[y]; } // get "conv" variables const nnef::Dictionary& argsConv = opsValues[pos]; const nnef::Dictionary& shapesConv = opsShapes[pos]; const std::string& outputConv = argsConv["output"].tensor().id; const std::string& filter = argsConv["filter"].tensor().id; const std::string& bias = getTensorOrScalar(argsConv["bias"]); const nnef::Shape& shapeFilter = shapesConv[filter]; // get var dimensions size_t filterDimsCount = shapeFilter.rank(), varDimsCount = 0; std::vector filterDims, varDims; getTensorDims(shapeFilter, filterDims, filterDimsCount); if(var.length() > 0) { varDimsCount = shapeVar.rank(); getTensorDims(shapeVar, varDims, varDimsCount); } // check validity of dimensions size_t K = (filterDimsCount == 4) ? filterDims[3] : filterDims[1]; size_t N = (filterDimsCount == 4) ? 
(filterDims[0] * filterDims[1] * filterDims[2]) : filterDims[0]; if((filterDimsCount == 4 || filterDimsCount == 2) && varDimsCount == 2 && K == varDims[0]) { // fuse var into conv variables std::tuple filterBinary = variableBinary[filter]; std::tuple biasBinary = variableBinary[bias]; std::tuple varBinary = variableBinary[var]; float * filterBuf = (float *)std::get<1>(filterBinary); float * biasBuf = nullptr; float * varBuf = (float *)std::get<1>(varBinary); if(bias[0] != '0') { std::tuple biasBinary = variableBinary[bias]; biasBuf = (float *)std::get<1>(biasBinary); } else if(convNewBiasName.find(outputConv) != convNewBiasName.end()) { std::tuple biasBinary = variableBinary[convNewBiasName[outputConv]]; biasBuf = (float *)std::get<1>(biasBinary); } else { size_t size = K * sizeof(float); char * data = new char [size]; biasBuf = (float *)data; for(size_t i = 0; i < K; i++) { biasBuf[i] = 0; } std::string name = filter + "__new_bias"; std::tuple binary(size, data); variableBinary[name] = binary; convNewBiasName[outputConv] = name; variableList.push_back(name); variableMerged[name] = false; nnef::Shape shape(1); shape[0] = K; shape[1] = 1; variableShape[name] = shape; variableRequiredDims[name] = 2; } if(prevOpName == "mul") { for(size_t k = 0; k < K; k++) { double mk = varBuf[k]; size_t N = filterDims[0] * filterDims[1] * filterDims[2]; float * W = &filterBuf[k*N]; for(size_t j = 0; j < N; j++) { W[j] = (float)(W[j] * mk); } } } else { for(size_t k = 0; k < K; k++) { double ck = varBuf[k]; size_t N = filterDims[0] * filterDims[1] * filterDims[2]; float * W = &filterBuf[k*N]; double Wsum = 0; for(size_t j = 0; j < N; j++) { Wsum += W[j]; } biasBuf[k] = (float)(ck * Wsum + biasBuf[k]); } } // mark that OP is disabled, rename output as input, and use conv as previous layer operationRemoved[prevPos] = true; virtualRename[argsConv["input"].tensor().id] = inputBN; prevOutput = argsConv["output"].tensor().id; // mark the merged variables variableMerged[var] = true; } else 
{ // use conv as previous layer prevPos = pos; prevOpName = opname; prevOutput = argsConv["output"].tensor().id; } } else if(prevOpName == "conv" && (opname == "mul" || opname == "add")) { // get "conv" variables const nnef::Dictionary& argsConv = opsValues[prevPos]; const nnef::Dictionary& shapesConv = opsShapes[prevPos]; const std::string& outputConv = argsConv["output"].tensor().id; const std::string& filter = argsConv["filter"].tensor().id; const std::string& bias = getTensorOrScalar(argsConv["bias"]); const nnef::Shape& shapeFilter = shapesConv[filter]; // get "mul" or "add" variables const nnef::Dictionary& argsOP = opsValues[pos]; const nnef::Dictionary& shapesOP = opsShapes[pos]; const std::string& x = argsOP["x"].tensor().id; const std::string& y = argsOP["y"].tensor().id; std::string var; nnef::Shape shapeVar; if(std::find(variableList.begin(), variableList.end(), x) != variableList.end()) { var = x; shapeVar = shapesOP[x]; } else if(std::find(variableList.begin(), variableList.end(), y) != variableList.end()) { var = y; shapeVar = shapesOP[y]; } // get var dimensions size_t filterDimsCount = shapeFilter.rank(), varDimsCount = 0; std::vector filterDims, varDims; getTensorDims(shapeFilter, filterDims, filterDimsCount); if(var.length() > 0) { varDimsCount = shapeVar.rank(); getTensorDims(shapeVar, varDims, varDimsCount); } // check validity of dimensions size_t K = (filterDimsCount == 4) ? filterDims[3] : filterDims[1]; size_t N = (filterDimsCount == 4) ? 
(filterDims[0] * filterDims[1] * filterDims[2]) : filterDims[0]; if((filterDimsCount == 4 || filterDimsCount == 2) && varDimsCount == 2 && K == varDims[0]) { // fuse var into conv variables std::tuple filterBinary = variableBinary[filter]; std::tuple biasBinary = variableBinary[bias]; std::tuple varBinary = variableBinary[var]; float * filterBuf = (float *)std::get<1>(filterBinary); float * biasBuf = nullptr; float * varBuf = (float *)std::get<1>(varBinary); if(bias[0] != '0') { std::tuple biasBinary = variableBinary[bias]; biasBuf = (float *)std::get<1>(biasBinary); } else if(convNewBiasName.find(outputConv) != convNewBiasName.end()) { std::tuple biasBinary = variableBinary[convNewBiasName[outputConv]]; biasBuf = (float *)std::get<1>(biasBinary); } else { size_t size = K * sizeof(float); char * data = new char [size]; biasBuf = (float *)data; for(size_t i = 0; i < K; i++) { biasBuf[i] = 0; } std::string name = filter + "__new_bias"; std::tuple binary(size, data); variableBinary[name] = binary; convNewBiasName[outputConv] = name; variableList.push_back(name); variableMerged[name] = false; nnef::Shape shape(1); shape[0] = K; shape[1] = 1; variableShape[name] = shape; variableRequiredDims[name] = 2; } if(opname == "mul") { for(size_t k = 0; k < K; k++) { double mk = varBuf[k]; float * W = &filterBuf[k*N]; for(size_t j = 0; j < N; j++) { W[j] = (float)(W[j] * mk); } if(biasBuf) { biasBuf[k] = (float)(mk * biasBuf[k]); } } } else { for(size_t k = 0; k < K; k++) { float ck = varBuf[k]; biasBuf[k] = biasBuf[k] + ck; } } // mark that OP is disabled, rename output as input, and use conv as previous layer operationRemoved[pos] = true; virtualRename[argsOP["z"].tensor().id] = outputConv; prevOutput = argsOP["z"].tensor().id; // mark the merged variables variableMerged[var] = true; } else { // use OP as previous layer prevPos = pos; prevOpName = opname; prevOutput = argsOP["z"].tensor().id; } } else if(opname == "max_pool" || opname == "avg_pool") { const nnef::Dictionary& 
args = opsValues[pos]; const std::string& input = args["input"].tensor().id; if(input != prevOutput || prevOpName != "conv") { prevPos = pos; prevOpName = opname; } prevOutput = args["output"].tensor().id; } else if(opname == "conv" || opname == "batch_normalization") { const nnef::Dictionary& args = opsValues[pos]; const std::string& input = args["input"].tensor().id; prevPos = pos; prevOpName = opname; prevOutput = args["output"].tensor().id; } else if(opname == "add" || opname == "mul") { const nnef::Dictionary& args = opsValues[pos]; const std::string& input1 = args["x"].tensor().id; const std::string& input2 = args["y"].tensor().id; prevPos = pos; prevOpName = opname; prevOutput = args["z"].tensor().id; } else { prevPos = 0; prevOpName = ""; prevOutput = ""; } } } protected: //// // translator callback implementations // virtual void beginGraph( const nnef::Prototype& proto ) { // show NNEF syntax if(verbose & 1) { std::cout << "graph " << proto.name() << "( "; for ( size_t i = 0; i < proto.paramCount(); ++i ) { auto& param = proto.param(i); if ( i ) std::cout << ", "; std::cout << param.name(); } std::cout << " ) -> ( "; for ( size_t i = 0; i < proto.resultCount(); ++i ) { auto& result = proto.result(i); if ( i ) std::cout << ", "; std::cout << result.name(); } std::cout << " )" << std::endl << '{' << std::endl; } //// // get input and output parameter list // for (size_t i = 0; i < proto.paramCount(); ++i) { inputList.push_back(proto.param(i).name()); } for (size_t i = 0; i < proto.resultCount(); ++i) { outputList.push_back(proto.result(i).name()); } //// // generate OpenVX C code preamble // openvxFilenameC = openvxFolder + "/annmodule.cpp"; ovxC.open(openvxFilenameC); if(!ovxC) { printf("ERROR: unable to create: %s\n", openvxFilenameC.c_str()); exit(1); } } virtual void endGraph( const nnef::Prototype& proto ) { // show NNEF syntax if(verbose & 1) { std::cout << '}' << std::endl; } //// // generate OpenVX C code preamble // ovxC << "#include 
\"annmodule.h\"" << std::endl << "#include " << std::endl << "#include " << std::endl << "#include " << std::endl << "#include " << std::endl << std::endl << "#define ERROR_CHECK_OBJECT(obj) { vx_status status = vxGetStatus((vx_reference)(obj)); if(status != VX_SUCCESS) { vxAddLogEntry((vx_reference)context, status , \"ERROR: failed with status = (%d) at \" __FILE__ \"#%d\\n\", status, __LINE__); return status; } }" << std::endl << "#define ERROR_CHECK_STATUS(call) { vx_status status = (call); if(status != VX_SUCCESS) { vxAddLogEntry((vx_reference)context, status, \"ERROR: failed with status = (%d) at \" __FILE__ \"#%d\\n\", status, __LINE__); return status; } }" << std::endl << std::endl << "static vx_status initializeTensor(vx_context context, vx_tensor tensor, FILE * fp, const char * binaryFilename)" << std::endl << "{" << std::endl << " vx_enum data_type = VX_TYPE_FLOAT32;" << std::endl << " vx_size num_of_dims = 4, dims[4] = { 1, 1, 1, 1 }, stride[4];" << std::endl << " ERROR_CHECK_STATUS(vxQueryTensor(tensor, VX_TENSOR_DATA_TYPE, &data_type, sizeof(vx_enum)));" << std::endl << " ERROR_CHECK_STATUS(vxQueryTensor(tensor, VX_TENSOR_NUMBER_OF_DIMS, &num_of_dims, sizeof(vx_size)));" << std::endl << " ERROR_CHECK_STATUS(vxQueryTensor(tensor, VX_TENSOR_DIMS, &dims, num_of_dims * sizeof(vx_size)));" << std::endl << " vx_size itemsize = sizeof(float);" << std::endl << " if(data_type == VX_TYPE_UINT8 || data_type == VX_TYPE_INT8) {" << std::endl << " itemsize = sizeof(vx_uint8);" << std::endl << " }" << std::endl << " else if(data_type == VX_TYPE_UINT16 || data_type == VX_TYPE_INT16 || data_type == VX_TYPE_FLOAT16) {" << std::endl << " itemsize = sizeof(vx_uint16);" << std::endl << " }" << std::endl << " vx_size count = dims[0] * dims[1] * dims[2] * dims[3];" << std::endl << std::endl << " vx_uint32 h[2] = { 0 };" << std::endl << " fread(h, 1, sizeof(h), fp);" << std::endl << " if(h[0] != 0x" << std::hex << VARIABLES_DATA_MAGIC << std::dec << " || (vx_size)h[1] != 
(count*itemsize)) {" << std::endl << " vxAddLogEntry((vx_reference)tensor, VX_FAILURE, \"ERROR: invalid data (magic,size)=(0x%x,%d) in %s at byte position %d -- expected size is %ld\\n\", h[0], h[1], binaryFilename, ftell(fp)-sizeof(h), count*itemsize);" << std::endl << " return VX_FAILURE;" << std::endl << " }" << std::endl << std::endl << " vx_map_id map_id;" << std::endl << " float * ptr;" << std::endl << " ERROR_CHECK_STATUS(vxMapTensorPatch(tensor, num_of_dims, nullptr, nullptr, &map_id, stride, (void **)&ptr, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));" << std::endl << " vx_size n = fread(ptr, itemsize, count, fp);" << std::endl << " if(n != count) {" << std::endl << " vxAddLogEntry((vx_reference)tensor, VX_FAILURE, \"ERROR: expected char[%ld], but got char[%ld] in %s\\n\", count*itemsize, n*itemsize, binaryFilename);" << std::endl << " return VX_FAILURE;" << std::endl << " }" << std::endl << " ERROR_CHECK_STATUS(vxUnmapTensorPatch(tensor, map_id));" << std::endl << std::endl << " return VX_SUCCESS;" << std::endl << "}" << std::endl << std::endl << "vx_status annAddToGraph(vx_graph graph"; for(auto& name : inputList) { ovxC << ", vx_tensor " << name; } for(auto& name : outputList) { ovxC << ", vx_tensor " << name; } ovxC << ", const char * binaryFilename)" << std::endl << "{" << std::endl << " vx_context context = vxGetContext((vx_reference)graph);" << std::endl << " ERROR_CHECK_OBJECT(context);" << std::endl << " ERROR_CHECK_STATUS(vxLoadKernels(context, \"vx_nn\"));" << std::endl; //// // get variables // for(size_t i = 0; i < opsProto.size(); i++) { codeGenOperation(i, true, false, verbose); } //// // get data // for(auto& name : variableList) { unsigned int size = 0; char * data = nullptr; if(variableShape.find(name) != variableShape.end() && variableLabel.find(name) != variableLabel.end()) { auto& shape = variableShape[name]; auto& label = variableLabel[name]; size = loadTensorFile(nnefFolder, label, shape, data); } if(size > 0 && data) { std::tuple 
binary(size, data); variableBinary[name] = binary; } else { printf("ERROR: unable to load binary data for variable '%s'\n", name.c_str()); exit(1); } } //// // merge variables // codeGenMergeVariables(); //// // create and initialize variables file // ovxC << std::endl; ovxC << " // create variables" << std::endl; for(auto& name : variableList) { if(!variableMerged[name]) { if(variableShape.find(name) != variableShape.end()) { auto& shape = variableShape[name]; int num_dims = 0; auto it = variableRequiredDims.find(name); if(it != variableRequiredDims.end()) { num_dims = it->second; } ovxC << codeGenTensorCreate(name, shape, false, num_dims); } else { printf("ERROR: something wrong with variable '%s': variableShape is missing\n", name.c_str()); exit(1); } } } ovxC << std::endl << " // initialize variables" << std::endl << " FILE * fp__variables = fopen(binaryFilename, \"rb\");" << std::endl << " if(!fp__variables) {" << std::endl << " vxAddLogEntry((vx_reference)context, VX_FAILURE, \"ERROR: unable to open: %s\\n\", binaryFilename);" << std::endl << " return VX_FAILURE;" << std::endl << " }" << std::endl << " { vx_uint32 magic = 0;" << std::endl << " fread(&magic, 1, sizeof(magic), fp__variables);" << std::endl << " if(magic != 0x" << std::hex << VARIABLES_FILE_MAGIC << std::dec << ") {" << std::endl << " vxAddLogEntry((vx_reference)context, VX_FAILURE, \"ERROR: invalid file magic in %s\\n\", binaryFilename);" << std::endl << " return VX_FAILURE;" << std::endl << " }" << std::endl << " }" << std::endl; std::string variablesFilename = openvxFolder + "/weights.bin"; FILE * fpVariables = fopen(variablesFilename.c_str(), "wb"); if(!fpVariables) { printf("ERROR: unable to create: %s\n", variablesFilename.c_str()); exit(1); } unsigned int magic_file = VARIABLES_FILE_MAGIC; unsigned int magic_data = VARIABLES_DATA_MAGIC; fwrite(&magic_file, 1, sizeof(magic_file), fpVariables); for(auto& name : variableList) { if(!variableMerged[name]) { if(variableShape.find(name) != 
variableShape.end()) { auto& shape = variableShape[name]; std::tuple binary = variableBinary[name]; unsigned int size = std::get<0>(binary); char * data = std::get<1>(binary); if(size > 0 && data) { fwrite(&magic_data, 1, sizeof(magic_data), fpVariables); fwrite(&size, 1, sizeof(size), fpVariables); fwrite(data, 1, size, fpVariables); delete[] data; std::tuple empty(0, nullptr); variableBinary[name] = empty; ovxC << " ERROR_CHECK_STATUS(initializeTensor(context, " << name << ", fp__variables, binaryFilename));" << std::endl; } else { printf("ERROR: something wrong with variable '%s': variableBinary is not valid\n", name.c_str()); exit(1); } } else { printf("ERROR: something wrong with variable '%s': variableShape is missing\n", name.c_str()); exit(1); } } } unsigned int magic_eoff = VARIABLES_EOFF_MAGIC; fwrite(&magic_eoff, 1, sizeof(magic_eoff), fpVariables); fclose(fpVariables); ovxC << " { vx_uint32 magic = 0;" << std::endl << " fread(&magic, 1, sizeof(magic), fp__variables);" << std::endl << " if(magic != 0x" << std::hex << VARIABLES_EOFF_MAGIC << std::dec << ") {" << std::endl << " vxAddLogEntry((vx_reference)context, VX_FAILURE, \"ERROR: invalid eoff magic in %s\\n\", binaryFilename);" << std::endl << " return VX_FAILURE;" << std::endl << " }" << std::endl << " fclose(fp__variables);" << std::endl << " }" << std::endl; std::cout << "OK: created '" << variablesFilename << "'" << std::endl; //// // instantiate nodes in graph // ovxC << std::endl; ovxC << " // create nodes in graph" << std::endl; for(auto i = 0; i < opsProto.size(); i++) { codeGenOperation(i, false, true, 0); } //// // generate clean-up code // ovxC << std::endl; ovxC << " // release internal tensors" << std::endl; for(auto& name : virtualList) { if(virtualRename.find(name) == virtualRename.end()) { ovxC << " ERROR_CHECK_STATUS(vxReleaseTensor(&" << name << "));" << std::endl; } } for(auto& name : variableList) { if(!variableMerged[name]) { ovxC << " ERROR_CHECK_STATUS(vxReleaseTensor(&" << name 
<< "));" << std::endl; } } ovxC << std::endl; ovxC << " return VX_SUCCESS;" << std::endl; ovxC << "}" << std::endl; ovxC.close(); std::cout << "OK: created '" << openvxFilenameC << "'" << std::endl; //// // generate OpenVX header file // openvxFilenameC = openvxFolder + "/annmodule.h"; ovxC.open(openvxFilenameC); if(!ovxC) { printf("ERROR: unable to create: %s\n", openvxFilenameC.c_str()); exit(1); } ovxC << "#ifndef included_file_annmodule_h" << std::endl << "#define included_file_annmodule_h" << std::endl << std::endl << "#include " << std::endl << std::endl; ovxC << "////" << std::endl << "// initialize graph neural network for inference" << std::endl; for(auto& name : inputList) { if(inputShape.find(name) != inputShape.end()) { std::vector dims; getTensorDims(inputShape[name], dims, 4); ovxC << "// " << name << " -- dims[] = {"; for(size_t i = 0; i < dims.size(); i++) { ovxC << (i == 0 ? " " : ", ") << dims[i]; } ovxC << " } (input)" << std::endl; } } for(auto& name : outputList) { if(outputShape.find(name) != outputShape.end()) { std::vector dims; getTensorDims(outputShape[name], dims, 4); ovxC << "// " << name << " -- dims[] = {"; for(size_t i = 0; i < dims.size(); i++) { ovxC << (i == 0 ? 
" " : ", ") << dims[i]; } ovxC << " } (output)" << std::endl; } } ovxC << "//" << std::endl << "vx_status annAddToGraph(vx_graph graph"; for(auto& name : inputList) { ovxC << ", vx_tensor " << name; } for(auto& name : outputList) { ovxC << ", vx_tensor " << name; } ovxC << ", const char * binaryFilename);" << std::endl << std::endl << "#endif" << std::endl; ovxC.close(); std::cout << "OK: created '" << openvxFilenameC << "'" << std::endl; //// // generate a simple test program // openvxFilenameC = openvxFolder + "/anntest.cpp"; ovxC.open(openvxFilenameC); if(!ovxC) { printf("ERROR: unable to create: %s\n", openvxFilenameC.c_str()); exit(1); } ovxC << "#include \"annmodule.h\"" << std::endl << "#include " << std::endl << "#include " << std::endl << "#include " << std::endl << "#include " << std::endl << "#include " << std::endl << "#include " << std::endl << "#include " << std::endl << "#include " << std::endl << "" << std::endl << "#if ENABLE_OPENCV" << std::endl << "#include " << std::endl << "#include " << std::endl << "#include " << std::endl << "using namespace cv; " << std::endl << "#endif" << std::endl << "" << std::endl << "#define ERROR_CHECK_STATUS(call) { vx_status status = (call); if(status != VX_SUCCESS) { printf(\"ERROR: failed with status = (%d) at \" __FILE__ \"#%d\", status, __LINE__); return -1; } }" << std::endl << "" << std::endl << "static void VX_CALLBACK log_callback(vx_context context, vx_reference ref, vx_status status, const vx_char string[])" << std::endl << "{" << std::endl << " size_t len = strlen(string);" << std::endl << " if (len > 0) {" << std::endl << " printf(\"%s\", string);" << std::endl << " if (string[len - 1] != '\\n')" << std::endl << " printf(\"\\n\");" << std::endl << " fflush(stdout);" << std::endl << " }" << std::endl << "}" << std::endl << "" << std::endl << "inline int64_t clockCounter()" << std::endl << "{" << std::endl << " return std::chrono::high_resolution_clock::now().time_since_epoch().count();" << std::endl << 
"}" << std::endl << "" << std::endl << "inline int64_t clockFrequency()" << std::endl << "{" << std::endl << " return std::chrono::high_resolution_clock::period::den / std::chrono::high_resolution_clock::period::num;" << std::endl << "}" << std::endl << "" << std::endl << "static vx_status copyTensor(vx_tensor tensor, std::string fileName, vx_enum usage = VX_WRITE_ONLY)" << std::endl << "{" << std::endl << " vx_enum data_type = VX_TYPE_FLOAT32;" << std::endl << " vx_size num_of_dims = 4, dims[4] = { 1, 1, 1, 1 }, stride[4];" << std::endl << " vxQueryTensor(tensor, VX_TENSOR_DATA_TYPE, &data_type, sizeof(data_type));" << std::endl << " vxQueryTensor(tensor, VX_TENSOR_NUMBER_OF_DIMS, &num_of_dims, sizeof(num_of_dims));" << std::endl << " vxQueryTensor(tensor, VX_TENSOR_DIMS, &dims, sizeof(dims[0])*num_of_dims);" << std::endl << " vx_size itemsize = sizeof(float);" << std::endl << " if(data_type == VX_TYPE_UINT8 || data_type == VX_TYPE_INT8) {" << std::endl << " itemsize = sizeof(vx_uint8);" << std::endl << " }" << std::endl << " else if(data_type == VX_TYPE_UINT16 || data_type == VX_TYPE_INT16 || data_type == VX_TYPE_FLOAT16) {" << std::endl << " itemsize = sizeof(vx_uint16);" << std::endl << " }" << std::endl << " vx_size count = dims[0] * dims[1] * dims[2] * dims[3];" << std::endl << " vx_map_id map_id;" << std::endl << " float * ptr;" << std::endl << " vx_status status = vxMapTensorPatch(tensor, num_of_dims, nullptr, nullptr, &map_id, stride, (void **)&ptr, usage, VX_MEMORY_TYPE_HOST, 0);" << std::endl << " if(status) {" << std::endl << " std::cerr << \"ERROR: vxMapTensorPatch() failed for \" << fileName << std::endl;" << std::endl << " return -1;" << std::endl << " }" << std::endl << " if(usage == VX_WRITE_ONLY) {" << std::endl << "#if ENABLE_OPENCV" << std::endl << " if(dims[3] == 1 && dims[2] == 3 && fileName.size() > 4 && (fileName.substr(fileName.size()-4, 4) == \".png\" || fileName.substr(fileName.size()-4, 4) == \".jpg\"))" << std::endl << " {" << std::endl 
<< " Mat img = imread(fileName.c_str(), CV_LOAD_IMAGE_COLOR);" << std::endl << " if(!img.data || img.rows != dims[1] || img.cols != dims[0]) {" << std::endl << " std::cerr << \"ERROR: invalid image or dimensions in \" << fileName << std::endl;" << std::endl << " return -1;" << std::endl << " }" << std::endl << " unsigned char * src = img.data;" << std::endl << " for(vx_size c = 0; c < 3; c++) {" << std::endl << " for(vx_size y = 0; y < dims[1]; y++) {" << std::endl << " for(vx_size x = 0; x < dims[0]; x++) {" << std::endl << " ptr[(c*stride[2]+y*stride[1]+x*stride[0])>>2] = src[y*dims[0]*3+x*3+c];" << std::endl << " }" << std::endl << " }" << std::endl << " }" << std::endl << " }" << std::endl << " else" << std::endl << "#endif" << std::endl << " {" << std::endl << " FILE * fp = fopen(fileName.c_str(), \"rb\");" << std::endl << " if(!fp) {" << std::endl << " std::cerr << \"ERROR: unable to open: \" << fileName << std::endl;" << std::endl << " return -1;" << std::endl << " }" << std::endl << " vx_size n = fread(ptr, itemsize, count, fp);" << std::endl << " fclose(fp);" << std::endl << " if(n != count) {" << std::endl << " std::cerr << \"ERROR: expected char[\" << count*itemsize << \"], but got char[\" << n*itemsize << \"] in \" << fileName << std::endl;" << std::endl << " return -1;" << std::endl << " }" << std::endl << " }" << std::endl << " }" << std::endl << " else {" << std::endl << " FILE * fp = fopen(fileName.c_str(), \"wb\");" << std::endl << " if(!fp) {" << std::endl << " std::cerr << \"ERROR: unable to open: \" << fileName << std::endl;" << std::endl << " return -1;" << std::endl << " }" << std::endl << " fwrite(ptr, itemsize, count, fp);" << std::endl << " fclose(fp);" << std::endl << " }" << std::endl << " status = vxUnmapTensorPatch(tensor, map_id);" << std::endl << " if(status) {" << std::endl << " std::cerr << \"ERROR: vxUnmapTensorPatch() failed for \" << fileName << std::endl;" << std::endl << " return -1;" << std::endl << " }" << std::endl << " 
return 0;" << std::endl << "}" << std::endl << "" << std::endl << "int main(int argc, const char ** argv)" << std::endl << "{" << std::endl << " // check command-line usage" << std::endl << " if(argc < 2) {" << std::endl << " printf(\"Usage: anntest [...]\\n\");" << std::endl << " return -1;" << std::endl << " }" << std::endl << " const char * binaryFilename = argv[1];" << std::endl << " argc -= 2;" << std::endl << " argv += 2;" << std::endl << "" << std::endl << " // create context, input, output, and graph" << std::endl << " vxRegisterLogCallback(NULL, log_callback, vx_false_e);" << std::endl << " vx_context context = vxCreateContext();" << std::endl << " if(vxGetStatus((vx_reference)context)) {" << std::endl << " printf(\"ERROR: vxCreateContext() failed\\n\");" << std::endl << " return -1;" << std::endl << " }" << std::endl << " vxRegisterLogCallback(context, log_callback, vx_false_e);" << std::endl << "" << std::endl << " // create input tensors and initialize" << std::endl ; for(auto& name : inputList) { std::vector dims; getTensorDims(inputShape[name], dims, 4); ovxC << " vx_size " << name << "_dims[" << dims.size() << "] = {"; for(size_t i = 0; i < dims.size(); i++) { ovxC << (i == 0 ? 
" " : ", ") << dims[i]; } ovxC << " };" << std::endl << " vx_tensor " << name << " = vxCreateTensor(context, " << dims.size() << ", " << name << "_dims, VX_TYPE_FLOAT32, 0);" << std::endl << " if(vxGetStatus((vx_reference)" << name << ")) {" << std::endl << " printf(\"ERROR: vxCreateTensor() failed for " << name << "\\n\");" << std::endl << " return -1;" << std::endl << " }" << std::endl << " if(*argv) {" << std::endl << " if(strcmp(*argv, \"-\") != 0) {" << std::endl << " if(copyTensor(" << name << ", *argv, VX_WRITE_ONLY) < 0) {" << std::endl << " return -1;" << std::endl << " }" << std::endl << " printf(\"OK: read tensor '" << name << "' from %s\\n\", *argv);" << std::endl << " }" << std::endl << " argv++;" << std::endl << " }" << std::endl ; } ovxC << " // create output tensors" << std::endl; for(auto& name : outputList) { std::vector dims; getTensorDims(outputShape[name], dims, 4); ovxC << " vx_size " << name << "_dims[" << dims.size() << "] = {"; for(size_t i = 0; i < dims.size(); i++) { ovxC << (i == 0 ? " " : ", ") << dims[i]; } ovxC << " };" << std::endl << " vx_tensor " << name << " = vxCreateTensor(context, " << dims.size() << ", " << name << "_dims, VX_TYPE_FLOAT32, 0);" << std::endl << " if(vxGetStatus((vx_reference)" << name << ")) {" << std::endl << " printf(\"ERROR: vxCreateTensor() failed for " << name << "\\n\");" << std::endl << " return -1;" << std::endl << " }" << std::endl; } ovxC << "" << std::endl << " // build graph using annmodule" << std::endl << " vx_status status;" << std::endl << " int64_t freq = clockFrequency(), t0, t1;" << std::endl << " t0 = clockCounter();" << std::endl << " vx_graph graph = vxCreateGraph(context);" << std::endl << " status = vxGetStatus((vx_reference)graph);" << std::endl << " if(status) {" << std::endl << " printf(\"ERROR: vxCreateGraph(...) 
failed (%d)\\n\", status);" << std::endl << " return -1;" << std::endl << " }" << std::endl << " status = annAddToGraph(graph, " ; for(auto& name : inputList) { ovxC << name << ", "; } for(auto& name : outputList) { ovxC << name << ", "; } ovxC << "binaryFilename);" << std::endl << " if(status) {" << std::endl << " printf(\"ERROR: annAddToGraph() failed (%d)\\n\", status);" << std::endl << " return -1;" << std::endl << " }" << std::endl << " status = vxVerifyGraph(graph);" << std::endl << " if(status) {" << std::endl << " printf(\"ERROR: vxVerifyGraph(...) failed (%d)\\n\", status);" << std::endl << " return -1;" << std::endl << " }" << std::endl << " t1 = clockCounter();" << std::endl << " printf(\"OK: graph initialization with annAddToGraph() took %.3f msec\\n\", (float)(t1-t0)*1000.0f/(float)freq);" << std::endl << "" << std::endl << " t0 = clockCounter();" << std::endl << " status = vxProcessGraph(graph);" << std::endl << " t1 = clockCounter();" << std::endl << " if(status != VX_SUCCESS) {" << std::endl << " printf(\"ERROR: vxProcessGraph() failed (%d)\\n\", status);" << std::endl << " return -1;" << std::endl << " }" << std::endl << " printf(\"OK: vxProcessGraph() took %.3f msec (1st iteration)\\n\", (float)(t1-t0)*1000.0f/(float)freq);" << std::endl << "" << std::endl << " // write outputs" << std::endl ; for(auto& name : outputList) { ovxC << " if(*argv) {" << std::endl << " if(strcmp(*argv, \"-\") != 0) {" << std::endl << " if(copyTensor(" << name << ", *argv, VX_READ_ONLY) < 0) {" << std::endl << " return -1;" << std::endl << " }" << std::endl << " printf(\"OK: wrote tensor '" << name << "' into %s\\n\", *argv);" << std::endl << " }" << std::endl << " argv++;" << std::endl << " }" << std::endl ; } ovxC << "" << std::endl << " t0 = clockCounter();" << std::endl << " int N = 100;" << std::endl << " for(int i = 0; i < N; i++) {" << std::endl << " status = vxProcessGraph(graph);" << std::endl << " if(status != VX_SUCCESS)" << std::endl << " break;" << 
std::endl << " }" << std::endl << " t1 = clockCounter();" << std::endl << " printf(\"OK: vxProcessGraph() took %.3f msec (average over %d iterations)\\n\", (float)(t1-t0)*1000.0f/(float)freq/(float)N, N);" << std::endl << "" << std::endl << " // release resources" << std::endl << " ERROR_CHECK_STATUS(vxReleaseGraph(&graph));" << std::endl ; for(auto& name : inputList) { ovxC << " ERROR_CHECK_STATUS(vxReleaseTensor(&" << name << "));" << std::endl; } for(auto& name : outputList) { ovxC << " ERROR_CHECK_STATUS(vxReleaseTensor(&" << name << "));" << std::endl; } ovxC << " ERROR_CHECK_STATUS(vxReleaseContext(&context));" << std::endl << " printf(\"OK: successful\\n\");" << std::endl << "" << std::endl << " return 0;" << std::endl << "}" << std::endl ; ovxC.close(); std::cout << "OK: created '" << openvxFilenameC << "'" << std::endl; //// // generate CMakeLists.txt // openvxFilenameC = openvxFolder + "/CMakeLists.txt"; ovxC.open(openvxFilenameC); if(!ovxC) { printf("ERROR: unable to create: %s\n", openvxFilenameC.c_str()); exit(1); } ovxC << "cmake_minimum_required (VERSION 2.8)" << std::endl << "project (annmodule)" << std::endl << "set (CMAKE_CXX_STANDARD 11) " << std::endl << "list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)" << std::endl << "find_package(OpenCL REQUIRED)" << std::endl << "find_package(OpenCV QUIET)" << std::endl << "include_directories (${OpenCL_INCLUDE_DIRS} ${OpenCL_INCLUDE_DIRS}/Headers )" << std::endl << "include_directories (/opt/rocm/mivisionx/include)" << std::endl << "link_directories (/opt/rocm/mivisionx/lib)" << std::endl << "list(APPEND SOURCES annmodule.cpp)" << std::endl << "add_library(${PROJECT_NAME} SHARED ${SOURCES})" << std::endl << "set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -msse4.2 -std=c++11\")" << std::endl << "target_link_libraries(${PROJECT_NAME} openvx vx_nn pthread)" << std::endl << "add_executable(anntest anntest.cpp)" << std::endl << "if (OpenCV_FOUND)" << std::endl << " target_compile_definitions(anntest PUBLIC 
ENABLE_OPENCV=1)" << std::endl << " include_directories(${OpenCV_INCLUDE_DIRS})" << std::endl << " target_link_libraries(anntest ${OpenCV_LIBRARIES})" << std::endl << "else(OpenCV_FOUND)" << std::endl << " target_compile_definitions(anntest PUBLIC ENABLE_OPENCV=0)" << std::endl << "endif(OpenCV_FOUND)" << std::endl << "target_link_libraries(anntest openvx vx_nn pthread ${PROJECT_NAME})" << std::endl ; ovxC.close(); std::cout << "OK: created '" << openvxFilenameC << "'" << std::endl; } virtual void operation(const nnef::Prototype& proto, const nnef::Dictionary& args, const nnef::Dictionary& shapes) { // save the operation details opsProto.push_back(proto); opsValues.push_back(args); opsShapes.push_back(shapes); operationRemoved.push_back(false); } virtual bool isAtomic( const nnef::Prototype& proto, const nnef::Dictionary& args ) { static std::set atomics = { "sqr", "sqrt", "min", "max", "softmax", "relu", "tanh", "sigmoid", "batch_normalization", "max_pool", "avg_pool", "quantize_linear", "quantize_logarithmic" }; return atomics.find(proto.name()) != atomics.end(); } }; int main(int argc, const char * argv[]) { //// // get command-line parameters // int verbose = 0; bool useVirtual = true; while(argc > 1 && argv[1][0] == '-') { if(!strcmp(argv[1], "--no-virtual")) { useVirtual = false; argc -= 1; argv += 1; } else if(argc > 2 && !strcmp(argv[1], "-v")) { verbose = atoi(argv[2]); argc -= 2; argv += 2; } else { printf("ERROR: invalid option: %s\n", argv[1]); return -1; } } if(argc < 3) { printf("Usage: nnef2openvx [-v ] [--no-virtual] \n"); return -1; } std::string nnefContainedFolder = argv[1]; std::string openvxOutputFolder = argv[2]; std::string nnefFilename = nnefContainedFolder + "/graph.nnef"; //// // parse NNEF structure and translate to OpenVX code // std::ifstream ifs(nnefFilename.c_str()); if(!ifs) { printf("ERROR: unable to open: %s\n", nnefFilename.c_str()); return -1; } mkdir(openvxOutputFolder.c_str(), 0777); printf("OK: parsing %s ...\n", 
nnefFilename.c_str()); std::unique_ptr parser((nnef::Parser*)new nnef::FlatParser()); try { NNEF2OpenVX_Translator callback(nnefContainedFolder, openvxOutputFolder, useVirtual, verbose); parser->parse(ifs, callback); } catch(nnef::Error e) { printf("Parse error: [%u:%u] %s\n", e.position().line, e.position().column, e.what()); auto origin = e.position().origin; while(origin) { printf("... evaluated from [%u:%u]\n", origin->line, origin->column); origin = origin->origin; } } ifs.close(); return 0; }