/*
Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include "vx_amd_media.h"
#include "kernels.h"
#include <thread>
#include <mutex>
#include <condition_variable>
#include <deque>

// OpenCL configuration
#define ENCODE_ENABLE_OPENCL       1         // enable use of OpenCL buffers

#if ENCODE_ENABLE_OPENCL
#if __APPLE__
#include <opencl.h>
#else
#include <CL/cl.h>
#endif
#endif

#define DEFAULT_MBPS               4.0f
#define DEFAULT_FPS                30.0f
#define DEFAULT_BFRAMES            0
#define DEFAULT_GOPSIZE            60
#define ENCODE_BUFFER_POOL_SIZE    4         // number of buffers in encoder queue: keep it atleast 2

typedef struct {
	vx_uint32 size;
	vx_uint32 type;
} AuxDataContainerHeader;

typedef struct {
	AuxDataContainerHeader h0;
	int outputFrameCount;
	int reserved[3];
	int64_t cpuTimestamp;
} LoomIoMediaEncoderAuxInfo;

class CLoomIoMediaEncoder {
public:
	CLoomIoMediaEncoder(vx_node node, const char ioConfig[], vx_uint32 width, vx_uint32 height, vx_df_image format, vx_uint32 stride, vx_uint32 offset, vx_size input_aux_data_max_size);
	~CLoomIoMediaEncoder();
	vx_status Initialize();
	vx_status ProcessFrame(vx_image input_image, vx_array input_aux, vx_array output_aux);
	vx_status UpdateBufferOpenCL(vx_image input_image, vx_array input_aux);
protected:
	typedef enum { cmd_abort, cmd_encode } command;
	void EncodeLoop();
	void PushCommand(command cmd);
	void PushAck(int ack);
	command PopCommand();
	int PopAck();
private:
	vx_node node;
	std::string ioConfig;
	int width;
	int height;
	vx_df_image format;
	AVPixelFormat inputFormat;
	int stride, offset;
	vx_size input_aux_data_max_size;
	AVFormatContext * formatContext;
	AVStream * videoStream;
	AVCodecContext * videoCodecContext;
	AVCodec * videoCodec;
	SwsContext * conversionContext;
#if ENCODE_ENABLE_OPENCL
	cl_command_queue cmdq;
	cl_mem mem[ENCODE_BUFFER_POOL_SIZE];
#endif
	AVFrame * videoFrame[ENCODE_BUFFER_POOL_SIZE];
	uint8_t * outputBuffer;
	int outputBufferSize;
	uint8_t * outputAuxBuffer;
	vx_size outputAuxLength;
	FILE * fpOutput;
	std::mutex mutexCmd, mutexAck;
	std::condition_variable cvCmd, cvAck;
	std::deque<command> queueCmd;
	std::deque<int> queueAck;
	std::thread * thread;
	bool threadTerminated;
	int encodeFrameCount;
	int inputFrameCount;
	float mbps, fps;
	int bframes, gopsize;
};

void CLoomIoMediaEncoder::PushCommand(CLoomIoMediaEncoder::command cmd)
{
	std::unique_lock<std::mutex> lock(mutexCmd);
	queueCmd.push_front(cmd);
	cvCmd.notify_one();
}

CLoomIoMediaEncoder::command CLoomIoMediaEncoder::PopCommand()
{
	std::unique_lock<std::mutex> lock(mutexCmd);
	cvCmd.wait(lock, [=] { return !queueCmd.empty(); });
	command cmd = std::move(queueCmd.back());
	queueCmd.pop_back();
	return cmd;
}

void CLoomIoMediaEncoder::PushAck(int ack)
{
	std::unique_lock<std::mutex> lock(mutexAck);
	queueAck.push_front(ack);
	cvAck.notify_one();
}

int CLoomIoMediaEncoder::PopAck()
{
	std::unique_lock<std::mutex> lock(mutexAck);
	cvAck.wait(lock, [=] { return !queueAck.empty(); });
	int ack = std::move(queueAck.back());
	queueAck.pop_back();
	return ack;
}

CLoomIoMediaEncoder::CLoomIoMediaEncoder(vx_node node_, const char ioConfig_[], vx_uint32 width_, vx_uint32 height_, vx_df_image format_, vx_uint32 stride_, vx_uint32 offset_, vx_size input_aux_data_max_size_)
	: node{ node_ }, ioConfig(ioConfig_), width{ static_cast<int>(width_) }, height{ static_cast<int>(height_) }, format{ format_ },
	  stride{ static_cast<int>(stride_) }, offset{ static_cast<int>(offset_) }, input_aux_data_max_size{ input_aux_data_max_size_ },
	  inputFrameCount{ 0 }, encodeFrameCount{ 0 }, threadTerminated{ false }, inputFormat{ AV_PIX_FMT_UYVY422 }, 
	  outputBuffer{ nullptr }, outputBufferSize{ 1000000 }, fpOutput{ nullptr }, formatContext{ nullptr }, videoStream{ nullptr }, outputAuxBuffer{ nullptr }, outputAuxLength{ 0 },
	  videoCodecContext{ nullptr }, videoCodec{ nullptr }, conversionContext{ nullptr }, thread{ nullptr },
	  mbps{ DEFAULT_MBPS }, fps{ DEFAULT_FPS }, gopsize{ DEFAULT_GOPSIZE }, bframes{ DEFAULT_BFRAMES }
{
#if ENCODE_ENABLE_OPENCL
	cmdq = nullptr;
	memset(mem, 0, sizeof(mem));
#endif
	memset(videoFrame, 0, sizeof(videoFrame));
	outputAuxBuffer = new uint8_t[input_aux_data_max_size + sizeof(LoomIoMediaEncoderAuxInfo)]();
	// initialize freq inside GetTimeInMicroseconds()
	GetTimeInMicroseconds();
}

CLoomIoMediaEncoder::~CLoomIoMediaEncoder()
{
	// terminate the thread
	if (thread) {
		PushCommand(cmd_abort);
		while (!threadTerminated) {
			if (PopAck() < 0)
				break;
		}
		thread->join();
		delete thread;
	}

	// close output mediaFile
	if (fpOutput) {
		fclose(fpOutput);
	}
	if (formatContext) {
		av_write_trailer(formatContext);
		av_free(formatContext);
	}

	// release OpenCL and media resources
	if (outputBuffer) aligned_free(outputBuffer);
#if ENCODE_ENABLE_OPENCL
	if (cmdq) clReleaseCommandQueue(cmdq);
	for (int i = 0; i < ENCODE_BUFFER_POOL_SIZE; i++) {
		if (mem[i]) clReleaseMemObject(mem[i]);
	}
#endif
	for (int i = 0; i < ENCODE_BUFFER_POOL_SIZE; i++) {
		if (videoFrame[i]) {
			if (videoFrame[i]->data[0]) aligned_free(videoFrame[i]->data[0]);
			if (videoFrame[i]->data[1]) aligned_free(videoFrame[i]->data[1]);
			av_frame_free(&videoFrame[i]);
		}
	}
	if (conversionContext) av_free(conversionContext);
	if (videoCodecContext) {
		avcodec_close(videoCodecContext);
		av_free(videoCodecContext);
	}
	if (videoCodec) av_free(videoCodec);
	if (outputAuxBuffer) delete[] outputAuxBuffer;
}

vx_status CLoomIoMediaEncoder::Initialize()
{
	// check for valid image type support and get stride in bytes (aligned to 16-byte boundary)
	if (format == VX_DF_IMAGE_UYVY) {
		inputFormat = AV_PIX_FMT_UYVY422;
	}
	else if (format == VX_DF_IMAGE_YUYV) {
		inputFormat = AV_PIX_FMT_YUYV422;
	}
	else if (format == VX_DF_IMAGE_RGB) {
		inputFormat = AV_PIX_FMT_RGB24;
	}
	else {
		vxAddLogEntry((vx_reference)node, VX_ERROR_INVALID_FORMAT, "ERROR: input image format %4.4s not supported", &format);
		return VX_ERROR_INVALID_FORMAT;
	}

	// get media configuration and fileName
	const char * s = ioConfig.c_str();
	if (*s == '{') {
		sscanf(s + 1, "%f,%f,%d,%d", &mbps, &fps, &bframes, &gopsize);
		while (*s && *s != '}')
			s++;
		if (*s == '}') s++;
		if (*s == ',') s++;
		else {
			vxAddLogEntry((vx_reference)node, VX_ERROR_INVALID_VALUE, "ERROR: invalid ioConfig: %s\nERROR: invalid ioConfig: valid syntax: [{mbps,fps,bframes,gopsize},]filename.mp4\n", ioConfig.c_str());
			return VX_ERROR_INVALID_VALUE;
		}
	}
	const char * fileName = s;

	// open media file and initialize codec
	ERROR_CHECK_STATUS(initialize_ffmpeg());
	ERROR_CHECK_NULLPTR(videoCodec = avcodec_find_encoder(AV_CODEC_ID_H264));
	ERROR_CHECK_NULLPTR(videoCodecContext = avcodec_alloc_context3(videoCodec));
	videoCodecContext->bit_rate = (int)(mbps * 1000000);
	videoCodecContext->width = width;
	videoCodecContext->height = height;
	videoCodecContext->time_base.num = (int)(60000.0f / fps);
	videoCodecContext->time_base.den = 60000;
	videoCodecContext->gop_size = gopsize;
	videoCodecContext->max_b_frames = bframes;
	videoCodecContext->pix_fmt = AV_PIX_FMT_NV12;
	ERROR_CHECK_STATUS(avcodec_open2(videoCodecContext, videoCodec, nullptr));
	ERROR_CHECK_NULLPTR(conversionContext = sws_getContext(width, height, inputFormat, width, height, videoCodecContext->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL));
	for (int i = 0; i < ENCODE_BUFFER_POOL_SIZE; i++) {
		ERROR_CHECK_NULLPTR(videoFrame[i] = av_frame_alloc());
		videoFrame[i]->data[0] = aligned_alloc(width * height);
		videoFrame[i]->data[1] = aligned_alloc(width * height / 2);
		videoFrame[i]->linesize[0] = width;
		videoFrame[i]->linesize[1] = width;
		videoFrame[i]->format = AV_PIX_FMT_NV12;
#if ENCODE_ENABLE_OPENCL
		// just one videoFrame[0] is sufficient
		break;
#endif
	}
	outputBufferSize = 1024*1024;
	ERROR_CHECK_NULLPTR(outputBuffer = aligned_alloc(outputBufferSize));

	// open output file
	if (strlen(fileName) > 4 && !strcmp(fileName + strlen(fileName) - 4, ".264")) {
		fpOutput = fopen(fileName, "wb");
		if (!fpOutput) {
			vxAddLogEntry((vx_reference)node, VX_ERROR_INVALID_LINK, "ERROR: CLoomIoMediaEncoder::Initialize: unable to create: %s", fileName);
			return VX_ERROR_INVALID_LINK;
		}
	}
	else {
		int status = avformat_alloc_output_context2(&formatContext, nullptr, nullptr, fileName);
		if (status < 0) {
			vxAddLogEntry((vx_reference)node, VX_ERROR_INVALID_LINK, "ERROR: CLoomIoMediaEncoder::Initialize: avformat_alloc_output_context2(...,%s) failed (%d)", fileName, status);
			return VX_ERROR_INVALID_LINK;
		}
		ERROR_CHECK_NULLPTR(videoStream = avformat_new_stream(formatContext, videoCodec));
		videoStream->id = formatContext->nb_streams - 1;
		if (formatContext->oformat->flags & AVFMT_GLOBALHEADER)
            videoCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
		av_dump_format(formatContext, 0, fileName, 1);
		if (!(formatContext->oformat->flags & AVFMT_NOFILE)) {
			if ((status = avio_open(&formatContext->pb, fileName, AVIO_FLAG_WRITE)) < 0) {
				vxAddLogEntry((vx_reference)node, VX_ERROR_INVALID_LINK, "ERROR: CLoomIoMediaEncoder::Initialize: avio_open(...,%s) failed (%d)", fileName, status);
				return VX_ERROR_INVALID_LINK;
			}
		}
		videoStream->codec->width = width;
		videoStream->codec->height = height;
		videoStream->codec->codec_id = videoCodecContext->codec_id;
		videoStream->codec->bit_rate = videoCodecContext->bit_rate;
		videoStream->codec->time_base = videoCodecContext->time_base;
		videoStream->codec->gop_size = videoCodecContext->gop_size;
		videoStream->codec->max_b_frames = videoCodecContext->max_b_frames;
		ERROR_CHECK_STATUS(avformat_write_header(formatContext, nullptr));
	}

#if ENCODE_ENABLE_OPENCL
	// allocate OpenCL encode buffers
	cl_context context = nullptr;
	ERROR_CHECK_STATUS(vxQueryContext(vxGetContext((vx_reference)node), VX_CONTEXT_ATTRIBUTE_AMD_OPENCL_CONTEXT, &context, sizeof(context)));
	cl_device_id device_id = nullptr;
	ERROR_CHECK_STATUS(clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id), &device_id, nullptr));
#if defined(CL_VERSION_2_0)
	cmdq = clCreateCommandQueueWithProperties(context, device_id, 0, nullptr);
#else
	cmdq = clCreateCommandQueue(context, device_id, 0, nullptr);
#endif
	ERROR_CHECK_NULLPTR(cmdq);
	for (int i = 0; i < ENCODE_BUFFER_POOL_SIZE; i++) {
		mem[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, offset + stride * height, nullptr, nullptr);
		ERROR_CHECK_NULLPTR(mem[i]);
	}
#endif

	// start encoder thread
	inputFrameCount = 0;
	encodeFrameCount = 0;
	threadTerminated = false;
	thread = new std::thread(&CLoomIoMediaEncoder::EncodeLoop, this);
	ERROR_CHECK_NULLPTR(thread);

	// debug info
	vxAddLogEntry((vx_reference)node, VX_SUCCESS, "INFO: writing %dx%d %.2fmbps %.2ffps gopsize=%d bframes=%d video into %s", width, height, mbps, fps, gopsize, bframes, fileName);

	return VX_SUCCESS;
}

#if ENCODE_ENABLE_OPENCL
vx_status CLoomIoMediaEncoder::UpdateBufferOpenCL(vx_image input_image, vx_array input_aux)
{
	// wait until there is an ACK from encoder thread
	int ack = PopAck();
	if ((ack < 0) || threadTerminated) {
		// nothing to process, so abandon the graph execution
		return VX_ERROR_GRAPH_ABANDONED;
	}

	// update output auxiliary information
	outputAuxLength = 0;
	if (input_aux) {
		ERROR_CHECK_STATUS(vxQueryArray(input_aux, VX_ARRAY_NUMITEMS, &outputAuxLength, sizeof(outputAuxLength)));
		if (outputAuxLength > 0) {
			ERROR_CHECK_STATUS(vxCopyArrayRange(input_aux, 0, outputAuxLength, sizeof(uint8_t), outputAuxBuffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
		}
	}
	LoomIoMediaEncoderAuxInfo * auxInfo = (LoomIoMediaEncoderAuxInfo *)&outputAuxBuffer[outputAuxLength];
	auxInfo->h0.size = sizeof(*auxInfo);
    auxInfo->h0.type = AMDOVX_KERNEL_AMD_MEDIA_ENCODE;
	auxInfo->outputFrameCount = inputFrameCount;
	auxInfo->cpuTimestamp = GetTimeInMicroseconds();
	outputAuxLength += auxInfo->h0.size;

	// pick the OpenCL buffer frame from pool
	int bufId = inputFrameCount % ENCODE_BUFFER_POOL_SIZE; inputFrameCount++;
	ERROR_CHECK_STATUS(vxSetImageAttribute(input_image, VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &mem[bufId], sizeof(cl_mem)));

	return VX_SUCCESS;
}
#endif

vx_status CLoomIoMediaEncoder::ProcessFrame(vx_image input_image, vx_array input_aux, vx_array output_aux)
{
#if ENCODE_ENABLE_OPENCL
	if (threadTerminated)
#else
	// wait until there is an ACK from encoder thread
	int ack = PopAck();
	if ((ack < 0) || threadTerminated)
#endif
	{ // nothing to process, so abandon the graph execution
		return VX_ERROR_GRAPH_ABANDONED;
	}

#if ENCODE_ENABLE_OPENCL
	// just submit OpenCL buffer for encoding
	PushCommand(cmd_encode);
#else
	// format convert input image into encode buffer
	int bufId = inputFrameCount % ENCODE_BUFFER_POOL_SIZE; inputFrameCount++;
	vx_rectangle_t rect = { 0, 0, width, height };
	vx_map_id map_id; vx_imagepatch_addressing_t addr;
	uint8_t * ptr = nullptr;
	ERROR_CHECK_STATUS(vxMapImagePatch(input_image, &rect, 0, &map_id, &addr, (void **)&ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, VX_NOGAP_X));
	int status = sws_scale(conversionContext, &ptr, &addr.stride_y, 0, height, videoFrame[bufId]->data, videoFrame[bufId]->linesize);
	ERROR_CHECK_STATUS(vxUnmapImagePatch(input_image, map_id));
	if (status < 0) {
		vxAddLogEntry((vx_reference)node, VX_FAILURE, "ERROR: CLoomIoMediaEncoder::ProcessFrame: sws_scale() failed (%d)\n", status);
		return VX_FAILURE;
	}
	// submit encoding
	PushCommand(cmd_encode);
#endif

	// copy input aux data to output, if there is input aux data.
	ERROR_CHECK_STATUS(vxTruncateArray(output_aux, 0));
	if (outputAuxLength > 0) {
		ERROR_CHECK_STATUS(vxAddArrayItems(output_aux, outputAuxLength, outputAuxBuffer, sizeof(uint8_t)));
	}

	return VX_SUCCESS;
}

void CLoomIoMediaEncoder::EncodeLoop()
{
	// initial ACK to inform producer for readiness
	for (int i = 1; i < ENCODE_BUFFER_POOL_SIZE; i++)
		PushAck(0);

	// initialize packet and start encoding
	AVPacket pkt = { 0 };
	av_init_packet(&pkt);
	pkt.data = nullptr;
	pkt.size = 0;
	int64_t pts = 0;
	for (command cmd; !threadTerminated && ((cmd = PopCommand()) != cmd_abort);) {
		int status;
		// get the bufId to process
		int bufId = (encodeFrameCount % ENCODE_BUFFER_POOL_SIZE);
		int got_output = 0;
#if ENCODE_ENABLE_OPENCL
		// format convert input image into encode buffer
		cl_int err = -1;
		uint8_t * ptr = (uint8_t *)clEnqueueMapBuffer(cmdq, mem[bufId], CL_TRUE, CL_MAP_READ, offset, height * stride, 0, nullptr, nullptr, &err);
		if (err < 0 || !ptr) {
			vxAddLogEntry((vx_reference)node, VX_FAILURE, "ERROR: CLoomIoMediaEncoder::EncodeLoop: clEnqueueMapBuffer() failed (%d)\n", err);
			threadTerminated = true;
			PushAck(-1);
			return;
		}
		clFinish(cmdq);
		status = sws_scale(conversionContext, &ptr, &stride, 0, height, videoFrame[0]->data, videoFrame[0]->linesize);
		err = clEnqueueUnmapMemObject(cmdq, mem[bufId], ptr, 0, nullptr, nullptr);
		if (err < 0) {
			vxAddLogEntry((vx_reference)node, VX_FAILURE, "ERROR: CLoomIoMediaEncoder::EncodeLoop: clEnqueueUnmapMemObject() failed (%d)\n", err);
			threadTerminated = true;
			PushAck(-1);
			return;
		}
		clFinish(cmdq);
		if (status < 0) {
			vxAddLogEntry((vx_reference)node, VX_FAILURE, "ERROR: CLoomIoMediaEncoder::EncodeLoop: sws_scale() failed (%d)\n", status);
			threadTerminated = true;
			PushAck(-1);
			return;
		}
		// reset bufId to zero because only videoFrame[0] is valid
		bufId = 0;
#endif
		// encode video frame and write output to file
		videoFrame[bufId]->pts = pts;
		status = avcodec_encode_video2(videoCodecContext, &pkt, videoFrame[bufId], &got_output);
		if (status < 0) {
			vxAddLogEntry((vx_reference)node, VX_FAILURE, "ERROR: CLoomIoMediaEncoder::EncodeLoop: avcodec_encode_video2() failed (%4.4s:0x%08x:%d) for frame:%d\n", &status, status, status, encodeFrameCount);
			threadTerminated = true;
			PushAck(-1);
			return;
		}
		if (got_output && (pkt.size > 0)) {
			if (fpOutput) {
				fwrite(pkt.data, 1, pkt.size, fpOutput);
			}
			if (formatContext) {
				pkt.stream_index = videoStream->index;
				status = av_interleaved_write_frame(formatContext, &pkt);
				if (status < 0) {
					vxAddLogEntry((vx_reference)node, VX_FAILURE, "ERROR: CLoomIoMediaEncoder::EncodeLoop: av_interleaved_write_frame() failed (%4.4s:0x%08x:%d) for frame:%d\n", &status, status, status, encodeFrameCount);
					threadTerminated = true;
					PushAck(-1);
					return;
				}
			}
		}
		av_free_packet(&pkt);
		// update encode frame count and send ACK
		encodeFrameCount++;
		if (videoStream) {
			pts += av_rescale_q(1, videoStream->codec->time_base, videoStream->time_base);
		}
		else {
			pts += (int64_t)(60000.0f / fps);
		}
		PushAck(0);
	}
	// process the delayed frames
	for (int got_output = !0; got_output;) {
		int status = avcodec_encode_video2(videoCodecContext, &pkt, nullptr, &got_output);
		if (status < 0) {
			vxAddLogEntry((vx_reference)node, VX_FAILURE, "ERROR: avcodec_encode_video2() failed (%4.4s:%d) at the end\n", &status, status);
			threadTerminated = true;
			PushAck(-1);
			return;
		}
		if (got_output && fpOutput && (pkt.size > 0)) {
			fwrite(pkt.data, 1, pkt.size, fpOutput);
		}
		av_free_packet(&pkt);
	}
	// mark termination and send ACK
	threadTerminated = true;
	PushAck(-1);
}

#if ENCODE_ENABLE_OPENCL
//! \brief The kernel execution.
static vx_status VX_CALLBACK amd_media_encode_opencl_buffer_update_callback(vx_node node, const vx_reference parameters[], vx_uint32 num)
{
	// get encoder and input image
	CLoomIoMediaEncoder * encoder = nullptr;
	ERROR_CHECK_STATUS(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &encoder, sizeof(encoder)));
	if (!encoder) return VX_FAILURE;

	return encoder->UpdateBufferOpenCL((vx_image)parameters[1], (vx_array)parameters[2]);
	return VX_SUCCESS;
}
#endif

//! \brief The kernel execution.
static vx_status VX_CALLBACK amd_media_encode_kernel(vx_node node, const vx_reference * parameters, vx_uint32 num)
{
	// get encoder and input image
	CLoomIoMediaEncoder * encoder = nullptr;
	ERROR_CHECK_STATUS(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &encoder, sizeof(encoder)));
	if (!encoder) return VX_FAILURE;

	return encoder->ProcessFrame((vx_image)parameters[1], (vx_array)parameters[2], (vx_array)parameters[3]);
}

//! \brief The kernel initializer.
static vx_status VX_CALLBACK amd_media_encode_initialize(vx_node node, const vx_reference * parameters, vx_uint32 num)
{
	// get input parameters
	char ioConfig[VX_MAX_STRING_BUFFER_SIZE_AMD];
	vx_uint32 width = 0, height = 0, stride = 0, offset = 0;
	vx_df_image format = VX_DF_IMAGE_VIRT;
	ERROR_CHECK_STATUS(vxCopyScalar((vx_scalar)parameters[0], ioConfig, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
	ERROR_CHECK_STATUS(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &width, sizeof(width)));
	ERROR_CHECK_STATUS(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &height, sizeof(height)));
	ERROR_CHECK_STATUS(vxQueryImage((vx_image)parameters[1], VX_IMAGE_FORMAT, &format, sizeof(format)));
	ERROR_CHECK_STATUS(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER_STRIDE, &stride, sizeof(stride)));
	ERROR_CHECK_STATUS(vxQueryImage((vx_image)parameters[1], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER_OFFSET, &offset, sizeof(offset)));
	vx_size input_aux_data_max_size = 0;
	if (parameters[2]) {
		ERROR_CHECK_STATUS(vxQueryArray((vx_array)parameters[2], VX_ARRAY_CAPACITY, &input_aux_data_max_size, sizeof(input_aux_data_max_size)));
	}
	// create and initialize encoder
	CLoomIoMediaEncoder * encoder = new CLoomIoMediaEncoder(node, ioConfig, width, height, format, stride, offset, input_aux_data_max_size);
	ERROR_CHECK_STATUS(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &encoder, sizeof(encoder)));
	ERROR_CHECK_STATUS(encoder->Initialize());

	return VX_SUCCESS;
}

//! \brief The kernel deinitializer.
static vx_status VX_CALLBACK amd_media_encode_deinitialize(vx_node node, const vx_reference * parameters, vx_uint32 num)
{
	// get encoder
	CLoomIoMediaEncoder * encoder = nullptr;
	ERROR_CHECK_STATUS(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &encoder, sizeof(encoder)));

	if (encoder) {
		// release the resources
		delete encoder;
	}

	return VX_SUCCESS;
}

//! \brief The input validator callback.
static vx_status VX_CALLBACK amd_media_encode_validate(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
{
	// make sure input media filename
	vx_enum type;
	ERROR_CHECK_STATUS(vxQueryScalar((vx_scalar)parameters[0], VX_SCALAR_TYPE, &type, sizeof(type)));
	if (type != VX_TYPE_STRING_AMD)
		return VX_ERROR_INVALID_FORMAT;
	// make sure input format is UYVY/YUYV/RGB
	vx_uint32 width = 0, height = 0;
	vx_df_image format = VX_DF_IMAGE_VIRT;
	ERROR_CHECK_STATUS(vxQueryImage((vx_image)parameters[1], VX_IMAGE_WIDTH, &width, sizeof(width)));
	ERROR_CHECK_STATUS(vxQueryImage((vx_image)parameters[1], VX_IMAGE_HEIGHT, &height, sizeof(height)));
	ERROR_CHECK_STATUS(vxQueryImage((vx_image)parameters[1], VX_IMAGE_FORMAT, &format, sizeof(format)));
	if (format != VX_DF_IMAGE_UYVY && format != VX_DF_IMAGE_YUYV && format != VX_DF_IMAGE_RGB)
		return VX_ERROR_INVALID_FORMAT;
	// check input auxiliary data parameter
	if (parameters[2]) {
		// make sure data type is UINT8
		vx_enum itemtype = VX_TYPE_INVALID;
		vx_size capacity = 0;
		ERROR_CHECK_STATUS(vxQueryArray((vx_array)parameters[2], VX_ARRAY_ITEMTYPE, &itemtype, sizeof(itemtype)));
		ERROR_CHECK_STATUS(vxQueryArray((vx_array)parameters[2], VX_ARRAY_CAPACITY, &capacity, sizeof(capacity)));
		if (itemtype != VX_TYPE_UINT8)
			return VX_ERROR_INVALID_TYPE;
	}
	// check and set output auxiliary data parameter
	vx_enum itemtype = VX_TYPE_INVALID;
	vx_size capacity = 0;
	ERROR_CHECK_STATUS(vxQueryArray((vx_array)parameters[3], VX_ARRAY_ITEMTYPE, &itemtype, sizeof(itemtype)));
	ERROR_CHECK_STATUS(vxQueryArray((vx_array)parameters[3], VX_ARRAY_CAPACITY, &capacity, sizeof(capacity)));
	if (itemtype != VX_TYPE_UINT8)
		return VX_ERROR_INVALID_TYPE;
	ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[3], VX_ARRAY_ITEMTYPE, &itemtype, sizeof(itemtype)));
	ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[3], VX_ARRAY_CAPACITY, &capacity, sizeof(capacity)));
	return VX_SUCCESS;
}

//! \brief The kernel publisher.
vx_status amd_media_encode_publish(vx_context context)
{
	// add kernel to the context with callbacks
    vx_kernel kernel = vxAddUserKernel(context, "com.amd.amd_media.encode", AMDOVX_KERNEL_AMD_MEDIA_ENCODE,
                            amd_media_encode_kernel, 4, amd_media_encode_validate,
                            amd_media_encode_initialize, amd_media_encode_deinitialize);
	ERROR_CHECK_OBJECT(kernel);

	// set kernel parameters
	ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));  // media config+filename: "[{mbps,fps,bframes,gopsize},]filename.mp4"
	ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));   // input image
	ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_OPTIONAL));   // input auxiliary data (optional)
	ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));  // output auxiliary data

#if ENCODE_ENABLE_OPENCL
	// register amd_kernel_opencl_buffer_update_callback_f for input image
    AgoKernelOpenclBufferUpdateInfo info = { amd_media_encode_opencl_buffer_update_callback, 1 };
	ERROR_CHECK_STATUS(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_OPENCL_BUFFER_UPDATE_CALLBACK, &info, sizeof(info)));
#endif

	// finalize and release kernel object
	ERROR_CHECK_STATUS(vxFinalizeKernel(kernel));
	ERROR_CHECK_STATUS(vxReleaseKernel(&kernel));

	return VX_SUCCESS;
}

VX_API_ENTRY vx_node VX_API_CALL amdMediaEncoderNode(vx_graph graph, const char *output_str, vx_image input, vx_array aux_data_in, vx_array aux_data_out)
{
    vx_node node = NULL;
    vx_context context = vxGetContext((vx_reference)graph);
    if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
        vx_scalar s_output = vxCreateScalar(context, VX_TYPE_STRING_AMD, output_str);
        vx_reference params[] = {
            (vx_reference)s_output,
            (vx_reference)input,
            (vx_reference)aux_data_in,
            (vx_reference)aux_data_out,
        };
        if (vxGetStatus((vx_reference)s_output) == VX_SUCCESS) {
            node = createMediaNode(graph, "com.amd.amd_media.encode", params, sizeof(params) / sizeof(params[0])); // added node to graph
            vxReleaseScalar(&s_output);
        }
    }
    return node;
}
