// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef MEDIA_GPU_V4L2_IMAGE_PROCESSOR_H_
#define MEDIA_GPU_V4L2_IMAGE_PROCESSOR_H_

#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <queue>
#include <vector>

#include "base/macros.h"
#include "base/memory/linked_ptr.h"
#include "base/memory/ref_counted.h"
#include "base/memory/weak_ptr.h"
#include "base/threading/thread.h"
#include "media/base/video_frame.h"
#include "media/gpu/media_gpu_export.h"
#include "media/gpu/v4l2_device.h"

namespace media {

// Handles image processing accelerators that expose a V4L2 memory-to-memory
// interface. The threading model of this class is the same as for other V4L2
// hardware accelerators (see V4L2VideoDecodeAccelerator for more details).
class MEDIA_GPU_EXPORT V4L2ImageProcessor {
 public:
  explicit V4L2ImageProcessor(const scoped_refptr<V4L2Device>& device);
  virtual ~V4L2ImageProcessor();

  // Initializes the processor to convert from |input_format| to |output_format|
  // and/or scale from |input_visible_size| to |output_visible_size|.
  // |input_memory_type| and |output_memory_type| are the V4L2 memory types of
  // the input and output buffers, respectively.
  // Request the input buffers to be of at least |input_allocated_size| and the
  // output buffers to be of at least |output_allocated_size|. The number of
  // input buffers and output buffers will be |num_buffers|. Provided |error_cb|
  // will be called if an error occurs. Returns true if the requested
  // configuration is supported.
  bool Initialize(VideoPixelFormat input_format,
                  VideoPixelFormat output_format,
                  v4l2_memory input_memory_type,
                  v4l2_memory output_memory_type,
                  gfx::Size input_visible_size,
                  gfx::Size input_allocated_size,
                  gfx::Size output_visible_size,
                  gfx::Size output_allocated_size,
                  int num_buffers,
                  const base::Closure& error_cb);

  // Returns a vector of dmabuf file descriptors, exported for the V4L2 output
  // buffer with index |output_buffer_index|. The size of the vector will be
  // the number of planes of the buffer. Returns an empty vector on failure.
  std::vector<base::ScopedFD> GetDmabufsForOutputBuffer(
      int output_buffer_index);

  // Returns true if image processing is supported on this platform.
  static bool IsSupported();

  // Returns a vector of supported input formats in fourcc.
  static std::vector<uint32_t> GetSupportedInputFormats();

  // Returns a vector of supported output formats in fourcc.
  static std::vector<uint32_t> GetSupportedOutputFormats();

  // Gets output allocated size and number of planes required by the device
  // for conversion from |input_pixelformat| to |output_pixelformat|, for
  // visible size |size|. Returns true on success. Adjusted coded size will be
  // stored in |size| and the number of planes will be stored in |num_planes|.
  // |size| is therefore an in/out parameter.
  static bool TryOutputFormat(uint32_t input_pixelformat,
                              uint32_t output_pixelformat,
                              gfx::Size* size,
                              size_t* num_planes);

  // Returns input allocated size required by the processor to be fed with.
  gfx::Size input_allocated_size() const { return input_allocated_size_; }

  // Returns output allocated size required by the processor.
  gfx::Size output_allocated_size() const { return output_allocated_size_; }

  // Callback to be used to return the index of a processed image to the
  // client. After the client is done with the frame, call Process with the
  // index to return the output buffer to the image processor.
  using FrameReadyCB = base::Callback<void(int output_buffer_index)>;

  // Called by client to process |frame|. The resulting processed frame will be
  // stored in |output_buffer_index| output buffer and notified via |cb|. The
  // processor will drop all its references to |frame| after it finishes
  // accessing it. If |output_memory_type_| is V4L2_MEMORY_DMABUF, the caller
  // should pass non-empty |output_dmabuf_fds| and the processed frame will be
  // stored in those buffers. If the number of |output_dmabuf_fds| is not
  // expected, this function will return false.
  bool Process(const scoped_refptr<VideoFrame>& frame,
               int output_buffer_index,
               std::vector<base::ScopedFD> output_dmabuf_fds,
               const FrameReadyCB& cb);

  // Reset all processing frames. After this method returns, no more callbacks
  // will be invoked. V4L2ImageProcessor is ready to process more frames.
  // NOTE(review): the meaning of the return value is not stated in this
  // header; presumably true on success -- confirm against the definition.
  bool Reset();

  // Stop all processing and clean up. After this method returns no more
  // callbacks will be invoked.  Deletes |this| unconditionally, so make sure
  // to drop all pointers to it!
  void Destroy();

 private:
  // Record for input buffers.
  struct InputRecord {
    InputRecord();
    ~InputRecord();
    // The frame associated with this input buffer.
    scoped_refptr<VideoFrame> frame;
    // NOTE(review): presumably true while the buffer is queued to the device;
    // confirm against the .cc file.
    bool at_device;
  };

  // Record for output buffers.
  struct OutputRecord {
    OutputRecord();
    OutputRecord(OutputRecord&&) = default;
    ~OutputRecord();
    // NOTE(review): presumably true while the buffer is queued to the device;
    // confirm against the .cc file.
    bool at_device;
    // The processed frame will be stored in these buffers if
    // |output_memory_type_| is V4L2_MEMORY_DMABUF
    std::vector<base::ScopedFD> dmabuf_fds;
  };

  // Job record. Jobs are processed in a FIFO order. This is separate from
  // InputRecord, because an InputRecord may be returned before we dequeue
  // the corresponding output buffer. The processed frame will be stored in
  // |output_buffer_index| output buffer. If |output_memory_type_| is
  // V4L2_MEMORY_DMABUF, the processed frame will be stored in
  // |output_dmabuf_fds|.
  struct JobRecord {
    JobRecord();
    ~JobRecord();
    scoped_refptr<VideoFrame> frame;
    int output_buffer_index;
    std::vector<base::ScopedFD> output_dmabuf_fds;
    FrameReadyCB ready_cb;
  };

  // Buffer queueing and allocation helpers.
  // NOTE(review): these are undocumented in the original header and their
  // definitions live outside it; the grouping here follows their names only.
  void EnqueueInput();
  void EnqueueOutput(int index);
  void Dequeue();
  bool EnqueueInputRecord();
  bool EnqueueOutputRecord(int index);
  bool CreateInputBuffers();
  bool CreateOutputBuffers();
  void DestroyInputBuffers();
  void DestroyOutputBuffers();

  // Error reporting helpers.
  // NOTE(review): presumably these end up running the client's |error_cb_| on
  // the child thread -- confirm against the definitions.
  void NotifyError();
  void NotifyErrorOnChildThread(const base::Closure& error_cb);

  // NOTE(review): judging by the names, these are tasks run on
  // |device_thread_|; confirm against the .cc file.
  void ProcessTask(std::unique_ptr<JobRecord> job_record);
  void ServiceDeviceTask();

  // Attempt to start/stop device_poll_thread_.
  void StartDevicePoll();
  void StopDevicePoll();

  // Runs on device_poll_thread_ to wait for device events.
  void DevicePollTask(bool poll_device);

  // A processed frame is ready.
  void FrameReady(const FrameReadyCB& cb, int output_buffer_index);

  // Size and format-related members remain constant after initialization.
  // The visible/allocated sizes of the input frame.
  gfx::Size input_visible_size_;
  gfx::Size input_allocated_size_;

  // The visible/allocated sizes of the destination frame.
  gfx::Size output_visible_size_;
  gfx::Size output_allocated_size_;

  VideoPixelFormat input_format_;
  VideoPixelFormat output_format_;
  v4l2_memory input_memory_type_;
  v4l2_memory output_memory_type_;
  uint32_t input_format_fourcc_;
  uint32_t output_format_fourcc_;

  size_t input_planes_count_;
  size_t output_planes_count_;

  // Our original calling task runner for the child thread.
  const scoped_refptr<base::SingleThreadTaskRunner> child_task_runner_;

  // V4L2 device in use.
  scoped_refptr<V4L2Device> device_;

  // Thread to communicate with the device on.
  base::Thread device_thread_;
  // Thread used to poll the V4L2 for events only.
  base::Thread device_poll_thread_;

  // All the below members are to be accessed from device_thread_ only
  // (if it's running).
  std::queue<linked_ptr<JobRecord>> input_queue_;
  std::queue<linked_ptr<JobRecord>> running_jobs_;

  // Input queue state.
  bool input_streamon_;
  // Number of input buffers enqueued to the device.
  int input_buffer_queued_count_;
  // Input buffers ready to use; LIFO since we don't care about ordering.
  std::vector<int> free_input_buffers_;
  // Mapping of int index to an input buffer record.
  std::vector<InputRecord> input_buffer_map_;

  // Output queue state.
  bool output_streamon_;
  // Number of output buffers enqueued to the device.
  int output_buffer_queued_count_;
  // Mapping of int index to an output buffer record.
  std::vector<OutputRecord> output_buffer_map_;
  // The number of input or output buffers.
  int num_buffers_;

  // Error callback to the client.
  base::Closure error_cb_;

  // WeakPtr<> pointing to |this| for use in posting tasks from the device
  // worker threads back to the child thread.  Because the worker threads
  // are members of this class, any task running on those threads is guaranteed
  // that this object is still alive.  As a result, tasks posted from the child
  // thread to the device thread should use base::Unretained(this),
  // and tasks posted the other way should use |weak_this_|.
  base::WeakPtr<V4L2ImageProcessor> weak_this_;

  // Weak factory for producing weak pointers on the child thread.
  base::WeakPtrFactory<V4L2ImageProcessor> weak_this_factory_;

  DISALLOW_COPY_AND_ASSIGN(V4L2ImageProcessor);
};

}  // namespace media

#endif  // MEDIA_GPU_V4L2_IMAGE_PROCESSOR_H_
