// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <memory>
#include <vector>

#include "base/containers/small_map.h"
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/strings/stringprintf.h"
#include "gpu/perftests/measurements.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/perf/perf_test.h"
#include "ui/gfx/geometry/size.h"
#include "ui/gfx/geometry/vector2d_f.h"
#include "ui/gl/gl_bindings.h"
#include "ui/gl/gl_context.h"
#include "ui/gl/gl_enums.h"
#include "ui/gl/gl_surface.h"
#include "ui/gl/gl_version_info.h"
#include "ui/gl/gpu_timing.h"
#include "ui/gl/init/gl_factory.h"
#include "ui/gl/scoped_make_current.h"

#if defined(USE_OZONE)
#include "base/message_loop/message_loop.h"
#endif

namespace gpu {
namespace {

const int kUploadPerfWarmupRuns = 5;
const int kUploadPerfTestRuns = 30;

#define SHADER(Src) #Src

// clang-format off
const char kVertexShader[] =
SHADER(
  uniform vec2 translation;
  attribute vec2 a_position;
  attribute vec2 a_texCoord;
  varying vec2 v_texCoord;
  void main() {
    gl_Position = vec4(
        translation.x + a_position.x, translation.y + a_position.y, 0.0, 1.0);
    v_texCoord = a_texCoord;
  }
);
const char kShaderDefaultFloatPrecision[] =
SHADER(
  precision mediump float;
);
const char kFragmentShader[] =
SHADER(
  uniform sampler2D a_texture;
  varying vec2 v_texCoord;
  void main() {
    gl_FragColor = texture2D(a_texture, v_texCoord);
  }
);
// clang-format on

void CheckNoGlError(const std::string& msg) {
  CHECK_EQ(static_cast<GLenum>(GL_NO_ERROR), glGetError()) << " " << msg;
}

// Utility function to compile a shader from a string.
GLuint LoadShader(const GLenum type, const char* const src) {
  GLuint shader = 0;
  shader = glCreateShader(type);
  CHECK_NE(0u, shader);
  glShaderSource(shader, 1, &src, NULL);
  glCompileShader(shader);

  GLint compiled = 0;
  glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
  if (compiled == 0) {
    GLint len = 0;
    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &len);
    if (len > 1) {
      std::unique_ptr<char[]> error_log(new char[len]);
      glGetShaderInfoLog(shader, len, NULL, error_log.get());
      LOG(ERROR) << "Error compiling shader: " << error_log.get();
    }
  }
  CHECK_NE(0, compiled);
  return shader;
}

int GLFormatBytePerPixel(GLenum format) {
  DCHECK(format == GL_RGBA || format == GL_LUMINANCE || format == GL_RED_EXT);
  return format == GL_RGBA ? 4 : 1;
}

GLenum GLFormatToInternalFormat(GLenum format) {
  return format == GL_RED ? GL_R8 : format;
}

GLenum GLFormatToStorageFormat(GLenum format) {
  switch (format) {
    case GL_RGBA:
      return GL_RGBA8;
    case GL_LUMINANCE:
      return GL_LUMINANCE8;
    case GL_RED:
      return GL_R8;
    default:
      NOTREACHED();
  }
  return 0;
}

void GenerateTextureData(const gfx::Size& size,
                         int bytes_per_pixel,
                         const int seed,
                         std::vector<uint8_t>* const pixels) {
  // Row bytes has to be multiple of 4 (GL_PACK_ALIGNMENT defaults to 4).
  int stride = ((size.width() * bytes_per_pixel) + 3) & ~0x3;
  pixels->resize(size.height() * stride);
  for (int y = 0; y < size.height(); ++y) {
    for (int x = 0; x < size.width(); ++x) {
      for (int channel = 0; channel < bytes_per_pixel; ++channel) {
        int index = y * stride + x * bytes_per_pixel;
        pixels->at(index) = (index + (seed << 2)) % (0x20 << channel);
      }
    }
  }
}

// Compare a buffer containing pixels in a specified format to GL_RGBA buffer
// where the former buffer have been uploaded as a texture and drawn on the
// RGBA buffer.
bool CompareBufferToRGBABuffer(GLenum format,
                               const gfx::Size& size,
                               const std::vector<uint8_t>& pixels,
                               const std::vector<uint8_t>& rgba) {
  int bytes_per_pixel = GLFormatBytePerPixel(format);
  int pixels_stride = ((size.width() * bytes_per_pixel) + 3) & ~0x3;
  int rgba_stride = size.width() * GLFormatBytePerPixel(GL_RGBA);
  for (int y = 0; y < size.height(); ++y) {
    for (int x = 0; x < size.width(); ++x) {
      int rgba_index = y * rgba_stride + x * GLFormatBytePerPixel(GL_RGBA);
      int pixels_index = y * pixels_stride + x * bytes_per_pixel;
      uint8_t expected[4] = {0};
      switch (format) {
        case GL_LUMINANCE:  // (L_t, L_t, L_t, 1)
          expected[1] = pixels[pixels_index];
          expected[2] = pixels[pixels_index];
        case GL_RED:  // (R_t, 0, 0, 1)
          expected[0] = pixels[pixels_index];
          expected[3] = 255;
          break;
        case GL_RGBA:  // (R_t, G_t, B_t, A_t)
          memcpy(expected, &pixels[pixels_index], 4);
          break;
        default:
          NOTREACHED();
      }
      if (memcmp(&rgba[rgba_index], expected, 4)) {
        return false;
      }
    }
  }
  return true;
}

// PerfTest to check costs of texture upload at different stages
// on different platforms.
class TextureUploadPerfTest : public testing::Test {
 public:
  TextureUploadPerfTest() : fbo_size_(1024, 1024) {}

  // Overridden from testing::Test
  void SetUp() override {
#if defined(USE_OZONE)
    // On Ozone, the backend initializes the event system using a UI
    // thread.
    base::MessageLoopForUI main_loop;
#endif
    static bool gl_initialized = gl::init::InitializeGLOneOff();
    DCHECK(gl_initialized);
    // Initialize an offscreen surface and a gl context.
    surface_ = gl::init::CreateOffscreenGLSurface(gfx::Size());
    gl_context_ =
        gl::init::CreateGLContext(nullptr,  // share_group
                                  surface_.get(), gl::GLContextAttribs());
    ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
    glGenTextures(1, &color_texture_);
    glBindTexture(GL_TEXTURE_2D, color_texture_);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, fbo_size_.width(),
                 fbo_size_.height(), 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);

    glGenFramebuffersEXT(1, &framebuffer_object_);
    glBindFramebufferEXT(GL_FRAMEBUFFER, framebuffer_object_);

    glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                              GL_TEXTURE_2D, color_texture_, 0);
    DCHECK_EQ(static_cast<GLenum>(GL_FRAMEBUFFER_COMPLETE),
              glCheckFramebufferStatusEXT(GL_FRAMEBUFFER));

    glViewport(0, 0, fbo_size_.width(), fbo_size_.height());
    gpu_timing_client_ = gl_context_->CreateGPUTimingClient();

    if (gpu_timing_client_->IsAvailable()) {
      LOG(INFO) << "Gpu timing initialized with timer type: "
                << gpu_timing_client_->GetTimerTypeName();
    } else {
      LOG(WARNING) << "Can't initialize gpu timing";
    }
    // Prepare a simple program and a vertex buffer that will be
    // used to draw a quad on the offscreen surface.
    vertex_shader_ = LoadShader(GL_VERTEX_SHADER, kVertexShader);

    bool is_gles = gl_context_->GetVersionInfo()->is_es;
    fragment_shader_ = LoadShader(
        GL_FRAGMENT_SHADER,
        base::StringPrintf("%s%s", is_gles ? kShaderDefaultFloatPrecision : "",
                           kFragmentShader).c_str());
    program_object_ = glCreateProgram();
    CHECK_NE(0u, program_object_);

    glAttachShader(program_object_, vertex_shader_);
    glAttachShader(program_object_, fragment_shader_);
    glBindAttribLocation(program_object_, 0, "a_position");
    glBindAttribLocation(program_object_, 1, "a_texCoord");
    glLinkProgram(program_object_);

    GLint linked = -1;
    glGetProgramiv(program_object_, GL_LINK_STATUS, &linked);
    CHECK_NE(0, linked);
    glUseProgram(program_object_);
    glUniform1i(sampler_location_, 0);
    translation_location_ =
        glGetUniformLocation(program_object_, "translation");
    DCHECK_NE(-1, translation_location_);
    glUniform2f(translation_location_, 0.0f, 0.0f);

    sampler_location_ = glGetUniformLocation(program_object_, "a_texture");
    CHECK_NE(-1, sampler_location_);

    glGenBuffersARB(1, &vertex_buffer_);
    CHECK_NE(0u, vertex_buffer_);
    DCHECK_NE(0u, vertex_buffer_);
    glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
    glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4, 0);
    glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4,
                          reinterpret_cast<void*>(sizeof(GLfloat) * 2));
    glEnableVertexAttribArray(0);
    glEnableVertexAttribArray(1);
    CheckNoGlError("glEnableVertexAttribArray");

    has_texture_storage_ =
        gl_context_->GetVersionInfo()->is_es3 ||
        gl_context_->HasExtension("GL_EXT_texture_storage") ||
        gl_context_->HasExtension("GL_ARB_texture_storage");
  }

  void GenerateVertexBuffer(const gfx::Size& size) {
    DCHECK_NE(0u, vertex_buffer_);
    glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
    // right and top are in clipspace
    float right = -1.f + 2.f * size.width() / fbo_size_.width();
    float top = -1.f + 2.f * size.height() / fbo_size_.height();
    // Four vertexes, one per line. Each vertex has two components per
    // position and two per texcoord.
    // It represents a quad formed by two triangles if interpreted
    // as a tristrip.

    // clang-format off
    GLfloat data[16] = {
      -1.f, -1.f,    0.f, 0.f,
      right, -1.f,   1.f, 0.f,
      -1.f, top,     0.f, 1.f,
      right, top,    1.f, 1.f};
    // clang-format on
    glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
    CheckNoGlError("glBufferData");
  }

  void TearDown() override {
    ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
    glDeleteProgram(program_object_);
    glDeleteShader(vertex_shader_);
    glDeleteShader(fragment_shader_);
    glDeleteBuffersARB(1, &vertex_buffer_);

    glBindFramebufferEXT(GL_FRAMEBUFFER, 0);
    glDeleteFramebuffersEXT(1, &framebuffer_object_);
    glDeleteTextures(1, &color_texture_);
    CheckNoGlError("glDeleteTextures");

    gpu_timing_client_ = nullptr;
    gl_context_ = nullptr;
    surface_ = nullptr;
  }

 protected:
  GLuint CreateGLTexture(const GLenum format,
                         const gfx::Size& size,
                         const bool specify_storage) {
    GLuint texture_id = 0;
    glActiveTexture(GL_TEXTURE0);
    glGenTextures(1, &texture_id);
    glBindTexture(GL_TEXTURE_2D, texture_id);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    if (specify_storage) {
      if (has_texture_storage_) {
        glTexStorage2DEXT(GL_TEXTURE_2D, 1, GLFormatToStorageFormat(format),
                          size.width(), size.height());
        CheckNoGlError("glTexStorage2DEXT");
      } else {
        glTexImage2D(GL_TEXTURE_2D, 0, GLFormatToInternalFormat(format),
                     size.width(), size.height(), 0, format, GL_UNSIGNED_BYTE,
                     nullptr);
        CheckNoGlError("glTexImage2D");
      }
    }
    return texture_id;
  }

  void UploadTexture(GLuint texture_id,
                     const gfx::Size& size,
                     const std::vector<uint8_t>& pixels,
                     GLenum format,
                     const bool subimage) {
    if (subimage) {
      glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.width(), size.height(),
                      format, GL_UNSIGNED_BYTE, &pixels[0]);
      CheckNoGlError("glTexSubImage2D");
    } else {
      glTexImage2D(GL_TEXTURE_2D, 0, GLFormatToInternalFormat(format),
                   size.width(), size.height(), 0, format, GL_UNSIGNED_BYTE,
                   &pixels[0]);
      CheckNoGlError("glTexImage2D");
    }
  }

  // Upload and draw on the offscren surface.
  // Return a list of pair. Each pair describe a gl operation and the wall
  // time elapsed in milliseconds.
  std::vector<Measurement> UploadAndDraw(GLuint texture_id,
                                         const gfx::Size& size,
                                         const std::vector<uint8_t>& pixels,
                                         const GLenum format,
                                         const bool subimage) {
    MeasurementTimers tex_timers(gpu_timing_client_.get());
    UploadTexture(texture_id, size, pixels, format, subimage);
    tex_timers.Record();

    MeasurementTimers first_draw_timers(gpu_timing_client_.get());
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    first_draw_timers.Record();

    MeasurementTimers draw_timers(gpu_timing_client_.get());
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    draw_timers.Record();

    MeasurementTimers finish_timers(gpu_timing_client_.get());
    glFinish();
    CheckNoGlError("glFinish");
    finish_timers.Record();

    std::vector<uint8_t> pixels_rendered(size.GetArea() * 4);
    glReadPixels(0, 0, size.width(), size.height(), GL_RGBA, GL_UNSIGNED_BYTE,
                 &pixels_rendered[0]);
    CheckNoGlError("glReadPixels");
    EXPECT_TRUE(
        CompareBufferToRGBABuffer(format, size, pixels, pixels_rendered))
        << "Format is: " << gl::GLEnums::GetStringEnum(format);

    std::vector<Measurement> measurements;
    bool gpu_timer_errors =
        gpu_timing_client_->IsAvailable() &&
        gpu_timing_client_->CheckAndResetTimerErrors();
    if (!gpu_timer_errors) {
      measurements.push_back(tex_timers.GetAsMeasurement(
          subimage ? "texsubimage2d" : "teximage2d"));
      measurements.push_back(
          first_draw_timers.GetAsMeasurement("firstdrawarrays"));
      measurements.push_back(draw_timers.GetAsMeasurement("drawarrays"));
      measurements.push_back(finish_timers.GetAsMeasurement("finish"));
    }
    return measurements;
  }

  void RunUploadAndDrawMultipleTimes(const gfx::Size& size,
                                     const GLenum format,
                                     const bool subimage) {
    std::vector<uint8_t> pixels;
    base::SmallMap<std::map<std::string, Measurement>>
        aggregates;  // indexed by name
    int successful_runs = 0;
    GLuint texture_id = CreateGLTexture(format, size, subimage);
    for (int i = 0; i < kUploadPerfWarmupRuns + kUploadPerfTestRuns; ++i) {
      GenerateTextureData(size, GLFormatBytePerPixel(format), i + 1, &pixels);
      auto run = UploadAndDraw(texture_id, size, pixels, format, subimage);
      if (i < kUploadPerfWarmupRuns || run.empty()) {
        continue;
      }
      successful_runs++;
      for (const Measurement& measurement : run) {
        auto& aggregate = aggregates[measurement.name];
        aggregate.name = measurement.name;
        aggregate.Increment(measurement);
      }
    }
    glDeleteTextures(1, &texture_id);

    std::string graph_name = base::StringPrintf(
        "%d_%s", size.width(), gl::GLEnums::GetStringEnum(format).c_str());
    if (subimage) {
      graph_name += "_sub";
    }

    if (successful_runs) {
      for (const auto& entry : aggregates) {
        const auto m = entry.second.Divide(successful_runs);
        m.PrintResult(graph_name);
      }
    }
    perf_test::PrintResult("sample_runs", "", graph_name,
                           static_cast<size_t>(successful_runs), "laps", true);
  }

  const gfx::Size fbo_size_;  // for the fbo
  scoped_refptr<gl::GLContext> gl_context_;
  scoped_refptr<gl::GLSurface> surface_;
  scoped_refptr<gl::GPUTimingClient> gpu_timing_client_;

  GLuint color_texture_ = 0;
  GLuint framebuffer_object_ = 0;
  GLuint vertex_shader_ = 0;
  GLuint fragment_shader_ = 0;
  GLuint program_object_ = 0;
  GLint sampler_location_ = -1;
  GLint translation_location_ = -1;
  GLuint vertex_buffer_ = 0;

  bool has_texture_storage_ = false;
};

// Perf test that generates, uploads and draws a texture on a surface repeatedly
// and prints out aggregated measurements for all the runs.
TEST_F(TextureUploadPerfTest, upload) {
  int sizes[] = {21, 128, 256, 512, 1024};
  std::vector<GLenum> formats;
  formats.push_back(GL_RGBA);

  if (!gl_context_->GetVersionInfo()->is_es3) {
    // Used by default for ResourceProvider::yuv_resource_format_.
    formats.push_back(GL_LUMINANCE);
  }

  ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
  const bool has_texture_rg = gl_context_->GetVersionInfo()->is_es3 ||
                              gl_context_->HasExtension("GL_EXT_texture_rg") ||
                              gl_context_->HasExtension("GL_ARB_texture_rg");

  if (has_texture_rg) {
    // Used as ResourceProvider::yuv_resource_format_ if
    // {ARB,EXT}_texture_rg are available.
    formats.push_back(GL_RED);
  }

  for (int side : sizes) {
    ASSERT_GE(fbo_size_.width(), side);
    ASSERT_GE(fbo_size_.height(), side);
    gfx::Size size(side, side);
    GenerateVertexBuffer(size);
    for (GLenum format : formats) {
      RunUploadAndDrawMultipleTimes(size, format, true);  // use glTexSubImage2D
      RunUploadAndDrawMultipleTimes(size, format, false);  // use glTexImage2D
    }
  }
}

// Perf test to check if the driver is doing texture renaming.
// This test creates one GL texture_id and four different images. For
// every image it uploads it using texture_id and it draws multiple
// times. The cpu/wall time and the gpu time for all the uploads and
// draws, but before glFinish, is computed and is printed out at the end as
// "upload_and_draw". If the gpu time is >> than the cpu/wall time we expect the
// driver to do texture renaming: this means that while the gpu is drawing using
// texture_id it didn't block cpu side the texture upload using the same
// texture_id.
TEST_F(TextureUploadPerfTest, renaming) {
  gfx::Size texture_size(fbo_size_.width() / 2, fbo_size_.height() / 2);

  std::vector<uint8_t> pixels[4];
  for (int i = 0; i < 4; ++i) {
    GenerateTextureData(texture_size, 4, i + 1, &pixels[i]);
  }

  ui::ScopedMakeCurrent smc(gl_context_.get(), surface_.get());
  GenerateVertexBuffer(texture_size);

  gfx::Vector2dF positions[] = {gfx::Vector2dF(0.f, 0.f),
                                gfx::Vector2dF(1.f, 0.f),
                                gfx::Vector2dF(0.f, 1.f),
                                gfx::Vector2dF(1.f, 1.f)};
  GLuint texture_id = CreateGLTexture(GL_RGBA, texture_size, true);

  MeasurementTimers upload_and_draw_timers(gpu_timing_client_.get());

  for (int i = 0; i < 4; ++i) {
    UploadTexture(texture_id, texture_size, pixels[i % 4], GL_RGBA, true);
    DCHECK_NE(-1, translation_location_);
    glUniform2f(translation_location_, positions[i % 4].x(),
                positions[i % 4].y());
    // Draw the same quad multiple times to make sure that the time spent on the
    // gpu is more than the cpu time.
    for (int draw = 0; draw < 128; ++draw) {
      glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    }
  }

  upload_and_draw_timers.Record();
  MeasurementTimers finish_timers(gpu_timing_client_.get());
  glFinish();
  CheckNoGlError("glFinish");
  finish_timers.Record();

  glDeleteTextures(1, &texture_id);

  for (int i = 0; i < 4; ++i) {
    std::vector<uint8_t> pixels_rendered(texture_size.GetArea() * 4);
    glReadPixels(texture_size.width() * positions[i].x(),
                 texture_size.height() * positions[i].y(), texture_size.width(),
                 texture_size.height(), GL_RGBA, GL_UNSIGNED_BYTE,
                 &pixels_rendered[0]);
    CheckNoGlError("glReadPixels");
    ASSERT_EQ(pixels[i].size(), pixels_rendered.size());
    EXPECT_EQ(pixels[i], pixels_rendered);
  }

  bool gpu_timer_errors = gpu_timing_client_->IsAvailable() &&
                          gpu_timing_client_->CheckAndResetTimerErrors();
  if (!gpu_timer_errors) {
    upload_and_draw_timers.GetAsMeasurement("upload_and_draw")
        .PrintResult("renaming");
    finish_timers.GetAsMeasurement("finish").PrintResult("renaming");
  }
}

}  // namespace
}  // namespace gpu
