
// Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

syntax = "proto3";

// option java_multiple_files = true;
// option java_package = "io.grpc.examples.helloworld";
// option java_outer_classname = "HelloWorldProto";
// option objc_class_prefix = "HLW";

package rdc;

/****************************************************************************/
/********************************** Rsmi Service ****************************/
/****************************************************************************/
// Unary RPCs for RSMI device-count, temperature and fan queries.
service Rsmi {
  // RSMI ID services
  rpc GetNumDevices(GetNumDevicesRequest) returns (GetNumDevicesResponse) {}

  // RSMI Physical Queries
  rpc GetTemperature(GetTemperatureRequest) returns (GetTemperatureResponse) {}
  rpc GetFanRpms(GetFanRpmsRequest) returns (GetFanRpmsResponse) {}
  rpc GetFanSpeed(GetFanSpeedRequest) returns (GetFanSpeedResponse) {}
  rpc GetFanSpeedMax(GetFanSpeedMaxRequest) returns (GetFanSpeedMaxResponse) {}
}

/* rsmi_num_monitor_devices() */
// Request for Rsmi.GetNumDevices; it carries no fields.
message GetNumDevicesRequest {}
// Response for Rsmi.GetNumDevices.
message GetNumDevicesResponse {
  // Presumably the device count reported by rsmi_num_monitor_devices() —
  // TODO confirm against the server implementation.
  uint64 val = 1;
  // Presumably the status code of the underlying RSMI call — TODO confirm.
  uint64 ret_val = 2;
}

/* GetTemperature */
/* rsmi_dev_temp_metric_get() */
// Request for Rsmi.GetTemperature.
message GetTemperatureRequest {
  // Which temperature reading is being asked for.
  // NOTE(review): the RSMI_TEMP_ names suggest these values mirror an RSMI
  // enum — verify the numbering against the RSMI headers before editing.
  enum TemperatureMetric {
    RSMI_TEMP_CURRENT = 0;
    RSMI_TEMP_MAX = 1;
    RSMI_TEMP_MIN = 2;
    RSMI_TEMP_MAX_HYST = 3;
    RSMI_TEMP_MIN_HYST = 4;
    RSMI_TEMP_CRITICAL = 5;
    RSMI_TEMP_CRITICAL_HYST = 6;
    RSMI_TEMP_EMERGENCY = 7;
    RSMI_TEMP_EMERGENCY_HYST = 8;
    RSMI_TEMP_CRIT_MIN = 9;
    RSMI_TEMP_CRIT_MIN_HYST = 10;
    RSMI_TEMP_OFFSET = 11;
    RSMI_TEMP_LOWEST = 12;
    RSMI_TEMP_HIGHEST = 13;
  }

  // Index of the device to query.
  uint32 dv_ind = 1;
  // Sensor selector; the accepted values are not defined in this file.
  uint32 sensor_type = 2;
  // Metric to read from the selected sensor.
  TemperatureMetric metric = 3;
}
// Response for Rsmi.GetTemperature.
message GetTemperatureResponse {
  // Temperature reading; the unit is not stated in this file.
  int64 temperature = 1;
  // Presumably the status code of the underlying RSMI call — TODO confirm.
  uint64 ret_val = 2;
}

/* GetFanRpms */
/* rsmi_dev_fan_rpms_get() */
// Request for Rsmi.GetFanRpms.
message GetFanRpmsRequest {
  // Index of the device to query.
  uint32 dv_ind = 1;
  // Index of the sensor to query.
  uint32 sensor_ind = 2;
}
// Response for Rsmi.GetFanRpms.
message GetFanRpmsResponse {
  // Fan speed reading, in RPM going by the field name.
  int64 rpms = 1;
  // Presumably the status code of the underlying RSMI call — TODO confirm.
  uint64 ret_val = 2;
}
/* GetFanSpeed */
/* rsmi_dev_fan_speed_get() */
// Request for Rsmi.GetFanSpeed.
message GetFanSpeedRequest {
  // Index of the device to query.
  uint32 dv_ind = 1;
  // Index of the sensor to query.
  uint32 sensor_ind = 2;
}
// Response for Rsmi.GetFanSpeed.
message GetFanSpeedResponse {
  // Fan speed reading; the scale/unit is not stated in this file.
  int64 speed = 1;
  // Presumably the status code of the underlying RSMI call — TODO confirm.
  uint64 ret_val = 2;
}

/* GetFanSpeedMax */
/* rsmi_dev_fan_speed_max_get() */
// Request for Rsmi.GetFanSpeedMax.
message GetFanSpeedMaxRequest {
  // Index of the device to query.
  uint32 dv_ind = 1;
  // Index of the sensor to query.
  uint32 sensor_ind = 2;
}
// Response for Rsmi.GetFanSpeedMax.
message GetFanSpeedMaxResponse {
  // Maximum fan speed; the scale/unit is not stated in this file.
  // NOTE(review): unsigned here, whereas GetFanSpeedResponse.speed is int64.
  uint64 max_speed = 1;
  // Presumably the status code of the underlying RSMI call — TODO confirm.
  uint64 ret_val = 2;
}

/****************************************************************************/
/********************************** RdcAdmin Service ************************/
/****************************************************************************/
// Administrative RPCs for the RDC server.
service RdcAdmin {
  // RDC admin services
  rpc VerifyConnection(VerifyConnectionRequest)
      returns (VerifyConnectionResponse) {}
}

/* VerifyConnection */
// Request for RdcAdmin.VerifyConnection.
message VerifyConnectionRequest {
  // Caller-chosen number sent with the request.
  uint64 magic_num = 1;
}
// Response for RdcAdmin.VerifyConnection.
message VerifyConnectionResponse {
  // Presumably the request's magic_num echoed back — TODO confirm against
  // the server implementation.
  uint64 echo_magic_num = 1;
}

/****************************************************************************/
/********************************** RdcAPI Service ************************/
/****************************************************************************/

// RdcAPI groups the RDC discovery, group, field, job and diagnostic RPCs.
// Every RPC is unary; the comment above each one quotes the C function
// signature given for it.
service RdcAPI {
  // Discovery API

  // rdc_status_t rdc_get_all_devices(
  //     uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES], uint32_t* count)
  rpc GetAllDevices(Empty) returns (GetAllDevicesResponse) {}

  // rdc_status_t rdc_get_device_attributes(
  //     uint32_t gpu_index, rdc_device_attributes_t* p_rdc_attr)
  rpc GetDeviceAttributes(GetDeviceAttributesRequest)
      returns (GetDeviceAttributesResponse) {}

  // Group API

  // rdc_status_t rdc_group_gpu_create(
  //     rdc_group_type_t type, const char* group_name,
  //     rdc_gpu_group_t* p_rdc_group_id)
  rpc CreateGpuGroup(CreateGpuGroupRequest)
      returns (CreateGpuGroupResponse) {}

  // rdc_status_t rdc_group_gpu_add(
  //     rdc_gpu_group_t groupId, uint32_t gpu_index)
  rpc AddToGpuGroup(AddToGpuGroupRequest)
      returns (AddToGpuGroupResponse) {}

  // rdc_status_t rdc_group_field_create(
  //     uint32_t num_field_ids, uint32_t* field_ids,
  //     const char* field_group_name, rdc_field_grp_t* rdc_field_group_id)
  rpc CreateFieldGroup(CreateFieldGroupRequest)
      returns (CreateFieldGroupResponse) {}

  // rdc_status_t rdc_group_field_get_info(
  //     rdc_field_grp_t rdc_field_group_id,
  //     rdc_field_group_info_t* field_group_info)
  rpc GetFieldGroupInfo(GetFieldGroupInfoRequest)
      returns (GetFieldGroupInfoResponse) {}

  // rdc_status_t rdc_group_gpu_get_info(
  //     rdc_gpu_group_t p_rdc_group_id, rdc_group_info_t* p_rdc_group_info)
  rpc GetGpuGroupInfo(GetGpuGroupInfoRequest)
      returns (GetGpuGroupInfoResponse) {}

  // rdc_status_t rdc_group_gpu_destroy(rdc_gpu_group_t p_rdc_group_id)
  rpc DestroyGpuGroup(DestroyGpuGroupRequest)
      returns (DestroyGpuGroupResponse) {}

  // rdc_status_t rdc_group_field_destroy(rdc_field_grp_t rdc_field_group_id)
  rpc DestroyFieldGroup(DestroyFieldGroupRequest)
      returns (DestroyFieldGroupResponse) {}

  // Field API

  // rdc_status_t rdc_watch_fields(
  //     rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id,
  //     uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples)
  rpc WatchFields(WatchFieldsRequest) returns (WatchFieldsResponse) {}

  // rdc_status_t rdc_get_latest_value_for_field(
  //     uint32_t gpu_index, uint32_t field, rdc_field_value* value)
  rpc GetLatestFieldValue(GetLatestFieldValueRequest)
      returns (GetLatestFieldValueResponse) {}

  // rdc_status_t rdc_get_field_value_since(
  //     uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp,
  //     uint64_t *next_since_time_stamp, rdc_field_value* value)
  rpc GetFieldSince(GetFieldSinceRequest) returns (GetFieldSinceResponse) {}

  // rdc_status_t rdc_unwatch_fields(
  //     rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id)
  rpc UnWatchFields(UnWatchFieldsRequest) returns (UnWatchFieldsResponse) {}

  // rdc_status_t rdc_update_all_fields(uint32_t wait_for_update)
  rpc UpdateAllFields(UpdateAllFieldsRequest)
      returns (UpdateAllFieldsResponse) {}

  // rdc_status_t rdc_group_get_all_ids(
  //     rdc_gpu_group_t group_id_list[], uint32_t* count)
  rpc GetGroupAllIds(Empty) returns (GetGroupAllIdsResponse) {}

  // rdc_status_t rdc_group_field_all_ids(
  //     rdc_field_grp_t field_group_id_list[], uint32_t* count)
  rpc GetFieldGroupAllIds(Empty) returns (GetFieldGroupAllIdsResponse) {}

  // JOB API

  // rdc_status_t rdc_job_start_stats(
  //     rdc_gpu_group_t groupId, char job_id[64], uint64_t update_freq)
  rpc StartJobStats(StartJobStatsRequest) returns (StartJobStatsResponse) {}

  // rdc_status_t rdc_job_get_stats(char jobId[64], rdc_job_info_t* p_job_info)
  rpc GetJobStats(GetJobStatsRequest) returns (GetJobStatsResponse) {}

  // rdc_status_t rdc_job_stop_stats(char job_id[64])
  rpc StopJobStats(StopJobStatsRequest) returns (StopJobStatsResponse) {}

  // rdc_status_t rdc_job_remove(char job_id[64])
  rpc RemoveJob(RemoveJobRequest) returns (RemoveJobResponse) {}

  // rdc_status_t rdc_job_remove_all()
  rpc RemoveAllJob(Empty) returns (RemoveAllJobResponse) {}

  // Diagnostic API

  // rdc_status_t rdc_diagnostic_run(
  //     rdc_gpu_group_t group_id, rdc_diag_level_t level,
  //     rdc_diag_response_t* response);
  rpc DiagnosticRun(DiagnosticRunRequest) returns (DiagnosticRunResponse) {}

  // rdc_status_t rdc_test_case_run(
  //     rdc_gpu_group_t group_id, rdc_diag_test_cases_t test_case,
  //     rdc_diag_test_result_t* result);
  rpc DiagnosticTestCaseRun(DiagnosticTestCaseRunRequest)
      returns (DiagnosticTestCaseRunResponse) {}
}

// Field-less request shared by the RdcAPI RPCs that take no arguments
// (GetAllDevices, GetGroupAllIds, GetFieldGroupAllIds, RemoveAllJob).
message Empty {}

// Response for RdcAPI.GetAllDevices.
message GetAllDevicesResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // GPU indexes; presumably the gpu_index_list output of
  // rdc_get_all_devices() — TODO confirm.
  repeated uint32 gpus = 2;
}

// Request for RdcAPI.GetDeviceAttributes.
message GetDeviceAttributesRequest {
  // Index of the GPU whose attributes are requested.
  uint32 gpu_index = 1;
}

// Attributes of one device, returned inside GetDeviceAttributesResponse.
message DeviceAttributes {
  // Name of the device.
  string device_name = 1;
}

// Response for RdcAPI.GetDeviceAttributes.
message GetDeviceAttributesResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Attributes of the requested GPU.
  DeviceAttributes attributes = 2;
}

// Request for RdcAPI.CreateGpuGroup.
message CreateGpuGroupRequest {
  // Kind of group to create. What a DEFAULT versus an EMPTY group contains
  // is not defined in this file.
  enum GpuGroupType {
    RDC_GROUP_DEFAULT = 0;
    RDC_GROUP_EMPTY = 1;
  }
  // Requested group type.
  GpuGroupType type = 1;
  // Name to give the new group.
  string group_name = 2;
}

// Response for RdcAPI.CreateGpuGroup.
message CreateGpuGroupResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Identifier of the GPU group; presumably the newly created one — confirm.
  uint32 group_id = 2;
}

// Request for RdcAPI.AddToGpuGroup.
message AddToGpuGroupRequest {
  // Identifier of the GPU group to add to.
  uint32 group_id = 1;
  // Index of the GPU to add.
  uint32 gpu_index = 2;
}

// Response for RdcAPI.AddToGpuGroup.
message AddToGpuGroupResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
}

// Request for RdcAPI.CreateFieldGroup.
message CreateFieldGroupRequest {
  // Field identifiers to place in the new field group.
  repeated uint32 field_ids = 1;
  // Name to give the new field group.
  string field_group_name = 2;
}

// Response for RdcAPI.CreateFieldGroup.
message CreateFieldGroupResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Identifier of the field group; presumably the newly created one — confirm.
  uint32 field_group_id = 2;
}

// Request for RdcAPI.GetFieldGroupInfo.
message GetFieldGroupInfoRequest {
  // Identifier of the field group to describe.
  uint32 field_group_id = 1;
}

// Response for RdcAPI.GetFieldGroupInfo.
message GetFieldGroupInfoResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Name of the field group.
  // NOTE(review): "filed" looks like a typo for "field". Renaming is
  // wire-safe but changes generated accessors and the JSON key, so it is
  // left as-is here.
  string filed_group_name = 2;
  // Field identifiers contained in the group.
  repeated uint32 field_ids = 3;
}

// Request for RdcAPI.GetGpuGroupInfo.
message GetGpuGroupInfoRequest {
  // Identifier of the GPU group to describe.
  uint32 group_id = 1;
}

// Response for RdcAPI.GetGpuGroupInfo.
message GetGpuGroupInfoResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Name of the GPU group.
  string group_name = 2;
  // Members of the group; presumably GPU indexes — TODO confirm.
  repeated uint32 entity_ids = 3;
}

// Request for RdcAPI.DestroyGpuGroup.
message DestroyGpuGroupRequest {
  // Identifier of the GPU group to destroy.
  uint32 group_id = 1;
}

// Response for RdcAPI.DestroyGpuGroup.
message DestroyGpuGroupResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
}

// Request for RdcAPI.DestroyFieldGroup.
message DestroyFieldGroupRequest {
  // Identifier of the field group to destroy.
  uint32 field_group_id = 1;
}

// Response for RdcAPI.DestroyFieldGroup.
message DestroyFieldGroupResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
}

// Request for RdcAPI.WatchFields. Field names match the parameters of
// rdc_watch_fields(); units are not stated in this file.
message WatchFieldsRequest {
  // Identifier of the GPU group to watch.
  uint32 group_id = 1;
  // Identifier of the field group to watch.
  uint32 field_group_id = 2;
  // Update frequency for the watch.
  uint64 update_freq = 3;
  // Maximum age of samples to keep.
  double max_keep_age = 4;
  // Maximum number of samples to keep.
  uint32 max_keep_samples = 5;
}

// Response for RdcAPI.WatchFields.
message WatchFieldsResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
}

// Request for RdcAPI.GetLatestFieldValue.
message GetLatestFieldValueRequest {
  // Index of the GPU to read from.
  uint32 gpu_index = 1;
  // Identifier of the field to read.
  uint32 field_id = 2;
}

// Response for RdcAPI.GetLatestFieldValue.
message GetLatestFieldValueResponse {
  // Data type of the field value; presumably indicates which member of
  // `value` is populated — TODO confirm.
  // NOTE(review): BLOB has no matching member in the `value` oneof.
  enum FieldType {
    INTEGER = 0;
    DOUBLE = 1;
    STRING = 2;
    BLOB = 3;
  }

  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Identifier of the field the value belongs to.
  uint32 field_id = 2;
  // Second status, separate from `status`; presumably the status stored in
  // the rdc_field_value itself — TODO confirm.
  uint32 rdc_status = 3;
  // Timestamp of the value; epoch and unit are not stated in this file.
  uint64 ts = 4;
  // Type of the value.
  FieldType type = 5;
  // The value; at most one member is set.
  oneof value {
    uint64 l_int = 6;
    double dbl = 7;
    string str = 8;
  }
}

// Request for RdcAPI.GetFieldSince.
message GetFieldSinceRequest {
  // Index of the GPU to read from.
  uint32 gpu_index = 1;
  // Identifier of the field to read.
  uint32 field_id = 2;
  // Timestamp to read from; epoch and unit are not stated in this file.
  uint64 since_time_stamp = 3;
}

// Response for RdcAPI.GetFieldSince.
message GetFieldSinceResponse {
  // Data type of the field value; presumably indicates which member of
  // `value` is populated — TODO confirm.
  // NOTE(review): BLOB has no matching member in the `value` oneof.
  enum FieldType {
    INTEGER = 0;
    DOUBLE = 1;
    STRING = 2;
    BLOB = 3;
  }

  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Presumably the since_time_stamp to pass on the next call — TODO confirm.
  uint64 next_since_time_stamp = 2;
  // Identifier of the field the value belongs to.
  uint32 field_id = 3;
  // Second status, separate from `status`; presumably the status stored in
  // the rdc_field_value itself — TODO confirm.
  uint32 rdc_status = 4;
  // Timestamp of the value; epoch and unit are not stated in this file.
  uint64 ts = 5;
  // Type of the value.
  FieldType type = 6;
  // The value; at most one member is set.
  oneof value {
    uint64 l_int = 7;
    double dbl = 8;
    string str = 9;
  }
}

// Request for RdcAPI.UnWatchFields.
message UnWatchFieldsRequest {
  // Identifier of the GPU group to stop watching.
  uint32 group_id = 1;
  // Identifier of the field group to stop watching.
  uint32 field_group_id = 2;
}

// Response for RdcAPI.UnWatchFields.
message UnWatchFieldsResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
}

// Request for RdcAPI.UpdateAllFields.
message UpdateAllFieldsRequest {
  // Passed as the wait_for_update argument of rdc_update_all_fields();
  // presumably non-zero means wait for the update to finish — TODO confirm.
  uint32 wait_for_update = 1;
}

// Response for RdcAPI.UpdateAllFields.
message UpdateAllFieldsResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
}

// Response for RdcAPI.GetGroupAllIds.
message GetGroupAllIdsResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Identifiers of the GPU groups.
  repeated uint32 group_ids = 2;
}


// Response for RdcAPI.GetFieldGroupAllIds.
message GetFieldGroupAllIdsResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Identifiers of the field groups.
  repeated uint32 field_group_ids = 2;
}

// Request for RdcAPI.StartJobStats.
message StartJobStatsRequest {
  // Identifier of the GPU group the job runs on.
  uint32 group_id = 1;
  // Job identifier. The C signature quoted on the RPC uses char job_id[64];
  // any length limit is not enforced by this schema.
  string job_id = 2;
  // Update frequency for stats collection; the unit is not stated here.
  uint64 update_freq = 3;
}

// Response for RdcAPI.StartJobStats.
message StartJobStatsResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
}

// Request for RdcAPI.GetJobStats.
message GetJobStatsRequest {
  // Identifier of the job whose statistics are requested.
  string job_id = 1;
}

// Summary statistics for one metric; used for the per-metric fields of
// GpuUsageInfo. Units depend on the metric and are not stated in this file.
message JobStatsSummary {
  // Largest observed value.
  uint64 max_value = 1;
  // Smallest observed value.
  uint64 min_value = 2;
  // Average value (carried as an integer).
  uint64 average = 3;
  // Standard deviation of the values.
  double standard_deviation = 4;
}

// Usage statistics for a job. GetJobStatsResponse uses this type both for
// its `summary` field and for each entry of its `gpus` list. Units of the
// individual metrics are not stated in this file.
message GpuUsageInfo {
  // Identifier of the GPU these statistics describe.
  uint32 gpu_id = 1;
  // Start of the measured interval; epoch and unit are not stated here.
  uint64 start_time = 2;
  // End of the measured interval; epoch and unit are not stated here.
  uint64 end_time = 3;
  // Energy consumed over the interval.
  uint64 energy_consumed = 4;
  // Power usage statistics.
  JobStatsSummary power_usage = 5;
  // GPU clock statistics.
  JobStatsSummary gpu_clock = 6;
  // GPU utilization statistics.
  JobStatsSummary gpu_utilization = 7;
  // Peak GPU memory used.
  uint64 max_gpu_memory_used = 8;
  // Memory utilization statistics.
  JobStatsSummary memory_utilization = 9;
  // Presumably the count of correctable ECC errors — TODO confirm.
  uint64 ecc_correct = 10;
  // Presumably the count of uncorrectable ECC errors — TODO confirm.
  uint64 ecc_uncorrect = 11;
  // PCIe transmit statistics.
  JobStatsSummary pcie_tx = 12;
  // PCIe receive statistics.
  JobStatsSummary pcie_rx = 13;
  // Memory clock statistics.
  JobStatsSummary memory_clock = 14;
  // GPU temperature statistics.
  JobStatsSummary gpu_temperature = 15;
}
// Response for RdcAPI.GetJobStats.
message GetJobStatsResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Number of GPUs; presumably equals the length of `gpus` — TODO confirm.
  uint32 num_gpus = 2;
  // Statistics summarised for the job; presumably aggregated across all of
  // its GPUs — TODO confirm.
  GpuUsageInfo summary = 3;
  // Statistics for each GPU.
  repeated GpuUsageInfo gpus = 4;
}

// Request for RdcAPI.StopJobStats.
message StopJobStatsRequest {
  // Identifier of the job to stop collecting statistics for.
  string job_id = 1;
}

// Response for RdcAPI.StopJobStats.
message StopJobStatsResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
}

// Request for RdcAPI.RemoveJob.
message RemoveJobRequest {
  // Identifier of the job to remove.
  string job_id = 1;
}

// Response for RdcAPI.RemoveJob.
message RemoveJobResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
}

// Response for RdcAPI.RemoveAllJob.
message RemoveAllJobResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
}

// Request for RdcAPI.DiagnosticRun.
message DiagnosticRunRequest {
  // Identifier of the GPU group to run diagnostics on.
  uint32 group_id = 1;
  // Diagnostic level (rdc_diag_level_t in the C signature quoted on the
  // RPC); the valid values are not defined in this file.
  uint32 level = 2;
}

// A diagnostic message paired with a numeric code. Used by
// DiagnosticTestResult and DiagnosticPerGpuResult.
message DiagnosticDetail {
  // Human-readable message.
  string msg = 1;
  // Numeric code; its meaning is not defined in this file.
  uint32 code = 2;
}

// Diagnostic outcome for a single GPU.
message DiagnosticPerGpuResult {
  // Index of the GPU this result belongs to.
  uint32 gpu_index = 1;
  // Result detail for that GPU.
  DiagnosticDetail gpu_result = 2;
}

// Outcome of one diagnostic test case.
message DiagnosticTestResult {
  // Test case this result refers to. DiagnosticTestCaseRunRequest declares
  // an enum with the same value names and numbers (TestCaseType).
  enum DiagnosticTestCase {
    COMPUTE_PROCESS = 0;
    SDMA_QUEUE = 1;
    COMPUTE_QUEUE = 2;
    VRAM_CHECK = 3;
    SYS_MEM_CHECK = 4;
    NODE_TOPOLOGY = 5;
    GPU_PARAMETERS = 6;
  }

  // Status of the test case; the value set is not defined in this file.
  uint32 status = 1;
  // Overall detail for the test case.
  DiagnosticDetail details = 2;
  // Which test case was run.
  DiagnosticTestCase test_case = 3;
  // Presumably the number of entries in `gpu_results` — TODO confirm.
  uint32 per_gpu_result_count = 4;
  // Per-GPU results.
  repeated DiagnosticPerGpuResult gpu_results = 5;
  // Additional free-form information.
  string info = 6;
}

// Collection of test results produced by a diagnostic run.
message DiagnosticResponse {
  // Presumably the number of entries in `diag_info` — TODO confirm.
  uint32 results_count = 1;
  // One result per test case that was run.
  repeated DiagnosticTestResult diag_info = 2;
}

// Response for RdcAPI.DiagnosticRun.
message DiagnosticRunResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Results of the diagnostic run.
  DiagnosticResponse response = 2;
}

// Request for RdcAPI.DiagnosticTestCaseRun.
message DiagnosticTestCaseRunRequest {
  // Test case to run. DiagnosticTestResult declares an enum with the same
  // value names and numbers (DiagnosticTestCase).
  enum TestCaseType {
    COMPUTE_PROCESS = 0;
    SDMA_QUEUE = 1;
    COMPUTE_QUEUE = 2;
    VRAM_CHECK = 3;
    SYS_MEM_CHECK = 4;
    NODE_TOPOLOGY = 5;
    GPU_PARAMETERS = 6;
  }

  // Identifier of the GPU group to run the test case on.
  uint32 group_id = 1;
  // Selected test case.
  TestCaseType test_case = 2;
}

// Response for RdcAPI.DiagnosticTestCaseRun.
message DiagnosticTestCaseRunResponse {
  // Result code of the call; presumably an rdc_status_t value — TODO confirm.
  uint32 status = 1;
  // Result of the test case that was run.
  DiagnosticTestResult result = 2;
}