/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
*/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include "ras-logger.h"
#include "ras-page-isolation.h"

#define PARSED_ENV_LEN 50
static const struct config threshold_units[] = {
	{ "m",	1000 },
	{ "k",	1000 },
	{ "",	1    },
	{}
};

static const struct config cycle_units[] = {
	{ "d",	24 },
	{ "h",	60 },
	{ "m",	60 },
	{ "s",  1  },
	{}
};

static struct isolation threshold = {
	.name = "PAGE_CE_THRESHOLD",
	.units = threshold_units,
	.env = "50",
	.unit = "",
};

static struct isolation cycle = {
	.name = "PAGE_CE_REFRESH_CYCLE",
	.units = cycle_units,
	.env = "24h",
	.unit = "h",
};

static const char *kernel_offline[] = {
	[OFFLINE_SOFT]		 = "/sys/devices/system/memory/soft_offline_page",
	[OFFLINE_HARD]		 = "/sys/devices/system/memory/hard_offline_page",
	[OFFLINE_SOFT_THEN_HARD] = "/sys/devices/system/memory/soft_offline_page",
};

static const struct config offline_choice[] = {
	{ "off",		OFFLINE_OFF },
	{ "account",		OFFLINE_ACCOUNT },
	{ "soft",		OFFLINE_SOFT },
	{ "hard",		OFFLINE_HARD },
	{ "soft-then-hard",	OFFLINE_SOFT_THEN_HARD },
	{}
};

static const char *page_state[] = {
	[PAGE_ONLINE]		= "online",
	[PAGE_OFFLINE]		= "offlined",
	[PAGE_OFFLINE_FAILED]	= "offline-failed",
};

static enum otype offline = OFFLINE_SOFT;
static struct rb_root page_records;

static void page_offline_init(void)
{
	const char *env = "PAGE_CE_ACTION";
	char *choice = getenv(env);
	const struct config *c = NULL;
	int matched = 0;

	if (choice) {
		for (c = offline_choice; c->name; c++) {
			if (!strcasecmp(choice, c->name)) {
				offline = c->val;
				matched = 1;
				break;
			}
		}
	}

	if (!matched)
		log(TERM, LOG_INFO, "Improper %s, set to default soft\n", env);

	if (offline > OFFLINE_ACCOUNT && access(kernel_offline[offline], W_OK)) {
		log(TERM, LOG_INFO, "Kernel does not support page offline interface\n");
		offline = OFFLINE_ACCOUNT;
	}

	log(TERM, LOG_INFO, "Page offline choice on Corrected Errors is %s\n",
	    offline_choice[offline].name);
}

static void parse_isolation_env(struct isolation *config)
{
	char *env = getenv(config->name);
	char *unit = NULL;
	const struct config *units = NULL;
	int i, no_unit;
	int valid = 0;
	int unit_matched = 0;
	unsigned long value, tmp;

	/* check if env is vaild */
	if (env && strlen(env)) {
		/* All the character before unit must be digit */
		for (i = 0; i < strlen(env) - 1; i++) {
			if (!isdigit(env[i]))
				goto parse;
		}
		if (sscanf(env, "%lu", &value) < 1 || !value)
			goto parse;
		/* check if the unit is vaild */
		unit = env + strlen(env) - 1;
		/* no unit, all the character are value character */
		if (isdigit(*unit)) {
			valid = 1;
			no_unit = 1;
			goto parse;
		}
		for (units = config->units; units->name; units++) {
			/* value character and unit character are both valid */
			if (!strcasecmp(unit, units->name)) {
				valid = 1;
				no_unit = 0;
				break;
			}
		}
	}

parse:
	/* if invalid, use default env */
	if (valid) {
		config->env = env;
		if (!no_unit)
			config->unit = unit;
	} else {
		 log(TERM, LOG_INFO, "Improper %s, set to default %s.\n",
				 config->name, config->env);
	}

	/* if env value string is greater than ulong_max, truncate the last digit */
	sscanf(config->env, "%lu", &value);
	for (units = config->units; units->name; units++) {
		if (!strcasecmp(config->unit, units->name))
			unit_matched = 1;
		if (unit_matched) {
			tmp = value;
			value *= units->val;
			if (tmp != 0 && value / tmp != units->val)
				config->overflow = true;
		}
	}
	config->val = value;
	/* In order to output value and unit perfectly */
	config->unit = no_unit ? config->unit : "";
}

static void parse_env_string(struct isolation *config, char *str)
{
	int i;

	if (config->overflow) {
		/* when overflow, use basic unit */
		for (i = 0; config->units[i].name; i++) ;
		sprintf(str, "%lu%s", config->val, config->units[i-1].name);
		log(TERM, LOG_INFO, "%s is set overflow(%s), truncate it\n",
				config->name, config->env);
	} else {
		sprintf(str, "%s%s", config->env, config->unit);
	}
}

static void page_isolation_init(void)
{
	char threshold_string[PARSED_ENV_LEN];
	char cycle_string[PARSED_ENV_LEN];
	/**
	 * It's unnecessary to parse threshold configuration when offline
	 * choice is off.
	 */
	if (offline == OFFLINE_OFF)
		return;

	parse_isolation_env(&threshold);
	parse_isolation_env(&cycle);
	parse_env_string(&threshold, threshold_string);
	parse_env_string(&cycle, cycle_string);
	log(TERM, LOG_INFO, "Threshold of memory Corrected Errors is %s / %s\n",
			threshold_string, cycle_string);
}

void ras_page_account_init(void)
{
	page_offline_init();
	page_isolation_init();
}

static int do_page_offline(unsigned long long addr, enum otype type)
{
	int fd, rc;
	char buf[20];

	fd = open(kernel_offline[type], O_WRONLY);
	if (fd == -1) {
		log(TERM, LOG_ERR, "[%s]:open file: %s failed\n", __func__, kernel_offline[type]);
		return -1;
	}

	sprintf(buf, "%#llx", addr);
	rc = write(fd, buf, strlen(buf));
	if (rc < 0) {
		log(TERM, LOG_ERR, "page offline addr(%s) by %s failed, errno:%d\n", buf, kernel_offline[type], errno);
	}
	close(fd);
	return rc;
}

static void page_offline(struct page_record *pr)
{
	unsigned long long addr = pr->addr;
	int ret;

	/* Offlining page is not required */
	if (offline <= OFFLINE_ACCOUNT) {
		log(TERM, LOG_INFO, "PAGE_CE_ACTION=%s, ignore to offline page at %#llx\n",
				offline_choice[offline].name, addr);
		return;
	}

	/* Ignore offlined pages */
	if (pr->offlined == PAGE_OFFLINE) {
		log(TERM, LOG_INFO, "page at %#llx is already offlined, ignore\n", addr);
		return;
	}

	/* Time to silence this noisy page */
	if (offline == OFFLINE_SOFT_THEN_HARD) {
		ret = do_page_offline(addr, OFFLINE_SOFT);
		if (ret < 0)
			ret = do_page_offline(addr, OFFLINE_HARD);
	} else {
		ret = do_page_offline(addr, offline);
	}

	pr->offlined = ret < 0 ? PAGE_OFFLINE_FAILED : PAGE_OFFLINE;

	log(TERM, LOG_INFO, "Result of offlining page at %#llx: %s\n",
	    addr, page_state[pr->offlined]);
}

static void page_record(struct page_record *pr, unsigned count, time_t time)
{
	unsigned long period = time - pr->start;
	unsigned long tolerate;

	if (period >= cycle.val) {
		/**
		 * Since we don't refresh automatically, it is possible that the period
		 * between two occurences will be longer than the pre-configured refresh cycle.
		 * In this case, we tolerate the frequency of the whole period up to
		 * the pre-configured threshold.
		 */
		tolerate = (period / (double)cycle.val) * threshold.val;
		pr->count -= (tolerate > pr->count) ? pr->count : tolerate;
		pr->start = time;
		pr->excess = 0;
	}

	pr->count += count;
	if (pr->count >= threshold.val) {
		log(TERM, LOG_INFO, "Corrected Errors at %#llx exceeded threshold\n", pr->addr);

		/**
		 * Backup ce count of current cycle to enable next round, which actually
		 * should never happen if we can disable overflow completely in the same
		 * time unit (but sadly we can't).
		 */
		pr->excess += pr->count;
		pr->count = 0;
		page_offline(pr);
	}
}

static struct page_record *page_lookup_insert(unsigned long long addr)
{
	struct rb_node **entry = &page_records.rb_node;
	struct rb_node *parent = NULL;
	struct page_record *pr = NULL, *find = NULL;

	while (*entry) {
		parent = *entry;
		pr = rb_entry(parent, struct page_record, entry);
		if (addr == pr->addr) {
			return pr;
		} else if (addr < pr->addr) {
			entry = &(*entry)->rb_left;
		} else {
			entry = &(*entry)->rb_right;
		}
	}

	find = calloc(1, sizeof(struct page_record));
	if (!find) {
		log(TERM, LOG_ERR, "No memory for page records\n");
		return NULL;
	}

	find->addr = addr;
	rb_link_node(&find->entry, parent, entry);
	rb_insert_color(&find->entry, &page_records);

	return find;
}

void ras_record_page_error(unsigned long long addr, unsigned count, time_t time)
{
	struct page_record *pr = NULL;

	if (offline == OFFLINE_OFF)
		return;

	pr = page_lookup_insert(addr & PAGE_MASK);
	if (pr) {
		if (!pr->start)
			pr->start = time;
		page_record(pr, count, time);
	}
}
