/* ************************************************************************ * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "host_matrix_csr.hpp" #include "../../utils/allocate_free.hpp" #include "../../utils/def.hpp" #include "../../utils/log.hpp" #include "../../utils/math_functions.hpp" #include "../matrix_formats_ind.hpp" #include "host_conversion.hpp" #include "host_io.hpp" #include "host_matrix_bcsr.hpp" #include "host_matrix_coo.hpp" #include "host_matrix_dense.hpp" #include "host_matrix_dia.hpp" #include "host_matrix_ell.hpp" #include "host_matrix_hyb.hpp" #include "host_matrix_mcsr.hpp" #include "host_vector.hpp" #include #include #include #include #include #include #include #ifdef _OPENMP #include #else #define omp_set_num_threads(num) ; #define omp_get_max_threads() 1 #define omp_get_thread_num() 0 #define omp_get_num_threads() 1 #define omp_set_nested(num) ; #endif namespace rocalution { template HostMatrixCSR::HostMatrixCSR() { // no default constructors LOG_INFO("no default constructor"); FATAL_ERROR(__FILE__, __LINE__); } template HostMatrixCSR::HostMatrixCSR(const Rocalution_Backend_Descriptor& local_backend) { log_debug(this, "HostMatrixCSR::HostMatrixCSR()", "constructor with local_backend"); this->mat_.row_offset = NULL; this->mat_.col = NULL; this->mat_.val = NULL; this->set_backend(local_backend); this->L_diag_unit_ = false; this->U_diag_unit_ = false; } template HostMatrixCSR::~HostMatrixCSR() { log_debug(this, "HostMatrixCSR::~HostMatrixCSR()", "destructor"); this->Clear(); } template void HostMatrixCSR::Clear() { if(this->nnz_ > 0) { free_host(&this->mat_.row_offset); free_host(&this->mat_.col); free_host(&this->mat_.val); this->nrow_ = 0; this->ncol_ = 0; this->nnz_ = 0; } } template bool HostMatrixCSR::Zeros(void) { if(this->nnz_ > 0) { set_to_zero_host(this->nnz_, mat_.val); } return true; } template void HostMatrixCSR::Info(void) const { LOG_INFO( "HostMatrixCSR, OpenMP threads: " << this->local_backend_.OpenMP_threads); } template bool HostMatrixCSR::Check(void) const { bool sorted = true; if(this->nnz_ > 0) { // check nnz if((std::abs(this->nnz_) == std::numeric_limits::infinity()) || // inf (this->nnz_ != this->nnz_)) { // NaN LOG_VERBOSE_INFO(2, "*** error: Matrix CSR:Check - problems with matrix nnz"); return false; } // nrow if((std::abs(this->nrow_) == std::numeric_limits::infinity()) || // inf (this->nrow_ != this->nrow_)) { // NaN LOG_VERBOSE_INFO(2, "*** error: Matrix CSR:Check - problems with matrix nrow"); return false; } // ncol if((std::abs(this->ncol_) == std::numeric_limits::infinity()) || // inf (this->ncol_ != this->ncol_)) { // NaN LOG_VERBOSE_INFO(2, "*** error: Matrix CSR:Check - problems with matrix ncol"); return false; } for(int ai = 0; ai < this->nrow_ + 1; ++ai) { int row = this->mat_.row_offset[ai]; if((row < 0) || (row > this->nnz_)) { LOG_VERBOSE_INFO( 2, "*** error: Matrix CSR:Check - problems with matrix row offset pointers"); return false; } } for(int ai = 0; ai < this->nrow_; ++ai) { int s = this->mat_.col[this->mat_.row_offset[ai]]; for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { int col = this->mat_.col[aj]; if((col < 0) || (col > this->ncol_)) { LOG_VERBOSE_INFO( 2, "*** error: Matrix CSR:Check - problems with matrix col values"); return false; } ValueType val = this->mat_.val[aj]; if((val == std::numeric_limits::infinity()) || (val != val)) { LOG_VERBOSE_INFO( 2, "*** error: Matrix CSR:Check - problems with matrix values"); return false; } if((aj > this->mat_.row_offset[ai]) && (s >= col)) { sorted = false; } s = this->mat_.col[aj]; } } } else { assert(this->nnz_ == 0); assert(this->nrow_ == 0); assert(this->ncol_ == 0); assert(this->mat_.row_offset == NULL); assert(this->mat_.val == NULL); assert(this->mat_.col == NULL); } if(sorted == false) { LOG_VERBOSE_INFO(2, "*** warning: Matrix CSR:Check - the matrix has not sorted columns"); } return true; } template void HostMatrixCSR::AllocateCSR(int nnz, int nrow, int ncol) { assert(nnz >= 0); assert(ncol >= 0); assert(nrow >= 0); if(this->nnz_ > 0) { this->Clear(); } if(nnz > 0) { allocate_host(nrow + 1, &this->mat_.row_offset); allocate_host(nnz, &this->mat_.col); allocate_host(nnz, &this->mat_.val); set_to_zero_host(nrow + 1, mat_.row_offset); set_to_zero_host(nnz, mat_.col); set_to_zero_host(nnz, mat_.val); this->nrow_ = nrow; this->ncol_ = ncol; this->nnz_ = nnz; } } template void HostMatrixCSR::SetDataPtrCSR( int** row_offset, int** col, ValueType** val, int nnz, int nrow, int ncol) { assert(*row_offset != NULL); assert(*col != NULL); assert(*val != NULL); assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); this->Clear(); this->nrow_ = nrow; this->ncol_ = ncol; this->nnz_ = nnz; this->mat_.row_offset = *row_offset; this->mat_.col = *col; this->mat_.val = *val; } template void HostMatrixCSR::LeaveDataPtrCSR(int** row_offset, int** col, ValueType** val) { assert(this->nrow_ > 0); assert(this->ncol_ > 0); assert(this->nnz_ > 0); // see free_host function for details *row_offset = this->mat_.row_offset; *col = this->mat_.col; *val = this->mat_.val; this->mat_.row_offset = NULL; this->mat_.col = NULL; this->mat_.val = NULL; this->nrow_ = 0; this->ncol_ = 0; this->nnz_ = 0; } template void HostMatrixCSR::CopyFromCSR(const int* row_offsets, const int* col, const ValueType* val) { if(this->nnz_ > 0) { assert(this->nrow_ > 0); assert(this->ncol_ > 0); _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_ + 1; ++i) { this->mat_.row_offset[i] = row_offsets[i]; } #ifdef _OPENMP #pragma omp parallel for #endif for(int j = 0; j < this->nnz_; ++j) { this->mat_.col[j] = col[j]; this->mat_.val[j] = val[j]; } } } template void HostMatrixCSR::CopyToCSR(int* row_offsets, int* col, ValueType* val) const { if(this->nnz_ > 0) { assert(this->nrow_ > 0); assert(this->ncol_ > 0); _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_ + 1; ++i) { row_offsets[i] = this->mat_.row_offset[i]; } #ifdef _OPENMP #pragma omp parallel for #endif for(int j = 0; j < this->nnz_; ++j) { col[j] = this->mat_.col[j]; val[j] = this->mat_.val[j]; } } } template void HostMatrixCSR::CopyFrom(const BaseMatrix& mat) { // copy only in the same format assert(this->GetMatFormat() == mat.GetMatFormat()); if(const HostMatrixCSR* cast_mat = dynamic_cast*>(&mat)) { if(this->nnz_ == 0) { this->AllocateCSR(cast_mat->nnz_, cast_mat->nrow_, cast_mat->ncol_); } assert((this->nnz_ == cast_mat->nnz_) && (this->nrow_ == cast_mat->nrow_) && (this->ncol_ == cast_mat->ncol_)); if(this->nnz_ > 0) { _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_ + 1; ++i) { this->mat_.row_offset[i] = cast_mat->mat_.row_offset[i]; } #ifdef _OPENMP #pragma omp parallel for #endif for(int j = 0; j < this->nnz_; ++j) { this->mat_.col[j] = cast_mat->mat_.col[j]; this->mat_.val[j] = cast_mat->mat_.val[j]; } } } else { // Host matrix knows only host matrices // -> dispatching mat.CopyTo(this); } } template void HostMatrixCSR::CopyTo(BaseMatrix* mat) const { mat->CopyFrom(*this); } template bool HostMatrixCSR::ReadFileCSR(const std::string& filename) { int nrow; int ncol; int nnz; int* ptr = NULL; int* col = NULL; ValueType* val = NULL; if(read_matrix_csr(nrow, ncol, nnz, &ptr, &col, &val, filename.c_str()) != true) { return false; } this->Clear(); this->SetDataPtrCSR(&ptr, &col, &val, nnz, nrow, ncol); return true; } template void HostMatrixCSR::CopyFromHostCSR( const int* row_offset, const int* col, const ValueType* val, int nnz, int nrow, int ncol) { assert(nnz >= 0); assert(ncol >= 0); assert(nrow >= 0); assert(row_offset != NULL); assert(col != NULL); assert(val != NULL); // Allocate matrix if(this->nnz_ > 0) { this->Clear(); } if(nnz > 0) { allocate_host(nrow + 1, &this->mat_.row_offset); allocate_host(nnz, &this->mat_.col); allocate_host(nnz, &this->mat_.val); this->nrow_ = nrow; this->ncol_ = ncol; this->nnz_ = nnz; _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_ + 1; ++i) { this->mat_.row_offset[i] = row_offset[i]; } #ifdef _OPENMP #pragma omp parallel for #endif for(int j = 0; j < this->nnz_; ++j) { this->mat_.col[j] = col[j]; this->mat_.val[j] = val[j]; } } } template bool HostMatrixCSR::WriteFileCSR(const std::string& filename) const { if(write_matrix_csr(this->nrow_, this->ncol_, this->nnz_, this->mat_.row_offset, this->mat_.col, this->mat_.val, filename.c_str()) != true) { return false; } return true; } template bool HostMatrixCSR::ConvertFrom(const BaseMatrix& mat) { this->Clear(); // empty matrix is empty matrix if(mat.GetNnz() == 0) { return true; } if(const HostMatrixCSR* cast_mat = dynamic_cast*>(&mat)) { this->CopyFrom(*cast_mat); return true; } if(const HostMatrixBCSR* cast_mat = dynamic_cast*>(&mat)) { this->Clear(); int nrow = cast_mat->mat_.nrowb * cast_mat->mat_.blockdim * cast_mat->mat_.blockdim; int ncol = cast_mat->mat_.ncolb * cast_mat->mat_.blockdim * cast_mat->mat_.blockdim; int nnz = cast_mat->mat_.nnzb * cast_mat->mat_.blockdim * cast_mat->mat_.blockdim; if(bcsr_to_csr(this->local_backend_.OpenMP_threads, nnz, nrow, ncol, cast_mat->mat_, &this->mat_) == true) { this->nrow_ = nrow; this->ncol_ = ncol; this->nnz_ = nnz; return true; } } if(const HostMatrixCOO* cast_mat = dynamic_cast*>(&mat)) { this->Clear(); if(coo_to_csr(this->local_backend_.OpenMP_threads, cast_mat->nnz_, cast_mat->nrow_, cast_mat->ncol_, cast_mat->mat_, &this->mat_) == true) { this->nrow_ = cast_mat->nrow_; this->ncol_ = cast_mat->ncol_; this->nnz_ = cast_mat->nnz_; return true; } } if(const HostMatrixDENSE* cast_mat = dynamic_cast*>(&mat)) { this->Clear(); int nnz = 0; if(dense_to_csr(this->local_backend_.OpenMP_threads, cast_mat->nrow_, cast_mat->ncol_, cast_mat->mat_, &this->mat_, &nnz) == true) { this->nrow_ = cast_mat->nrow_; this->ncol_ = cast_mat->ncol_; this->nnz_ = nnz; return true; } } if(const HostMatrixDIA* cast_mat = dynamic_cast*>(&mat)) { this->Clear(); int nnz; if(dia_to_csr(this->local_backend_.OpenMP_threads, cast_mat->nnz_, cast_mat->nrow_, cast_mat->ncol_, cast_mat->mat_, &this->mat_, &nnz) == true) { this->nrow_ = cast_mat->nrow_; this->ncol_ = cast_mat->ncol_; this->nnz_ = nnz; return true; } } if(const HostMatrixELL* cast_mat = dynamic_cast*>(&mat)) { this->Clear(); int nnz; if(ell_to_csr(this->local_backend_.OpenMP_threads, cast_mat->nnz_, cast_mat->nrow_, cast_mat->ncol_, cast_mat->mat_, &this->mat_, &nnz) == true) { this->nrow_ = cast_mat->nrow_; this->ncol_ = cast_mat->ncol_; this->nnz_ = nnz; return true; } } if(const HostMatrixMCSR* cast_mat = dynamic_cast*>(&mat)) { this->Clear(); if(mcsr_to_csr(this->local_backend_.OpenMP_threads, cast_mat->nnz_, cast_mat->nrow_, cast_mat->ncol_, cast_mat->mat_, &this->mat_) == true) { this->nrow_ = cast_mat->nrow_; this->ncol_ = cast_mat->ncol_; this->nnz_ = cast_mat->nnz_; return true; } } if(const HostMatrixHYB* cast_mat = dynamic_cast*>(&mat)) { this->Clear(); int nnz; if(hyb_to_csr(this->local_backend_.OpenMP_threads, cast_mat->nnz_, cast_mat->nrow_, cast_mat->ncol_, cast_mat->ell_nnz_, cast_mat->coo_nnz_, cast_mat->mat_, &this->mat_, &nnz) == true) { this->nrow_ = cast_mat->nrow_; this->ncol_ = cast_mat->ncol_; this->nnz_ = nnz; return true; } } return false; } template void HostMatrixCSR::Apply(const BaseVector& in, BaseVector* out) const { assert(in.GetSize() >= 0); assert(out->GetSize() >= 0); assert(in.GetSize() == this->ncol_); assert(out->GetSize() == this->nrow_); const HostVector* cast_in = dynamic_cast*>(&in); HostVector* cast_out = dynamic_cast*>(out); assert(cast_in != NULL); assert(cast_out != NULL); _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nrow_; ++ai) { ValueType sum = static_cast(0); int row_beg = this->mat_.row_offset[ai]; int row_end = this->mat_.row_offset[ai + 1]; for(int aj = row_beg; aj < row_end; ++aj) { sum += this->mat_.val[aj] * cast_in->vec_[this->mat_.col[aj]]; } cast_out->vec_[ai] = sum; } } template void HostMatrixCSR::ApplyAdd(const BaseVector& in, ValueType scalar, BaseVector* out) const { if(this->nnz_ > 0) { assert(in.GetSize() >= 0); assert(out->GetSize() >= 0); assert(in.GetSize() == this->ncol_); assert(out->GetSize() == this->nrow_); const HostVector* cast_in = dynamic_cast*>(&in); HostVector* cast_out = dynamic_cast*>(out); assert(cast_in != NULL); assert(cast_out != NULL); _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { cast_out->vec_[ai] += scalar * this->mat_.val[aj] * cast_in->vec_[this->mat_.col[aj]]; } } } } template bool HostMatrixCSR::ExtractDiagonal(BaseVector* vec_diag) const { assert(vec_diag != NULL); assert(vec_diag->GetSize() == this->nrow_); HostVector* cast_vec_diag = dynamic_cast*>(vec_diag); _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai == this->mat_.col[aj]) { cast_vec_diag->vec_[ai] = this->mat_.val[aj]; break; } } } return true; } template bool HostMatrixCSR::ExtractInverseDiagonal(BaseVector* vec_inv_diag) const { assert(vec_inv_diag != NULL); assert(vec_inv_diag->GetSize() == this->nrow_); HostVector* cast_vec_inv_diag = dynamic_cast*>(vec_inv_diag); int detect_zero_diag = 0; _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai == this->mat_.col[aj]) { if(this->mat_.val[aj] != static_cast(0)) { cast_vec_inv_diag->vec_[ai] = static_cast(1) / this->mat_.val[aj]; } else { cast_vec_inv_diag->vec_[ai] = static_cast(1); detect_zero_diag = 1; } break; } } } if(detect_zero_diag == 1) { LOG_VERBOSE_INFO( 2, "*** warning: in HostMatrixCSR::ExtractInverseDiagonal() a zero has been detected " "on the diagonal. It has been replaced with one to avoid inf"); } return true; } template bool HostMatrixCSR::ExtractSubMatrix(int row_offset, int col_offset, int row_size, int col_size, BaseMatrix* mat) const { assert(mat != NULL); assert(row_offset >= 0); assert(col_offset >= 0); assert(this->nrow_ > 0); assert(this->ncol_ > 0); HostMatrixCSR* cast_mat = dynamic_cast*>(mat); assert(cast_mat != NULL); int mat_nnz = 0; // use omp in local_matrix (higher level) // _set_omp_backend_threads(this->local_backend_, this->nrow_); //#ifdef _OPENMP //#pragma omp parallel for reduction(+:mat_nnz) //#endif for(int ai = row_offset; ai < row_offset + row_size; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if((this->mat_.col[aj] >= col_offset) && (this->mat_.col[aj] < col_offset + col_size)) { ++mat_nnz; } } } // not empty submatrix if(mat_nnz > 0) { cast_mat->AllocateCSR(mat_nnz, row_size, col_size); int mat_row_offset = 0; cast_mat->mat_.row_offset[0] = mat_row_offset; for(int ai = row_offset; ai < row_offset + row_size; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if((this->mat_.col[aj] >= col_offset) && (this->mat_.col[aj] < col_offset + col_size)) { cast_mat->mat_.col[mat_row_offset] = this->mat_.col[aj] - col_offset; cast_mat->mat_.val[mat_row_offset] = this->mat_.val[aj]; ++mat_row_offset; } } cast_mat->mat_.row_offset[ai - row_offset + 1] = mat_row_offset; } cast_mat->mat_.row_offset[row_size] = mat_row_offset; assert(mat_row_offset == mat_nnz); } return true; } template bool HostMatrixCSR::ExtractU(BaseMatrix* U) const { assert(U != NULL); assert(this->nrow_ > 0); assert(this->ncol_ > 0); HostMatrixCSR* cast_U = dynamic_cast*>(U); assert(cast_U != NULL); // count nnz of upper triangular part int nnz_U = 0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : nnz_U) #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] > ai) { ++nnz_U; } } } // allocate upper triangular part structure int* row_offset = NULL; int* col = NULL; ValueType* val = NULL; allocate_host(this->nrow_ + 1, &row_offset); allocate_host(nnz_U, &col); allocate_host(nnz_U, &val); // fill upper triangular part int nnz = 0; row_offset[0] = 0; for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] > ai) { col[nnz] = this->mat_.col[aj]; val[nnz] = this->mat_.val[aj]; ++nnz; } } row_offset[ai + 1] = nnz; } cast_U->Clear(); cast_U->SetDataPtrCSR(&row_offset, &col, &val, nnz_U, this->nrow_, this->ncol_); return true; } template bool HostMatrixCSR::ExtractUDiagonal(BaseMatrix* U) const { assert(U != NULL); assert(this->nrow_ > 0); assert(this->ncol_ > 0); HostMatrixCSR* cast_U = dynamic_cast*>(U); assert(cast_U != NULL); // count nnz of upper triangular part int nnz_U = 0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : nnz_U) #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] >= ai) { ++nnz_U; } } } // allocate upper triangular part structure int* row_offset = NULL; int* col = NULL; ValueType* val = NULL; allocate_host(this->nrow_ + 1, &row_offset); allocate_host(nnz_U, &col); allocate_host(nnz_U, &val); // fill upper triangular part int nnz = 0; row_offset[0] = 0; for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] >= ai) { col[nnz] = this->mat_.col[aj]; val[nnz] = this->mat_.val[aj]; ++nnz; } } row_offset[ai + 1] = nnz; } cast_U->Clear(); cast_U->SetDataPtrCSR(&row_offset, &col, &val, nnz_U, this->nrow_, this->ncol_); return true; } template bool HostMatrixCSR::ExtractL(BaseMatrix* L) const { assert(L != NULL); assert(this->nrow_ > 0); assert(this->ncol_ > 0); HostMatrixCSR* cast_L = dynamic_cast*>(L); assert(cast_L != NULL); // count nnz of lower triangular part int nnz_L = 0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : nnz_L) #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] < ai) { ++nnz_L; } } } // allocate lower triangular part structure int* row_offset = NULL; int* col = NULL; ValueType* val = NULL; allocate_host(this->nrow_ + 1, &row_offset); allocate_host(nnz_L, &col); allocate_host(nnz_L, &val); // fill lower triangular part int nnz = 0; row_offset[0] = 0; for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] < ai) { col[nnz] = this->mat_.col[aj]; val[nnz] = this->mat_.val[aj]; ++nnz; } } row_offset[ai + 1] = nnz; } cast_L->Clear(); cast_L->SetDataPtrCSR(&row_offset, &col, &val, nnz_L, this->nrow_, this->ncol_); return true; } template bool HostMatrixCSR::ExtractLDiagonal(BaseMatrix* L) const { assert(L != NULL); assert(this->nrow_ > 0); assert(this->ncol_ > 0); HostMatrixCSR* cast_L = dynamic_cast*>(L); assert(cast_L != NULL); // count nnz of lower triangular part int nnz_L = 0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : nnz_L) #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] <= ai) { ++nnz_L; } } } // allocate lower triangular part structure int* row_offset = NULL; int* col = NULL; ValueType* val = NULL; allocate_host(this->nrow_ + 1, &row_offset); allocate_host(nnz_L, &col); allocate_host(nnz_L, &val); // fill lower triangular part int nnz = 0; row_offset[0] = 0; for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] <= ai) { col[nnz] = this->mat_.col[aj]; val[nnz] = this->mat_.val[aj]; ++nnz; } } row_offset[ai + 1] = nnz; } cast_L->Clear(); cast_L->SetDataPtrCSR(&row_offset, &col, &val, nnz_L, this->nrow_, this->ncol_); return true; } template bool HostMatrixCSR::LUSolve(const BaseVector& in, BaseVector* out) const { assert(in.GetSize() >= 0); assert(out->GetSize() >= 0); assert(in.GetSize() == this->ncol_); assert(out->GetSize() == this->nrow_); const HostVector* cast_in = dynamic_cast*>(&in); HostVector* cast_out = dynamic_cast*>(out); assert(cast_in != NULL); assert(cast_out != NULL); // Solve L for(int ai = 0; ai < this->nrow_; ++ai) { cast_out->vec_[ai] = cast_in->vec_[ai]; for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] < ai) { // under the diagonal cast_out->vec_[ai] -= this->mat_.val[aj] * cast_out->vec_[this->mat_.col[aj]]; } else { // CSR should be sorted break; } } } // last elements should be the diagonal one (last) int diag_aj = this->nnz_ - 1; // Solve U for(int ai = this->nrow_ - 1; ai >= 0; --ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] > ai) { // above the diagonal cast_out->vec_[ai] -= this->mat_.val[aj] * cast_out->vec_[this->mat_.col[aj]]; } if(this->mat_.col[aj] == ai) { diag_aj = aj; } } cast_out->vec_[ai] /= this->mat_.val[diag_aj]; } return true; } template void HostMatrixCSR::LLAnalyse(void) { // do nothing } template void HostMatrixCSR::LLAnalyseClear(void) { // do nothing } template void HostMatrixCSR::LUAnalyse(void) { // do nothing } template void HostMatrixCSR::LUAnalyseClear(void) { // do nothing } template bool HostMatrixCSR::LLSolve(const BaseVector& in, BaseVector* out) const { assert(in.GetSize() >= 0); assert(out->GetSize() >= 0); assert(in.GetSize() == this->ncol_); assert(out->GetSize() == this->nrow_); const HostVector* cast_in = dynamic_cast*>(&in); HostVector* cast_out = dynamic_cast*>(out); assert(cast_in != NULL); assert(cast_out != NULL); // Solve L for(int ai = 0; ai < this->nrow_; ++ai) { ValueType value = cast_in->vec_[ai]; int diag_idx = this->mat_.row_offset[ai + 1] - 1; for(int aj = this->mat_.row_offset[ai]; aj < diag_idx; ++aj) { value -= this->mat_.val[aj] * cast_out->vec_[this->mat_.col[aj]]; } cast_out->vec_[ai] = value / this->mat_.val[diag_idx]; } // Solve L^T for(int ai = this->nrow_ - 1; ai >= 0; --ai) { int diag_idx = this->mat_.row_offset[ai + 1] - 1; ValueType value = cast_out->vec_[ai] / this->mat_.val[diag_idx]; for(int aj = this->mat_.row_offset[ai]; aj < diag_idx; ++aj) { cast_out->vec_[this->mat_.col[aj]] -= value * this->mat_.val[aj]; } cast_out->vec_[ai] = value; } return true; } template bool HostMatrixCSR::LLSolve(const BaseVector& in, const BaseVector& inv_diag, BaseVector* out) const { assert(in.GetSize() >= 0); assert(out->GetSize() >= 0); assert(in.GetSize() == this->ncol_); assert(out->GetSize() == this->nrow_); assert(inv_diag.GetSize() == this->nrow_ || inv_diag.GetSize() == this->ncol_); const HostVector* cast_in = dynamic_cast*>(&in); const HostVector* cast_diag = dynamic_cast*>(&inv_diag); HostVector* cast_out = dynamic_cast*>(out); assert(cast_in != NULL); assert(cast_out != NULL); // Solve L for(int ai = 0; ai < this->nrow_; ++ai) { ValueType value = cast_in->vec_[ai]; int diag_idx = this->mat_.row_offset[ai + 1] - 1; for(int aj = this->mat_.row_offset[ai]; aj < diag_idx; ++aj) { value -= this->mat_.val[aj] * cast_out->vec_[this->mat_.col[aj]]; } cast_out->vec_[ai] = value * cast_diag->vec_[ai]; } // Solve L^T for(int ai = this->nrow_ - 1; ai >= 0; --ai) { int diag_idx = this->mat_.row_offset[ai + 1] - 1; ValueType value = cast_out->vec_[ai] * cast_diag->vec_[ai]; for(int aj = this->mat_.row_offset[ai]; aj < diag_idx; ++aj) { cast_out->vec_[this->mat_.col[aj]] -= value * this->mat_.val[aj]; } cast_out->vec_[ai] = value; } return true; } template void HostMatrixCSR::LAnalyse(bool diag_unit) { this->L_diag_unit_ = diag_unit; } template void HostMatrixCSR::LAnalyseClear(void) { // do nothing this->L_diag_unit_ = true; } template bool HostMatrixCSR::LSolve(const BaseVector& in, BaseVector* out) const { assert(in.GetSize() >= 0); assert(out->GetSize() >= 0); assert(in.GetSize() == this->ncol_); assert(out->GetSize() == this->nrow_); const HostVector* cast_in = dynamic_cast*>(&in); HostVector* cast_out = dynamic_cast*>(out); assert(cast_in != NULL); assert(cast_out != NULL); int diag_aj = 0; // Solve L for(int ai = 0; ai < this->nrow_; ++ai) { cast_out->vec_[ai] = cast_in->vec_[ai]; for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] < ai) { // under the diagonal cast_out->vec_[ai] -= this->mat_.val[aj] * cast_out->vec_[this->mat_.col[aj]]; } else { // CSR should be sorted if(this->L_diag_unit_ == false) { assert(this->mat_.col[aj] == ai); diag_aj = aj; } break; } } if(this->L_diag_unit_ == false) { cast_out->vec_[ai] /= this->mat_.val[diag_aj]; } } return true; } template void HostMatrixCSR::UAnalyse(bool diag_unit) { this->U_diag_unit_ = diag_unit; } template void HostMatrixCSR::UAnalyseClear(void) { // do nothing this->U_diag_unit_ = false; } template bool HostMatrixCSR::USolve(const BaseVector& in, BaseVector* out) const { assert(in.GetSize() >= 0); assert(out->GetSize() >= 0); assert(in.GetSize() == this->ncol_); assert(out->GetSize() == this->nrow_); const HostVector* cast_in = dynamic_cast*>(&in); HostVector* cast_out = dynamic_cast*>(out); assert(cast_in != NULL); assert(cast_out != NULL); // last elements should the diagonal one (last) int diag_aj = this->nnz_ - 1; // Solve U for(int ai = this->nrow_ - 1; ai >= 0; --ai) { cast_out->vec_[ai] = cast_in->vec_[ai]; for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(this->mat_.col[aj] > ai) { // above the diagonal cast_out->vec_[ai] -= this->mat_.val[aj] * cast_out->vec_[this->mat_.col[aj]]; } if(this->L_diag_unit_ == false) { if(this->mat_.col[aj] == ai) { diag_aj = aj; } } } if(this->L_diag_unit_ == false) { cast_out->vec_[ai] /= this->mat_.val[diag_aj]; } } return true; } // Algorithm for ILU factorization is based on // Y. Saad, Iterative methods for sparse linear systems, 2nd edition, SIAM template bool HostMatrixCSR::ILU0Factorize(void) { assert(this->nrow_ == this->ncol_); assert(this->nnz_ > 0); // pointer of upper part of each row int* diag_offset = NULL; int* nnz_entries = NULL; allocate_host(this->nrow_, &diag_offset); allocate_host(this->nrow_, &nnz_entries); for(int i = 0; i < this->nrow_; ++i) { nnz_entries[i] = 0; } // ai = 0 to N loop over all rows for(int ai = 0; ai < this->nrow_; ++ai) { // ai-th row entries int row_start = this->mat_.row_offset[ai]; int row_end = this->mat_.row_offset[ai + 1]; int j; // nnz position of ai-th row in mat_.val array for(j = row_start; j < row_end; ++j) { nnz_entries[this->mat_.col[j]] = j; } // loop over ai-th row nnz entries for(j = row_start; j < row_end; ++j) { // if nnz entry is in lower matrix if(this->mat_.col[j] < ai) { int col_j = this->mat_.col[j]; int diag_j = diag_offset[col_j]; if(this->mat_.val[diag_j] != static_cast(0)) { // multiplication factor this->mat_.val[j] = this->mat_.val[j] / this->mat_.val[diag_j]; // loop over upper offset pointer and do linear combination for nnz entry for(int k = diag_j + 1; k < this->mat_.row_offset[col_j + 1]; ++k) { // if nnz at this position do linear combination if(nnz_entries[this->mat_.col[k]] != 0) { this->mat_.val[nnz_entries[this->mat_.col[k]]] -= this->mat_.val[j] * this->mat_.val[k]; } } } } else { break; } } // set diagonal pointer to diagonal element diag_offset[ai] = j; // clear nnz entries for(j = row_start; j < row_end; ++j) { nnz_entries[this->mat_.col[j]] = 0; } } free_host(&diag_offset); free_host(&nnz_entries); return true; } // Algorithm for ILUT factorization is based on // Y. Saad, Iterative methods for sparse linear systems, 2nd edition, SIAM template bool HostMatrixCSR::ILUTFactorize(double t, int maxrow) { assert(this->nrow_ == this->ncol_); assert(this->nnz_ > 0); int nrow = this->nrow_; int ncol = this->ncol_; int* row_offset = NULL; int* diag_offset = NULL; int* nnz_entries = NULL; bool* nnz_pos = (bool*)malloc(nrow * sizeof(bool)); ValueType* w = NULL; allocate_host(nrow + 1, &row_offset); allocate_host(nrow, &diag_offset); allocate_host(nrow, &nnz_entries); allocate_host(nrow, &w); for(int i = 0; i < nrow; ++i) { w[i] = 0.0; nnz_entries[i] = -1; nnz_pos[i] = false; diag_offset[i] = 0; } // pre-allocate 1.5x nnz arrays for preconditioner matrix int nnzA = this->nnz_; int alloc_size = int(nnzA * 1.5); int* col = (int*)malloc(alloc_size * sizeof(int)); ValueType* val = (ValueType*)malloc(alloc_size * sizeof(ValueType)); // initialize row_offset row_offset[0] = 0; int nnz = 0; // loop over all rows for(int ai = 0; ai < this->nrow_; ++ai) { row_offset[ai + 1] = row_offset[ai]; int row_begin = this->mat_.row_offset[ai]; int row_end = this->mat_.row_offset[ai + 1]; double row_norm = 0.0; // fill working array with ai-th row int m = 0; for(int aj = row_begin; aj < row_end; ++aj) { int idx = this->mat_.col[aj]; w[idx] = this->mat_.val[aj]; nnz_entries[m] = idx; nnz_pos[idx] = true; row_norm += std::abs(this->mat_.val[aj]); ++m; } // threshold for dropping strategy double threshold = t * row_norm / (row_end - row_begin); for(int k = 0; k < nrow; ++k) { if(nnz_entries[k] == -1) { break; } int aj = nnz_entries[k]; // get smallest column index int sidx = k; for(int j = k + 1; j < nrow; ++j) { if(nnz_entries[j] == -1) { break; } if(nnz_entries[j] < aj) { sidx = j; aj = nnz_entries[j]; } } aj = nnz_entries[k]; // swap column index if(k != sidx) { nnz_entries[k] = nnz_entries[sidx]; nnz_entries[sidx] = aj; aj = nnz_entries[k]; } // lower matrix part if(aj < ai) { // if zero diagonal entry do nothing if(val[diag_offset[aj]] == static_cast(0)) { LOG_INFO("(ILUT) zero row"); continue; } w[aj] /= val[diag_offset[aj]]; // do linear combination with previous row for(int l = diag_offset[aj] + 1; l < row_offset[aj + 1]; ++l) { int idx = col[l]; ValueType fillin = w[aj] * val[l]; // drop off strategy for fill in if(nnz_pos[idx] == false) { if(std::abs(fillin) >= threshold) { nnz_entries[m] = idx; nnz_pos[idx] = true; w[idx] -= fillin; ++m; } } else { w[idx] -= fillin; } } } } // fill ai-th row of preconditioner matrix for(int k = 0, num_lower = 0, num_upper = 0; k < nrow; ++k) { int aj = nnz_entries[k]; if(aj == -1) { break; } if(nnz_pos[aj] == false) { break; } // lower part if(aj < ai && num_lower < maxrow) { val[nnz] = w[aj]; col[nnz] = aj; ++row_offset[ai + 1]; ++num_lower; ++nnz; // upper part } else if(aj > ai && num_upper < maxrow) { val[nnz] = w[aj]; col[nnz] = aj; ++row_offset[ai + 1]; ++num_upper; ++nnz; // diagonal part } else if(aj == ai) { val[nnz] = w[aj]; col[nnz] = aj; diag_offset[ai] = row_offset[ai + 1]; ++row_offset[ai + 1]; ++nnz; } // clear working arrays w[aj] = static_cast(0); nnz_entries[k] = -1; nnz_pos[aj] = false; } // resize preconditioner matrix if needed if(alloc_size < nnz + 2 * maxrow + 1) { alloc_size += int(nnzA * 1.5); int* col_tmp = (int*)realloc(col, alloc_size * sizeof(int)); ValueType* val_tmp = (ValueType*)realloc(val, alloc_size * sizeof(ValueType)); if(col_tmp == NULL || val_tmp == NULL) { free(col); free(val); LOG_INFO("ILUTFactorize failed on realloc"); FATAL_ERROR(__FILE__, __LINE__); } else { col = col_tmp; val = val_tmp; } } } free_host(&w); free_host(&diag_offset); free_host(&nnz_entries); free(nnz_pos); // pinned memory int* p_col = NULL; ValueType* p_val = NULL; allocate_host(nnz, &p_col); allocate_host(nnz, &p_val); for(int i = 0; i < nnz; ++i) { p_col[i] = col[i]; p_val[i] = val[i]; } free(col); free(val); this->Clear(); this->SetDataPtrCSR(&row_offset, &p_col, &p_val, nnz, nrow, ncol); return true; } template bool HostMatrixCSR::ICFactorize(BaseVector* inv_diag) { assert(this->nrow_ == this->ncol_); assert(this->nnz_ > 0); assert(inv_diag != NULL); HostVector* cast_diag = dynamic_cast*>(inv_diag); assert(cast_diag != NULL); cast_diag->Allocate(this->nrow_); int* diag_offset = NULL; int* nnz_entries = NULL; allocate_host(this->nrow_, &diag_offset); allocate_host(this->nrow_, &nnz_entries); for(int i = 0; i < this->nrow_; ++i) { nnz_entries[i] = 0; } // i=0,..n for(int i = 0; i < this->nrow_; ++i) { int row_begin = this->mat_.row_offset[i]; int row_end = this->mat_.row_offset[i + 1]; for(int j = row_begin; j < row_end; ++j) { nnz_entries[this->mat_.col[j]] = j; } ValueType sum = static_cast(0); bool has_diag = false; // j=0,..i int j; for(j = row_begin; j < row_end; ++j) { int col_j = this->mat_.col[j]; ValueType val_j = this->mat_.val[j]; // Mark diagonal and skip row if(col_j == i) { has_diag = true; break; } // Skip upper triangular if(col_j > i) { break; } int row_begin_j = this->mat_.row_offset[col_j]; int row_diag_j = diag_offset[col_j]; ValueType local_sum = static_cast(0); ValueType inv_diag = this->mat_.val[row_diag_j]; // Check for numeric zero if(inv_diag == static_cast(0)) { LOG_INFO("IC breakdown: division by zero"); FATAL_ERROR(__FILE__, __LINE__); } inv_diag = static_cast(1) / inv_diag; for(int k = row_begin_j; k < row_diag_j; ++k) { int col_k = this->mat_.col[k]; if(nnz_entries[col_k] != 0) { int idx = nnz_entries[col_k]; local_sum += this->mat_.val[k] * this->mat_.val[idx]; } } val_j = (val_j - local_sum) * inv_diag; sum += val_j * val_j; this->mat_.val[j] = val_j; } if(!has_diag) { // Structural zero LOG_INFO("IC breakdown: structural zero diagonal"); FATAL_ERROR(__FILE__, __LINE__); } // Process diagonal entry ValueType diag_entry = std::sqrt(std::abs(this->mat_.val[j] - sum)); this->mat_.val[j] = diag_entry; // Check for numerical zero if(diag_entry == static_cast(0)) { LOG_INFO("IC breakdown: division by zero"); FATAL_ERROR(__FILE__, __LINE__); } // Store inverse diagonal entry cast_diag->vec_[i] = static_cast(1) / diag_entry; // Store diagonal offset diag_offset[i] = j; // Clear nnz entries for(j = row_begin; j < row_end; ++j) { nnz_entries[this->mat_.col[j]] = 0; } } // Free temporary storage free_host(&diag_offset); free_host(&nnz_entries); return true; } template bool HostMatrixCSR::MultiColoring(int& num_colors, int** size_colors, BaseVector* permutation) const { assert(*size_colors == NULL); assert(permutation != NULL); HostVector* cast_perm = dynamic_cast*>(permutation); assert(cast_perm != NULL); // node colors (init value = 0 i.e. no color) int* color = NULL; allocate_host(this->nrow_, &color); memset(color, 0, sizeof(int) * this->nrow_); num_colors = 0; std::vector row_col; for(int ai = 0; ai < this->nrow_; ++ai) { color[ai] = 1; row_col.clear(); row_col.reserve(num_colors + 2); row_col.assign(num_colors + 2, false); for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai != this->mat_.col[aj]) { row_col[color[this->mat_.col[aj]]] = true; } } for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(row_col[color[ai]] == true) { ++color[ai]; } } if(color[ai] > num_colors) { num_colors = color[ai]; } } allocate_host(num_colors, size_colors); set_to_zero_host(num_colors, *size_colors); int* offsets_color = NULL; allocate_host(num_colors, &offsets_color); memset(offsets_color, 0, sizeof(int) * num_colors); for(int i = 0; i < this->nrow_; ++i) { ++(*size_colors)[color[i] - 1]; } int total = 0; for(int i = 1; i < num_colors; ++i) { total += (*size_colors)[i - 1]; offsets_color[i] = total; // LOG_INFO("offsets = " << total); } cast_perm->Allocate(this->nrow_); for(int i = 0; i < permutation->GetSize(); ++i) { cast_perm->vec_[i] = offsets_color[color[i] - 1]; ++offsets_color[color[i] - 1]; } free_host(&color); free_host(&offsets_color); return true; } template bool HostMatrixCSR::MaximalIndependentSet(int& size, BaseVector* permutation) const { assert(permutation != NULL); assert(this->nrow_ == this->ncol_); HostVector* cast_perm = dynamic_cast*>(permutation); assert(cast_perm != NULL); int* mis = NULL; allocate_host(this->nrow_, &mis); memset(mis, 0, sizeof(int) * this->nrow_); size = 0; for(int ai = 0; ai < this->nrow_; ++ai) { if(mis[ai] == 0) { // set the node mis[ai] = 1; ++size; // remove all nbh nodes (without diagonal) for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai != this->mat_.col[aj]) { mis[this->mat_.col[aj]] = -1; } } } } cast_perm->Allocate(this->nrow_); int pos = 0; for(int ai = 0; ai < this->nrow_; ++ai) { if(mis[ai] == 1) { cast_perm->vec_[ai] = pos; ++pos; } else { cast_perm->vec_[ai] = size + ai - pos; } } // Check the permutation // // for (int ai=0; ainrow_; ++ai) { // assert( cast_perm->vec_[ai] >= 0 ); // assert( cast_perm->vec_[ai] < this->nrow_ ); // } free_host(&mis); return true; } template bool HostMatrixCSR::ZeroBlockPermutation(int& size, BaseVector* permutation) const { assert(permutation != NULL); assert(permutation->GetSize() == this->nrow_); assert(permutation->GetSize() == this->ncol_); HostVector* cast_perm = dynamic_cast*>(permutation); assert(cast_perm != NULL); size = 0; for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai == this->mat_.col[aj]) { ++size; } } } int k_z = size; int k_nz = 0; for(int ai = 0; ai < this->nrow_; ++ai) { bool hit = false; for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai == this->mat_.col[aj]) { cast_perm->vec_[ai] = k_nz; ++k_nz; hit = true; } } if(hit == false) { cast_perm->vec_[ai] = k_z; ++k_z; } } return true; } // following R.E.Bank and C.C.Douglas paper template bool HostMatrixCSR::SymbolicMatMatMult(const BaseMatrix& src) { const HostMatrixCSR* cast_mat = dynamic_cast*>(&src); assert(cast_mat != NULL); assert(this->ncol_ == cast_mat->nrow_); std::vector row_offset; std::vector* new_col = new std::vector[this->nrow_]; row_offset.resize(this->nrow_ + 1); row_offset[0] = 0; _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { // loop over the row for(int j = this->mat_.row_offset[i]; j < this->mat_.row_offset[i + 1]; ++j) { int ii = this->mat_.col[j]; // loop corresponding row for(int k = cast_mat->mat_.row_offset[ii]; k < cast_mat->mat_.row_offset[ii + 1]; ++k) { new_col[i].push_back(cast_mat->mat_.col[k]); } } std::sort(new_col[i].begin(), new_col[i].end()); new_col[i].erase(std::unique(new_col[i].begin(), new_col[i].end()), new_col[i].end()); row_offset[i + 1] = int(new_col[i].size()); } for(int i = 0; i < this->nrow_; ++i) { row_offset[i + 1] += row_offset[i]; } this->AllocateCSR(row_offset[this->nrow_], this->nrow_, this->ncol_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_ + 1; ++i) { this->mat_.row_offset[i] = row_offset[i]; } #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { int jj = 0; for(int j = this->mat_.row_offset[i]; j < this->mat_.row_offset[i + 1]; ++j) { this->mat_.col[j] = new_col[i][jj]; ++jj; } } //#ifdef _OPENMP //#pragma omp parallel for //#endif // for (unsigned int i=0; innz_; ++i) // this->mat_.val[i] = static_cast(1); delete[] new_col; return true; } // ---------------------------------------------------------- // original function prod(const spmat1 &A, const spmat2 &B) // ---------------------------------------------------------- // Modified and adapted from AMGCL, // https://github.com/ddemidov/amgcl // MIT License // ---------------------------------------------------------- // CHANGELOG // - adapted interface // sorting is added // ---------------------------------------------------------- template bool HostMatrixCSR::MatMatMult(const BaseMatrix& A, const BaseMatrix& B) { assert((this != &A) && (this != &B)); // this->SymbolicMatMatMult(A, B); // this->NumericMatMatMult (A, B); // // return true; const HostMatrixCSR* cast_mat_A = dynamic_cast*>(&A); const HostMatrixCSR* cast_mat_B = dynamic_cast*>(&B); assert(cast_mat_A != NULL); assert(cast_mat_B != NULL); assert(cast_mat_A->ncol_ == cast_mat_B->nrow_); int n = cast_mat_A->nrow_; int m = cast_mat_B->ncol_; int* row_offset = NULL; allocate_host(n + 1, &row_offset); int* col = NULL; ValueType* val = NULL; for(int i = 0; i < n + 1; ++i) { row_offset[i] = 0; } #ifdef _OPENMP #pragma omp parallel #endif { std::vector marker(m, -1); #ifdef _OPENMP int nt = omp_get_num_threads(); int tid = omp_get_thread_num(); int chunk_size = (n + nt - 1) / nt; int chunk_start = tid * chunk_size; int chunk_end = std::min(n, chunk_start + chunk_size); #else int chunk_start = 0; int chunk_end = n; #endif for(int ia = chunk_start; ia < chunk_end; ++ia) { for(int ja = cast_mat_A->mat_.row_offset[ia], ea = cast_mat_A->mat_.row_offset[ia + 1]; ja < ea; ++ja) { int ca = cast_mat_A->mat_.col[ja]; for(int jb = cast_mat_B->mat_.row_offset[ca], eb = cast_mat_B->mat_.row_offset[ca + 1]; jb < eb; ++jb) { int cb = cast_mat_B->mat_.col[jb]; if(marker[cb] != ia) { marker[cb] = ia; ++row_offset[ia + 1]; } } } } std::fill(marker.begin(), marker.end(), -1); #ifdef _OPENMP #pragma omp barrier #endif #ifdef _OPENMP #pragma omp single #endif { for(int i = 1; i < n + 1; ++i) { row_offset[i] += row_offset[i - 1]; } allocate_host(row_offset[n], &col); allocate_host(row_offset[n], &val); } for(int ia = chunk_start; ia < chunk_end; ++ia) { int row_begin = row_offset[ia]; int row_end = row_begin; for(int ja = cast_mat_A->mat_.row_offset[ia], ea = cast_mat_A->mat_.row_offset[ia + 1]; ja < ea; ++ja) { int ca = cast_mat_A->mat_.col[ja]; ValueType va = cast_mat_A->mat_.val[ja]; for(int jb = cast_mat_B->mat_.row_offset[ca], eb = cast_mat_B->mat_.row_offset[ca + 1]; jb < eb; ++jb) { int cb = cast_mat_B->mat_.col[jb]; ValueType vb = cast_mat_B->mat_.val[jb]; if(marker[cb] < row_begin) { marker[cb] = row_end; col[row_end] = cb; val[row_end] = va * vb; ++row_end; } else { val[marker[cb]] += va * vb; } } } } } this->SetDataPtrCSR( &row_offset, &col, &val, row_offset[n], cast_mat_A->nrow_, cast_mat_B->ncol_); // Sorting the col (per row) // Bubble sort algorithm #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { for(int j = this->mat_.row_offset[i]; j < this->mat_.row_offset[i + 1]; ++j) { for(int jj = this->mat_.row_offset[i]; jj < this->mat_.row_offset[i + 1] - 1; ++jj) { if(this->mat_.col[jj] > this->mat_.col[jj + 1]) { // swap elements int ind = this->mat_.col[jj]; ValueType val = this->mat_.val[jj]; this->mat_.col[jj] = this->mat_.col[jj + 1]; this->mat_.val[jj] = this->mat_.val[jj + 1]; this->mat_.col[jj + 1] = ind; this->mat_.val[jj + 1] = val; } } } } return true; } // following R.E.Bank and C.C.Douglas paper // this = A * B template bool HostMatrixCSR::SymbolicMatMatMult(const BaseMatrix& A, const BaseMatrix& B) { const HostMatrixCSR* cast_mat_A = dynamic_cast*>(&A); const HostMatrixCSR* cast_mat_B = dynamic_cast*>(&B); assert(cast_mat_A != NULL); assert(cast_mat_B != NULL); assert(cast_mat_A->ncol_ == cast_mat_B->nrow_); std::vector row_offset; std::vector* new_col = new std::vector[cast_mat_A->nrow_]; row_offset.resize(cast_mat_A->nrow_ + 1); row_offset[0] = 0; _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < cast_mat_A->nrow_; ++i) { // loop over the row for(int j = cast_mat_A->mat_.row_offset[i]; j < cast_mat_A->mat_.row_offset[i + 1]; ++j) { int ii = cast_mat_A->mat_.col[j]; // new_col[i].push_back(ii); // loop corresponding row for(int k = cast_mat_B->mat_.row_offset[ii]; k < cast_mat_B->mat_.row_offset[ii + 1]; ++k) { new_col[i].push_back(cast_mat_B->mat_.col[k]); } } std::sort(new_col[i].begin(), new_col[i].end()); new_col[i].erase(std::unique(new_col[i].begin(), new_col[i].end()), new_col[i].end()); row_offset[i + 1] = int(new_col[i].size()); } for(int i = 0; i < cast_mat_A->nrow_; ++i) { row_offset[i + 1] += row_offset[i]; } this->AllocateCSR(row_offset[cast_mat_A->nrow_], cast_mat_A->nrow_, cast_mat_B->ncol_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < cast_mat_A->nrow_ + 1; ++i) { this->mat_.row_offset[i] = row_offset[i]; } #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < cast_mat_A->nrow_; ++i) { int jj = 0; for(int j = this->mat_.row_offset[i]; j < this->mat_.row_offset[i + 1]; ++j) { this->mat_.col[j] = new_col[i][jj]; ++jj; } } //#ifdef _OPENMP //#pragma omp parallel for //#endif // for (unsigned int i=0; innz_; ++i) // this->mat_.val[i] = static_cast(1); delete[] new_col; return true; } template bool HostMatrixCSR::NumericMatMatMult(const BaseMatrix& A, const BaseMatrix& B) { const HostMatrixCSR* cast_mat_A = dynamic_cast*>(&A); const HostMatrixCSR* cast_mat_B = dynamic_cast*>(&B); assert(cast_mat_A != NULL); assert(cast_mat_B != NULL); assert(cast_mat_A->ncol_ == cast_mat_B->nrow_); assert(this->nrow_ == cast_mat_A->nrow_); assert(this->ncol_ == cast_mat_B->ncol_); _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < cast_mat_A->nrow_; ++i) { // loop over the row for(int j = cast_mat_A->mat_.row_offset[i]; j < cast_mat_A->mat_.row_offset[i + 1]; ++j) { int ii = cast_mat_A->mat_.col[j]; // loop corresponding row for(int k = cast_mat_B->mat_.row_offset[ii]; k < cast_mat_B->mat_.row_offset[ii + 1]; ++k) { for(int p = this->mat_.row_offset[i]; p < this->mat_.row_offset[i + 1]; ++p) { if(cast_mat_B->mat_.col[k] == this->mat_.col[p]) { this->mat_.val[p] += cast_mat_B->mat_.val[k] * cast_mat_A->mat_.val[j]; break; } } } } } return true; } template bool HostMatrixCSR::SymbolicPower(int p) { assert(p > 1); // The optimal values for the firsts values if(p == 2) { this->SymbolicMatMatMult(*this); } if(p == 3) { HostMatrixCSR tmp(this->local_backend_); tmp.CopyFrom(*this); this->SymbolicPower(2); this->SymbolicMatMatMult(tmp); } if(p == 4) { this->SymbolicPower(2); this->SymbolicPower(2); } if(p == 5) { HostMatrixCSR tmp(this->local_backend_); tmp.CopyFrom(*this); this->SymbolicPower(4); this->SymbolicMatMatMult(tmp); } if(p == 6) { this->SymbolicPower(2); this->SymbolicPower(3); } if(p == 7) { HostMatrixCSR tmp(this->local_backend_); tmp.CopyFrom(*this); this->SymbolicPower(6); this->SymbolicMatMatMult(tmp); } if(p == 8) { HostMatrixCSR tmp(this->local_backend_); tmp.CopyFrom(*this); this->SymbolicPower(6); tmp.SymbolicPower(2); this->SymbolicMatMatMult(tmp); } if(p > 8) { HostMatrixCSR tmp(this->local_backend_); tmp.CopyFrom(*this); for(int i = 0; i < p; ++i) { this->SymbolicMatMatMult(tmp); } } return true; } template bool HostMatrixCSR::ILUpFactorizeNumeric(int p, const BaseMatrix& mat) { const HostMatrixCSR* cast_mat = dynamic_cast*>(&mat); assert(cast_mat != NULL); assert(cast_mat->nrow_ == this->nrow_); assert(cast_mat->ncol_ == this->ncol_); assert(this->nnz_ > 0); assert(cast_mat->nnz_ > 0); int* row_offset = NULL; int* ind_diag = NULL; int* levels = NULL; ValueType* val = NULL; allocate_host(cast_mat->nrow_ + 1, &row_offset); allocate_host(cast_mat->nrow_, &ind_diag); allocate_host(cast_mat->nnz_, &levels); allocate_host(cast_mat->nnz_, &val); int inf_level = 99999; int nnz = 0; _set_omp_backend_threads(this->local_backend_, this->nrow_); // find diagonals #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < cast_mat->nrow_; ++ai) { for(int aj = cast_mat->mat_.row_offset[ai]; aj < cast_mat->mat_.row_offset[ai + 1]; ++aj) { if(ai == cast_mat->mat_.col[aj]) { ind_diag[ai] = aj; break; } } } // init row_offset #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < cast_mat->nrow_ + 1; ++i) { row_offset[i] = 0; } // init inf levels #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < cast_mat->nnz_; ++i) { levels[i] = inf_level; } #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < cast_mat->nnz_; ++i) { val[i] = static_cast(0); } // fill levels and values #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < cast_mat->nrow_; ++ai) { for(int aj = cast_mat->mat_.row_offset[ai]; aj < cast_mat->mat_.row_offset[ai + 1]; ++aj) { for(int ajj = this->mat_.row_offset[ai]; ajj < this->mat_.row_offset[ai + 1]; ++ajj) { if(cast_mat->mat_.col[aj] == this->mat_.col[ajj]) { val[aj] = this->mat_.val[ajj]; levels[aj] = 0; break; } } } } // ai = 1 to N for(int ai = 1; ai < cast_mat->nrow_; ++ai) { // ak = 1 to ai-1 for(int ak = cast_mat->mat_.row_offset[ai]; ai > cast_mat->mat_.col[ak]; ++ak) { if(levels[ak] <= p) { val[ak] /= val[ind_diag[cast_mat->mat_.col[ak]]]; // aj = ak+1 to N for(int aj = ak + 1; aj < cast_mat->mat_.row_offset[ai + 1]; ++aj) { ValueType val_kj = static_cast(0); int level_kj = inf_level; // find a_k,j for(int kj = cast_mat->mat_.row_offset[cast_mat->mat_.col[ak]]; kj < cast_mat->mat_.row_offset[cast_mat->mat_.col[ak] + 1]; ++kj) { if(cast_mat->mat_.col[aj] == cast_mat->mat_.col[kj]) { level_kj = levels[kj]; val_kj = val[kj]; break; } } int lev = level_kj + levels[ak] + 1; if(levels[aj] > lev) { levels[aj] = lev; } // a_i,j = a_i,j - a_i,k * a_k,j val[aj] -= val[ak] * val_kj; } } } for(int ak = cast_mat->mat_.row_offset[ai]; ak < cast_mat->mat_.row_offset[ai + 1]; ++ak) { if(levels[ak] > p) { levels[ak] = inf_level; val[ak] = static_cast(0); } else { ++row_offset[ai + 1]; } } } row_offset[0] = this->mat_.row_offset[0]; row_offset[1] = this->mat_.row_offset[1]; for(int i = 0; i < cast_mat->nrow_; ++i) { row_offset[i + 1] += row_offset[i]; } nnz = row_offset[cast_mat->nrow_]; this->AllocateCSR(nnz, cast_mat->nrow_, cast_mat->ncol_); int jj = 0; for(int i = 0; i < cast_mat->nrow_; ++i) { for(int j = cast_mat->mat_.row_offset[i]; j < cast_mat->mat_.row_offset[i + 1]; ++j) { if(levels[j] <= p) { this->mat_.col[jj] = cast_mat->mat_.col[j]; this->mat_.val[jj] = val[j]; ++jj; } } } assert(jj == nnz); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_ + 1; ++i) { this->mat_.row_offset[i] = row_offset[i]; } free_host(&row_offset); free_host(&ind_diag); free_host(&levels); free_host(&val); return true; } template bool HostMatrixCSR::MatrixAdd(const BaseMatrix& mat, ValueType alpha, ValueType beta, bool structure) { const HostMatrixCSR* cast_mat = dynamic_cast*>(&mat); assert(cast_mat != NULL); assert(cast_mat->nrow_ == this->nrow_); assert(cast_mat->ncol_ == this->ncol_); assert(this->nnz_ > 0); assert(cast_mat->nnz_ > 0); _set_omp_backend_threads(this->local_backend_, this->nrow_); // the structure is sub-set if(structure == false) { // CSR should be sorted #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < cast_mat->nrow_; ++ai) { int first_col = cast_mat->mat_.row_offset[ai]; for(int ajj = this->mat_.row_offset[ai]; ajj < this->mat_.row_offset[ai + 1]; ++ajj) { for(int aj = first_col; aj < cast_mat->mat_.row_offset[ai + 1]; ++aj) { if(cast_mat->mat_.col[aj] == this->mat_.col[ajj]) { this->mat_.val[ajj] = alpha * this->mat_.val[ajj] + beta * cast_mat->mat_.val[aj]; ++first_col; break; } } } } } else { std::vector row_offset; std::vector* new_col = new std::vector[this->nrow_]; HostMatrixCSR tmp(this->local_backend_); tmp.CopyFrom(*this); row_offset.resize(this->nrow_ + 1); row_offset[0] = 0; #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { for(int j = this->mat_.row_offset[i]; j < this->mat_.row_offset[i + 1]; ++j) { new_col[i].push_back(this->mat_.col[j]); } for(int k = cast_mat->mat_.row_offset[i]; k < cast_mat->mat_.row_offset[i + 1]; ++k) { new_col[i].push_back(cast_mat->mat_.col[k]); } std::sort(new_col[i].begin(), new_col[i].end()); new_col[i].erase(std::unique(new_col[i].begin(), new_col[i].end()), new_col[i].end()); row_offset[i + 1] = int(new_col[i].size()); } for(int i = 0; i < this->nrow_; ++i) { row_offset[i + 1] += row_offset[i]; } this->AllocateCSR(row_offset[this->nrow_], this->nrow_, this->ncol_); // copy structure #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_ + 1; ++i) { this->mat_.row_offset[i] = row_offset[i]; } #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { int jj = 0; for(int j = this->mat_.row_offset[i]; j < this->mat_.row_offset[i + 1]; ++j) { this->mat_.col[j] = new_col[i][jj]; ++jj; } } // add values #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { int Aj = tmp.mat_.row_offset[i]; int Bj = cast_mat->mat_.row_offset[i]; for(int j = this->mat_.row_offset[i]; j < this->mat_.row_offset[i + 1]; ++j) { for(int jj = Aj; jj < tmp.mat_.row_offset[i + 1]; ++jj) { if(this->mat_.col[j] == tmp.mat_.col[jj]) { this->mat_.val[j] += alpha * tmp.mat_.val[jj]; ++Aj; break; } } for(int jj = Bj; jj < cast_mat->mat_.row_offset[i + 1]; ++jj) { if(this->mat_.col[j] == cast_mat->mat_.col[jj]) { this->mat_.val[j] += beta * cast_mat->mat_.val[jj]; ++Bj; break; } } } } delete[] new_col; } return true; } template bool HostMatrixCSR::Gershgorin(ValueType& lambda_min, ValueType& lambda_max) const { _set_omp_backend_threads(this->local_backend_, this->nrow_); lambda_min = static_cast(0); lambda_max = static_cast(0); // TODO (parallel max, min) //#ifdef _OPENMP // #pragma omp parallel for //#endif for(int ai = 0; ai < this->nrow_; ++ai) { ValueType sum = static_cast(0); ValueType diag = static_cast(0); for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai != this->mat_.col[aj]) { sum += std::abs(this->mat_.val[aj]); } else { diag = this->mat_.val[aj]; } } if(sum + diag > lambda_max) { lambda_max = sum + diag; } if(diag - sum < lambda_min) { lambda_min = diag - sum; } } return true; } template bool HostMatrixCSR::Scale(ValueType alpha) { _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nnz_; ++ai) { this->mat_.val[ai] *= alpha; } return true; } template bool HostMatrixCSR::ScaleDiagonal(ValueType alpha) { _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai == this->mat_.col[aj]) { this->mat_.val[aj] *= alpha; break; } } } return true; } template bool HostMatrixCSR::ScaleOffDiagonal(ValueType alpha) { _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai != this->mat_.col[aj]) { this->mat_.val[aj] *= alpha; } } } return true; } template bool HostMatrixCSR::AddScalar(ValueType alpha) { _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nnz_; ++ai) { this->mat_.val[ai] += alpha; } return true; } template bool HostMatrixCSR::AddScalarDiagonal(ValueType alpha) { _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai == this->mat_.col[aj]) { this->mat_.val[aj] += alpha; break; } } } return true; } template bool HostMatrixCSR::AddScalarOffDiagonal(ValueType alpha) { _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { if(ai != this->mat_.col[aj]) { this->mat_.val[aj] += alpha; } } } return true; } template bool HostMatrixCSR::DiagonalMatrixMultR(const BaseVector& diag) { assert(diag.GetSize() == this->ncol_); const HostVector* cast_diag = dynamic_cast*>(&diag); assert(cast_diag != NULL); _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { this->mat_.val[aj] *= cast_diag->vec_[this->mat_.col[aj]]; } } return true; } template bool HostMatrixCSR::DiagonalMatrixMultL(const BaseVector& diag) { assert(diag.GetSize() == this->ncol_); const HostVector* cast_diag = dynamic_cast*>(&diag); assert(cast_diag != NULL); _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int ai = 0; ai < this->nrow_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { this->mat_.val[aj] *= cast_diag->vec_[ai]; } } return true; } template bool HostMatrixCSR::Compress(double drop_off) { if(this->nnz_ > 0) { std::vector row_offset; HostMatrixCSR tmp(this->local_backend_); tmp.CopyFrom(*this); row_offset.resize(this->nrow_ + 1); row_offset[0] = 0; _set_omp_backend_threads(this->local_backend_, this->nrow_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { row_offset[i + 1] = 0; for(int j = this->mat_.row_offset[i]; j < this->mat_.row_offset[i + 1]; ++j) { if((std::abs(this->mat_.val[j]) > drop_off) || (this->mat_.col[j] == i)) { row_offset[i + 1] += 1; } } } for(int i = 0; i < this->nrow_; ++i) { row_offset[i + 1] += row_offset[i]; } this->AllocateCSR(row_offset[this->nrow_], this->nrow_, this->ncol_); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_ + 1; ++i) { this->mat_.row_offset[i] = row_offset[i]; } #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { int jj = this->mat_.row_offset[i]; for(int j = tmp.mat_.row_offset[i]; j < tmp.mat_.row_offset[i + 1]; ++j) { if((std::abs(tmp.mat_.val[j]) > drop_off) || (tmp.mat_.col[j] == i)) { this->mat_.col[jj] = tmp.mat_.col[j]; this->mat_.val[jj] = tmp.mat_.val[j]; ++jj; } } } } return true; } template bool HostMatrixCSR::Transpose(void) { if(this->nnz_ > 0) { HostMatrixCSR tmp(this->local_backend_); tmp.CopyFrom(*this); tmp.Transpose(this); } return true; } template bool HostMatrixCSR::Transpose(BaseMatrix* T) const { assert(T != NULL); HostMatrixCSR* cast_T = dynamic_cast*>(T); assert(cast_T != NULL); if(this->nnz_ > 0) { cast_T->Clear(); cast_T->AllocateCSR(this->nnz_, this->ncol_, this->nrow_); for(int i = 0; i < cast_T->nnz_; ++i) { cast_T->mat_.row_offset[this->mat_.col[i] + 1] += 1; } for(int i = 0; i < cast_T->nrow_; ++i) { cast_T->mat_.row_offset[i + 1] += cast_T->mat_.row_offset[i]; } for(int ai = 0; ai < cast_T->ncol_; ++ai) { for(int aj = this->mat_.row_offset[ai]; aj < this->mat_.row_offset[ai + 1]; ++aj) { int ind_col = this->mat_.col[aj]; int ind = cast_T->mat_.row_offset[ind_col]; cast_T->mat_.col[ind] = ai; cast_T->mat_.val[ind] = this->mat_.val[aj]; cast_T->mat_.row_offset[ind_col] += 1; } } int shift = 0; for(int i = 0; i < cast_T->nrow_; ++i) { int tmp = cast_T->mat_.row_offset[i]; cast_T->mat_.row_offset[i] = shift; shift = tmp; } cast_T->mat_.row_offset[cast_T->nrow_] = shift; assert(this->nnz_ == shift); } return true; } template bool HostMatrixCSR::Sort(void) { if(this->nnz_ > 0) { #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { for(int j = this->mat_.row_offset[i]; j < this->mat_.row_offset[i + 1]; ++j) { for(int jj = this->mat_.row_offset[i]; jj < this->mat_.row_offset[i + 1] - 1; ++jj) { if(this->mat_.col[jj] > this->mat_.col[jj + 1]) { // swap elements int ind = this->mat_.col[jj]; ValueType val = this->mat_.val[jj]; this->mat_.col[jj] = this->mat_.col[jj + 1]; this->mat_.val[jj] = this->mat_.val[jj + 1]; this->mat_.col[jj + 1] = ind; this->mat_.val[jj + 1] = val; } } } } } return true; } template bool HostMatrixCSR::Permute(const BaseVector& permutation) { assert((permutation.GetSize() == this->nrow_) && (permutation.GetSize() == this->ncol_)); if(this->nnz_ > 0) { const HostVector* cast_perm = dynamic_cast*>(&permutation); assert(cast_perm != NULL); _set_omp_backend_threads(this->local_backend_, this->nrow_); // Calculate nnz per row int* row_nnz = NULL; allocate_host(this->nrow_, &row_nnz); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { row_nnz[i] = this->mat_.row_offset[i + 1] - this->mat_.row_offset[i]; } // Permute vector of nnz per row int* perm_row_nnz = NULL; allocate_host(this->nrow_, &perm_row_nnz); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { perm_row_nnz[cast_perm->vec_[i]] = row_nnz[i]; } // Calculate new nnz int* perm_nnz = NULL; allocate_host(this->nrow_ + 1, &perm_nnz); int sum = 0; for(int i = 0; i < this->nrow_; ++i) { perm_nnz[i] = sum; sum += perm_row_nnz[i]; } perm_nnz[this->nrow_] = sum; // Permute rows int* col = NULL; ValueType* val = NULL; allocate_host(this->nnz_, &col); allocate_host(this->nnz_, &val); #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { int permIndex = perm_nnz[cast_perm->vec_[i]]; int prevIndex = this->mat_.row_offset[i]; for(int j = 0; j < row_nnz[i]; ++j) { col[permIndex + j] = this->mat_.col[prevIndex + j]; val[permIndex + j] = this->mat_.val[prevIndex + j]; } } // Permute columns #ifdef _OPENMP #pragma omp parallel for #endif for(int i = 0; i < this->nrow_; ++i) { int row_index = perm_nnz[i]; for(int j = 0; j < perm_row_nnz[i]; ++j) { int k = j - 1; int aComp = col[row_index + j]; int comp = cast_perm->vec_[aComp]; for(; k >= 0; --k) { if(this->mat_.col[row_index + k] > comp) { this->mat_.val[row_index + k + 1] = this->mat_.val[row_index + k]; this->mat_.col[row_index + k + 1] = this->mat_.col[row_index + k]; } else { break; } } this->mat_.val[row_index + k + 1] = val[row_index + j]; this->mat_.col[row_index + k + 1] = comp; } } free_host(&this->mat_.row_offset); this->mat_.row_offset = perm_nnz; free_host(&col); free_host(&val); free_host(&row_nnz); free_host(&perm_row_nnz); } return true; } template bool HostMatrixCSR::CMK(BaseVector* permutation) const { assert(this->nnz_ > 0); assert(permutation != NULL); HostVector* cast_perm = dynamic_cast*>(permutation); assert(cast_perm != NULL); cast_perm->Clear(); cast_perm->Allocate(this->nrow_); int next = 0; int head = 0; int tmp = 0; int test = 1; int maxdeg = 0; int* nd = NULL; int* marker = NULL; int* levset = NULL; int* nextlevset = NULL; allocate_host(this->nrow_, &nd); allocate_host(this->nrow_, &marker); allocate_host(this->nrow_, &levset); allocate_host(this->nrow_, &nextlevset); int qlength = 1; for(int k = 0; k < this->nrow_; ++k) { marker[k] = 0; nd[k] = this->mat_.row_offset[k + 1] - this->mat_.row_offset[k] - 1; if(nd[k] > maxdeg) { maxdeg = nd[k]; } } head = this->mat_.col[0]; levset[0] = head; cast_perm->vec_[0] = 0; ++next; marker[head] = 1; while(next < this->nrow_) { int position = 0; for(int h = 0; h < qlength; ++h) { head = levset[h]; for(int k = this->mat_.row_offset[head]; k < this->mat_.row_offset[head + 1]; ++k) { tmp = this->mat_.col[k]; if((marker[tmp] == 0) && (tmp != head)) { nextlevset[position] = tmp; marker[tmp] = 1; cast_perm->vec_[tmp] = next; ++next; ++position; } } } qlength = position; while(test == 1) { test = 0; for(int j = position - 1; j > 0; --j) { if(nd[nextlevset[j]] < nd[nextlevset[j - 1]]) { tmp = nextlevset[j]; nextlevset[j] = nextlevset[j - 1]; nextlevset[j - 1] = tmp; test = 1; } } } for(int i = 0; i < position; ++i) { levset[i] = nextlevset[i]; } if(qlength == 0) { for(int i = 0; i < this->nrow_; ++i) { if(marker[i] == 0) { levset[0] = i; qlength = 1; cast_perm->vec_[i] = next; marker[i] = 1; ++next; } } } } free_host(&nd); free_host(&marker); free_host(&levset); free_host(&nextlevset); return true; } template bool HostMatrixCSR::RCMK(BaseVector* permutation) const { HostVector* cast_perm = dynamic_cast*>(permutation); assert(cast_perm != NULL); cast_perm->Clear(); cast_perm->Allocate(this->nrow_); HostVector tmp_perm(this->local_backend_); this->CMK(&tmp_perm); for(int i = 0; i < this->nrow_; ++i) { cast_perm->vec_[i] = this->nrow_ - tmp_perm.vec_[i] - 1; } return true; } template bool HostMatrixCSR::ConnectivityOrder(BaseVector* permutation) const { HostVector* cast_perm = dynamic_cast*>(permutation); assert(cast_perm != NULL); cast_perm->Clear(); cast_perm->Allocate(this->nrow_); std::multimap map; for(int i = 0; i < this->nrow_; ++i) { map.insert( std::pair(this->mat_.row_offset[i + 1] - this->mat_.row_offset[i], i)); } std::multimap::iterator it = map.begin(); for(int i = 0; i < this->nrow_; ++i, ++it) { cast_perm->vec_[i] = it->second; } return true; } template bool HostMatrixCSR::CreateFromMap(const BaseVector& map, int n, int m) { assert(map.GetSize() == n); const HostVector* cast_map = dynamic_cast*>(&map); assert(cast_map != NULL); int* row_nnz = NULL; int* row_buffer = NULL; allocate_host(m, &row_nnz); allocate_host(m + 1, &row_buffer); set_to_zero_host(m, row_nnz); int nnz = 0; for(int i = 0; i < n; ++i) { assert(cast_map->vec_[i] < m); if(cast_map->vec_[i] < 0) { continue; } ++row_nnz[cast_map->vec_[i]]; ++nnz; } this->Clear(); this->AllocateCSR(nnz, m, n); this->mat_.row_offset[0] = 0; row_buffer[0] = 0; for(int i = 0; i < m; ++i) { this->mat_.row_offset[i + 1] = this->mat_.row_offset[i] + row_nnz[i]; row_buffer[i + 1] = this->mat_.row_offset[i + 1]; } for(int i = 0; i < nnz; ++i) { if(cast_map->vec_[i] < 0) { continue; } this->mat_.col[row_buffer[cast_map->vec_[i]]] = i; this->mat_.val[i] = static_cast(1); row_buffer[cast_map->vec_[i]]++; } assert(this->mat_.row_offset[m] == nnz); free_host(&row_nnz); free_host(&row_buffer); return true; } template bool HostMatrixCSR::CreateFromMap(const BaseVector& map, int n, int m, BaseMatrix* pro) { assert(map.GetSize() == n); assert(pro != NULL); const HostVector* cast_map = dynamic_cast*>(&map); HostMatrixCSR* cast_pro = dynamic_cast*>(pro); assert(cast_pro != NULL); assert(cast_map != NULL); // Build restriction operator this->CreateFromMap(map, n, m); int nnz = this->GetNnz(); // Build prolongation operator cast_pro->Clear(); cast_pro->AllocateCSR(nnz, n, m); int k = 0; for(int i = 0; i < n; ++i) { cast_pro->mat_.row_offset[i + 1] = cast_pro->mat_.row_offset[i]; // Check for entry in i-th row if(cast_map->vec_[i] < 0) { continue; } assert(cast_map->vec_[i] < m); ++cast_pro->mat_.row_offset[i + 1]; cast_pro->mat_.col[k] = cast_map->vec_[i]; cast_pro->mat_.val[k] = static_cast(1); ++k; } return true; } // ---------------------------------------------------------- // original function connect(const spmat &A, // float eps_strong) // ---------------------------------------------------------- // Modified and adapted from AMGCL, // https://github.com/ddemidov/amgcl // MIT License // ---------------------------------------------------------- // CHANGELOG // - adapted interface // ---------------------------------------------------------- template bool HostMatrixCSR::AMGConnect(ValueType eps, BaseVector* connections) const { assert(connections != NULL); HostVector* cast_conn = dynamic_cast*>(connections); assert(cast_conn != NULL); cast_conn->Clear(); cast_conn->Allocate(this->nnz_); ValueType eps2 = eps * eps; HostVector vec_diag(this->local_backend_); vec_diag.Allocate(this->nrow_); this->ExtractDiagonal(&vec_diag); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 1024) #endif for(int i = 0; i < this->nrow_; ++i) { ValueType eps_dia_i = eps2 * vec_diag.vec_[i]; for(int j = this->mat_.row_offset[i]; j < this->mat_.row_offset[i + 1]; ++j) { int c = this->mat_.col[j]; ValueType v = this->mat_.val[j]; cast_conn->vec_[j] = (c != i) && (v * v > eps_dia_i * vec_diag.vec_[c]); } } return true; } // ---------------------------------------------------------- // original function aggregates( const spmat &A, // const std::vector &S ) // ---------------------------------------------------------- // Modified and adapted from AMGCL, // https://github.com/ddemidov/amgcl // MIT License // ---------------------------------------------------------- // CHANGELOG // - adapted interface // ---------------------------------------------------------- template bool HostMatrixCSR::AMGAggregate(const BaseVector& connections, BaseVector* aggregates) const { assert(aggregates != NULL); HostVector* cast_agg = dynamic_cast*>(aggregates); const HostVector* cast_conn = dynamic_cast*>(&connections); assert(cast_agg != NULL); assert(cast_conn != NULL); aggregates->Clear(); aggregates->Allocate(this->nrow_); const int undefined = -1; const int removed = -2; // Remove nodes without neighbours int max_neib = 0; for(int i = 0; i < this->nrow_; ++i) { int j = this->mat_.row_offset[i]; int e = this->mat_.row_offset[i + 1]; max_neib = std::max(e - j, max_neib); int state = removed; for(; j < e; ++j) { if(cast_conn->vec_[j]) { state = undefined; break; } } cast_agg->vec_[i] = state; } std::vector neib; neib.reserve(max_neib); int last_g = -1; // Perform plain aggregation for(int i = 0; i < this->nrow_; ++i) { if(cast_agg->vec_[i] != undefined) { continue; } // The point is not adjacent to a core of any previous aggregate: // so its a seed of a new aggregate. cast_agg->vec_[i] = ++last_g; neib.clear(); // Include its neighbors as well. for(int j = this->mat_.row_offset[i], e = this->mat_.row_offset[i + 1]; j < e; ++j) { int c = this->mat_.col[j]; if(cast_conn->vec_[j] && cast_agg->vec_[c] != removed) { cast_agg->vec_[c] = last_g; neib.push_back(c); } } // Temporarily mark undefined points adjacent to the new aggregate as // belonging to the aggregate. If nobody claims them later, they will // stay here. for(typename std::vector::const_iterator nb = neib.begin(); nb != neib.end(); ++nb) { for(int j = this->mat_.row_offset[*nb], e = this->mat_.row_offset[*nb + 1]; j < e; ++j) { if(cast_conn->vec_[j] && cast_agg->vec_[this->mat_.col[j]] == undefined) { cast_agg->vec_[this->mat_.col[j]] = last_g; } } } } return true; } // ---------------------------------------------------------- // original function interp(const sparse::matrix &A, const params &prm) // ---------------------------------------------------------- // Modified and adapted from AMGCL, // https://github.com/ddemidov/amgcl // MIT License // ---------------------------------------------------------- // CHANGELOG // - adapted interface // ---------------------------------------------------------- template