/* ************************************************************************ * Copyright (c) 2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * ************************************************************************ */ #include "host_conversion.hpp" #include "../../utils/allocate_free.hpp" #include "../../utils/def.hpp" #include "../../utils/log.hpp" #include "../matrix_formats.hpp" #include "../matrix_formats_ind.hpp" #include #include #ifdef _OPENMP #include #else #define omp_set_num_threads(num) ; #endif namespace rocalution { template bool csr_to_dense(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, const MatrixCSR& src, MatrixDENSE* dst) { assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); omp_set_num_threads(omp_threads); allocate_host(nrow * ncol, &dst->val); set_to_zero_host(nrow * ncol, dst->val); #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { for(IndexType j = src.row_offset[i]; j < src.row_offset[i + 1]; ++j) { dst->val[DENSE_IND(i, src.col[j], nrow, ncol)] = src.val[j]; } } return true; } template bool dense_to_csr(int omp_threads, IndexType nrow, IndexType ncol, const MatrixDENSE& src, MatrixCSR* dst, IndexType* nnz) { assert(nrow > 0); assert(ncol > 0); omp_set_num_threads(omp_threads); allocate_host(nrow + 1, &dst->row_offset); set_to_zero_host(nrow + 1, dst->row_offset); #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { for(IndexType j = 0; j < ncol; ++j) { if(src.val[DENSE_IND(i, j, nrow, ncol)] != static_cast(0)) { dst->row_offset[i] += 1; } } } *nnz = 0; for(IndexType i = 0; i < nrow; ++i) { IndexType tmp = dst->row_offset[i]; dst->row_offset[i] = *nnz; *nnz += tmp; } dst->row_offset[nrow] = *nnz; allocate_host(*nnz, &dst->col); allocate_host(*nnz, &dst->val); set_to_zero_host(*nnz, dst->col); set_to_zero_host(*nnz, dst->val); #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { IndexType ind = dst->row_offset[i]; for(IndexType j = 0; j < ncol; ++j) { if(src.val[DENSE_IND(i, j, nrow, ncol)] != static_cast(0)) { dst->val[ind] = src.val[DENSE_IND(i, j, nrow, ncol)]; dst->col[ind] = j; ++ind; } } } return true; } template bool csr_to_mcsr(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, const MatrixCSR& src, MatrixMCSR* dst) { assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); // No support for non-squared matrices if(nrow != ncol) { return false; } omp_set_num_threads(omp_threads); // Pre-analysing step to check zero diagonal entries IndexType diag_entries = 0; for(int i = 0; i < nrow; ++i) { for(int j = src.row_offset[i]; j < src.row_offset[i + 1]; ++j) { if(i == src.col[j]) { ++diag_entries; } } } IndexType zero_diag_entries = nrow - diag_entries; // MCSR does not support zero diagonal entries (yet) if(zero_diag_entries > 0) { return false; } allocate_host(nrow + 1, &dst->row_offset); allocate_host(nnz, &dst->col); allocate_host(nnz, &dst->val); set_to_zero_host(nrow + 1, dst->row_offset); set_to_zero_host(nnz, dst->col); set_to_zero_host(nnz, dst->val); for(IndexType ai = 0; ai < nrow + 1; ++ai) { dst->row_offset[ai] = nrow + src.row_offset[ai] - ai; } #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType ai = 0; ai < nrow; ++ai) { IndexType correction = ai; for(IndexType aj = src.row_offset[ai]; aj < src.row_offset[ai + 1]; ++aj) { if(ai != src.col[aj]) { IndexType ind = nrow + aj - correction; // non-diag dst->col[ind] = src.col[aj]; dst->val[ind] = src.val[aj]; } else { // diag dst->val[ai] = src.val[aj]; ++correction; } } } if(dst->row_offset[nrow] != src.row_offset[nrow]) { return false; } return true; } template bool mcsr_to_csr(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, const MatrixMCSR& src, MatrixCSR* dst) { assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); // Only support square matrices if(nrow != ncol) { return false; } omp_set_num_threads(omp_threads); allocate_host(nrow + 1, &dst->row_offset); allocate_host(nnz, &dst->col); allocate_host(nnz, &dst->val); set_to_zero_host(nrow + 1, dst->row_offset); set_to_zero_host(nnz, dst->col); set_to_zero_host(nnz, dst->val); for(IndexType ai = 0; ai < nrow + 1; ++ai) { dst->row_offset[ai] = src.row_offset[ai] - nrow + ai; } #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType ai = 0; ai < nrow; ++ai) { for(IndexType aj = src.row_offset[ai]; aj < src.row_offset[ai + 1]; ++aj) { IndexType ind = aj - nrow + ai; // non-diag dst->col[ind] = src.col[aj]; dst->val[ind] = src.val[aj]; } IndexType diag_ind = src.row_offset[ai + 1] - nrow + ai; // diag dst->val[diag_ind] = src.val[ai]; dst->col[diag_ind] = ai; } if(dst->row_offset[nrow] != src.row_offset[nrow]) { return false; } // Sorting the col (per row) // Bubble sort algorithm #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { for(IndexType j = dst->row_offset[i]; j < dst->row_offset[i + 1]; ++j) { for(IndexType jj = dst->row_offset[i]; jj < dst->row_offset[i + 1] - 1; ++jj) { if(dst->col[jj] > dst->col[jj + 1]) { // swap elements IndexType ind = dst->col[jj]; ValueType val = dst->val[jj]; dst->col[jj] = dst->col[jj + 1]; dst->val[jj] = dst->val[jj + 1]; dst->col[jj + 1] = ind; dst->val[jj + 1] = val; } } } } return true; } template bool csr_to_coo(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, const MatrixCSR& src, MatrixCOO* dst) { assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); omp_set_num_threads(omp_threads); allocate_host(nnz, &dst->row); allocate_host(nnz, &dst->col); allocate_host(nnz, &dst->val); set_to_zero_host(nnz, dst->row); set_to_zero_host(nnz, dst->col); set_to_zero_host(nnz, dst->val); #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { for(IndexType j = src.row_offset[i]; j < src.row_offset[i + 1]; ++j) { dst->row[j] = i; } } #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nnz; ++i) { dst->col[i] = src.col[i]; } #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nnz; ++i) { dst->val[i] = src.val[i]; } return true; } template bool csr_to_ell(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, const MatrixCSR& src, MatrixELL* dst, IndexType* nnz_ell) { assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); omp_set_num_threads(omp_threads); dst->max_row = 0; for(IndexType i = 0; i < nrow; ++i) { IndexType max_row = src.row_offset[i + 1] - src.row_offset[i]; if(max_row > dst->max_row) { dst->max_row = max_row; } } *nnz_ell = dst->max_row * nrow; // Limit ELL size to 5 times CSR nnz if(dst->max_row > 5 * (nnz / nrow)) { return false; } allocate_host(*nnz_ell, &dst->val); allocate_host(*nnz_ell, &dst->col); set_to_zero_host(*nnz_ell, dst->val); set_to_zero_host(*nnz_ell, dst->col); #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { IndexType n = 0; for(IndexType j = src.row_offset[i]; j < src.row_offset[i + 1]; ++j) { IndexType ind = ELL_IND(i, n, nrow, dst->max_row); dst->val[ind] = src.val[j]; dst->col[ind] = src.col[j]; ++n; } for(IndexType j = src.row_offset[i + 1] - src.row_offset[i]; j < dst->max_row; ++j) { IndexType ind = ELL_IND(i, n, nrow, dst->max_row); dst->val[ind] = static_cast(0); dst->col[ind] = static_cast(-1); ++n; } } return true; } template bool ell_to_csr(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, const MatrixELL& src, MatrixCSR* dst, IndexType* nnz_csr) { assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); omp_set_num_threads(omp_threads); allocate_host(nrow + 1, &dst->row_offset); set_to_zero_host(nrow + 1, dst->row_offset); #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType ai = 0; ai < nrow; ++ai) { for(IndexType n = 0; n < src.max_row; ++n) { IndexType aj = ELL_IND(ai, n, nrow, src.max_row); if((src.col[aj] >= 0) && (src.col[aj] < ncol)) { ++dst->row_offset[ai]; } } } *nnz_csr = 0; for(IndexType i = 0; i < nrow; ++i) { IndexType tmp = dst->row_offset[i]; dst->row_offset[i] = *nnz_csr; *nnz_csr += tmp; } dst->row_offset[nrow] = *nnz_csr; allocate_host(*nnz_csr, &dst->col); allocate_host(*nnz_csr, &dst->val); set_to_zero_host(*nnz_csr, dst->col); set_to_zero_host(*nnz_csr, dst->val); #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType ai = 0; ai < nrow; ++ai) { IndexType ind = dst->row_offset[ai]; for(IndexType n = 0; n < src.max_row; ++n) { IndexType aj = ELL_IND(ai, n, nrow, src.max_row); if((src.col[aj] >= 0) && (src.col[aj] < ncol)) { dst->col[ind] = src.col[aj]; dst->val[ind] = src.val[aj]; ++ind; } } } return true; } template bool hyb_to_csr(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, IndexType nnz_ell, IndexType nnz_coo, const MatrixHYB& src, MatrixCSR* dst, IndexType* nnz_csr) { assert(nnz > 0); assert(nnz == nnz_ell + nnz_coo); assert(nrow > 0); assert(ncol > 0); omp_set_num_threads(omp_threads); allocate_host(nrow + 1, &dst->row_offset); set_to_zero_host(nrow + 1, dst->row_offset); IndexType start; start = 0; // TODO //#ifdef _OPENMP // #pragma omp parallel for private(start) //#endif for(IndexType ai = 0; ai < nrow; ++ai) { // ELL for(IndexType n = 0; n < src.ELL.max_row; ++n) { IndexType aj = ELL_IND(ai, n, nrow, src.ELL.max_row); if((src.ELL.col[aj] >= 0) && (src.ELL.col[aj] < ncol)) { dst->row_offset[ai] += 1; } } // COO for(IndexType i = start; i < nnz_coo; ++i) { if(src.COO.row[i] == ai) { dst->row_offset[ai] += 1; ++start; } if(src.COO.row[i] > ai) { break; } } } *nnz_csr = 0; for(IndexType i = 0; i < nrow; ++i) { IndexType tmp = dst->row_offset[i]; dst->row_offset[i] = *nnz_csr; *nnz_csr += tmp; } dst->row_offset[nrow] = *nnz_csr; allocate_host(*nnz_csr, &dst->col); allocate_host(*nnz_csr, &dst->val); set_to_zero_host(*nnz_csr, dst->col); set_to_zero_host(*nnz_csr, dst->val); start = 0; // TODO //#ifdef _OPENMP //#pragma omp parallel for private(start) //#endif for(IndexType ai = 0; ai < nrow; ++ai) { IndexType ind = dst->row_offset[ai]; // ELL for(IndexType n = 0; n < src.ELL.max_row; ++n) { IndexType aj = ELL_IND(ai, n, nrow, src.ELL.max_row); if((src.ELL.col[aj] >= 0) && (src.ELL.col[aj] < ncol)) { dst->col[ind] = src.ELL.col[aj]; dst->val[ind] = src.ELL.val[aj]; ++ind; } } // COO for(IndexType i = start; i < nnz_coo; ++i) { if(src.COO.row[i] == ai) { dst->col[ind] = src.COO.col[i]; dst->val[ind] = src.COO.val[i]; ++ind; ++start; } if(src.COO.row[i] > ai) { break; } } } return true; } template bool coo_to_csr(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, const MatrixCOO& src, MatrixCSR* dst) { assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); omp_set_num_threads(omp_threads); allocate_host(nrow + 1, &dst->row_offset); allocate_host(nnz, &dst->col); allocate_host(nnz, &dst->val); // COO has to be sorted by rows for(IndexType i = 1; i < nnz; ++i) { assert(src.row[i] >= src.row[i - 1]); } // Initialize row offset with zeros set_to_zero_host(nrow + 1, dst->row_offset); // Compute nnz entries per row of CSR for(IndexType i = 0; i < nnz; ++i) { ++dst->row_offset[src.row[i] + 1]; } // Do exclusive scan to obtain row ptrs for(IndexType i = 0; i < nrow; ++i) { dst->row_offset[i + 1] += dst->row_offset[i]; } assert(dst->row_offset[nrow] == nnz); #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nnz; ++i) { dst->col[i] = src.col[i]; dst->val[i] = src.val[i]; } // Sorting the col (per row) // Bubble sort algorithm #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { for(IndexType j = dst->row_offset[i]; j < dst->row_offset[i + 1]; ++j) { for(IndexType jj = dst->row_offset[i]; jj < dst->row_offset[i + 1] - 1; ++jj) { if(dst->col[jj] > dst->col[jj + 1]) { // swap elements IndexType ind = dst->col[jj]; ValueType val = dst->val[jj]; dst->col[jj] = dst->col[jj + 1]; dst->val[jj] = dst->val[jj + 1]; dst->col[jj + 1] = ind; dst->val[jj + 1] = val; } } } } return true; } template bool csr_to_dia(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, const MatrixCSR& src, MatrixDIA* dst, IndexType* nnz_dia) { assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); omp_set_num_threads(omp_threads); // Determine number of populated diagonals dst->num_diag = 0; std::vector diag_idx(nrow + ncol, 0); // Loop over rows and increment ndiag counter if diag offset has not been visited yet for(IndexType i = 0; i < nrow; ++i) { for(IndexType j = src.row_offset[i]; j < src.row_offset[i + 1]; ++j) { // Diagonal offset the current entry belongs to IndexType offset = src.col[j] - i + nrow; if(!diag_idx[offset]) { diag_idx[offset] = 1; ++dst->num_diag; } } } IndexType size = nrow > ncol ? nrow : ncol; *nnz_dia = size * dst->num_diag; // Conversion fails if DIA nnz exceeds 5 times CSR nnz if(dst->num_diag > 5 * (nnz / size)) { return false; } // Allocate DIA matrix allocate_host(dst->num_diag, &dst->offset); allocate_host(*nnz_dia, &dst->val); set_to_zero_host(*nnz_dia, dst->val); for(IndexType i = 0, d = 0; i < nrow + ncol; ++i) { // Fill DIA offset, if i-th diagonal is populated if(diag_idx[i]) { // Store offset index for reverse index access diag_idx[i] = d; // Store diagonals offset, where the diagonal is offset 0 // Left from diagonal offsets are decreasing // Right from diagonal offsets are increasing dst->offset[d++] = i - nrow; } } #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { for(IndexType j = src.row_offset[i]; j < src.row_offset[i + 1]; ++j) { // Diagonal offset the current entry belongs to IndexType offset = src.col[j] - i + nrow; dst->val[DIA_IND(i, diag_idx[offset], nrow, dst->num_diag)] = src.val[j]; } } return true; } template bool dia_to_csr(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, const MatrixDIA& src, MatrixCSR* dst, IndexType* nnz_csr) { assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); omp_set_num_threads(omp_threads); // Allocate CSR row pointer array allocate_host(nrow + 1, &dst->row_offset); // Extract CSR row offsets dst->row_offset[0] = 0; for(IndexType i = 0; i < nrow; ++i) { // Initialize row offset of i-th row dst->row_offset[i + 1] = dst->row_offset[i]; for(IndexType n = 0; n < src.num_diag; ++n) { IndexType j = i + src.offset[n]; if(j >= 0 && j < ncol) { // Exclude padded zeros if(src.val[DIA_IND(i, n, nrow, src.num_diag)] != static_cast(0)) { ++dst->row_offset[i + 1]; } } } } // CSR nnz *nnz_csr = dst->row_offset[nrow]; // Allocate CSR matrix allocate_host(*nnz_csr, &dst->col); allocate_host(*nnz_csr, &dst->val); // Fill CSR col and val arrays #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { IndexType idx = dst->row_offset[i]; for(IndexType n = 0; n < src.num_diag; ++n) { IndexType j = i + src.offset[n]; if(j >= 0 && j < ncol) { ValueType val = src.val[DIA_IND(i, n, nrow, src.num_diag)]; if(val != static_cast(0)) { dst->col[idx] = j; dst->val[idx] = val; ++idx; } } } } return true; } template bool csr_to_hyb(int omp_threads, IndexType nnz, IndexType nrow, IndexType ncol, const MatrixCSR& src, MatrixHYB* dst, IndexType* nnz_hyb, IndexType* nnz_ell, IndexType* nnz_coo) { assert(nnz > 0); assert(nrow > 0); assert(ncol > 0); omp_set_num_threads(omp_threads); // Determine ELL width by average nnz per row if(dst->ELL.max_row == 0) { dst->ELL.max_row = (nnz - 1) / nrow + 1; } // ELL nnz is ELL width times nrow *nnz_ell = dst->ELL.max_row * nrow; *nnz_coo = 0; // Array to hold COO part nnz per row IndexType* coo_row_ptr = NULL; allocate_host(nrow + 1, &coo_row_ptr); // If there is no ELL part, its easy... if(*nnz_ell == 0) { *nnz_coo = nnz; // copy rowoffset to coorownnz } else { // Compute COO nnz per row and COO nnz #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { IndexType row_nnz = src.row_offset[i + 1] - src.row_offset[i] - dst->ELL.max_row; coo_row_ptr[i + 1] = (row_nnz > 0) ? row_nnz : 0; } // Exclusive scan coo_row_ptr[0] = 0; for(IndexType i = 0; i < nrow; ++i) { coo_row_ptr[i + 1] += coo_row_ptr[i]; } *nnz_coo = coo_row_ptr[nrow]; } *nnz_hyb = *nnz_coo + *nnz_ell; if(*nnz_hyb <= 0) { return false; } // ELL if(*nnz_ell > 0) { allocate_host(*nnz_ell, &dst->ELL.val); allocate_host(*nnz_ell, &dst->ELL.col); } // COO if(*nnz_coo > 0) { allocate_host(*nnz_coo, &dst->COO.row); allocate_host(*nnz_coo, &dst->COO.col); allocate_host(*nnz_coo, &dst->COO.val); } #ifdef _OPENMP #pragma omp parallel for #endif for(IndexType i = 0; i < nrow; ++i) { IndexType p = 0; IndexType row_begin = src.row_offset[i]; IndexType row_end = src.row_offset[i + 1]; IndexType coo_idx = dst->COO.row ? coo_row_ptr[i] : 0; // Fill HYB matrix for(IndexType j = row_begin; j < row_end; ++j) { if(p < dst->ELL.max_row) { // Fill ELL part IndexType idx = ELL_IND(i, p++, nrow, dst->ELL.max_row); dst->ELL.col[idx] = src.col[j]; dst->ELL.val[idx] = src.val[j]; } else { dst->COO.row[coo_idx] = i; dst->COO.col[coo_idx] = src.col[j]; dst->COO.val[coo_idx] = src.val[j]; ++coo_idx; } } // Pad remaining ELL structure for(IndexType j = row_end - row_begin; j < dst->ELL.max_row; ++j) { IndexType idx = ELL_IND(i, p++, nrow, dst->ELL.max_row); dst->ELL.col[idx] = -1; dst->ELL.val[idx] = static_cast(0); } } free_host(&coo_row_ptr); return true; } template bool csr_to_coo(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixCOO* dst); template bool csr_to_coo(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixCOO* dst); #ifdef SUPPORT_COMPLEX template bool csr_to_coo(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixCOO, int>* dst); template bool csr_to_coo(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixCOO, int>* dst); #endif template bool csr_to_coo(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixCOO* dst); template bool csr_to_mcsr(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixMCSR* dst); template bool csr_to_mcsr(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixMCSR* dst); #ifdef SUPPORT_COMPLEX template bool csr_to_mcsr(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixMCSR, int>* dst); template bool csr_to_mcsr(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixMCSR, int>* dst); #endif template bool csr_to_mcsr(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixMCSR* dst); template bool mcsr_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixMCSR& src, MatrixCSR* dst); template bool mcsr_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixMCSR& src, MatrixCSR* dst); #ifdef SUPPORT_COMPLEX template bool mcsr_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixMCSR, int>& src, MatrixCSR, int>* dst); template bool mcsr_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixMCSR, int>& src, MatrixCSR, int>* dst); #endif template bool mcsr_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixMCSR& src, MatrixCSR* dst); template bool csr_to_dia(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixDIA* dst, int* nnz_dia); template bool csr_to_dia(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixDIA* dst, int* nnz_dia); #ifdef SUPPORT_COMPLEX template bool csr_to_dia(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixDIA, int>* dst, int* nnz_dia); template bool csr_to_dia(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixDIA, int>* dst, int* nnz_dia); #endif template bool csr_to_dia(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixDIA* dst, int* nnz_dia); template bool csr_to_hyb(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixHYB* dst, int* nnz_hyb, int* nnz_ell, int* nnz_coo); template bool csr_to_hyb(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixHYB* dst, int* nnz_hyb, int* nnz_ell, int* nnz_coo); #ifdef SUPPORT_COMPLEX template bool csr_to_hyb(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixHYB, int>* dst, int* nnz_hyb, int* nnz_ell, int* nnz_coo); template bool csr_to_hyb(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixHYB, int>* dst, int* nnz_hyb, int* nnz_ell, int* nnz_coo); #endif template bool csr_to_hyb(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixHYB* dst, int* nnz_hyb, int* nnz_ell, int* nnz_coo); template bool csr_to_ell(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixELL* dst, int* nnz_ell); template bool csr_to_ell(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixELL* dst, int* nnz_ell); #ifdef SUPPORT_COMPLEX template bool csr_to_ell(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixELL, int>* dst, int* nnz_ell); template bool csr_to_ell(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixELL, int>* dst, int* nnz_ell); #endif template bool csr_to_ell(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixELL* dst, int* nnz_ell); template bool csr_to_dense(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixDENSE* dst); template bool csr_to_dense(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixDENSE* dst); #ifdef SUPPORT_COMPLEX template bool csr_to_dense(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixDENSE>* dst); template bool csr_to_dense(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR, int>& src, MatrixDENSE>* dst); #endif template bool csr_to_dense(int omp_threads, int nnz, int nrow, int ncol, const MatrixCSR& src, MatrixDENSE* dst); template bool dense_to_csr(int omp_threads, int nrow, int ncol, const MatrixDENSE& src, MatrixCSR* dst, int* nnz); template bool dense_to_csr(int omp_threads, int nrow, int ncol, const MatrixDENSE& src, MatrixCSR* dst, int* nnz); #ifdef SUPPORT_COMPLEX template bool dense_to_csr(int omp_threads, int nrow, int ncol, const MatrixDENSE>& src, MatrixCSR, int>* dst, int* nnz); template bool dense_to_csr(int omp_threads, int nrow, int ncol, const MatrixDENSE>& src, MatrixCSR, int>* dst, int* nnz); #endif template bool dense_to_csr(int omp_threads, int nrow, int ncol, const MatrixDENSE& src, MatrixCSR* dst, int* nnz); template bool dia_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixDIA& src, MatrixCSR* dst, int* nnz_csr); template bool dia_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixDIA& src, MatrixCSR* dst, int* nnz_csr); #ifdef SUPPORT_COMPLEX template bool dia_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixDIA, int>& src, MatrixCSR, int>* dst, int* nnz_csr); template bool dia_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixDIA, int>& src, MatrixCSR, int>* dst, int* nnz_csr); #endif template bool dia_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixDIA& src, MatrixCSR* dst, int* nnz_csr); template bool ell_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixELL& src, MatrixCSR* dst, int* nnz_csr); template bool ell_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixELL& src, MatrixCSR* dst, int* nnz_csr); #ifdef SUPPORT_COMPLEX template bool ell_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixELL, int>& src, MatrixCSR, int>* dst, int* nnz_csr); template bool ell_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixELL, int>& src, MatrixCSR, int>* dst, int* nnz_csr); #endif template bool ell_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixELL& src, MatrixCSR* dst, int* nnz_csr); template bool coo_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixCOO& src, MatrixCSR* dst); template bool coo_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixCOO& src, MatrixCSR* dst); #ifdef SUPPORT_COMPLEX template bool coo_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixCOO, int>& src, MatrixCSR, int>* dst); template bool coo_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixCOO, int>& src, MatrixCSR, int>* dst); #endif template bool coo_to_csr(int omp_threads, int nnz, int nrow, int ncol, const MatrixCOO& src, MatrixCSR* dst); template bool hyb_to_csr(int omp_threads, int nnz, int nrow, int ncol, int nnz_ell, int nnz_coo, const MatrixHYB& src, MatrixCSR* dst, int* nnz_csr); template bool hyb_to_csr(int omp_threads, int nnz, int nrow, int ncol, int nnz_ell, int nnz_coo, const MatrixHYB& src, MatrixCSR* dst, int* nnz_csr); #ifdef SUPPORT_COMPLEX template bool hyb_to_csr(int omp_threads, int nnz, int nrow, int ncol, int nnz_ell, int nnz_coo, const MatrixHYB, int>& src, MatrixCSR, int>* dst, int* nnz_csr); template bool hyb_to_csr(int omp_threads, int nnz, int nrow, int ncol, int nnz_ell, int nnz_coo, const MatrixHYB, int>& src, MatrixCSR, int>* dst, int* nnz_csr); #endif template bool hyb_to_csr(int omp_threads, int nnz, int nrow, int ncol, int nnz_ell, int nnz_coo, const MatrixHYB& src, MatrixCSR* dst, int* nnz_csr); } // namespace rocalution