0 稀疏矩阵
稀疏矩阵是指矩阵中大部分元素为0的矩阵。存储这些0值数据会耗费大量的存储空间,并且计算时也会产生不必要的时间浪费。为了更有效地存储和处理这种类型的矩阵,有几种不同的稀疏矩阵格式。
- COO格式:COO格式(坐标格式)用三个数组存储非零元素的行、列索引以及值。
- CSR格式:CSR格式(压缩行格式)用三个数组存储矩阵的非零元素值、列索引和行指针。行指针数组指示每行中第一个非零元素的位置。
- CSC格式:CSC格式(压缩列格式)与CSR格式类似,但是是按列存储非零元素。
- DIA格式:DIA格式(对角线格式)使用一个二维数组存储非零元素。数组中的每一行表示矩阵的一个对角线,并且只有矩阵中存在的对角线上的元素才被存储。
- BSR格式:BSR格式(块压缩行格式)用四个数组存储矩阵的非零元素。其中三个数组与CSR格式相同,第四个数组存储块的大小。
- ELL格式:ELL格式(行程格式)使用两个数组存储矩阵的非零元素。其中一个数组存储元素的值,另一个数组存储元素在每行中的位置。每行中最大非零元素数量相同。
MKL中主要用到的稀疏矩阵格式有COO,CSR及CSC(与CSR类似)三种,以下将简要介绍COO格式与CSR格式:
(1COO(Coordinate,坐标格式
COO 格式的优点:非常简单直观,易于理解和实现,同时可以处理任意稀疏度的矩阵。
例:
(2CSR(Compressed Sparse Row,行压缩格式
CSR格式的一维数组包含三个部分:数据、列索引和行指针。假设稀疏矩阵的大小为m × n,其中非零元素个数为nnz。分别介绍这三个数组的含义:
- 数据数组(values array):存储非零元素的值,大小为nnz。
- 列索引数组(column indices array):存储每个非零元素所在的列数,大小为nnz。
- 行指针数组(row pointer array):存储每一行的第一个非零元素在数据数组中的下标,大小为m+1。
1 稀疏矩阵乘法
所用示例如下,矩阵A、B分别为
(1matlab计算结果
A=[1,-1,-3,0,0;
-2,5,0,0,0;
0,0,4,6,4;
-4,0,2,7,0;
0,8,0,0,0;];
B=[1,-1,-3,0;
-2,5,0,0;
0,0,4,6;
-4,0,2,7;
0,8,0,0];
A*B
输出为:
(2稀疏矩阵X稠密矩阵
函数将使用MKL库中的稀疏矩阵乘法接口mkl_sparse_s_mm
实现\(y = alpha * A * x + beta * y\,具体用法及参数详解如下:
mkl_sparse_s_mm(operation, //表示矩阵乘法的操作类型,可以是普通/转置/共轭转置
alpha, //乘法系数
A, //稀疏矩阵A
descr, //结构体,描述矩阵的属性,包括存储格式、存储顺序等
SPARSE_LAYOUT_ROW_MAJOR,//矩阵存储顺序
x, //X矩阵,稠密
columns, // 矩阵x的列数
ldx, //矩阵x的第一维
beta, //加法后系数
y, //y矩阵,即输出矩阵
ldy //矩阵y的第一维
;
此流程简述如下:
- 获取待稀疏表示矩阵\(A\的COO格式(ia,ja,value),以及非零元素个数nnz;
- 根据三组数据创建COO格式稀疏矩阵,并通过MKL转换接口将其转为CSR格式;
- 执行稀疏矩阵csrA与稠密矩阵denseB的乘积,使用
mkl_sparse_s_mm
接口计算矩阵乘法,结果为稠密矩阵C; - 将计算结果转为需要的尺寸(此例为二维数组)返回。
稀疏矩阵coo乘稠密矩阵接口
/*
输入:
ia 稀疏矩阵A的行索引,一维MKL整型
ja 稀疏矩阵A的列索引,一维MKL整型
a 稀疏矩阵A的数据值,一维浮点型
nnz 非零元素个数
denseB 稠密矩阵B数据,类型为float型的二维数组
rowsA 稀疏矩阵A的行数
colsA 稀疏矩阵A的列数
colsC A、B两矩阵相乘结果C的列数
flag 对稀疏矩阵A的操作,选项为0、1、2。0-A 1-AT(A矩阵的转置 2-AH(A矩阵的共轭转置 默认为0
输出:
denseC 稠密矩阵C数据,为A与B相乘后的结果,类型为float型的二维数组
*/
bool MKL_Sparse_CooXDense(int *ia, int *ja, float *a, int nnz, float **denseB, float **denseC, int rowsA, int colsA, int colsC, int flag;
函数代码
bool MKL_Sparse_CooXDense(MKL_INT *ia, MKL_INT *ja, float *a, int nnz, float **denseB, float **denseC, int rowsA, int colsA, int colsC, int flag {
//生成csr格式稀疏矩阵
sparse_matrix_t csrA, cooA;
sparse_status_t status = mkl_sparse_s_create_coo(&cooA,
SPARSE_INDEX_BASE_ZERO,
rowsA, // number of rows
colsA, // number of cols
nnz, // number of nonzeros
ia,
ja,
a;
if (status != SPARSE_STATUS_SUCCESS {
printf("Error creating COO sparse matrix.\n";
}
mkl_sparse_convert_csr(cooA, SPARSE_OPERATION_NON_TRANSPOSE, &csrA;
//调用mkl稀疏矩阵与稠密矩阵乘法 C=alpha*op(A*B+beta*C
double alpha = 1.0;
double beta = 0.0;
int M, N, K;
int ncols, ldx, ldy;
if (flag == 1 || flag == 2 { //转置或共轭转置,AT或AH
M = colsA;
N = rowsA;
K = colsC;
ncols = K;
ldx = K;
ldy = K;
}
else { //默认的情况下,A
M = rowsA;
N = colsA;
K = colsC;
ncols = N;
ldx = K;
ldy = K;
}
//将二维稠密矩阵B转为一维
float *denseB1D = (float*mkl_malloc(N*K * sizeof(float, 64;
for (int i = 0; i < N; i++ {
memcpy(denseB1D + i * K, denseB[i], K * sizeof(float;
}
struct matrix_descr descr = { SPARSE_MATRIX_TYPE_GENERAL, SPARSE_FILL_MODE_FULL, SPARSE_DIAG_NON_UNIT };
float *denseC1D = (float*mkl_malloc(M * K * sizeof(float, 64;
sparse_operation_t operation = SPARSE_OPERATION_NON_TRANSPOSE; //默认 op(A=A;
if (flag == 0 { //稀疏矩阵 op(A=A
operation = SPARSE_OPERATION_NON_TRANSPOSE;
}
if (flag == 1 {//稀疏矩阵 op(A=AT
operation = SPARSE_OPERATION_TRANSPOSE;
}
else if (flag == 2
{ //稀疏矩阵 op(A=AH
operation = SPARSE_OPERATION_CONJUGATE_TRANSPOSE;
}
else {//稀疏矩阵 op(A=A
operation = SPARSE_OPERATION_NON_TRANSPOSE;
}
mkl_sparse_s_mm(operation, alpha, csrA, descr, SPARSE_LAYOUT_ROW_MAJOR, denseB1D, ncols, ldx, beta, denseC1D, ldy;
//将计算结果转为2维
for (int i = 0; i < M; i++ {
memcpy(denseC[i], denseC1D + i * K, K * sizeof(float;
}
//释放
mkl_sparse_destroy(csrA;
mkl_sparse_destroy(cooA;
mkl_free(denseB1D;
mkl_free(denseC1D;
return true;
}
执行main.cpp中的MKL_Sparse_CooXDense_Demo(
后,
(3稀疏矩阵X稀疏矩阵
mkl_sparse_spmm接口实现\(C = op(A * B\,该接口的用法相对简单,
mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,//是否对A矩阵进行操作
csrA, //A矩阵,CSR格式
csrB, //B矩阵,CSR格式
&csrC //C矩阵,CSR格式
;
在进行稀疏矩阵示例之前,先补充两个封装函数:MKL_Coo2Csr(
、Print_Sparse_Csr_Matrix(
。
/*
函数功能:根据已知坐标、元素值,创建CSR稀疏矩阵
输入:
float *a 稀疏矩阵的值 ----参照稀疏矩阵的coo格式
MKL_INT *ia 稀疏矩阵的行指针
MKL_INT *ja 稀疏矩阵的列索引
int nnz 稀疏矩阵的数量
int nrows稀疏矩阵的行数
int ncols稀疏矩阵的列数
输出:
sparse_matrix_t CSR格式稀疏矩阵
*/
sparse_matrix_t MKL_Coo2Csr(int* ia, int *ja, float *a, int nrows, int ncols, int nnz {
//建立coo矩阵A 与 csrA矩阵
sparse_matrix_t cooA, csrA;
mkl_sparse_s_create_coo(&cooA,
SPARSE_INDEX_BASE_ZERO,
nrows, // number of rows
ncols, // number of cols
nnz, // number of nonzeros
ia,
ja,
a;
//coo转csr
mkl_sparse_convert_csr(cooA, SPARSE_OPERATION_NON_TRANSPOSE, &csrA;
//释放cooA矩阵
mkl_sparse_destroy(cooA;
//返回csrA矩阵
return csrA;
}
/*
函数功能:打印CSR稀疏矩阵的前n行n列元素
输入:
sparse_matrix_t CSR格式稀疏矩阵
int m 前m行
int n 前n列
*/
void Print_Sparse_Csr_Matrix(sparse_matrix_t csrA,int m,int n {
sparse_index_base_t indexing;
int nrows;
int ncols;
MKL_INT* csr_row_start;
MKL_INT* csr_row_end;
MKL_INT* csr_col_indx;
float* csr_values;
mkl_sparse_s_export_csr(csrA, &indexing, &nrows, &ncols, &csr_row_start, &csr_row_end, &csr_col_indx, &csr_values;
float **A_dense = alloc2float(ncols, nrows;
memset(A_dense[0], 0, nrows*ncols * sizeof(float;
//将value转换为普通二维数组
for (int i = 0; i < nrows; i++ {
for (int j = csr_row_start[i]; j < csr_row_start[i + 1]; j++ {
A_dense[i][csr_col_indx[j]] = csr_values[j];
}
}
//输出稠密矩阵
for (int i = 0; i < m; i++ {
for (int j = 0; j < n; j++ {
printf("%f ", A_dense[i][j];
}
printf("\n";
}
free2float(A_dense;
}
稀疏矩阵(csr乘稀疏矩阵(csr接口
/*
输入:
float *a 稀疏矩阵A的属性 ----参照稀疏矩阵的coo格式
MKL_INT *ia
MKL_INT *ja
int nnzA
int rowsA
int colsA
float *b 稀疏矩阵B的属性 ----参照稀疏矩阵的coo格式
MKL_INT *ib
MKL_INT *jb
int nnzB
int rowsB
int colsB
输出:
sparse_matrix_t 稀疏矩阵C(A*B
*/
sparse_matrix_t MKL_Sparse_CooXCoo(
int* ia, int *ja, float *a, int rowsA, int colsA, int nnzA,
int* ib, int *jb, float *b, int rowsB, int colsB, int nnzB;
函数代码
sparse_matrix_t MKL_Sparse_CooXCoo(
int* ia, int *ja, float *a, int rowsA, int colsA, int nnzA,
int* ib, int *jb, float *b, int rowsB, int colsB, int nnzB {
//根据坐标创建csrA和csrB
sparse_matrix_t csrA, csrB, csrC;
csrA = MKL_Coo2Csr(ia, ja, a, rowsA, colsA, nnzA;
csrB = MKL_Coo2Csr(ib, jb, b, rowsB, colsB, nnzB;
//csrC创建
mkl_sparse_d_create_csr(&csrC,
SPARSE_INDEX_BASE_ZERO,
rowsA, // number of rows
colsB, // number of cols
NULL, // number of nonzeros
NULL,
NULL,
NULL;
//矩阵乘法
mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE, csrA, csrB, &csrC;
//释放矩阵A和B
mkl_sparse_destroy(csrA;
mkl_sparse_destroy(csrB;
return csrC;
}
执行main.cpp中的MKL_Sparse_CooXCoo_Demo(
后,
2 稀疏矩阵求逆
(1matlab计算结果
作为标准答案,验证后续调用的正确性。
A = [1 2 4 0 0;
2 2 0 0 0;
4 0 3 0 0;
0 0 0 4 0;
0 0 0 0 5];
A_inv = inv(A
输出为:
(2MKL计算
PARDISO 的求解过程包括以下几个步骤:
- 输入矩阵:提供线性方程组的稀疏矩阵\(A\,稀疏CSR格式。
- 分析矩阵:对矩阵进行预处理和分解,生成求解器所需的数据结构和信息,包括消元树、消元顺序、LU 分解等。
- 求解线性方程组:使用 LU 分解求解线性方程组,可以直接求解$ A X = B \(或者\ AX = λX $问题。
- 输出解向量:输出求解得到的解向量 \(X\。
关于PARDISO接口参数的详细描述参考oneMKL PARDISO Parameters in Tabular Form (intel.com,以下简单介绍:
PARDISO(pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &perm, &nrhs, iparm, &msglvl, b, x, &error;
-
mnum:与maxfct一起使用,用于区分不同的矩阵。
-
- 1 - 实对称矩阵;
- 2 - 实对称正定矩阵;
- -2 - 实对称不定矩阵;
- 3 - 复对称矩阵;
- 11 - 实数、非对称矩阵;
-
phase:指定PARDISO的阶段。具体取值如:
- 11-分析阶段;
- 12-分析、数值分解阶段;
- 13-分析、数值分解、求解阶段;
- 22-数值分解阶段;
- 23-数值分解、求解阶段;
- 33-求解、迭代阶段;
- -1-释放所有矩阵内存;
-
a:稀疏矩阵\(A\的非零元素(CSR格式中的values)。
-
ja:CSR格式中的列索引。
-
nrhs:官方解释为:需要求解的右侧数(Number of right-hand sides that need to be solved for,一般为1。
-
- iparm[0]:0-使用默认值,非0-使用自定义参数;
- iparm[11]:对稀疏矩阵A进行操作后求解。0-求解\(AX=B\,1-求解\(A^HX=B\,2-求解\(A^TX=B\;
- iparm[12]: 使用(非)对称加权匹配提高准确性,0-禁用,1-开启;
- iparm[27]: 单精度/双精度,0-double,1-float;
- iparm[34]: 以0或1作为初始索引,0-从1开始索引,1-从0开始索引;
-
msglvl:Message level information,0-不生成输出,1-打印计算信息。
-
x:\(X\矩阵。
pt:指向PARDISO内部数据结构的指针。数组长度为64,必须用零初始化,并且不再改动。
稀疏矩阵求逆接口
/*
输入:
float *a 稀疏矩阵的值 ----参照稀疏矩阵的csr格式
MKL_INT *ia 稀疏矩阵的行指针
MKL_INT *ja 稀疏矩阵的列索引
int nnz 稀疏矩阵的数量
int n 稀疏矩阵的维度 n*n
MKL_INT mtype 稀疏矩阵类型
输出:
float **Ainv [稠密矩阵]稀疏矩阵的逆 n*n
*/
bool MKL_Sparse_Inverse(float **Ainv, float *a, MKL_INT *ia, MKL_INT *ja, int nnz, int n,MKL_INT mtype;
函数代码
bool MKL_Sparse_Inverse(float **Ainv, float *a, MKL_INT *ia, MKL_INT *ja, int nnz, int n, MKL_INT mtype {
/*STEP1 根据输入数组创建COO格式稀疏矩阵*/
//由于CSR格式不易表示,所以采取的路线为通过坐标创建COO格式矩阵
//再通过mkl接口将COO矩阵转为CSR矩阵
sparse_matrix_t csrA, cooA;
sparse_status_t status = mkl_sparse_s_create_coo(&cooA,
SPARSE_INDEX_BASE_ZERO,
n, // 稀疏矩阵的行、列
n,
nnz, // 非零元素个数
ia,//行索引
ja,//列索引
a;//矩阵元素值
if (status != SPARSE_STATUS_SUCCESS {
printf("Error creating COO sparse matrix.\n";
}
//coo转csr格式
mkl_sparse_convert_csr(cooA, SPARSE_OPERATION_NON_TRANSPOSE, &csrA;
/*STEP2 根据CSR格式稀疏矩阵得到其ia,ja,a三组数据*/
sparse_index_base_t indexing;
int nrows;
int ncols;
MKL_INT* ia_csr = (MKL_INT*mkl_malloc(nnz * sizeof(MKL_INT, 64;//csr格式的行指针
MKL_INT* csr_row_end = (MKL_INT*mkl_malloc(nnz * sizeof(MKL_INT, 64;
MKL_INT* ja_csr = (MKL_INT*mkl_malloc(nnz * sizeof(MKL_INT, 64;//csr格式的列索引
float* a_csr = (float*mkl_malloc(nnz * sizeof(float, 64;//csr格式的矩阵值
//利用mkl_sparse_s_export_csr接口实现
mkl_sparse_s_export_csr(csrA, &indexing, &nrows, &ncols, &ia_csr, &csr_row_end, &ja_csr, &a_csr;
/*Step3 设置稀疏矩阵参数*/
//初始化B矩阵和X矩阵
float *b = NULL; //保存单位矩阵用于求逆
float *x = NULL; //解矩阵
b = (float*mkl_malloc(n*n * sizeof(float, 64;
x = (float*mkl_malloc(n*n * sizeof(float, 64;
//将B矩阵初始化为单位阵
for (int i = 0; i < n; i++ {
for (int j = 0; j < n; j++ {
if (i == j {
b[i*n + j] = 1;
}
else {
b[i*n + j] = 0;
}
}
}
//初始化pt
void *pt[64];
for (int i = 0; i < 64; i++
{
pt[i] = 0;
}
//初始化矩阵相关控制参数
MKL_INT maxfct = 1;
MKL_INT mnum = 1;
MKL_INT phase = 1;
MKL_INT perm = 0;
MKL_INT nrhs = n;
MKL_INT error = 0;
MKL_INT msglvl = 0;
//设置iparm参数
MKL_INT iparm[64];
//批量初始化
for (int i = 0; i < 64; i++
{
iparm[i] = 0;
}
iparm[0] = 1; //开启自定义
iparm[12] = 1; //提高准确性
iparm[27] = 1; //为float型
iparm[34] = 1; //初始索引为0
/*Step4 分析矩阵、数值分解、求解*/
phase = 13; //phase设置为13,表示分析、数值分解、求解阶段
PARDISO(pt, &maxfct, &mnum, &mtype, &phase, &n, a_csr, ia_csr, ja_csr, &perm, &nrhs, iparm, &msglvl, b, x, &error;
//将解矩阵copy给输出,即A的逆矩阵
for (int i = 0; i < n; i++ {
memcpy(Ainv[i], x + i * n, n * sizeof(float;
}
//释放矩阵内存
phase = -1; //phase设置为-1
PARDISO(pt, &maxfct, &mnum, &mtype, &phase, &n, a_csr, ia_csr, ja_csr, &perm, &nrhs, iparm, &msglvl, b, x, &error;
//释放内存
mkl_sparse_destroy(cooA;
mkl_sparse_destroy(csrA;
mkl_free(x;
mkl_free(b;
return true;
}
在执行main.cpp中的MKL_Sparse_Inverse_Demo(
之后,输出如下,与matlab结果一致:
完整代码
Ⅰ MKL_Sparse_Methods.h
#pragma once
#include<stdio.h>
#include<stdlib.h>
#include "alloc.h"
#include"mkl.h"
#include "mkl_types.h"
#include"mkl_lapacke.h"
#include "mkl_spblas.h"
bool MKL_Sparse_CooXDense(MKL_INT *ia, MKL_INT *ja, float *a, int nnz, float **denseB, float **denseC, int rowsA, int colsA, int colsC, int flag;
sparse_matrix_t MKL_Sparse_CooXCoo(
int* ia, int *ja, float *a, int rowsA, int colsA, int nnzA,
int* ib, int *jb, float *b, int rowsB, int colsB, int nnzB;
sparse_matrix_t MKL_Coo2Csr(int* ia, int *ja, float *a, int nrows, int ncols, int nnz;
void Print_Sparse_Csr_Matrix(sparse_matrix_t csrA,int m,int n;
bool MKL_Sparse_Inverse(float **Ainv, float *a, MKL_INT *ia, MKL_INT *ja, int nnz, int n, MKL_INT mtype;
Ⅱ MKL_Sparse_Methods.cpp
#include "MKL_Sparse_Methods.h"
bool MKL_Sparse_CooXDense(MKL_INT *ia, MKL_INT *ja, float *a, int nnz, float **denseB, float **denseC, int rowsA, int colsA, int colsC, int flag {
//生成csr格式稀疏矩阵
sparse_matrix_t csrA, cooA;
sparse_status_t status = mkl_sparse_s_create_coo(&cooA,
SPARSE_INDEX_BASE_ZERO,
rowsA, // number of rows
colsA, // number of cols
nnz, // number of nonzeros
ia,
ja,
a;
if (status != SPARSE_STATUS_SUCCESS {
printf("Error creating COO sparse matrix.\n";
}
mkl_sparse_convert_csr(cooA, SPARSE_OPERATION_NON_TRANSPOSE, &csrA;
//调用mkl稀疏矩阵与稠密矩阵乘法 C=alpha*op(A*B+beta*C
double alpha = 1.0;
double beta = 0.0;
int M, N, K;
int ncols, ldx, ldy;
if (flag == 1 || flag == 2 {
M = colsA;
N = rowsA;
K = colsC;
ncols = K;
ldx = K;
ldy = K;
}
else { //默认的情况下
M = rowsA;
N = colsA;
K = colsC;
ncols = N;
ldx = K;
ldy = K;
}
//将二维稠密矩阵B转为一维
float *denseB1D = (float*mkl_malloc(N*K * sizeof(float, 64;
for (int i = 0; i < N; i++ {
memcpy(denseB1D + i * K, denseB[i], K * sizeof(float;
}
struct matrix_descr descr = { SPARSE_MATRIX_TYPE_GENERAL, SPARSE_FILL_MODE_FULL, SPARSE_DIAG_NON_UNIT };
float *denseC1D = (float*mkl_malloc(M * K * sizeof(float, 64;
sparse_operation_t operation = SPARSE_OPERATION_NON_TRANSPOSE; //默认 op(A=A;
if (flag == 0 { //稀疏矩阵 op(A=A
operation = SPARSE_OPERATION_NON_TRANSPOSE;
}
if (flag == 1 {//稀疏矩阵 op(A=AT
operation = SPARSE_OPERATION_TRANSPOSE;
}
else if (flag == 2
{ //稀疏矩阵 op(A=AH
operation = SPARSE_OPERATION_CONJUGATE_TRANSPOSE;
}
else {//稀疏矩阵 op(A=A
operation = SPARSE_OPERATION_NON_TRANSPOSE;
}
mkl_sparse_s_mm(operation, alpha, csrA, descr, SPARSE_LAYOUT_ROW_MAJOR, denseB1D, ncols, ldx, beta, denseC1D, ldy;
//将计算结果转为2维
for (int i = 0; i < M; i++ {
memcpy(denseC[i], denseC1D + i * K, K * sizeof(float;
}
//释放
mkl_sparse_destroy(csrA;
mkl_sparse_destroy(cooA;
mkl_free(denseB1D;
mkl_free(denseC1D;
return true;
}
//根据已知坐标、元素值,创建CSR稀疏矩阵
sparse_matrix_t MKL_Coo2Csr(int* ia, int *ja, float *a, int nrows, int ncols, int nnz {
//建立coo矩阵A 与 csrA矩阵
sparse_matrix_t cooA, csrA;
mkl_sparse_s_create_coo(&cooA,
SPARSE_INDEX_BASE_ZERO,
nrows, // number of rows
ncols, // number of cols
nnz, // number of nonzeros
ia,
ja,
a;
//coo转csr
mkl_sparse_convert_csr(cooA, SPARSE_OPERATION_NON_TRANSPOSE, &csrA;
//释放cooA矩阵
mkl_sparse_destroy(cooA;
//返回csrA矩阵
return csrA;
}
void Print_Sparse_Csr_Matrix(sparse_matrix_t csrA,int m,int n {
sparse_index_base_t indexing;
int nrows;
int ncols;
MKL_INT* csr_row_start;
MKL_INT* csr_row_end;
MKL_INT* csr_col_indx;
float* csr_values;
mkl_sparse_s_export_csr(csrA, &indexing, &nrows, &ncols, &csr_row_start, &csr_row_end, &csr_col_indx, &csr_values;
float **A_dense = alloc2float(ncols, nrows;
memset(A_dense[0], 0, nrows*ncols * sizeof(float;
//将value转换为普通二维数组
for (int i = 0; i < nrows; i++ {
for (int j = csr_row_start[i]; j < csr_row_start[i + 1]; j++ {
A_dense[i][csr_col_indx[j]] = csr_values[j];
}
}
//输出稠密矩阵
for (int i = 0; i < m; i++ {
for (int j = 0; j < n; j++ {
printf("%f ", A_dense[i][j];
}
printf("\n";
}
free2float(A_dense;
}
//Coo格式×Coo格式矩阵乘法
sparse_matrix_t MKL_Sparse_CooXCoo(
int* ia, int *ja, float *a, int rowsA, int colsA, int nnzA,
int* ib, int *jb, float *b, int rowsB, int colsB, int nnzB {
//根据坐标创建csrA和csrB
sparse_matrix_t csrA, csrB, csrC;
csrA = MKL_Coo2Csr(ia, ja, a, rowsA, colsA, nnzA;
csrB = MKL_Coo2Csr(ib, jb, b, rowsB, colsB, nnzB;
//csrC创建
mkl_sparse_d_create_csr(&csrC,
SPARSE_INDEX_BASE_ZERO,
rowsA, // number of rows
colsB, // number of cols
NULL, // number of nonzeros
NULL,
NULL,
NULL;
mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE, csrA, csrB, &csrC;
//释放矩阵A和B
mkl_sparse_destroy(csrA;
mkl_sparse_destroy(csrB;
return csrC;
}
//稀疏矩阵求逆
bool MKL_Sparse_Inverse(float **Ainv, float *a, MKL_INT *ia, MKL_INT *ja, int nnz, int n, MKL_INT mtype {
/*STEP1 根据输入数组创建COO格式稀疏矩阵*/
//由于CSR格式不易表示,所以采取的路线为通过坐标创建COO格式矩阵
//再通过mkl接口将COO矩阵转为CSR矩阵
sparse_matrix_t csrA, cooA;
sparse_status_t status = mkl_sparse_s_create_coo(&cooA,
SPARSE_INDEX_BASE_ZERO,
n, // 稀疏矩阵的行、列
n,
nnz, // 非零元素个数
ia,//行索引
ja,//列索引
a;//矩阵元素值
if (status != SPARSE_STATUS_SUCCESS {
printf("Error creating COO sparse matrix.\n";
}
//coo转csr格式
mkl_sparse_convert_csr(cooA, SPARSE_OPERATION_NON_TRANSPOSE, &csrA;
/*STEP2 根据CSR格式稀疏矩阵得到其ia,ja,a三组数据*/
sparse_index_base_t indexing;
int nrows;
int ncols;
MKL_INT* ia_csr = (MKL_INT*mkl_malloc(nnz * sizeof(MKL_INT, 64;//csr格式的行指针
MKL_INT* csr_row_end = (MKL_INT*mkl_malloc(nnz * sizeof(MKL_INT, 64;
MKL_INT* ja_csr = (MKL_INT*mkl_malloc(nnz * sizeof(MKL_INT, 64;//csr格式的列索引
float* a_csr = (float*mkl_malloc(nnz * sizeof(float, 64;//csr格式的矩阵值
//利用mkl_sparse_s_export_csr接口实现
mkl_sparse_s_export_csr(csrA, &indexing, &nrows, &ncols, &ia_csr, &csr_row_end, &ja_csr, &a_csr;
/*Step3 设置稀疏矩阵参数*/
//初始化B矩阵和X矩阵
float *b = NULL; //保存单位矩阵用于求逆
float *x = NULL; //解矩阵
b = (float*mkl_malloc(n*n * sizeof(float, 64;
x = (float*mkl_malloc(n*n * sizeof(float, 64;
//将B矩阵初始化为单位阵
for (int i = 0; i < n; i++ {
for (int j = 0; j < n; j++ {
if (i == j {
b[i*n + j] = 1;
}
else {
b[i*n + j] = 0;
}
}
}
//初始化pt
void *pt[64];
for (int i = 0; i < 64; i++
{
pt[i] = 0;
}
//初始化矩阵相关控制参数
MKL_INT maxfct = 1;
MKL_INT mnum = 1;
MKL_INT phase = 1;
MKL_INT perm = 0;
MKL_INT nrhs = n;
MKL_INT error = 0;
MKL_INT msglvl = 0;
//设置iparm参数
MKL_INT iparm[64];
//批量初始化
for (int i = 0; i < 64; i++
{
iparm[i] = 0;
}
iparm[0] = 1; //开启自定义
iparm[12] = 1; //提高准确性
iparm[27] = 1; //为float型
iparm[34] = 1; //初始索引为0
/*Step4 分析矩阵、数值分解、求解*/
phase = 13; //phase设置为13,表示分析、数值分解、求解阶段
PARDISO(pt, &maxfct, &mnum, &mtype, &phase, &n, a_csr, ia_csr, ja_csr, &perm, &nrhs, iparm, &msglvl, b, x, &error;
//将解矩阵copy给输出,即A的逆矩阵
for (int i = 0; i < n; i++ {
memcpy(Ainv[i], x + i * n, n * sizeof(float;
}
//释放矩阵内存
phase = -1; //phase设置为-1
PARDISO(pt, &maxfct, &mnum, &mtype, &phase, &n, a_csr, ia_csr, ja_csr, &perm, &nrhs, iparm, &msglvl, b, x, &error;
//释放内存
mkl_sparse_destroy(cooA;
mkl_sparse_destroy(csrA;
mkl_free(x;
mkl_free(b;
return true;
}
Ⅲ main.cpp
#include "MKL_Sparse_Methods.h"
#include "alloc.h"
#define M 6
#define N 5
#define K 4
void MKL_Sparse_CooXDense_Demo(;
void MKL_Sparse_CooXCoo_Demo(;
void MKL_Sparse_Inverse_Demo(;
int main( {
MKL_Sparse_CooXDense_Demo(;//稀疏乘稠密
MKL_Sparse_CooXCoo_Demo(; //稀疏×稀疏
MKL_Sparse_Inverse_Demo(;//稀疏矩阵求逆
return 0;
}
//稀疏乘稠密
void MKL_Sparse_CooXDense_Demo( {
int flag = 0; /* flag=0时表示A(COO*B(Dense
flag=1时表示AT(COO*B(Dense*/
int rowsA, colsA;
if (flag == 0 {
rowsA = M, colsA = N;
}
else if (flag == 1 {
rowsA = N, colsA = M;
}
int rowsB = N, colsB = K;
int rowsC = M, colsC = K;
float Atemp[M][N] = {
{1,-1,-3,0,0},
{-2,5,0,0,0},
{0,0,4,6,4},
{-4,0,2,7,0},
{0,8,0,0,-5},
{1,0,0,0,0},
};
float ATtemp[N][M] = {
{1,-2,0,-4,0,1},
{-1,5,0,0,8,0},
{-3,0,4,2,0,0},
{0,0,6,7,0,0},
{0,0,4,0,-5,0},
};
float Btemp[N][K] = {
{1,-1,-3,0},
{-2,5,0,0},
{0,0,4,6},
{-4,0,2,7},
{0,8,0,0}
};
//将一般二维数组转换为alloc表示
float **matrixA = alloc2float(colsA, rowsA;
memset(matrixA[0], 0, rowsA*colsA * sizeof(float;
float **matrixB = alloc2float(colsB, rowsB;
memset(matrixB[0], 0, rowsB*colsB * sizeof(float;
float **matrixC = alloc2float(colsC, rowsC;
memset(matrixC[0], 0, rowsC*colsC * sizeof(float;
//复制二维数组到二级指针
if (flag == 0 {
memcpy(matrixA[0], Atemp, rowsA*colsA * sizeof(float;
}
else if (flag == 1 {
memcpy(matrixA[0], ATtemp, rowsA*colsA * sizeof(float;
}
memcpy(matrixB[0], Btemp, rowsB*colsB * sizeof(float;
//统计二维数组的非零元素个数
int nnz = 0;
for (int i = 0; i < rowsA; i++ {
for (int j = 0; j < colsA; j++ {
//统计nnz
if (matrixA[i][j] != 0 {
nnz++;
}
}
}
//获取稠密矩阵的coo形式
MKL_INT *ia = (MKL_INT*mkl_malloc(nnz * sizeof(MKL_INT, 64;
MKL_INT *ja = (MKL_INT*mkl_malloc(nnz * sizeof(MKL_INT, 64;
float *a = (float*mkl_malloc(nnz * sizeof(float, 64;
//获取coo数据
int k = 0;
for (int i = 0; i < rowsA; i++ {
for (int j = 0; j < colsA; j++ {
if (matrixA[i][j] != 0.0 {
a[k] = matrixA[i][j];
ia[k] = i;
ja[k] = j;
k++;
}
}
}
//执行矩阵乘法
MKL_Sparse_CooXDense(ia, ja, a, nnz, matrixB, matrixC, rowsA, colsA, colsC, flag;
/* 输出结果 */
printf("*************** MKL Sparse X Dense ***************\n ";
printf("=============== flag=%d ================\n", flag;
for (int i = 0; i < rowsC; i++ {
for (int j = 0; j < colsC; j++ {
printf("%f ", matrixC[i][j];
}
printf("\n";
}
free2float(matrixA;
free2float(matrixB;
free2float(matrixC;
mkl_free(ia;
mkl_free(ja;
mkl_free(a;
}
//稀疏×稀疏
void MKL_Sparse_CooXCoo_Demo( {
int rowsA = M, colsA = N;
int rowsB = N, colsB = K;
int rowsC = M, colsC = K;
float Atemp[M][N] = {
{1,-1,-3,0,0},
{-2,5,0,0,0},
{0,0,4,6,4},
{-4,0,2,7,0},
{0,8,0,0,-5},
{1,0,0,0,0},
};
float Btemp[N][K] = {
{1,-1,-3,0},
{-2,5,0,0},
{0,0,4,6},
{-4,0,2,7},
{0,8,0,0}
};
//将一般二维数组转换为alloc表示
float **matrixA = alloc2float(colsA, rowsA;
memset(matrixA[0], 0, rowsA*colsA * sizeof(float;
float **matrixB = alloc2float(colsB, rowsB;
memset(matrixB[0], 0, rowsB*colsB * sizeof(float;
//复制二维数组到二级指针
memcpy(matrixA[0], Atemp, rowsA*colsA * sizeof(float;
memcpy(matrixB[0], Btemp, rowsB*colsB * sizeof(float;
//***********A矩阵稀疏表示
//统计二维数组的非零元素个数
int nnzA = 0; //A矩阵
for (int i = 0; i < rowsA; i++ {
for (int j = 0; j < colsA; j++ {
//统计nnz
if (matrixA[i][j] != 0 {
nnzA++;
}
}
}
//获取稠密矩阵的coo形式
MKL_INT *ia = (MKL_INT*mkl_malloc(nnzA * sizeof(MKL_INT, 64;
MKL_INT *ja = (MKL_INT*mkl_malloc(nnzA * sizeof(MKL_INT, 64;
float *a = (float*mkl_malloc(nnzA * sizeof(float, 64;
//获取coo数据
int k = 0;
for (int i = 0; i < rowsA; i++ {
for (int j = 0; j < colsA; j++ {
if (matrixA[i][j] != 0.0 {
a[k] = matrixA[i][j];
ia[k] = i;
ja[k] = j;
k++;
}
}
}
//***********B矩阵稀疏表示
int nnzB = 0; //B矩阵
for (int i = 0; i < rowsB; i++ {
for (int j = 0; j < colsB; j++ {
//统计nnz
if (matrixB[i][j] != 0 {
nnzB++;
}
}
}
//获取稠密矩阵的coo形式
MKL_INT *ib = (MKL_INT*mkl_malloc(nnzB * sizeof(MKL_INT, 64;
MKL_INT *jb = (MKL_INT*mkl_malloc(nnzB * sizeof(MKL_INT, 64;
float *b = (float*mkl_malloc(nnzB * sizeof(float, 64;
//获取coo数据
k = 0;
for (int i = 0; i < rowsB; i++ {
for (int j = 0; j < colsB; j++ {
if (matrixB[i][j] != 0.0 {
b[k] = matrixB[i][j];
ib[k] = i;
jb[k] = j;
k++;
}
}
}
sparse_matrix_t csrC = MKL_Sparse_CooXCoo(ia, ja, a, rowsA, colsA, nnzA, ib, jb, b, rowsB, colsB, nnzB;
printf("*************** MKL Sparse X Sparse ***************\n ";
Print_Sparse_Csr_Matrix(csrC, rowsC, colsC; //打印矩阵C结果
mkl_sparse_destroy(csrC;
mkl_free(ia;
mkl_free(ja;
mkl_free(a;
mkl_free(ib;
mkl_free(jb;
mkl_free(b;
free2float(matrixA;
free2float(matrixB;
}
//稀疏矩阵求逆
void MKL_Sparse_Inverse_Demo( {
int rowsA = N, colsA = N;
float Atemp[N][N] = {
{1,2,4,0,0},
{2,2,0,0,0},
{4,0,3,0,0},
{0,0,0,4,0},
{0,0,0,0,5},
};
//将一般二维数组转换为alloc表示
float **matrixA = alloc2float(colsA, rowsA;
memset(matrixA[0], 0, rowsA*colsA * sizeof(float;
float **matrixA_inv = alloc2float(colsA, rowsA;
memset(matrixA[0], 0, rowsA*colsA * sizeof(float;
//复制二维数组到二级指针
memcpy(matrixA[0], Atemp, rowsA*colsA * sizeof(float;
//统计二维数组的非零元素个数
int nnz = 0;
for (int i = 0; i < rowsA; i++ {
for (int j = 0; j < colsA; j++ {
//统计nnz
if (matrixA[i][j] != 0 {
nnz++;
}
}
}
//获取稠密矩阵的coo形式
MKL_INT *ia = (MKL_INT*mkl_malloc(nnz * sizeof(MKL_INT, 64;
MKL_INT *ja = (MKL_INT*mkl_malloc(nnz * sizeof(MKL_INT, 64;
float *a = (float*mkl_malloc(nnz * sizeof(float, 64;
//获取coo数据
int k = 0;
for (int i = 0; i < rowsA; i++ {
for (int j = 0; j < colsA; j++ {
if (matrixA[i][j] != 0.0 {
a[k] = matrixA[i][j];
ia[k] = i;
ja[k] = j;
k++;
}
}
}
MKL_INT mtype = 11; //设置矩阵类型为一般实矩阵
MKL_Sparse_Inverse(matrixA, a, ia, ja, nnz, rowsA, mtype;
printf("*************** MKL Sparse Matrix Inverse ***************\n ";
for (int i = 0; i < N; i++ {
for (int j = 0; j < N; j++ {
printf("%f ", matrixA[i][j];
}
printf("\n";
}
free2float(matrixA;
free2float(matrixA_inv;
mkl_free(ia;
mkl_free(ja;
mkl_free(a;
}