> 文章列表 > 一个同步到 Lapack 算法的 Householder 变换来三对角化一个对称矩阵的实现

一个同步到 Lapack 算法的 Householder 变换来三对角化一个对称矩阵的实现

一个同步到 Lapack 算法的 Householder 变换来三对角化一个对称矩阵的实现

Householder Transformation:

任意 U(i) 是一个 n 维实数向量(或者复数向量),

P(i) = I - U(i) * U(i)' / H(i)

其中:   Hi = (1/2) * Ui' * Ui

Hi = = H(i)等;

—————————————————————

变换矩阵:

Q = P(n-3)*P(n-2)*...*P(1)*P(0)

T = Q * A * Q'

A = inv(Q) * A * inv(Q')

_____________________________________

在计算如下值得过程中,

Pi = I - Ui * Ui' / Hi

Hi = (1/2) * Ui' * Ui

,为了实现数值稳定,Hi作为一个分母,这里在选取 U[t-1] 的时候,最大化了U[t-1]的绝对值:

通过:

U[t - 1] += get_sign(U[t - 1]) * sqrt(delta);

tridiagonal_symmetirc_matrix.cpp

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <cuComplex.h>#define NA 5#define PRINT_ALL 1void print_matrix(float *A, int M, int N, int lda);
void print_vector(float *A, int len);
void cpu_gemm(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha);    // C = alpha * A*B
void cpu_gemm_NT(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha); // C = alpha * A*B'void tridiag_symm_mat_Householder(float *A, int n, int lda, float *Q, int ldq);
int symmetric_matrix_test();
int hermite_matrix_test();
void complex_gemm(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha);
void complex_gemm_NH(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha);
float get_sign(float aij);union np_f_i
{float f;int i;
};
typedef union np_f_i np_f_i;float get_sign(float aij)
{np_f_i x;x.f = aij;return (x.i >> (sizeof(float) - 1)) ? -1.0 : 1.0;
}void tridiag_symm_mat_Householder(float *A, int n, int lda, float *Q, int ldq)
{for (int i = 0; i < n - 2; i++) // i = 0 ~ 3;{int t;float H;float delta;float U[NA];float P[NA * NA];int ldp = n;t = n - i - 1;// 01. fill the first Umemset(U, 0x00, NA * sizeof(float));for (int idx = 0; idx < t; idx++){ // U(0:t-1) = A(t, 0:t-1)U[idx] = A[t + idx * lda];}// void print_vector(float *A, int len)printf("U(%d) first =\\n", i);print_vector(U, n);// 02. calculat delta:delta = 0.0f;for (int idx = 0; idx < t; idx++){delta += U[idx] * U[idx];}printf("delta(%d)=%7.4f\\n", i, delta);// 03. fill the second UU[t - 1] += get_sign(U[t - 1]) * sqrt(delta); // for numerical stable;printf("U(%d) second =\\n", i);print_vector(U, n);// 04. calculate H, a scalarH = 0.0f;for (int idx = 0; idx < t; idx++){H += U[idx] * U[idx];}H *= 0.5f;printf("H_i(%d) = %7.4f\\n", i, H);// 05. calculate P = -U*U'/H;     P = I - U*U'/H;// void cpu_gemm_NT(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha); // C = alpha * A*B'cpu_gemm(n, n, 1, U, n, U, 1, P, n, -1.0f / H); // not need NTprintf("P(%d) first=\\n", i);print_matrix(P, n, n, ldp);// P = I + P      = I - U*U'/Hfor (int idx = 0; idx < n; idx++){P[idx + idx * ldp] += 1.0f;}printf("P(%d) second=\\n", i);print_matrix(P, n, n, ldp);// 06. A(i+1) = P(i)*A(i)*P(i)cpu_gemm(n, n, n, P, ldp, A, lda, A, lda, 1.0f);cpu_gemm(n, n, n, A, lda, P, ldp, A, lda, 1.0f);printf("A(%d) =\\n", i);print_matrix(A, n, n, lda);// 07. Q(i+1) = P(i)*Q(i);         Q = P(n-3)*P(n-2)*...*P(0)*Icpu_gemm(n, n, n, P, ldp, Q, ldq, Q, ldq, 1.0f);printf("Q(%d) =\\n", i);print_matrix(Q, n, n, ldq);}
}int main()
{printf("\\nN-1 to 2  tridiaganal symm ...\\n");static float A_h[NA * NA] = {10.0, 1.0, 2.0, 3.0, 4.0,1.0, 9.0, -1.0, 2.0, -3.0,2.0, -1.0, 7.0, 3.0, -5.0,3.0, 2.0, 3.0, 12.0, -1.0,4.0, -3.0, -5.0, -1.0, 15.0};///// i=0,1,...,n-3;   t=n-i-1;float *A = nullptr;float *Q = nullptr;int n = NA;int lda = n;int ldq = n;// alloc AA = (float *)malloc(n * n * sizeof(float));memcpy(A, A_h, n * n * sizeof(float));printf("A(0) =\\n");print_matrix(A, n, n, lda);// alloc QQ = (float *)malloc(ldq * n * sizeof(float));memset(Q, 0x00, ldq * n * sizeof(float));for (int idx = 0; idx < n; idx++){Q[idx + idx * ldq] = 1.0f;}// void tridiag_symm_mat_Householder(float *A, int n, int lda, float *Q, int ldq)tridiag_symm_mat_Householder(A, n, lda, Q, ldq);/**for (int i = 0; i < n - 2; i++)// i = 0 ~ 3;{int t;float H;float delta;float U[NA];float P[NA * NA];int ldp = n;t = n - i - 1;// 01. fill the first Umemset(U, 0x00, NA * sizeof(float));for (int idx = 0; idx < t; idx++){ // U(0:t-1) = A(t, 0:t-1)U[idx] = A[t + idx * lda];}// void print_vector(float *A, int len)printf("U(%d) first =\\n", i);print_vector(U, n);// 02. calculat delta:delta = 0.0f;for (int idx = 0; idx < t; idx++){delta += U[idx] * U[idx];}printf("delta(%d)=%7.4f\\n", i, delta);// 03. fill the second UU[t - 1] += get_sign(U[t - 1]) * sqrt(delta); // for numerical stable;printf("U(%d) second =\\n", i);print_vector(U, n);// 04. calculate H, a scalarH = 0.0f;for (int idx = 0; idx < t; idx++){H += U[idx] * U[idx];}H *= 0.5f;printf("H_i(%d) = %7.4f\\n", i, H);// 05. calculate P = -U*U'/H;     P = I - U*U'/H;// void cpu_gemm_NT(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha); // C = alpha * A*B'cpu_gemm(n, n, 1, U, n, U, 1, P, n, -1.0f / H);// not need NTprintf("P(%d) first=\\n", i);print_matrix(P, n, n, ldp);// P = I + P      = I - U*U'/Hfor (int idx = 0; idx < n; idx++){P[idx + idx * ldp] += 1.0f;}printf("P(%d) second=\\n", i);print_matrix(P, n, n, ldp);// 06. A(i+1) = P(i)*A(i)*P(i)cpu_gemm(n, n, n, P, ldp, A, lda, A, lda, 1.0f);cpu_gemm(n, n, n, A, lda, P, ldp, A, lda, 1.0f);printf("A(%d) =\\n", i);print_matrix(A, n, n, lda);// 07. Q(i+1) = P(i)*Q(i);         Q = P(n-3)*P(n-2)*...*P(0)*Icpu_gemm(n, n, n, P, ldp, Q, ldq, Q, ldq, 1.0f);printf("Q(%d) =\\n", i);print_matrix(Q, n, n, ldq);}*/printf("T=[ ...\\n");print_matrix(A, n, n, lda);printf("]\\nQ=[ ...\\n");print_matrix(Q, n, n, ldq);printf("]\\n");return 0;
}void print_matrix(float *A, int M, int N, int lda)
{for (int i = 0; i < M; i++){for (int j = 0; j < N; j++){printf("%7.3f ", A[i + j * lda]);}printf("; ...\\n");}// printf("\\n");
}void print_vector(float *A, int len)
{for (int idx = 0; idx < len; idx++){printf("%7.4f ", A[idx]);}printf("\\n");
}void cpu_gemm(int M, int N, int K, float *A_h, int lda, float *B_h, int ldb, float *C, int ldc, float alpha)
{float *A = nullptr;float *B = nullptr;A = (float *)malloc(lda * K * sizeof(float));B = (float *)malloc(ldb * N * sizeof(float));memcpy(A, A_h, lda * K * sizeof(float));memcpy(B, B_h, ldb * N * sizeof(float));for (int i = 0; i < M; i++){for (int j = 0; j < N; j++){float sigma;sigma = 0.0f;for (int k = 0; k < K; k++){sigma += A[i + k * lda] * B[k + j * ldb];}C[i + j * ldc] = alpha * sigma;}}free(A);free(B);
}void cpu_gemm_NT(int M, int N, int K, float *A_h, int lda, float *B_h, int ldb, float *C, int ldc, float alpha)
{float *A = nullptr;float *B = nullptr;A = (float *)malloc(lda * K * sizeof(float));B = (float *)malloc(ldb * N * sizeof(float));memcpy(A, A_h, lda * K * sizeof(float));memcpy(B, B_h, ldb * N * sizeof(float));for (int i = 0; i < M; i++){for (int j = 0; j < N; j++){float sigma;sigma = 0.0f;for (int k = 0; k < K; k++){sigma += A[i + k * lda] * B[k * ldb + j];}C[i + j * ldc] = alpha * sigma;}}free(A);free(B);
}void complex_gemm_NH(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha)
{cuComplex zero_;zero_ = make_cuFloatComplex(0.0f, 0.0f);for (int i = 0; i < M; i++){for (int j = 0; j < N; j++){cuComplex sigma;sigma = zero_;for (int k = 0; k < K; k++){// sigma += A[i + k*lda]*B[k+j*ldb];sigma = cuCaddf(sigma, cuCmulf(A[i + k * lda], cuConjf(B[k * ldb + j])));}C[i + j * ldc] = make_cuFloatComplex(alpha * cuCrealf(sigma), alpha * cuCimagf(sigma));}}
}void complex_gemm(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha)
{cuComplex zero_;zero_ = make_cuFloatComplex(0.0f, 0.0f);for (int i = 0; i < M; i++){for (int j = 0; j < N; j++){cuComplex sigma;sigma = zero_;for (int k = 0; k < K; k++){// sigma += A[i + k*lda]*B[k+j*ldb];sigma = cuCaddf(sigma, cuCmulf(A[i + k * lda], B[k + j * ldb]));}C[i + j * ldc] = make_cuFloatComplex(alpha * cuCrealf(sigma), alpha * cuCimagf(sigma));}}
}/**
#include <stdio.h>
#include <math.h>void strq(double *a, int n, double *q, double* b, double* c)//int n;  double a[],q[],b[],c[];
{int i,j,k,u,v;double h,f,g,h2;for (i=0; i<=n-1; i++)for (j=0; j<=n-1; j++){ u=i*n+j; q[u]=a[u];}for (i=n-1; i>=1; i--){ h=0.0;if (i>1)for (k=0; k<=i-1; k++){ u=i*n+k; h=h+q[u]*q[u];}if (h+1.0==1.0){ c[i]=0.0;if (i==1) c[i]=q[i*n+i-1];b[i]=0.0;}else{ c[i]=sqrt(h);u=i*n+i-1;if (q[u]>0.0) c[i]=-c[i];h=h-q[u]*c[i];q[u]=q[u]-c[i];f=0.0;for (j=0; j<=i-1; j++){ q[j*n+i]=q[i*n+j]/h;g=0.0;for (k=0; k<=j; k++)g=g+q[j*n+k]*q[i*n+k];if (j+1<=i-1)for (k=j+1; k<=i-1; k++)g=g+q[k*n+j]*q[i*n+k];c[j]=g/h;f=f+g*q[j*n+i];}h2=f/(h+h);for (j=0; j<=i-1; j++){ f=q[i*n+j];g=c[j]-h2*f;c[j]=g;for (k=0; k<=j; k++){ u=j*n+k;q[u]=q[u]-f*c[k]-g*q[i*n+k];}}b[i]=h;}}for (i=0; i<=n-2; i++) c[i]=c[i+1];c[n-1]=0.0;b[0]=0.0;for (i=0; i<=n-1; i++){ if ((b[i]!=0.0)&&(i-1>=0))for (j=0; j<=i-1; j++){ g=0.0;for (k=0; k<=i-1; k++)g=g+q[i*n+k]*q[k*n+j];for (k=0; k<=i-1; k++){ u=k*n+j;q[u]=q[u]-g*q[k*n+i];}}u=i*n+i;b[i]=q[u]; q[u]=1.0;if (i-1>=0)for (j=0; j<=i-1; j++){ q[i*n+j]=0.0; q[j*n+i]=0.0;}}return;}int  main()
{int i,j;static double b[5],c[5],q[5*5];static double a[5*5]={10.0,  1.0,  2.0,  3.0,  4.0,1.0,  9.0, -1.0,  2.0, -3.0,2.0, -1.0,  7.0,  3.0, -5.0,3.0,  2.0,  3.0, 12.0, -1.0,4.0, -3.0, -5.0, -1.0, 15.0};int n = 5;strq(a,n,q,b,c);printf("MAT A IS:\\n");int lda = n;int ldq = n;for (i=0; i<=4; i++){for (j=0; j<=4; j++)printf("%7.3f ",a[i*lda + j]);printf("\\n");}printf("\\n");printf("MAT Q IS:\\n");for (i=0; i<=4; i++){for (j=0; j<=4; j++)printf("%7.3f ",q[i*ldq + j]);printf("\\n");}printf("\\n");printf("MAT B IS:\\n");for (i=0; i<=4; i++)printf("%7.3f ",b[i]);printf("\\n\\n");printf("MAT C IS:\\n");for (i=0; i<=4; i++)printf("%7.3f ",c[i]);printf("\\n\\n");return 0;
}***/