A pthread problem
Hello,
I run my pthread code on a Linux machine with 4 processors. However, the speedup is only 2 times.
The code solves the equations (G+s(i)C)z(i)=B*us(i), i=1,...,n. Here G and C are m*m matrices, B*us(i) is an m*1 vector, and s(i) are n different numbers. I need to solve the equation n times to get z(1)...z(n), and I use multiple threads to solve the n equations.
E.g., suppose I have 4 threads and n=16. Thread(1) solves 4 equations (G+s(1,2,3,4)C)X(1,2,3,4)=B. Thread(2) solves (G+s(5,6,7,8)C)X(5,6,7,8)=B....
I use pthread_create() and pthread_join(). However, with 4 threads it is not 4 times as fast as with 1 thread, only 2 times. With 2 threads, it is about 1.5 times as fast as with 1 thread. What I have observed is that solving one equation while 4 threads are running is much slower than solving one equation with 1 thread.
Can anybody tell me what the reason is? Thanks a lot.
Below is the code. Thanks,
/* Argument bundle passed to a worker thread through the void* parameter of
 * its entry point; the entry point unpacks it and forwards every field to
 * sampleLU(). */
typedef struct{
    /* Index of this worker and the total number of workers. */
    int thread_no;
    int allthread;

    /* Matrix operands, forwarded as pointers. */
    mat *G;
    mat *C;
    mat *B;

    /* Dereferenced by the entry point and passed on by reference. */
    mat *us;
    mat *Z;
    vec *samples;

    /* Forwarded unchanged as double arrays. */
    double *Control;
    double *Info;
} parm;
void sampleLU(int thread_no, int allthread, mat *G, mat *C, mat *B, mat &us, mat &Z, vec &samples, double Control[], double Info[])
{
Real_Timer lu_symbolic_init, lu_symbolic_free, lu_numerical, lu_solve_time;
Real_Timer sCpG_run_time;
int np = samples.size();
int nDim = B->m;
int start, stop;
start = thread_no * (int)(np/allthread);
stop = start + (int)(np/allthread) - 1;
if( thread_no == allthread-1 ) stop = np-1;
for (int i = start; i<=stop; i++ ){
cs *A;
if(thread_no == 0) {
sCpG_run_time.start();
A = G+sample(i)*C;
sCpG_run_time.stop();
}
else{
A = G+sample(i)*C;
}
/* LU decomposition */
...
if(i == start){
if(thread_no == 0) {
lu_symbolic_init.start();
A = LU;
lu_symbolic_init.stop();
}
else {
A = LU
}
}
/* solve Az = b */
double* z = new double[nDim];
vec b(nDim);
b.zeros();
if(thread_no == 0) lu_solve_time.start();
LUz=b
vec zz(z, nDim);
delete [] z;
Z.set_col(i, zz);
...
}
if(thread_no == 0){
std::cout << "Thread" << thread_no << " sC+G \t:" << sCpG_run_time.get_time() << std::endl;
std::cout << "Thread" << thread_no << " symbolic initial time: \t"<<lu_symbolic_init.get_time()<<std::endl;
std::cout << "Thread" << thread_no << " LU decomposition time \t:" << lu_numerical.get_time() << std::endl;
std::cout << "Thread" << thread_no << " symbolic free time: \t"<<lu_symbolic_init.get_time()<<std::endl;
std::cout << "Thread" << thread_no << " LU solve time \t: " << lu_solve_time.get_time() <<std::endl;
}
}
void * psampleLU(void *arg)
{
parm *p = (parm *)arg;
sampleLU(p->thread_no, p->allthread, p->G, p->C, p->B, *(p->us), *(p->Z), *(p->samples), p->Control, p->Info);
return NULL;
}