QR_MUMPS
qrm_solve_rt.F90
Go to the documentation of this file.
1 !! ##############################################################################################
2 !!
3 !! Copyright 2012 CNRS, INPT
4 !!
5 !! This file is part of qr_mumps.
6 !!
7 !! qr_mumps is free software: you can redistribute it and/or modify
8 !! it under the terms of the GNU Lesser General Public License as
9 !! published by the Free Software Foundation, either version 3 of
10 !! the License, or (at your option) any later version.
11 !!
12 !! qr_mumps is distributed in the hope that it will be useful,
13 !! but WITHOUT ANY WARRANTY; without even the implied warranty of
14 !! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 !! GNU Lesser General Public License for more details.
16 !!
17 !! You can find a copy of the GNU Lesser General Public License
18 !! in the qr_mumps/doc directory.
19 !!
20 !! ##############################################################################################
21 
22 
23 !! ##############################################################################################
33 
34 
35 #include "qrm_common.h"
36 
45 subroutine _qrm_solve_rt(qrm_mat, b, x)
46 
47  use _qrm_spmat_mod
48  use _qrm_rfpf_mod
49  use qrm_mem_mod
50  use qrm_common_mod
51  use qrm_task_mod
52  use qrm_queue_mod
53  use _qrm_solve_mod, protect => _qrm_solve_rt
54  use qrm_error_mod
55  implicit none
56 
57  type(_qrm_spmat_type), target :: qrm_mat
58  _qrm_data, intent(inout) :: b(:,:)
59  _qrm_data, intent(out) :: x(:,:)
60 
61  integer :: qrm_nth, nth, thn, info, f, dones
62  integer :: i, c
63  type(qrm_task_type) :: task
64  type(qrm_task_queue_handle) :: tq_h
65  type(qrm_queue) :: ready_q
66  type(_qrm_front_type), pointer :: front
67  integer, allocatable :: status(:)
68  type(qrm_adata_type), pointer :: adata
69  type(_qrm_fdata_type), pointer :: fdata
70  logical :: got_task
71 #if defined (_OPENMP)
72  integer(kind=omp_lock_kind), allocatable :: locks(:)
73  integer(kind=omp_lock_kind) :: dlock
74 #endif
75 
76  ! error management
77  integer :: err_act
78  character(len=*), parameter :: name='qrm_aply_rt'
79 
80  call qrm_err_act_save(err_act)
81 
82  __qrm_prnt_dbg('("Solving for R^T")')
83 
84  ! simplify
85  adata => qrm_mat%adata
86  fdata => qrm_mat%fdata
87 
88  ! check if there is a singleton block and eventually solve for it
89  if (qrm_mat%adata%ncsing .gt. 0) then
90  call _qrm_solve_sing_front(qrm_mat, b, x, 't')
91  __qrm_check_ret(name,'qrm_solve_sing_front',9999)
92  end if
93 
94  ! initialize queue
95  call qrm_queue_init(ready_q , adata%nnodes, qrm_lifo_)
96 
97  ! initialize to safe values for the case where openmp is not used
98  ! the status and locks arrays are needed because multiple solves may
99  ! be run in parallel on the same fdata
100  call qrm_aalloc(status, qrm_mat%adata%nnodes)
101  __qrm_check_ret(name,'qrm_aalloc',9999)
102  !$ allocate(locks(adata%nnodes))
103 
104  do f = 1, adata%nnodes
105  status(f) = 0
106  !$ call omp_init_lock(locks(f))
107  do i=adata%childptr(f), adata%childptr(f+1)-1
108  c = adata%child(i)
109  if(adata%small(c) .eq. 0) status(f) = status(f)-1
110  end do
111  end do
112 
113  ! push all the leaves in the ready queue
114  do i=adata%nleaves, 1, -1
115  call qrm_queue_push(ready_q, adata%leaves(i))
116  status(adata%leaves(i)) = qrm_ready_
117  end do
118  !$ call omp_init_lock(dlock)
119 
120  if(adata%ncsing .gt. 0) then
121  dones = 1
122  else
123  dones = 0
124  end if
125 
126 #if defined (_OPENMP)
127  call omp_set_num_threads(1)
128  qrm_nth = qrm_mat%icntl(qrm_nthreads_)
129 #endif
130 
131  !$omp parallel &
132  !$omp & num_threads(qrm_nth) &
133  !$omp & private(got_task, task, nth, thn, info) &
134  !$omp & shared(ready_q, status, locks, dlock, dones, tq_h)
135 
136 #if defined (_OPENMP)
137  nth = omp_get_num_threads()
138  thn = omp_get_thread_num()
139 #else
140  nth = 1
141  thn = 0
142 #endif
143  info = 0
144 
145  call qrm_par_mem_init()
146  call qrm_init_task_queue(tq_h)
147 
148  taskloop: do
149  if(qrm_err_stack%nelem .gt. 0) goto 9998
150 
151  ! fill up the queue if there are too few tasks
152  if(qrm_task_queue_card(tq_h) .lt. nth) then
153  call fill_queue_rt( )
154  end if
155 
156  got_task = qrm_get_task(tq_h, task)
157  if(.not. got_task) cycle taskloop ! queue is empty
158 
159  select case(task%id)
160  case(qrm_task_exit_)
161  !$omp barrier
162  ! done, exit
163  exit
164  case(qrm_task_app_)
165  ! apply a set of Householder vectors
166  call solve_rt(task, thn)
167  end select
168  end do taskloop
169 
170 9998 continue
171  call qrm_clean_task_queue(tq_h)
172  call qrm_par_mem_finalize()
173  !$omp end parallel
174 
175 
176  call qrm_adealloc(status)
177  !$ deallocate(locks)
178  call qrm_queue_free(ready_q)
179 
180  if(qrm_err_stack%nelem .gt. 0) then
181  call qrm_err_push(21, name)
182  goto 9999
183  end if
184 
185  call qrm_err_act_restore(err_act)
186  return
187 
188 9999 continue ! error management
189  call qrm_err_act_restore(err_act)
190  if(err_act .eq. qrm_abort_) then
191  call qrm_err_check()
192  end if
193 
194  return
195 
196 contains
197 
198 
199 
200 
201 !==========================================================================================
202 !==========================================================================================
203  subroutine fill_queue_rt( )
204  implicit none
205 
206  type(_qrm_front_type), pointer :: front
207  integer :: f
208  integer :: thn
209  logical :: found
210  type(qrm_task_type) :: tsk
211 
212 #if defined (_OPENMP)
213  thn=omp_get_thread_num()
214 #else
215  thn=0
216 #endif
217 
218  found = .false.
219 
220  f = 0
221  do
222  ! write(*,*)thn,' queue next'
223  f = qrm_queue_next(ready_q, f)
224  if(f .eq. 0) exit
225 
226  front => fdata%front_list(f)
227 
228  !$ if(.not. omp_test_lock(locks(f))) cycle
229  if(status(f) .eq. qrm_ready_) then
230  tsk = qrm_task_type(qrm_task_app_, front%num, 0, 0, .false.)
231  if(qrm_sched_task(tq_h, tsk, 'h')) then
232  ! mark the column as assigned
233  found = .true.
234  status(f) = qrm_busy_
235  end if
236 
237  end if
238  !$ call omp_unset_lock(locks(f))
239  end do
240 
241  ! if nothing was found above, then check if the facto is over
242  ! otherwise return
243  if(found) return
244 
245  call check_solvert_over( )
246 
247  return
248  end subroutine fill_queue_rt
249 
250 
251 
252 
253 !==========================================================================================
254 !==========================================================================================
255  subroutine check_solvert_over( )
256  ! checks whether the apply_qt is over and eventually schedules a
257  ! termination task
258  implicit none
259 
260  type(qrm_task_type) :: tsk
261  logical :: found
262 
263  ! all the fronts are done
264  if(dones .eq. fdata%nfronts) then
265  tsk = qrm_task_type(qrm_task_exit_, 0, 0, 0, .false.)
266  found = qrm_sched_task(tq_h, tsk, 't')
267  end if
268 
269  return
270  end subroutine check_solvert_over
271 
272 
273 
274 
275 !==========================================================================================
276 !==========================================================================================
277  subroutine solve_rt(task, thn)
278  implicit none
279  type(qrm_task_type) :: task
280  integer :: thn
281 
282  type(_qrm_front_type), pointer :: front
283  integer :: f, p, c, info
284 
285  front => null()
286 
287  ! to make things easier
288  front => qrm_mat%fdata%front_list(task%front)
289  f = qrm_mat%adata%parent(task%front)
290  info = 0
291 
292  ! sweep over the children to check whether there are small subtrees
293  ! that have to be treated
294  do p = adata%childptr(front%num), adata%childptr(front%num+1)-1
295  c = adata%child(p)
296  if(adata%small(c) .eq. 1) call do_subtree_rt(c, info)
297  if(info .ne. 0) goto 9997
298  end do
299 
300  call front_rt(front, info)
301  status(task%front) = qrm_done_
302 
303  !$ call omp_set_lock( dlock )
304  dones = dones+1
305  !$ call omp_unset_lock( dlock )
306 
307  if(f .ne. 0) then
308  !$ call omp_set_lock( locks(f) )
309  status(f) = status(f)+1
310  if(status(f) .eq. qrm_ready_) call qrm_queue_push(ready_q, f)
311  !$ call omp_unset_lock( locks(f) )
312  end if
313  call qrm_queue_rm(ready_q, task%front)
314 
315 9997 continue
316  return
317  end subroutine solve_rt
318 
319 
320 
321 !==========================================================================================
322 !==========================================================================================
323  subroutine do_subtree_rt(fnum, info)
324  implicit none
325 
326  integer :: fnum, info
327 
328  type(_qrm_front_type), pointer :: front
329  integer :: node, f
330 
331  info = 0
332 
333  node = fnum
334 
335  subtree: do
336 
337  front => fdata%front_list(node)
338 
339  if (status(node) .eq. qrm_ready_) then
340  ! the front is ready to be assembled and factorized
341 
342  ! factorize
343  call front_rt(front, info)
344  if(info .ne. 0) goto 9998
345 
346  !$ call omp_set_lock( dlock )
347  dones = dones+1
348  !$ call omp_unset_lock( dlock )
349 
350  f = adata%parent(node)
351  if(f .ne. 0) then
352  !$ call omp_set_lock( locks(f) )
353  status(f) = status(f)+1
354  !$ call omp_unset_lock( locks(f) )
355  end if
356 
357  status(node) = qrm_done_
358 
359  if(node .eq. fnum) exit subtree ! we're done
360 
361  node = adata%parent(node)
362  if(node .eq. 0) exit subtree
363 
364  else
365  ! go down the subtree
366  node = adata%child(adata%childptr(node+1)+status(node))
367  cycle subtree
368  end if
369 
370 
371  end do subtree
372 
373 9998 continue
374  return
375  end subroutine do_subtree_rt
376 
377 
378 
379 
380 !==========================================================================================
381 !==========================================================================================
382  subroutine front_rt(front, info)
384  use _qrm_rfpf_mod
385  use _qrm_spmat_mod
386  use qrm_mem_mod
387  use qrm_common_mod
388  implicit none
389 
390  type(_qrm_front_type) :: front
391  integer :: info
392 
393 
394  integer :: thn
395 
396 #if defined (sprec) || defined (dprec)
397  character, parameter :: tr='t', notr='n'
398 #elif defined (cprec) || defined (zprec)
399  character, parameter :: tr='c', notr='n'
400 #endif
401 
402  integer :: pv1, c, k, m, pv2, n, r, i, cnt, jp, pk, j
403  _qrm_data, allocatable :: in_b(:,:)
404  ! error management
405  character(len=*), parameter :: name='solve_rt'
406 
407 #if defined (_OPENMP)
408  thn=omp_get_thread_num()
409 #else
410  thn = 0
411 #endif
412 
413  ! shortcut
414  if (min(front%m, front%n) .le. 0) goto 10
415  if (front%npiv .le. 0) goto 10
416 
417  !$ call omp_set_lock( locks(front%num) )
418  ! write(*,'(i3," =--> Solve R'' : ",i4)')thn, front%num
419 
420  ! allocate everything that's needed
421  call qrm_aalloc(in_b, front%n, size(x,2))
422  __qrm_check_ret(name,'qrm_aalloc',9999)
423 
424  ! do computations
425  in_b(1:front%npiv,:) = b(front%cols(1:front%npiv),:)
426  in_b(front%npiv+1:front%n,:) = _qrm_zero
427 
428 #if defined (RFPF)
429  ! ! pv1 = 1
430  ! r=1
431  ! do
432  ! if (r .gt. front%npiv) then
433  ! exit
434  ! else
435  ! k = min(front%nb, front%npiv-r+1)
436  ! end if
437 
438  ! n = front%n - r-k+1
439  ! if(n .le. 0) then
440  ! pv2 = 1
441  ! else
442  ! pv2 = pv1+(k*(k+1))/2
443  ! end if
444 
445  ! call _xrpsm('l', 'u', tr, 'n', k, size(b,2), _qrm_one, front%r(pv1), in_b(r,1), front%n)
446 
447  ! !FIXME: why _qrm_one in the BETA parameter???
448  ! if(n .gt. 0) call _xgemm(tr, notr, n, size(b,2), k, -_qrm_one, front%r(pv2), &
449  ! &k, in_b(r,1), front%n, _qrm_one, in_b(r+k,1), front%n)
450 
451  ! pv1 = pv1+(k*(k+1))/2 + k*n
452  ! r = r+k
453  ! end do
454 #else
455  n = size(in_b,2)
456  cnt=1
457  outer: do jp = 1, front%npiv, front%nb
458  pk = min(front%nb, front%npiv-jp+1)
459  if(pk .le. 0) exit outer
460 
461  inner: do j = jp, jp+pk-1, front%ib
462  m = min(front%ib, jp+pk - j)
463  if(m .le. 0) exit inner
464  k = front%n-j-m+1
465 
466  ! write(*,*)thn !,front%num,'++>',cnt, j, k+m, m
467  call _xtrsm('l', 'u', tr, 'n', m, n, _qrm_one, front%r(cnt), m, &
468  & in_b(j,1), front%n)
469  if(k.gt.0) call _xgemm(tr, notr, k, n, m, -_qrm_one, front%r(cnt+m*m), m, &
470  & in_b(j,1), front%n, _qrm_one, in_b(j+m,1), front%n)
471  cnt = cnt+m*(front%n-j+1)
472  end do inner
473  end do outer
474 #endif
475 
476 
477  !FIXME check if this critical region can be replaced with a lock of some sort
478  !$omp critical (sc_gt)
479  x(front%rows(1:front%npiv),:) = in_b(1:front%npiv,:)
480  b(front%cols(front%npiv+1:front%n),:) = b(front%cols(front%npiv+1:front%n),:)+in_b(front%npiv+1:front%n,:)
481  !$omp end critical (sc_gt)
482 
483  call qrm_adealloc(in_b)
484  __qrm_check_ret(name,'qrm_adealloc',9999)
485 
486  !$ call omp_unset_lock( locks(front%num) )
487 
488 10 continue
489 
490 9999 continue ! error
491  return
492  end subroutine front_rt
493 
494 
495 end subroutine _qrm_solve_rt
subroutine qrm_clean_task_queue(h)
Destroyes a set of queues.
This module contains generic interfaces for a number of auxiliary tools.
subroutine check_solvert_over()
Generic interface for the qrm_adealloc_i, qrm_adealloc_2i, qrm_adealloc_s, qrm_adealloc_2s, qrm_adealloc_3s, qrm_adealloc_d, qrm_adealloc_2d, qrm_adealloc_3d, qrm_adealloc_c, qrm_adealloc_2c, qrm_adealloc_3c, qrm_adealloc_z, qrm_adealloc_2z, qrm_adealloc_3z, routines.
This module contains all the facilities for front queues.
This type defines the handle for the queues attached to a family of threads.
This module contains the interfaces of all non-typed routines.
A data type meant to to define a queue.
subroutine qrm_err_push(code, sub, ied, aed)
This subroutine pushes an error on top of the stack.
subroutine qrm_par_mem_finalize()
subroutine front_rt(front, info)
subroutine solve_rt(task, thn)
subroutine qrm_par_mem_init()
This routine has to be called at the beginning of a parallel section. Afterwards, each thread will up...
This module contains all the interfaces for the typed routines in the solve phase.
subroutine qrm_err_act_save(err_act)
Saves a copy of the qrm_err_act variable.
subroutine _qrm_solve_sing_front(qrm_mat, b, x, trans)
This function handles the front containing the singletons during the solve for R or R'...
This module contains the definition of a task type that is used for scheduling tasks during the facto...
logical function qrm_sched_task(h, tsk, pol, q)
Pushes a task on a queue.
This module contains all the error management routines and data.
integer, parameter qrm_task_exit_
subroutine qrm_queue_rm(q, n)
Removes (without returning it) an element from a queue.
subroutine qrm_init_task_queue(h)
Inititalizes a set of queues attached to a family of threads referenced through the handle h...
subroutine _qrm_solve_rt(qrm_mat, b, x)
This function solves for R' against multiple vectors.
Generic interface for the qrm_aalloc_i, qrm_aalloc_2i, qrm_aalloc_s, qrm_aalloc_2s, qrm_aalloc_3s, qrm_aalloc_d, qrm_aalloc_2d, qrm_aalloc_3d, qrm_aalloc_c, qrm_aalloc_2c, qrm_aalloc_3c, qrm_aalloc_z, qrm_aalloc_2z, qrm_aalloc_3z, routines.
Definition: qrm_mem_mod.F90:78
integer, parameter qrm_abort_
Possible actions to be performed upon detection of an error.
logical function qrm_get_task(h, tsk)
Pops a task from a queue. Tasks are always popped from the head of the queue. The return value is ...
This type defines a computational task.
subroutine do_subtree_rt(fnum, info)
subroutine qrm_err_check()
This subroutine checks the errors stack. If something is found all the entries in the stack are poppe...
subroutine qrm_queue_push(q, elem)
Pushes an element on a queue.
type(qrm_err_stack_type), save qrm_err_stack
The errors stack.
This type defines the data structure used to store a matrix.
integer function qrm_task_queue_card(h)
Returns the number of tasks present on a set of queues referenced by a handle.
This module contains the definition of the basic sparse matrix type and of the associated methods...
subroutine qrm_queue_free(q)
Frees a queue.
integer, parameter qrm_task_app_
integer, parameter qrm_lifo_
parameter to define the policy of the queue: LIFO
This module implements the memory handling routines. Pretty mucch allocations and deallocations...
Definition: qrm_mem_mod.F90:38
This module contains an implementation of some operations on triangular/trapezoidal matrices stored i...
subroutine qrm_err_act_restore(err_act)
Restores the value of the qrm_err_act variable.
integer function qrm_queue_next(q, n)
Returns the element that follows n in the queue q. Very useful for sweeping through a queue...
subroutine fill_queue_rt()