96 #include "klu2_internal.h" 
   97 #include "klu2_kernel.hpp" 
   98 #include "klu2_memory.hpp" 
  100 template <
typename Entry, 
typename Int>
 
  101 size_t KLU_kernel_factor            
 
  136     KLU_common<Entry, Int> *Common
 
  139     double maxlnz, dunits ;
 
  141     Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
 
  142     Int lsize, usize, anz, ok ;
 
  144     ASSERT (Common != NULL) ;
 
  151     anz = Ap [n+k1] - Ap [k1] ;
 
  156         Lsize = MAX (Lsize, 1.0) ;
 
  157         lsize = (Int) Lsize * anz + n ;
 
  161         lsize = (Int) Lsize ;
 
  166     lsize  = MAX (n+1, lsize) ;
 
  167     usize  = MAX (n+1, usize) ;
 
  169     maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
 
  170     maxlnz = MIN (maxlnz, ((
double) INT_MAX)) ;
 
  171     lsize  = (Int) MIN (maxlnz, lsize) ;
 
  172     usize  = (Int) MIN (maxlnz, usize) ;
 
  174     PRINTF ((
"Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
 
  175         n, anz, k1, lsize, usize, maxlnz)) ;
 
  182     *p_LU = (Unit *) NULL ;
 
  186     Pinv = (Int *) W ;      W += n ;
 
  187     Stack = (Int *) W ;     W += n ;
 
  188     Flag = (Int *) W ;      W += n ;
 
  189     Lpend = (Int *) W ;     W += n ;
 
  190     Ap_pos = (Int *) W ;    W += n ;
 
  192     dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
 
  193              DUNITS (Int, usize) + DUNITS (Entry, usize) ;
 
  194     lusize = (size_t) dunits ;
 
  195     ok = !INT_OVERFLOW (dunits) ; 
 
  196     LU = (Unit *) (ok ? KLU_malloc (lusize, 
sizeof (Unit), Common) : NULL) ;
 
  200         Common->status = KLU_OUT_OF_MEMORY ;
 
  210     lusize = KLU_kernel<Entry> (n, Ap, Ai, Ax, Q, lusize,
 
  211             Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
 
  212             X, Stack, Flag, Ap_pos, Lpend,
 
  213             k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
 
  219     if (Common->status < KLU_OK)
 
  221         LU = (Unit *) KLU_free (LU, lusize, 
sizeof (Unit), Common) ;
 
  225     PRINTF ((
" in klu noffdiag %d\n", Common->noffdiag)) ;
 
  238 template <
typename Entry, 
typename Int>
 
  260             for (k = 0 ; k < n ; k++)
 
  263                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
 
  265                 for (p = 0 ; p < len ; p++)
 
  268                     MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
 
  275             for (k = 0 ; k < n ; k++)
 
  278                 x [1] = X [2*k + 1] ;
 
  279                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
 
  280                 for (p = 0 ; p < len ; p++)
 
  284                     MULT_SUB (X [2*i], lik, x [0]) ;
 
  285                     MULT_SUB (X [2*i + 1], lik, x [1]) ;
 
  292             for (k = 0 ; k < n ; k++)
 
  295                 x [1] = X [3*k + 1] ;
 
  296                 x [2] = X [3*k + 2] ;
 
  297                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
 
  298                 for (p = 0 ; p < len ; p++)
 
  302                     MULT_SUB (X [3*i], lik, x [0]) ;
 
  303                     MULT_SUB (X [3*i + 1], lik, x [1]) ;
 
  304                     MULT_SUB (X [3*i + 2], lik, x [2]) ;
 
  311             for (k = 0 ; k < n ; k++)
 
  314                 x [1] = X [4*k + 1] ;
 
  315                 x [2] = X [4*k + 2] ;
 
  316                 x [3] = X [4*k + 3] ;
 
  317                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
 
  318                 for (p = 0 ; p < len ; p++)
 
  322                     MULT_SUB (X [4*i], lik, x [0]) ;
 
  323                     MULT_SUB (X [4*i + 1], lik, x [1]) ;
 
  324                     MULT_SUB (X [4*i + 2], lik, x [2]) ;
 
  325                     MULT_SUB (X [4*i + 3], lik, x [3]) ;
 
  342 template <
typename Entry, 
typename Int>
 
  356     Entry x [4], uik, ukk ;
 
  366             for (k = n-1 ; k >= 0 ; k--)
 
  368                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
 
  370                 DIV (x [0], X [k], Udiag [k]) ;
 
  372                 for (p = 0 ; p < len ; p++)
 
  375                     MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
 
  384             for (k = n-1 ; k >= 0 ; k--)
 
  386                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
 
  390                 DIV (x [0], X [2*k], ukk) ;
 
  391                 DIV (x [1], X [2*k + 1], ukk) ;
 
  394                 X [2*k + 1] = x [1] ;
 
  395                 for (p = 0 ; p < len ; p++)
 
  401                     MULT_SUB (X [2*i], uik, x [0]) ;
 
  402                     MULT_SUB (X [2*i + 1], uik, x [1]) ;
 
  410             for (k = n-1 ; k >= 0 ; k--)
 
  412                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
 
  415                 DIV (x [0], X [3*k], ukk) ;
 
  416                 DIV (x [1], X [3*k + 1], ukk) ;
 
  417                 DIV (x [2], X [3*k + 2], ukk) ;
 
  420                 X [3*k + 1] = x [1] ;
 
  421                 X [3*k + 2] = x [2] ;
 
  422                 for (p = 0 ; p < len ; p++)
 
  426                     MULT_SUB (X [3*i], uik, x [0]) ;
 
  427                     MULT_SUB (X [3*i + 1], uik, x [1]) ;
 
  428                     MULT_SUB (X [3*i + 2], uik, x [2]) ;
 
  436             for (k = n-1 ; k >= 0 ; k--)
 
  438                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
 
  441                 DIV (x [0], X [4*k], ukk) ;
 
  442                 DIV (x [1], X [4*k + 1], ukk) ;
 
  443                 DIV (x [2], X [4*k + 2], ukk) ;
 
  444                 DIV (x [3], X [4*k + 3], ukk) ;
 
  447                 X [4*k + 1] = x [1] ;
 
  448                 X [4*k + 2] = x [2] ;
 
  449                 X [4*k + 3] = x [3] ;
 
  450                 for (p = 0 ; p < len ; p++)
 
  455                     MULT_SUB (X [4*i], uik, x [0]) ;
 
  456                     MULT_SUB (X [4*i + 1], uik, x [1]) ;
 
  457                     MULT_SUB (X [4*i + 2], uik, x [2]) ;
 
  458                     MULT_SUB (X [4*i + 3], uik, x [3]) ;
 
  476 template <
typename Entry, 
typename Int>
 
  502             for (k = n-1 ; k >= 0 ; k--)
 
  504                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
 
  506                 for (p = 0 ; p < len ; p++)
 
  512                         MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
 
  518                         MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
 
  527             for (k = n-1 ; k >= 0 ; k--)
 
  530                 x [1] = X [2*k + 1] ;
 
  531                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
 
  532                 for (p = 0 ; p < len ; p++)
 
  545                     MULT_SUB (x [0], lik, X [2*i]) ;
 
  546                     MULT_SUB (x [1], lik, X [2*i + 1]) ;
 
  549                 X [2*k + 1] = x [1] ;
 
  555             for (k = n-1 ; k >= 0 ; k--)
 
  558                 x [1] = X [3*k + 1] ;
 
  559                 x [2] = X [3*k + 2] ;
 
  560                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
 
  561                 for (p = 0 ; p < len ; p++)
 
  574                     MULT_SUB (x [0], lik, X [3*i]) ;
 
  575                     MULT_SUB (x [1], lik, X [3*i + 1]) ;
 
  576                     MULT_SUB (x [2], lik, X [3*i + 2]) ;
 
  579                 X [3*k + 1] = x [1] ;
 
  580                 X [3*k + 2] = x [2] ;
 
  586             for (k = n-1 ; k >= 0 ; k--)
 
  589                 x [1] = X [4*k + 1] ;
 
  590                 x [2] = X [4*k + 2] ;
 
  591                 x [3] = X [4*k + 3] ;
 
  592                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
 
  593                 for (p = 0 ; p < len ; p++)
 
  606                     MULT_SUB (x [0], lik, X [4*i]) ;
 
  607                     MULT_SUB (x [1], lik, X [4*i + 1]) ;
 
  608                     MULT_SUB (x [2], lik, X [4*i + 2]) ;
 
  609                     MULT_SUB (x [3], lik, X [4*i + 3]) ;
 
  612                 X [4*k + 1] = x [1] ;
 
  613                 X [4*k + 2] = x [2] ;
 
  614                 X [4*k + 3] = x [3] ;
 
  630 template <
typename Entry, 
typename Int>
 
  647     Entry x [4], uik, ukk ;
 
  657             for (k = 0 ; k < n ; k++)
 
  659                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
 
  661                 for (p = 0 ; p < len ; p++)
 
  667                         MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
 
  673                         MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
 
  679                     CONJ (ukk, Udiag [k]) ;
 
  686                 DIV (X [k], x [0], ukk) ;
 
  692             for (k = 0 ; k < n ; k++)
 
  694                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
 
  696                 x [1] = X [2*k + 1] ;
 
  697                 for (p = 0 ; p < len ; p++)
 
  710                     MULT_SUB (x [0], uik, X [2*i]) ;
 
  711                     MULT_SUB (x [1], uik, X [2*i + 1]) ;
 
  716                     CONJ (ukk, Udiag [k]) ;
 
  723                 DIV (X [2*k], x [0], ukk) ;
 
  724                 DIV (X [2*k + 1], x [1], ukk) ;
 
  730             for (k = 0 ; k < n ; k++)
 
  732                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
 
  734                 x [1] = X [3*k + 1] ;
 
  735                 x [2] = X [3*k + 2] ;
 
  736                 for (p = 0 ; p < len ; p++)
 
  749                     MULT_SUB (x [0], uik, X [3*i]) ;
 
  750                     MULT_SUB (x [1], uik, X [3*i + 1]) ;
 
  751                     MULT_SUB (x [2], uik, X [3*i + 2]) ;
 
  756                     CONJ (ukk, Udiag [k]) ;
 
  763                 DIV (X [3*k], x [0], ukk) ;
 
  764                 DIV (X [3*k + 1], x [1], ukk) ;
 
  765                 DIV (X [3*k + 2], x [2], ukk) ;
 
  771             for (k = 0 ; k < n ; k++)
 
  773                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
 
  775                 x [1] = X [4*k + 1] ;
 
  776                 x [2] = X [4*k + 2] ;
 
  777                 x [3] = X [4*k + 3] ;
 
  778                 for (p = 0 ; p < len ; p++)
 
  791                     MULT_SUB (x [0], uik, X [4*i]) ;
 
  792                     MULT_SUB (x [1], uik, X [4*i + 1]) ;
 
  793                     MULT_SUB (x [2], uik, X [4*i + 2]) ;
 
  794                     MULT_SUB (x [3], uik, X [4*i + 3]) ;
 
  799                     CONJ (ukk, Udiag [k]) ;
 
  806                 DIV (X [4*k], x [0], ukk) ;
 
  807                 DIV (X [4*k + 1], x [1], ukk) ;
 
  808                 DIV (X [4*k + 2], x [2], ukk) ;
 
  809                 DIV (X [4*k + 3], x [3], ukk) ;