Lorenz Minder on Wed, 13 May 2009 08:52:45 +0200

 Re: Another problem with matrix inversion

```Hi,

BA writes:
> On Mon, May 11, 2009 at 05:55:18AM +0200, Lorenz Minder wrote:
> > Hi,
> >
> > There's another problem with matrix inversion that I noticed.  In
> > GP/PARI if you want to invert a matrix modulo some non-prime integer m,
> > then things will not generally work out nicely.  Example:
> >
> > parisize = 4000000, primelimit = 500000
> > ? A = [ Mod(9, 10), Mod(1, 10), Mod(9, 10); Mod(6, 10), Mod(9, 10),
> Mod(7, 10); Mod(4, 10), Mod(0, 10), Mod(1, 10)];
> > ? 1/A
> >   ***   impossible inverse modulo: Mod(5, 10).
>
> PARI only know perform polynomial and linear algebra over a field, so
> it assumes that moduli are prime.

Yes, of course.  My point is that it would be better if it worked also
if the base-ring is not a field. Since even in that case, this is a
meaningful question, a useful answer would be preferable, in my opinion.

I did a sketchy implementation of what I outlined in the other mail, and
a patch is attached.  This patch only works for small integers right now,
as I have only modified Flm_gauss(), but not FpM_gauss().  It's not
yet production code, only a prototype for discussion.

It seems to work fine for what I need it, and so I'd like to
ask if a more complete and tested patch among the same lines would be
considered for inclusion in PARI.

Baring any mistakes I may have made, the code should work for any
modulus, and claim that a matrix is singular only if it is indeed
singular.

As is, the code in the patch reduces to the original algorithm if p is
a prime. For composite moduli, O(log(m)) copies of the matrix are kept
on the stack in the worst case.

Incidentially, this patch seems to fix a nasty bug with Flm_gauss() when
the right hand side is a t_COL instead of a t_MAT; but I could not find
any caller using it that way in any case. (And I _might_ be
misunderstanding the old code, but I think that is unlikely.)

I have now also noticed that there is matsolvemod(), which seems to work
fine, too. So alternatively it would be possible to implement an
algorithm on top of modsolvemod(); would that be better or does it have
other drawbacks such as slower performance?  (Is it possible to give a
bunch of right hand sides to matsolvemod() at once?)

Best,
--Lorenz
--
Neu: GMX FreeDSL Komplettanschluss mit DSL 6.000 Flatrate + Telefonanschluss für nur 17,95 Euro/mtl.!* http://dslspecial.gmx.de/freedsl-surfflat/?ac=OM.AD.PD003K11308T4569a
```
```diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c
--- a/src/basemath/alglin1.c
+++ b/src/basemath/alglin1.c
@@ -19,6 +19,9 @@
/**                          (first part)                          **/
/**                                                                **/
/********************************************************************/
+
+#include <assert.h>
+
#include "pari.h"
#include "paripriv.h"

@@ -615,6 +618,8 @@
}
return u;
}
+
+#if 0
/* assume 0 <= a[i,j] < p */
static uGEN
Fl_get_col_OK(GEN a, uGEN b, long li, ulong p)
@@ -637,15 +642,19 @@
}
return u;
}
+#endif
+
static uGEN
-Fl_get_col(GEN a, uGEN b, long li, ulong p)
+Fl_get_col(GEN a, uGEN b, long li, long last_row, ulong p)
{
uGEN u = (uGEN)cgetg(li+1,t_VECSMALL);
ulong m = b[li] % p;
long i,j;

-  u[li] = Fl_mul(m, ucoeff(a,li,li), p);
-  for (i=li-1; i>0; i--)
+  for (i=li; i>last_row; i--) {
+    u[i] = b[i] % p;
+  }
+  for (; i>0; i--)
{
m = b[i]%p;
for (j = i+1; j <= li; j++)
@@ -674,12 +683,14 @@
gel(b,i) = Fq_red(gel(b,i), T,p);
gel(b,k) = gsub(gel(b,k), gmul(m, gel(b,i)));
}
+#if 0
static void /* assume m < p && SMALL_ULONG(p) && (! (b[i] & b[k] & HIGHMASK)) */
_Fl_submul_OK(uGEN b, long k, long i, ulong m, ulong p)
{
b[k] -= m * b[i];
if (b[k] & HIGHMASK) b[k] %= p;
}
+#endif
static void /* assume m < p */
_Fl_submul(uGEN b, long k, long i, ulong m, ulong p)
{
@@ -923,40 +934,270 @@
return z;
}

-/* destroy a, b */
+/* Helper functions for Flm_gauss_sp */
+
+static int
+ulong_cmp(const void *v1, const void *v2)
+{
+  const ulong l1 = *(const ulong *)v1;
+  const ulong l2 = *(const ulong *)v2;
+
+  if(l1 < l2) return -1;
+  if(l1 > l2) return 1;
+  return 0;
+}
+
+/*
+	find_coprime_factors(a, b)
+
+	Given a, b; find m such that m | ab and that
+
+	  m * (ab/m)
+
+	is a coprime factorization of a * b.  This fails when a and b are
+	powers of the same integer.  Otherwise it succeeds.
+
+	RETURN	m	coprime on success
+		a * b	otherwise.
+ */
+
+static ulong
+find_coprime_factors(ulong a, ulong b)
+{
+   const int N = sizeof(ulong) * 8;
+   ulong r[N];
+
+   int n = 2;
+   r[0] = a;
+   r[1] = b;
+
+   /* Build a list of factors that are either coprime or identical */
+   int i;
+   for(i = 1; i < n; ++i) {
+     int j;
+     for(j = 0; j < i; ++j) {
+       if(r[i] == r[j])
+         continue;
+       long s;
+       ulong v, v1;
+       ulong g = xgcduu(r[i], r[j], 1, &v, &v1, &s);
+       if(g == 1)
+         continue;
+       if(g == r[j]) {
+         r[i] /= g;
+	 r[n++] = g;
+	 --j;
+	 continue;
+       }
+       if(g == r[i]) {
+         r[j] /= g;
+	 r[n++] = g;
+	 --j;
+	 continue;
+       }
+
+       r[i] /= g;
+       r[j] /= g;
+       r[n++] = g;
+       r[n++] = g;
+     }
+   }
+
+#if 1
+   /* Ok, so this was messy, recheck. */
+   for(i = 1; i < n; ++i) {
+     assert(r[i] != 1);
+     int j;
+     for(j = 0; j < i; ++j) {
+       if(r[i] == r[j])
+         continue;
+       long s;
+       ulong v, v1;
+       ulong g = xgcduu(r[i], r[j], 1, &v, &v1, &s);
+       assert(g == 1);
+     }
+   }
+#endif
+
+   /* Sort */
+   qsort(r, n, sizeof(ulong), ulong_cmp);
+
+   /* Build return value */
+   ulong ret = r[0];
+   for(i = 1; i < n && r[i] == r[0]; ++i)
+     ret *= r[0];
+
+   return ret;
+}
+
+/*
+	GEN Flm_gauss_sp(a, b, ulong p)
+
+	Perform Gaussian elimination modulo a small integer p. a is the
+	matrix, b the right hand side, which can be either a matrix or a
+	column vector.
+
+	This is the worker function that destroys both a and b.
+ */
+
static GEN
Flm_gauss_sp(GEN a, GEN b, ulong p)
{
-  long i, j, k, li, bco, aco = lg(a)-1;
-  const int OK_ulong = 0;
-  int iscol;
-  GEN u;
+  /* Return Empty matrix if a is empty. */
+  const long int aco = lg(a)-1;		/* aco: # of columns of a */
+  if (!aco) return cgetg(1,t_MAT);

-  if (!aco) return cgetg(1,t_MAT);
-  li = lg(a[1])-1;
-  bco = lg(b)-1;
-  iscol = (typ(b)!=t_MAT);
+  const long int li = lg(a[1])-1;	/* li: # of rows of a */
+
+  /* If b is a column vector,
+     temporarily wrap it into a matrix. */
+  const int iscol = (typ(b)!=t_MAT);
if (iscol) b = mkmat(b);
+  const long int bco = lg(b)-1;		/* bco: # of cols of b */
+
+  /* First step: Triangularize the matrix. */
+  long int i, j, k;
for (i=1; i<=aco; i++)
{
-    ulong invpiv;
-    /* Fl_get_col wants 0 <= a[i,j] < p for all i,j */
-    if (OK_ulong) for (k = 1; k < i; k++) ucoeff(a,k,i) %= p;
+    /* Find a pivot row in the ith column */
+    int done = 0;	/* Flag to cancel the rest of the triangularization
+    			   procedure */
for (k = i; k <= li; k++)
{
-      ulong piv = ( ucoeff(a,k,i) %= p );
-      if (piv) { ucoeff(a,k,i) = Fl_inv(piv, p); break; }
+      const ulong piv = ( ucoeff(a,k,i) %= p );
+      if (piv != 0) {
+
+        /* Compute the inverse of the pivot */
+	ulong xv, xv1;
+	long s;
+	ulong g = xgcduu(p, piv, 1, &xv, &xv1, &s);
+
+	if (g != 1ul) {
+	  /*
+	   * Not invertible ?
+	   *
+	   * If this happens, g is a nontrivial factor of p, and so we
+	   * can split the computation into two instances that solve the
+	   * remaining problem mod g and mod (p/g), respectively.
+	   * Then we lift these partial solutions to one that is the
+	   * correct solution mod p.
+	   *
+	   * The splitting will not work if g and pg := p/g are not
+	   * coprime.  We can always find a coprime factorization
+	   * unless g and pg are powers of the same integer.  If this is
+	   * true for every candidate pivot, the matrix is not
+	   * invertible.
+	   */
+
+	  /* Make sure p and pg have no common factors */
+	  g = find_coprime_factors(g, p/g);
+	  if(g == p)  /* Row can't be pivot. */
+	    continue;
+	  ulong pg = p/g;
+
+          /* Exchange the rows k and i */
+	  if (k != i)
+	  {
+	    for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
+	    for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
+	  }
+
+	  /* Save sp, so we can later reclaim scratch storage. */
+	  pari_sp av = avma;
+
+	  /* Build and solve Z/gZ instance */
+          GEN aa = cgetg(1 + (aco - i + 1), t_MAT);
+	  for (j = 1; j <= aco - i + 1; ++j) {
+	    GEN v = (gel(aa, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
+	    long int e;
+	    for (e = 1; e <= li - i + 1; ++e) {
+	      v[e] = gel(a, j + i - 1)[e + i - 1] % g;
+	    }
+	  }
+
+	  GEN bb = cgetg(bco + 1, t_MAT);
+	  for (j = 1; j <= bco; ++j) {
+	    GEN v = (gel(bb, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
+	    long int e;
+	    for (e = 1; e <= li - i + 1; ++e) {
+	      v[e] = gel(b, j)[e + i - 1] % g;
+	    }
+	  }
+
+	  GEN sg = Flm_gauss_sp(aa, bb, g);
+	  if(sg == 0) { avma = av; return NULL; }
+
+	  /* Build and solve Z/(p/g)Z instance */
+          aa = cgetg(1 + (aco - i + 1), t_MAT);
+	  for (j = 1; j <= aco - i + 1; ++j) {
+	    GEN v = (gel(aa, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
+	    long int e;
+	    for (e = 1; e <= li - i + 1; ++e) {
+	      v[e] = gel(a, j + i - 1)[e + i - 1] % pg;
+	    }
+	  }
+
+	  bb = cgetg(bco + 1, t_MAT);
+	  for (j = 1; j <= bco; ++j) {
+	    GEN v = (gel(bb, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
+	    long int e;
+	    for (e = 1; e <= li - i + 1; ++e) {
+	      v[e] = gel(b, j)[e + i - 1] % pg;
+	    }
+	  }
+
+	  GEN spg = Flm_gauss_sp(aa, bb, pg);
+	  if(spg == 0) { avma = av; return NULL; }
+
+	  /* Combine.
+	   * Here,	ipg is 1 mod p/g and 0 mod g,
+	   *		ig  is 0 mod p/g and 1 mod g.
+	   */
+	  const ulong ipg = Fl_mul(g, Fl_inv(g % pg, pg), p);
+	  const ulong ig = Fl_mul(pg, Fl_inv(pg % g, g), p);
+	  for (j = 1; j <= bco; ++j) {
+	    long int e;
+	    GEN v = gel(b, j);
+	    GEN vsg = gel(sg, j);
+	    GEN vspg = gel(spg, j);
+	    for (e = 1; e <= li - i + 1; ++e) {
+	      v[e + i - 1] = Fl_add(Fl_mul(ig, vsg[e], p),
+	        Fl_mul(ipg, vspg[e], p), p);
+	    }
+	  }
+
+	  /* Done, readjust counters and go to backsubstitution */
+	  avma = av;
+	  --i;
+          done = 1;
+	  break;
+	}
+
+        /* We store the inverse pivot value in the (k, i) position for
+	   later use. */
+	xv = xv1 % p;
+	if (s < 0) xv = p - xv;
+        ucoeff(a,k,i) = xv;
+	break;
+      }
}
-    /* found a pivot on line k */
+
+    /* no pivot? matrix not invertible, abort */
if (k > li) return NULL;
+    if (done) break;
+
+    /* swap rows to bring pivot to row i */
if (k != i)
-    { /* swap lines so that k = i */
+    {
for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
}
+
+    /* Last column? Done. */
if (i == aco) break;

-    invpiv = ucoeff(a,i,i); /* 1/piv mod p */
+    /* Use the pivot row to eliminate column i in rows > i */
+    const ulong invpiv = ucoeff(a,i,i); /* 1/piv mod p */
for (k=i+1; k<=li; k++)
{
ulong m = ( ucoeff(a,k,i) %= p );
@@ -966,20 +1207,18 @@
if (m == 1) {
for (j=i+1; j<=aco; j++) _Fl_sub((uGEN)a[j],k,i, p);
for (j=1;   j<=bco; j++) _Fl_sub((uGEN)b[j],k,i, p);
-      } else if (OK_ulong) {
-	for (j=i+1; j<=aco; j++) _Fl_submul_OK((uGEN)a[j],k,i,m, p);
-	for (j=1;   j<=bco; j++) _Fl_submul_OK((uGEN)b[j],k,i,m, p);
} else {
for (j=i+1; j<=aco; j++) _Fl_submul((uGEN)a[j],k,i,m, p);
for (j=1;   j<=bco; j++) _Fl_submul((uGEN)b[j],k,i,m, p);
}
}
}
-  u = cgetg(bco+1,t_MAT);
-  if (OK_ulong)
-    for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col_OK(a,(uGEN)b[j], aco,p);
-  else
-    for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col(a,(uGEN)b[j], aco,p);
+
+  /* Second Step: Backsubstitute. */
+  GEN u = cgetg(bco+1,t_MAT);
+  for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col(a,(uGEN)b[j], aco, i, p);
+
+  /* Done, unwrap result if necessary */
return iscol? gel(u,1): u;
}

```