Bill Allombert on Wed, 20 May 2009 15:43:52 +0200


[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

Re: Another problem with matrix inversion


On Wed, May 20, 2009 at 12:00:21PM +0200, Lorenz Minder wrote:
> Hi,

Hello,

Sorry for the rushed our reply, but maybe you would prefer it to 
a long delayed one.

> +
> +#include <assert.h>
> +

Please do not use assert.h. 
Instead you can use pari_err(bugparier,"function, reason");

> @@ -581,13 +584,16 @@
>    return u;
>  }
>  static GEN
> -Fp_get_col(GEN a, GEN b, long li, GEN p)
> +Fp_get_col(GEN a, GEN b, long li, long last_row, GEN p)

I take you are going to use it for non-prime moduli ? 
So it it should probably renamed to ZnM_get_col.
Add a comment that explain what last_row do.

>  {
>    GEN u = cgetg(li+1,t_COL);
>    long i, j;
>  
> -  gel(u,li) = Fp_mul(gel(b,li), gcoeff(a,li,li), p);
> -  for (i=li-1; i>0; i--)
> +  for (i=li; i>last_row; i--) {
> +    pari_sp av = avma;
> +    gel(u, i) = gerepileuptoint(av, modii(gel(b, i), p));
> +  }

you do not need to use gerepileuptoint() here: there no garbage on
the stack, only the result from modii, which you want to keep.

> +  for (; i>0; i--)
>    {
>      pari_sp av = avma;
>      GEN m = gel(b,i);
> @@ -637,15 +643,18 @@
>    }
>    return u;
>  }
> +
>  static uGEN
> -Fl_get_col(GEN a, uGEN b, long li, ulong p)
> +Fl_get_col(GEN a, uGEN b, long li, long last_row, ulong p)

Same issue with the name. Furthermore I do not like the ZlM prefix because
Zp is for padic integers, so Zl should be for hypothetic l-adic integer
for small l. We should pick something else.

>  {
>    uGEN u = (uGEN)cgetg(li+1,t_VECSMALL);
> -  ulong m = b[li] % p;
> +  ulong m;
>    long i,j;
>  
> -  u[li] = Fl_mul(m, ucoeff(a,li,li), p);
> -  for (i=li-1; i>0; i--)
> +  for (i=li; i>last_row; i--) {
> +    u[i] = b[i] % p;
> +  }

Please do not put brace around single statement.

> +  for (; i>0; i--)
>    {
>      m = b[i]%p;
>      for (j = i+1; j <= li; j++)
> @@ -749,7 +758,7 @@
>    if (!is_FpM(&a, &p)) return 0;
>    if (!b)
>    {
> -    a = FpM_inv(a,p);
> +    a = ZnM_inv(a,p);
>      if (a) a = FpM_to_mod(a, p);
>    }

Same naming issue: it is annoying to mix FpM and ZnM functions.
(If a function is used with a ZnM as input , it should be called ZnM_something)
Maybe you should simply add aliases in pariinl.h
src/headers/pariinl.h.

>    else
> @@ -758,12 +767,12 @@
>      {
>        case t_COL:
>          if (!is_FpC(&b, &p)) return 0;
> -        a = FpM_gauss(a,b,p);
> +        a = ZnM_gauss(a,b,p);
>          if (a) a = FpC_to_mod(a, p);
>          break;
>        case t_MAT:
>          if (!is_FpM(&b, &p)) return 0;
> -        a = FpM_gauss(a,b,p);
> +        a = ZnM_gauss(a,b,p);
>          if (a) a = FpM_to_mod(a, p);
>          break;
>        default: return 0;

Same problem.

> @@ -771,6 +780,7 @@
>    }
>    *u = a; return 1;
>  }
> +
>  /* Gaussan Elimination. Compute a^(-1)*b
>   * b is a matrix or column vector, NULL meaning: take the identity matrix
>   * If a and b are empty, the result is the empty matrix.
> @@ -976,10 +986,309 @@
>    if (OK_ulong)
>      for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col_OK(a,(uGEN)b[j], aco,p);
>    else
> -    for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col(a,(uGEN)b[j], aco,p);
> +    for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col(a,(uGEN)b[j], aco, aco, p);
>    return u;
>  }
>  
> +/* Helper functions for Zlm_gauss_fl */
> +
> +static int
> +ulong_cmp(const void *v1, const void *v2)
> +{
> +  const ulong l1 = *(const ulong *)v1;
> +  const ulong l2 = *(const ulong *)v2;
> +
> +  if(l1 < l2) return -1;
> +  if(l1 > l2) return 1;
> +  return 0;
> +}

this function already exist.

> +
> +/*
> +	find_coprime_factors(a, b)
> +
> +	Given a, b; find m such that m | ab and that
> +
> +	  m * (ab/m)
> +
> +	is a coprime factorization of a * b.  This fails when a and b are
> +	powers of the same integer.  Otherwise it succeeds.
> +
> +	RETURN	m	coprime on success
> +		a * b	otherwise.
> + */

These functions should probably serve as a basis for a general 
'unfactored integers' facility.

> +static ulong
> +find_coprime_factors(ulong a, ulong b)
> +{
> +   const int N = sizeof(ulong) * 8;
> +   ulong r[N];
> +
> +   int n = 2;
> +   r[0] = a;
> +   r[1] = b;
> +
> +   /* Build a list of factors that are either coprime or identical */
> +   int i;
> +   for(i = 1; i < n; ++i) {
> +     int j;
> +     for(j = 0; j < i; ++j) {
> +       if(r[i] == r[j])
> +         continue;
> +       long s;
> +       ulong v  = (r[i] > r[j] ? r[i] : r[j]),
> +             v1 = (r[i] > r[j] ? r[j] : r[i]);
> +       ulong g = xgcduu(v, v1, 1, &v, &v1, &s);
> +       if(g == 1)
> +         continue;
> +       if(g == r[j]) {
> +         r[i] /= g;
> +	 r[n++] = g;
> +	 --j;
> +	 continue;
> +       }
> +       if(g == r[i]) {
> +         r[j] /= g;
> +	 r[n++] = g;
> +	 --j;
> +	 continue;
> +       }
> +
> +       r[i] /= g;
> +       r[j] /= g;
> +       r[n++] = g;
> +       r[n++] = g;
> +     }
> +   }
> +
> +   /* Sort */
> +   qsort(r, n, sizeof(ulong), ulong_cmp);

Try to avoid qsort and use one of PARI sort function.

> +#if 0
> +   /* Debugging: Recheck */
> +   for(i = 1; i < n; ++i) {
> +     assert(r[i] != 1);
> +     int j;
> +     for(j = 0; j < i; ++j) {
> +       if(r[i] == r[j])
> +         continue;
> +       long s;
> +       ulong v, v1;
> +       ulong g = xgcduu(r[i], r[j], 1, &v, &v1, &s);
> +       assert(g == 1);
> +     }
> +   }
> +#endif
> +
> +   /* Build return value */
> +   ulong ret = r[0];
> +   for(i = 1; i < n && r[i] == r[0]; ++i)
> +     ret *= r[0];
> +
> +   return ret;
> +}
> +
> +static GEN
> +Zlm_gauss_fl(GEN a, GEN b, ulong p, int mod_is_prime);
> +
> +static int
> +Zlm_gauss_splitinst(GEN a, GEN b, ulong m, int i, int k, ulong g)
> +{
> +  const int aco = lg(a) - 1;
> +  const int bco = lg(b) - 1;
> +  const int li = lg(a[1]) - 1;
> +
> +  /* Find two coprime factors for g */
> +  g = find_coprime_factors(g, m/g);
> +  if(g == m)
> +    return 0;	/* This row can't be used for pivot. */
> +  ulong mg = m/g;
> +
> +  /* Save sp, so we can later reclaim scratch storage. */
> +  pari_sp av = avma;
> +
> +  /* Swap rows i and k */
> +  if (k != i)
> +  {
> +    int j;
> +    for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
> +    for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
> +  }
> +
> +  /* Build and solve Z/gZ instance */
> +  GEN aa = cgetg(1 + (aco - i + 1), t_MAT);
> +  int j;
> +  for (j = 1; j <= aco - i + 1; ++j) {
> +    GEN v = (gel(aa, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
> +    long int e;
> +    for (e = 1; e <= li - i + 1; ++e) {
> +      v[e] = gel(a, j + i - 1)[e + i - 1] % g;
> +    }
> +  }
> +
> +  GEN bb = cgetg(bco + 1, t_MAT);
> +  for (j = 1; j <= bco; ++j) {
> +    GEN v = (gel(bb, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
> +    long int e;
> +    for (e = 1; e <= li - i + 1; ++e) {
> +      v[e] = gel(b, j)[e + i - 1] % g;
> +    }
> +  }
> +
> +  GEN sg = Zlm_gauss_fl(aa, bb, g, 0);
> +  if(sg == 0) { avma = av; return 1; }
> +
> +  /* Build and solve Z/(m/g)Z instance */
> +  aa = cgetg(1 + (aco - i + 1), t_MAT);
> +  for (j = 1; j <= aco - i + 1; ++j) {
> +    GEN v = (gel(aa, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
> +    long int e;
> +    for (e = 1; e <= li - i + 1; ++e) {
> +      v[e] = gel(a, j + i - 1)[e + i - 1] % mg;
> +    }
> +  }
> +
> +  bb = cgetg(bco + 1, t_MAT);
> +  for (j = 1; j <= bco; ++j) {
> +    GEN v = (gel(bb, j) = cgetg(1 + (li - i + 1), t_VECSMALL));
> +    long int e;
> +    for (e = 1; e <= li - i + 1; ++e) {
> +      v[e] = gel(b, j)[e + i - 1] % mg;
> +    }
> +  }
> +
> +  GEN spg = Zlm_gauss_fl(aa, bb, mg, 0);
> +  if(spg == 0) { avma = av; return 1; }
> +
> +  /* Combine.
> +   * Here,	ipg is 1 mod m/g and 0 mod g,
> +   *		ig  is 0 mod m/g and 1 mod g.
> +   */
> +  const ulong ipg = Fl_mul(g, Fl_inv(g % mg, mg), m);
> +  const ulong ig = Fl_mul(mg, Fl_inv(mg % g, g), m);
> +  for (j = 1; j <= bco; ++j) {
> +    long int e;
> +    GEN v = gel(b, j);
> +    GEN vsg = gel(sg, j);
> +    GEN vspg = gel(spg, j);
> +    for (e = 1; e <= li - i + 1; ++e) {
> +      v[e + i - 1] = Fl_add(Fl_mul(ig, vsg[e], m),
> +        Fl_mul(ipg, vspg[e], m), m);
> +    }
> +  }
> +
> +  /* Done */
> +  avma = av;
> +  return 2;
> +}
> +
> +/* Zlm_gauss_fl().  A version of Zlm_gauss() that does not copy a, b and
> +   thus garbles them in the process.  It also takes an additional
> +   mod_is_prime flag.
> +   mod_is_prime: Flag to abort with error if a nonzero noninvertible
> +   pivot is found. Useful to imitate Flm_gauss_sp(). */
> +static GEN
> +Zlm_gauss_fl(GEN a, GEN b, ulong p, int mod_is_prime)
> +{
> +  /* Return Empty matrix if a is empty. */
> +  const long int aco = lg(a)-1;		/* aco: # of columns of a */
> +  if (!aco) return cgetg(1,t_MAT);
> +
> +  const long int li = lg(a[1])-1;	/* li: # of rows of a */
> +
> +  /* If b is a column vector,
> +     temporarily wrap it into a matrix. */
> +  const int iscol = (typ(b)!=t_MAT);
> +  if (iscol) b = mkmat(b);
> +  const long int bco = lg(b)-1;		/* bco: # of cols of b */
> +
> +  /* First step: Triangularize the matrix. */
> +  long int i, j, k;
> +  for (i=1; i<=aco; i++)
> +  {
> +    /* Find a pivot row in the ith column */
> +    int done = 0;	/* Flag to cancel the rest of the triangularization
> +			   procedure */
> +    for (k = i; k <= li; k++)
> +    {
> +      const ulong piv = ( ucoeff(a,k,i) %= p );
> +      if (piv != 0) {
> +
> +        /* Compute the inverse of the pivot */
> +	ulong xv, xv1;
> +	long s;
> +	ulong g = xgcduu(p, piv, 1, &xv, &xv1, &s);
> +
> +	if (g != 1ul) {
> +	  if(mod_is_prime) {
> +	    /* Trigger error */
> +	    Fl_inv(piv, p);
> +	    return NULL;
> +	  }
> +
> +	  /* Pivot not invertible ? -> attempt to split to 2 instances */
> +	  const int r = Zlm_gauss_splitinst(a, b, p, i, k, g);
> +	  if (r == 0)
> +	    continue; /* could not use pivot at row k */
> +	  else if (r == 1)
> +	    return NULL; /* system has no valid solution */
> +
> +	  /* Done, readjust counters and go to backsubstitution */
> +	  --i;
> +          done = 1;
> +	  break;
> +	}
> +
> +        /* We store the inverse pivot value in the (k, i) position for
> +	   later use. */
> +	xv = xv1 % p;
> +	if (s < 0) xv = p - xv;
> +        ucoeff(a,k,i) = xv;
> +	break;
> +      }
> +    }
> +
> +    /* no pivot? matrix not invertible, abort */
> +    if (k > li) return NULL;
> +    if (done) break;
> +
> +    /* Exchange the rows k and i */
> +    if (k != i)
> +    {
> +      for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
> +      for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
> +    }
> +
> +    /* Last column? Done. */
> +    if (i == aco) break;
> +
> +    /* Use the pivot row to eliminate column i in rows > i */
> +    const ulong invpiv = ucoeff(a,i,i); /* 1/piv mod p */
> +    for (k=i+1; k<=li; k++)
> +    {
> +      ulong m = ( ucoeff(a,k,i) %= p );
> +      if (!m) continue;
> +
> +      m = Fl_mul(m, invpiv, p);
> +      if (m == 1) {
> +	for (j=i+1; j<=aco; j++) _Fl_sub((uGEN)a[j],k,i, p);
> +	for (j=1;   j<=bco; j++) _Fl_sub((uGEN)b[j],k,i, p);
> +      } else {
> +	for (j=i+1; j<=aco; j++) _Fl_submul((uGEN)a[j],k,i,m, p);
> +	for (j=1;   j<=bco; j++) _Fl_submul((uGEN)b[j],k,i,m, p);
> +      }
> +    }
> +  }
> +
> +  /* Second Step: Backsubstitute. */
> +  GEN u = cgetg(bco+1,t_MAT);
> +  for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col(a,(uGEN)b[j], aco, i, p);
> +
> +  /* Done, unwrap result if necessary */
> +  return iscol? gel(u,1): u;
> +}
> +
> +
> +
>  GEN
>  matid_Flm(long n)
>  {
> @@ -1002,6 +1311,10 @@
>  Flm_inv(GEN a, ulong p) {
>    return Flm_inv_sp(RgM_shallowcopy(a), p);
>  }
> +GEN
> +Zlm_inv(GEN a, ulong m) {
> +  return Zlm_gauss_fl(RgM_shallowcopy(a), matid_Flm(lg(a)-1), m, 0);
> +}
>  
>  GEN
>  FpM_gauss(GEN a, GEN b, GEN p)
> @@ -1062,7 +1375,7 @@
>  
>    if(DEBUGLEVEL>4) fprintferr("Solving the triangular system\n");
>    u = cgetg(bco+1,t_MAT);
> -  for (j=1; j<=bco; j++) gel(u,j) = Fp_get_col(a, gel(b,j), aco, p);
> +  for (j=1; j<=bco; j++) gel(u,j) = Fp_get_col(a, gel(b,j), aco, aco, p);
>    return gerepilecopy(av, iscol? gel(u,1): u);
>  }
>  GEN
> @@ -1120,9 +1433,318 @@
>    return gerepilecopy(av, iscol? gel(u,1): u);
>  }
>  
> +static int
> +cmp_GENii(const void *a, const void *b)
> +{
> +	GEN aa = *(GEN *)a;
> +	GEN bb = *(GEN *)b;
> +	return cmpii(aa, bb);
> +}
> +
> +/*
> +	Z_find_coprime_factors(a, b)
> +
> +	A version of find_coprime_factors() that works with t_INT GENs.
> + */
> +
> +static GEN
> +Z_find_coprime_factors(GEN a, GEN b)
> +{
> +   const int N = (lg(a) - 1) + (lg(b) - 1) * sizeof(ulong) * 8;
> +   GEN r[N];
> +
> +   pari_sp av = avma;
> +   int n = 2;
> +   r[0] = a;
> +   r[1] = b;
> +  
> +   /* Build a list of factors that are either coprime or identical. */
> +   int i;
> +   for(i = 1; i < n; ++i) {
> +     int j;
> +     for(j = 0; j < i; ++j) {
> +       if(equalii(r[i], r[j]))
> +         continue;
> +       GEN g = gcdii(r[i], r[j]);
> +       if(equali1(g))
> +         continue;
> +       if(equalii(g, r[j])) {
> +         r[i] = divii(r[i], g);
> +	 r[n++] = g;
> +	 --j;
> +	 continue;
> +       }
> +       if(equalii(g, r[i])) {
> +         r[j] = divii(r[j], g);
> +	 r[n++] = g;
> +	 --j;
> +	 continue;
> +       }
> +       
> +       r[i] = divii(r[i], g);
> +       r[j] = divii(r[j], g);
> +       r[n++] = g;
> +       r[n++] = gcopy(g);
> +     }
> +   }
> +
> +   /* Sort */
> +   qsort(r, n, sizeof(GEN), cmp_GENii);
> +
> +#if 0
> +   /* Debugging: Recheck */
> +   for(i = 1; i < n; ++i) {
> +     assert(!gequal1(r[i]));
> +     int j;
> +     for(j = 0; j < i; ++j) {
> +       if(equalii(r[i], r[j]))
> +         continue;
> +       GEN g = gcdii(r[i], r[j]);
> +       assert(gequal1(g));
> +     }
> +   }
> +#endif
> +
> +   /* Build return value */
> +   for(i = 1; i < n && equalii(r[i], r[0]); ++i)
> +     ;
> +   GEN ret = powiu(r[0], i);
> +
> +   /* Done */
> +   ret = gerepileupto(av, ret);
> +   return ret;
> +}
> +
> +static int
> +ZnM_gauss_splitinst(GEN a, GEN b, GEN m, int i, int k, GEN g)
> +{
> +  const int aco = lg(a) - 1;
> +  const int bco = lg(b) - 1;
> +  const int li = lg(a[1]) - 1;
> +
> +  /* Find two coprime factors for g */
> +  g = Z_find_coprime_factors(g, divii(m, g));
> +  if(equalii(g, m))
> +    return 0;	/* This row can't be used for pivot. */
> +  GEN mg = divii(m, g);
> +
> +  /* Save sp, so we can later reclaim scratch storage. */
> +  pari_sp av = avma;
> +
> +  /* Swap rows i and k */
> +  if (k != i)
> +  {
> +    int j;
> +    for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
> +    for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
> +  }
> +
> +  /* Build and solve Z/gZ instance */
> +  GEN aa = cgetg(1 + (aco - i + 1), t_MAT);
> +  int j;
> +  for (j = 1; j <= aco - i + 1; ++j) {
> +    GEN v = (gel(aa, j) = cgetg(1 + (li - i + 1), t_COL));
> +    long int e;
> +    for (e = 1; e <= li - i + 1; ++e) {
> +      gel(v, e) = modii(gcoeff(a, e + i - 1, j + i - 1), g);
> +    }
> +  }
> +
> +  GEN bb = cgetg(bco + 1, t_MAT);
> +  for (j = 1; j <= bco; ++j) {
> +    GEN v = (gel(bb, j) = cgetg(1 + (li - i + 1), t_COL));
> +    int e;
> +    for (e = 1; e <= li - i + 1; ++e) {
> +      gel(v, e) = modii(gcoeff(b, e + i - 1, j), g);
> +    }
> +  }
> +
> +  GEN sg = ZnM_gauss(aa, bb, g);
> +  if(sg == 0) { avma = av; return 1; }
> +
> +  /* Build and solve Z/(m/g)Z instance */
> +  aa = cgetg(1 + (aco - i + 1), t_MAT);
> +  for (j = 1; j <= aco - i + 1; ++j) {
> +    GEN v = (gel(aa, j) = cgetg(1 + (li - i + 1), t_COL));
> +    int e;
> +    for (e = 1; e <= li - i + 1; ++e) {
> +      gel(v, e) = modii(gcoeff(a, e + i - 1, j + i - 1), mg);
> +    }
> +  }
> +
> +  bb = cgetg(bco + 1, t_MAT);
> +  for (j = 1; j <= bco; ++j) {
> +    GEN v = (gel(bb, j) = cgetg(1 + (li - i + 1), t_COL));
> +    long int e;
> +    for (e = 1; e <= li - i + 1; ++e) {
> +      gel(v, e) = modii(gcoeff(b, e + i - 1, j), mg);
> +    }
> +  }
> +
> +  GEN spg = ZnM_gauss(aa, bb, mg);
> +  if(spg == 0) { avma = av; return 1; }
> +
> +  /* Combine.
> +   * Here,	ipg is 1 mod m/g and 0 mod g,
> +   *		ig  is 0 mod m/g and 1 mod g.
> +   */
> +  GEN ipg = Fp_mul(g, Fp_inv(modii(g, mg), mg), m);
> +  GEN ig = Fp_mul(mg, Fp_inv(modii(mg, g), g), m);
> +  for (j = 1; j <= bco; ++j) {
> +    long int e;
> +    GEN v = gel(b, j);
> +    GEN vsg = gel(sg, j);
> +    GEN vspg = gel(spg, j);
> +    for (e = 1; e <= li - i + 1; ++e) {
> +      gel(v, e + i - 1) = Fp_add(Fp_mul(ig, gel(vsg, e), m),
> +        Fp_mul(ipg, gel(vspg, e), m), m);
> +    }
> +  }
> +
> +  /* Done */
> +  avma = av;
> +  return 2;
> +}
> +
> +/* ZnM_gauss_fl(): A version of ZnM_gauss() that takes an additional
> +   flag, mod_is_prime.  If this flag, computation is aborted with an
> +   error, if a nonzero non-invertible pivot is encountered.  This flag
> +   is used to imitate the behaviour of FpM_gauss(). */
> +static
> +GEN ZnM_gauss_fl(GEN a, GEN b, GEN p, int mod_is_prime)
> +{
> +  pari_sp av = avma, lim;
> +  long i, j, k, li, bco, aco;
> +  int iscol;
> +  GEN u;
> +
> +  if (!init_gauss(a, &b, &aco, &li, &iscol)) return cgetg(1, t_MAT);
> +
> +  /* Special case small moduli */
> +  if (lgefint(p) == 3)
> +  {
> +    ulong pp = (ulong)p[2];
> +    a = ZM_to_Flm(a, pp);
> +    b = ZM_to_Flm(b, pp);
> +    u = Zlm_gauss_fl(a,b, pp, mod_is_prime);
> +    if (!u) {avma = av; return u;}
> +    u = iscol? Flc_to_ZC(gel(u,1)): Flm_to_ZM(u);
> +    return gerepileupto(av, u);
> +  }
> +
> +  lim = stack_lim(av,1);
> +  a = RgM_shallowcopy(a);
> +  bco = lg(b)-1;
> +  for (i=1; i<=aco; i++)
> +  {
> +    int done = 0;  /* Flag to cancel rest of triangularization step */
> +
> +    /* Look for a pivot in the ith column */
> +    GEN invpiv;
> +    for (k = i; k <= li; k++)
> +    {
> +      GEN piv = remii(gcoeff(a,k,i), p);
> +      if (signe(piv)) {
> +        GEN res;
> +	if(!invmod(piv, p, &res)) {
> +	  if(mod_is_prime) {
> +	    /* Trigger error */
> +	    Fp_inv(piv, p);
> +	  }
> +
> +	  /* Not invertible ?
> +	   *
> +	   * Now, res contains a factor of p.  Split into two instances.
> +	   */
> +	  const int r = ZnM_gauss_splitinst(a, b, p, i, k, res);
> +	  if (r == 0)
> +	    continue; /* could not use pivot at row k */
> +	  else if (r == 1)
> +	    return NULL; /* system has no valid solution */
> +
> +	  /* Done, readjust counters and go to backsubstitution */
> +	  --i;
> +          done = 1;
> +	  break;
> +	} else {
> +          gcoeff(a,k,i) = res;
> +	}
> +	break;
> +      } else {
> +	gcoeff(a,k,i) = gen_0;
> +      }
> +    }
> +
> +    /* No pivot? Not invertible, exit */
> +    if (k > li) return NULL;
> +    if (done) break;
> +
> +    /* Swap lines so the pivot is in line i */
> +    if (k != i)
> +    { /* swap lines so that k = i */
> +      for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
> +      for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
> +    }
> +    if (i == aco) break;
> +
> +    invpiv = gcoeff(a,i,i); /* 1/piv mod p */
> +    for (k=i+1; k<=li; k++)
> +    {
> +      GEN m = remii(gcoeff(a,k,i), p); gcoeff(a,k,i) = gen_0;
> +      if (!signe(m)) continue;
> +
> +      m = Fp_mul(m, invpiv, p);
> +      for (j=i+1; j<=aco; j++) _Fp_submul(gel(a,j),k,i,m, p);
> +      for (j=1  ; j<=bco; j++) _Fp_submul(gel(b,j),k,i,m, p);
> +    }
> +    if (low_stack(lim, stack_lim(av,1)))
> +    {
> +      if(DEBUGMEM>1) pari_warn(warnmem,"FpM_gauss. i=%ld",i);
> +      gerepileall(av,2, &a,&b);
> +    }
> +  }
> +
> +  if(DEBUGLEVEL>4) fprintferr("Solving the triangular system\n");
> +  u = cgetg(bco+1,t_MAT);
> +  for (j=1; j<=bco; j++) gel(u,j) = Fp_get_col(a, gel(b,j), aco, i, p);
> +  return gerepilecopy(av, iscol? gel(u,1): u);
> +}
> +
> +/*
> +	ZnM_gauss(a, b, m)
> +
> +	Solve ax = b (mod m), return NULL on error, otherwise a solution
> +	vector.
> +
> + */
> +
> +GEN ZnM_gauss(GEN a, GEN b, GEN m)
> +{
> +  return ZnM_gauss_fl(a, b, m, 0);
> +}
> +
> +/*
> +	GEN Zlm_gauss(a, b, ulong p)
> +
> +	Perform Gaussian elimination modulo a small integer p. a is the
> +	matrix, b the right hand side, which can be either a matrix or a
> +	column vector.
> +
> +	The matrices are t_MAT with t_VECSMALL as column vectors.
> + */
> +
> +
> +GEN
> +Zlm_gauss(GEN a, GEN b, ulong m) {
> +  return Zlm_gauss_fl(RgM_shallowcopy(a), shallowcopy(b), m, 0);
> +}
> +
>  GEN
>  FpM_inv(GEN x, GEN p) { return FpM_gauss(x, NULL, p); }
>  
> +GEN
> +ZnM_inv(GEN x, GEN m) { return ZnM_gauss(x, NULL, m); }
> +
>  /* M integral, dM such that M' = dM M^-1 is integral [e.g det(M)]. Return M' */
>  GEN
>  ZM_inv(GEN M, GEN dM)
> diff --git a/src/headers/paridecl.h b/src/headers/paridecl.h
> --- a/src/headers/paridecl.h
> +++ b/src/headers/paridecl.h
> @@ -518,6 +518,10 @@
>  GEN     RgM_solve_realimag(GEN x, GEN y);
>  GEN     ZM_detmult(GEN A);
>  GEN     ZM_inv(GEN M, GEN dM);
> +GEN     ZnM_gauss(GEN a, GEN b, GEN m);
> +GEN     ZnM_inv(GEN x, GEN m);
> +GEN     Zlm_gauss(GEN a, GEN b, ulong m);
> +GEN     Zlm_inv(GEN a, ulong m);
>  GEN     apply0(GEN A, GEN f);
>  GEN     deplin(GEN x);
>  GEN     det(GEN a);

> diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c
> --- a/src/basemath/alglin1.c
> +++ b/src/basemath/alglin1.c
> @@ -621,28 +621,6 @@
>    }
>    return u;
>  }
> -/* assume 0 <= a[i,j] < p */
> -static uGEN
> -Fl_get_col_OK(GEN a, uGEN b, long li, ulong p)
> -{
> -  uGEN u = (uGEN)cgetg(li+1,t_VECSMALL);
> -  ulong m = b[li] % p;
> -  long i,j;
> -
> -  u[li] = (m * ucoeff(a,li,li)) % p;
> -  for (i = li-1; i > 0; i--)
> -  {
> -    m = p - b[i]%p;
> -    for (j = i+1; j <= li; j++) {
> -      if (m & HIGHBIT) m %= p;
> -      m += ucoeff(a,i,j) * u[j]; /* 0 <= u[j] < p */
> -    }
> -    m %= p;
> -    if (m) m = ((p-m) * ucoeff(a,i,i)) % p;
> -    u[i] = m;
> -  }
> -  return u;
> -}
>  
>  static uGEN
>  Fl_get_col(GEN a, uGEN b, long li, long last_row, ulong p)
> @@ -683,12 +661,6 @@
>    gel(b,i) = Fq_red(gel(b,i), T,p);
>    gel(b,k) = gsub(gel(b,k), gmul(m, gel(b,i)));
>  }
> -static void /* assume m < p && SMALL_ULONG(p) && (! (b[i] & b[k] & HIGHMASK)) */
> -_Fl_submul_OK(uGEN b, long k, long i, ulong m, ulong p)
> -{
> -  b[k] -= m * b[i];
> -  if (b[k] & HIGHMASK) b[k] %= p;
> -}
>  static void /* assume m < p */
>  _Fl_submul(uGEN b, long k, long i, ulong m, ulong p)
>  {
> @@ -933,61 +905,14 @@
>    return z;
>  }
>  
> +static GEN
> +Zlm_gauss_fl(GEN a, GEN b, ulong p, int mod_is_prime);
> +
>  /* destroy a, b */
>  static GEN
>  Flm_gauss_sp(GEN a, GEN b, ulong p)
>  {
> -  long i, j, k, li, bco, aco = lg(a)-1;
> -  const int OK_ulong = 0;
> -  GEN u;
> -
> -  if (!aco) return cgetg(1,t_MAT);
> -  li = lg(a[1])-1;
> -  bco = lg(b)-1;
> -  for (i=1; i<=aco; i++)
> -  {
> -    ulong invpiv;
> -    /* Fl_get_col wants 0 <= a[i,j] < p for all i,j */
> -    if (OK_ulong) for (k = 1; k < i; k++) ucoeff(a,k,i) %= p;
> -    for (k = i; k <= li; k++)
> -    {
> -      ulong piv = ( ucoeff(a,k,i) %= p );
> -      if (piv) { ucoeff(a,k,i) = Fl_inv(piv, p); break; }
> -    }
> -    /* found a pivot on line k */
> -    if (k > li) return NULL;
> -    if (k != i)
> -    { /* swap lines so that k = i */
> -      for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
> -      for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
> -    }
> -    if (i == aco) break;
> -
> -    invpiv = ucoeff(a,i,i); /* 1/piv mod p */
> -    for (k=i+1; k<=li; k++)
> -    {
> -      ulong m = ( ucoeff(a,k,i) %= p );
> -      if (!m) continue;
> -
> -      m = Fl_mul(m, invpiv, p);
> -      if (m == 1) {
> -	for (j=i+1; j<=aco; j++) _Fl_sub((uGEN)a[j],k,i, p);
> -	for (j=1;   j<=bco; j++) _Fl_sub((uGEN)b[j],k,i, p);
> -      } else if (OK_ulong) {
> -	for (j=i+1; j<=aco; j++) _Fl_submul_OK((uGEN)a[j],k,i,m, p);
> -	for (j=1;   j<=bco; j++) _Fl_submul_OK((uGEN)b[j],k,i,m, p);
> -      } else {
> -	for (j=i+1; j<=aco; j++) _Fl_submul((uGEN)a[j],k,i,m, p);
> -	for (j=1;   j<=bco; j++) _Fl_submul((uGEN)b[j],k,i,m, p);
> -      }
> -    }
> -  }
> -  u = cgetg(bco+1,t_MAT);
> -  if (OK_ulong)
> -    for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col_OK(a,(uGEN)b[j], aco,p);
> -  else
> -    for (j=1; j<=bco; j++) ugel(u,j) = Fl_get_col(a,(uGEN)b[j], aco, aco, p);
> -  return u;
> +  return Zlm_gauss_fl(a, b, p, 1);
>  }
>  
>  /* Helper functions for Zlm_gauss_fl */
> @@ -1087,9 +1012,6 @@
>     return ret;
>  }
>  
> -static GEN
> -Zlm_gauss_fl(GEN a, GEN b, ulong p, int mod_is_prime);
> -
>  static int
>  Zlm_gauss_splitinst(GEN a, GEN b, ulong m, int i, int k, ulong g)
>  {
> @@ -1316,67 +1238,13 @@
>    return Zlm_gauss_fl(RgM_shallowcopy(a), matid_Flm(lg(a)-1), m, 0);
>  }
>  
> +static
> +GEN ZnM_gauss_fl(GEN a, GEN b, GEN p, int mod_is_prime);
> +
>  GEN
>  FpM_gauss(GEN a, GEN b, GEN p)
>  {
> -  pari_sp av = avma, lim;
> -  long i, j, k, li, bco, aco;
> -  int iscol;
> -  GEN u;
> -
> -  if (!init_gauss(a, &b, &aco, &li, &iscol)) return cgetg(1, t_MAT);
> -  if (lgefint(p) == 3)
> -  {
> -    ulong pp = (ulong)p[2];
> -    a = ZM_to_Flm(a, pp);
> -    b = ZM_to_Flm(b, pp);
> -    u = Flm_gauss_sp(a,b, pp);
> -    if (!u) {avma = av; return u;}
> -    u = iscol? Flc_to_ZC(gel(u,1)): Flm_to_ZM(u);
> -    return gerepileupto(av, u);
> -  }
> -  lim = stack_lim(av,1);
> -  a = RgM_shallowcopy(a);
> -  bco = lg(b)-1;
> -  for (i=1; i<=aco; i++)
> -  {
> -    GEN invpiv;
> -    for (k = i; k <= li; k++)
> -    {
> -      GEN piv = remii(gcoeff(a,k,i), p);
> -      if (signe(piv)) { gcoeff(a,k,i) = Fp_inv(piv, p); break; }
> -      gcoeff(a,k,i) = gen_0;
> -    }
> -    /* found a pivot on line k */
> -    if (k > li) return NULL;
> -    if (k != i)
> -    { /* swap lines so that k = i */
> -      for (j=i; j<=aco; j++) swap(gcoeff(a,i,j), gcoeff(a,k,j));
> -      for (j=1; j<=bco; j++) swap(gcoeff(b,i,j), gcoeff(b,k,j));
> -    }
> -    if (i == aco) break;
> -
> -    invpiv = gcoeff(a,i,i); /* 1/piv mod p */
> -    for (k=i+1; k<=li; k++)
> -    {
> -      GEN m = remii(gcoeff(a,k,i), p); gcoeff(a,k,i) = gen_0;
> -      if (!signe(m)) continue;
> -
> -      m = Fp_mul(m, invpiv, p);
> -      for (j=i+1; j<=aco; j++) _Fp_submul(gel(a,j),k,i,m, p);
> -      for (j=1  ; j<=bco; j++) _Fp_submul(gel(b,j),k,i,m, p);
> -    }
> -    if (low_stack(lim, stack_lim(av,1)))
> -    {
> -      if(DEBUGMEM>1) pari_warn(warnmem,"FpM_gauss. i=%ld",i);
> -      gerepileall(av,2, &a,&b);
> -    }
> -  }
> -
> -  if(DEBUGLEVEL>4) fprintferr("Solving the triangular system\n");
> -  u = cgetg(bco+1,t_MAT);
> -  for (j=1; j<=bco; j++) gel(u,j) = Fp_get_col(a, gel(b,j), aco, aco, p);
> -  return gerepilecopy(av, iscol? gel(u,1): u);
> +  return ZnM_gauss_fl(a, b, p, 1);
>  }
>  GEN
>  FqM_gauss(GEN a, GEN b, GEN T, GEN p)

You also need to add a test-suite, and the documentation of the new public
function.

I am a bit concerned that your patch is large, fixing the interface issue
would make it larger yet, and does not handle the general matrix inversion
problem. 

It would be nice if the whole strategy was abstracted to be reused in similar
situation.

Do you think there is really a need for 'ZlM' ?  In that range, factoring
the modulus is trivial, and that would reduce the size of the patch 
somewhat.

Cheers,
Bill.