| Peter Bruin on Sat, 07 Feb 2015 13:22:20 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
| FlxqM_mul_Kronecker |
Bonjour, Here is a patch to add a new (private) function FlxqM_mul_Kronecker that I have been working on since the Atelier PARI/GP in January. It uses Kronecker substitution to multiply matrices over non-prime finite fields of small characteristics. Over the range of matrices I tested it on, it is on average 60%-80% faster than the existing FlxqM_mul; for some examples, the speedup is up to 4-5 times. When multiplying m x n matrices by n x k matrices, the speedup is already noticeable (up to 20%) when n >= 2 (again on average over a range of reasonable field sizes; in a small number of "unlucky" cases, it is up to 20% slower because it has to use larger integers than Flx_mulspec because several products must be added). The new FlxqM_mul_Kronecker is called by FlxqM_mul (and hence by FFM_mul and gmul) precisely when n >= 2. All new functions are covered by the new tests. Thanks, Peter
diff --git a/src/basemath/Flx.c b/src/basemath/Flx.c
index f0ddc04..0b7e536 100644
--- a/src/basemath/Flx.c
+++ b/src/basemath/Flx.c
@@ -645,9 +645,32 @@ Flx_mulspec_basecase(GEN x, GEN y, ulong p, long nx, long ny)
}
static GEN
+Flx_to_int_spec(GEN x, long l) {
+ long i;
+ GEN w, y;
+ if (l == 0)
+ return gen_0;
+ y = cgetipos(l + 2);
+ for (i = 0, w = int_LSW(y); i < l; i++, w = int_nextW(w))
+ *w = x[i];
+ return y;
+}
+
+static GEN
int_to_Flx(GEN z, ulong p)
{
long i, l = lgefint(z);
+ GEN x = cgetg(l, t_VECSMALL), w;
+ for (w = int_LSW(z), i = 2; i < l; w = int_nextW(w), i++)
+ x[i] = ((ulong) *w) % p;
+ return Flx_renormalize(x, l);
+}
+
+/* z has least significant word first */
+static GEN
+int_LSWfirst_to_Flx(GEN z, ulong p)
+{
+ long i, l = lgefint(z);
GEN x = cgetg(l, t_VECSMALL);
for (i=2; i<l; i++) x[i] = uel(z,i)%p;
return Flx_renormalize(x, l);
@@ -657,7 +680,7 @@ INLINE GEN
Flx_mulspec_mulii(GEN a, GEN b, ulong p, long na, long nb)
{
GEN z=muliispec(a,b,na,nb);
- return int_to_Flx(z,p);
+ return int_LSWfirst_to_Flx(z,p);
}
static GEN
@@ -888,7 +911,7 @@ static GEN
Flx_sqrspec_sqri(GEN a, ulong p, long na)
{
GEN z=sqrispec(a,na);
- return int_to_Flx(z,p);
+ return int_LSWfirst_to_Flx(z,p);
}
static GEN
@@ -3968,6 +3991,113 @@ FlxqV_roots_to_pol(GEN V, GEN T, ulong p, long v)
return gerepileupto(ltop, FlxqXV_prod(W, T, p));
}
+/*** FlxqM ***/
+
+static GEN
+zx_to_int_halfspec(GEN x, long l) {
+ if (l == 0)
+ return gen_0;
+ return Flx_to_int_halfspec(x, l);
+}
+
+static GEN
+zx_to_int_spec_2(GEN x, long l) {
+ return Flx_eval2BILspec(x, 2, l);
+}
+
+static GEN
+zx_to_int_spec_3(GEN x, long l) {
+ return Flx_eval2BILspec(x, 3, l);
+}
+
+static GEN
+int_to_Flx_2(GEN x, ulong p) {
+ long d = (lgefint(x)-1)/2 - 1;
+ return Z_mod2BIL_Flx_2(x, d, p);
+}
+
+static GEN
+int_to_Flx_3(GEN x, ulong p) {
+ long d = lgefint(x)/3 - 1;
+ return Z_mod2BIL_Flx_3(x, d, p);
+}
+
+static GEN
+FlxM_to_ZM(GEN M, GEN (*pack)(GEN, long)) {
+ long i, j, l, lc;
+ GEN N = cgetg_copy(M, &l), x;
+ if (l == 1)
+ return N;
+ lc = lgcols(M);
+ for (j = 1; j < l; j++) {
+ gel(N, j) = cgetg(lc, t_COL);
+ for (i = 1; i < lc; i++) {
+ x = gcoeff(M, i, j);
+ gcoeff(N, i, j) = pack(x + 2, lgpol(x));
+ }
+ }
+ return N;
+}
+
+static GEN
+ZM_to_FlxqM(GEN M, GEN T, ulong p,
+ GEN (*unpack)(GEN, ulong)) {
+ long i, j, l, lc, sv = evalvarn(get_Flx_var(T));
+ GEN N = cgetg_copy(M, &l), x;
+ if (l == 1)
+ return N;
+ lc = lgcols(M);
+ for (j = 1; j < l; j++) {
+ gel(N, j) = cgetg(lc, t_COL);
+ for (i = 1; i < lc; i++) {
+ x = unpack(gcoeff(M, i, j), p);
+ x[1] = sv;
+ gcoeff(N, i, j) = Flx_rem(x, T, p);
+ }
+ }
+ return N;
+}
+
+GEN
+FlxqM_mul_Kronecker(GEN A, GEN B, GEN T, ulong p) {
+ pari_sp av = avma;
+ long l, n = lg(A) - 1;
+ GEN C, D, z;
+ GEN (*pack)(GEN, long), (*unpack)(GEN, ulong);
+
+ /*
+ cf. Flx_mulspec(), maxlengthcoeffpol()
+ z can be 1, 2, 3 or (theoretically) 4 words long
+ */
+ z = muliu(muliu(sqru(p - 1), degpol(T)), n);
+ l = lgefint(z);
+ avma = av;
+
+ if (l == 3) {
+ if (HIGHWORD(z[2]) == 0) {
+ pack = zx_to_int_halfspec;
+ unpack = int_to_Flx_half;
+ } else {
+ pack = Flx_to_int_spec;
+ unpack = int_to_Flx;
+ }
+ } else if (l == 4) {
+ pack = zx_to_int_spec_2;
+ unpack = int_to_Flx_2;
+ } else if (l == 5) {
+ pack = zx_to_int_spec_3;
+ unpack = int_to_Flx_3;
+ } else {
+ /* not implemented, probably won't occur in practice */
+ return NULL;
+ }
+ A = FlxM_to_ZM(A, pack);
+ B = FlxM_to_ZM(B, pack);
+ C = ZM_mul(A, B);
+ D = ZM_to_FlxqM(C, T, p, unpack);
+ return gerepilecopy(av, D);
+}
+
/*******************************************************************/
/* */
/* (Fl[X]/T(X))[Y] / S(Y) */
diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c
index f2976cb..229f6f4 100644
--- a/src/basemath/alglin1.c
+++ b/src/basemath/alglin1.c
@@ -976,7 +976,17 @@ FlxqM_FlxqC_mul(GEN A, GEN B, GEN T, unsigned long p) {
GEN
FlxqM_mul(GEN A, GEN B, GEN T, unsigned long p) {
void *E;
- const struct bb_field *ff = get_Flxq_field(&E, T, p);
+ const struct bb_field *ff;
+ long n = lg(A) - 1;
+
+ if (n == 0)
+ return cgetg(1, t_MAT);
+ if (n > 1) {
+ GEN C = FlxqM_mul_Kronecker(A, B, T, p);
+ if (C != NULL)
+ return C;
+ }
+ ff = get_Flxq_field(&E, T, p);
return gen_matmul(A, B, E, ff);
}
diff --git a/src/headers/paripriv.h b/src/headers/paripriv.h
index 4b0677e..dbcfc10 100644
--- a/src/headers/paripriv.h
+++ b/src/headers/paripriv.h
@@ -562,6 +562,10 @@ void pari_close_files(void);
int popinfile(void);
pariFILE* try_pipe(const char *cmd, int flag);
+/* Flx.c */
+
+GEN FlxqM_mul_Kronecker(GEN A, GEN B, GEN T, ulong p);
+
/* Flxq_log.c */
GEN Flxq_log_index(GEN a0, GEN b0, GEN m, GEN T0, ulong p);
diff --git a/src/test/32/ff b/src/test/32/ff
index 2d90cb3..8a57055 100644
--- a/src/test/32/ff
+++ b/src/test/32/ff
@@ -319,6 +319,15 @@ t^4 + t^2
[3*t^2 + 3*t, 18446744073709551628*t^4 + 5*t^3 + 4*t^2 + 1844674407370955162
7*t + 18446744073709551628, 18446744073709551628*t^4 + 5*t^3 + 4*t^2 + 18446
744073709551628*t]~
+? test(q)=my(t=ffgen(q,'t),M=matrix(10,10,i,j,random(t)));subst(charpoly(M),'x,M)==0;
+? test(nextprime(2^7)^5)
+1
+? test(nextprime(2^15)^5)
+1
+? test(nextprime(2^31)^5)
+1
+? test(nextprime(2^63)^5)
+1
? p=2^64+13;g=ffprimroot(ffgen(p^2),&o);a=2*g^0;
? v=[I,-1,Mat(1),matid(2)/2];
? for(i=1,#v,print(iferr(fflog(a,g,v[i]),E,E)));
diff --git a/src/test/in/ff b/src/test/in/ff
index 6b47f60..c4b5b28 100644
--- a/src/test/in/ff
+++ b/src/test/in/ff
@@ -106,6 +106,15 @@ test(2^5)
test(7^5)
test((2^64+13)^5)
+test(q)={
+ my(t = ffgen(q, 't), M = matrix(10, 10, i, j, random(t)));
+ subst(charpoly(M), 'x, M) == 0;
+}
+test(nextprime(2^7)^5)
+test(nextprime(2^15)^5)
+test(nextprime(2^31)^5)
+test(nextprime(2^63)^5)
+
p=2^64+13; g=ffprimroot(ffgen(p^2), &o); a=2*g^0;
v=[I,-1,Mat(1),matid(2)/2];
for(i=1,#v, print(iferr(fflog(a,g,v[i]),E,E)));