Peter Bruin on Sat, 07 Feb 2015 13:22:20 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
FlxqM_mul_Kronecker |
Bonjour, Here is a patch to add a new (private) function FlxqM_mul_Kronecker that I have been working on since the Atelier PARI/GP in January. It uses Kronecker substitution to multiply matrices over non-prime finite fields of small characteristics. Over the range of matrices I tested it on, it is on average 60%-80% faster than the existing FlxqM_mul; for some examples, the speedup is up to 4-5 times. When multiplying m x n matrices by n x k matrices, the speedup is already noticeable (up to 20%) when n >= 2 (again on average over a range of reasonable field sizes; in a small number of "unlucky" cases, it is up to 20% slower because it has to use larger integers than Flx_mulspec because several products must be added). The new FlxqM_mul_Kronecker is called by FlxqM_mul (and hence by FFM_mul and gmul) precisely when n >= 2. All new functions are covered by the new tests. Thanks, Peter
diff --git a/src/basemath/Flx.c b/src/basemath/Flx.c index f0ddc04..0b7e536 100644 --- a/src/basemath/Flx.c +++ b/src/basemath/Flx.c @@ -645,9 +645,32 @@ Flx_mulspec_basecase(GEN x, GEN y, ulong p, long nx, long ny) } static GEN +Flx_to_int_spec(GEN x, long l) { + long i; + GEN w, y; + if (l == 0) + return gen_0; + y = cgetipos(l + 2); + for (i = 0, w = int_LSW(y); i < l; i++, w = int_nextW(w)) + *w = x[i]; + return y; +} + +static GEN int_to_Flx(GEN z, ulong p) { long i, l = lgefint(z); + GEN x = cgetg(l, t_VECSMALL), w; + for (w = int_LSW(z), i = 2; i < l; w = int_nextW(w), i++) + x[i] = ((ulong) *w) % p; + return Flx_renormalize(x, l); +} + +/* z has least significant word first */ +static GEN +int_LSWfirst_to_Flx(GEN z, ulong p) +{ + long i, l = lgefint(z); GEN x = cgetg(l, t_VECSMALL); for (i=2; i<l; i++) x[i] = uel(z,i)%p; return Flx_renormalize(x, l); @@ -657,7 +680,7 @@ INLINE GEN Flx_mulspec_mulii(GEN a, GEN b, ulong p, long na, long nb) { GEN z=muliispec(a,b,na,nb); - return int_to_Flx(z,p); + return int_LSWfirst_to_Flx(z,p); } static GEN @@ -888,7 +911,7 @@ static GEN Flx_sqrspec_sqri(GEN a, ulong p, long na) { GEN z=sqrispec(a,na); - return int_to_Flx(z,p); + return int_LSWfirst_to_Flx(z,p); } static GEN @@ -3968,6 +3991,113 @@ FlxqV_roots_to_pol(GEN V, GEN T, ulong p, long v) return gerepileupto(ltop, FlxqXV_prod(W, T, p)); } +/*** FlxqM ***/ + +static GEN +zx_to_int_halfspec(GEN x, long l) { + if (l == 0) + return gen_0; + return Flx_to_int_halfspec(x, l); +} + +static GEN +zx_to_int_spec_2(GEN x, long l) { + return Flx_eval2BILspec(x, 2, l); +} + +static GEN +zx_to_int_spec_3(GEN x, long l) { + return Flx_eval2BILspec(x, 3, l); +} + +static GEN +int_to_Flx_2(GEN x, ulong p) { + long d = (lgefint(x)-1)/2 - 1; + return Z_mod2BIL_Flx_2(x, d, p); +} + +static GEN +int_to_Flx_3(GEN x, ulong p) { + long d = lgefint(x)/3 - 1; + return Z_mod2BIL_Flx_3(x, d, p); +} + +static GEN +FlxM_to_ZM(GEN M, GEN (*pack)(GEN, long)) { + long i, j, l, lc; + GEN N = cgetg_copy(M, &l), x; + if (l == 1) + return N; + lc = lgcols(M); + for (j = 1; j < l; j++) { + gel(N, j) = cgetg(lc, t_COL); + for (i = 1; i < lc; i++) { + x = gcoeff(M, i, j); + gcoeff(N, i, j) = pack(x + 2, lgpol(x)); + } + } + return N; +} + +static GEN +ZM_to_FlxqM(GEN M, GEN T, ulong p, + GEN (*unpack)(GEN, ulong)) { + long i, j, l, lc, sv = evalvarn(get_Flx_var(T)); + GEN N = cgetg_copy(M, &l), x; + if (l == 1) + return N; + lc = lgcols(M); + for (j = 1; j < l; j++) { + gel(N, j) = cgetg(lc, t_COL); + for (i = 1; i < lc; i++) { + x = unpack(gcoeff(M, i, j), p); + x[1] = sv; + gcoeff(N, i, j) = Flx_rem(x, T, p); + } + } + return N; +} + +GEN +FlxqM_mul_Kronecker(GEN A, GEN B, GEN T, ulong p) { + pari_sp av = avma; + long l, n = lg(A) - 1; + GEN C, D, z; + GEN (*pack)(GEN, long), (*unpack)(GEN, ulong); + + /* + cf. Flx_mulspec(), maxlengthcoeffpol() + z can be 1, 2, 3 or (theoretically) 4 words long + */ + z = muliu(muliu(sqru(p - 1), degpol(T)), n); + l = lgefint(z); + avma = av; + + if (l == 3) { + if (HIGHWORD(z[2]) == 0) { + pack = zx_to_int_halfspec; + unpack = int_to_Flx_half; + } else { + pack = Flx_to_int_spec; + unpack = int_to_Flx; + } + } else if (l == 4) { + pack = zx_to_int_spec_2; + unpack = int_to_Flx_2; + } else if (l == 5) { + pack = zx_to_int_spec_3; + unpack = int_to_Flx_3; + } else { + /* not implemented, probably won't occur in practice */ + return NULL; + } + A = FlxM_to_ZM(A, pack); + B = FlxM_to_ZM(B, pack); + C = ZM_mul(A, B); + D = ZM_to_FlxqM(C, T, p, unpack); + return gerepilecopy(av, D); +} + /*******************************************************************/ /* */ /* (Fl[X]/T(X))[Y] / S(Y) */ diff --git a/src/basemath/alglin1.c b/src/basemath/alglin1.c index f2976cb..229f6f4 100644 --- a/src/basemath/alglin1.c +++ b/src/basemath/alglin1.c @@ -976,7 +976,17 @@ FlxqM_FlxqC_mul(GEN A, GEN B, GEN T, unsigned long p) { GEN FlxqM_mul(GEN A, GEN B, GEN T, unsigned long p) { void *E; - const struct bb_field *ff = get_Flxq_field(&E, T, p); + const struct bb_field *ff; + long n = lg(A) - 1; + + if (n == 0) + return cgetg(1, t_MAT); + if (n > 1) { + GEN C = FlxqM_mul_Kronecker(A, B, T, p); + if (C != NULL) + return C; + } + ff = get_Flxq_field(&E, T, p); return gen_matmul(A, B, E, ff); } diff --git a/src/headers/paripriv.h b/src/headers/paripriv.h index 4b0677e..dbcfc10 100644 --- a/src/headers/paripriv.h +++ b/src/headers/paripriv.h @@ -562,6 +562,10 @@ void pari_close_files(void); int popinfile(void); pariFILE* try_pipe(const char *cmd, int flag); +/* Flx.c */ + +GEN FlxqM_mul_Kronecker(GEN A, GEN B, GEN T, ulong p); + /* Flxq_log.c */ GEN Flxq_log_index(GEN a0, GEN b0, GEN m, GEN T0, ulong p); diff --git a/src/test/32/ff b/src/test/32/ff index 2d90cb3..8a57055 100644 --- a/src/test/32/ff +++ b/src/test/32/ff @@ -319,6 +319,15 @@ t^4 + t^2 [3*t^2 + 3*t, 18446744073709551628*t^4 + 5*t^3 + 4*t^2 + 1844674407370955162 7*t + 18446744073709551628, 18446744073709551628*t^4 + 5*t^3 + 4*t^2 + 18446 744073709551628*t]~ +? test(q)=my(t=ffgen(q,'t),M=matrix(10,10,i,j,random(t)));subst(charpoly(M),'x,M)==0; +? test(nextprime(2^7)^5) +1 +? test(nextprime(2^15)^5) +1 +? test(nextprime(2^31)^5) +1 +? test(nextprime(2^63)^5) +1 ? p=2^64+13;g=ffprimroot(ffgen(p^2),&o);a=2*g^0; ? v=[I,-1,Mat(1),matid(2)/2]; ? for(i=1,#v,print(iferr(fflog(a,g,v[i]),E,E))); diff --git a/src/test/in/ff b/src/test/in/ff index 6b47f60..c4b5b28 100644 --- a/src/test/in/ff +++ b/src/test/in/ff @@ -106,6 +106,15 @@ test(2^5) test(7^5) test((2^64+13)^5) +test(q)={ + my(t = ffgen(q, 't), M = matrix(10, 10, i, j, random(t))); + subst(charpoly(M), 'x, M) == 0; +} +test(nextprime(2^7)^5) +test(nextprime(2^15)^5) +test(nextprime(2^31)^5) +test(nextprime(2^63)^5) + p=2^64+13; g=ffprimroot(ffgen(p^2), &o); a=2*g^0; v=[I,-1,Mat(1),matid(2)/2]; for(i=1,#v, print(iferr(fflog(a,g,v[i]),E,E)));