Peter Bruin on Tue, 10 Feb 2015 00:00:07 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
Re: FlxqM_mul_Kronecker |
Hello Bill, > On Sat, Feb 07, 2015 at 01:22:47PM +0100, Peter Bruin wrote: >> Bonjour, >> >> Here is a patch to add a new (private) function FlxqM_mul_Kronecker that >> I have been working on since the Atelier PARI/GP in January. It uses >> Kronecker substitution to multiply matrices over non-prime finite fields >> of small characteristics. Over the range of matrices I tested it on, it >> is on average 60%-80% faster than the existing FlxqM_mul; for some >> examples, the speedup is up to 4-5 times. > > I read your patch. Thanks! More changes are in the attached patch. (I guess it is not possible for me to directly update the peter-FlxqM_mul_Kronecker branch?) > I think the function FlxM_to_ZM and ZM_to_FlxqM should be renamed > to something more awkward, maybe FlxM_pack_ZM/ZM_pack_FlxqM They are now called kron_pack_FlxM and kron_unpack_FlxM; how about that? > The function zx_to_int* should also be renamed. zx denotes polynomials > with small signed coefficient. Maybe we should add pack/unpack in the > name since the functions are only wrapper for use as (*pack) or > (*unpack). These have been renamed to kron_pack_Flx_spec* and kron_unpack_Flx*. > In FlxqM_mul_Kronecker, maybe a switch(l-2) would be better. OK, done. Although the new int_to_Flx was renamed to kron_unpack_Flx, I left the renaming of the old int_to_Flx to int_LSWfirst_to_Flx in place because it is both more awkward and more descriptive. Finally, I discovered that kron_unpack_FlxqM had an extraneous evalvarn(); this is also fixed by the attached patch. Thanks, Peter
diff --git a/src/basemath/Flx.c b/src/basemath/Flx.c index c12d467..5dca5e7 100644 --- a/src/basemath/Flx.c +++ b/src/basemath/Flx.c @@ -644,28 +644,6 @@ Flx_mulspec_basecase(GEN x, GEN y, ulong p, long nx, long ny) z -= 2; return Flx_renormalize(z, lz); } -static GEN -Flx_to_int_spec(GEN x, long l) { - long i; - GEN w, y; - if (l == 0) - return gen_0; - y = cgetipos(l + 2); - for (i = 0, w = int_LSW(y); i < l; i++, w = int_nextW(w)) - *w = x[i]; - return y; -} - -static GEN -int_to_Flx(GEN z, ulong p) -{ - long i, l = lgefint(z); - GEN x = cgetg(l, t_VECSMALL), w; - for (w = int_LSW(z), i = 2; i < l; w = int_nextW(w), i++) - x[i] = ((ulong) *w) % p; - return Flx_renormalize(x, l); -} - /* z has least significant word first */ static GEN int_LSWfirst_to_Flx(GEN z, ulong p) @@ -3994,36 +3972,58 @@ FlxqV_roots_to_pol(GEN V, GEN T, ulong p, long v) /*** FlxqM ***/ static GEN -zx_to_int_halfspec(GEN x, long l) { +kron_pack_Flx_spec_half(GEN x, long l) { if (l == 0) return gen_0; return Flx_to_int_halfspec(x, l); } static GEN -zx_to_int_spec_2(GEN x, long l) { +kron_pack_Flx_spec(GEN x, long l) { + long i; + GEN w, y; + if (l == 0) + return gen_0; + y = cgetipos(l + 2); + for (i = 0, w = int_LSW(y); i < l; i++, w = int_nextW(w)) + *w = x[i]; + return y; +} + +static GEN +kron_pack_Flx_spec_2(GEN x, long l) { return Flx_eval2BILspec(x, 2, l); } static GEN -zx_to_int_spec_3(GEN x, long l) { +kron_pack_Flx_spec_3(GEN x, long l) { return Flx_eval2BILspec(x, 3, l); } static GEN -int_to_Flx_2(GEN x, ulong p) { +kron_unpack_Flx(GEN z, ulong p) +{ + long i, l = lgefint(z); + GEN x = cgetg(l, t_VECSMALL), w; + for (w = int_LSW(z), i = 2; i < l; w = int_nextW(w), i++) + x[i] = ((ulong) *w) % p; + return Flx_renormalize(x, l); +} + +static GEN +kron_unpack_Flx_2(GEN x, ulong p) { long d = (lgefint(x)-1)/2 - 1; return Z_mod2BIL_Flx_2(x, d, p); } static GEN -int_to_Flx_3(GEN x, ulong p) { +kron_unpack_Flx_3(GEN x, ulong p) { long d = lgefint(x)/3 - 1; return Z_mod2BIL_Flx_3(x, d, p); } static GEN -FlxM_to_ZM(GEN M, GEN (*pack)(GEN, long)) { +kron_pack_FlxM(GEN M, GEN (*pack)(GEN, long)) { long i, j, l, lc; GEN N = cgetg_copy(M, &l), x; if (l == 1) @@ -4040,9 +4040,9 @@ FlxM_to_ZM(GEN M, GEN (*pack)(GEN, long)) { } static GEN -ZM_to_FlxqM(GEN M, GEN T, ulong p, GEN (*unpack)(GEN, ulong)) +kron_unpack_FlxqM(GEN M, GEN T, ulong p, GEN (*unpack)(GEN, ulong)) { - long i, j, l, lc, sv = evalvarn(get_Flx_var(T)); + long i, j, l, lc, sv = get_Flx_var(T); GEN N = cgetg_copy(M, &l), x; if (l == 1) return N; @@ -4073,28 +4073,32 @@ FlxqM_mul_Kronecker(GEN A, GEN B, GEN T, ulong p) { l = lgefint(z); avma = av; - if (l == 3) { + switch (l - 2) { + case 1: if (HIGHWORD(z[2]) == 0) { - pack = zx_to_int_halfspec; + pack = kron_pack_Flx_spec_half; unpack = int_to_Flx_half; } else { - pack = Flx_to_int_spec; - unpack = int_to_Flx; + pack = kron_pack_Flx_spec; + unpack = kron_unpack_Flx; } - } else if (l == 4) { - pack = zx_to_int_spec_2; - unpack = int_to_Flx_2; - } else if (l == 5) { - pack = zx_to_int_spec_3; - unpack = int_to_Flx_3; - } else { + break; + case 2: + pack = kron_pack_Flx_spec_2; + unpack = kron_unpack_Flx_2; + break; + case 3: + pack = kron_pack_Flx_spec_3; + unpack = kron_unpack_Flx_3; + break; + default: /* not implemented, probably won't occur in practice */ return NULL; } - A = FlxM_to_ZM(A, pack); - B = FlxM_to_ZM(B, pack); + A = kron_pack_FlxM(A, pack); + B = kron_pack_FlxM(B, pack); C = ZM_mul(A, B); - D = ZM_to_FlxqM(C, T, p, unpack); + D = kron_unpack_FlxqM(C, T, p, unpack); return gerepilecopy(av, D); }