| Peter Bruin on Tue, 10 Feb 2015 00:00:07 +0100 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
| Re: FlxqM_mul_Kronecker |
Hello Bill, > On Sat, Feb 07, 2015 at 01:22:47PM +0100, Peter Bruin wrote: >> Bonjour, >> >> Here is a patch to add a new (private) function FlxqM_mul_Kronecker that >> I have been working on since the Atelier PARI/GP in January. It uses >> Kronecker substitution to multiply matrices over non-prime finite fields >> of small characteristics. Over the range of matrices I tested it on, it >> is on average 60%-80% faster than the existing FlxqM_mul; for some >> examples, the speedup is up to 4-5 times. > > I read your patch. Thanks! More changes are in the attached patch. (I guess it is not possible for me to directly update the peter-FlxqM_mul_Kronecker branch?) > I think the function FlxM_to_ZM and ZM_to_FlxqM should be renamed > to something more awkward, maybe FlxM_pack_ZM/ZM_pack_FlxqM They are now called kron_pack_FlxM and kron_unpack_FlxM; how about that? > The function zx_to_int* should also be renamed. zx denotes polynomials > with small signed coefficient. Maybe we should add pack/unpack in the > name since the functions are only wrapper for use as (*pack) or > (*unpack). These have been renamed to kron_pack_Flx_spec* and kron_unpack_Flx*. > In FlxqM_mul_Kronecker, maybe a switch(l-2) would be better. OK, done. Although the new int_to_Flx was renamed to kron_unpack_Flx, I left the renaming of the old int_to_Flx to int_LSWfirst_to_Flx in place because it is both more awkward and more descriptive. Finally, I discovered that kron_unpack_FlxqM had an extraneous evalvarn(); this is also fixed by the attached patch. Thanks, Peter
diff --git a/src/basemath/Flx.c b/src/basemath/Flx.c
index c12d467..5dca5e7 100644
--- a/src/basemath/Flx.c
+++ b/src/basemath/Flx.c
@@ -644,28 +644,6 @@ Flx_mulspec_basecase(GEN x, GEN y, ulong p, long nx, long ny)
z -= 2; return Flx_renormalize(z, lz);
}
-static GEN
-Flx_to_int_spec(GEN x, long l) {
- long i;
- GEN w, y;
- if (l == 0)
- return gen_0;
- y = cgetipos(l + 2);
- for (i = 0, w = int_LSW(y); i < l; i++, w = int_nextW(w))
- *w = x[i];
- return y;
-}
-
-static GEN
-int_to_Flx(GEN z, ulong p)
-{
- long i, l = lgefint(z);
- GEN x = cgetg(l, t_VECSMALL), w;
- for (w = int_LSW(z), i = 2; i < l; w = int_nextW(w), i++)
- x[i] = ((ulong) *w) % p;
- return Flx_renormalize(x, l);
-}
-
/* z has least significant word first */
static GEN
int_LSWfirst_to_Flx(GEN z, ulong p)
@@ -3994,36 +3972,58 @@ FlxqV_roots_to_pol(GEN V, GEN T, ulong p, long v)
/*** FlxqM ***/
static GEN
-zx_to_int_halfspec(GEN x, long l) {
+kron_pack_Flx_spec_half(GEN x, long l) {
if (l == 0)
return gen_0;
return Flx_to_int_halfspec(x, l);
}
static GEN
-zx_to_int_spec_2(GEN x, long l) {
+kron_pack_Flx_spec(GEN x, long l) {
+ long i;
+ GEN w, y;
+ if (l == 0)
+ return gen_0;
+ y = cgetipos(l + 2);
+ for (i = 0, w = int_LSW(y); i < l; i++, w = int_nextW(w))
+ *w = x[i];
+ return y;
+}
+
+static GEN
+kron_pack_Flx_spec_2(GEN x, long l) {
return Flx_eval2BILspec(x, 2, l);
}
static GEN
-zx_to_int_spec_3(GEN x, long l) {
+kron_pack_Flx_spec_3(GEN x, long l) {
return Flx_eval2BILspec(x, 3, l);
}
static GEN
-int_to_Flx_2(GEN x, ulong p) {
+kron_unpack_Flx(GEN z, ulong p)
+{
+ long i, l = lgefint(z);
+ GEN x = cgetg(l, t_VECSMALL), w;
+ for (w = int_LSW(z), i = 2; i < l; w = int_nextW(w), i++)
+ x[i] = ((ulong) *w) % p;
+ return Flx_renormalize(x, l);
+}
+
+static GEN
+kron_unpack_Flx_2(GEN x, ulong p) {
long d = (lgefint(x)-1)/2 - 1;
return Z_mod2BIL_Flx_2(x, d, p);
}
static GEN
-int_to_Flx_3(GEN x, ulong p) {
+kron_unpack_Flx_3(GEN x, ulong p) {
long d = lgefint(x)/3 - 1;
return Z_mod2BIL_Flx_3(x, d, p);
}
static GEN
-FlxM_to_ZM(GEN M, GEN (*pack)(GEN, long)) {
+kron_pack_FlxM(GEN M, GEN (*pack)(GEN, long)) {
long i, j, l, lc;
GEN N = cgetg_copy(M, &l), x;
if (l == 1)
@@ -4040,9 +4040,9 @@ FlxM_to_ZM(GEN M, GEN (*pack)(GEN, long)) {
}
static GEN
-ZM_to_FlxqM(GEN M, GEN T, ulong p, GEN (*unpack)(GEN, ulong))
+kron_unpack_FlxqM(GEN M, GEN T, ulong p, GEN (*unpack)(GEN, ulong))
{
- long i, j, l, lc, sv = evalvarn(get_Flx_var(T));
+ long i, j, l, lc, sv = get_Flx_var(T);
GEN N = cgetg_copy(M, &l), x;
if (l == 1)
return N;
@@ -4073,28 +4073,32 @@ FlxqM_mul_Kronecker(GEN A, GEN B, GEN T, ulong p) {
l = lgefint(z);
avma = av;
- if (l == 3) {
+ switch (l - 2) {
+ case 1:
if (HIGHWORD(z[2]) == 0) {
- pack = zx_to_int_halfspec;
+ pack = kron_pack_Flx_spec_half;
unpack = int_to_Flx_half;
} else {
- pack = Flx_to_int_spec;
- unpack = int_to_Flx;
+ pack = kron_pack_Flx_spec;
+ unpack = kron_unpack_Flx;
}
- } else if (l == 4) {
- pack = zx_to_int_spec_2;
- unpack = int_to_Flx_2;
- } else if (l == 5) {
- pack = zx_to_int_spec_3;
- unpack = int_to_Flx_3;
- } else {
+ break;
+ case 2:
+ pack = kron_pack_Flx_spec_2;
+ unpack = kron_unpack_Flx_2;
+ break;
+ case 3:
+ pack = kron_pack_Flx_spec_3;
+ unpack = kron_unpack_Flx_3;
+ break;
+ default:
/* not implemented, probably won't occur in practice */
return NULL;
}
- A = FlxM_to_ZM(A, pack);
- B = FlxM_to_ZM(B, pack);
+ A = kron_pack_FlxM(A, pack);
+ B = kron_pack_FlxM(B, pack);
C = ZM_mul(A, B);
- D = ZM_to_FlxqM(C, T, p, unpack);
+ D = kron_unpack_FlxqM(C, T, p, unpack);
return gerepilecopy(av, D);
}