Re: Faster exponentiation in some extensions of finite fields of small c

Jean-Pierre Flori on Fri, 15 Jan 2016 11:28:47 +0100

[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

Re: Faster exponentiation in some extensions of finite fields of small characteristics

To: "pari-dev@pari.math.u-bordeaux.fr" <pari-dev@pari.math.u-bordeaux.fr>
Subject: Re: Faster exponentiation in some extensions of finite fields of small characteristics
From: Jean-Pierre Flori <jpflori@gmail.com>
Date: Fri, 15 Jan 2016 11:28:20 +0100
Delivery-date: Fri, 15 Jan 2016 11:28:47 +0100
Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=mime-version:in-reply-to:references:from:date:message-id:subject:to :content-type; bh=WwBYUHEdGffMKSGh1qENwYAzvMIwTNgswaQj7bd9A7E=; b=cZwT93JWMOzsrbGO1JCctbMcrA1JRiJKJVbitpXdWNTVuOJyJqxgvQ4R6eFgY2MSk0 moLifXhZV1TSJIF0CtuMhlXWWDSZwKG+GPPyg7VhyN+z73i8ju860C3oHm31hh2sJLNd OZ9XXDTxqAYteeJ+9aJ6mqXMtXUsh/r7YnHKOS9ZmHHkT9E+WiLWYXxwsQMO2EAyIvxa qoOP5r6cvuA3odcgh/CRxsDbI+TbtMlvF2htb6cXjy1vWx4joyLgQx7xDtvrKG1u+eFb r+Hnhjf14b6pJsduuOLChZF3I8bLoEpOicfRztDTBPUHduFLF+OnnCJzG2kfPsW0CuBv 8tvQ==
In-reply-to: <CAHhGz88o7r5O7hhMT4xp2zuqyXOHJNfth09ktDwVh-oKLv+Lxg@mail.gmail.com>
References: <CAHhGz88i74c6vqcBEDAExaxe17b7BnERio-Db-Ja5fRTNotvXA@mail.gmail.com> <CAHhGz89CejAVoHcKF0t1Wr0aRgnOr6L=G2z7JXYX8jQQmGJwFQ@mail.gmail.com> <CAHhGz8_pfXwKT04L5e-XFTWbANiJF5BwYcCD7K8GUxRic95KTw@mail.gmail.com> <CAHhGz88o7r5O7hhMT4xp2zuqyXOHJNfth09ktDwVh-oKLv+Lxg@mail.gmail.com>

Dear all,

Here is a simplified patch only adding the 4-into-1 stuff.
It could surely be refactorized as well, but it's simple enough to be included.
It lacks proper tuning.

Improvement example:
* before:
? a = ffgen(7^1000,'a)
time = 32 ms.
%1 = a
? a^(7^1000)
time = 724 ms.
%2 = a
* after:
? a = ffgen(7^1000,'a)
time = 20 ms.
%1 = a
? a^(7^1000)
time = 368 ms.
%2 = a

so a 2x speedup as expected.

Best,

2016-01-14 17:05 GMT+01:00 Jean-Pierre Flori <jpflori@gmail.com>:
> Looking at what Peter did, I now see I should just wrap his code!
> http://pari.math.u-bordeaux.fr/archives/pari-dev-1510/msg00004.html
>
> 2016-01-14 13:32 GMT+01:00 Jean-Pierre Flori <jpflori@gmail.com>:
>> Should be better now.
>>
>> 2016-01-14 13:22 GMT+01:00 Jean-Pierre Flori <jpflori@gmail.com>:
>>> It seems it was an old buggy version of the patch.
>>> I'm trying to relocate the right commit.
>>>
>>> 2016-01-14 11:58 GMT+01:00 Jean-Pierre Flori <jpflori@gmail.com>:
>>>> Dear all,
>>>>
>>>> Here is a preliminary patch to speed up exponentiation in some
>>>> extensions of finite fields of small characteristics (< size of
>>>> machine word/2).
>>>>
>>>> It packs more coefficients of the polynomial representation over the
>>>> prime field into machine words when the finite field element is mapped
>>>> to a multiprecision integer (i.e. KS).
>>>> Two functions are added:
>>>> * one function which packs 4 coeffs into a machine word,
>>>> * one generic funciton which packs the coeffs as much as possible
>>>> possibly crossing machine words boundaries.
>>>>
>>>> I did not take care of adding new tuning parameters or smartly
>>>> choosing between the different functions, e.g. calling the
>>>> 4-coeffs-in-1-word function when the (product) coeff size is
>>>> BITS_IN_LONG/4-\epsilon might be more efficient than using the generic
>>>> function which does more complicated packing when the polynomial
>>>> degree is not large.
>>>>
>>>> I did not add (yet) optimal packing when the product coeffs are larger
>>>> than a machine word.
>>>>
>>>> I did not really check it is bug free.
>>>>
>>>> Best,
>>>> --
>>>> Jean-Pierre Flori
>>>
>>>
>>>
>>> --
>>> Jean-Pierre Flori
>>
>>
>>
>> --
>> Jean-Pierre Flori
>
>
>
> --
> Jean-Pierre Flori



-- 
Jean-Pierre Flori

diff --git a/src/basemath/Flx.c b/src/basemath/Flx.c
index 715cb02..e071788 100644
--- a/src/basemath/Flx.c
+++ b/src/basemath/Flx.c
@@ -594,6 +594,12 @@ Flx_shiftip(pari_sp av, GEN x, long v)
   avma = (pari_sp)y; return y;
 }
 
+#define BITS_IN_QUARTULONG (BITS_IN_HALFULONG >> 1)
+#define QUARTMASK ((1UL<<BITS_IN_QUARTULONG)-1UL)
+#define LLQUARTWORD(x) ((x) & QUARTMASK)
+#define HLQUARTWORD(x) (((x) >> BITS_IN_QUARTULONG) & QUARTMASK)
+#define LHQUARTWORD(x) (((x) >> (2*BITS_IN_QUARTULONG)) & QUARTMASK)
+#define HHQUARTWORD(x) (((x) >> (3*BITS_IN_QUARTULONG)) & QUARTMASK)
 INLINE long
 maxlengthcoeffpol(ulong p, long n)
 {
@@ -601,7 +607,11 @@ maxlengthcoeffpol(ulong p, long n)
   GEN z = muliu(sqru(p-1), n);
   long l = lgefint(z);
   avma = ltop;
-  if (l==3 && HIGHWORD(z[2])==0) return 0;
+  if (l==3 && HIGHWORD(z[2])==0)
+  {
+    if (HLQUARTWORD(z[2]) == 0) return -1;
+    else return 0;
+  }
   return l-2;
 }
 
@@ -708,6 +718,57 @@ Flx_mulspec_halfmulii(GEN a, GEN b, ulong p, long na, long nb)
   return int_to_Flx_half(z,p);
 }
 
+static GEN
+Flx_to_int_quartspec(GEN a, long na)
+{
+  long j;
+  long n = (na+3)>>2UL;
+  GEN V = cgetipos(2+n);
+  GEN w;
+  for (w = int_LSW(V), j=0; j+3<na; j+=4, w=int_nextW(w))
+    *w = a[j]|(a[j+1]<<BITS_IN_QUARTULONG)|(a[j+2]<<(2*BITS_IN_QUARTULONG))|(a[j+3]<<(3*BITS_IN_QUARTULONG));
+  switch (na-j)
+  {
+  case 3:
+    *w = a[j]|(a[j+1]<<BITS_IN_QUARTULONG)|(a[j+2]<<(2*BITS_IN_QUARTULONG));
+    break;
+  case 2:
+    *w = a[j]|(a[j+1]<<BITS_IN_QUARTULONG);
+    break;
+  case 1:
+    *w = a[j];
+    break;
+  case 0:
+    break;
+  }
+  return V;
+}
+
+static GEN
+int_to_Flx_quart(GEN z, ulong p)
+{
+  long i;
+  long lx = (lgefint(z)-2)*4+2;
+  GEN w, x = cgetg(lx, t_VECSMALL);
+  for (w = int_LSW(z), i=2; i<lx; i+=4, w=int_nextW(w))
+  {
+    x[i]   = LLQUARTWORD((ulong)*w)%p;
+    x[i+1] = HLQUARTWORD((ulong)*w)%p;
+    x[i+2] = LHQUARTWORD((ulong)*w)%p;
+    x[i+3] = HHQUARTWORD((ulong)*w)%p;
+  }
+  return Flx_renormalize(x, lx);
+}
+
+static GEN
+Flx_mulspec_quartmulii(GEN a, GEN b, ulong p, long na, long nb)
+{
+  GEN A = Flx_to_int_quartspec(a,na);
+  GEN B = Flx_to_int_quartspec(b,nb);
+  GEN z = mulii(A,B);
+  return int_to_Flx_quart(z,p);
+}
+
 /*Eval x in 2^(k*BIL) in linear time, k==2 or 3*/
 static GEN
 Flx_eval2BILspec(GEN x, long k, long l)
@@ -766,6 +827,7 @@ Flx_mulspec_mulii_inflate(GEN x, GEN y, long N, ulong p, long nx, long ny)
   return gerepileupto(av, Z_mod2BIL_Flx(z, N, nx+ny-2, p));
 }
 
+#define Flx_MUL_QUARTMULII_LIMIT Flx_MUL_HALFMULII_LIMIT
 /* fast product (Karatsuba) of polynomials a,b. These are not real GENs, a+2,
  * b+2 were sent instead. na, nb = number of terms of a, b.
  * Only c, c0, c1, c2 are genuine GEN.
@@ -785,6 +847,10 @@ Flx_mulspec(GEN a, GEN b, ulong p, long na, long nb)
   av = avma;
   switch (maxlengthcoeffpol(p,nb))
   {
+  case -1:
+    if (na>=Flx_MUL_QUARTMULII_LIMIT)
+      return Flx_shiftip(av,Flx_mulspec_quartmulii(a,b,p,na,nb), v);
+    break;
   case 0:
     if (na>=Flx_MUL_HALFMULII_LIMIT)
       return Flx_shiftip(av,Flx_mulspec_halfmulii(a,b,p,na,nb), v);
@@ -910,6 +976,13 @@ Flx_sqrspec_halfsqri(GEN a, ulong p, long na)
 }
 
 static GEN
+Flx_sqrspec_quartsqri(GEN a, ulong p, long na)
+{
+  GEN z = sqri(Flx_to_int_quartspec(a,na));
+  return int_to_Flx_quart(z,p);
+}
+
+static GEN
 Flx_sqrspec_sqri_inflate(GEN x, long N, ulong p, long nx)
 {
   pari_sp av = avma;
@@ -917,6 +990,7 @@ Flx_sqrspec_sqri_inflate(GEN x, long N, ulong p, long nx)
   return gerepileupto(av, Z_mod2BIL_Flx(z, N, (nx-1)*2, p));
 }
 
+#define Flx_SQR_QUARTSQRI_LIMIT Flx_SQR_HALFSQRI_LIMIT
 static GEN
 Flx_sqrspec(GEN a, ulong p, long na)
 {
@@ -930,6 +1004,10 @@ Flx_sqrspec(GEN a, ulong p, long na)
   av = avma;
   switch(maxlengthcoeffpol(p,na))
   {
+  case -1:
+    if (na>=Flx_SQR_QUARTSQRI_LIMIT)
+      return Flx_shiftip(av, Flx_sqrspec_quartsqri(a,p,na), v);
+    break;
   case 0:
     if (na>=Flx_SQR_HALFSQRI_LIMIT)
       return Flx_shiftip(av, Flx_sqrspec_halfsqri(a,p,na), v);
@@ -1033,6 +1111,11 @@ Flx_multhreshold(GEN T, ulong p, long half, long mul, long mul2, long kara)
   long na = lgpol(T);
   switch (maxlengthcoeffpol(p,na))
   {
+  case -1:
+    /* To be fixed for quart */
+    if (na>=Flx_MUL_QUARTMULII_LIMIT)
+      return na>=half;
+    break;
   case 0:
     if (na>=Flx_MUL_HALFMULII_LIMIT)
       return na>=half;

Follow-Ups:
- Re: Faster exponentiation in some extensions of finite fields of small characteristics
  - From: Jean-Pierre Flori <jpflori@gmail.com>
- Re: Faster exponentiation in some extensions of finite fields of small characteristics
  - From: Bill Allombert <Bill.Allombert@math.u-bordeaux.fr>

References:
- Faster exponentiation in some extensions of finite fields of small characteristics
  - From: Jean-Pierre Flori <jpflori@gmail.com>
- Re: Faster exponentiation in some extensions of finite fields of small characteristics
  - From: Jean-Pierre Flori <jpflori@gmail.com>
- Re: Faster exponentiation in some extensions of finite fields of small characteristics
  - From: Jean-Pierre Flori <jpflori@gmail.com>
- Re: Faster exponentiation in some extensions of finite fields of small characteristics
  - From: Jean-Pierre Flori <jpflori@gmail.com>

Prev by Date: Re: Faster exponentiation in some extensions of finite fields of small characteristics
Next by Date: Re: Faster exponentiation in some extensions of finite fields of small characteristics
Previous by thread: Re: Faster exponentiation in some extensions of finite fields of small characteristics
Next by thread: Re: Faster exponentiation in some extensions of finite fields of small characteristics
Index(es):
- Date
- Thread