Ilya Zakharevich on Fri, 5 Feb 1999 04:24:31 -0500


[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

[PATCH] Hiccups in diffptr


This is a first shot to enable yet bigger prime tables in PARI.

We introduce hiccups into diffptr table in a backward-compatible way.  The
"real" end of the table is marked by "\0\0", the differences which are
larger than 256 are introduced as 

  0 2 low-byte high-byte 

(2 for a possibility of further extension if needed).  Thus the old code
which balks out on the first \0 it sees will not misinterpret new
data in the table.  This also means that no slowdown should be
noticable, since the internal loops remain exactly as they were.

Note that I updated only *some* things which are using diffptr.  Note
also that among three functions forprimes() prodeuler() and direuler()
I see three different ways to increment a prime using diffptr!

This probably should be fixed (to use GENs, since with a couple of further
additions the primelimit will be able to go above MAXLONG).

Enjoy,
Ilya

P.S.  I tested only the fact that forprimes() works near 436273000.

--- ./src/basemath/arith2.c~	Sun Nov  8 21:26:04 1998
+++ ./src/basemath/arith2.c	Fri Feb  5 03:59:34 1999
@@ -78,15 +78,17 @@ initprimes1(long size, long *lenp, long 
     *r++ = (q-s) << 1;
   }
   *r++=0;
+  *r++=0;
   *lenp = r - p;
   *lastp = ((s - p) << 1) + 1;
-  return (byteptr) gprealloc(p,r-p,size + 1);
+  return (byteptr) gprealloc(p,r-p,size + 2);
 }
 
 #define PRIME_ARENA (512 * 1024)
 
 /* Here's the workhorse.  This is recursive, although normally the first
    recursive call will bottom out and invoke initprimes1() at once.
+   Lenp includes two sentinels \0 at the end.
    (Not static;  might conceivably be useful to someone in library mode) */
 byteptr
 initprimes0(ulong maxnum, long *lenp, long *lastp)
@@ -100,15 +102,15 @@ initprimes0(ulong maxnum, long *lenp, lo
   maxnum |= 1;			/* make it odd. */
 
   /* Checked to be enough up to 40e6, attained at 155893 */
-  size = (long) (1.09 * maxnum/log((double)maxnum)) + 145;
+  size = (long) (1.09 * maxnum/log((double)maxnum)) + 145 + 1;
   p1 = (byteptr) gpmalloc(size);
   rootnum = (long) sqrt((double)maxnum); /* cast it back to a long */
   rootnum |= 1;
   {
     byteptr p2 = initprimes0(rootnum, &psize, &last); /* recursive call */
-    memcpy(p1, p2, psize); free(p2);
+    memcpy(p1, p2, psize - 1); free(p2);
   }
-  fin1 = p1 + psize - 1;
+  fin1 = p1 + psize - 2;
   remains = (maxnum - rootnum) >> 1; /* number of odd numbers to check */
 
   need = 100 * rootnum;		/* Make % overhead negligeable. */
@@ -165,9 +167,22 @@ initprimes0(ulong maxnum, long *lenp, lo
     /* now q runs over addresses corresponding to primes */
     for (q = p; ; plast = q++)
     {
+      int d;
+
       while (*q) q++;		/* use 0-sentinel at end */
       if (q >= fin) break;
-      *curdiff++ = (q - plast) << 1;
+      d = (q - plast) << 1;
+      if (d < 256) {
+	  *curdiff++ = d;
+      } else {
+	  /* Insert a 'hiccup'.  Not all library code is able to take this
+	     into account yet, but the old code will just finish
+	     prematurely without getting wrong info. */
+	  *curdiff++ = 0;
+	  *curdiff++ = 2;		/* Reserved - number of bytes */
+	  *curdiff++ = (d & 255);	/* Non zero! */
+	  *curdiff++ = (d >> 8);	/* Non zero! */
+      }
     }
     plast -= asize - 1;
     remains -= asize - 1;
@@ -175,6 +190,7 @@ initprimes0(ulong maxnum, long *lenp, lo
   } /* while (remains) */
   last = curlow - ((p - plast) << 1);
   *curdiff++ = 0;		/* sentinel */
+  *curdiff++ = 0;		/* second sentinel */
   *lenp = curdiff - p1;
   *lastp = last;
   if (alloced) free(p);
@@ -198,8 +214,11 @@ initprimes(long maxnum)
   /* switch to unsigned: adding the 257 _could_ wrap to a negative number. */
   ulong maxnum1 = ((maxnum<65302?65302:maxnum)+257ul);
 
+#if 0	/* In fact the limit is the square of one below, since
+	   the sieve code does not take hiccups into account */
   if (maxnum1 > 436273000)
     err(talker,"impossible to have prestored primes > 436273009");
+#endif
 
   p = initprimes0(maxnum1, &len, &last);
 #if 0 /* not yet... GN */
--- ./src/basemath/ifactor1.c~	Sun Nov  8 21:26:06 1998
+++ ./src/basemath/ifactor1.c	Fri Feb  5 04:03:06 1999
@@ -297,24 +297,31 @@ snextpr(ulong p, byteptr *d, long *rcn, 
   static GEN gp = (GEN)pp;
   long d1 = **d, rcn0;
 
+  if (!d1 && (*d)[1]) {
+      d1 = (*d)[2] + 256*((*d)[3]);
+      (*d) += 3;
+  }
   if (d1)
   {
     if (*rcn != NPRC)
     {
+      long d2 = d1;
+
       rcn0 = *rcn;
-      while (d1 > 0)
+      while (d2 > 0)
       {
-	d1 -= prc210_d1[*rcn];
+	d2 -= prc210_d1[*rcn];
 	if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
       }
-      if (d1 < 0)
+      if (d2 < 0)
       {
 	fprintferr("snextpr: prime %lu wasn\'t %lu mod 210\n",
 		   p, (ulong)prc210_rp[rcn0]);
 	err(bugparier, "[caller of] snextpr");
       }
     }
-    return p + *(*d)++;
+    (*d)++;
+    return p + d1;
   }
   /* we are beyond the diffptr table */
   if (*rcn == NPRC)		/* we need to initialize this now */
--- ./src/language/sumiter.c~	Sun Nov  8 21:26:30 1998
+++ ./src/language/sumiter.c	Fri Feb  5 04:02:20 1999
@@ -90,14 +90,23 @@ forprime(entree *ep, GEN ga, GEN gb, cha
   ga = gceil(ga); gb = gfloor(gb);
   if (is_bigint(ga) || is_bigint(gb)) err(primer1);
   a = itos(ga); if (a<=0) a = 1;
-  b = 0; while (*p && a > b) b += *p++;
+  b = 0; 
+  do {
+      while (*p && a > b) b += *p++;
+  } while (a > b && p[1] && (b += p[2] + 256*p[3], p += 4));
   prime[2] = b; b = itos(gb);
   avma = av; push_val(ep, prime);
   while (prime[2] <= b)
   {
-    if (!*p) err(primer1);
+    if (!p[0] && !p[1]) err(primer1);
     (void)lisseq(ch); if (check_break_status(DOLOOP,NULL)) break;
-    avma = av; prime[2] += *p++;
+    avma = av; 
+    if (p[0]) {
+	prime[2] += *p++;
+    } else {
+	prime[2] += p[2] + 256*p[3];
+	p += 4;
+    }
   }
   pop_val(ep);
 }
@@ -424,7 +433,9 @@ prodeuler(entree *ep, GEN a, GEN b, char
 
   affsr(1,x); prime = 0;
   b = gfloor(b); a = gceil(a);
-  while (*p && cmpis(a,prime)>0) prime += *p++;
+  do {
+      while (*p && cmpis(a,prime)>0) prime += *p++;
+  } while (cmpis(a,prime)>0 && p[1] && (prime += p[2] + 256*p[3], p += 4));
   if (cmpsi(prime,b) > 0)
   {
     av=avma; if (gcmp1(subii(a,b))) { avma=av; return x; }
@@ -434,9 +445,15 @@ prodeuler(entree *ep, GEN a, GEN b, char
   a = stoi(prime); push_val(ep, a);
   do
   {
-    if (!*p) err(primer1);
+    if (!p[0] && p[1]) err(primer1);
     p1 = lisexpr(ch); if (did_break) err(breaker,"prodeuler");
-    x=gmul(x,p1); a = addsi(*p++,a);
+    x=gmul(x,p1); 
+    if (p[0]) {
+	a = addsi(*p++,a);
+    } else {
+	a = addsi(p[2] + 256*p[3],a);
+	p += 4;
+    }
     if (low_stack(lim, (av+bot)>>1))
     {
       GEN *gptr[2]; gptr[0]=&x; gptr[1]=&a;
@@ -466,11 +483,13 @@ direuler(entree *ep, GEN a, GEN b, char 
   x1=cgetg(n+1,t_VEC); b = gcopy(b); av=avma;
   x=cgetg(n+1,t_VEC); x[1]=un; for (i=2; i<=n; i++) x[i]=zero;
 
-  while (*p && gcmpgs(a,prime) > 0) prime += *p++;
+  do {
+      while (*p && gcmpgs(a,prime) > 0) prime += *p++;
+  } while (gcmpgs(a,prime)>0 && p[1] && (prime += p[2] + 256*p[3], p += 4));
   a = stoi(prime); push_val(ep, a);
   while (gcmp(a,b)<=0)
   {
-    if (!*p) err(primer1);
+    if (!p[0] && !p[1]) err(primer1);
     p1 = lisexpr(ch); if (did_break) err(breaker,"direuler");
     polnum=numer(p1); polden=denom(p1);
     tx = typ(polnum);
@@ -524,7 +543,13 @@ direuler(entree *ep, GEN a, GEN b, char 
       if (DEBUGMEM>1) err(warnmem,"direuler");
       gerepilemany(av,gptr,2);
     }
-    a = addsi(*p++,a); ep->value = (void*) a;
+    if (p[0]) {
+	a = addsi(*p++,a);
+    } else {
+	a = addsi(p[2] + 256*p[3],a);
+	p += 4;
+    }
+    ep->value = (void*) a;
   }
   pop_val(ep); tetpil=avma;
   return gerepile(av0,tetpil,gcopy(x));