/* factor -- print prime factors of n. This is the factor utility
Copyright (C) 1986-2018 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */ The GNUv3 license
/* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
Arbitrary-precision code adapted by James Youngman from Torbjörn
Granlund's factorize.c, from GNU MP version 4.2.2.
In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
Contains code from GNU MP. */
/* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
or, with GMP, numbers of any size.
Code organisation:
There are several variants of many functions, for handling one word, two
words, and GMP's mpz_t type. If the one-word variant is called foo, the
two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
some cases, the plain function variants will handle both one-word and
two-word numbers, evidenced by function arguments.
The factoring code for two words will fall into the code for one word when
progress allows that.
Using GMP is optional. Define HAVE_GMP to make this code include GMP
factoring code. The GMP factoring code is based on GMP's demos/factorize.c
(last synced 2012-09-07). The GMP-based factoring code will stay in GMP
factoring code even if numbers get small enough for using the two-word
code.
Algorithm:
(1) Perform trial division using a small primes table, but without hardware
division since the primes table store inverses modulo the word base.
(The GMP variant of this code doesn't make use of the precomputed
inverses, but instead relies on GMP for fast divisibility testing.)
(2) Check the nature of any non-factored part using Miller-Rabin for
detecting composites, and Lucas for detecting primes.
(3) Factor any remaining composite part using the Pollard-Brent rho
algorithm or if USE_SQUFOF is defined to 1, try that first.
Status of found factors are checked again using Miller-Rabin and Lucas.
We prefer using Hensel norm in the divisions, not the more familiar
Euclidian norm, since the former leads to much faster code. In the
Pollard-Brent rho code and the prime testing code, we use Montgomery's
trick of multiplying all n-residues by the word base, allowing cheap Hensel
reductions mod n.
Improvements:
* Use modular inverses also for exact division in the Lucas code, and
elsewhere. A problem is to locate the inverses not from an index, but
from a prime. We might instead compute the inverse on-the-fly.
* Tune trial division table size (not forgetting that this is a standalone
program where the table will be read from disk for each invocation).
* Implement less naive powm, using k-ary exponentiation for k = 3 or
perhaps k = 4.
* Try to speed trial division code for single uintmax_t numbers, i.e., the
code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
respectively cycles ought to be possible.
* The redcify function could be vastly improved by using (plain Euclidian)
pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
GMP's gmp-impl.h). The redcify2 function could be vastly improved using
similar methoods. These functions currently dominate run time when using
the -w option.
*/
/* Whether to recursively factor to prove primality,
or run faster probabilistic tests. */
#ifndef PROVE_PRIMALITY Line 89
# define PROVE_PRIMALITY 1 Line 90
#endif Line 91
/* Faster for certain ranges but less general. */
#ifndef USE_SQUFOF Line 94
# define USE_SQUFOF 0 Line 95
#endif Line 96
/* Output SQUFOF statistics. */
#ifndef STAT_SQUFOF Line 99
# define STAT_SQUFOF 0 Line 100
#endif Line 101
#include <config.h> Provides system specific information
#include <getopt.h> ...!includes auto-comment...
#include <stdio.h> Provides standard I/O capability
#if HAVE_GMP Line 107
# include <gmp.h> Line 108
# if !HAVE_DECL_MPZ_INITS Line 109
# include <stdarg.h> ...!includes auto-comment...
# endif Line 111
#endif Line 112
#include <assert.h> ...!includes auto-comment...
#include "system.h" ...!includes auto-comment...
#include "die.h" ...!includes auto-comment...
#include "error.h" ...!includes auto-comment...
#include "full-write.h" ...!includes auto-comment...
#include "quote.h" ...!includes auto-comment...
#include "readtokens.h" ...!includes auto-comment...
#include "xstrtol.h" ...!includes auto-comment...
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "factor" Line 125
#define AUTHORS \ Line 127
proper_name ("Paul Rubin"), \ Line 128
proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \ Line 129
proper_name_utf8 ("Niels Moller", "Niels M\303\266ller") Line 130
/* Token delimiters when reading from a file. */
#define DELIM "\n\t " Line 133
#ifndef USE_LONGLONG_H Line 135
/* With the way we use longlong.h, it's only safe to use
when UWtype = UHWtype, as there were various cases
(as can be seen in the history for longlong.h) where
for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
to avoid compile time or run time issues. */
# if LONG_MAX == INTMAX_MAX Line 141
# define USE_LONGLONG_H 1 Line 142
# endif Line 143
#endif Line 144
#if USE_LONGLONG_H Line 146
/* Make definitions for longlong.h to make it do what it can do for us */
/* bitcount for uintmax_t */
# if UINTMAX_MAX == UINT32_MAX Line 151
# define W_TYPE_SIZE 32 Line 152
# elif UINTMAX_MAX == UINT64_MAX Line 153
# define W_TYPE_SIZE 64 Line 154
# elif UINTMAX_MAX == UINT128_MAX Line 155
# define W_TYPE_SIZE 128 Line 156
# endif Line 157
# define UWtype uintmax_t Line 159
# define UHWtype unsigned long int Line 160
# undef UDWtype Line 161
# if HAVE_ATTRIBUTE_MODE Line 162
typedef unsigned int UQItype __attribute__ ((mode (QI))); Line 163
typedef int SItype __attribute__ ((mode (SI))); Line 164
typedef unsigned int USItype __attribute__ ((mode (SI))); Line 165
typedef int DItype __attribute__ ((mode (DI))); Line 166
typedef unsigned int UDItype __attribute__ ((mode (DI))); Line 167
# else Line 168
typedef unsigned char UQItype; Line 169
typedef long SItype; Line 170
typedef unsigned long int USItype; Line 171
# if HAVE_LONG_LONG_INT Line 172
typedef long long int DItype; Line 173
typedef unsigned long long int UDItype; Line 174
# else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */ Line 175
typedef long int DItype; Line 176
typedef unsigned long int UDItype; Line 177
# endif Line 178
# endif Line 179
# define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */Line 180
# define ASSERT(x) /* FIXME make longlong.h really standalone */ Line 181
# define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */ Line 182
# define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */ Line 183
# ifndef __GMP_GNUC_PREREQ Line 184
# define __GMP_GNUC_PREREQ(a,b) 1 Line 185
# endif Line 186
/* These stub macros are only used in longlong.h in certain system compiler
combinations, so ensure usage to avoid -Wunused-macros warnings. */
# if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab Line 190
ASSERT (1) Line 191
__GMP_DECLSPEC Line 192
# endif Line 193
# if _ARCH_PPC Line 195
# define HAVE_HOST_CPU_FAMILY_powerpc 1 Line 196
# endif Line 197
# include "longlong.h" Line 198
# ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB Line 199
const unsigned char factor_clz_tab[129] = Line 200
{
1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, Line 202
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, Line 203
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, Line 204
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, Line 205
9
}; Block 1
# endif Line 208
#else /* not USE_LONGLONG_H */ Line 210
# define W_TYPE_SIZE (8 * sizeof (uintmax_t)) Line 212
# define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2)) Line 213
# define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1)) Line 214
# define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2)) Line 215
#endif Line 217
#if !defined __clz_tab && !defined UHWtype Line 219
/* Without this seemingly useless conditional, gcc -Wunused-macros
warns that each of the two tested macros is unused on Fedora 18.
FIXME: this is just an ugly band-aid. Fix it properly. */
#endif Line 223
/* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
#define MAX_NFACTS 26 Line 226
enum Line 228
{
DEV_DEBUG_OPTION = CHAR_MAX + 1 Line 230
}; Block 2
static struct option const long_options[] = Line 233
{
{"-debug", no_argument, NULL, DEV_DEBUG_OPTION}, Line 235
{GETOPT_HELP_OPTION_DECL}, Line 236
{GETOPT_VERSION_OPTION_DECL}, Line 237
{NULL, 0, NULL, 0} Line 238
}; Block 3
struct factors Line 241
{
uintmax_t plarge[2]; /* Can have a single large factor */ Line 243
uintmax_t p[MAX_NFACTS]; Line 244
unsigned char e[MAX_NFACTS]; Line 245
unsigned char nfactors; Line 246
}; Block 4
#if HAVE_GMP Line 249
struct mp_factors Line 250
{
mpz_t *p; Line 252
unsigned long int *e; Line 253
unsigned long int nfactors; Line 254
}; Block 5
#endif Line 256
static void factor (uintmax_t, uintmax_t, struct factors *); Line 258
#ifndef umul_ppmm Line 260
# define umul_ppmm(w1, w0, u, v) \ Line 261
do { \ Line 262
uintmax_t __x0, __x1, __x2, __x3; \ Line 263
unsigned long int __ul, __vl, __uh, __vh; \ Line 264
uintmax_t __u = (u), __v = (v); \ Line 265
\
__ul = __ll_lowpart (__u); \ Line 267
__uh = __ll_highpart (__u); \ Line 268
__vl = __ll_lowpart (__v); \ Line 269
__vh = __ll_highpart (__v); \ Line 270
\
__x0 = (uintmax_t) __ul * __vl; \ Line 272
__x1 = (uintmax_t) __ul * __vh; \ Line 273
__x2 = (uintmax_t) __uh * __vl; \ Line 274
__x3 = (uintmax_t) __uh * __vh; \ Line 275
\
__x1 += __ll_highpart (__x0);/* this can't give carry */ \ Line 277
__x1 += __x2; /* but this indeed can */ \ Line 278
if (__x1 < __x2) /* did we get it? */ \ Line 279
__x3 += __ll_B; /* yes, add it in the proper pos. */ \ Line 280
\
(w1) = __x3 + __ll_highpart (__x1); \ Line 282
(w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \ Line 283
} while (0) Line 284Block 6
#endif Line 285
#if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION Line 287
/* Define our own, not needing normalization. This function is
currently not performance critical, so keep it simple. Similar to
the mod macro below. */
# undef udiv_qrnnd Line 291
# define udiv_qrnnd(q, r, n1, n0, d) \ Line 292
do { \ Line 293
uintmax_t __d1, __d0, __q, __r1, __r0; \ Line 294
\
assert ((n1) < (d)); \ Line 296
__d1 = (d); __d0 = 0; \ Line 297
__r1 = (n1); __r0 = (n0); \ Line 298
__q = 0; \ Line 299
for (unsigned int __i = W_TYPE_SIZE; __i > 0; __i--) \ Line 300
{ \ Line 301
rsh2 (__d1, __d0, __d1, __d0, 1); \ Line 302
__q <<= 1; \ Line 303
if (ge2 (__r1, __r0, __d1, __d0)) \ Line 304
{ \ Line 305
__q++; \ Line 306
sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \ Line 307
} \ Line 308
} \ Line 309
(r) = __r0; \ Line 310
(q) = __q; \ Line 311
} while (0) Line 312Block 7
#endif Line 313
#if !defined add_ssaaaa Line 315
# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ Line 316
do { \ Line 317
uintmax_t _add_x; \ Line 318
_add_x = (al) + (bl); \ Line 319
(sh) = (ah) + (bh) + (_add_x < (al)); \ Line 320
(sl) = _add_x; \ Line 321
} while (0) Line 322Block 8
#endif Line 323
#define rsh2(rh, rl, ah, al, cnt) \ Line 325
do { \ Line 326
(rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \ Line 327
(rh) = (ah) >> (cnt); \ Line 328
} while (0) Line 329Block 9
#define lsh2(rh, rl, ah, al, cnt) \ Line 331
do { \ Line 332
(rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \ Line 333
(rl) = (al) << (cnt); \ Line 334
} while (0) Line 335Block 10
#define ge2(ah, al, bh, bl) \ Line 337
((ah) > (bh) || ((ah) == (bh) && (al) >= (bl))) Line 338
#define gt2(ah, al, bh, bl) \ Line 340
((ah) > (bh) || ((ah) == (bh) && (al) > (bl))) Line 341
#ifndef sub_ddmmss Line 343
# define sub_ddmmss(rh, rl, ah, al, bh, bl) \ Line 344
do { \ Line 345
uintmax_t _cy; \ Line 346
_cy = (al) < (bl); \ Line 347
(rl) = (al) - (bl); \ Line 348
(rh) = (ah) - (bh) - _cy; \ Line 349
} while (0) Line 350Block 11
#endif Line 351
#ifndef count_leading_zeros Line 353
# define count_leading_zeros(count, x) do { \ Line 354
uintmax_t __clz_x = (x); \ Line 355
unsigned int __clz_c; \ Line 356
for (__clz_c = 0; \ Line 357
(__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \ Line 358
__clz_c += 8) \ Line 359
__clz_x <<= 8; \ Line 360
for (; (intmax_t)__clz_x >= 0; __clz_c++) \ Line 361
__clz_x <<= 1; \ Line 362
(count) = __clz_c; \ Line 363
} while (0) Line 364Block 12
#endif Line 365
#ifndef count_trailing_zeros Line 367
# define count_trailing_zeros(count, x) do { \ Line 368
uintmax_t __ctz_x = (x); \ Line 369
unsigned int __ctz_c = 0; \ Line 370
while ((__ctz_x & 1) == 0) \ Line 371
{ \ Line 372
__ctz_x >>= 1; \ Line 373
__ctz_c++; \ Line 374
} \ Line 375
(count) = __ctz_c; \ Line 376
} while (0) Line 377Block 13
#endif Line 378
/* Requires that a < n and b <= n */
#define submod(r,a,b,n) \ Line 381
do { \ Line 382
uintmax_t _t = - (uintmax_t) (a < b); \ Line 383
(r) = ((n) & _t) + (a) - (b); \ Line 384
} while (0) Line 385Block 14
#define addmod(r,a,b,n) \ Line 387
submod ((r), (a), ((n) - (b)), (n)) Line 388
/* Modular two-word addition and subtraction. For performance reasons, the
most significant bit of n1 must be clear. The destination variables must be
distinct from the mod operand. */
#define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \ Line 393
do { \ Line 394
add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \ Line 395
if (ge2 ((r1), (r0), (n1), (n0))) \ Line 396
sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \ Line 397
} while (0) Line 398Block 15
#define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \ Line 399
do { \ Line 400
sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \ Line 401
if ((intmax_t) (r1) < 0) \ Line 402
add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \ Line 403
} while (0) Line 404Block 16
#define HIGHBIT_TO_MASK(x) \ Line 406
(((intmax_t)-1 >> 1) < 0 \ Line 407
? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \ Line 408
: ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \ Line 409
? UINTMAX_MAX : (uintmax_t) 0)) Line 410
/* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
Requires that d1 != 0. */
static uintmax_t Line 414
mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0) Line 415
{
int cntd, cnta; Line 417
assert (d1 != 0); Line 419
if (a1 == 0) Line 421
{
*r1 = 0; Line 423
return a0; Line 424
}
count_leading_zeros (cntd, d1); Line 427
count_leading_zeros (cnta, a1); Line 428
int cnt = cntd - cnta; Line 429
lsh2 (d1, d0, d1, d0, cnt); Line 430
for (int i = 0; i < cnt; i++) Line 431
{
if (ge2 (a1, a0, d1, d0)) Line 433
sub_ddmmss (a1, a0, a1, a0, d1, d0); Line 434
rsh2 (d1, d0, d1, d0, 1); Line 435
}
*r1 = a1; Line 438
return a0; Line 439
} Block 17
static uintmax_t _GL_ATTRIBUTE_CONST Line 442
gcd_odd (uintmax_t a, uintmax_t b) Line 443
{
if ( (b & 1) == 0) Line 445
{
uintmax_t t = b; Line 447
b = a; Line 448
a = t; Line 449
}
if (a == 0) Line 451
return b; Line 452
/* Take out least significant one bit, to make room for sign */
b >>= 1; Line 455
for (;;) Line 457
{
uintmax_t t; Line 459
uintmax_t bgta; Line 460
while ((a & 1) == 0) Line 462
a >>= 1; Line 463
a >>= 1; Line 464
t = a - b; Line 466
if (t == 0) Line 467
return (a << 1) + 1; Line 468
bgta = HIGHBIT_TO_MASK (t); Line 470
/* b <-- min (a, b) */
b += (bgta & t); Line 473
/* a <-- |a - b| */
a = (t ^ bgta) - bgta; Line 476
}
} Block 18
static uintmax_t Line 480
gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)Line 481
{
assert (b0 & 1); Line 483
if ( (a0 | a1) == 0) Line 485
{
*r1 = b1; Line 487
return b0; Line 488
}
while ((a0 & 1) == 0) Line 491
rsh2 (a1, a0, a1, a0, 1); Line 492
for (;;) Line 494
{
if ((b1 | a1) == 0) Line 496
{
*r1 = 0; Line 498
return gcd_odd (b0, a0); Line 499
}
if (gt2 (a1, a0, b1, b0)) Line 502
{
sub_ddmmss (a1, a0, a1, a0, b1, b0); Line 504
do
rsh2 (a1, a0, a1, a0, 1); Line 506
while ((a0 & 1) == 0); Line 507
}
else if (gt2 (b1, b0, a1, a0)) Line 509
{
sub_ddmmss (b1, b0, b1, b0, a1, a0); Line 511
do
rsh2 (b1, b0, b1, b0, 1); Line 513
while ((b0 & 1) == 0); Line 514
}
else Line 516
break; Line 517
}
*r1 = a1; Line 520
return a0; Line 521
} Block 19
static void Line 524
factor_insert_multiplicity (struct factors *factors, Line 525
uintmax_t prime, unsigned int m) Line 526
{
unsigned int nfactors = factors->nfactors; Line 528
uintmax_t *p = factors->p; Line 529
unsigned char *e = factors->e; Line 530
/* Locate position for insert new or increment e. */
int i; Line 533
for (i = nfactors - 1; i >= 0; i--) Line 534
{
if (p[i] <= prime) Line 536
break; Line 537
}
if (i < 0 || p[i] != prime) Line 540
{
for (int j = nfactors - 1; j > i; j--) Line 542
{
p[j + 1] = p[j]; Line 544
e[j + 1] = e[j]; Line 545
}
p[i + 1] = prime; Line 547
e[i + 1] = m; Line 548
factors->nfactors = nfactors + 1; Line 549
}
else Line 551
{
e[i] += m; Line 553
}
} Block 20
#define factor_insert(f, p) factor_insert_multiplicity (f, p, 1) Line 557
static void Line 559
factor_insert_large (struct factors *factors, Line 560
uintmax_t p1, uintmax_t p0) Line 561
{
if (p1 > 0) Line 563
{
assert (factors->plarge[1] == 0); Line 565
factors->plarge[0] = p0; Line 566
factors->plarge[1] = p1; Line 567
}
else Line 569
factor_insert (factors, p0); Line 570
} Block 21
#if HAVE_GMP Line 573
# if !HAVE_DECL_MPZ_INITS Line 575
# define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__) Line 577
# define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__) Line 578
static void Line 580
mpz_va_init (void (*mpz_single_init)(mpz_t), ...) Line 581
{
va_list ap; Line 583
va_start (ap, mpz_single_init); Line 585
mpz_t *mpz; Line 587
while ((mpz = va_arg (ap, mpz_t *))) Line 588
mpz_single_init (*mpz); Line 589
va_end (ap); Line 591
} Block 22
# endif Line 593
static void mp_factor (mpz_t, struct mp_factors *); Line 595
static void Line 597
mp_factor_init (struct mp_factors *factors) Line 598
{
factors->p = NULL; Line 600
factors->e = NULL; Line 601
factors->nfactors = 0; Line 602
} Block 23
static void Line 605
mp_factor_clear (struct mp_factors *factors) Line 606
{
for (unsigned int i = 0; i < factors->nfactors; i++) Line 608
mpz_clear (factors->p[i]); Line 609
free (factors->p); Line 611
free (factors->e); Line 612
} Block 24
static void Line 615
mp_factor_insert (struct mp_factors *factors, mpz_t prime) Line 616
{
unsigned long int nfactors = factors->nfactors; Line 618
mpz_t *p = factors->p; Line 619
unsigned long int *e = factors->e; Line 620
long i; Line 621
/* Locate position for insert new or increment e. */
for (i = nfactors - 1; i >= 0; i--) Line 624
{
if (mpz_cmp (p[i], prime) <= 0) Line 626
break; Line 627
}
if (i < 0 || mpz_cmp (p[i], prime) != 0) Line 630
{
p = xrealloc (p, (nfactors + 1) * sizeof p[0]); Line 632
e = xrealloc (e, (nfactors + 1) * sizeof e[0]); Line 633
mpz_init (p[nfactors]); Line 635
for (long j = nfactors - 1; j > i; j--) Line 636
{
mpz_set (p[j + 1], p[j]); Line 638
e[j + 1] = e[j]; Line 639
}
mpz_set (p[i + 1], prime); Line 641
e[i + 1] = 1; Line 642
factors->p = p; Line 644
factors->e = e; Line 645
factors->nfactors = nfactors + 1; Line 646
}
else Line 648
{
e[i] += 1; Line 650
}
} Block 25
static void Line 654
mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime) Line 655
{
mpz_t pz; Line 657
mpz_init_set_ui (pz, prime); Line 659
mp_factor_insert (factors, pz); Line 660
mpz_clear (pz); Line 661
} Block 26
#endif /* HAVE_GMP */ Line 663
/* Number of bits in an uintmax_t. */
enum { W = sizeof (uintmax_t) * CHAR_BIT }; Line 667
/* Verify that uintmax_t does not have holes in its representation. */
verify (UINTMAX_MAX >> (W - 1) == 1); Line 670
#define P(a,b,c,d) a, Line 672
static const unsigned char primes_diff[] = { Line 673
#include "primes.h" ...!includes auto-comment...
0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */ Line 675
};
#undef P Line 677
#define PRIMES_PTAB_ENTRIES \ Line 679
(sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1) Line 680
#define P(a,b,c,d) b, Line 682
static const unsigned char primes_diff8[] = { Line 683
#include "primes.h" ...!includes auto-comment...
0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */ Line 685
};
#undef P Line 687
struct primes_dtab Line 689
{
uintmax_t binv, lim; Line 691
}; Block 30
#define P(a,b,c,d) {c,d}, Line 694Block 31
static const struct primes_dtab primes_dtab[] = { Line 695
#include "primes.h" ...!includes auto-comment...
{1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */ Line 697
};
#undef P Line 699
/* Verify that uintmax_t is not wider than
the integers used to generate primes.h. */
verify (W <= WIDE_UINT_BITS); Line 703
/* debugging for developers. Enables devmsg().
This flag is used only in the GMP code. */
static bool dev_debug = false; Line 707
/* Prove primality or run probabilistic tests. */
static bool flag_prove_primality = PROVE_PRIMALITY; Line 710
/* Number of Miller-Rabin tests to run when not proving primality. */
#define MR_REPS 25 Line 713
static void Line 715
factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i, Line 716
unsigned int off) Line 717
{
for (unsigned int j = 0; j < off; j++) Line 719
p += primes_diff[i + j]; Line 720
factor_insert (factors, p); Line 721
} Block 33
/* Trial division with odd primes uses the following trick.
Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
consider the case t < B (this is the second loop below).
From our tables we get
binv = p^{-1} (mod B)
lim = floor ( (B-1) / p ).
First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
(and all quotients in this range occur for some t).
Then t = q * p is true also (mod B), and p is invertible we get
q = t * binv (mod B).
Next, assume that t is *not* divisible by p. Since multiplication
by binv (mod B) is a one-to-one mapping,
t * binv (mod B) > lim,
because all the smaller values are already taken.
This can be summed up by saying that the function
q(t) = binv * t (mod B)
is a permutation of the range 0 <= t < B, with the curious property
that it maps the multiples of p onto the range 0 <= q <= lim, in
order, and the non-multiples of p onto the range lim < q < B.
*/
static uintmax_t Line 757
factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0, Line 758
struct factors *factors) Line 759
{
if (t0 % 2 == 0) Line 761
{
unsigned int cnt; Line 763
if (t0 == 0) Line 765
{
count_trailing_zeros (cnt, t1); Line 767
t0 = t1 >> cnt; Line 768
t1 = 0; Line 769
cnt += W_TYPE_SIZE; Line 770
}
else Line 772
{
count_trailing_zeros (cnt, t0); Line 774
rsh2 (t1, t0, t1, t0, cnt); Line 775
}
factor_insert_multiplicity (factors, 2, cnt); Line 778
}
uintmax_t p = 3; Line 781
unsigned int i; Line 782
for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++) Line 783
{
for (;;) Line 785
{
uintmax_t q1, q0, hi, lo _GL_UNUSED; Line 787
q0 = t0 * primes_dtab[i].binv; Line 789
umul_ppmm (hi, lo, q0, p); Line 790
if (hi > t1) Line 791
break; Line 792
hi = t1 - hi; Line 793
q1 = hi * primes_dtab[i].binv; Line 794
if (LIKELY (q1 > primes_dtab[i].lim)) Line 795
break; Line 796
t1 = q1; t0 = q0; Line 797
factor_insert (factors, p); Line 798
}
p += primes_diff[i + 1]; Line 800
}
if (t1p) Line 802
*t1p = t1; Line 803
#define DIVBLOCK(I) \ Line 805
do { \ Line 806
for (;;) \ Line 807
{ \ Line 808
q = t0 * pd[I].binv; \ Line 809
if (LIKELY (q > pd[I].lim)) \ Line 810
break; \ Line 811
t0 = q; \ Line 812
factor_insert_refind (factors, p, i + 1, I); \ Line 813
} \ Line 814
} while (0) Line 815
for (; i < PRIMES_PTAB_ENTRIES; i += 8) Line 817
{
uintmax_t q; Line 819
const struct primes_dtab *pd = &primes_dtab[i]; Line 820
DIVBLOCK (0); Line 821
DIVBLOCK (1); Line 822
DIVBLOCK (2); Line 823
DIVBLOCK (3); Line 824
DIVBLOCK (4); Line 825
DIVBLOCK (5); Line 826
DIVBLOCK (6); Line 827
DIVBLOCK (7); Line 828
p += primes_diff8[i]; Line 830
if (p * p > t0) Line 831
break; Line 832
}
return t0; Line 835
} Block 34
#if HAVE_GMP Line 838
static void Line 839
mp_factor_using_division (mpz_t t, struct mp_factors *factors) Line 840
{
mpz_t q; Line 842
unsigned long int p; Line 843
devmsg ("[trial division] "); Line 845
mpz_init (q); Line 847
p = mpz_scan1 (t, 0); Line 849
mpz_div_2exp (t, t, p); Line 850
while (p) Line 851
{
mp_factor_insert_ui (factors, 2); Line 853
--p; Line 854
}
p = 3; Line 857
for (unsigned int i = 1; i <= PRIMES_PTAB_ENTRIES;) Line 858
{
if (! mpz_divisible_ui_p (t, p)) Line 860
{
p += primes_diff[i++]; Line 862
if (mpz_cmp_ui (t, p * p) < 0) Line 863
break; Line 864
}
else Line 866
{
mpz_tdiv_q_ui (t, t, p); Line 868
mp_factor_insert_ui (factors, p); Line 869
}
}
mpz_clear (q); Line 873
} Block 35
#endif Line 875
/* Entry i contains (2i+1)^(-1) mod 2^8. */
static const unsigned char binvert_table[128] = Line 878
{
0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF, Line 880
0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF, Line 881
0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF, Line 882
0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF, Line 883
0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF, Line 884
0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F, Line 885
0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F, Line 886
0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F, Line 887
0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F, Line 888
0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F, Line 889
0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F, Line 890
0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F, Line 891
0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F, Line 892
0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F, Line 893
0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F, Line 894
0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF Line 895
}; Block 36
/* Compute n^(-1) mod B, using a Newton iteration. */
#define binv(inv,n) \ Line 899
do { \ Line 900
uintmax_t __n = (n); \ Line 901
uintmax_t __inv; \ Line 902
\
__inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \ Line 904
if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \ Line 905
if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \ Line 906
if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \ Line 907
\
if (W_TYPE_SIZE > 64) \ Line 909
{ \ Line 910
int __invbits = 64; \ Line 911
do { \ Line 912
__inv = 2 * __inv - __inv * __inv * __n; \ Line 913
__invbits *= 2; \ Line 914
} while (__invbits < W_TYPE_SIZE); \ Line 915
} \ Line 916
\
(inv) = __inv; \ Line 918
} while (0) Line 919Block 37
/* q = u / d, assuming d|u. */
#define divexact_21(q1, q0, u1, u0, d) \ Line 922
do { \ Line 923
uintmax_t _di, _q0; \ Line 924
binv (_di, (d)); \ Line 925
_q0 = (u0) * _di; \ Line 926
if ((u1) >= (d)) \ Line 927
{ \ Line 928
uintmax_t _p1, _p0 _GL_UNUSED; \ Line 929
umul_ppmm (_p1, _p0, _q0, d); \ Line 930
(q1) = ((u1) - _p1) * _di; \ Line 931
(q0) = _q0; \ Line 932
} \ Line 933
else \ Line 934
{ \ Line 935
(q0) = _q0; \ Line 936
(q1) = 0; \ Line 937
} \ Line 938
} while (0) Line 939Block 38
/* x B (mod n). */
#define redcify(r_prim, r, n) \ Line 942
do { \ Line 943
uintmax_t _redcify_q _GL_UNUSED; \ Line 944
udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \ Line 945
} while (0) Line 946Block 39
/* x B^2 (mod n). Requires x > 0, n1 < B/2 */
#define redcify2(r1, r0, x, n1, n0) \ Line 949
do { \ Line 950
uintmax_t _r1, _r0, _i; \ Line 951
if ((x) < (n1)) \ Line 952
{ \ Line 953
_r1 = (x); _r0 = 0; \ Line 954
_i = W_TYPE_SIZE; \ Line 955
} \ Line 956
else \ Line 957
{ \ Line 958
_r1 = 0; _r0 = (x); \ Line 959
_i = 2*W_TYPE_SIZE; \ Line 960
} \ Line 961
while (_i-- > 0) \ Line 962
{ \ Line 963
lsh2 (_r1, _r0, _r1, _r0, 1); \ Line 964
if (ge2 (_r1, _r0, (n1), (n0))) \ Line 965
sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \ Line 966
} \ Line 967
(r1) = _r1; \ Line 968
(r0) = _r0; \ Line 969
} while (0) Line 970Block 40
/* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
Both a and b must be in redc form, the result will be in redc form too. */
static inline uintmax_t Line 974
mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi) Line 975
{
uintmax_t rh, rl, q, th, tl _GL_UNUSED, xh; Line 977
umul_ppmm (rh, rl, a, b); Line 979
q = rl * mi; Line 980
umul_ppmm (th, tl, q, m); Line 981
xh = rh - th; Line 982
if (rh < th) Line 983
xh += m; Line 984
return xh; Line 986
} Block 41
/* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
Both a and b must be in redc form, the result will be in redc form too.
For performance reasons, the most significant bit of m must be clear. */
static uintmax_t Line 992
mulredc2 (uintmax_t *r1p, Line 993
uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0, Line 994
uintmax_t m1, uintmax_t m0, uintmax_t mi) Line 995
{
uintmax_t r1, r0, q, p1, p0 _GL_UNUSED, t1, t0, s1, s0; Line 997
mi = -mi; Line 998
assert ( (a1 >> (W_TYPE_SIZE - 1)) == 0); Line 999
assert ( (b1 >> (W_TYPE_SIZE - 1)) == 0); Line 1000
assert ( (m1 >> (W_TYPE_SIZE - 1)) == 0); Line 1001
/* First compute a0 * <b1, b0> B^{-1}
+-----+
|a0 b0|
+--+--+--+
|a0 b1|
+--+--+--+
|q0 m0|
+--+--+--+
|q0 m1|
-+--+--+--+
|r1|r0| 0|
+--+--+--+
*/
umul_ppmm (t1, t0, a0, b0); Line 1016
umul_ppmm (r1, r0, a0, b1); Line 1017
q = mi * t0; Line 1018
umul_ppmm (p1, p0, q, m0); Line 1019
umul_ppmm (s1, s0, q, m1); Line 1020
r0 += (t0 != 0); /* Carry */ Line 1021
add_ssaaaa (r1, r0, r1, r0, 0, p1); Line 1022
add_ssaaaa (r1, r0, r1, r0, 0, t1); Line 1023
add_ssaaaa (r1, r0, r1, r0, s1, s0); Line 1024
/* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
+-----+
|a1 b0|
+--+--+
|r1|r0|
+--+--+--+
|a1 b1|
+--+--+--+
|q1 m0|
+--+--+--+
|q1 m1|
-+--+--+--+
|r1|r0| 0|
+--+--+--+
*/
umul_ppmm (t1, t0, a1, b0); Line 1041
umul_ppmm (s1, s0, a1, b1); Line 1042
add_ssaaaa (t1, t0, t1, t0, 0, r0); Line 1043
q = mi * t0; Line 1044
add_ssaaaa (r1, r0, s1, s0, 0, r1); Line 1045
umul_ppmm (p1, p0, q, m0); Line 1046
umul_ppmm (s1, s0, q, m1); Line 1047
r0 += (t0 != 0); /* Carry */ Line 1048
add_ssaaaa (r1, r0, r1, r0, 0, p1); Line 1049
add_ssaaaa (r1, r0, r1, r0, 0, t1); Line 1050
add_ssaaaa (r1, r0, r1, r0, s1, s0); Line 1051
if (ge2 (r1, r0, m1, m0)) Line 1053
sub_ddmmss (r1, r0, r1, r0, m1, m0); Line 1054
*r1p = r1; Line 1056
return r0; Line 1057
} Block 42
static uintmax_t _GL_ATTRIBUTE_CONST Line 1060
powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one) Line 1061
{
uintmax_t y = one; Line 1063
if (e & 1) Line 1065
y = b; Line 1066
while (e != 0) Line 1068
{
b = mulredc (b, b, n, ni); Line 1070
e >>= 1; Line 1071
if (e & 1) Line 1073
y = mulredc (y, b, n, ni); Line 1074
}
return y; Line 1077
} Block 43
static uintmax_t Line 1080
powm2 (uintmax_t *r1m, Line 1081
const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np, Line 1082
uintmax_t ni, const uintmax_t *one) Line 1083
{
uintmax_t r1, r0, b1, b0, n1, n0; Line 1085
unsigned int i; Line 1086
uintmax_t e; Line 1087
b0 = bp[0]; Line 1089
b1 = bp[1]; Line 1090
n0 = np[0]; Line 1091
n1 = np[1]; Line 1092
r0 = one[0]; Line 1094
r1 = one[1]; Line 1095
for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1) Line 1097
{
if (e & 1) Line 1099
{
r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni); Line 1101
r1 = *r1m; Line 1102
}
b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni); Line 1104
b1 = *r1m; Line 1105
}
for (e = ep[1]; e > 0; e >>= 1) Line 1107
{
if (e & 1) Line 1109
{
r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni); Line 1111
r1 = *r1m; Line 1112
}
b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni); Line 1114
b1 = *r1m; Line 1115
}
*r1m = r1; Line 1117
return r0; Line 1118
} Block 44
static bool _GL_ATTRIBUTE_CONST Line 1121
millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q, Line 1122
unsigned int k, uintmax_t one) Line 1123
{
uintmax_t y = powm (b, q, n, ni, one); Line 1125
uintmax_t nm1 = n - one; /* -1, but in redc representation. */ Line 1127
if (y == one || y == nm1) Line 1129
return true; Line 1130
for (unsigned int i = 1; i < k; i++) Line 1132
{
y = mulredc (y, y, n, ni); Line 1134
if (y == nm1) Line 1136
return true; Line 1137
if (y == one) Line 1138
return false; Line 1139
}
return false; Line 1141
} Block 45
static bool Line 1144
millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp, Line 1145
const uintmax_t *qp, unsigned int k, const uintmax_t *one) Line 1146
{
uintmax_t y1, y0, nm1_1, nm1_0, r1m; Line 1148
y0 = powm2 (&r1m, bp, qp, np, ni, one); Line 1150
y1 = r1m; Line 1151
if (y0 == one[0] && y1 == one[1]) Line 1153
return true; Line 1154
sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]); Line 1156
if (y0 == nm1_0 && y1 == nm1_1) Line 1158
return true; Line 1159
for (unsigned int i = 1; i < k; i++) Line 1161
{
y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni); Line 1163
y1 = r1m; Line 1164
if (y0 == nm1_0 && y1 == nm1_1) Line 1166
return true; Line 1167
if (y0 == one[0] && y1 == one[1]) Line 1168
return false; Line 1169
}
return false; Line 1171
} Block 46
#if HAVE_GMP Line 1174
static bool Line 1175
mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y, Line 1176
mpz_srcptr q, unsigned long int k) Line 1177
{
mpz_powm (y, x, q, n); Line 1179
if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0) Line 1181
return true; Line 1182
for (unsigned long int i = 1; i < k; i++) Line 1184
{
mpz_powm_ui (y, y, 2, n); Line 1186
if (mpz_cmp (y, nm1) == 0) Line 1187
return true; Line 1188
if (mpz_cmp_ui (y, 1) == 0) Line 1189
return false; Line 1190
}
return false; Line 1192
} Block 47
#endif Line 1194
/* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
have been observed. The average seem to be about 2. */
static bool Line 1198
prime_p (uintmax_t n) Line 1199
{
int k; Line 1201
bool is_prime; Line 1202
uintmax_t a_prim, one, ni; Line 1203
struct factors factors; Line 1204
if (n <= 1) Line 1206
return false; Line 1207
/* We have already casted out small primes. */
if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) Line 1210
return true; Line 1211
/* Precomputation for Miller-Rabin. */
uintmax_t q = n - 1; Line 1214
for (k = 0; (q & 1) == 0; k++) Line 1215
q >>= 1; Line 1216
uintmax_t a = 2; Line 1218
binv (ni, n); /* ni <- 1/n mod B */ Line 1219
redcify (one, 1, n); Line 1220
addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */ Line 1221
/* Perform a Miller-Rabin test, finds most composites quickly. */
if (!millerrabin (n, ni, a_prim, q, k, one)) Line 1224
return false; Line 1225
if (flag_prove_primality) Line 1227
{
/* Factor n-1 for Lucas. */
factor (0, n - 1, &factors); Line 1230
}
/* Loop until Lucas proves our number prime, or Miller-Rabin proves our
number composite. */
for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++) Line 1235
{
if (flag_prove_primality) Line 1237
{
is_prime = true; Line 1239
for (unsigned int i = 0; i < factors.nfactors && is_prime; i++) Line 1240
{
is_prime Line 1242
= powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one; Line 1243
}
}
else Line 1246
{
/* After enough Miller-Rabin runs, be content. */
is_prime = (r == MR_REPS - 1); Line 1249
}
if (is_prime) Line 1252
return true; Line 1253
a += primes_diff[r]; /* Establish new base. */ Line 1255
/* The following is equivalent to redcify (a_prim, a, n). It runs faster
on most processors, since it avoids udiv_qrnnd. If we go down the
udiv_qrnnd_preinv path, this code should be replaced. */
{
uintmax_t s1, s0; Line 1261
umul_ppmm (s1, s0, one, a); Line 1262
if (LIKELY (s1 == 0)) Line 1263
a_prim = s0 % n; Line 1264
else Line 1265
{
uintmax_t dummy _GL_UNUSED; Line 1267
udiv_qrnnd (dummy, a_prim, s1, s0, n); Line 1268
}
}
if (!millerrabin (n, ni, a_prim, q, k, one)) Line 1272
return false; Line 1273
}
error (0, 0, _("Lucas prime test failure. This should not happen")); Line 1276
abort (); ...!common auto-comment...
} Block 48
static bool Line 1280
prime2_p (uintmax_t n1, uintmax_t n0) Line 1281
{
uintmax_t q[2], nm1[2]; Line 1283
uintmax_t a_prim[2]; Line 1284
uintmax_t one[2]; Line 1285
uintmax_t na[2]; Line 1286
uintmax_t ni; Line 1287
unsigned int k; Line 1288
struct factors factors; Line 1289
if (n1 == 0) Line 1291
return prime_p (n0); Line 1292
nm1[1] = n1 - (n0 == 0); Line 1294
nm1[0] = n0 - 1; Line 1295
if (nm1[0] == 0) Line 1296
{
count_trailing_zeros (k, nm1[1]); Line 1298
q[0] = nm1[1] >> k; Line 1300
q[1] = 0; Line 1301
k += W_TYPE_SIZE; Line 1302
}
else Line 1304
{
count_trailing_zeros (k, nm1[0]); Line 1306
rsh2 (q[1], q[0], nm1[1], nm1[0], k); Line 1307
}
uintmax_t a = 2; Line 1310
binv (ni, n0); Line 1311
redcify2 (one[1], one[0], 1, n1, n0); Line 1312
addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0); Line 1313
/* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
na[0] = n0; Line 1316
na[1] = n1; Line 1317
if (!millerrabin2 (na, ni, a_prim, q, k, one)) Line 1319
return false; Line 1320
if (flag_prove_primality) Line 1322
{
/* Factor n-1 for Lucas. */
factor (nm1[1], nm1[0], &factors); Line 1325
}
/* Loop until Lucas proves our number prime, or Miller-Rabin proves our
number composite. */
for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++) Line 1330
{
bool is_prime; Line 1332
uintmax_t e[2], y[2]; Line 1333
if (flag_prove_primality) Line 1335
{
is_prime = true; Line 1337
if (factors.plarge[1]) Line 1338
{
uintmax_t pi; Line 1340
binv (pi, factors.plarge[0]); Line 1341
e[0] = pi * nm1[0]; Line 1342
e[1] = 0; Line 1343
y[0] = powm2 (&y[1], a_prim, e, na, ni, one); Line 1344
is_prime = (y[0] != one[0] || y[1] != one[1]); Line 1345
}
for (unsigned int i = 0; i < factors.nfactors && is_prime; i++) Line 1347
{
/* FIXME: We always have the factor 2. Do we really need to
handle it here? We have done the same powering as part
of millerrabin. */
if (factors.p[i] == 2) Line 1352
rsh2 (e[1], e[0], nm1[1], nm1[0], 1); Line 1353
else Line 1354
divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]); Line 1355
y[0] = powm2 (&y[1], a_prim, e, na, ni, one); Line 1356
is_prime = (y[0] != one[0] || y[1] != one[1]); Line 1357
}
}
else Line 1360
{
/* After enough Miller-Rabin runs, be content. */
is_prime = (r == MR_REPS - 1); Line 1363
}
if (is_prime) Line 1366
return true; Line 1367
a += primes_diff[r]; /* Establish new base. */ Line 1369
redcify2 (a_prim[1], a_prim[0], a, n1, n0); Line 1370
if (!millerrabin2 (na, ni, a_prim, q, k, one)) Line 1372
return false; Line 1373
}
error (0, 0, _("Lucas prime test failure. This should not happen")); Line 1376
abort (); ...!common auto-comment...
} Block 49
#if HAVE_GMP Line 1380
static bool Line 1381
mp_prime_p (mpz_t n) Line 1382
{
bool is_prime; Line 1384
mpz_t q, a, nm1, tmp; Line 1385
struct mp_factors factors; Line 1386
if (mpz_cmp_ui (n, 1) <= 0) Line 1388
return false; Line 1389
/* We have already casted out small primes. */
if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0) Line 1392
return true; Line 1393
mpz_inits (q, a, nm1, tmp, NULL); Line 1395
/* Precomputation for Miller-Rabin. */
mpz_sub_ui (nm1, n, 1); Line 1398
/* Find q and k, where q is odd and n = 1 + 2**k * q. */
unsigned long int k = mpz_scan1 (nm1, 0); Line 1401
mpz_tdiv_q_2exp (q, nm1, k); Line 1402
mpz_set_ui (a, 2); Line 1404
/* Perform a Miller-Rabin test, finds most composites quickly. */
if (!mp_millerrabin (n, nm1, a, tmp, q, k)) Line 1407
{
is_prime = false; Line 1409
goto ret2; Line 1410
}
if (flag_prove_primality) Line 1413
{
/* Factor n-1 for Lucas. */
mpz_set (tmp, nm1); Line 1416
mp_factor (tmp, &factors); Line 1417
}
/* Loop until Lucas proves our number prime, or Miller-Rabin proves our
number composite. */
for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++) Line 1422
{
if (flag_prove_primality) Line 1424
{
is_prime = true; Line 1426
for (unsigned long int i = 0; i < factors.nfactors && is_prime; i++) Line 1427
{
mpz_divexact (tmp, nm1, factors.p[i]); Line 1429
mpz_powm (tmp, a, tmp, n); Line 1430
is_prime = mpz_cmp_ui (tmp, 1) != 0; Line 1431
}
}
else Line 1434
{
/* After enough Miller-Rabin runs, be content. */
is_prime = (r == MR_REPS - 1); Line 1437
}
if (is_prime) Line 1440
goto ret1; Line 1441
mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */ Line 1443
if (!mp_millerrabin (n, nm1, a, tmp, q, k)) Line 1445
{
is_prime = false; Line 1447
goto ret1; Line 1448
}
}
error (0, 0, _("Lucas prime test failure. This should not happen")); Line 1452
abort (); ...!common auto-comment...
ret1: Line 1455
if (flag_prove_primality) Line 1456
mp_factor_clear (&factors); Line 1457
ret2: Line 1458
mpz_clears (q, a, nm1, tmp, NULL); Line 1459
return is_prime; Line 1461
} Block 50
#endif Line 1463
static void Line 1465
factor_using_pollard_rho (uintmax_t n, unsigned long int a, Line 1466
struct factors *factors) Line 1467
{
uintmax_t x, z, y, P, t, ni, g; Line 1469
unsigned long int k = 1; Line 1471
unsigned long int l = 1; Line 1472
redcify (P, 1, n); Line 1474
addmod (x, P, P, n); /* i.e., redcify(2) */ Line 1475
y = z = x; Line 1476
while (n != 1) Line 1478
{
assert (a < n); Line 1480
binv (ni, n); /* FIXME: when could we use old 'ni' value? */ Line 1482
for (;;) Line 1484
{
do
{
x = mulredc (x, x, n, ni); Line 1488
addmod (x, x, a, n); Line 1489
submod (t, z, x, n); Line 1491
P = mulredc (P, t, n, ni); Line 1492
if (k % 32 == 1) Line 1494
{
if (gcd_odd (P, n) != 1) Line 1496
goto factor_found; Line 1497
y = x; Line 1498
}
}
while (--k != 0); Line 1501
z = x; Line 1503
k = l; Line 1504
l = 2 * l; Line 1505
for (unsigned long int i = 0; i < k; i++) Line 1506
{
x = mulredc (x, x, n, ni); Line 1508
addmod (x, x, a, n); Line 1509
}
y = x; Line 1511
}
factor_found: Line 1514
do
{
y = mulredc (y, y, n, ni); Line 1517
addmod (y, y, a, n); Line 1518
submod (t, z, y, n); Line 1520
g = gcd_odd (t, n); Line 1521
}
while (g == 1); Line 1523
if (n == g) Line 1525
{
/* Found n itself as factor. Restart with different params. */
factor_using_pollard_rho (n, a + 1, factors); Line 1528
return; Line 1529
}
n = n / g; Line 1532
if (!prime_p (g)) Line 1534
factor_using_pollard_rho (g, a + 1, factors); Line 1535
else Line 1536
factor_insert (factors, g); Line 1537
if (prime_p (n)) Line 1539
{
factor_insert (factors, n); Line 1541
break; Line 1542
}
x = x % n; Line 1545
z = z % n; Line 1546
y = y % n; Line 1547
}
} Block 51
static void Line 1551
factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a, Line 1552
struct factors *factors) Line 1553
{
uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m; Line 1555
unsigned long int k = 1; Line 1557
unsigned long int l = 1; Line 1558
redcify2 (P1, P0, 1, n1, n0); Line 1560
addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */ Line 1561
y1 = z1 = x1; Line 1562
y0 = z0 = x0; Line 1563
while (n1 != 0 || n0 != 1) Line 1565
{
binv (ni, n0); Line 1567
for (;;) Line 1569
{
do
{
x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni); Line 1573
x1 = r1m; Line 1574
addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0); Line 1575
submod2 (t1, t0, z1, z0, x1, x0, n1, n0); Line 1577
P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni); Line 1578
P1 = r1m; Line 1579
if (k % 32 == 1) Line 1581
{
g0 = gcd2_odd (&g1, P1, P0, n1, n0); Line 1583
if (g1 != 0 || g0 != 1) Line 1584
goto factor_found; Line 1585
y1 = x1; y0 = x0; Line 1586
}
}
while (--k != 0); Line 1589
z1 = x1; z0 = x0; Line 1591
k = l; Line 1592
l = 2 * l; Line 1593
for (unsigned long int i = 0; i < k; i++) Line 1594
{
x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni); Line 1596
x1 = r1m; Line 1597
addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0); Line 1598
}
y1 = x1; y0 = x0; Line 1600
}
factor_found: Line 1603
do
{
y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni); Line 1606
y1 = r1m; Line 1607
addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0); Line 1608
submod2 (t1, t0, z1, z0, y1, y0, n1, n0); Line 1610
g0 = gcd2_odd (&g1, t1, t0, n1, n0); Line 1611
}
while (g1 == 0 && g0 == 1); Line 1613
if (g1 == 0) Line 1615
{
/* The found factor is one word, and > 1. */
divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */ Line 1618
if (!prime_p (g0)) Line 1620
factor_using_pollard_rho (g0, a + 1, factors); Line 1621
else Line 1622
factor_insert (factors, g0); Line 1623
}
else Line 1625
{
/* The found factor is two words. This is highly unlikely, thus hard
to trigger. Please be careful before you change this code! */
uintmax_t ginv; Line 1629
if (n1 == g1 && n0 == g0) Line 1631
{
/* Found n itself as factor. Restart with different params. */
factor_using_pollard_rho2 (n1, n0, a + 1, factors); Line 1634
return; Line 1635
}
binv (ginv, g0); /* Compute n = n / g. Since the result will */ Line 1638
n0 = ginv * n0; /* fit one word, we can compute the quotient */ Line 1639
n1 = 0; /* modulo B, ignoring the high divisor word. */ Line 1640
if (!prime2_p (g1, g0)) Line 1642
factor_using_pollard_rho2 (g1, g0, a + 1, factors); Line 1643
else Line 1644
factor_insert_large (factors, g1, g0); Line 1645
}
if (n1 == 0) Line 1648
{
if (prime_p (n0)) Line 1650
{
factor_insert (factors, n0); Line 1652
break; Line 1653
}
factor_using_pollard_rho (n0, a, factors); Line 1656
return; Line 1657
}
if (prime2_p (n1, n0)) Line 1660
{
factor_insert_large (factors, n1, n0); Line 1662
break; Line 1663
}
x0 = mod2 (&x1, x1, x0, n1, n0); Line 1666
z0 = mod2 (&z1, z1, z0, n1, n0); Line 1667
y0 = mod2 (&y1, y1, y0, n1, n0); Line 1668
}
} Block 52
#if HAVE_GMP Line 1672
static void Line 1673
mp_factor_using_pollard_rho (mpz_t n, unsigned long int a, Line 1674
struct mp_factors *factors) Line 1675
{
mpz_t x, z, y, P; Line 1677
mpz_t t, t2; Line 1678
devmsg ("[pollard-rho (%lu)] ", a); Line 1680
mpz_inits (t, t2, NULL); Line 1682
mpz_init_set_si (y, 2); Line 1683
mpz_init_set_si (x, 2); Line 1684
mpz_init_set_si (z, 2); Line 1685
mpz_init_set_ui (P, 1); Line 1686
unsigned long long int k = 1; Line 1688
unsigned long long int l = 1; Line 1689
while (mpz_cmp_ui (n, 1) != 0) Line 1691
{
for (;;) Line 1693
{
do
{
mpz_mul (t, x, x); Line 1697
mpz_mod (x, t, n); Line 1698
mpz_add_ui (x, x, a); Line 1699
mpz_sub (t, z, x); Line 1701
mpz_mul (t2, P, t); Line 1702
mpz_mod (P, t2, n); Line 1703
if (k % 32 == 1) Line 1705
{
mpz_gcd (t, P, n); Line 1707
if (mpz_cmp_ui (t, 1) != 0) Line 1708
goto factor_found; Line 1709
mpz_set (y, x); Line 1710
}
}
while (--k != 0); Line 1713
mpz_set (z, x); Line 1715
k = l; Line 1716
l = 2 * l; Line 1717
for (unsigned long long int i = 0; i < k; i++) Line 1718
{
mpz_mul (t, x, x); Line 1720
mpz_mod (x, t, n); Line 1721
mpz_add_ui (x, x, a); Line 1722
}
mpz_set (y, x); Line 1724
}
factor_found: Line 1727
do
{
mpz_mul (t, y, y); Line 1730
mpz_mod (y, t, n); Line 1731
mpz_add_ui (y, y, a); Line 1732
mpz_sub (t, z, y); Line 1734
mpz_gcd (t, t, n); Line 1735
}
while (mpz_cmp_ui (t, 1) == 0); Line 1737
mpz_divexact (n, n, t); /* divide by t, before t is overwritten */ Line 1739
if (!mp_prime_p (t)) Line 1741
{
devmsg ("[composite factor--restarting pollard-rho] "); Line 1743
mp_factor_using_pollard_rho (t, a + 1, factors); Line 1744
}
else Line 1746
{
mp_factor_insert (factors, t); Line 1748
}
if (mp_prime_p (n)) Line 1751
{
mp_factor_insert (factors, n); Line 1753
break; Line 1754
}
mpz_mod (x, x, n); Line 1757
mpz_mod (z, z, n); Line 1758
mpz_mod (y, y, n); Line 1759
}
mpz_clears (P, t2, t, z, x, y, NULL); Line 1762
} Block 53
#endif Line 1764
#if USE_SQUFOF Line 1766
/* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
algorithm is replaced, consider also returning the remainder. */
static uintmax_t _GL_ATTRIBUTE_CONST Line 1769
isqrt (uintmax_t n) Line 1770
{
uintmax_t x; Line 1772
unsigned c; Line 1773
if (n == 0) Line 1774
return 0; Line 1775
count_leading_zeros (c, n); Line 1777
/* Make x > sqrt(n). This will be invariant through the loop. */
x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) / 2); Line 1780
for (;;) Line 1782
{
uintmax_t y = (x + n/x) / 2; Line 1784
if (y >= x) Line 1785
return x; Line 1786
x = y; Line 1788
}
} Block 54
static uintmax_t _GL_ATTRIBUTE_CONST Line 1792
isqrt2 (uintmax_t nh, uintmax_t nl) Line 1793
{
unsigned int shift; Line 1795
uintmax_t x; Line 1796
/* Ensures the remainder fits in an uintmax_t. */
assert (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2))); Line 1799
if (nh == 0) Line 1801
return isqrt (nl); Line 1802
count_leading_zeros (shift, nh); Line 1804
shift &= ~1; Line 1805
/* Make x > sqrt(n) */
x = isqrt ( (nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1; Line 1808
x <<= (W_TYPE_SIZE - shift) / 2; Line 1809
/* Do we need more than one iteration? */
for (;;) Line 1812
{
uintmax_t r _GL_UNUSED; Line 1814
uintmax_t q, y; Line 1815
udiv_qrnnd (q, r, nh, nl, x); Line 1816
y = (x + q) / 2; Line 1817
if (y >= x) Line 1819
{
uintmax_t hi, lo; Line 1821
umul_ppmm (hi, lo, x + 1, x + 1); Line 1822
assert (gt2 (hi, lo, nh, nl)); Line 1823
umul_ppmm (hi, lo, x, x); Line 1825
assert (ge2 (nh, nl, hi, lo)); Line 1826
sub_ddmmss (hi, lo, nh, nl, hi, lo); Line 1827
assert (hi == 0); Line 1828
return x; Line 1830
}
x = y; Line 1833
}
} Block 55
/* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
# define MAGIC64 0x0202021202030213ULL Line 1838
# define MAGIC63 0x0402483012450293ULL Line 1839
# define MAGIC65 0x218a019866014613ULL Line 1840
# define MAGIC11 0x23b Line 1841
/* Return the square root if the input is a square, otherwise 0. */
static uintmax_t _GL_ATTRIBUTE_CONST Line 1844
is_square (uintmax_t x) Line 1845
{
/* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
computing the square root. */
if (((MAGIC64 >> (x & 63)) & 1) Line 1849
&& ((MAGIC63 >> (x % 63)) & 1) Line 1850
/* Both 0 and 64 are squares mod (65) */
&& ((MAGIC65 >> ((x % 65) & 63)) & 1) Line 1852
&& ((MAGIC11 >> (x % 11) & 1))) Line 1853
{
uintmax_t r = isqrt (x); Line 1855
if (r*r == x) Line 1856
return r; Line 1857
}
return 0; Line 1859
}
/* invtab[i] = floor(0x10000 / (0x100 + i) */
static const unsigned short invtab[0x81] = Line 1863
{
0x200, Line 1865
0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1, Line 1866
0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7, Line 1867
0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af, Line 1868
0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199, Line 1869
0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186, Line 1870
0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174, Line 1871
0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164, Line 1872
0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155, Line 1873
0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147, Line 1874
0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b, Line 1875
0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f, Line 1876
0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124, Line 1877
0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a, Line 1878
0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111, Line 1879
0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108, Line 1880
0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100, Line 1881
}; Block 57
/* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
that q < 0x40; here it instead uses a table of (Euclidian) inverses. */
# define div_smallq(q, r, u, d) \ Line 1886
do { \ Line 1887
if ((u) / 0x40 < (d)) \ Line 1888
{ \ Line 1889
int _cnt; \ Line 1890
uintmax_t _dinv, _mask, _q, _r; \ Line 1891
count_leading_zeros (_cnt, (d)); \ Line 1892
_r = (u); \ Line 1893
if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \ Line 1894
{ \ Line 1895
_dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \ Line 1896
_q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \ Line 1897
} \ Line 1898
else \ Line 1899
{ \ Line 1900
_dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \ Line 1901
_q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \ Line 1902
} \ Line 1903
_r -= _q*(d); \ Line 1904
\
_mask = -(uintmax_t) (_r >= (d)); \ Line 1906
(r) = _r - (_mask & (d)); \ Line 1907
(q) = _q - _mask; \ Line 1908
assert ( (q) * (d) + (r) == u); \ Line 1909
} \ Line 1910
else \ Line 1911
{ \ Line 1912
uintmax_t _q = (u) / (d); \ Line 1913
(r) = (u) - _q * (d); \ Line 1914
(q) = _q; \ Line 1915
} \ Line 1916
} while (0) Line 1917Block 58
/* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
= 3025, P = 1737, representing F_{18} = (-6314, 2* 1737, 3025),
with 3025 = 55^2.
Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
representing G_0 = (-55, 2*4652, 8653).
In the notation of the paper:
S_{-1} = 55, S_0 = 8653, R_0 = 4652
Put
t_0 = floor([q_0 + R_0] / S0) = 1
R_1 = t_0 * S_0 - R_0 = 4001
S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
*/
/* Multipliers, in order of efficiency:
0.7268 3*5*7*11 = 1155 = 3 (mod 4)
0.7317 3*5*7 = 105 = 1
0.7820 3*5*11 = 165 = 1
0.7872 3*5 = 15 = 3
0.8101 3*7*11 = 231 = 3
0.8155 3*7 = 21 = 1
0.8284 5*7*11 = 385 = 1
0.8339 5*7 = 35 = 3
0.8716 3*11 = 33 = 1
0.8774 3 = 3 = 3
0.8913 5*11 = 55 = 3
0.8972 5 = 5 = 1
0.9233 7*11 = 77 = 1
0.9295 7 = 7 = 3
0.9934 11 = 11 = 3
*/
# define QUEUE_SIZE 50 Line 1954
#endif Line 1955
#if STAT_SQUFOF Line 1957
# define Q_FREQ_SIZE 50 Line 1958
/* Element 0 keeps the total */
static unsigned int q_freq[Q_FREQ_SIZE + 1]; Line 1960
# define MIN(a,b) ((a) < (b) ? (a) : (b)) Line 1961
#endif Line 1962
#if USE_SQUFOF Line 1964
/* Return true on success. Expected to fail only for numbers
>= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
static bool Line 1967
factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors) Line 1968
{
/* Uses algorithm and notation from
SQUARE FORM FACTORIZATION
JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
https://homes.cerias.purdue.edu/~ssw/squfof.pdf
*/
static const unsigned int multipliers_1[] = Line 1978
{ /* = 1 (mod 4) */ Line 1979
105, 165, 21, 385, 33, 5, 77, 1, 0 Line 1980
};
static const unsigned int multipliers_3[] = Line 1982
{ /* = 3 (mod 4) */ Line 1983
1155, 15, 231, 35, 3, 55, 7, 11, 0 Line 1984
};
const unsigned int *m; Line 1987
struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE]; Line 1989
if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2))) Line 1991
return false; Line 1992
uintmax_t sqrt_n = isqrt2 (n1, n0); Line 1994
if (n0 == sqrt_n * sqrt_n) Line 1996
{
uintmax_t p1, p0; Line 1998
umul_ppmm (p1, p0, sqrt_n, sqrt_n); Line 2000
assert (p0 == n0); Line 2001
if (n1 == p1) Line 2003
{
if (prime_p (sqrt_n)) Line 2005
factor_insert_multiplicity (factors, sqrt_n, 2); Line 2006
else Line 2007
{
struct factors f; Line 2009
f.nfactors = 0; Line 2011
if (!factor_using_squfof (0, sqrt_n, &f)) Line 2012
{
/* Try pollard rho instead */
factor_using_pollard_rho (sqrt_n, 1, &f); Line 2015
}
/* Duplicate the new factors */
for (unsigned int i = 0; i < f.nfactors; i++) Line 2018
factor_insert_multiplicity (factors, f.p[i], 2*f.e[i]); Line 2019
}
return true; Line 2021
}
}
/* Select multipliers so we always get n * mu = 3 (mod 4) */
for (m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1; Line 2026
*m; m++) Line 2027
{
uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B; Line 2029
unsigned int i; Line 2030
unsigned int mu = *m; Line 2031
unsigned int qpos = 0; Line 2032
assert (mu * n0 % 4 == 3); Line 2034
/* In the notation of the paper, with mu * n == 3 (mod 4), we
get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
I understand it, the necessary bound is 4 \mu^3 < n, or 32
mu^3 < n.
However, this seems insufficient: With n = 37243139 and mu =
105, we get a trivial factor, from the square 38809 = 197^2,
without any corresponding Q earlier in the iteration.
Requiring 64 mu^3 < n seems sufficient. */
if (n1 == 0) Line 2046
{
if ((uintmax_t) mu*mu*mu >= n0 / 64) Line 2048
continue; Line 2049
}
else Line 2051
{
if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu) Line 2053
continue; Line 2054
}
umul_ppmm (Dh, Dl, n0, mu); Line 2056
Dh += n1 * mu; Line 2057
assert (Dl % 4 != 1); Line 2059
assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2)); Line 2060
S = isqrt2 (Dh, Dl); Line 2062
Q1 = 1; Line 2064
P = S; Line 2065
/* Square root remainder fits in one word, so ignore high part. */
Q = Dl - P*P; Line 2068
/* FIXME: When can this differ from floor(sqrt(2 sqrt(D)))? */
L = isqrt (2*S); Line 2070
B = 2*L; Line 2071
L1 = mu * 2 * L; Line 2072
/* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
4 D. */
for (i = 0; i <= B; i++) Line 2077
{
uintmax_t q, P1, t, rem; Line 2079
div_smallq (q, rem, S+P, Q); Line 2081
P1 = S - rem; /* P1 = q*Q - P */ Line 2082
IF_LINT (assert (q > 0 && Q > 0)); Line 2084
# if STAT_SQUFOF Line 2086
q_freq[0]++; Line 2087
q_freq[MIN (q, Q_FREQ_SIZE)]++; Line 2088
# endif Line 2089
if (Q <= L1) Line 2091
{
uintmax_t g = Q; Line 2093
if ( (Q & 1) == 0) Line 2095
g /= 2; Line 2096
g /= gcd_odd (g, mu); Line 2098
if (g <= L) Line 2100
{
if (qpos >= QUEUE_SIZE) Line 2102
die (EXIT_FAILURE, 0, _("squfof queue overflow")); Line 2103
queue[qpos].Q = g; Line 2104
queue[qpos].P = P % g; Line 2105
qpos++; Line 2106
}
}
/* I think the difference can be either sign, but mod
2^W_TYPE_SIZE arithmetic should be fine. */
t = Q1 + q * (P - P1); Line 2112
Q1 = Q; Line 2113
Q = t; Line 2114
P = P1; Line 2115
if ( (i & 1) == 0) Line 2117
{
uintmax_t r = is_square (Q); Line 2119
if (r) Line 2120
{
for (unsigned int j = 0; j < qpos; j++) Line 2122
{
if (queue[j].Q == r) Line 2124
{
if (r == 1) Line 2126
/* Traversed entire cycle. */
goto next_multiplier; Line 2128
/* Need the absolute value for divisibility test. */
if (P >= queue[j].P) Line 2131
t = P - queue[j].P; Line 2132
else Line 2133
t = queue[j].P - P; Line 2134
if (t % r == 0) Line 2135
{
/* Delete entries up to and including entry
j, which matched. */
memmove (queue, queue + j + 1, Line 2139
(qpos - j - 1) * sizeof (queue[0])); Line 2140
qpos -= (j + 1); Line 2141
}
goto next_i; Line 2143
}
}
/* We have found a square form, which should give a
factor. */
Q1 = r; Line 2149
assert (S >= P); /* What signs are possible? */ Line 2150
P += r * ((S - P) / r); Line 2151
/* Note: Paper says (N - P*P) / Q1, that seems incorrect
for the case D = 2N. */
/* Compute Q = (D - P*P) / Q1, but we need double
precision. */
uintmax_t hi, lo; Line 2157
umul_ppmm (hi, lo, P, P); Line 2158
sub_ddmmss (hi, lo, Dh, Dl, hi, lo); Line 2159
udiv_qrnnd (Q, rem, hi, lo, Q1); Line 2160
assert (rem == 0); Line 2161
for (;;) Line 2163
{
/* Note: There appears to by a typo in the paper,
Step 4a in the algorithm description says q <--
floor([S+P]/\hat Q), but looking at the equations
in Sec. 3.1, it should be q <-- floor([S+P] / Q).
(In this code, \hat Q is Q1). */
div_smallq (q, rem, S+P, Q); Line 2170
P1 = S - rem; /* P1 = q*Q - P */ Line 2171
# if STAT_SQUFOF Line 2173
q_freq[0]++; Line 2174
q_freq[MIN (q, Q_FREQ_SIZE)]++; Line 2175
# endif Line 2176
if (P == P1) Line 2177
break; Line 2178
t = Q1 + q * (P - P1); Line 2179
Q1 = Q; Line 2180
Q = t; Line 2181
P = P1; Line 2182
}
if ( (Q & 1) == 0) Line 2185
Q /= 2; Line 2186
Q /= gcd_odd (Q, mu); Line 2187
assert (Q > 1 && (n1 || Q < n0)); Line 2189
if (prime_p (Q)) Line 2191
factor_insert (factors, Q); Line 2192
else if (!factor_using_squfof (0, Q, factors)) Line 2193
factor_using_pollard_rho (Q, 2, factors); Line 2194
divexact_21 (n1, n0, n1, n0, Q); Line 2196
if (prime2_p (n1, n0)) Line 2198
factor_insert_large (factors, n1, n0); Line 2199
else Line 2200
{
if (!factor_using_squfof (n1, n0, factors)) Line 2202
{
if (n1 == 0) Line 2204
factor_using_pollard_rho (n0, 1, factors); Line 2205
else Line 2206
factor_using_pollard_rho2 (n1, n0, 1, factors); Line 2207
}
}
return true; Line 2211
}
}
next_i:; Line 2214
}
next_multiplier:; Line 2216
}
return false; Line 2218
}
#endif Line 2220
/* Compute the prime factors of the 128-bit number (T1,T0), and put the
results in FACTORS. */
static void Line 2224
factor (uintmax_t t1, uintmax_t t0, struct factors *factors) Line 2225
{
factors->nfactors = 0; Line 2227
factors->plarge[1] = 0; Line 2228
if (t1 == 0 && t0 < 2) Line 2230
return; Line 2231
t0 = factor_using_division (&t1, t1, t0, factors); Line 2233
if (t1 == 0 && t0 < 2) Line 2235
return; Line 2236
if (prime2_p (t1, t0)) Line 2238
factor_insert_large (factors, t1, t0); Line 2239
else Line 2240
{
#if USE_SQUFOF Line 2242
if (factor_using_squfof (t1, t0, factors)) Line 2243
return; Line 2244
#endif Line 2245
if (t1 == 0) Line 2247
factor_using_pollard_rho (t0, 1, factors); Line 2248
else Line 2249
factor_using_pollard_rho2 (t1, t0, 1, factors); Line 2250
}
} Block 60
#if HAVE_GMP Line 2254
/* Use Pollard-rho to compute the prime factors of
arbitrary-precision T, and put the results in FACTORS. */
static void Line 2257
mp_factor (mpz_t t, struct mp_factors *factors) Line 2258
{
mp_factor_init (factors); Line 2260
if (mpz_sgn (t) != 0) Line 2262
{
mp_factor_using_division (t, factors); Line 2264
if (mpz_cmp_ui (t, 1) != 0) Line 2266
{
devmsg ("[is number prime?] "); Line 2268
if (mp_prime_p (t)) Line 2269
mp_factor_insert (factors, t); Line 2270
else Line 2271
mp_factor_using_pollard_rho (t, 1, factors); Line 2272
}
}
} Block 61
#endif Line 2276
static strtol_error Line 2278
strto2uintmax (uintmax_t *hip, uintmax_t *lop, const char *s) Line 2279
{
unsigned int lo_carry; Line 2281
uintmax_t hi = 0, lo = 0; Line 2282
strtol_error err = LONGINT_INVALID; Line 2284
/* Skip initial spaces and '+'. */
for (;;) Line 2287
{
char c = *s; Line 2289
if (c == ' ') Line 2290
s++; Line 2291
else if (c == '+') Line 2292
{
s++; Line 2294
break; Line 2295
}
else Line 2297
break; Line 2298
}
/* Initial scan for invalid digits. */
const char *p = s; Line 2302
for (;;) Line 2303
{
unsigned int c = *p++; Line 2305
if (c == 0) Line 2306
break; Line 2307
if (UNLIKELY (!ISDIGIT (c))) Line 2309
{
err = LONGINT_INVALID; Line 2311
break; Line 2312
}
err = LONGINT_OK; /* we've seen at least one valid digit */ Line 2315
}
for (;err == LONGINT_OK;) Line 2318
{
unsigned int c = *s++; Line 2320
if (c == 0) Line 2321
break; Line 2322
c -= '0'; Line 2324
if (UNLIKELY (hi > ~(uintmax_t)0 / 10)) Line 2326
{
err = LONGINT_OVERFLOW; Line 2328
break; Line 2329
}
hi = 10 * hi; Line 2331
lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1)); Line 2333
lo_carry += 10 * lo < 2 * lo; Line 2334
lo = 10 * lo; Line 2336
lo += c; Line 2337
lo_carry += lo < c; Line 2339
hi += lo_carry; Line 2340
if (UNLIKELY (hi < lo_carry)) Line 2341
{
err = LONGINT_OVERFLOW; Line 2343
break; Line 2344
}
}
*hip = hi; Line 2348
*lop = lo; Line 2349
return err; Line 2351
} Block 62
/* Structure and routines for buffering and outputting full lines,
to support parallel operation efficiently. */
static struct lbuf_ Line 2356
{
char *buf; Line 2358
char *end; Line 2359
} lbuf; Line 2360Block 63
/* 512 is chosen to give good performance,
and also is the max guaranteed size that
consumers can read atomically through pipes.
Also it's big enough to cater for max line length
even with 128 bit uintmax_t. */
#define FACTOR_PIPE_BUF 512 Line 2367
static void Line 2369
lbuf_alloc (void) Line 2370
{
if (lbuf.buf) Line 2372
return; Line 2373
/* Double to ensure enough space for
previous numbers + next number. */
lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2); Line 2377
lbuf.end = lbuf.buf; Line 2378
} Block 64
/* Write complete LBUF to standard output. */
static void Line 2382
lbuf_flush (void) Line 2383
{
size_t size = lbuf.end - lbuf.buf; Line 2385
if (full_write (STDOUT_FILENO, lbuf.buf, size) != size) Line 2386...!syscalls auto-comment...
die (EXIT_FAILURE, errno, "%s", _("write error")); Line 2387
lbuf.end = lbuf.buf; Line 2388
} Block 65
/* Add a character C to LBUF and if it's a newline
and enough bytes are already buffered,
then write atomically to standard output. */
static void Line 2394
lbuf_putc (char c) Line 2395
{
*lbuf.end++ = c; Line 2397
if (c == '\n') Line 2399
{
size_t buffered = lbuf.end - lbuf.buf; Line 2401
/* Provide immediate output for interactive input. */
static int line_buffered = -1; Line 2404
if (line_buffered == -1) Line 2405
line_buffered = isatty (STDIN_FILENO); Line 2406
if (line_buffered) Line 2407
lbuf_flush (); Line 2408
else if (buffered >= FACTOR_PIPE_BUF) Line 2409
{
/* Write output in <= PIPE_BUF chunks
so consumers can read atomically. */
char const *tend = lbuf.end; Line 2413
/* Since a umaxint_t's factors must fit in 512
we're guaranteed to find a newline here. */
char *tlend = lbuf.buf + FACTOR_PIPE_BUF; Line 2417
while (*--tlend != '\n'); Line 2418
tlend++; Line 2419
lbuf.end = tlend; Line 2421
lbuf_flush (); Line 2422
/* Buffer the remainder. */
memcpy (lbuf.buf, tlend, tend - tlend); Line 2425
lbuf.end = lbuf.buf + (tend - tlend); Line 2426
}
}
} Block 66
/* Buffer an int to the internal LBUF. */
static void Line 2432
lbuf_putint (uintmax_t i, size_t min_width) Line 2433
{
char buf[INT_BUFSIZE_BOUND (uintmax_t)]; Line 2435
char const *umaxstr = umaxtostr (i, buf); Line 2436
size_t width = sizeof (buf) - (umaxstr - buf) - 1; Line 2437
size_t z = width; Line 2438
for (; z < min_width; z++) Line 2440
*lbuf.end++ = '0'; Line 2441
memcpy (lbuf.end, umaxstr, width); Line 2443
lbuf.end += width; Line 2444
} Block 67
static void Line 2447
print_uintmaxes (uintmax_t t1, uintmax_t t0) Line 2448
{
uintmax_t q, r; Line 2450
if (t1 == 0) Line 2452
lbuf_putint (t0, 0); Line 2453
else Line 2454
{
/* Use very plain code here since it seems hard to write fast code
without assuming a specific word size. */
q = t1 / 1000000000; Line 2458
r = t1 % 1000000000; Line 2459
udiv_qrnnd (t0, r, r, t0, 1000000000); Line 2460
print_uintmaxes (q, t0); Line 2461
lbuf_putint (r, 9); Line 2462
}
} Block 68
/* Single-precision factoring */
static void Line 2467
print_factors_single (uintmax_t t1, uintmax_t t0) Line 2468
{
struct factors factors; Line 2470
print_uintmaxes (t1, t0); Line 2472
lbuf_putc (':'); Line 2473
factor (t1, t0, &factors); Line 2475
for (unsigned int j = 0; j < factors.nfactors; j++) Line 2477
for (unsigned int k = 0; k < factors.e[j]; k++) Line 2478
{
lbuf_putc (' '); Line 2480
print_uintmaxes (0, factors.p[j]); Line 2481
}
if (factors.plarge[1]) Line 2484
{
lbuf_putc (' '); Line 2486
print_uintmaxes (factors.plarge[1], factors.plarge[0]); Line 2487
}
lbuf_putc ('\n'); Line 2490
} Block 69
/* Emit the factors of the indicated number. If we have the option of using
either algorithm, we select on the basis of the length of the number.
For longer numbers, we prefer the MP algorithm even if the native algorithm
has enough digits, because the algorithm is better. The turnover point
depends on the value. */
static bool Line 2498
print_factors (const char *input) Line 2499
{
uintmax_t t1, t0; Line 2501
/* Try converting the number to one or two words. If it fails, use GMP or
print an error message. The 2nd condition checks that the most
significant bit of the two-word number is clear, in a typesize neutral
way. */
strtol_error err = strto2uintmax (&t1, &t0, input); Line 2507
switch (err) Line 2509
{
case LONGINT_OK: Line 2511
if (((t1 << 1) >> 1) == t1) Line 2512
{
devmsg ("[using single-precision arithmetic] "); Line 2514
print_factors_single (t1, t0); Line 2515
return true; Line 2516
}
break; Line 2518
case LONGINT_OVERFLOW: Line 2520
/* Try GMP. */
break; Line 2522
default: Line 2524
error (0, 0, _("%s is not a valid positive integer"), quote (input)); Line 2525
return false; Line 2526
}
#if HAVE_GMP Line 2529
devmsg ("[using arbitrary-precision arithmetic] "); Line 2530
mpz_t t; Line 2531
struct mp_factors factors; Line 2532
mpz_init_set_str (t, input, 10); Line 2534
gmp_printf ("%Zd:", t); Line 2536
mp_factor (t, &factors); Line 2537
for (unsigned int j = 0; j < factors.nfactors; j++) Line 2539
for (unsigned int k = 0; k < factors.e[j]; k++) Line 2540
gmp_printf (" %Zd", factors.p[j]); Line 2541
mp_factor_clear (&factors); Line 2543
mpz_clear (t); Line 2544
putchar ('\n'); Line 2545
fflush (stdout); Line 2546
return true; Line 2547
#else Line 2548
error (0, 0, _("%s is too large"), quote (input)); Line 2549
return false; Line 2550
#endif Line 2551
} Block 70
void Line 2554
usage (int status) Line 2555
{
if (status != EXIT_SUCCESS) Line 2557
emit_try_help (); ...!common auto-comment...
else Line 2559
{
printf (_("\ Line 2561
Usage: %s [NUMBER]...\n\ Line 2562
or: %s OPTION\n\ Line 2563
"), Line 2564
program_name, program_name); Line 2565
fputs (_("\ Line 2566
Print the prime factors of each specified integer NUMBER. If none\n\ Line 2567
are specified on the command line, read them from standard input.\n\ Line 2568
\n\
"), stdout); Line 2570
fputs (HELP_OPTION_DESCRIPTION, stdout); Line 2571
fputs (VERSION_OPTION_DESCRIPTION, stdout); Line 2572
emit_ancillary_info (PROGRAM_NAME); Line 2573
}
exit (status); Line 2575
} Block 71
static bool Line 2578
do_stdin (void) Line 2579
{
bool ok = true; Line 2581
token_buffer tokenbuffer; Line 2582
init_tokenbuffer (&tokenbuffer); Line 2584
while (true) Line 2586
{
size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1, Line 2588
&tokenbuffer); Line 2589
if (token_length == (size_t) -1) Line 2590
break; Line 2591
ok &= print_factors (tokenbuffer.buffer); Line 2592
}
free (tokenbuffer.buffer); Line 2594
return ok; Line 2596
} Block 72
int
main (int argc, char **argv) Line 2600
{
initialize_main (&argc, &argv); VMS-specific entry point handling wildcard expansion
set_program_name (argv[0]); Retains program name and discards path
setlocale (LC_ALL, ""); Sets up internationalization (i18n)
bindtextdomain (PACKAGE, LOCALEDIR); Assigns i18n directorySets text domain for _() [gettext()] function
textdomain (PACKAGE); Sets text domain for _() [gettext()] function
lbuf_alloc (); Line 2608
atexit (close_stdout); Close stdout on exit (see gnulib)
atexit (lbuf_flush); Close stdout on exit (see gnulib)
int c; Line 2612
while ((c = getopt_long (argc, argv, "", long_options, NULL)) != -1) Line 2613
{
switch (c) Line 2615
{
case DEV_DEBUG_OPTION: Line 2617
dev_debug = true; Line 2618
break; Line 2619
case_GETOPT_HELP_CHAR; Line 2621
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); Line 2623
default: Line 2625
usage (EXIT_FAILURE); Line 2626
}
}
#if STAT_SQUFOF Line 2630
memset (q_freq, 0, sizeof (q_freq)); Line 2631
#endif Line 2632
bool ok; Line 2634
if (argc <= optind) Line 2635
ok = do_stdin (); Line 2636
else Line 2637
{
ok = true; Line 2639
for (int i = optind; i < argc; i++) Line 2640
if (! print_factors (argv[i])) Line 2641
ok = false; Line 2642
}
#if STAT_SQUFOF Line 2645
if (q_freq[0] > 0) Line 2646
{
double acc_f; Line 2648
printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]); Line 2649
for (unsigned int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++) Line 2650
{
double f = (double) q_freq[i] / q_freq[0]; Line 2652
acc_f += f; Line 2653
printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i, Line 2654
100.0 * f, 100.0 * acc_f); Line 2655
}
}
#endif Line 2658
return ok ? EXIT_SUCCESS : EXIT_FAILURE; Line 2660
}