/* sort - sort lines of text (with all kinds of options). This is the sort utility
Copyright (C) 1988-2018 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Written December 1988 by Mike Haertel.
The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
or (US mail) as Mike Haertel c/o Free Software Foundation.
Ørn E. Hansen added NLS support in 1997. */ The GNUv3 license
#include <config.h> Provides system specific information
#include <getopt.h> ...!includes auto-comment...
#include <pthread.h> ...!includes auto-comment...
#include <sys/resource.h> ...!includes auto-comment...
#include <sys/types.h> Provides system data types
#include <sys/wait.h> ...!includes auto-comment...
#include <signal.h> ...!includes auto-comment...
#include <assert.h> ...!includes auto-comment...
#include "system.h" ...!includes auto-comment...
#include "argmatch.h" ...!includes auto-comment...
#include "die.h" ...!includes auto-comment...
#include "error.h" ...!includes auto-comment...
#include "fadvise.h" ...!includes auto-comment...
#include "filevercmp.h" ...!includes auto-comment...
#include "flexmember.h" ...!includes auto-comment...
#include "hard-locale.h" ...!includes auto-comment...
#include "hash.h" ...!includes auto-comment...
#include "heap.h" ...!includes auto-comment...
#include "ignore-value.h" ...!includes auto-comment...
#include "md5.h" ...!includes auto-comment...
#include "mbswidth.h" ...!includes auto-comment...
#include "nproc.h" ...!includes auto-comment...
#include "physmem.h" ...!includes auto-comment...
#include "posixver.h" ...!includes auto-comment...
#include "quote.h" ...!includes auto-comment...
#include "randread.h" ...!includes auto-comment...
#include "readtokens0.h" ...!includes auto-comment...
#include "stdlib--.h" ...!includes auto-comment...
#include "strnumcmp.h" ...!includes auto-comment...
#include "xmemcoll.h" ...!includes auto-comment...
#include "xnanosleep.h" ...!includes auto-comment...
#include "xstrtol.h" ...!includes auto-comment...
#ifndef RLIMIT_DATA Line 57
struct rlimit { size_t rlim_cur; }; Line 58Block 1
# define getrlimit(Resource, Rlp) (-1) Line 59
#endif Line 60
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "sort" Line 63
#define AUTHORS \ Line 65
proper_name ("Mike Haertel"), \ Line 66
proper_name ("Paul Eggert") Line 67
#if HAVE_LANGINFO_CODESET Line 69
# include <langinfo.h> ...!includes auto-comment...
#endif Line 71
/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
present. */
#ifndef SA_NOCLDSTOP Line 75
# define SA_NOCLDSTOP 0 Line 76
/* No sigprocmask. Always 'return' zero. */
# define sigprocmask(How, Set, Oset) (0) Line 78
# define sigset_t int Line 79
# if ! HAVE_SIGINTERRUPT Line 80
# define siginterrupt(sig, flag) /* empty */ Line 81
# endif Line 82
#endif Line 83
#if GNULIB_defined_pthread_functions Line 85
# undef pthread_sigmask Line 86
# define pthread_sigmask(how, set, oset) sigprocmask (how, set, oset) Line 87
#endif Line 88
#if !defined OPEN_MAX && defined NR_OPEN Line 90
# define OPEN_MAX NR_OPEN Line 91
#endif Line 92
#if !defined OPEN_MAX Line 93
# define OPEN_MAX 20 Line 94
#endif Line 95
#define UCHAR_LIM (UCHAR_MAX + 1) Line 97
#if HAVE_C99_STRTOLD Line 99
# define long_double long double Line 100
#else Line 101
# define long_double double Line 102
# undef strtold Line 103
# define strtold strtod Line 104
#endif Line 105
#ifndef DEFAULT_TMPDIR Line 107
# define DEFAULT_TMPDIR "/tmp" Line 108
#endif Line 109
/* Maximum number of lines to merge every time a NODE is taken from
the merge queue. Node is at LEVEL in the binary merge tree,
and is responsible for merging TOTAL lines. */
#define MAX_MERGE(total, level) (((total) >> (2 * ((level) + 1))) + 1) Line 114
/* Heuristic value for the number of lines for which it is worth creating
a subthread, during an internal merge sort. I.e., it is a small number
of "average" lines for which sorting via two threads is faster than
sorting via one on an "average" system. On a dual-core 2.0 GHz i686
system with 3GB of RAM and 2MB of L2 cache, a file containing 128K
lines of gensort -a output is sorted slightly faster with --parallel=2
than with --parallel=1. By contrast, using --parallel=1 is about 10%
faster than using --parallel=2 with a 64K-line input. */
enum { SUBTHREAD_LINES_HEURISTIC = 128 * 1024 }; Line 124Block 2
verify (4 <= SUBTHREAD_LINES_HEURISTIC); Line 125
/* The number of threads after which there are
diminishing performance gains. */
enum { DEFAULT_MAX_THREADS = 8 }; Line 129
/* Exit statuses. */
enum Line 132
{
/* POSIX says to exit with status 1 if invoked with -c and the
input is not properly sorted. */
SORT_OUT_OF_ORDER = 1, Line 136
/* POSIX says any other irregular exit must exit with a status
code greater than 1. */
SORT_FAILURE = 2 Line 140
};
enum Line 143
{
/* The number of times we should try to fork a compression process
(we retry if the fork call fails). We don't _need_ to compress
temp files, this is just to reduce disk access, so this number
can be small. Each retry doubles in duration. */
MAX_FORK_TRIES_COMPRESS = 4, Line 149
/* The number of times we should try to fork a decompression process.
If we can't fork a decompression process, we can't sort, so this
number should be big. Each retry doubles in duration. */
MAX_FORK_TRIES_DECOMPRESS = 9 Line 154
};
enum Line 157
{
/* Level of the end-of-merge node, one level above the root. */
MERGE_END = 0, Line 160
/* Level of the root node in merge tree. */
MERGE_ROOT = 1 Line 163
};
/* The representation of the decimal point in the current locale. */
static int decimal_point; Line 167
/* Thousands separator; if -1, then there isn't one. */
static int thousands_sep; Line 170
/* Nonzero if the corresponding locales are hard. */
static bool hard_LC_COLLATE; Line 173
#if HAVE_NL_LANGINFO Line 174
static bool hard_LC_TIME; Line 175
#endif Line 176
#define NONZERO(x) ((x) != 0) Line 178
/* The kind of blanks for '-b' to skip in various options. */
enum blanktype { bl_start, bl_end, bl_both }; Line 181
/* The character marking end of line. Default to \n. */
static char eolchar = '\n'; Line 184
/* Lines are held in core as counted strings. */
struct line Line 187
{
char *text; /* Text of the line. */ Line 189
size_t length; /* Length including final newline. */ Line 190
char *keybeg; /* Start of first key. */ Line 191
char *keylim; /* Limit of first key. */ Line 192
}; Block 8
/* Input buffers. */
struct buffer Line 196
{
char *buf; /* Dynamically allocated buffer, Line 198
partitioned into 3 regions:
- input data;
- unused area;
- an array of lines, in reverse order. */
size_t used; /* Number of bytes used for input data. */ Line 203
size_t nlines; /* Number of lines in the line array. */ Line 204
size_t alloc; /* Number of bytes allocated. */ Line 205
size_t left; /* Number of bytes left from previous reads. */ Line 206
size_t line_bytes; /* Number of bytes to reserve for each line. */ Line 207
bool eof; /* An EOF has been read. */ Line 208
}; Block 9
/* Sort key. */
struct keyfield Line 212
{
size_t sword; /* Zero-origin 'word' to start at. */ Line 214
size_t schar; /* Additional characters to skip. */ Line 215
size_t eword; /* Zero-origin last 'word' of key. */ Line 216
size_t echar; /* Additional characters in field. */ Line 217
bool const *ignore; /* Boolean array of characters to ignore. */ Line 218
char const *translate; /* Translation applied to characters. */ Line 219
bool skipsblanks; /* Skip leading blanks when finding start. */ Line 220
bool skipeblanks; /* Skip leading blanks when finding end. */ Line 221
bool numeric; /* Flag for numeric comparison. Handle Line 222
strings of digits with optional decimal
point, but no exponential notation. */
bool random; /* Sort by random hash of key. */ Line 225
bool general_numeric; /* Flag for general, numeric comparison. Line 226
Handle numbers in exponential notation. */
bool human_numeric; /* Flag for sorting by human readable Line 228
units with either SI or IEC prefixes. */
bool month; /* Flag for comparison by month name. */ Line 230
bool reverse; /* Reverse the sense of comparison. */ Line 231
bool version; /* sort by version number */ Line 232
bool traditional_used; /* Traditional key option format is used. */ Line 233
struct keyfield *next; /* Next keyfield to try. */ Line 234
}; Block 10
struct month Line 237
{
char const *name; Line 239
int val; Line 240
}; Block 11
/* Binary merge tree node. */
struct merge_node Line 244
{
struct line *lo; /* Lines to merge from LO child node. */ Line 246
struct line *hi; /* Lines to merge from HI child node. */ Line 247
struct line *end_lo; /* End of available lines from LO. */ Line 248
struct line *end_hi; /* End of available lines from HI. */ Line 249
struct line **dest; /* Pointer to destination of merge. */ Line 250
size_t nlo; /* Total Lines remaining from LO. */ Line 251
size_t nhi; /* Total lines remaining from HI. */ Line 252
struct merge_node *parent; /* Parent node. */ Line 253
struct merge_node *lo_child; /* LO child node. */ Line 254
struct merge_node *hi_child; /* HI child node. */ Line 255
unsigned int level; /* Level in merge tree. */ Line 256
bool queued; /* Node is already in heap. */ Line 257
pthread_mutex_t lock; /* Lock for node operations. */ Line 258
}; Block 12
/* Priority queue of merge nodes. */
struct merge_node_queue Line 262
{
struct heap *priority_queue; /* Priority queue of merge tree nodes. */ Line 264
pthread_mutex_t mutex; /* Lock for queue operations. */ Line 265
pthread_cond_t cond; /* Conditional wait for empty queue to populate Line 266
when popping. */
}; Block 13
/* Used to implement --unique (-u). */
static struct line saved_line; Line 271
/* FIXME: None of these tables work with multibyte character sets.
Also, there are many other bugs when handling multibyte characters.
One way to fix this is to rewrite 'sort' to use wide characters
internally, but doing this with good performance is a bit
tricky. */
/* Table of blanks. */
static bool blanks[UCHAR_LIM]; Line 280
/* Table of non-printing characters. */
static bool nonprinting[UCHAR_LIM]; Line 283
/* Table of non-dictionary characters (not letters, digits, or blanks). */
static bool nondictionary[UCHAR_LIM]; Line 286
/* Translation table folding lower case to upper. */
static char fold_toupper[UCHAR_LIM]; Line 289
#define MONTHS_PER_YEAR 12 Line 291
/* Table mapping month names to integers.
Alphabetic order allows binary search. */
static struct month monthtab[] = Line 295
{
{"APR", 4}, Line 297
{"AUG", 8}, Line 298
{"DEC", 12}, Line 299
{"FEB", 2}, Line 300
{"JAN", 1}, Line 301
{"JUL", 7}, Line 302
{"JUN", 6}, Line 303
{"MAR", 3}, Line 304
{"MAY", 5}, Line 305
{"NOV", 11}, Line 306
{"OCT", 10}, Line 307
{"SEP", 9} Line 308
}; Block 14
/* During the merge phase, the number of files to merge at once. */
#define NMERGE_DEFAULT 16 Line 312
/* Minimum size for a merge or check buffer. */
#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line)) Line 315
/* Minimum sort size; the code might not work with smaller sizes. */
#define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE) Line 318
/* The number of bytes needed for a merge or check buffer, which can
function relatively efficiently even if it holds only one line. If
a longer line is seen, this value is increased. */
static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024); Line 323
/* The approximate maximum number of bytes of main memory to use, as
specified by the user. Zero if the user has not specified a size. */
static size_t sort_size; Line 327
/* The initial allocation factor for non-regular files.
This is used, e.g., when reading from a pipe.
Don't make it too big, since it is multiplied by ~130 to
obtain the size of the actual buffer sort will allocate.
Also, there may be 8 threads all doing this at the same time. */
#define INPUT_FILE_SIZE_GUESS (128 * 1024) Line 334
/* Array of directory names in which any temporary files are to be created. */
static char const **temp_dirs; Line 337
/* Number of temporary directory names used. */
static size_t temp_dir_count; Line 340
/* Number of allocated slots in temp_dirs. */
static size_t temp_dir_alloc; Line 343
/* Flag to reverse the order of all comparisons. */
static bool reverse; Line 346
/* Flag for stable sort. This turns off the last ditch bytewise
comparison of lines, and instead leaves lines in the same order
they were read if all keys compare equal. */
static bool stable; Line 351
/* If TAB has this value, blanks separate fields. */
enum { TAB_DEFAULT = CHAR_MAX + 1 }; Line 354
/* Tab character separating fields. If TAB_DEFAULT, then fields are
separated by the empty string between a non-blank character and a blank
character. */
static int tab = TAB_DEFAULT; Line 359
/* Flag to remove consecutive duplicate lines from the output.
Only the last of a sequence of equal lines will be output. */
static bool unique; Line 363
/* Nonzero if any of the input files are the standard input. */
static bool have_read_stdin; Line 366
/* List of key field comparisons to be tried. */
static struct keyfield *keylist; Line 369
/* Program used to (de)compress temp files. Must accept -d. */
static char const *compress_program; Line 372
/* Annotate the output with extra info to aid the user. */
static bool debug; Line 375
/* Maximum number of files to merge in one go. If more than this
number are present, temp files will be used. */
static unsigned int nmerge = NMERGE_DEFAULT; Line 379
/* Output an error to stderr and exit using async-signal-safe routines.
This can be used safely from signal handlers,
and between fork and exec of multithreaded processes. */
static void async_safe_die (int, const char *) ATTRIBUTE_NORETURN; Line 385
static void Line 386
async_safe_die (int errnum, const char *errstr) Line 387
{
ignore_value (write (STDERR_FILENO, errstr, strlen (errstr))); Line 389...!syscalls auto-comment...
/* Even if defined HAVE_STRERROR_R, we can't use it,
as it may return a translated string etc. and even if not
may call malloc which is unsafe. We might improve this
by testing for sys_errlist and using that if available.
For now just report the error number. */
if (errnum) Line 396
{
char errbuf[INT_BUFSIZE_BOUND (errnum)]; Line 398
char *p = inttostr (errnum, errbuf); Line 399
ignore_value (write (STDERR_FILENO, ": errno ", 8)); Line 400...!syscalls auto-comment...
ignore_value (write (STDERR_FILENO, p, strlen (p))); Line 401...!syscalls auto-comment...
}
ignore_value (write (STDERR_FILENO, "\n", 1)); Line 404...!syscalls auto-comment...
_exit (SORT_FAILURE); Line 406
} Block 16
/* Report MESSAGE for FILE, then clean up and exit.
If FILE is null, it represents standard output. */
static void sort_die (char const *, char const *) ATTRIBUTE_NORETURN; Line 412
static void Line 413
sort_die (char const *message, char const *file) Line 414
{
die (SORT_FAILURE, errno, "%s: %s", message, Line 416
quotef (file ? file : _("standard output"))); Line 417
} Block 17
void Line 420
usage (int status) Line 421
{
if (status != EXIT_SUCCESS) Line 423
emit_try_help (); ...!common auto-comment...
else Line 425
{
printf (_("\ Line 427
Usage: %s [OPTION]... [FILE]...\n\ Line 428
or: %s [OPTION]... --files0-from=F\n\ Line 429
"), Line 430
program_name, program_name); Line 431
fputs (_("\ Line 432
Write sorted concatenation of all FILE(s) to standard output.\n\ Line 433
"), stdout); Line 434
emit_stdin_note (); ...!common auto-comment...
emit_mandatory_arg_note (); ...!common auto-comment...
fputs (_("\ Line 439
Ordering options:\n\ Line 440
\n\
"), stdout); Line 442
fputs (_("\ Line 443
-b, --ignore-leading-blanks ignore leading blanks\n\ Line 444
-d, --dictionary-order consider only blanks and alphanumeric characters\ Line 445
\n\
-f, --ignore-case fold lower case to upper case characters\n\ Line 447
"), stdout); Line 448
fputs (_("\ Line 449
-g, --general-numeric-sort compare according to general numerical value\n\ Line 450
-i, --ignore-nonprinting consider only printable characters\n\ Line 451
-M, --month-sort compare (unknown) < 'JAN' < ... < 'DEC'\n\ Line 452
"), stdout); Line 453
fputs (_("\ Line 454
-h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n\ Line 455
"), stdout); Line 456
fputs (_("\ Line 457
-n, --numeric-sort compare according to string numerical value\n\ Line 458
-R, --random-sort shuffle, but group identical keys. See shuf(1)\n\Line 459
--random-source=FILE get random bytes from FILE\n\ Line 460
-r, --reverse reverse the result of comparisons\n\ Line 461
"), stdout); Line 462
fputs (_("\ Line 463
--sort=WORD sort according to WORD:\n\ Line 464
general-numeric -g, human-numeric -h, month -M,\Line 465
\n\
numeric -n, random -R, version -V\n\ Line 467
-V, --version-sort natural sort of (version) numbers within text\n\ Line 468
\n\
"), stdout); Line 470
fputs (_("\ Line 471
Other options:\n\ Line 472
\n\
"), stdout); Line 474
fputs (_("\ Line 475
--batch-size=NMERGE merge at most NMERGE inputs at once;\n\ Line 476
for more use temp files\n\ Line 477
"), stdout); Line 478
fputs (_("\ Line 479
-c, --check, --check=diagnose-first check for sorted input; do not sort\n\ Line 480
-C, --check=quiet, --check=silent like -c, but do not report first bad line\ Line 481
\n\
--compress-program=PROG compress temporaries with PROG;\n\ Line 483
decompress them with PROG -d\n\ Line 484
"), stdout); Line 485
fputs (_("\ Line 486
--debug annotate the part of the line used to sort,\n\ Line 487
and warn about questionable usage to stderr\n\ Line 488
--files0-from=F read input from the files specified by\n\ Line 489
NUL-terminated names in file F;\n\ Line 490
If F is - then read names from standard input\n\ Line 491
"), stdout); Line 492
fputs (_("\ Line 493
-k, --key=KEYDEF sort via a key; KEYDEF gives location and type\n\ Line 494
-m, --merge merge already sorted files; do not sort\n\ Line 495
"), stdout); Line 496
fputs (_("\ Line 497
-o, --output=FILE write result to FILE instead of standard output\n\ Line 498
-s, --stable stabilize sort by disabling last-resort comparison\ Line 499
\n\
-S, --buffer-size=SIZE use SIZE for main memory buffer\n\ Line 501
"), stdout); Line 502
printf (_("\ Line 503
-t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\Line 504
-T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\ Line 505
multiple options specify multiple directories\n\ Line 506
--parallel=N change the number of sorts run concurrently to N\n\ Line 507
-u, --unique with -c, check for strict ordering;\n\ Line 508
without -c, output only the first of an equal run\Line 509
\n\
"), DEFAULT_TMPDIR); Line 511
fputs (_("\ Line 512
-z, --zero-terminated line delimiter is NUL, not newline\n\ Line 513
"), stdout); Line 514
fputs (HELP_OPTION_DESCRIPTION, stdout); Line 515
fputs (VERSION_OPTION_DESCRIPTION, stdout); Line 516
fputs (_("\ Line 517
\n\
KEYDEF is F[.C][OPTS][,F[.C][OPTS]] for start and stop position, where F is a\n\Line 519
field number and C a character position in the field; both are origin 1, and\n\ Line 520
the stop position defaults to the line's end. If neither -t nor -b is in\n\ Line 521
effect, characters in a field are counted from the beginning of the preceding\n\Line 522
whitespace. OPTS is one or more single-letter ordering options [bdfgiMhnRrV],\ Line 523
\n\
which override global ordering options for that key. If no key is given, use\n\Line 525
the entire line as the key. Use --debug to diagnose incorrect key usage.\n\ Line 526
\n\
SIZE may be followed by the following multiplicative suffixes:\n\ Line 528
"), stdout); Line 529
fputs (_("\ Line 530
% 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\ Line 531
\n\
*** WARNING ***\n\ Line 533
The locale specified by the environment affects sort order.\n\ Line 534
Set LC_ALL=C to get the traditional sort order that uses\n\ Line 535
native byte values.\n\ Line 536
"), stdout ); Line 537
emit_ancillary_info (PROGRAM_NAME); Line 538
}
exit (status); Line 541
} Block 18
/* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
enum Line 546
{
CHECK_OPTION = CHAR_MAX + 1, Line 548
COMPRESS_PROGRAM_OPTION, Line 549
DEBUG_PROGRAM_OPTION, Line 550
FILES0_FROM_OPTION, Line 551
NMERGE_OPTION, Line 552
RANDOM_SOURCE_OPTION, Line 553
SORT_OPTION, Line 554
PARALLEL_OPTION Line 555
}; Block 19
static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z"; Line 558
static struct option const long_options[] = Line 560
{
{"ignore-leading-blanks", no_argument, NULL, 'b'}, Line 562
{"check", optional_argument, NULL, CHECK_OPTION}, Line 563
{"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION}, Line 564
{"debug", no_argument, NULL, DEBUG_PROGRAM_OPTION}, Line 565
{"dictionary-order", no_argument, NULL, 'd'}, Line 566
{"ignore-case", no_argument, NULL, 'f'}, Line 567
{"files0-from", required_argument, NULL, FILES0_FROM_OPTION}, Line 568
{"general-numeric-sort", no_argument, NULL, 'g'}, Line 569
{"ignore-nonprinting", no_argument, NULL, 'i'}, Line 570
{"key", required_argument, NULL, 'k'}, Line 571
{"merge", no_argument, NULL, 'm'}, Line 572
{"month-sort", no_argument, NULL, 'M'}, Line 573
{"numeric-sort", no_argument, NULL, 'n'}, Line 574
{"human-numeric-sort", no_argument, NULL, 'h'}, Line 575
{"version-sort", no_argument, NULL, 'V'}, Line 576
{"random-sort", no_argument, NULL, 'R'}, Line 577
{"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION}, Line 578
{"sort", required_argument, NULL, SORT_OPTION}, Line 579
{"output", required_argument, NULL, 'o'}, Line 580
{"reverse", no_argument, NULL, 'r'}, Line 581
{"stable", no_argument, NULL, 's'}, Line 582
{"batch-size", required_argument, NULL, NMERGE_OPTION}, Line 583
{"buffer-size", required_argument, NULL, 'S'}, Line 584
{"field-separator", required_argument, NULL, 't'}, Line 585
{"temporary-directory", required_argument, NULL, 'T'}, Line 586
{"unique", no_argument, NULL, 'u'}, Line 587
{"zero-terminated", no_argument, NULL, 'z'}, Line 588
{"parallel", required_argument, NULL, PARALLEL_OPTION}, Line 589
{GETOPT_HELP_OPTION_DECL}, Line 590
{GETOPT_VERSION_OPTION_DECL}, Line 591
{NULL, 0, NULL, 0}, Line 592
}; Block 20
#define CHECK_TABLE \ Line 595
_ct_("quiet", 'C') \ Line 596
_ct_("silent", 'C') \ Line 597
_ct_("diagnose-first", 'c') Line 598
static char const *const check_args[] = Line 600
{
#define _ct_(_s, _c) _s, Line 602
CHECK_TABLE NULL Line 603
#undef _ct_ Line 604
}; Block 21
static char const check_types[] = Line 606
{
#define _ct_(_s, _c) _c, Line 608
CHECK_TABLE Line 609
#undef _ct_ Line 610
}; Block 22
#define SORT_TABLE \ Line 613
_st_("general-numeric", 'g') \ Line 614
_st_("human-numeric", 'h') \ Line 615
_st_("month", 'M') \ Line 616
_st_("numeric", 'n') \ Line 617
_st_("random", 'R') \ Line 618
_st_("version", 'V') Line 619
static char const *const sort_args[] = Line 621
{
#define _st_(_s, _c) _s, Line 623
SORT_TABLE NULL Line 624
#undef _st_ Line 625
}; Block 23
static char const sort_types[] = Line 627
{
#define _st_(_s, _c) _c, Line 629
SORT_TABLE Line 630
#undef _st_ Line 631
}; Block 24
/* The set of signals that are caught. */
static sigset_t caught_signals; Line 635
/* Critical section status. */
struct cs_status Line 638
{
bool valid; Line 640
sigset_t sigs; Line 641
}; Block 25
/* Enter a critical section. */
static void Line 645
cs_enter (struct cs_status *status) Line 646
{
int ret = pthread_sigmask (SIG_BLOCK, &caught_signals, &status->sigs); Line 648
status->valid = ret == 0; Line 649
} Block 26
/* Leave a critical section. */
static void Line 653
cs_leave (struct cs_status const *status) Line 654
{
if (status->valid) Line 656
{
/* Ignore failure when restoring the signal mask. */
pthread_sigmask (SIG_SETMASK, &status->sigs, NULL); Line 659
}
} Block 27
/* Possible states for a temp file. If compressed, the file's status
is unreaped or reaped, depending on whether 'sort' has waited for
the subprocess to finish. */
enum { UNCOMPRESSED, UNREAPED, REAPED }; Line 666
/* The list of temporary files. */
struct tempnode Line 669
{
struct tempnode *volatile next; Line 671
pid_t pid; /* The subprocess PID; undefined if state == UNCOMPRESSED. */ Line 672
char state; Line 673
char name[FLEXIBLE_ARRAY_MEMBER]; Line 674
}; Block 29
static struct tempnode *volatile temphead; Line 676
static struct tempnode *volatile *temptail = &temphead; Line 677
/* A file to be sorted. */
struct sortfile Line 680
{
/* The file's name. */
char const *name; Line 683
/* Non-null if this is a temporary file, in which case NAME == TEMP->name. */
struct tempnode *temp; Line 686
};
/* Map PIDs of unreaped subprocesses to their struct tempnode objects. */
static Hash_table *proctab; Line 690
enum { INIT_PROCTAB_SIZE = 47 }; Line 692
static size_t Line 694
proctab_hasher (void const *entry, size_t tabsize) Line 695
{
struct tempnode const *node = entry; Line 697
return node->pid % tabsize; Line 698
} Block 32
static bool Line 701
proctab_comparator (void const *e1, void const *e2) Line 702
{
struct tempnode const *n1 = e1; Line 704
struct tempnode const *n2 = e2; Line 705
return n1->pid == n2->pid; Line 706
} Block 33
/* The number of unreaped child processes. */
static pid_t nprocs; Line 710
static bool delete_proc (pid_t); Line 712
/* If PID is positive, wait for the child process with that PID to
exit, and assume that PID has already been removed from the process
table. If PID is 0 or -1, clean up some child that has exited (by
waiting for it, and removing it from the proc table) and return the
child's process ID. However, if PID is 0 and no children have
exited, return 0 without waiting. */
static pid_t Line 721
reap (pid_t pid) Line 722
{
int status; Line 724
pid_t cpid = waitpid ((pid ? pid : -1), &status, (pid ? 0 : WNOHANG)); Line 725
if (cpid < 0) Line 727
die (SORT_FAILURE, errno, _("waiting for %s [-d]"), Line 728
quoteaf (compress_program)); Line 729
else if (0 < cpid && (0 < pid || delete_proc (cpid))) Line 730
{
if (! WIFEXITED (status) || WEXITSTATUS (status)) Line 732
die (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"), Line 733
quoteaf (compress_program)); Line 734
--nprocs; Line 735
}
return cpid; Line 738
} Block 34
/* TEMP represents a new process; add it to the process table. Create
the process table the first time it's called. */
static void Line 744
register_proc (struct tempnode *temp) Line 745
{
if (! proctab) Line 747
{
proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL, Line 749
proctab_hasher, Line 750
proctab_comparator, Line 751
NULL); Line 752
if (! proctab) Line 753
xalloc_die (); ...!common auto-comment...
}
temp->state = UNREAPED; Line 757
if (! hash_insert (proctab, temp)) Line 759
xalloc_die (); ...!common auto-comment...
} Block 35
/* If PID is in the process table, remove it and return true.
Otherwise, return false. */
static bool Line 766
delete_proc (pid_t pid) Line 767
{
struct tempnode test; Line 769
test.pid = pid; Line 771
struct tempnode *node = hash_delete (proctab, &test); Line 772
if (! node) Line 773
return false; Line 774
node->state = REAPED; Line 775
return true; Line 776
} Block 36
/* Remove PID from the process table, and wait for it to exit if it
hasn't already. */
static void Line 782
wait_proc (pid_t pid) Line 783
{
if (delete_proc (pid)) Line 785
reap (pid); Line 786
} Block 37
/* Reap any exited children. Do not block; reap only those that have
already exited. */
static void Line 792
reap_exited (void) Line 793
{
while (0 < nprocs && reap (0)) Line 795
continue; Line 796
} Block 38
/* Reap at least one exited child, waiting if necessary. */
static void Line 801
reap_some (void) Line 802
{
reap (-1); Line 804
reap_exited (); Line 805
} Block 39
/* Reap all children, waiting if necessary. */
static void Line 810
reap_all (void) Line 811
{
while (0 < nprocs) Line 813
reap (-1); Line 814
} Block 40
/* Clean up any remaining temporary files. */
static void Line 819
cleanup (void) Line 820
{
struct tempnode const *node; Line 822
for (node = temphead; node; node = node->next) Line 824
unlink (node->name); Line 825...!syscalls auto-comment......!syscalls auto-comment...
temphead = NULL; Line 826
} Block 41
/* Cleanup actions to take when exiting. */
static void Line 831
exit_cleanup (void) Line 832
{
if (temphead) Line 834
{
/* Clean up any remaining temporary files in a critical section so
that a signal handler does not try to clean them too. */
struct cs_status cs; Line 838
cs_enter (&cs); Line 839
cleanup (); Line 840
cs_leave (&cs); Line 841
}
close_stdout (); Line 844
} Block 42
/* Create a new temporary file, returning its newly allocated tempnode.
Store into *PFD the file descriptor open for writing.
If the creation fails, return NULL and store -1 into *PFD if the
failure is due to file descriptor exhaustion and
SURVIVE_FD_EXHAUSTION; otherwise, die. */
static struct tempnode * Line 853
create_temp_file (int *pfd, bool survive_fd_exhaustion) Line 854
{
static char const slashbase[] = "/sortXXXXXX"; Line 856
static size_t temp_dir_index; Line 857
int fd; Line 858
int saved_errno; Line 859
char const *temp_dir = temp_dirs[temp_dir_index]; Line 860
size_t len = strlen (temp_dir); Line 861
struct tempnode *node = Line 862
xmalloc (FLEXSIZEOF (struct tempnode, name, len + sizeof slashbase)); Line 863
char *file = node->name; Line 864
struct cs_status cs; Line 865
memcpy (file, temp_dir, len); Line 867
memcpy (file + len, slashbase, sizeof slashbase); Line 868
node->next = NULL; Line 869
if (++temp_dir_index == temp_dir_count) Line 870
temp_dir_index = 0; Line 871
/* Create the temporary file in a critical section, to avoid races. */
cs_enter (&cs); Line 874
fd = mkostemp (file, O_CLOEXEC); Line 875
if (0 <= fd) Line 876
{
*temptail = node; Line 878
temptail = &node->next; Line 879
}
saved_errno = errno; Line 881
cs_leave (&cs); Line 882
errno = saved_errno; Line 883
if (fd < 0) Line 885
{
if (! (survive_fd_exhaustion && errno == EMFILE)) Line 887
die (SORT_FAILURE, errno, _("cannot create temporary file in %s"), Line 888
quoteaf (temp_dir)); Line 889
free (node); Line 890
node = NULL; Line 891
}
*pfd = fd; Line 894
return node; Line 895
} Block 43
/* Return a stream for FILE, opened with mode HOW. A null FILE means
standard output; HOW should be "w". When opening for input, "-"
means standard input. To avoid confusion, do not return file
descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
opening an ordinary FILE. Return NULL if unsuccessful.
Use fadvise to specify an access pattern for input files.
There are a few hints we could possibly provide,
and after careful testing it was decided that
specifying FADVISE_SEQUENTIAL was not detrimental
to any cases. On Linux 2.6.31, this option doubles
the size of read ahead performed and thus was seen to
benefit these cases:
Merging
Sorting with a smaller internal buffer
Reading from faster flash devices
In _addition_ one could also specify other hints...
FADVISE_WILLNEED was tested, but Linux 2.6.31
at least uses that to _synchronously_ prepopulate the cache
with the specified range. While sort does need to
read all of its input before outputting, a synchronous
read of the whole file up front precludes any processing
that sort could do in parallel with the system doing
read ahead of the data. This was seen to have negative effects
in a couple of cases:
Merging
Sorting with a smaller internal buffer
This option was seen to shorten the runtime for sort
on a multicore system with lots of RAM and other processes
competing for CPU. It could be argued that more explicit
scheduling hints with 'nice' et. al. are more appropriate
for this situation.
FADVISE_NOREUSE is a possibility as it could lower
the priority of input data in the cache as sort will
only need to process it once. However its functionality
has changed over Linux kernel versions and as of 2.6.31
it does nothing and thus we can't depend on what it might
do in future.
FADVISE_DONTNEED is not appropriate for user specified
input files, but for temp files we do want to drop the
cache immediately after processing. This is done implicitly
however when the files are unlinked. */
static FILE * Line 945
stream_open (char const *file, char const *how) Line 946...!syscalls auto-comment...
{
FILE *fp; Line 948
if (*how == 'r') Line 950
{
if (STREQ (file, "-")) Line 952
{
have_read_stdin = true; Line 954
fp = stdin; Line 955
}
else Line 957
{
int fd = open (file, O_RDONLY | O_CLOEXEC); Line 959...!syscalls auto-comment...
fp = fd < 0 ? NULL : fdopen (fd, how); Line 960...!syscalls auto-comment...
}
fadvise (fp, FADVISE_SEQUENTIAL); Line 962...!syscalls auto-comment...
}
else if (*how == 'w') Line 964
{
if (file && ftruncate (STDOUT_FILENO, 0) != 0) Line 966...!syscalls auto-comment...
die (SORT_FAILURE, errno, _("%s: error truncating"), Line 967
quotef (file)); Line 968
fp = stdout; Line 969
}
else Line 971
assert (!"unexpected mode passed to stream_open"); Line 972
return fp; Line 974
} Block 44
/* Same as stream_open, except always return a non-null value; die on
failure. */
static FILE * Line 980
xfopen (char const *file, char const *how) Line 981...!syscalls auto-comment...
{
FILE *fp = stream_open (file, how); Line 983...!syscalls auto-comment...
if (!fp) Line 984
sort_die (_("open failed"), file); Line 985
return fp; Line 986
} Block 45
/* Close FP, whose name is FILE, and report any errors. */
static void Line 991
xfclose (FILE *fp, char const *file) Line 992...!syscalls auto-comment...
{
switch (fileno (fp)) Line 994
{
case STDIN_FILENO: Line 996
/* Allow reading stdin from tty more than once. */
if (feof (fp)) Line 998
clearerr (fp); Line 999
break; Line 1000
case STDOUT_FILENO: Line 1002
/* Don't close stdout just yet. close_stdout does that. */
if (fflush (fp) != 0) Line 1004
sort_die (_("fflush failed"), file); Line 1005
break; Line 1006
default: Line 1008
if (fclose (fp) != 0) Line 1009...!syscalls auto-comment...
sort_die (_("close failed"), file); Line 1010
break; Line 1011
}
} Block 46
/* Move OLDFD to NEWFD. If OLDFD != NEWFD, NEWFD is not close-on-exec. */
static void Line 1017
move_fd (int oldfd, int newfd) Line 1018
{
if (oldfd != newfd) Line 1020
{
/* This should never fail for our usage. */
dup2 (oldfd, newfd); Line 1023
close (oldfd); Line 1024...!syscalls auto-comment...
}
} Block 47
/* Fork a child process for piping to and do common cleanup. The
TRIES parameter specifies how many times to try to fork before
giving up. Return the PID of the child, or -1 (setting errno)
on failure. */
static pid_t Line 1033
pipe_fork (int pipefds[2], size_t tries) Line 1034...!syscalls auto-comment...
{
#if HAVE_WORKING_FORK Line 1036
struct tempnode *saved_temphead; Line 1037
int saved_errno; Line 1038
double wait_retry = 0.25; Line 1039
pid_t pid IF_LINT ( = -1); Line 1040
struct cs_status cs; Line 1041
if (pipe2 (pipefds, O_CLOEXEC) < 0) Line 1043
return -1; Line 1044
/* At least NMERGE + 1 subprocesses are needed. More could be created, but
uncontrolled subprocess generation can hurt performance significantly.
Allow at most NMERGE + 2 subprocesses, on the theory that there
may be some useful parallelism by letting compression for the
previous merge finish (1 subprocess) in parallel with the current
merge (NMERGE + 1 subprocesses). */
if (nmerge + 1 < nprocs) Line 1053
reap_some (); Line 1054
while (tries--) Line 1056
{
/* This is so the child process won't delete our temp files
if it receives a signal before exec-ing. */
cs_enter (&cs); Line 1060
saved_temphead = temphead; Line 1061
temphead = NULL; Line 1062
pid = fork (); Line 1064...!syscalls auto-comment...
saved_errno = errno; Line 1065
if (pid) Line 1066
temphead = saved_temphead; Line 1067
cs_leave (&cs); Line 1069
errno = saved_errno; Line 1070
if (0 <= pid || errno != EAGAIN) Line 1072
break; Line 1073
else Line 1074
{
xnanosleep (wait_retry); Line 1076
wait_retry *= 2; Line 1077
reap_exited (); Line 1078
}
}
if (pid < 0) Line 1082
{
saved_errno = errno; Line 1084
close (pipefds[0]); Line 1085...!syscalls auto-comment...
close (pipefds[1]); Line 1086...!syscalls auto-comment...
errno = saved_errno; Line 1087
}
else if (pid == 0) Line 1089
{
close (STDIN_FILENO); Line 1091...!syscalls auto-comment...
close (STDOUT_FILENO); Line 1092...!syscalls auto-comment...
}
else Line 1094
++nprocs; Line 1095
return pid; Line 1097
#else /* ! HAVE_WORKING_FORK */ Line 1099
return -1; Line 1100
#endif Line 1101
} Block 48
/* Create a temporary file and, if asked for, start a compressor
to that file. Set *PFP to the file handle and return
the address of the new temp node. If the creation
fails, return NULL if the failure is due to file descriptor
exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die. */
static struct tempnode * Line 1110
maybe_create_temp (FILE **pfp, bool survive_fd_exhaustion) Line 1111
{
int tempfd; Line 1113
struct tempnode *node = create_temp_file (&tempfd, survive_fd_exhaustion); Line 1114
if (! node) Line 1115
return NULL; Line 1116
node->state = UNCOMPRESSED; Line 1118
if (compress_program) Line 1120
{
int pipefds[2]; Line 1122
node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS); Line 1124...!syscalls auto-comment...
if (0 < node->pid) Line 1125
{
close (tempfd); Line 1127...!syscalls auto-comment...
close (pipefds[0]); Line 1128...!syscalls auto-comment...
tempfd = pipefds[1]; Line 1129
register_proc (node); Line 1131
}
else if (node->pid == 0) Line 1133
{
/* Being the child of a multithreaded program before exec,
we're restricted to calling async-signal-safe routines here. */
close (pipefds[1]); Line 1137...!syscalls auto-comment...
move_fd (tempfd, STDOUT_FILENO); Line 1138
move_fd (pipefds[0], STDIN_FILENO); Line 1139
execlp (compress_program, compress_program, (char *) NULL); Line 1141
async_safe_die (errno, "couldn't execute compress program"); Line 1143
}
}
*pfp = fdopen (tempfd, "w"); Line 1147...!syscalls auto-comment...
if (! *pfp) Line 1148
sort_die (_("couldn't create temporary file"), node->name); Line 1149
return node; Line 1151
} Block 49
/* Create a temporary file and, if asked for, start a compressor
to that file. Set *PFP to the file handle and return the address
of the new temp node. Die on failure. */
static struct tempnode * Line 1158
create_temp (FILE **pfp) Line 1159
{
return maybe_create_temp (pfp, false); Line 1161
} Block 50
/* Open a compressed temp file and start a decompression process through
which to filter the input. Return NULL (setting errno to
EMFILE) if we ran out of file descriptors, and die on any other
kind of failure. */
static FILE * Line 1169
open_temp (struct tempnode *temp) Line 1170
{
int tempfd, pipefds[2]; Line 1172
FILE *fp = NULL; Line 1173
if (temp->state == UNREAPED) Line 1175
wait_proc (temp->pid); Line 1176
tempfd = open (temp->name, O_RDONLY); Line 1178...!syscalls auto-comment...
if (tempfd < 0) Line 1179
return NULL; Line 1180
pid_t child = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS); Line 1182...!syscalls auto-comment...
switch (child) Line 1184
{
case -1: Line 1186
if (errno != EMFILE) Line 1187
die (SORT_FAILURE, errno, _("couldn't create process for %s -d"), Line 1188
quoteaf (compress_program)); Line 1189
close (tempfd); Line 1190...!syscalls auto-comment...
errno = EMFILE; Line 1191
break; Line 1192
case 0: Line 1194
/* Being the child of a multithreaded program before exec,
we're restricted to calling async-signal-safe routines here. */
close (pipefds[0]); Line 1197...!syscalls auto-comment...
move_fd (tempfd, STDIN_FILENO); Line 1198
move_fd (pipefds[1], STDOUT_FILENO); Line 1199
execlp (compress_program, compress_program, "-d", (char *) NULL); Line 1201
async_safe_die (errno, "couldn't execute compress program (with -d)"); Line 1203
default: Line 1205
temp->pid = child; Line 1206
register_proc (temp); Line 1207
close (tempfd); Line 1208...!syscalls auto-comment...
close (pipefds[1]); Line 1209...!syscalls auto-comment...
fp = fdopen (pipefds[0], "r"); Line 1211...!syscalls auto-comment...
if (! fp) Line 1212
{
int saved_errno = errno; Line 1214
close (pipefds[0]); Line 1215...!syscalls auto-comment...
errno = saved_errno; Line 1216
}
break; Line 1218
}
return fp; Line 1221
} Block 51
/* Append DIR to the array of temporary directory names. */
static void Line 1225
add_temp_dir (char const *dir) Line 1226
{
if (temp_dir_count == temp_dir_alloc) Line 1228
temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc); Line 1229
temp_dirs[temp_dir_count++] = dir; Line 1231
} Block 52
/* Remove NAME from the list of temporary files. */
static void Line 1236
zaptemp (char const *name) Line 1237
{
struct tempnode *volatile *pnode; Line 1239
struct tempnode *node; Line 1240
struct tempnode *next; Line 1241
int unlink_status; Line 1242
int unlink_errno = 0; Line 1243
struct cs_status cs; Line 1244
for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next) Line 1246
continue; Line 1247
if (node->state == UNREAPED) Line 1249
wait_proc (node->pid); Line 1250
/* Unlink the temporary file in a critical section to avoid races. */
next = node->next; Line 1253
cs_enter (&cs); Line 1254
unlink_status = unlink (name); Line 1255...!syscalls auto-comment......!syscalls auto-comment...
unlink_errno = errno; Line 1256
*pnode = next; Line 1257
cs_leave (&cs); Line 1258
if (unlink_status != 0) Line 1260
error (0, unlink_errno, _("warning: cannot remove: %s"), quotef (name)); Line 1261
if (! next) Line 1262
temptail = pnode; Line 1263
free (node); Line 1264
} Block 53
#if HAVE_NL_LANGINFO Line 1267
static int Line 1269
struct_month_cmp (void const *m1, void const *m2) Line 1270
{
struct month const *month1 = m1; Line 1272
struct month const *month2 = m2; Line 1273
return strcmp (month1->name, month2->name); Line 1274
} Block 54
#endif Line 1277
/* Initialize the character class tables. */
static void Line 1281
inittables (void) Line 1282
{
size_t i; Line 1284
for (i = 0; i < UCHAR_LIM; ++i) Line 1286
{
blanks[i] = field_sep (i); Line 1288
nonprinting[i] = ! isprint (i); Line 1289
nondictionary[i] = ! isalnum (i) && ! field_sep (i); Line 1290
fold_toupper[i] = toupper (i); Line 1291
}
#if HAVE_NL_LANGINFO Line 1294
/* If we're not in the "C" locale, read different names for months. */
if (hard_LC_TIME) Line 1296
{
for (i = 0; i < MONTHS_PER_YEAR; i++) Line 1298
{
char const *s; Line 1300
size_t s_len; Line 1301
size_t j, k; Line 1302
char *name; Line 1303
s = nl_langinfo (ABMON_1 + i); Line 1305
s_len = strlen (s); Line 1306
monthtab[i].name = name = xmalloc (s_len + 1); Line 1307
monthtab[i].val = i + 1; Line 1308
for (j = k = 0; j < s_len; j++) Line 1310
if (! isblank (to_uchar (s[j]))) Line 1311
name[k++] = fold_toupper[to_uchar (s[j])]; Line 1312
name[k] = '\0'; Line 1313
}
qsort (monthtab, MONTHS_PER_YEAR, sizeof *monthtab, struct_month_cmp); Line 1315
}
#endif Line 1317
} Block 55
/* Specify how many inputs may be merged at once.
This may be set on the command-line with the
--batch-size option. */
static void Line 1323
specify_nmerge (int oi, char c, char const *s) Line 1324
{
uintmax_t n; Line 1326
struct rlimit rlimit; Line 1327
enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL); Line 1328
/* Try to find out how many file descriptors we'll be able
to open. We need at least nmerge + 3 (STDIN_FILENO,
STDOUT_FILENO and STDERR_FILENO). */
unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0 Line 1333
? rlimit.rlim_cur Line 1334
: OPEN_MAX) Line 1335
- 3); Line 1336
if (e == LONGINT_OK) Line 1338
{
nmerge = n; Line 1340
if (nmerge != n) Line 1341
e = LONGINT_OVERFLOW; Line 1342
else Line 1343
{
if (nmerge < 2) Line 1345
{
error (0, 0, _("invalid --%s argument %s"), Line 1347
long_options[oi].name, quote (s)); Line 1348
die (SORT_FAILURE, 0, Line 1349
_("minimum --%s argument is %s"), Line 1350
long_options[oi].name, quote ("2")); Line 1351
}
else if (max_nmerge < nmerge) Line 1353
{
e = LONGINT_OVERFLOW; Line 1355
}
else Line 1357
return; Line 1358
}
}
if (e == LONGINT_OVERFLOW) Line 1362
{
char max_nmerge_buf[INT_BUFSIZE_BOUND (max_nmerge)]; Line 1364
error (0, 0, _("--%s argument %s too large"), Line 1365
long_options[oi].name, quote (s)); Line 1366
die (SORT_FAILURE, 0, Line 1367
_("maximum --%s argument with current rlimit is %s"), Line 1368
long_options[oi].name, Line 1369
uinttostr (max_nmerge, max_nmerge_buf)); Line 1370
}
else Line 1372
xstrtol_fatal (e, oi, c, long_options, s); Line 1373
} Block 56
/* Specify the amount of main memory to use when sorting. */
static void Line 1377
specify_sort_size (int oi, char c, char const *s) Line 1378
{
uintmax_t n; Line 1380
char *suffix; Line 1381
enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ"); Line 1382
/* The default unit is KiB. */
if (e == LONGINT_OK && ISDIGIT (suffix[-1])) Line 1385
{
if (n <= UINTMAX_MAX / 1024) Line 1387
n *= 1024; Line 1388
else Line 1389
e = LONGINT_OVERFLOW; Line 1390
}
/* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1]) Line 1394
switch (suffix[0]) Line 1395
{
case 'b': Line 1397
e = LONGINT_OK; Line 1398
break; Line 1399
case '%': Line 1401
{
double mem = physmem_total () * n / 100; Line 1403
/* Use "<", not "<=", to avoid problems with rounding. */
if (mem < UINTMAX_MAX) Line 1406
{
n = mem; Line 1408
e = LONGINT_OK; Line 1409
}
else Line 1411
e = LONGINT_OVERFLOW; Line 1412
}
break; Line 1414
}
if (e == LONGINT_OK) Line 1417
{
/* If multiple sort sizes are specified, take the maximum, so
that option order does not matter. */
if (n < sort_size) Line 1421
return; Line 1422
sort_size = n; Line 1424
if (sort_size == n) Line 1425
{
sort_size = MAX (sort_size, MIN_SORT_SIZE); Line 1427
return; Line 1428
}
e = LONGINT_OVERFLOW; Line 1431
}
xstrtol_fatal (e, oi, c, long_options, s); Line 1434
} Block 57
/* Specify the number of threads to spawn during internal sort. */
static size_t Line 1438
specify_nthreads (int oi, char c, char const *s) Line 1439
{
unsigned long int nthreads; Line 1441
enum strtol_error e = xstrtoul (s, NULL, 10, &nthreads, ""); Line 1442
if (e == LONGINT_OVERFLOW) Line 1443
return SIZE_MAX; Line 1444
if (e != LONGINT_OK) Line 1445
xstrtol_fatal (e, oi, c, long_options, s); Line 1446
if (SIZE_MAX < nthreads) Line 1447
nthreads = SIZE_MAX; Line 1448
if (nthreads == 0) Line 1449
die (SORT_FAILURE, 0, _("number in parallel must be nonzero")); Line 1450
return nthreads; Line 1451
} Block 58
/* Return the default sort size. */
static size_t Line 1455
default_sort_size (void) Line 1456
{
/* Let SIZE be MEM, but no more than the maximum object size,
total memory, or system resource limits. Don't bother to check
for values like RLIM_INFINITY since in practice they are not much
less than SIZE_MAX. */
size_t size = SIZE_MAX; Line 1462
struct rlimit rlimit; Line 1463
if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size) Line 1464
size = rlimit.rlim_cur; Line 1465
#ifdef RLIMIT_AS Line 1466
if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size) Line 1467
size = rlimit.rlim_cur; Line 1468
#endif Line 1469
/* Leave a large safety margin for the above limits, as failure can
occur when they are exceeded. */
size /= 2; Line 1473
#ifdef RLIMIT_RSS Line 1475
/* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
Exceeding RSS is not fatal, but can be quite slow. */
if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size) Line 1478
size = rlimit.rlim_cur / 16 * 15; Line 1479
#endif Line 1480
/* Let MEM be available memory or 1/8 of total memory, whichever
is greater. */
double avail = physmem_available (); Line 1484
double total = physmem_total (); Line 1485
double mem = MAX (avail, total / 8); Line 1486
/* Leave a 1/4 margin for physical memory. */
if (total * 0.75 < size) Line 1489
size = total * 0.75; Line 1490
/* Return the minimum of MEM and SIZE, but no less than
MIN_SORT_SIZE. Avoid the MIN macro here, as it is not quite
right when only one argument is floating point. */
if (mem < size) Line 1495
size = mem; Line 1496
return MAX (size, MIN_SORT_SIZE); Line 1497
}
/* Return the sort buffer size to use with the input files identified
by FPS and FILES, which are alternate names of the same files.
NFILES gives the number of input files; NFPS may be less. Assume
that each input line requires LINE_BYTES extra bytes' worth of line
information. Do not exceed the size bound specified by the user
(or a default size bound, if the user does not specify one). */
static size_t Line 1507
sort_buffer_size (FILE *const *fps, size_t nfps, Line 1508
char *const *files, size_t nfiles, Line 1509
size_t line_bytes) Line 1510
{
/* A bound on the input size. If zero, the bound hasn't been
determined yet. */
static size_t size_bound; Line 1514
/* In the worst case, each input byte is a newline. */
size_t worst_case_per_input_byte = line_bytes + 1; Line 1517
/* Keep enough room for one extra input line and an extra byte.
This extra room might be needed when preparing to read EOF. */
size_t size = worst_case_per_input_byte + 1; Line 1521
for (size_t i = 0; i < nfiles; i++) Line 1523
{
struct stat st; Line 1525
off_t file_size; Line 1526
size_t worst_case; Line 1527
if ((i < nfps ? fstat (fileno (fps[i]), &st) Line 1529...!syscalls auto-comment......!syscalls auto-comment...
: STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st) Line 1530...!syscalls auto-comment......!syscalls auto-comment...
: stat (files[i], &st)) Line 1531...!syscalls auto-comment...
!= 0) Line 1532
sort_die (_("stat failed"), files[i]); Line 1533
if (S_ISREG (st.st_mode)) Line 1535
file_size = st.st_size; Line 1536
else Line 1537
{
/* The file has unknown size. If the user specified a sort
buffer size, use that; otherwise, guess the size. */
if (sort_size) Line 1541
return sort_size; Line 1542
file_size = INPUT_FILE_SIZE_GUESS; Line 1543
}
if (! size_bound) Line 1546
{
size_bound = sort_size; Line 1548
if (! size_bound) Line 1549
size_bound = default_sort_size (); Line 1550
}
/* Add the amount of memory needed to represent the worst case
where the input consists entirely of newlines followed by a
single non-newline. Check for overflow. */
worst_case = file_size * worst_case_per_input_byte + 1; Line 1556
if (file_size != worst_case / worst_case_per_input_byte Line 1557
|| size_bound - size <= worst_case) Line 1558
return size_bound; Line 1559
size += worst_case; Line 1560
}
return size; Line 1563
}
/* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
must be at least sizeof (struct line). Allocate ALLOC bytes
initially. */
static void Line 1570
initbuf (struct buffer *buf, size_t line_bytes, size_t alloc) Line 1571
{
/* Ensure that the line array is properly aligned. If the desired
size cannot be allocated, repeatedly halve it until allocation
succeeds. The smaller allocation may hurt overall performance,
but that's better than failing. */
while (true) Line 1577
{
alloc += sizeof (struct line) - alloc % sizeof (struct line); Line 1579
buf->buf = malloc (alloc); Line 1580
if (buf->buf) Line 1581
break; Line 1582
alloc /= 2; Line 1583
if (alloc <= line_bytes + 1) Line 1584
xalloc_die (); ...!common auto-comment...
}
buf->line_bytes = line_bytes; Line 1588
buf->alloc = alloc; Line 1589
buf->used = buf->left = buf->nlines = 0; Line 1590
buf->eof = false; Line 1591
}
/* Return one past the limit of the line array. */
static inline struct line * Line 1596
buffer_linelim (struct buffer const *buf) Line 1597
{
void *linelim = buf->buf + buf->alloc; Line 1599
return linelim; Line 1600
} Block 62
/* Return a pointer to the first character of the field specified
by KEY in LINE. */
static char * Line 1606
begfield (struct line const *line, struct keyfield const *key) Line 1607
{
char *ptr = line->text, *lim = ptr + line->length - 1; Line 1609
size_t sword = key->sword; Line 1610
size_t schar = key->schar; Line 1611
/* The leading field separator itself is included in a field when -t
is absent. */
if (tab != TAB_DEFAULT) Line 1616
while (ptr < lim && sword--) Line 1617
{
while (ptr < lim && *ptr != tab) Line 1619
++ptr; Line 1620
if (ptr < lim) Line 1621
++ptr; Line 1622
}
else Line 1624
while (ptr < lim && sword--) Line 1625
{
while (ptr < lim && blanks[to_uchar (*ptr)]) Line 1627
++ptr; Line 1628
while (ptr < lim && !blanks[to_uchar (*ptr)]) Line 1629
++ptr; Line 1630
}
/* If we're ignoring leading blanks when computing the Start
of the field, skip past them here. */
if (key->skipsblanks) Line 1635
while (ptr < lim && blanks[to_uchar (*ptr)]) Line 1636
++ptr; Line 1637
/* Advance PTR by SCHAR (if possible), but no further than LIM. */
ptr = MIN (lim, ptr + schar); Line 1640
return ptr; Line 1642
} Block 63
/* Return the limit of (a pointer to the first character after) the field
in LINE specified by KEY. */
static char * Line 1648
limfield (struct line const *line, struct keyfield const *key) Line 1649
{
char *ptr = line->text, *lim = ptr + line->length - 1; Line 1651
size_t eword = key->eword, echar = key->echar; Line 1652
if (echar == 0) Line 1654
eword++; /* Skip all of end field. */ Line 1655
/* Move PTR past EWORD fields or to one past the last byte on LINE,
whichever comes first. If there are more than EWORD fields, leave
PTR pointing at the beginning of the field having zero-based index,
EWORD. If a delimiter character was specified (via -t), then that
'beginning' is the first character following the delimiting TAB.
Otherwise, leave PTR pointing at the first 'blank' character after
the preceding field. */
if (tab != TAB_DEFAULT) Line 1664
while (ptr < lim && eword--) Line 1665
{
while (ptr < lim && *ptr != tab) Line 1667
++ptr; Line 1668
if (ptr < lim && (eword || echar)) Line 1669
++ptr; Line 1670
}
else Line 1672
while (ptr < lim && eword--) Line 1673
{
while (ptr < lim && blanks[to_uchar (*ptr)]) Line 1675
++ptr; Line 1676
while (ptr < lim && !blanks[to_uchar (*ptr)]) Line 1677
++ptr; Line 1678
}
#ifdef POSIX_UNSPECIFIED Line 1681
/* The following block of code makes GNU sort incompatible with
standard Unix sort, so it's ifdef'd out for now.
The POSIX spec isn't clear on how to interpret this.
FIXME: request clarification.
From: kwzh@gnu.ai.mit.edu (Karl Heuer)
Date: Thu, 30 May 96 12:20:41 -0400
[Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
[...]I believe I've found another bug in 'sort'.
$ cat /tmp/sort.in
a b c 2 d
pq rs 1 t
$ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
a b c 2 d
pq rs 1 t
$ /bin/sort -k1.7,1.7 </tmp/sort.in
pq rs 1 t
a b c 2 d
Unix sort produced the answer I expected: sort on the single character
in column 7. GNU sort produced different results, because it disagrees
on the interpretation of the key-end spec "M.N". Unix sort reads this
as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
"skip M-1 fields, then either N-1 characters or the rest of the current
field, whichever comes first". This extra clause applies only to
key-ends, not key-starts.
*/
/* Make LIM point to the end of (one byte past) the current field. */
if (tab != TAB_DEFAULT) Line 1713
{
char *newlim; Line 1715
newlim = memchr (ptr, tab, lim - ptr); Line 1716
if (newlim) Line 1717
lim = newlim; Line 1718
}
else Line 1720
{
char *newlim; Line 1722
newlim = ptr; Line 1723
while (newlim < lim && blanks[to_uchar (*newlim)]) Line 1724
++newlim; Line 1725
while (newlim < lim && !blanks[to_uchar (*newlim)]) Line 1726
++newlim; Line 1727
lim = newlim; Line 1728
}
#endif Line 1730
if (echar != 0) /* We need to skip over a portion of the end field. */ Line 1732
{
/* If we're ignoring leading blanks when computing the End
of the field, skip past them here. */
if (key->skipeblanks) Line 1736
while (ptr < lim && blanks[to_uchar (*ptr)]) Line 1737
++ptr; Line 1738
/* Advance PTR by ECHAR (if possible), but no further than LIM. */
ptr = MIN (lim, ptr + echar); Line 1741
}
return ptr; Line 1744
} Block 64
/* Fill BUF reading from FP, moving buf->left bytes from the end
of buf->buf to the beginning first. If EOF is reached and the
file wasn't terminated by a newline, supply one. Set up BUF's line
table too. FILE is the name of the file corresponding to FP.
Return true if some input was read. */
static bool Line 1753
fillbuf (struct buffer *buf, FILE *fp, char const *file) Line 1754
{
struct keyfield const *key = keylist; Line 1756
char eol = eolchar; Line 1757
size_t line_bytes = buf->line_bytes; Line 1758
size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE; Line 1759
if (buf->eof) Line 1761
return false; Line 1762
if (buf->used != buf->left) Line 1764
{
memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); Line 1766
buf->used = buf->left; Line 1767
buf->nlines = 0; Line 1768
}
while (true) Line 1771
{
char *ptr = buf->buf + buf->used; Line 1773
struct line *linelim = buffer_linelim (buf); Line 1774
struct line *line = linelim - buf->nlines; Line 1775
size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr; Line 1776
char *line_start = buf->nlines ? line->text + line->length : buf->buf; Line 1777
while (line_bytes + 1 < avail) Line 1779
{
/* Read as many bytes as possible, but do not read so many
bytes that there might not be enough room for the
corresponding line array. The worst case is when the
rest of the input file consists entirely of newlines,
except that the last byte is not a newline. */
size_t readsize = (avail - 1) / (line_bytes + 1); Line 1786
size_t bytes_read = fread (ptr, 1, readsize, fp); Line 1787...!syscalls auto-comment...
char *ptrlim = ptr + bytes_read; Line 1788
char *p; Line 1789
avail -= bytes_read; Line 1790
if (bytes_read != readsize) Line 1792
{
if (ferror (fp)) Line 1794
sort_die (_("read failed"), file); Line 1795
if (feof (fp)) Line 1796
{
buf->eof = true; Line 1798
if (buf->buf == ptrlim) Line 1799
return false; Line 1800
if (line_start != ptrlim && ptrlim[-1] != eol) Line 1801
*ptrlim++ = eol; Line 1802
}
}
/* Find and record each line in the just-read input. */
while ((p = memchr (ptr, eol, ptrlim - ptr))) Line 1807
{
/* Delimit the line with NUL. This eliminates the need to
temporarily replace the last byte with NUL when calling
xmemcoll, which increases performance. */
*p = '\0'; Line 1812
ptr = p + 1; Line 1813
line--; Line 1814
line->text = line_start; Line 1815
line->length = ptr - line_start; Line 1816
mergesize = MAX (mergesize, line->length); Line 1817
avail -= line_bytes; Line 1818
if (key) Line 1820
{
/* Precompute the position of the first key for
efficiency. */
line->keylim = (key->eword == SIZE_MAX Line 1824
? p
: limfield (line, key)); Line 1826
if (key->sword != SIZE_MAX) Line 1828
line->keybeg = begfield (line, key); Line 1829
else Line 1830
{
if (key->skipsblanks) Line 1832
while (blanks[to_uchar (*line_start)]) Line 1833
line_start++; Line 1834
line->keybeg = line_start; Line 1835
}
}
line_start = ptr; Line 1839
}
ptr = ptrlim; Line 1842
if (buf->eof) Line 1843
break; Line 1844
}
buf->used = ptr - buf->buf; Line 1847
buf->nlines = buffer_linelim (buf) - line; Line 1848
if (buf->nlines != 0) Line 1849
{
buf->left = ptr - line_start; Line 1851
merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE; Line 1852
return true; Line 1853
}
{
/* The current input line is too long to fit in the buffer.
Increase the buffer size and try again, keeping it properly
aligned. */
size_t line_alloc = buf->alloc / sizeof (struct line); Line 1860
buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line)); Line 1861
buf->alloc = line_alloc * sizeof (struct line); Line 1862
}
}
} Block 65
/* Table that maps characters to order-of-magnitude values. */
static char const unit_order[UCHAR_LIM] = Line 1868
{
#if ! ('K' == 75 && 'M' == 77 && 'G' == 71 && 'T' == 84 && 'P' == 80 \ Line 1870
&& 'E' == 69 && 'Z' == 90 && 'Y' == 89 && 'k' == 107) Line 1871
/* This initializer syntax works on all C99 hosts. For now, use
it only on non-ASCII hosts, to ease the pain of porting to
pre-C99 ASCII hosts. */
['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8, Line 1875
['k']=1, Line 1876
#else Line 1877
/* Generate the following table with this command:
perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
|fmt */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Line 1882
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Line 1883
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, Line 1884
0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0, Line 1885
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Line 1886
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Line 1887
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Line 1888
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Line 1889
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Line 1890
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Line 1891
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 Line 1892
#endif Line 1893
}; Block 66
/* Traverse number given as *number consisting of digits, thousands_sep, and
decimal_point chars only. Returns the highest digit found in the number,
or '\0' if no digit has been found. Upon return *number points at the
character that immediately follows after the given number. */
static unsigned char Line 1900
traverse_raw_number (char const **number) Line 1901
{
char const *p = *number; Line 1903
unsigned char ch; Line 1904
unsigned char max_digit = '\0'; Line 1905
bool ends_with_thousands_sep = false; Line 1906
/* Scan to end of number.
Decimals or separators not followed by digits stop the scan.
Numbers ending in decimals or separators are thus considered
to be lacking in units.
FIXME: add support for multibyte thousands_sep and decimal_point. */
while (ISDIGIT (ch = *p++)) Line 1914
{
if (max_digit < ch) Line 1916
max_digit = ch; Line 1917
/* Allow to skip only one occurrence of thousands_sep to avoid finding
the unit in the next column in case thousands_sep matches as blank
and is used as column delimiter. */
ends_with_thousands_sep = (*p == thousands_sep); Line 1922
if (ends_with_thousands_sep) Line 1923
++p; Line 1924
}
if (ends_with_thousands_sep) Line 1927
{
/* thousands_sep not followed by digit is not allowed. */
*number = p - 2; Line 1930
return max_digit; Line 1931
}
if (ch == decimal_point) Line 1934
while (ISDIGIT (ch = *p++)) Line 1935
if (max_digit < ch) Line 1936
max_digit = ch; Line 1937
*number = p - 1; Line 1939
return max_digit; Line 1940
} Block 67
/* Return an integer that represents the order of magnitude of the
unit following the number. The number may contain thousands
separators and a decimal point, but it may not contain leading blanks.
Negative numbers get negative orders; zero numbers have a zero order. */
static int _GL_ATTRIBUTE_PURE Line 1948
find_unit_order (char const *number) Line 1949
{
bool minus_sign = (*number == '-'); Line 1951
char const *p = number + minus_sign; Line 1952
unsigned char max_digit = traverse_raw_number (&p); Line 1953
if ('0' < max_digit) Line 1954
{
unsigned char ch = *p; Line 1956
int order = unit_order[ch]; Line 1957
return (minus_sign ? -order : order); Line 1958
}
else Line 1960
return 0; Line 1961
} Block 68
/* Compare numbers A and B ending in units with SI or IEC prefixes
<none/unknown> < K/k < M < G < T < P < E < Z < Y */
static int Line 1967
human_numcompare (char const *a, char const *b) Line 1968
{
while (blanks[to_uchar (*a)]) Line 1970
a++; Line 1971
while (blanks[to_uchar (*b)]) Line 1972
b++; Line 1973
int diff = find_unit_order (a) - find_unit_order (b); Line 1975
return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep)); Line 1976
} Block 69
/* Compare strings A and B as numbers without explicitly converting them to
machine numbers. Comparatively slow for short strings, but asymptotically
hideously fast. */
static int Line 1983
numcompare (char const *a, char const *b) Line 1984
{
while (blanks[to_uchar (*a)]) Line 1986
a++; Line 1987
while (blanks[to_uchar (*b)]) Line 1988
b++; Line 1989
return strnumcmp (a, b, decimal_point, thousands_sep); Line 1991
} Block 70
/* Work around a problem whereby the long double value returned by glibc's
strtold ("NaN", ...) contains uninitialized bits: clear all bytes of
A and B before calling strtold. FIXME: remove this function once
gnulib guarantees that strtold's result is always well defined. */
static int Line 1998
nan_compare (char const *sa, char const *sb) Line 1999
{
long_double a; Line 2001
memset (&a, 0, sizeof a); Line 2002
a = strtold (sa, NULL); Line 2003
long_double b; Line 2005
memset (&b, 0, sizeof b); Line 2006
b = strtold (sb, NULL); Line 2007
return memcmp (&a, &b, sizeof a); Line 2009
} Block 71
static int Line 2012
general_numcompare (char const *sa, char const *sb) Line 2013
{
/* FIXME: maybe add option to try expensive FP conversion
only if A and B can't be compared more cheaply/accurately. */
char *ea; Line 2018
char *eb; Line 2019
long_double a = strtold (sa, &ea); Line 2020
long_double b = strtold (sb, &eb); Line 2021
/* Put conversion errors at the start of the collating sequence. */
if (sa == ea) Line 2024
return sb == eb ? 0 : -1; Line 2025
if (sb == eb) Line 2026
return 1; Line 2027
/* Sort numbers in the usual way, where -0 == +0. Put NaNs after
conversion errors but before numbers; sort them by internal
bit-pattern, for lack of a more portable alternative. */
return (a < b ? -1 Line 2032
: a > b ? 1 Line 2033
: a == b ? 0 Line 2034
: b == b ? -1 Line 2035
: a == a ? 1 Line 2036
: nan_compare (sa, sb)); Line 2037
}
/* Return an integer in 1..12 of the month name MONTH.
Return 0 if the name in S is not recognized. */
static int Line 2043
getmonth (char const *month, char **ea) Line 2044
{
size_t lo = 0; Line 2046
size_t hi = MONTHS_PER_YEAR; Line 2047
while (blanks[to_uchar (*month)]) Line 2049
month++; Line 2050
do
{
size_t ix = (lo + hi) / 2; Line 2054
char const *m = month; Line 2055
char const *n = monthtab[ix].name; Line 2056
for (;; m++, n++) Line 2058
{
if (!*n) Line 2060
{
if (ea) Line 2062
*ea = (char *) m; Line 2063
return monthtab[ix].val; Line 2064
}
if (to_uchar (fold_toupper[to_uchar (*m)]) < to_uchar (*n)) Line 2066
{
hi = ix; Line 2068
break; Line 2069
}
else if (to_uchar (fold_toupper[to_uchar (*m)]) > to_uchar (*n)) Line 2071
{
lo = ix + 1; Line 2073
break; Line 2074
}
}
}
while (lo < hi); Line 2078
return 0; Line 2080
} Block 73
/* A randomly chosen MD5 state, used for random comparison. */
static struct md5_ctx random_md5_state; Line 2084
/* Initialize the randomly chosen MD5 state. */
static void Line 2088
random_md5_state_init (char const *random_source) Line 2089
{
unsigned char buf[MD5_DIGEST_SIZE]; Line 2091
struct randread_source *r = randread_new (random_source, sizeof buf); Line 2092
if (! r) Line 2093
sort_die (_("open failed"), random_source); Line 2094
randread (r, buf, sizeof buf); Line 2095...!syscalls auto-comment...
if (randread_free (r) != 0) Line 2096
sort_die (_("close failed"), random_source); Line 2097
md5_init_ctx (&random_md5_state); Line 2098
md5_process_bytes (buf, sizeof buf, &random_md5_state); Line 2099
} Block 74
/* This is like strxfrm, except it reports any error and exits. */
static size_t Line 2104
xstrxfrm (char *restrict dest, char const *restrict src, size_t destsize) Line 2105
{
errno = 0; Line 2107
size_t translated_size = strxfrm (dest, src, destsize); Line 2108
if (errno) Line 2110
{
error (0, errno, _("string transformation failed")); Line 2112
error (0, 0, _("set LC_ALL='C' to work around the problem")); Line 2113
die (SORT_FAILURE, 0, Line 2114
_("the untransformed string was %s"), Line 2115
quotearg_n_style (0, locale_quoting_style, src)); Line 2116
}
return translated_size; Line 2119
} Block 75
/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
using one or more random hash functions. TEXTA[LENA] and
TEXTB[LENB] must be zero. */
static int Line 2126
compare_random (char *restrict texta, size_t lena, Line 2127
char *restrict textb, size_t lenb) Line 2128
{
/* XFRM_DIFF records the equivalent of memcmp on the transformed
data. This is used to break ties if there is a checksum
collision, and this is good enough given the astronomically low
probability of a collision. */
int xfrm_diff = 0; Line 2134
char stackbuf[4000]; Line 2136
char *buf = stackbuf; Line 2137
size_t bufsize = sizeof stackbuf; Line 2138
void *allocated = NULL; Line 2139
uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)]; Line 2140
struct md5_ctx s[2]; Line 2141
s[0] = s[1] = random_md5_state; Line 2142
if (hard_LC_COLLATE) Line 2144
{
char const *lima = texta + lena; Line 2146
char const *limb = textb + lenb; Line 2147
while (true) Line 2149
{
/* Transform the text into the basis of comparison, so that byte
strings that would otherwise considered to be equal are
considered equal here even if their bytes differ.
Each time through this loop, transform one
null-terminated string's worth from TEXTA or from TEXTB
or both. That way, there's no need to store the
transformation of the whole line, if it contains many
null-terminated strings. */
/* Store the transformed data into a big-enough buffer. */
/* A 3X size guess avoids the overhead of calling strxfrm
twice on typical implementations. Don't worry about
size_t overflow, as the guess need not be correct. */
size_t guess_bufsize = 3 * (lena + lenb) + 2; Line 2166
if (bufsize < guess_bufsize) Line 2167
{
bufsize = MAX (guess_bufsize, bufsize * 3 / 2); Line 2169
free (allocated); Line 2170
buf = allocated = malloc (bufsize); Line 2171
if (! buf) Line 2172
{
buf = stackbuf; Line 2174
bufsize = sizeof stackbuf; Line 2175
}
}
size_t sizea = Line 2179
(texta < lima ? xstrxfrm (buf, texta, bufsize) + 1 : 0); Line 2180
bool a_fits = sizea <= bufsize; Line 2181
size_t sizeb = Line 2182
(textb < limb Line 2183
? (xstrxfrm ((a_fits ? buf + sizea : NULL), textb, Line 2184
(a_fits ? bufsize - sizea : 0)) Line 2185
+ 1) Line 2186
: 0); Line 2187
if (! (a_fits && sizea + sizeb <= bufsize)) Line 2189
{
bufsize = sizea + sizeb; Line 2191
if (bufsize < SIZE_MAX / 3) Line 2192
bufsize = bufsize * 3 / 2; Line 2193
free (allocated); Line 2194
buf = allocated = xmalloc (bufsize); Line 2195
if (texta < lima) Line 2196
strxfrm (buf, texta, sizea); Line 2197
if (textb < limb) Line 2198
strxfrm (buf + sizea, textb, sizeb); Line 2199
}
/* Advance past NULs to the next part of each input string,
exiting the loop if both strings are exhausted. When
exiting the loop, prepare to finish off the tiebreaker
comparison properly. */
if (texta < lima) Line 2206
texta += strlen (texta) + 1; Line 2207
if (textb < limb) Line 2208
textb += strlen (textb) + 1; Line 2209
if (! (texta < lima || textb < limb)) Line 2210
{
lena = sizea; texta = buf; Line 2212
lenb = sizeb; textb = buf + sizea; Line 2213
break; Line 2214
}
/* Accumulate the transformed data in the corresponding
checksums. */
md5_process_bytes (buf, sizea, &s[0]); Line 2219
md5_process_bytes (buf + sizea, sizeb, &s[1]); Line 2220
/* Update the tiebreaker comparison of the transformed data. */
if (! xfrm_diff) Line 2223
{
xfrm_diff = memcmp (buf, buf + sizea, MIN (sizea, sizeb)); Line 2225
if (! xfrm_diff) Line 2226
xfrm_diff = (sizea > sizeb) - (sizea < sizeb); Line 2227
}
}
}
/* Compute and compare the checksums. */
md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]); Line 2233
md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]); Line 2234
int diff = memcmp (dig[0], dig[1], sizeof dig[0]); Line 2235
/* Fall back on the tiebreaker if the checksums collide. */
if (! diff) Line 2238
{
if (! xfrm_diff) Line 2240
{
xfrm_diff = memcmp (texta, textb, MIN (lena, lenb)); Line 2242
if (! xfrm_diff) Line 2243
xfrm_diff = (lena > lenb) - (lena < lenb); Line 2244
}
diff = xfrm_diff; Line 2247
}
free (allocated); Line 2250
return diff; Line 2252
}
/* Return the printable width of the block of memory starting at
TEXT and ending just before LIM, counting each tab as one byte.
FIXME: Should we generally be counting non printable chars? */
static size_t Line 2259
debug_width (char const *text, char const *lim) Line 2260
{
size_t width = mbsnwidth (text, lim - text, 0); Line 2262
while (text < lim) Line 2263
width += (*text++ == '\t'); Line 2264
return width; Line 2265
} Block 77
/* For debug mode, "underline" a key at the
specified offset and screen width. */
static void Line 2271
mark_key (size_t offset, size_t width) Line 2272
{
while (offset--) Line 2274
putchar (' '); Line 2275
if (!width) Line 2277
printf (_("^ no match for key\n")); Line 2278
else Line 2279
{
do
putchar ('_'); Line 2282
while (--width); Line 2283
putchar ('\n'); Line 2285
}
} Block 78
/* Return true if KEY is a numeric key. */
static inline bool Line 2291
key_numeric (struct keyfield const *key) Line 2292
{
return key->numeric || key->general_numeric || key->human_numeric; Line 2294
} Block 79
/* For LINE, output a debugging line that underlines KEY in LINE.
If KEY is null, underline the whole line. */
static void Line 2300
debug_key (struct line const *line, struct keyfield const *key) Line 2301
{
char *text = line->text; Line 2303
char *beg = text; Line 2304
char *lim = text + line->length - 1; Line 2305
if (key) Line 2307
{
if (key->sword != SIZE_MAX) Line 2309
beg = begfield (line, key); Line 2310
if (key->eword != SIZE_MAX) Line 2311
lim = limfield (line, key); Line 2312
if ((key->skipsblanks && key->sword == SIZE_MAX) Line 2314
|| key->month || key_numeric (key)) Line 2315
{
char saved = *lim; Line 2317
*lim = '\0'; Line 2318
while (blanks[to_uchar (*beg)]) Line 2320
beg++; Line 2321
char *tighter_lim = beg; Line 2323
if (lim < beg) Line 2325
tighter_lim = lim; Line 2326
else if (key->month) Line 2327
getmonth (beg, &tighter_lim); Line 2328
else if (key->general_numeric) Line 2329
ignore_value (strtold (beg, &tighter_lim)); Line 2330
else if (key->numeric || key->human_numeric) Line 2331
{
char const *p = beg + (beg < lim && *beg == '-'); Line 2333
unsigned char max_digit = traverse_raw_number (&p); Line 2334
if ('0' <= max_digit) Line 2335
{
unsigned char ch = *p; Line 2337
tighter_lim = (char *) p Line 2338
+ (key->human_numeric && unit_order[ch]); Line 2339
}
}
else Line 2342
tighter_lim = lim; Line 2343
*lim = saved; Line 2345
lim = tighter_lim; Line 2346
}
}
size_t offset = debug_width (text, beg); Line 2350
size_t width = debug_width (beg, lim); Line 2351
mark_key (offset, width); Line 2352
} Block 80
/* Debug LINE by underlining its keys. */
static void Line 2357
debug_line (struct line const *line) Line 2358
{
struct keyfield const *key = keylist; Line 2360
do
debug_key (line, key); Line 2363
while (key && ((key = key->next) || ! (unique || stable))); Line 2364
} Block 81
/* Return whether sorting options specified for key. */
static bool Line 2369
default_key_compare (struct keyfield const *key) Line 2370
{
return ! (key->ignore Line 2372
|| key->translate Line 2373
|| key->skipsblanks Line 2374
|| key->skipeblanks Line 2375
|| key_numeric (key) Line 2376
|| key->month Line 2377
|| key->version Line 2378
|| key->random Line 2379
/* || key->reverse */
);
} Block 82
/* Convert a key to the short options used to specify it. */
static void Line 2386
key_to_opts (struct keyfield const *key, char *opts) Line 2387
{
if (key->skipsblanks || key->skipeblanks) Line 2389
*opts++ = 'b';/* either disables global -b */ Line 2390
if (key->ignore == nondictionary) Line 2391
*opts++ = 'd'; Line 2392
if (key->translate) Line 2393
*opts++ = 'f'; Line 2394
if (key->general_numeric) Line 2395
*opts++ = 'g'; Line 2396
if (key->human_numeric) Line 2397
*opts++ = 'h'; Line 2398
if (key->ignore == nonprinting) Line 2399
*opts++ = 'i'; Line 2400
if (key->month) Line 2401
*opts++ = 'M'; Line 2402
if (key->numeric) Line 2403
*opts++ = 'n'; Line 2404
if (key->random) Line 2405
*opts++ = 'R'; Line 2406
if (key->reverse) Line 2407
*opts++ = 'r'; Line 2408
if (key->version) Line 2409
*opts++ = 'V'; Line 2410
*opts = '\0'; Line 2411
} Block 83
/* Output data independent key warnings to stderr. */
static void Line 2416
key_warnings (struct keyfield const *gkey, bool gkey_only) Line 2417
{
struct keyfield const *key; Line 2419
struct keyfield ugkey = *gkey; Line 2420
unsigned long keynum = 1; Line 2421
for (key = keylist; key; key = key->next, keynum++) Line 2423
{
if (key->traditional_used) Line 2425
{
size_t sword = key->sword; Line 2427
size_t eword = key->eword; Line 2428
char tmp[INT_BUFSIZE_BOUND (uintmax_t)]; Line 2429
/* obsolescent syntax +A.x -B.y is equivalent to:
-k A+1.x+1,B.y (when y = 0)
-k A+1.x+1,B+1.y (when y > 0) */
char obuf[INT_BUFSIZE_BOUND (sword) * 2 + 4]; /* +# -# */ Line 2433
char nbuf[INT_BUFSIZE_BOUND (sword) * 2 + 5]; /* -k #,# */ Line 2434
char *po = obuf; Line 2435
char *pn = nbuf; Line 2436
if (sword == SIZE_MAX) Line 2438
sword++; Line 2439
po = stpcpy (stpcpy (po, "+"), umaxtostr (sword, tmp)); Line 2441
pn = stpcpy (stpcpy (pn, "-k "), umaxtostr (sword + 1, tmp)); Line 2442
if (key->eword != SIZE_MAX) Line 2443
{
stpcpy (stpcpy (po, " -"), umaxtostr (eword + 1, tmp)); Line 2445
stpcpy (stpcpy (pn, ","), Line 2446
umaxtostr (eword + 1 Line 2447
+ (key->echar == SIZE_MAX), tmp)); Line 2448
}
error (0, 0, _("obsolescent key %s used; consider %s instead"), Line 2450
quote_n (0, obuf), quote_n (1, nbuf)); Line 2451
}
/* Warn about field specs that will never match. */
bool zero_width = key->sword != SIZE_MAX && key->eword < key->sword; Line 2455
if (zero_width) Line 2456
error (0, 0, _("key %lu has zero width and will be ignored"), keynum); Line 2457
/* Warn about significant leading blanks. */
bool implicit_skip = key_numeric (key) || key->month; Line 2460
bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */ Line 2461
if (!zero_width && !gkey_only && tab == TAB_DEFAULT && !line_offset Line 2462
&& ((!key->skipsblanks && !implicit_skip) Line 2463
|| (!key->skipsblanks && key->schar) Line 2464
|| (!key->skipeblanks && key->echar))) Line 2465
error (0, 0, _("leading blanks are significant in key %lu; " Line 2466
"consider also specifying 'b'"), keynum); Line 2467
/* Warn about numeric comparisons spanning fields,
as field delimiters could be interpreted as part
of the number (maybe only in other locales). */
if (!gkey_only && key_numeric (key)) Line 2472
{
size_t sword = key->sword + 1; Line 2474
size_t eword = key->eword + 1; Line 2475
if (!sword) Line 2476
sword++; Line 2477
if (!eword || sword < eword) Line 2478
error (0, 0, _("key %lu is numeric and spans multiple fields"), Line 2479
keynum); Line 2480
}
/* Flag global options not copied or specified in any key. */
if (ugkey.ignore && (ugkey.ignore == key->ignore)) Line 2484
ugkey.ignore = NULL; Line 2485
if (ugkey.translate && (ugkey.translate == key->translate)) Line 2486
ugkey.translate = NULL; Line 2487
ugkey.skipsblanks &= !key->skipsblanks; Line 2488
ugkey.skipeblanks &= !key->skipeblanks; Line 2489
ugkey.month &= !key->month; Line 2490
ugkey.numeric &= !key->numeric; Line 2491
ugkey.general_numeric &= !key->general_numeric; Line 2492
ugkey.human_numeric &= !key->human_numeric; Line 2493
ugkey.random &= !key->random; Line 2494
ugkey.version &= !key->version; Line 2495
ugkey.reverse &= !key->reverse; Line 2496
}
/* Warn about ignored global options flagged above.
This clears all flags if UGKEY is the only one in the list. */
if (!default_key_compare (&ugkey) Line 2501
|| (ugkey.reverse && (stable || unique) && keylist)) Line 2502
{
bool ugkey_reverse = ugkey.reverse; Line 2504
if (!(stable || unique)) Line 2505
ugkey.reverse = false; Line 2506
/* The following is too big, but guaranteed to be "big enough". */
char opts[sizeof short_options]; Line 2508
key_to_opts (&ugkey, opts); Line 2509
error (0, 0, Line 2510
ngettext ("option '-%s' is ignored", Line 2511
"options '-%s' are ignored", Line 2512
select_plural (strlen (opts))), opts); Line 2513
ugkey.reverse = ugkey_reverse; Line 2514
}
if (ugkey.reverse && !(stable || unique) && keylist) Line 2516
error (0, 0, _("option '-r' only applies to last-resort comparison")); Line 2517
} Block 84
/* Compare two lines A and B trying every key in sequence until there
are no more keys or a difference is found. */
static int Line 2523
keycompare (struct line const *a, struct line const *b) Line 2524
{
struct keyfield *key = keylist; Line 2526
/* For the first iteration only, the key positions have been
precomputed for us. */
char *texta = a->keybeg; Line 2530
char *textb = b->keybeg; Line 2531
char *lima = a->keylim; Line 2532
char *limb = b->keylim; Line 2533
int diff; Line 2535
while (true) Line 2537
{
char const *translate = key->translate; Line 2539
bool const *ignore = key->ignore; Line 2540
/* Treat field ends before field starts as empty fields. */
lima = MAX (texta, lima); Line 2543
limb = MAX (textb, limb); Line 2544
/* Find the lengths. */
size_t lena = lima - texta; Line 2547
size_t lenb = limb - textb; Line 2548
if (hard_LC_COLLATE || key_numeric (key) Line 2550
|| key->month || key->random || key->version) Line 2551
{
char *ta; Line 2553
char *tb; Line 2554
size_t tlena; Line 2555
size_t tlenb; Line 2556
char enda IF_LINT (= 0); Line 2558
char endb IF_LINT (= 0); Line 2559
void *allocated IF_LINT (= NULL); Line 2560
char stackbuf[4000]; Line 2561
if (ignore || translate) Line 2563
{
/* Compute with copies of the keys, which are the result of
translating or ignoring characters, and which need their
own storage. */
size_t i; Line 2569
/* Allocate space for copies. */
size_t size = lena + 1 + lenb + 1; Line 2572
if (size <= sizeof stackbuf) Line 2573
ta = stackbuf, allocated = NULL; Line 2574
else Line 2575
ta = allocated = xmalloc (size); Line 2576
tb = ta + lena + 1; Line 2577
/* Put into each copy a version of the key in which the
requested characters are ignored or translated. */
for (tlena = i = 0; i < lena; i++) Line 2581
if (! (ignore && ignore[to_uchar (texta[i])])) Line 2582
ta[tlena++] = (translate Line 2583
? translate[to_uchar (texta[i])] Line 2584
: texta[i]); Line 2585
ta[tlena] = '\0'; Line 2586
for (tlenb = i = 0; i < lenb; i++) Line 2588
if (! (ignore && ignore[to_uchar (textb[i])])) Line 2589
tb[tlenb++] = (translate Line 2590
? translate[to_uchar (textb[i])] Line 2591
: textb[i]); Line 2592
tb[tlenb] = '\0'; Line 2593
}
else Line 2595
{
/* Use the keys in-place, temporarily null-terminated. */
ta = texta; tlena = lena; enda = ta[tlena]; ta[tlena] = '\0'; Line 2598
tb = textb; tlenb = lenb; endb = tb[tlenb]; tb[tlenb] = '\0'; Line 2599
}
if (key->numeric) Line 2602
diff = numcompare (ta, tb); Line 2603
else if (key->general_numeric) Line 2604
diff = general_numcompare (ta, tb); Line 2605
else if (key->human_numeric) Line 2606
diff = human_numcompare (ta, tb); Line 2607
else if (key->month) Line 2608
diff = getmonth (ta, NULL) - getmonth (tb, NULL); Line 2609
else if (key->random) Line 2610
diff = compare_random (ta, tlena, tb, tlenb); Line 2611
else if (key->version) Line 2612
diff = filevercmp (ta, tb); Line 2613
else Line 2614
{
/* Locale-dependent string sorting. This is slower than
C-locale sorting, which is implemented below. */
if (tlena == 0) Line 2618
diff = - NONZERO (tlenb); Line 2619
else if (tlenb == 0) Line 2620
diff = 1; Line 2621
else Line 2622
diff = xmemcoll0 (ta, tlena + 1, tb, tlenb + 1); Line 2623
}
if (ignore || translate) Line 2626
free (allocated); Line 2627
else Line 2628
{
ta[tlena] = enda; Line 2630
tb[tlenb] = endb; Line 2631
}
}
else if (ignore) Line 2634
{
#define CMP_WITH_IGNORE(A, B) \ Line 2636
do \ Line 2637
{ \ Line 2638
while (true) \ Line 2639
{ \ Line 2640
while (texta < lima && ignore[to_uchar (*texta)]) \ Line 2641
++texta; \ Line 2642
while (textb < limb && ignore[to_uchar (*textb)]) \ Line 2643
++textb; \ Line 2644
if (! (texta < lima && textb < limb)) \ Line 2645
break; \ Line 2646
diff = to_uchar (A) - to_uchar (B); \ Line 2647
if (diff) \ Line 2648
goto not_equal; \ Line 2649
++texta; \ Line 2650
++textb; \ Line 2651
} \ Line 2652
\
diff = (texta < lima) - (textb < limb); \ Line 2654
} \ Line 2655
while (0) Line 2656
if (translate) Line 2658
CMP_WITH_IGNORE (translate[to_uchar (*texta)], Line 2659
translate[to_uchar (*textb)]); Line 2660
else Line 2661
CMP_WITH_IGNORE (*texta, *textb); Line 2662
}
else if (lena == 0) Line 2664
diff = - NONZERO (lenb); Line 2665
else if (lenb == 0) Line 2666
goto greater; Line 2667
else Line 2668
{
if (translate) Line 2670
{
while (texta < lima && textb < limb) Line 2672
{
diff = (to_uchar (translate[to_uchar (*texta++)]) Line 2674
- to_uchar (translate[to_uchar (*textb++)])); Line 2675
if (diff) Line 2676
goto not_equal; Line 2677
}
}
else Line 2680
{
diff = memcmp (texta, textb, MIN (lena, lenb)); Line 2682
if (diff) Line 2683
goto not_equal; Line 2684
}
diff = lena < lenb ? -1 : lena != lenb; Line 2686
}
if (diff) Line 2689
goto not_equal; Line 2690
key = key->next; Line 2692
if (! key) Line 2693
break; Line 2694
/* Find the beginning and limit of the next field. */
if (key->eword != SIZE_MAX) Line 2697
lima = limfield (a, key), limb = limfield (b, key); Line 2698
else Line 2699
lima = a->text + a->length - 1, limb = b->text + b->length - 1; Line 2700
if (key->sword != SIZE_MAX) Line 2702
texta = begfield (a, key), textb = begfield (b, key); Line 2703
else Line 2704
{
texta = a->text, textb = b->text; Line 2706
if (key->skipsblanks) Line 2707
{
while (texta < lima && blanks[to_uchar (*texta)]) Line 2709
++texta; Line 2710
while (textb < limb && blanks[to_uchar (*textb)]) Line 2711
++textb; Line 2712
}
}
}
return 0; Line 2717
greater: Line 2719
diff = 1; Line 2720
not_equal: Line 2721
return key->reverse ? -diff : diff; Line 2722
} Block 85
/* Compare two lines A and B, returning negative, zero, or positive
depending on whether A compares less than, equal to, or greater than B. */
static int Line 2728
compare (struct line const *a, struct line const *b) Line 2729
{
int diff; Line 2731
size_t alen, blen; Line 2732
/* First try to compare on the specified keys (if any).
The only two cases with no key at all are unadorned sort,
and unadorned sort -r. */
if (keylist) Line 2737
{
diff = keycompare (a, b); Line 2739
if (diff || unique || stable) Line 2740
return diff; Line 2741
}
/* If the keys all compare equal (or no keys were specified)
fall through to the default comparison. */
alen = a->length - 1, blen = b->length - 1; Line 2746
if (alen == 0) Line 2748
diff = - NONZERO (blen); Line 2749
else if (blen == 0) Line 2750
diff = 1; Line 2751
else if (hard_LC_COLLATE) Line 2752
{
/* xmemcoll0 is a performance enhancement as
it will not unconditionally write '\0' after the
passed in buffers, which was seen to give around
a 3% increase in performance for short lines. */
diff = xmemcoll0 (a->text, alen + 1, b->text, blen + 1); Line 2758
}
else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen)))) Line 2760
diff = alen < blen ? -1 : alen != blen; Line 2761
return reverse ? -diff : diff; Line 2763
} Block 86
/* Write LINE to output stream FP; the output file's name is
OUTPUT_FILE if OUTPUT_FILE is non-null, and is the standard output
otherwise. If debugging is enabled and FP is standard output,
append some debugging information. */
static void Line 2771
write_line (struct line const *line, FILE *fp, char const *output_file) Line 2772
{
char *buf = line->text; Line 2774
size_t n_bytes = line->length; Line 2775
char *ebuf = buf + n_bytes; Line 2776
if (!output_file && debug) Line 2778
{
/* Convert TAB to '>' and EOL to \n, and then output debugging info. */
char const *c = buf; Line 2781
while (c < ebuf) Line 2783
{
char wc = *c++; Line 2785
if (wc == '\t') Line 2786
wc = '>'; Line 2787
else if (c == ebuf) Line 2788
wc = '\n'; Line 2789
if (fputc (wc, fp) == EOF) Line 2790
sort_die (_("write failed"), output_file); Line 2791
}
debug_line (line); Line 2794
}
else Line 2796
{
ebuf[-1] = eolchar; Line 2798
if (fwrite (buf, 1, n_bytes, fp) != n_bytes) Line 2799...!syscalls auto-comment...
sort_die (_("write failed"), output_file); Line 2800
ebuf[-1] = '\0'; Line 2801
}
} Block 87
/* Check that the lines read from FILE_NAME come in order. Return
true if they are in order. If CHECKONLY == 'c', also print a
diagnostic (FILE_NAME, line number, contents of line) to stderr if
they are not in order. */
static bool Line 2810
check (char const *file_name, char checkonly) Line 2811
{
FILE *fp = xfopen (file_name, "r"); Line 2813...!syscalls auto-comment...
struct buffer buf; /* Input buffer. */ Line 2814
struct line temp; /* Copy of previous line. */ Line 2815
size_t alloc = 0; Line 2816
uintmax_t line_number = 0; Line 2817
struct keyfield const *key = keylist; Line 2818
bool nonunique = ! unique; Line 2819
bool ordered = true; Line 2820
initbuf (&buf, sizeof (struct line), Line 2822
MAX (merge_buffer_size, sort_size)); Line 2823
temp.text = NULL; Line 2824
while (fillbuf (&buf, fp, file_name)) Line 2826
{
struct line const *line = buffer_linelim (&buf); Line 2828
struct line const *linebase = line - buf.nlines; Line 2829
/* Make sure the line saved from the old buffer contents is
less than or equal to the first line of the new buffer. */
if (alloc && nonunique <= compare (&temp, line - 1)) Line 2833
{
found_disorder: Line 2835
{
if (checkonly == 'c') Line 2837
{
struct line const *disorder_line = line - 1; Line 2839
uintmax_t disorder_line_number = Line 2840
buffer_linelim (&buf) - disorder_line + line_number; Line 2841
char hr_buf[INT_BUFSIZE_BOUND (disorder_line_number)]; Line 2842
fprintf (stderr, _("%s: %s:%s: disorder: "), Line 2843
program_name, file_name, Line 2844
umaxtostr (disorder_line_number, hr_buf)); Line 2845
write_line (disorder_line, stderr, _("standard error")); Line 2846
}
ordered = false; Line 2849
break; Line 2850
}
}
/* Compare each line in the buffer with its successor. */
while (linebase < --line) Line 2855
if (nonunique <= compare (line, line - 1)) Line 2856
goto found_disorder; Line 2857
line_number += buf.nlines; Line 2859
/* Save the last line of the buffer. */
if (alloc < line->length) Line 2862
{
do
{
alloc *= 2; Line 2866
if (! alloc) Line 2867
{
alloc = line->length; Line 2869
break; Line 2870
}
}
while (alloc < line->length); Line 2873
free (temp.text); Line 2875
temp.text = xmalloc (alloc); Line 2876
}
memcpy (temp.text, line->text, line->length); Line 2878
temp.length = line->length; Line 2879
if (key) Line 2880
{
temp.keybeg = temp.text + (line->keybeg - line->text); Line 2882
temp.keylim = temp.text + (line->keylim - line->text); Line 2883
}
}
xfclose (fp, file_name); Line 2887...!syscalls auto-comment...
free (buf.buf); Line 2888
free (temp.text); Line 2889
return ordered; Line 2890
} Block 88
/* Open FILES (there are NFILES of them) and store the resulting array
of stream pointers into (*PFPS). Allocate the array. Return the
number of successfully opened files, setting errno if this value is
less than NFILES. */
static size_t Line 2898
open_input_files (struct sortfile *files, size_t nfiles, FILE ***pfps) Line 2899
{
FILE **fps = *pfps = xnmalloc (nfiles, sizeof *fps); Line 2901
int i; Line 2902
/* Open as many input files as we can. */
for (i = 0; i < nfiles; i++) Line 2905
{
fps[i] = (files[i].temp && files[i].temp->state != UNCOMPRESSED Line 2907
? open_temp (files[i].temp) Line 2908
: stream_open (files[i].name, "r")); Line 2909...!syscalls auto-comment...
if (!fps[i]) Line 2910
break; Line 2911
}
return i; Line 2914
} Block 89
/* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
files (all of which are at the start of the FILES array), and
NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
FPS is the vector of open stream corresponding to the files.
Close input and output streams before returning.
OUTPUT_FILE gives the name of the output file. If it is NULL,
the output file is standard output. */
static void Line 2925
mergefps (struct sortfile *files, size_t ntemps, size_t nfiles, Line 2926
FILE *ofp, char const *output_file, FILE **fps) Line 2927
{
struct buffer *buffer = xnmalloc (nfiles, sizeof *buffer); Line 2929
/* Input buffers for each file. */
struct line saved; /* Saved line storage for unique check. */ Line 2931
struct line const *savedline = NULL; Line 2932
/* &saved if there is a saved line. */
size_t savealloc = 0; /* Size allocated for the saved line. */ Line 2934
struct line const **cur = xnmalloc (nfiles, sizeof *cur); Line 2935
/* Current line in each line table. */
struct line const **base = xnmalloc (nfiles, sizeof *base); Line 2937
/* Base of each line table. */
size_t *ord = xnmalloc (nfiles, sizeof *ord); Line 2939
/* Table representing a permutation of fps,
such that cur[ord[0]] is the smallest line
and will be next output. */
size_t i; Line 2943
size_t j; Line 2944
size_t t; Line 2945
struct keyfield const *key = keylist; Line 2946
saved.text = NULL; Line 2947
/* Read initial lines from each input file. */
for (i = 0; i < nfiles; ) Line 2950
{
initbuf (&buffer[i], sizeof (struct line), Line 2952
MAX (merge_buffer_size, sort_size / nfiles)); Line 2953
if (fillbuf (&buffer[i], fps[i], files[i].name)) Line 2954
{
struct line const *linelim = buffer_linelim (&buffer[i]); Line 2956
cur[i] = linelim - 1; Line 2957
base[i] = linelim - buffer[i].nlines; Line 2958
i++; Line 2959
}
else Line 2961
{
/* fps[i] is empty; eliminate it from future consideration. */
xfclose (fps[i], files[i].name); Line 2964...!syscalls auto-comment...
if (i < ntemps) Line 2965
{
ntemps--; Line 2967
zaptemp (files[i].name); Line 2968
}
free (buffer[i].buf); Line 2970
--nfiles; Line 2971
for (j = i; j < nfiles; ++j) Line 2972
{
files[j] = files[j + 1]; Line 2974
fps[j] = fps[j + 1]; Line 2975
}
}
}
/* Set up the ord table according to comparisons among input lines.
Since this only reorders two items if one is strictly greater than
the other, it is stable. */
for (i = 0; i < nfiles; ++i) Line 2983
ord[i] = i; Line 2984
for (i = 1; i < nfiles; ++i) Line 2985
if (0 < compare (cur[ord[i - 1]], cur[ord[i]])) Line 2986
t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; Line 2987
/* Repeatedly output the smallest line until no input remains. */
while (nfiles) Line 2990
{
struct line const *smallest = cur[ord[0]]; Line 2992
/* If uniquified output is turned on, output only the first of
an identical series of lines. */
if (unique) Line 2996
{
if (savedline && compare (savedline, smallest)) Line 2998
{
savedline = NULL; Line 3000
write_line (&saved, ofp, output_file); Line 3001
}
if (!savedline) Line 3003
{
savedline = &saved; Line 3005
if (savealloc < smallest->length) Line 3006
{
do
if (! savealloc) Line 3009
{
savealloc = smallest->length; Line 3011
break; Line 3012
}
while ((savealloc *= 2) < smallest->length); Line 3014
free (saved.text); Line 3016
saved.text = xmalloc (savealloc); Line 3017
}
saved.length = smallest->length; Line 3019
memcpy (saved.text, smallest->text, saved.length); Line 3020
if (key) Line 3021
{
saved.keybeg = Line 3023
saved.text + (smallest->keybeg - smallest->text); Line 3024
saved.keylim = Line 3025
saved.text + (smallest->keylim - smallest->text); Line 3026
}
}
}
else Line 3030
write_line (smallest, ofp, output_file); Line 3031
/* Check if we need to read more lines into core. */
if (base[ord[0]] < smallest) Line 3034
cur[ord[0]] = smallest - 1; Line 3035
else Line 3036
{
if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name)) Line 3038
{
struct line const *linelim = buffer_linelim (&buffer[ord[0]]); Line 3040
cur[ord[0]] = linelim - 1; Line 3041
base[ord[0]] = linelim - buffer[ord[0]].nlines; Line 3042
}
else Line 3044
{
/* We reached EOF on fps[ord[0]]. */
for (i = 1; i < nfiles; ++i) Line 3047
if (ord[i] > ord[0]) Line 3048
--ord[i]; Line 3049
--nfiles; Line 3050
xfclose (fps[ord[0]], files[ord[0]].name); Line 3051...!syscalls auto-comment...
if (ord[0] < ntemps) Line 3052
{
ntemps--; Line 3054
zaptemp (files[ord[0]].name); Line 3055
}
free (buffer[ord[0]].buf); Line 3057
for (i = ord[0]; i < nfiles; ++i) Line 3058
{
fps[i] = fps[i + 1]; Line 3060
files[i] = files[i + 1]; Line 3061
buffer[i] = buffer[i + 1]; Line 3062
cur[i] = cur[i + 1]; Line 3063
base[i] = base[i + 1]; Line 3064
}
for (i = 0; i < nfiles; ++i) Line 3066
ord[i] = ord[i + 1]; Line 3067
continue; Line 3068
}
}
/* The new line just read in may be larger than other lines
already in main memory; push it back in the queue until we
encounter a line larger than it. Optimize for the common
case where the new line is smallest. */
{
size_t lo = 1; Line 3077
size_t hi = nfiles; Line 3078
size_t probe = lo; Line 3079
size_t ord0 = ord[0]; Line 3080
size_t count_of_smaller_lines; Line 3081
while (lo < hi) Line 3083
{
int cmp = compare (cur[ord0], cur[ord[probe]]); Line 3085
if (cmp < 0 || (cmp == 0 && ord0 < ord[probe])) Line 3086
hi = probe; Line 3087
else Line 3088
lo = probe + 1; Line 3089
probe = (lo + hi) / 2; Line 3090
}
count_of_smaller_lines = lo - 1; Line 3093
for (j = 0; j < count_of_smaller_lines; j++) Line 3094
ord[j] = ord[j + 1]; Line 3095
ord[count_of_smaller_lines] = ord0; Line 3096
}
}
if (unique && savedline) Line 3100
{
write_line (&saved, ofp, output_file); Line 3102
free (saved.text); Line 3103
}
xfclose (ofp, output_file); Line 3106...!syscalls auto-comment...
free (fps); Line 3107
free (buffer); Line 3108
free (ord); Line 3109
free (base); Line 3110
free (cur); Line 3111
} Block 90
/* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
files (all of which are at the start of the FILES array), and
NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
Close input and output files before returning.
OUTPUT_FILE gives the name of the output file.
Return the number of files successfully merged. This number can be
less than NFILES if we ran low on file descriptors, but in this
case it is never less than 2. */
static size_t Line 3124
mergefiles (struct sortfile *files, size_t ntemps, size_t nfiles, Line 3125
FILE *ofp, char const *output_file) Line 3126
{
FILE **fps; Line 3128
size_t nopened = open_input_files (files, nfiles, &fps); Line 3129
if (nopened < nfiles && nopened < 2) Line 3130
sort_die (_("open failed"), files[nopened].name); Line 3131
mergefps (files, ntemps, nopened, ofp, output_file, fps); Line 3132
return nopened; Line 3133
} Block 91
/* Merge into T (of size NLINES) the two sorted arrays of lines
LO (with NLINES / 2 members), and
T - (NLINES / 2) (with NLINES - NLINES / 2 members).
T and LO point just past their respective arrays, and the arrays
are in reverse order. NLINES must be at least 2. */
static void Line 3142
mergelines (struct line *restrict t, size_t nlines, Line 3143
struct line const *restrict lo) Line 3144
{
size_t nlo = nlines / 2; Line 3146
size_t nhi = nlines - nlo; Line 3147
struct line *hi = t - nlo; Line 3148
while (true) Line 3150
if (compare (lo - 1, hi - 1) <= 0) Line 3151
{
*--t = *--lo; Line 3153
if (! --nlo) Line 3154
{
/* HI must equal T now, and there is no need to copy from
HI to T. */
return; Line 3158
}
}
else Line 3161
{
*--t = *--hi; Line 3163
if (! --nhi) Line 3164
{
do
*--t = *--lo; Line 3167
while (--nlo); Line 3168
return; Line 3170
}
}
} Block 92
/* Sort the array LINES with NLINES members, using TEMP for temporary space.
Do this all within one thread. NLINES must be at least 2.
If TO_TEMP, put the sorted output into TEMP, and TEMP is as large as LINES.
Otherwise the sort is in-place and TEMP is half-sized.
The input and output arrays are in reverse order, and LINES and
TEMP point just past the end of their respective arrays.
Use a recursive divide-and-conquer algorithm, in the style
suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
the optimization suggested by exercise 5.2.4-10; this requires room
for only 1.5*N lines, rather than the usual 2*N lines. Knuth
writes that this memory optimization was originally published by
D. A. Bell, Comp J. 1 (1958), 75. */
static void Line 3189
sequential_sort (struct line *restrict lines, size_t nlines, Line 3190
struct line *restrict temp, bool to_temp) Line 3191
{
if (nlines == 2) Line 3193
{
/* Declare 'swap' as int, not bool, to work around a bug
<https://lists.gnu.org/r/bug-coreutils/2005-10/msg00086.html>
in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */
int swap = (0 < compare (&lines[-1], &lines[-2])); Line 3198
if (to_temp) Line 3199
{
temp[-1] = lines[-1 - swap]; Line 3201
temp[-2] = lines[-2 + swap]; Line 3202
}
else if (swap) Line 3204
{
temp[-1] = lines[-1]; Line 3206
lines[-1] = lines[-2]; Line 3207
lines[-2] = temp[-1]; Line 3208
}
}
else Line 3211
{
size_t nlo = nlines / 2; Line 3213
size_t nhi = nlines - nlo; Line 3214
struct line *lo = lines; Line 3215
struct line *hi = lines - nlo; Line 3216
sequential_sort (hi, nhi, temp - (to_temp ? nlo : 0), to_temp); Line 3218
if (1 < nlo) Line 3219
sequential_sort (lo, nlo, temp, !to_temp); Line 3220
else if (!to_temp) Line 3221
temp[-1] = lo[-1]; Line 3222
struct line *dest; Line 3224
struct line const *sorted_lo; Line 3225
if (to_temp) Line 3226
{
dest = temp; Line 3228
sorted_lo = lines; Line 3229
}
else Line 3231
{
dest = lines; Line 3233
sorted_lo = temp; Line 3234
}
mergelines (dest, nlines, sorted_lo); Line 3236
}
} Block 93
static struct merge_node *init_node (struct merge_node *restrict, Line 3240
struct merge_node *restrict, Line 3241
struct line *, size_t, size_t, bool); Line 3242
/* Create and return a merge tree for NTHREADS threads, sorting NLINES
lines, with destination DEST. */
static struct merge_node * Line 3247
merge_tree_init (size_t nthreads, size_t nlines, struct line *dest) Line 3248
{
struct merge_node *merge_tree = xmalloc (2 * sizeof *merge_tree * nthreads); Line 3250
struct merge_node *root = merge_tree; Line 3252
root->lo = root->hi = root->end_lo = root->end_hi = NULL; Line 3253
root->dest = NULL; Line 3254
root->nlo = root->nhi = nlines; Line 3255
root->parent = NULL; Line 3256
root->level = MERGE_END; Line 3257
root->queued = false; Line 3258
pthread_mutex_init (&root->lock, NULL); Line 3259
init_node (root, root + 1, dest, nthreads, nlines, false); Line 3261
return merge_tree; Line 3262
} Block 94
/* Destroy the merge tree. */
static void Line 3266
merge_tree_destroy (size_t nthreads, struct merge_node *merge_tree) Line 3267
{
size_t n_nodes = nthreads * 2; Line 3269
struct merge_node *node = merge_tree; Line 3270
while (n_nodes--) Line 3272
{
pthread_mutex_destroy (&node->lock); Line 3274
node++; Line 3275
}
free (merge_tree); Line 3278
} Block 95
/* Initialize a merge tree node and its descendants. The node's
parent is PARENT. The node and its descendants are taken from the
array of nodes NODE_POOL. Their destination starts at DEST; they
will consume NTHREADS threads. The total number of sort lines is
TOTAL_LINES. IS_LO_CHILD is true if the node is the low child of
its parent. */
static struct merge_node * Line 3288
init_node (struct merge_node *restrict parent, Line 3289
struct merge_node *restrict node_pool, Line 3290
struct line *dest, size_t nthreads, Line 3291
size_t total_lines, bool is_lo_child) Line 3292
{
size_t nlines = (is_lo_child ? parent->nlo : parent->nhi); Line 3294
size_t nlo = nlines / 2; Line 3295
size_t nhi = nlines - nlo; Line 3296
struct line *lo = dest - total_lines; Line 3297
struct line *hi = lo - nlo; Line 3298
struct line **parent_end = (is_lo_child ? &parent->end_lo : &parent->end_hi); Line 3299
struct merge_node *node = node_pool++; Line 3301
node->lo = node->end_lo = lo; Line 3302
node->hi = node->end_hi = hi; Line 3303
node->dest = parent_end; Line 3304
node->nlo = nlo; Line 3305
node->nhi = nhi; Line 3306
node->parent = parent; Line 3307
node->level = parent->level + 1; Line 3308
node->queued = false; Line 3309
pthread_mutex_init (&node->lock, NULL); Line 3310
if (nthreads > 1) Line 3312
{
size_t lo_threads = nthreads / 2; Line 3314
size_t hi_threads = nthreads - lo_threads; Line 3315
node->lo_child = node_pool; Line 3316
node_pool = init_node (node, node_pool, lo, lo_threads, Line 3317
total_lines, true); Line 3318
node->hi_child = node_pool; Line 3319
node_pool = init_node (node, node_pool, hi, hi_threads, Line 3320
total_lines, false); Line 3321
}
else Line 3323
{
node->lo_child = NULL; Line 3325
node->hi_child = NULL; Line 3326
}
return node_pool; Line 3328
} Block 96
/* Compare two merge nodes A and B for priority. */
static int Line 3334
compare_nodes (void const *a, void const *b) Line 3335
{
struct merge_node const *nodea = a; Line 3337
struct merge_node const *nodeb = b; Line 3338
if (nodea->level == nodeb->level) Line 3339
return (nodea->nlo + nodea->nhi) < (nodeb->nlo + nodeb->nhi); Line 3340
return nodea->level < nodeb->level; Line 3341
} Block 97
/* Lock a merge tree NODE. */
static inline void Line 3346
lock_node (struct merge_node *node) Line 3347
{
pthread_mutex_lock (&node->lock); Line 3349
} Block 98
/* Unlock a merge tree NODE. */
static inline void Line 3354
unlock_node (struct merge_node *node) Line 3355
{
pthread_mutex_unlock (&node->lock); Line 3357
} Block 99
/* Destroy merge QUEUE. */
static void Line 3362
queue_destroy (struct merge_node_queue *queue) Line 3363
{
heap_free (queue->priority_queue); Line 3365
pthread_cond_destroy (&queue->cond); Line 3366
pthread_mutex_destroy (&queue->mutex); Line 3367
} Block 100
/* Initialize merge QUEUE, allocating space suitable for a maximum of
NTHREADS threads. */
static void Line 3373
queue_init (struct merge_node_queue *queue, size_t nthreads) Line 3374
{
/* Though it's highly unlikely all nodes are in the heap at the same
time, the heap should accommodate all of them. Counting a NULL
dummy head for the heap, reserve 2 * NTHREADS nodes. */
queue->priority_queue = heap_alloc (compare_nodes, 2 * nthreads); Line 3379
pthread_mutex_init (&queue->mutex, NULL); Line 3380
pthread_cond_init (&queue->cond, NULL); Line 3381
}
/* Insert NODE into QUEUE. The caller either holds a lock on NODE, or
does not need to lock NODE. */
static void Line 3387
queue_insert (struct merge_node_queue *queue, struct merge_node *node) Line 3388
{
pthread_mutex_lock (&queue->mutex); Line 3390
heap_insert (queue->priority_queue, node); Line 3391
node->queued = true; Line 3392
pthread_cond_signal (&queue->cond); Line 3393
pthread_mutex_unlock (&queue->mutex); Line 3394
} Block 102
/* Pop the top node off the priority QUEUE, lock the node, return it. */
static struct merge_node * Line 3399
queue_pop (struct merge_node_queue *queue) Line 3400
{
struct merge_node *node; Line 3402
pthread_mutex_lock (&queue->mutex); Line 3403
while (! (node = heap_remove_top (queue->priority_queue))) Line 3404
pthread_cond_wait (&queue->cond, &queue->mutex); Line 3405
pthread_mutex_unlock (&queue->mutex); Line 3406
lock_node (node); Line 3407
node->queued = false; Line 3408
return node; Line 3409
} Block 103
/* Output LINE to TFP, unless -u is specified and the line compares
equal to the previous line. TEMP_OUTPUT is the name of TFP, or
is null if TFP is standard output.
This function does not save the line for comparison later, so it is
appropriate only for internal sort. */
static void Line 3419
write_unique (struct line const *line, FILE *tfp, char const *temp_output) Line 3420
{
if (unique) Line 3422
{
if (saved_line.text && ! compare (line, &saved_line)) Line 3424
return; Line 3425
saved_line = *line; Line 3426
}
write_line (line, tfp, temp_output); Line 3429
} Block 104
/* Merge the lines currently available to a NODE in the binary
merge tree. Merge a number of lines appropriate for this merge
level, assuming TOTAL_LINES is the total number of lines.
If merging at the top level, send output to TFP. TEMP_OUTPUT is
the name of TFP, or is null if TFP is standard output. */
static void Line 3439
mergelines_node (struct merge_node *restrict node, size_t total_lines, Line 3440
FILE *tfp, char const *temp_output) Line 3441
{
struct line *lo_orig = node->lo; Line 3443
struct line *hi_orig = node->hi; Line 3444
size_t to_merge = MAX_MERGE (total_lines, node->level); Line 3445
size_t merged_lo; Line 3446
size_t merged_hi; Line 3447
if (node->level > MERGE_ROOT) Line 3449
{
/* Merge to destination buffer. */
struct line *dest = *node->dest; Line 3452
while (node->lo != node->end_lo && node->hi != node->end_hi && to_merge--)Line 3453
if (compare (node->lo - 1, node->hi - 1) <= 0) Line 3454
*--dest = *--node->lo; Line 3455
else Line 3456
*--dest = *--node->hi; Line 3457
merged_lo = lo_orig - node->lo; Line 3459
merged_hi = hi_orig - node->hi; Line 3460
if (node->nhi == merged_hi) Line 3462
while (node->lo != node->end_lo && to_merge--) Line 3463
*--dest = *--node->lo; Line 3464
else if (node->nlo == merged_lo) Line 3465
while (node->hi != node->end_hi && to_merge--) Line 3466
*--dest = *--node->hi; Line 3467
*node->dest = dest; Line 3468
}
else Line 3470
{
/* Merge directly to output. */
while (node->lo != node->end_lo && node->hi != node->end_hi && to_merge--)Line 3473
{
if (compare (node->lo - 1, node->hi - 1) <= 0) Line 3475
write_unique (--node->lo, tfp, temp_output); Line 3476
else Line 3477
write_unique (--node->hi, tfp, temp_output); Line 3478
}
merged_lo = lo_orig - node->lo; Line 3481
merged_hi = hi_orig - node->hi; Line 3482
if (node->nhi == merged_hi) Line 3484
{
while (node->lo != node->end_lo && to_merge--) Line 3486
write_unique (--node->lo, tfp, temp_output); Line 3487
}
else if (node->nlo == merged_lo) Line 3489
{
while (node->hi != node->end_hi && to_merge--) Line 3491
write_unique (--node->hi, tfp, temp_output); Line 3492
}
}
/* Update NODE. */
merged_lo = lo_orig - node->lo; Line 3497
merged_hi = hi_orig - node->hi; Line 3498
node->nlo -= merged_lo; Line 3499
node->nhi -= merged_hi; Line 3500
} Block 105
/* Into QUEUE, insert NODE if it is not already queued, and if one of
NODE's children has available lines and the other either has
available lines or has exhausted its lines. */
static void Line 3507
queue_check_insert (struct merge_node_queue *queue, struct merge_node *node) Line 3508
{
if (! node->queued) Line 3510
{
bool lo_avail = (node->lo - node->end_lo) != 0; Line 3512
bool hi_avail = (node->hi - node->end_hi) != 0; Line 3513
if (lo_avail ? hi_avail || ! node->nhi : hi_avail && ! node->nlo) Line 3514
queue_insert (queue, node); Line 3515
}
} Block 106
/* Into QUEUE, insert NODE's parent if the parent can now be worked on. */
static void Line 3521
queue_check_insert_parent (struct merge_node_queue *queue, Line 3522
struct merge_node *node) Line 3523
{
if (node->level > MERGE_ROOT) Line 3525
{
lock_node (node->parent); Line 3527
queue_check_insert (queue, node->parent); Line 3528
unlock_node (node->parent); Line 3529
}
else if (node->nlo + node->nhi == 0) Line 3531
{
/* If the MERGE_ROOT NODE has finished merging, insert the
MERGE_END node. */
queue_insert (queue, node->parent); Line 3535
}
} Block 107
/* Repeatedly pop QUEUE for a node with lines to merge, and merge at least
some of those lines, until the MERGE_END node is popped.
TOTAL_LINES is the total number of lines. If merging at the top
level, send output to TFP. TEMP_OUTPUT is the name of TFP, or is
null if TFP is standard output. */
static void Line 3545
merge_loop (struct merge_node_queue *queue, Line 3546
size_t total_lines, FILE *tfp, char const *temp_output) Line 3547
{
while (1) Line 3549
{
struct merge_node *node = queue_pop (queue); Line 3551
if (node->level == MERGE_END) Line 3553
{
unlock_node (node); Line 3555
/* Reinsert so other threads can pop it. */
queue_insert (queue, node); Line 3557
break; Line 3558
}
mergelines_node (node, total_lines, tfp, temp_output); Line 3560
queue_check_insert (queue, node); Line 3561
queue_check_insert_parent (queue, node); Line 3562
unlock_node (node); Line 3564
}
} Block 108
static void sortlines (struct line *restrict, size_t, size_t, Line 3569
struct merge_node *, struct merge_node_queue *, Line 3570
FILE *, char const *); Line 3571
/* Thread arguments for sortlines_thread. */
struct thread_args Line 3575
{
/* Source, i.e., the array of lines to sort. This points just past
the end of the array. */
struct line *lines; Line 3579
/* Number of threads to use. If 0 or 1, sort single-threaded. */
size_t nthreads; Line 3582
/* Number of lines in LINES and DEST. */
size_t const total_lines; Line 3585
/* Merge node. Lines from this node and this node's sibling will merged
to this node's parent. */
struct merge_node *const node; Line 3589
/* The priority queue controlling available work for the entire
internal sort. */
struct merge_node_queue *const queue; Line 3593
/* If at the top level, the file to output to, and the file's name.
If the file is standard output, the file's name is null. */
FILE *tfp; Line 3597
char const *output_temp; Line 3598
};
/* Like sortlines, except with a signature acceptable to pthread_create. */
static void * Line 3603
sortlines_thread (void *data) Line 3604...!syscalls auto-comment...
{
struct thread_args const *args = data; Line 3606
sortlines (args->lines, args->nthreads, args->total_lines, Line 3607
args->node, args->queue, args->tfp, Line 3608
args->output_temp); Line 3609
return NULL; Line 3610
} Block 110
/* Sort lines, possibly in parallel. The arguments are as in struct
thread_args above.
The algorithm has three phases: node creation, sequential sort,
and binary merge.
During node creation, sortlines recursively visits each node in the
binary merge tree and creates a NODE structure corresponding to all the
future line merging NODE is responsible for. For each call to
sortlines, half the available threads are assigned to each recursive
call, until a leaf node having only 1 available thread is reached.
Each leaf node then performs two sequential sorts, one on each half of
the lines it is responsible for. It records in its NODE structure that
there are two sorted sublists available to merge from, and inserts its
NODE into the priority queue.
The binary merge phase then begins. Each thread drops into a loop
where the thread retrieves a NODE from the priority queue, merges lines
available to that NODE, and potentially insert NODE or its parent back
into the queue if there are sufficient available lines for them to
merge. This continues until all lines at all nodes of the merge tree
have been merged. */
static void Line 3637
sortlines (struct line *restrict lines, size_t nthreads, Line 3638
size_t total_lines, struct merge_node *node, Line 3639
struct merge_node_queue *queue, FILE *tfp, char const *temp_output) Line 3640
{
size_t nlines = node->nlo + node->nhi; Line 3642
/* Calculate thread arguments. */
size_t lo_threads = nthreads / 2; Line 3645
size_t hi_threads = nthreads - lo_threads; Line 3646
pthread_t thread; Line 3647
struct thread_args args = {lines, lo_threads, total_lines, Line 3648
node->lo_child, queue, tfp, temp_output}; Line 3649
if (nthreads > 1 && SUBTHREAD_LINES_HEURISTIC <= nlines Line 3651
&& pthread_create (&thread, NULL, sortlines_thread, &args) == 0) Line 3652
{
sortlines (lines - node->nlo, hi_threads, total_lines, Line 3654
node->hi_child, queue, tfp, temp_output); Line 3655
pthread_join (thread, NULL); Line 3656
}
else Line 3658
{
/* Nthreads = 1, this is a leaf NODE, or pthread_create failed.
Sort with 1 thread. */
size_t nlo = node->nlo; Line 3662
size_t nhi = node->nhi; Line 3663
struct line *temp = lines - total_lines; Line 3664
if (1 < nhi) Line 3665
sequential_sort (lines - nlo, nhi, temp - nlo / 2, false); Line 3666
if (1 < nlo) Line 3667
sequential_sort (lines, nlo, temp, false); Line 3668
/* Update merge NODE. No need to lock yet. */
node->lo = lines; Line 3671
node->hi = lines - nlo; Line 3672
node->end_lo = lines - nlo; Line 3673
node->end_hi = lines - nlo - nhi; Line 3674
queue_insert (queue, node); Line 3676
merge_loop (queue, total_lines, tfp, temp_output); Line 3677
}
} Block 111
/* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are
the same as OUTFILE. If found, replace each with the same
temporary copy that can be merged into OUTFILE without destroying
OUTFILE before it is completely read. This temporary copy does not
count as a merge temp, so don't worry about incrementing NTEMPS in
the caller; final cleanup will remove it, not zaptemp.
This test ensures that an otherwise-erroneous use like
"sort -m -o FILE ... FILE ..." copies FILE before writing to it.
It's not clear that POSIX requires this nicety.
Detect common error cases, but don't try to catch obscure cases like
"cat ... FILE ... | sort -m -o FILE"
where traditional "sort" doesn't copy the input and where
people should know that they're getting into trouble anyway.
Catching these obscure cases would slow down performance in
common cases. */
static void Line 3698
avoid_trashing_input (struct sortfile *files, size_t ntemps, Line 3699
size_t nfiles, char const *outfile) Line 3700
{
bool got_outstat = false; Line 3702
struct stat outstat; Line 3703
struct tempnode *tempcopy = NULL; Line 3704
for (size_t i = ntemps; i < nfiles; i++) Line 3706
{
bool is_stdin = STREQ (files[i].name, "-"); Line 3708
bool same; Line 3709
struct stat instat; Line 3710
if (outfile && STREQ (outfile, files[i].name) && !is_stdin) Line 3712
same = true; Line 3713
else Line 3714
{
if (! got_outstat) Line 3716
{
if (fstat (STDOUT_FILENO, &outstat) != 0) Line 3718...!syscalls auto-comment......!syscalls auto-comment...
break; Line 3719
got_outstat = true; Line 3720
}
same = (((is_stdin Line 3723
? fstat (STDIN_FILENO, &instat) Line 3724...!syscalls auto-comment......!syscalls auto-comment...
: stat (files[i].name, &instat)) Line 3725...!syscalls auto-comment...
== 0) Line 3726
&& SAME_INODE (instat, outstat)); Line 3727
}
if (same) Line 3730
{
if (! tempcopy) Line 3732
{
FILE *tftp; Line 3734
tempcopy = create_temp (&tftp); Line 3735
mergefiles (&files[i], 0, 1, tftp, tempcopy->name); Line 3736
}
files[i].name = tempcopy->name; Line 3739
files[i].temp = tempcopy; Line 3740
}
}
} Block 112
/* Scan the input files to ensure all are accessible.
Otherwise exit with a diagnostic.
This will catch common issues with permissions etc.
but will fail to notice issues where you can open but not read,
like when a directory is specified on some systems.
Catching these obscure cases could slow down performance in
common cases. */
static void Line 3754
check_inputs (char *const *files, size_t nfiles) Line 3755
{
for (size_t i = 0; i < nfiles; i++) Line 3757
{
if (STREQ (files[i], "-")) Line 3759
continue; Line 3760
if (euidaccess (files[i], R_OK) != 0) Line 3762
sort_die (_("cannot read"), files[i]); Line 3763
}
} Block 113
/* Ensure a specified output file can be created or written to,
and point stdout to it. Do not truncate the file.
Exit with a diagnostic on failure. */
static void Line 3771
check_output (char const *outfile) Line 3772
{
if (outfile) Line 3774
{
int oflags = O_WRONLY | O_BINARY | O_CLOEXEC | O_CREAT; Line 3776
int outfd = open (outfile, oflags, MODE_RW_UGO); Line 3777...!syscalls auto-comment...
if (outfd < 0) Line 3778
sort_die (_("open failed"), outfile); Line 3779
move_fd (outfd, STDOUT_FILENO); Line 3780
}
} Block 114
/* Merge the input FILES. NTEMPS is the number of files at the
start of FILES that are temporary; it is zero at the top level.
NFILES is the total number of files. Put the output in
OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */
static void Line 3789
merge (struct sortfile *files, size_t ntemps, size_t nfiles, Line 3790
char const *output_file) Line 3791
{
while (nmerge < nfiles) Line 3793
{
/* Number of input files processed so far. */
size_t in; Line 3796
/* Number of output files generated so far. */
size_t out; Line 3799
/* nfiles % NMERGE; this counts input files that are left over
after all full-sized merges have been done. */
size_t remainder; Line 3803
/* Number of easily-available slots at the next loop iteration. */
size_t cheap_slots; Line 3806
/* Do as many NMERGE-size merges as possible. In the case that
nmerge is bogus, increment by the maximum number of file
descriptors allowed. */
for (out = in = 0; nmerge <= nfiles - in; out++) Line 3811
{
FILE *tfp; Line 3813
struct tempnode *temp = create_temp (&tfp); Line 3814
size_t num_merged = mergefiles (&files[in], MIN (ntemps, nmerge), Line 3815
nmerge, tfp, temp->name); Line 3816
ntemps -= MIN (ntemps, num_merged); Line 3817
files[out].name = temp->name; Line 3818
files[out].temp = temp; Line 3819
in += num_merged; Line 3820
}
remainder = nfiles - in; Line 3823
cheap_slots = nmerge - out % nmerge; Line 3824
if (cheap_slots < remainder) Line 3826
{
/* So many files remain that they can't all be put into the last
NMERGE-sized output window. Do one more merge. Merge as few
files as possible, to avoid needless I/O. */
size_t nshortmerge = remainder - cheap_slots + 1; Line 3831
FILE *tfp; Line 3832
struct tempnode *temp = create_temp (&tfp); Line 3833
size_t num_merged = mergefiles (&files[in], MIN (ntemps, nshortmerge),Line 3834
nshortmerge, tfp, temp->name); Line 3835
ntemps -= MIN (ntemps, num_merged); Line 3836
files[out].name = temp->name; Line 3837
files[out++].temp = temp; Line 3838
in += num_merged; Line 3839
}
/* Put the remaining input files into the last NMERGE-sized output
window, so they will be merged in the next pass. */
memmove (&files[out], &files[in], (nfiles - in) * sizeof *files); Line 3844
ntemps += out; Line 3845
nfiles -= in - out; Line 3846
}
avoid_trashing_input (files, ntemps, nfiles, output_file); Line 3849
/* We aren't guaranteed that this final mergefiles will work, therefore we
try to merge into the output, and then merge as much as we can into a
temp file if we can't. Repeat. */
while (true) Line 3855
{
/* Merge directly into the output file if possible. */
FILE **fps; Line 3858
size_t nopened = open_input_files (files, nfiles, &fps); Line 3859
if (nopened == nfiles) Line 3861
{
FILE *ofp = stream_open (output_file, "w"); Line 3863...!syscalls auto-comment...
if (ofp) Line 3864
{
mergefps (files, ntemps, nfiles, ofp, output_file, fps); Line 3866
break; Line 3867
}
if (errno != EMFILE || nopened <= 2) Line 3869
sort_die (_("open failed"), output_file); Line 3870
}
else if (nopened <= 2) Line 3872
sort_die (_("open failed"), files[nopened].name); Line 3873
/* We ran out of file descriptors. Close one of the input
files, to gain a file descriptor. Then create a temporary
file with our spare file descriptor. Retry if that failed
(e.g., some other process could open a file between the time
we closed and tried to create). */
FILE *tfp; Line 3880
struct tempnode *temp; Line 3881
do
{
nopened--; Line 3884
xfclose (fps[nopened], files[nopened].name); Line 3885...!syscalls auto-comment...
temp = maybe_create_temp (&tfp, ! (nopened <= 2)); Line 3886
}
while (!temp); Line 3888
/* Merge into the newly allocated temporary. */
mergefps (&files[0], MIN (ntemps, nopened), nopened, tfp, temp->name, Line 3891
fps); Line 3892
ntemps -= MIN (ntemps, nopened); Line 3893
files[0].name = temp->name; Line 3894
files[0].temp = temp; Line 3895
memmove (&files[1], &files[nopened], (nfiles - nopened) * sizeof *files); Line 3897
ntemps++; Line 3898
nfiles -= nopened - 1; Line 3899
}
} Block 115
/* Sort NFILES FILES onto OUTPUT_FILE. Use at most NTHREADS threads. */
static void Line 3905
sort (char *const *files, size_t nfiles, char const *output_file, Line 3906
size_t nthreads) Line 3907
{
struct buffer buf; Line 3909
IF_LINT (buf.buf = NULL); Line 3910
size_t ntemps = 0; Line 3911
bool output_file_created = false; Line 3912
buf.alloc = 0; Line 3914
while (nfiles) Line 3916
{
char const *temp_output; Line 3918
char const *file = *files; Line 3919
FILE *fp = xfopen (file, "r"); Line 3920...!syscalls auto-comment...
FILE *tfp; Line 3921
size_t bytes_per_line; Line 3923
if (nthreads > 1) Line 3924
{
/* Get log P. */
size_t tmp = 1; Line 3927
size_t mult = 1; Line 3928
while (tmp < nthreads) Line 3929
{
tmp *= 2; Line 3931
mult++; Line 3932
}
bytes_per_line = (mult * sizeof (struct line)); Line 3934
}
else Line 3936
bytes_per_line = sizeof (struct line) * 3 / 2; Line 3937
if (! buf.alloc) Line 3939
initbuf (&buf, bytes_per_line, Line 3940
sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line)); Line 3941
buf.eof = false; Line 3942
files++; Line 3943
nfiles--; Line 3944
while (fillbuf (&buf, fp, file)) Line 3946
{
struct line *line; Line 3948
if (buf.eof && nfiles Line 3950
&& (bytes_per_line + 1 Line 3951
< (buf.alloc - buf.used - bytes_per_line * buf.nlines))) Line 3952
{
/* End of file, but there is more input and buffer room.
Concatenate the next input file; this is faster in
the usual case. */
buf.left = buf.used; Line 3957
break; Line 3958
}
saved_line.text = NULL; Line 3961
line = buffer_linelim (&buf); Line 3962
if (buf.eof && !nfiles && !ntemps && !buf.left) Line 3963
{
xfclose (fp, file); Line 3965...!syscalls auto-comment...
tfp = xfopen (output_file, "w"); Line 3966...!syscalls auto-comment...
temp_output = output_file; Line 3967
output_file_created = true; Line 3968
}
else Line 3970
{
++ntemps; Line 3972
temp_output = create_temp (&tfp)->name; Line 3973
}
if (1 < buf.nlines) Line 3975
{
struct merge_node_queue queue; Line 3977
queue_init (&queue, nthreads); Line 3978
struct merge_node *merge_tree = Line 3979
merge_tree_init (nthreads, buf.nlines, line); Line 3980
sortlines (line, nthreads, buf.nlines, merge_tree + 1, Line 3982
&queue, tfp, temp_output); Line 3983
#ifdef lint Line 3985
merge_tree_destroy (nthreads, merge_tree); Line 3986
queue_destroy (&queue); Line 3987
#endif Line 3988
}
else Line 3990
write_unique (line - 1, tfp, temp_output); Line 3991
xfclose (tfp, temp_output); Line 3993...!syscalls auto-comment...
if (output_file_created) Line 3995
goto finish; Line 3996
}
xfclose (fp, file); Line 3998...!syscalls auto-comment...
}
finish: Line 4001
free (buf.buf); Line 4002
if (! output_file_created) Line 4004
{
struct tempnode *node = temphead; Line 4006
struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles); Line 4007
for (size_t i = 0; node; i++) Line 4008
{
tempfiles[i].name = node->name; Line 4010
tempfiles[i].temp = node; Line 4011
node = node->next; Line 4012
}
merge (tempfiles, ntemps, ntemps, output_file); Line 4014
free (tempfiles); Line 4015
}
reap_all (); Line 4018
} Block 116
/* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */
static void Line 4023
insertkey (struct keyfield *key_arg) Line 4024
{
struct keyfield **p; Line 4026
struct keyfield *key = xmemdup (key_arg, sizeof *key); Line 4027
for (p = &keylist; *p; p = &(*p)->next) Line 4029
continue; Line 4030
*p = key; Line 4031
key->next = NULL; Line 4032
} Block 117
/* Report a bad field specification SPEC, with extra info MSGID. */
static void badfieldspec (char const *, char const *) Line 4037
ATTRIBUTE_NORETURN; Line 4038
static void Line 4039
badfieldspec (char const *spec, char const *msgid) Line 4040
{
die (SORT_FAILURE, 0, _("%s: invalid field specification %s"), Line 4042
_(msgid), quote (spec)); Line 4043
} Block 118
/* Report incompatible options. */
static void incompatible_options (char const *) ATTRIBUTE_NORETURN; Line 4048
static void Line 4049
incompatible_options (char const *opts) Line 4050
{
die (SORT_FAILURE, 0, _("options '-%s' are incompatible"), (opts)); Line 4052
} Block 119
/* Check compatibility of ordering options. */
static void Line 4057
check_ordering_compatibility (void) Line 4058
{
struct keyfield *key; Line 4060
for (key = keylist; key; key = key->next) Line 4062
if (1 < (key->numeric + key->general_numeric + key->human_numeric Line 4063
+ key->month + (key->version | key->random | !!key->ignore))) Line 4064
{
/* The following is too big, but guaranteed to be "big enough". */
char opts[sizeof short_options]; Line 4067
/* Clear flags we're not interested in. */
key->skipsblanks = key->skipeblanks = key->reverse = false; Line 4069
key_to_opts (key, opts); Line 4070
incompatible_options (opts); Line 4071
}
} Block 120
/* Parse the leading integer in STRING and store the resulting value
(which must fit into size_t) into *VAL. Return the address of the
suffix after the integer. If the value is too large, silently
substitute SIZE_MAX. If MSGID is NULL, return NULL after
failure; otherwise, report MSGID and exit on failure. */
static char const * Line 4081
parse_field_count (char const *string, size_t *val, char const *msgid) Line 4082
{
char *suffix; Line 4084
uintmax_t n; Line 4085
switch (xstrtoumax (string, &suffix, 10, &n, "")) Line 4087
{
case LONGINT_OK: Line 4089
case LONGINT_INVALID_SUFFIX_CHAR: Line 4090
*val = n; Line 4091
if (*val == n) Line 4092
break; Line 4093
FALLTHROUGH; Line 4094
case LONGINT_OVERFLOW: Line 4095
case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR: Line 4096
*val = SIZE_MAX; Line 4097
break; Line 4098
case LONGINT_INVALID: Line 4100
if (msgid) Line 4101
die (SORT_FAILURE, 0, _("%s: invalid count at start of %s"), Line 4102
_(msgid), quote (string)); Line 4103
return NULL; Line 4104
}
return suffix; Line 4107
} Block 121
/* Handle interrupts and hangups. */
static void Line 4112
sighandler (int sig) Line 4113
{
if (! SA_NOCLDSTOP) Line 4115
signal (sig, SIG_IGN); Line 4116
cleanup (); Line 4118
signal (sig, SIG_DFL); Line 4120
raise (sig); Line 4121
} Block 122
/* Set the ordering options for KEY specified in S.
Return the address of the first character in S that
is not a valid ordering option.
BLANKTYPE is the kind of blanks that 'b' should skip. */
static char * Line 4129
set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) Line 4130
{
while (*s) Line 4132
{
switch (*s) Line 4134
{
case 'b': Line 4136
if (blanktype == bl_start || blanktype == bl_both) Line 4137
key->skipsblanks = true; Line 4138
if (blanktype == bl_end || blanktype == bl_both) Line 4139
key->skipeblanks = true; Line 4140
break; Line 4141
case 'd': Line 4142
key->ignore = nondictionary; Line 4143
break; Line 4144
case 'f': Line 4145
key->translate = fold_toupper; Line 4146
break; Line 4147
case 'g': Line 4148
key->general_numeric = true; Line 4149
break; Line 4150
case 'h': Line 4151
key->human_numeric = true; Line 4152
break; Line 4153
case 'i': Line 4154
/* Option order should not matter, so don't let -i override
-d. -d implies -i, but -i does not imply -d. */
if (! key->ignore) Line 4157
key->ignore = nonprinting; Line 4158
break; Line 4159
case 'M': Line 4160
key->month = true; Line 4161
break; Line 4162
case 'n': Line 4163
key->numeric = true; Line 4164
break; Line 4165
case 'R': Line 4166
key->random = true; Line 4167
break; Line 4168
case 'r': Line 4169
key->reverse = true; Line 4170
break; Line 4171
case 'V': Line 4172
key->version = true; Line 4173
break; Line 4174
default: Line 4175
return (char *) s; Line 4176
}
++s; Line 4178
}
return (char *) s; Line 4180
} Block 123
/* Initialize KEY. */
static struct keyfield * Line 4185
key_init (struct keyfield *key) Line 4186
{
memset (key, 0, sizeof *key); Line 4188
key->eword = SIZE_MAX; Line 4189
return key; Line 4190
} Block 124
int
main (int argc, char **argv) Line 4194
{
struct keyfield *key; Line 4196
struct keyfield key_buf; Line 4197
struct keyfield gkey; Line 4198
bool gkey_only = false; Line 4199
char const *s; Line 4200
int c = 0; Line 4201
char checkonly = 0; Line 4202
bool mergeonly = false; Line 4203
char *random_source = NULL; Line 4204
bool need_random = false; Line 4205
size_t nthreads = 0; Line 4206
size_t nfiles = 0; Line 4207
bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); Line 4208
int posix_ver = posix2_version (); Line 4209
bool traditional_usage = ! (200112 <= posix_ver && posix_ver < 200809); Line 4210
char **files; Line 4211
char *files_from = NULL; Line 4212
struct Tokens tok; Line 4213
char const *outfile = NULL; Line 4214
bool locale_ok; Line 4215
initialize_main (&argc, &argv); VMS-specific entry point handling wildcard expansion
set_program_name (argv[0]); Retains program name and discards path
locale_ok = !! setlocale (LC_ALL, ""); Sets up internationalization (i18n)
bindtextdomain (PACKAGE, LOCALEDIR); Assigns i18n directorySets text domain for _() [gettext()] function
textdomain (PACKAGE); Sets text domain for _() [gettext()] function
initialize_exit_failure (SORT_FAILURE); Line 4223
hard_LC_COLLATE = hard_locale (LC_COLLATE); Line 4225
#if HAVE_NL_LANGINFO Line 4226
hard_LC_TIME = hard_locale (LC_TIME); Line 4227
#endif Line 4228
/* Get locale's representation of the decimal point. */
{
struct lconv const *locale = localeconv (); Line 4232
/* If the locale doesn't define a decimal point, or if the decimal
point is multibyte, use the C locale's decimal point. FIXME:
add support for multibyte decimal points. */
decimal_point = to_uchar (locale->decimal_point[0]); Line 4237
if (! decimal_point || locale->decimal_point[1]) Line 4238
decimal_point = '.'; Line 4239
/* FIXME: add support for multibyte thousands separators. */
thousands_sep = to_uchar (*locale->thousands_sep); Line 4242
if (! thousands_sep || locale->thousands_sep[1]) Line 4243
thousands_sep = -1; Line 4244
}
have_read_stdin = false; Line 4247
inittables (); Line 4248
{
size_t i; Line 4251
static int const sig[] = Line 4252
{
/* The usual suspects. */
SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, Line 4255
#ifdef SIGPOLL Line 4256
SIGPOLL, Line 4257
#endif Line 4258
#ifdef SIGPROF Line 4259
SIGPROF, Line 4260
#endif Line 4261
#ifdef SIGVTALRM Line 4262
SIGVTALRM, Line 4263
#endif Line 4264
#ifdef SIGXCPU Line 4265
SIGXCPU, Line 4266
#endif Line 4267
#ifdef SIGXFSZ Line 4268
SIGXFSZ, Line 4269
#endif Line 4270
};
enum { nsigs = ARRAY_CARDINALITY (sig) }; Line 4272
#if SA_NOCLDSTOP Line 4274
struct sigaction act; Line 4275
sigemptyset (&caught_signals); Line 4277
for (i = 0; i < nsigs; i++) Line 4278
{
sigaction (sig[i], NULL, &act); Line 4280
if (act.sa_handler != SIG_IGN) Line 4281
sigaddset (&caught_signals, sig[i]); Line 4282
}
act.sa_handler = sighandler; Line 4285
act.sa_mask = caught_signals; Line 4286
act.sa_flags = 0; Line 4287
for (i = 0; i < nsigs; i++) Line 4289
if (sigismember (&caught_signals, sig[i])) Line 4290
sigaction (sig[i], &act, NULL); Line 4291
#else Line 4292
for (i = 0; i < nsigs; i++) Line 4293
if (signal (sig[i], SIG_IGN) != SIG_IGN) Line 4294
{
signal (sig[i], sighandler); Line 4296
siginterrupt (sig[i], 1); Line 4297
}
#endif Line 4299
}
signal (SIGCHLD, SIG_DFL); /* Don't inherit CHLD handling from parent. */ Line 4301
/* The signal mask is known, so it is safe to invoke exit_cleanup. */
atexit (exit_cleanup); Close stdout on exit (see gnulib)
key_init (&gkey); Line 4306
gkey.sword = SIZE_MAX; Line 4307
files = xnmalloc (argc, sizeof *files); Line 4309
while (true) Line 4311
{
/* Parse an operand as a file after "--" was seen; or if
pedantic and a file was seen, unless the POSIX version
is not 1003.1-2001 and -c was not seen and the operand is
"-o FILE" or "-oFILE". */
int oi = -1; Line 4317
if (c == -1 Line 4319
|| (posixly_correct && nfiles != 0 Line 4320
&& ! (traditional_usage Line 4321
&& ! checkonly Line 4322
&& optind != argc Line 4323
&& argv[optind][0] == '-' && argv[optind][1] == 'o' Line 4324
&& (argv[optind][2] || optind + 1 != argc))) Line 4325
|| ((c = getopt_long (argc, argv, short_options, Line 4326
long_options, &oi)) Line 4327
== -1)) Line 4328
{
if (argc <= optind) Line 4330
break; Line 4331
files[nfiles++] = argv[optind++]; Line 4332
}
else switch (c) Line 4334
{
case 1: Line 4336
key = NULL; Line 4337
if (optarg[0] == '+') Line 4338
{
bool minus_pos_usage = (optind != argc && argv[optind][0] == '-' Line 4340
&& ISDIGIT (argv[optind][1])); Line 4341
traditional_usage |= minus_pos_usage && !posixly_correct; Line 4342
if (traditional_usage) Line 4343
{
/* Treat +POS1 [-POS2] as a key if possible; but silently
treat an operand as a file if it is not a valid +POS1. */
key = key_init (&key_buf); Line 4347
s = parse_field_count (optarg + 1, &key->sword, NULL); Line 4348
if (s && *s == '.') Line 4349
s = parse_field_count (s + 1, &key->schar, NULL); Line 4350
if (! (key->sword || key->schar)) Line 4351
key->sword = SIZE_MAX; Line 4352
if (! s || *set_ordering (s, key, bl_start)) Line 4353
key = NULL; Line 4354
else Line 4355
{
if (minus_pos_usage) Line 4357
{
char const *optarg1 = argv[optind++]; Line 4359
s = parse_field_count (optarg1 + 1, &key->eword, Line 4360
N_("invalid number after '-'")); Line 4361
/* When called with a non-NULL message ID,
parse_field_count cannot return NULL. Tell static
analysis tools that dereferencing S is safe. */
assert (s); Line 4365
if (*s == '.') Line 4366
s = parse_field_count (s + 1, &key->echar, Line 4367
N_("invalid number after '.'")); Line 4368
if (!key->echar && key->eword) Line 4369
{
/* obsolescent syntax +A.x -B.y is equivalent to:
-k A+1.x+1,B.y (when y = 0)
-k A+1.x+1,B+1.y (when y > 0)
So eword is decremented as in the -k case
only when the end field (B) is specified and
echar (y) is 0. */
key->eword--; Line 4377
}
if (*set_ordering (s, key, bl_end)) Line 4379
badfieldspec (optarg1, Line 4380
N_("stray character in field spec")); Line 4381
}
key->traditional_used = true; Line 4383
insertkey (key); Line 4384
}
}
}
if (! key) Line 4388
files[nfiles++] = optarg; Line 4389
break; Line 4390
case SORT_OPTION: Line 4392
c = XARGMATCH ("--sort", optarg, sort_args, sort_types); Line 4393
FALLTHROUGH; Line 4394
case 'b': Line 4395
case 'd': Line 4396
case 'f': Line 4397
case 'g': Line 4398
case 'h': Line 4399
case 'i': Line 4400
case 'M': Line 4401
case 'n': Line 4402
case 'r': Line 4403
case 'R': Line 4404
case 'V': Line 4405
{
char str[2]; Line 4407
str[0] = c; Line 4408
str[1] = '\0'; Line 4409
set_ordering (str, &gkey, bl_both); Line 4410
}
break; Line 4412
case CHECK_OPTION: Line 4414
c = (optarg Line 4415
? XARGMATCH ("--check", optarg, check_args, check_types) Line 4416
: 'c'); Line 4417
FALLTHROUGH; Line 4418
case 'c': Line 4419
case 'C': Line 4420
if (checkonly && checkonly != c) Line 4421
incompatible_options ("cC"); Line 4422
checkonly = c; Line 4423
break; Line 4424
case COMPRESS_PROGRAM_OPTION: Line 4426
if (compress_program && !STREQ (compress_program, optarg)) Line 4427
die (SORT_FAILURE, 0, _("multiple compress programs specified")); Line 4428
compress_program = optarg; Line 4429
break; Line 4430
case DEBUG_PROGRAM_OPTION: Line 4432
debug = true; Line 4433
break; Line 4434
case FILES0_FROM_OPTION: Line 4436
files_from = optarg; Line 4437
break; Line 4438
case 'k': Line 4440
key = key_init (&key_buf); Line 4441
/* Get POS1. */
s = parse_field_count (optarg, &key->sword, Line 4444
N_("invalid number at field start")); Line 4445
if (! key->sword--) Line 4446
{
/* Provoke with 'sort -k0' */
badfieldspec (optarg, N_("field number is zero")); Line 4449
}
if (*s == '.') Line 4451
{
s = parse_field_count (s + 1, &key->schar, Line 4453
N_("invalid number after '.'")); Line 4454
if (! key->schar--) Line 4455
{
/* Provoke with 'sort -k1.0' */
badfieldspec (optarg, N_("character offset is zero")); Line 4458
}
}
if (! (key->sword || key->schar)) Line 4461
key->sword = SIZE_MAX; Line 4462
s = set_ordering (s, key, bl_start); Line 4463
if (*s != ',') Line 4464
{
key->eword = SIZE_MAX; Line 4466
key->echar = 0; Line 4467
}
else Line 4469
{
/* Get POS2. */
s = parse_field_count (s + 1, &key->eword, Line 4472
N_("invalid number after ','")); Line 4473
if (! key->eword--) Line 4474
{
/* Provoke with 'sort -k1,0' */
badfieldspec (optarg, N_("field number is zero")); Line 4477
}
if (*s == '.') Line 4479
{
s = parse_field_count (s + 1, &key->echar, Line 4481
N_("invalid number after '.'")); Line 4482
}
s = set_ordering (s, key, bl_end); Line 4484
}
if (*s) Line 4486
badfieldspec (optarg, N_("stray character in field spec")); Line 4487
insertkey (key); Line 4488
break; Line 4489
case 'm': Line 4491
mergeonly = true; Line 4492
break; Line 4493
case NMERGE_OPTION: Line 4495
specify_nmerge (oi, c, optarg); Line 4496
break; Line 4497
case 'o': Line 4499
if (outfile && !STREQ (outfile, optarg)) Line 4500
die (SORT_FAILURE, 0, _("multiple output files specified")); Line 4501
outfile = optarg; Line 4502
break; Line 4503
case RANDOM_SOURCE_OPTION: Line 4505
if (random_source && !STREQ (random_source, optarg)) Line 4506
die (SORT_FAILURE, 0, _("multiple random sources specified")); Line 4507
random_source = optarg; Line 4508
break; Line 4509
case 's': Line 4511
stable = true; Line 4512
break; Line 4513
case 'S': Line 4515
specify_sort_size (oi, c, optarg); Line 4516
break; Line 4517
case 't': Line 4519
{
char newtab = optarg[0]; Line 4521
if (! newtab) Line 4522
die (SORT_FAILURE, 0, _("empty tab")); Line 4523
if (optarg[1]) Line 4524
{
if (STREQ (optarg, "\\0")) Line 4526
newtab = '\0'; Line 4527
else Line 4528
{
/* Provoke with 'sort -txx'. Complain about
"multi-character tab" instead of "multibyte tab", so
that the diagnostic's wording does not need to be
changed once multibyte characters are supported. */
die (SORT_FAILURE, 0, _("multi-character tab %s"), Line 4534
quote (optarg)); Line 4535
}
}
if (tab != TAB_DEFAULT && tab != newtab) Line 4538
die (SORT_FAILURE, 0, _("incompatible tabs")); Line 4539
tab = newtab; Line 4540
}
break; Line 4542
case 'T': Line 4544
add_temp_dir (optarg); Line 4545
break; Line 4546
case PARALLEL_OPTION: Line 4548
nthreads = specify_nthreads (oi, c, optarg); Line 4549
break; Line 4550
case 'u': Line 4552
unique = true; Line 4553
break; Line 4554
case 'y': Line 4556
/* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
through Solaris 7. It is also accepted by many non-Solaris
"sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
-y is marked as obsolete starting with Solaris 8 (1999), but is
still accepted as of Solaris 10 prerelease (2004).
Solaris 2.5.1 "sort -y 100" reads the input file "100", but
emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
and which in general ignores the argument after "-y" if it
consists entirely of digits (it can even be empty). */
if (optarg == argv[optind - 1]) Line 4567
{
char const *p; Line 4569
for (p = optarg; ISDIGIT (*p); p++) Line 4570
continue; Line 4571
optind -= (*p != '\0'); Line 4572
}
break; Line 4574
case 'z': Line 4576
eolchar = 0; Line 4577
break; Line 4578
case_GETOPT_HELP_CHAR; Line 4580
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); Line 4582
default: Line 4584
usage (SORT_FAILURE); Line 4585
}
}
if (files_from) Line 4589
{
/* When using --files0-from=F, you may not specify any files
on the command-line. */
if (nfiles) Line 4593
{
error (0, 0, _("extra operand %s"), quoteaf (files[0])); Line 4595
fprintf (stderr, "%s\n", Line 4596
_("file operands cannot be combined with --files0-from")); Line 4597
usage (SORT_FAILURE); Line 4598
}
FILE *stream = xfopen (files_from, "r"); Line 4601...!syscalls auto-comment...
readtokens0_init (&tok); Line 4603
if (! readtokens0 (stream, &tok)) Line 4605
die (SORT_FAILURE, 0, _("cannot read file names from %s"), Line 4606
quoteaf (files_from)); Line 4607
xfclose (stream, files_from); Line 4608...!syscalls auto-comment...
if (tok.n_tok) Line 4610
{
free (files); Line 4612
files = tok.tok; Line 4613
nfiles = tok.n_tok; Line 4614
for (size_t i = 0; i < nfiles; i++) Line 4615
{
if (STREQ (files[i], "-")) Line 4617
die (SORT_FAILURE, 0, _("when reading file names from stdin, " Line 4618
"no file name of %s allowed"), Line 4619
quoteaf (files[i])); Line 4620
else if (files[i][0] == '\0') Line 4621
{
/* Using the standard 'filename:line-number:' prefix here is
not totally appropriate, since NUL is the separator,
not NL, but it might be better than nothing. */
unsigned long int file_number = i + 1; Line 4626
die (SORT_FAILURE, 0, Line 4627
_("%s:%lu: invalid zero-length file name"), Line 4628
quotef (files_from), file_number); Line 4629
}
}
}
else Line 4633
die (SORT_FAILURE, 0, _("no input from %s"), Line 4634
quoteaf (files_from)); Line 4635
}
/* Inheritance of global options to individual keys. */
for (key = keylist; key; key = key->next) Line 4639
{
if (default_key_compare (key) && !key->reverse) Line 4641
{
key->ignore = gkey.ignore; Line 4643
key->translate = gkey.translate; Line 4644
key->skipsblanks = gkey.skipsblanks; Line 4645
key->skipeblanks = gkey.skipeblanks; Line 4646
key->month = gkey.month; Line 4647
key->numeric = gkey.numeric; Line 4648
key->general_numeric = gkey.general_numeric; Line 4649
key->human_numeric = gkey.human_numeric; Line 4650
key->version = gkey.version; Line 4651
key->random = gkey.random; Line 4652
key->reverse = gkey.reverse; Line 4653
}
need_random |= key->random; Line 4656
}
if (!keylist && !default_key_compare (&gkey)) Line 4659
{
gkey_only = true; Line 4661
insertkey (&gkey); Line 4662
need_random |= gkey.random; Line 4663
}
check_ordering_compatibility (); Line 4666
if (debug) Line 4668
{
if (checkonly || outfile) Line 4670
{
static char opts[] = "X --debug"; Line 4672
opts[0] = (checkonly ? checkonly : 'o'); Line 4673
incompatible_options (opts); Line 4674
}
/* Always output the locale in debug mode, since this
is such a common source of confusion. */
/* OpenBSD can only set some categories with LC_ALL above,
so set LC_COLLATE explicitly to flag errors. */
if (locale_ok) Line 4682
locale_ok = !! setlocale (LC_COLLATE, ""); Sets up internationalization (i18n)
if (! locale_ok) Line 4684
error (0, 0, "%s", _("failed to set locale")); Line 4685
if (hard_LC_COLLATE) Line 4686
error (0, 0, _("using %s sorting rules"), Line 4687
quote (setlocale (LC_COLLATE, NULL))); Sets up internationalization (i18n)
else Line 4689
error (0, 0, "%s", _("using simple byte comparison")); Line 4690
key_warnings (&gkey, gkey_only); Line 4692
}
reverse = gkey.reverse; Line 4695
if (need_random) Line 4697
random_md5_state_init (random_source); Line 4698
if (temp_dir_count == 0) Line 4700
{
char const *tmp_dir = getenv ("TMPDIR"); Line 4702
add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR); Line 4703
}
if (nfiles == 0) Line 4706
{
nfiles = 1; Line 4708
free (files); Line 4709
files = xmalloc (sizeof *files); Line 4710
*files = (char *) "-"; Line 4711
}
/* Need to re-check that we meet the minimum requirement for memory
usage with the final value for NMERGE. */
if (0 < sort_size) Line 4716
sort_size = MAX (sort_size, MIN_SORT_SIZE); Line 4717
if (checkonly) Line 4719
{
if (nfiles > 1) Line 4721
die (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"), Line 4722
quoteaf (files[1]), checkonly); Line 4723
if (outfile) Line 4725
{
static char opts[] = {0, 'o', 0}; Line 4727
opts[0] = checkonly; Line 4728
incompatible_options (opts); Line 4729
}
/* POSIX requires that sort return 1 IFF invoked with -c or -C and the
input is not properly sorted. */
return check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER; Line 4734
}
/* Check all inputs are accessible, or exit immediately. */
check_inputs (files, nfiles); Line 4738
/* Check output is writable, or exit immediately. */
check_output (outfile); Line 4741
if (mergeonly) Line 4743
{
struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles); Line 4745
for (size_t i = 0; i < nfiles; ++i) Line 4747
sortfiles[i].name = files[i]; Line 4748
merge (sortfiles, 0, nfiles, outfile); Line 4750
IF_LINT (free (sortfiles)); Line 4751
}
else Line 4753
{
if (!nthreads) Line 4755
{
unsigned long int np = num_processors (NPROC_CURRENT_OVERRIDABLE); Line 4757
nthreads = MIN (np, DEFAULT_MAX_THREADS); Line 4758
}
/* Avoid integer overflow later. */
size_t nthreads_max = SIZE_MAX / (2 * sizeof (struct merge_node)); Line 4762
nthreads = MIN (nthreads, nthreads_max); Line 4763
sort (files, nfiles, outfile, nthreads); Line 4765
}
#ifdef lint Line 4768
if (files_from) Line 4769
readtokens0_free (&tok); Line 4770
else Line 4771
free (files); Line 4772
#endif Line 4773
if (have_read_stdin && fclose (stdin) == EOF) Line 4775...!syscalls auto-comment...
sort_die (_("close failed"), "-"); Line 4776
return EXIT_SUCCESS; Line 4778
} Block 125