/* comm -- compare two sorted files line by line. This is the comm utility
Copyright (C) 1986-2018 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */ The GNUv3 license
/* Written by Richard Stallman and David MacKenzie. */
#include <config.h> Provides system specific information
#include <getopt.h> ...!includes auto-comment...
#include <sys/types.h> Provides system data types
#include "system.h" ...!includes auto-comment...
#include "linebuffer.h" ...!includes auto-comment...
#include "die.h" ...!includes auto-comment...
#include "error.h" ...!includes auto-comment...
#include "fadvise.h" ...!includes auto-comment...
#include "hard-locale.h" ...!includes auto-comment...
#include "quote.h" ...!includes auto-comment...
#include "stdio--.h" ...!includes auto-comment...
#include "memcmp2.h" ...!includes auto-comment...
#include "xmemcoll.h" ...!includes auto-comment...
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "comm"

#define AUTHORS \
  proper_name ("Richard M. Stallman"), \
  proper_name ("David MacKenzie")

/* Undefine, to avoid warning about redefinition on some systems.
   Note that min evaluates each argument twice, so do not pass
   expressions with side effects.  */
#undef min
#define min(x, y) ((x) < (y) ? (x) : (y))

/* True if the LC_COLLATE locale is hard.  */
static bool hard_LC_COLLATE;

/* If true, print lines that are found only in file 1.  */
static bool only_file_1;

/* If true, print lines that are found only in file 2.  */
static bool only_file_2;

/* If true, print lines that are found in both files.  */
static bool both;

/* True once at least one unpairable line has been seen.  */
static bool seen_unpairable;

/* issued_disorder_warning[N - 1] is true once file N has been reported
   as not being in sorted order.  */
static bool issued_disorder_warning[2];

/* Line delimiter: newline by default, NUL with -z.  */
static unsigned char delim = '\n';

/* If true, print a summary.  */
static bool total_option;

/* Whether to check that the input is correctly ordered.  */
static enum
  {
    CHECK_ORDER_DEFAULT,
    CHECK_ORDER_ENABLED,
    CHECK_ORDER_DISABLED
  } check_input_order;

/* Output columns will be delimited with this string, which may be set
   on the command-line with --output-delimiter=STR.  */
static char const *col_sep = "\t";

/* Number of bytes of col_sep written for each column separator.
   Zero means that no --output-delimiter has been seen yet; main
   replaces it with 1 before any output is produced.  */
static size_t col_sep_len = 0;
/* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
enum Line 84
{
CHECK_ORDER_OPTION = CHAR_MAX + 1, Line 86
NOCHECK_ORDER_OPTION, Line 87
OUTPUT_DELIMITER_OPTION, Line 88
TOTAL_OPTION Line 89
}; Block 2
static struct option const long_options[] = Line 92
{
{"check-order", no_argument, NULL, CHECK_ORDER_OPTION}, Line 94
{"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION}, Line 95
{"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION}, Line 96
{"total", no_argument, NULL, TOTAL_OPTION}, Line 97
{"zero-terminated", no_argument, NULL, 'z'}, Line 98
{GETOPT_HELP_OPTION_DECL}, Line 99
{GETOPT_VERSION_OPTION_DECL}, Line 100
{NULL, 0, NULL, 0} Line 101
}; Block 3
void Line 105
usage (int status) Line 106
{
if (status != EXIT_SUCCESS) Line 108
emit_try_help (); ...!common auto-comment...
else Line 110
{
printf (_("\ Line 112
Usage: %s [OPTION]... FILE1 FILE2\n\ Line 113
"), Line 114
program_name); Line 115
fputs (_("\ Line 116
Compare sorted files FILE1 and FILE2 line by line.\n\ Line 117
"), stdout); Line 118
fputs (_("\ Line 119
\n\
When FILE1 or FILE2 (not both) is -, read standard input.\n\ Line 121
"), stdout); Line 122
fputs (_("\ Line 123
\n\
With no options, produce three-column output. Column one contains\n\ Line 125
lines unique to FILE1, column two contains lines unique to FILE2,\n\ Line 126
and column three contains lines common to both files.\n\ Line 127
"), stdout); Line 128
fputs (_("\ Line 129
\n\
-1 suppress column 1 (lines unique to FILE1)\n\ Line 131
-2 suppress column 2 (lines unique to FILE2)\n\ Line 132
-3 suppress column 3 (lines that appear in both files)\n\ Line 133
"), stdout); Line 134
fputs (_("\ Line 135
\n\
--check-order check that the input is correctly sorted, even\n\ Line 137
if all input lines are pairable\n\ Line 138
--nocheck-order do not check that the input is correctly sorted\n\ Line 139
"), stdout); Line 140
fputs (_("\ Line 141
--output-delimiter=STR separate columns with STR\n\ Line 142
"), stdout); Line 143
fputs (_("\ Line 144
--total output a summary\n\ Line 145
"), stdout); Line 146
fputs (_("\ Line 147
-z, --zero-terminated line delimiter is NUL, not newline\n\ Line 148
"), stdout); Line 149
fputs (HELP_OPTION_DESCRIPTION, stdout); Line 150
fputs (VERSION_OPTION_DESCRIPTION, stdout); Line 151
fputs (_("\ Line 152
\n\
Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\ Line 154
"), stdout); Line 155
printf (_("\ Line 156
\n\
Examples:\n\ Line 158
%s -12 file1 file2 Print only lines present in both file1 and file2.\n\ Line 159
%s -3 file1 file2 Print lines in file1 not in file2, and vice versa.\n\ Line 160
"), Line 161
program_name, program_name); Line 162
emit_ancillary_info (PROGRAM_NAME); Line 163
}
exit (status); Line 165
} Block 4
/* Output the line in linebuffer LINE to stream STREAM
provided the switches say it should be output.
CLASS is 1 for a line found only in file 1,
2 for a line only in file 2, 3 for a line in both. */
static void Line 173
writeline (struct linebuffer const *line, FILE *stream, int class) Line 174
{
switch (class) Line 176
{
case 1: Line 178
if (!only_file_1) Line 179
return; Line 180
break; Line 181
case 2: Line 183
if (!only_file_2) Line 184
return; Line 185
if (only_file_1) Line 186
fwrite (col_sep, 1, col_sep_len, stream); Line 187...!syscalls auto-comment...
break; Line 188
case 3: Line 190
if (!both) Line 191
return; Line 192
if (only_file_1) Line 193
fwrite (col_sep, 1, col_sep_len, stream); Line 194...!syscalls auto-comment...
if (only_file_2) Line 195
fwrite (col_sep, 1, col_sep_len, stream); Line 196...!syscalls auto-comment...
break; Line 197
}
fwrite (line->buffer, sizeof (char), line->length, stream); Line 200...!syscalls auto-comment...
} Block 5
/* Check that successive input lines PREV and CURRENT from input file
WHATFILE are presented in order.
If the user specified --nocheck-order, the check is not made.
If the user specified --check-order, the problem is fatal.
Otherwise (the default), the message is simply a warning.
A message is printed at most once per input file.
This function was copied (nearly) verbatim from 'src/join.c'. */
static void Line 214
check_order (struct linebuffer const *prev, Line 215
struct linebuffer const *current, Line 216
int whatfile) Line 217
{
if (check_input_order != CHECK_ORDER_DISABLED Line 220
&& ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable)) Line 221
{
if (!issued_disorder_warning[whatfile - 1]) Line 223
{
int order; Line 225
if (hard_LC_COLLATE) Line 227
order = xmemcoll (prev->buffer, prev->length - 1, Line 228
current->buffer, current->length - 1); Line 229
else Line 230
order = memcmp2 (prev->buffer, prev->length - 1, Line 231
current->buffer, current->length - 1); Line 232
if (0 < order) Line 234
{
error ((check_input_order == CHECK_ORDER_ENABLED Line 236
? EXIT_FAILURE : 0), Line 237
0, _("file %d is not in sorted order"), whatfile); Line 238
/* If we get to here, the message was just a warning, but we
want only to issue it once. */
issued_disorder_warning[whatfile - 1] = true; Line 242
}
}
}
}
/* Compare INFILES[0] and INFILES[1].
If either is "-", use the standard input for that file.
Assume that each input file is sorted;
merge them and output the result. */
static void Line 253
compare_files (char **infiles) Line 254
{
/* For each file, we have four linebuffers in lba. */
struct linebuffer lba[2][4]; Line 257
/* thisline[i] points to the linebuffer holding the next available line
in file i, or is NULL if there are no lines left in that file. */
struct linebuffer *thisline[2]; Line 261
/* all_line[i][alt[i][0]] also points to the linebuffer holding the
current line in file i. We keep two buffers of history around so we
can look two lines back when we get to the end of a file. */
struct linebuffer *all_line[2][4]; Line 266
/* This is used to rotate through the buffers for each input file. */
int alt[2][3]; Line 269
/* streams[i] holds the input stream for file i. */
FILE *streams[2]; Line 272
/* Counters for the summary. */
uintmax_t total[] = {0, 0, 0}; Line 275
int i, j; Line 277
/* Initialize the storage. */
for (i = 0; i < 2; i++) Line 280
{
for (j = 0; j < 4; j++) Line 282
{
initbuffer (&lba[i][j]); Line 284
all_line[i][j] = &lba[i][j]; Line 285
}
alt[i][0] = 0; Line 287
alt[i][1] = 0; Line 288
alt[i][2] = 0; Line 289
streams[i] = (STREQ (infiles[i], "-") ? stdin : fopen (infiles[i], "r")); Line 290...!syscalls auto-comment...
if (!streams[i]) Line 291
die (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); Line 292
fadvise (streams[i], FADVISE_SEQUENTIAL); Line 294...!syscalls auto-comment...
thisline[i] = readlinebuffer_delim (all_line[i][alt[i][0]], streams[i], Line 296
delim); Line 297
if (ferror (streams[i])) Line 298
die (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); Line 299
}
while (thisline[0] || thisline[1]) Line 302
{
int order; Line 304
bool fill_up[2] = { false, false }; Line 305
/* Compare the next available lines of the two files. */
if (!thisline[0]) Line 309
order = 1; Line 310
else if (!thisline[1]) Line 311
order = -1; Line 312
else Line 313
{
if (hard_LC_COLLATE) Line 315
order = xmemcoll (thisline[0]->buffer, thisline[0]->length - 1, Line 316
thisline[1]->buffer, thisline[1]->length - 1); Line 317
else Line 318
{
size_t len = min (thisline[0]->length, thisline[1]->length) - 1; Line 320
order = memcmp (thisline[0]->buffer, thisline[1]->buffer, len); Line 321
if (order == 0) Line 322
order = (thisline[0]->length < thisline[1]->length Line 323
? -1 Line 324
: thisline[0]->length != thisline[1]->length); Line 325
}
}
/* Output the line that is lesser. */
if (order == 0) Line 330
{
/* Line is seen in both files. */
total[2]++; Line 333
writeline (thisline[1], stdout, 3); Line 334
}
else Line 336
{
seen_unpairable = true; Line 338
if (order <= 0) Line 339
{
/* Line is seen in file 1 only. */
total[0]++; Line 342
writeline (thisline[0], stdout, 1); Line 343
}
else Line 345
{
/* Line is seen in file 2 only. */
total[1]++; Line 348
writeline (thisline[1], stdout, 2); Line 349
}
}
/* Step the file the line came from.
If the files match, step both files. */
if (0 <= order) Line 355
fill_up[1] = true; Line 356
if (order <= 0) Line 357
fill_up[0] = true; Line 358
for (i = 0; i < 2; i++) Line 360
if (fill_up[i]) Line 361
{
/* Rotate the buffers for this file. */
alt[i][2] = alt[i][1]; Line 364
alt[i][1] = alt[i][0]; Line 365
alt[i][0] = (alt[i][0] + 1) & 0x03; Line 366
thisline[i] = readlinebuffer_delim (all_line[i][alt[i][0]], Line 368
streams[i], delim); Line 369
if (thisline[i]) Line 371
check_order (all_line[i][alt[i][1]], thisline[i], i + 1); Line 372
/* If this is the end of the file we may need to re-check
the order of the previous two lines, since we might have
discovered an unpairable match since we checked before. */
else if (all_line[i][alt[i][2]]->buffer) Line 377
check_order (all_line[i][alt[i][2]], Line 378
all_line[i][alt[i][1]], i + 1); Line 379
if (ferror (streams[i])) Line 381
die (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); Line 382
fill_up[i] = false; Line 384
}
}
for (i = 0; i < 2; i++) Line 388
if (fclose (streams[i]) != 0) Line 389...!syscalls auto-comment...
die (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); Line 390
if (total_option) Line 392
{
/* Print the summary, minding the column and line delimiters. */
char buf1[INT_BUFSIZE_BOUND (uintmax_t)]; Line 395
char buf2[INT_BUFSIZE_BOUND (uintmax_t)]; Line 396
char buf3[INT_BUFSIZE_BOUND (uintmax_t)]; Line 397
printf ("%s%s%s%s%s%s%s%c", Line 398
umaxtostr (total[0], buf1), col_sep, Line 399
umaxtostr (total[1], buf2), col_sep, Line 400
umaxtostr (total[2], buf3), col_sep, Line 401
_("total"), delim); Line 402
}
}
int
main (int argc, char **argv) Line 407
{
int c; Line 409
initialize_main (&argc, &argv); VMS-specific entry point handling wildcard expansion
set_program_name (argv[0]); Retains program name and discards path
setlocale (LC_ALL, ""); Sets up internationalization (i18n)
bindtextdomain (PACKAGE, LOCALEDIR); Assigns i18n directorySets text domain for _() [gettext()] function
textdomain (PACKAGE); Sets text domain for _() [gettext()] function
hard_LC_COLLATE = hard_locale (LC_COLLATE); Line 416
atexit (close_stdout); Close stdout on exit (see gnulib)
only_file_1 = true; Line 420
only_file_2 = true; Line 421
both = true; Line 422
seen_unpairable = false; Line 424
issued_disorder_warning[0] = issued_disorder_warning[1] = false; Line 425
check_input_order = CHECK_ORDER_DEFAULT; Line 426
total_option = false; Line 427
while ((c = getopt_long (argc, argv, "123z", long_options, NULL)) != -1) Line 429
switch (c) Line 430
{
case '1': Line 432
only_file_1 = false; Line 433
break; Line 434
case '2': Line 436
only_file_2 = false; Line 437
break; Line 438
case '3': Line 440
both = false; Line 441
break; Line 442
case 'z': Line 444
delim = '\0'; Line 445
break; Line 446
case NOCHECK_ORDER_OPTION: Line 448
check_input_order = CHECK_ORDER_DISABLED; Line 449
break; Line 450
case CHECK_ORDER_OPTION: Line 452
check_input_order = CHECK_ORDER_ENABLED; Line 453
break; Line 454
case OUTPUT_DELIMITER_OPTION: Line 456
if (col_sep_len && !STREQ (col_sep, optarg)) Line 457
die (EXIT_FAILURE, 0, _("multiple output delimiters specified")); Line 458
col_sep = optarg; Line 459
col_sep_len = *optarg ? strlen (optarg) : 1; Line 460
break; Line 461
case TOTAL_OPTION: Line 463
total_option = true; Line 464
break; Line 465
case_GETOPT_HELP_CHAR; Line 467
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); Line 469
default: Line 471
usage (EXIT_FAILURE); Line 472
}
if (! col_sep_len) Line 475
col_sep_len = 1; Line 476
if (argc - optind < 2) Line 478
{
if (argc <= optind) Line 480
error (0, 0, _("missing operand")); Line 481
else Line 482
error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); Line 483
usage (EXIT_FAILURE); Line 484
}
if (2 < argc - optind) Line 487
{
error (0, 0, _("extra operand %s"), quote (argv[optind + 2])); Line 489
usage (EXIT_FAILURE); Line 490
}
compare_files (argv + optind); Line 493
if (issued_disorder_warning[0] || issued_disorder_warning[1]) Line 495
return EXIT_FAILURE; Line 496
else Line 497
return EXIT_SUCCESS; Line 498
} Block 8