/* join - join lines of two files on a common field.  This is the join utility.
Copyright (C) 1991-2018 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Written by Mike Haertel, mike@gnu.ai.mit.edu.
(The notice above is the GNU GPL, version 3 or later.) */
#include <config.h> Provides system specific information
#include <assert.h> ...!includes auto-comment...
#include <sys/types.h> Provides system data types
#include <getopt.h> ...!includes auto-comment...
#include "system.h" ...!includes auto-comment...
#include "die.h" ...!includes auto-comment...
#include "error.h" ...!includes auto-comment...
#include "fadvise.h" ...!includes auto-comment...
#include "hard-locale.h" ...!includes auto-comment...
#include "linebuffer.h" ...!includes auto-comment...
#include "memcasecmp.h" ...!includes auto-comment...
#include "quote.h" ...!includes auto-comment...
#include "stdio--.h" ...!includes auto-comment...
#include "xmemcoll.h" ...!includes auto-comment...
#include "xstrtol.h" ...!includes auto-comment...
#include "argmatch.h" ...!includes auto-comment...
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "join"

/* Author list handed to the --version handling in main.  */
#define AUTHORS proper_name ("Mike Haertel")

/* From here on every 'join' token in this file becomes 'system_join'.
   NOTE(review): presumably this avoids a clash with a 'join' declared
   by a system header -- confirm before removing.  */
#define join system_join

/* Exchange the two 'struct line *' lvalues A and B.

   The expansion is a single statement WITHOUT a trailing semicolon, so
   that 'SWAPLINES (x, y);' is exactly one statement and may be used as
   the body of an unbraced 'if' that has an 'else'.  Both arguments are
   parenthesized and are evaluated more than once, so they must be free
   of side effects.  */
#define SWAPLINES(a, b) do { \
    struct line *swaplines_tmp_ = (a); \
    (a) = (b); \
    (b) = swaplines_tmp_; \
  } while (0)
/* One node of the singly linked list that says which fields to print,
   and in which order, for every output line.  */
struct outlist
  {
    /* Which input the field comes from: 1 is the first file argument,
       2 is the second, and 0 stands for the join field itself.  */
    int file;

    /* Zero-based index of the field; meaningful only when FILE is 1 or 2.  */
    size_t field;

    /* Following node, or NULL at the end of the list.  */
    struct outlist *next;
  };
/* A single field within a line: a pointer to its first character
   together with its length (extract_field stores no terminator).  */
struct field
  {
    char *beg;                  /* Where the field starts.  */
    size_t len;                 /* How many bytes it spans.  */
  };
/* A line read from an input file. */
struct line Line 73
{
struct linebuffer buf; /* The line itself. */ Line 75
size_t nfields; /* Number of elements in 'fields'. */ Line 76
size_t nfields_allocated; /* Number of elements allocated for 'fields'. */ Line 77
struct field *fields; Line 78
}; Block 4
/* A run of consecutive lines from one file that all share the same
   join field value.  */
struct seq
  {
    /* Number of entries of LINES currently in use.  */
    size_t count;

    /* Number of entries LINES has room for.  */
    size_t alloc;

    /* The lines themselves; unused entries are kept NULL by getseq.  */
    struct line **lines;
  };
/* The previous line read from each file. */
static struct line *prevline[2] = {NULL, NULL}; Line 91
/* The number of lines read from each file. */
static uintmax_t line_no[2] = {0, 0}; Line 94
/* The input file names. */
static char *g_names[2]; Line 97
/* This provides an extra line buffer for each file. We need these if we
try to read two consecutive lines into the same buffer, since we don't
want to overwrite the previous buffer before we check order. */
static struct line *spareline[2] = {NULL, NULL}; Line 102
/* True if the LC_COLLATE locale is hard. */
static bool hard_LC_COLLATE; Line 105
/* If nonzero, print unpairable lines in file 1 or 2. */
static bool print_unpairables_1, print_unpairables_2; Line 108
/* If nonzero, print pairable lines. */
static bool print_pairables; Line 111
/* If nonzero, we have seen at least one unpairable line. */
static bool seen_unpairable; Line 114
/* If nonzero, we have warned about disorder in that file. */
static bool issued_disorder_warning[2]; Line 117
/* Empty output field filler. */
static char const *empty_filler; Line 120
/* Whether to ensure the same number of fields are output from each line. */
static bool autoformat; Line 123
/* The number of fields to output for each line.
Only significant when autoformat is true. */
static size_t autocount_1; Line 126
static size_t autocount_2; Line 127
/* Field to join on; SIZE_MAX means they haven't been determined yet. */
static size_t join_field_1 = SIZE_MAX; Line 130
static size_t join_field_2 = SIZE_MAX; Line 131
/* List of fields to print. */
static struct outlist outlist_head; Line 134
/* Last element in 'outlist', where a new element can be added. */
static struct outlist *outlist_end = &outlist_head; Line 137
/* Tab character separating fields. If negative, fields are separated
by any nonempty string of blanks, otherwise by exactly one
tab character whose value (when cast to unsigned char) equals TAB. */
static int tab = -1; Line 142
/* How strictly to verify that the input files are sorted.  */
static enum
  {
    /* Warn about disorder, but only after an unpairable line was seen.  */
    CHECK_ORDER_DEFAULT,

    /* Always check; disorder is a fatal error.  */
    CHECK_ORDER_ENABLED,

    /* Never check.  */
    CHECK_ORDER_DISABLED
  } check_input_order;
/* Codes returned by getopt_long for the options that have no
   single-character form.  They start just above CHAR_MAX so that they
   cannot coincide with any option character.  */
enum
  {
    CHECK_ORDER_OPTION = CHAR_MAX + 1,  /* --check-order */
    NOCHECK_ORDER_OPTION,               /* --nocheck-order */
    HEADER_LINE_OPTION                  /* --header */
  };
static struct option const longopts[] = Line 160
{
{"ignore-case", no_argument, NULL, 'i'}, Line 162
{"check-order", no_argument, NULL, CHECK_ORDER_OPTION}, Line 163
{"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION}, Line 164
{"zero-terminated", no_argument, NULL, 'z'}, Line 165
{"header", no_argument, NULL, HEADER_LINE_OPTION}, Line 166
{GETOPT_HELP_OPTION_DECL}, Line 167
{GETOPT_VERSION_OPTION_DECL}, Line 168
{NULL, 0, NULL, 0} Line 169
}; Block 11
/* An empty line that stands in for the missing partner when a
   non-joining line is printed.  */
static struct line uni_blank;

/* True to ignore case when comparing join fields.  */
static bool ignore_case;

/* True to treat the first line of each file as column headers, which
   are joined with each other without any ordering check.  */
static bool join_header_lines;

/* The byte that ends each line; a newline unless changed.  */
static char eolchar = '\n';
void Line 185
usage (int status) Line 186
{
if (status != EXIT_SUCCESS) Line 188
emit_try_help (); ...!common auto-comment...
else Line 190
{
printf (_("\ Line 192
Usage: %s [OPTION]... FILE1 FILE2\n\ Line 193
"), Line 194
program_name); Line 195
fputs (_("\ Line 196
For each pair of input lines with identical join fields, write a line to\n\ Line 197
standard output. The default join field is the first, delimited by blanks.\ Line 198
\n\
"), stdout); Line 200
fputs (_("\ Line 201
\n\
When FILE1 or FILE2 (not both) is -, read standard input.\n\ Line 203
"), stdout); Line 204
fputs (_("\ Line 205
\n\
-a FILENUM also print unpairable lines from file FILENUM, where\n\ Line 207
FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\ Line 208
-e EMPTY replace missing input fields with EMPTY\n\ Line 209
"), stdout); Line 210
fputs (_("\ Line 211
-i, --ignore-case ignore differences in case when comparing fields\n\ Line 212
-j FIELD equivalent to '-1 FIELD -2 FIELD'\n\ Line 213
-o FORMAT obey FORMAT while constructing output line\n\ Line 214
-t CHAR use CHAR as input and output field separator\n\ Line 215
"), stdout); Line 216
fputs (_("\ Line 217
-v FILENUM like -a FILENUM, but suppress joined output lines\n\ Line 218
-1 FIELD join on this FIELD of file 1\n\ Line 219
-2 FIELD join on this FIELD of file 2\n\ Line 220
--check-order check that the input is correctly sorted, even\n\ Line 221
if all input lines are pairable\n\ Line 222
--nocheck-order do not check that the input is correctly sorted\n\ Line 223
--header treat the first line in each file as field headers,\n\ Line 224
print them without trying to pair them\n\ Line 225
"), stdout); Line 226
fputs (_("\ Line 227
-z, --zero-terminated line delimiter is NUL, not newline\n\ Line 228
"), stdout); Line 229
fputs (HELP_OPTION_DESCRIPTION, stdout); Line 230
fputs (VERSION_OPTION_DESCRIPTION, stdout); Line 231
fputs (_("\ Line 232
\n\
Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\ Line 234
else fields are separated by CHAR. Any FIELD is a field number counted\n\ Line 235
from 1. FORMAT is one or more comma or blank separated specifications,\n\ Line 236
each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\ Line 237
the remaining fields from FILE1, the remaining fields from FILE2, all\n\ Line 238
separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\ Line 239
line of each file determines the number of fields output for each line.\n\ Line 240
\n\
Important: FILE1 and FILE2 must be sorted on the join fields.\n\ Line 242
E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\ Line 243
or use \"join -t ''\" if 'sort' has no options.\n\ Line 244
Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\ Line 245
If the input is not sorted and some lines cannot be joined, a\n\ Line 246
warning message will be given.\n\ Line 247
"), stdout); Line 248
emit_ancillary_info (PROGRAM_NAME); Line 249
}
exit (status); Line 251
} Block 12
/* Record a field in LINE, with location FIELD and size LEN. */
static void Line 256
extract_field (struct line *line, char *field, size_t len) Line 257
{
if (line->nfields >= line->nfields_allocated) Line 259
{
line->fields = X2NREALLOC (line->fields, &line->nfields_allocated); Line 261
}
line->fields[line->nfields].beg = field; Line 263
line->fields[line->nfields].len = len; Line 264
++(line->nfields); Line 265
} Block 13
/* Fill in the 'fields' structure in LINE. */
static void Line 270
xfields (struct line *line) Line 271
{
char *ptr = line->buf.buffer; Line 273
char const *lim = ptr + line->buf.length - 1; Line 274
if (ptr == lim) Line 276
return; Line 277
if (0 <= tab && tab != '\n') Line 279
{
char *sep; Line 281
for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) Line 282
extract_field (line, ptr, sep - ptr); Line 283
}
else if (tab < 0) Line 285
{
/* Skip leading blanks before the first field. */
while (field_sep (*ptr)) Line 288
if (++ptr == lim) Line 289
return; Line 290
do
{
char *sep; Line 294
for (sep = ptr + 1; sep != lim && ! field_sep (*sep); sep++) Line 295
continue; Line 296
extract_field (line, ptr, sep - ptr); Line 297
if (sep == lim) Line 298
return; Line 299
for (ptr = sep + 1; ptr != lim && field_sep (*ptr); ptr++) Line 300
continue; Line 301
}
while (ptr != lim); Line 303
}
extract_field (line, ptr, lim - ptr); Line 306
} Block 14
static void Line 309
freeline (struct line *line) Line 310
{
if (line == NULL) Line 312
return; Line 313
free (line->fields); Line 314
line->fields = NULL; Line 315
free (line->buf.buffer); Line 316
line->buf.buffer = NULL; Line 317
} Block 15
/* Return <0 if the join field in LINE1 compares less than the one in LINE2;
>0 if it compares greater; 0 if it compares equal.
Report an error and exit if the comparison fails.
Use join fields JF_1 and JF_2 respectively. */
static int Line 325
keycmp (struct line const *line1, struct line const *line2, Line 326
size_t jf_1, size_t jf_2) Line 327
{
/* Start of field to compare in each file. */
char *beg1; Line 330
char *beg2; Line 331
size_t len1; Line 333
size_t len2; /* Length of fields to compare. */ Line 334
int diff; Line 335
if (jf_1 < line1->nfields) Line 337
{
beg1 = line1->fields[jf_1].beg; Line 339
len1 = line1->fields[jf_1].len; Line 340
}
else Line 342
{
beg1 = NULL; Line 344
len1 = 0; Line 345
}
if (jf_2 < line2->nfields) Line 348
{
beg2 = line2->fields[jf_2].beg; Line 350
len2 = line2->fields[jf_2].len; Line 351
}
else Line 353
{
beg2 = NULL; Line 355
len2 = 0; Line 356
}
if (len1 == 0) Line 359
return len2 == 0 ? 0 : -1; Line 360
if (len2 == 0) Line 361
return 1; Line 362
if (ignore_case) Line 364
{
/* FIXME: ignore_case does not work with NLS (in particular,
with multibyte chars). */
diff = memcasecmp (beg1, beg2, MIN (len1, len2)); Line 368
}
else Line 370
{
if (hard_LC_COLLATE) Line 372
return xmemcoll (beg1, len1, beg2, len2); Line 373
diff = memcmp (beg1, beg2, MIN (len1, len2)); Line 374
}
if (diff) Line 377
return diff; Line 378
return len1 < len2 ? -1 : len1 != len2; Line 379
}
/* Check that successive input lines PREV and CURRENT from input file
WHATFILE are presented in order, unless the user may be relying on
the GNU extension that input lines may be out of order if no input
lines are unpairable.
If the user specified --nocheck-order, the check is not made.
If the user specified --check-order, the problem is fatal.
Otherwise (the default), the message is simply a warning.
A message is printed at most once per input file. */
static void Line 393
check_order (const struct line *prev, Line 394
const struct line *current, Line 395
int whatfile) Line 396
{
if (check_input_order != CHECK_ORDER_DISABLED Line 398
&& ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable)) Line 399
{
if (!issued_disorder_warning[whatfile-1]) Line 401
{
size_t join_field = whatfile == 1 ? join_field_1 : join_field_2; Line 403
if (keycmp (prev, current, join_field, join_field) > 0) Line 404
{
/* Exclude any trailing newline. */
size_t len = current->buf.length; Line 407
if (0 < len && current->buf.buffer[len - 1] == '\n') Line 408
--len; Line 409
/* If the offending line is longer than INT_MAX, output
only the first INT_MAX bytes in this diagnostic. */
len = MIN (INT_MAX, len); Line 413
error ((check_input_order == CHECK_ORDER_ENABLED Line 415
? EXIT_FAILURE : 0), Line 416
0, _("%s:%"PRIuMAX": is not sorted: %.*s"), Line 417
g_names[whatfile - 1], line_no[whatfile - 1], Line 418
(int) len, current->buf.buffer); Line 419
/* If we get to here, the message was merely a warning.
Arrange to issue it only once per file. */
issued_disorder_warning[whatfile-1] = true; Line 423
}
}
}
} Block 17
static inline void Line 429
reset_line (struct line *line) Line 430
{
line->nfields = 0; Line 432
} Block 18
static struct line * Line 435
init_linep (struct line **linep) Line 436
{
struct line *line = xcalloc (1, sizeof *line); Line 438
*linep = line; Line 439
return line; Line 440
} Block 19
/* Read a line from FP into LINE and split it into fields.
Return true if successful. */
static bool Line 446
get_line (FILE *fp, struct line **linep, int which) Line 447
{
struct line *line = *linep; Line 449
if (line == prevline[which - 1]) Line 451
{
SWAPLINES (line, spareline[which - 1]); Line 453
*linep = line; Line 454
}
if (line) Line 457
reset_line (line); Line 458
else Line 459
line = init_linep (linep); Line 460
if (! readlinebuffer_delim (&line->buf, fp, eolchar)) Line 462
{
if (ferror (fp)) Line 464
die (EXIT_FAILURE, errno, _("read error")); Line 465
freeline (line); Line 466
return false; Line 467
}
++line_no[which - 1]; Line 469
xfields (line); Line 471
if (prevline[which - 1]) Line 473
check_order (prevline[which - 1], line, which); Line 474
prevline[which - 1] = line; Line 476
return true; Line 477
} Block 20
static void Line 480
free_spareline (void) Line 481
{
for (size_t i = 0; i < ARRAY_CARDINALITY (spareline); i++) Line 483
{
if (spareline[i]) Line 485
{
freeline (spareline[i]); Line 487
free (spareline[i]); Line 488
}
}
} Block 21
static void Line 493
initseq (struct seq *seq) Line 494
{
seq->count = 0; Line 496
seq->alloc = 0; Line 497
seq->lines = NULL; Line 498
} Block 22
/* Read a line from FP and add it to SEQ. Return true if successful. */
static bool Line 503
getseq (FILE *fp, struct seq *seq, int whichfile) Line 504
{
if (seq->count == seq->alloc) Line 506
{
seq->lines = X2NREALLOC (seq->lines, &seq->alloc); Line 508
for (size_t i = seq->count; i < seq->alloc; i++) Line 509
seq->lines[i] = NULL; Line 510
}
if (get_line (fp, &seq->lines[seq->count], whichfile)) Line 513
{
++seq->count; Line 515
return true; Line 516
}
return false; Line 518
} Block 23
/* Read a line from FP and add it to SEQ, as the first item if FIRST is
true, else as the next. */
static bool Line 523
advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile) Line 524
{
if (first) Line 526
seq->count = 0; Line 527
return getseq (fp, seq, whichfile); Line 529
} Block 24
static void Line 532
delseq (struct seq *seq) Line 533
{
for (size_t i = 0; i < seq->alloc; i++) Line 535
{
freeline (seq->lines[i]); Line 537
free (seq->lines[i]); Line 538
}
free (seq->lines); Line 540
} Block 25
/* Print field N of LINE if it exists and is nonempty, otherwise
'empty_filler' if it is nonempty. */
static void Line 547
prfield (size_t n, struct line const *line) Line 548
{
size_t len; Line 550
if (n < line->nfields) Line 552
{
len = line->fields[n].len; Line 554
if (len) Line 555
fwrite (line->fields[n].beg, 1, len, stdout); Line 556...!syscalls auto-comment...
else if (empty_filler) Line 557
fputs (empty_filler, stdout); Line 558
}
else if (empty_filler) Line 560
fputs (empty_filler, stdout); Line 561
} Block 26
/* Output all the fields in line, other than the join field. */
static void Line 566
prfields (struct line const *line, size_t join_field, size_t autocount) Line 567
{
size_t i; Line 569
size_t nfields = autoformat ? autocount : line->nfields; Line 570
char output_separator = tab < 0 ? ' ' : tab; Line 571
for (i = 0; i < join_field && i < nfields; ++i) Line 573
{
putchar (output_separator); Line 575
prfield (i, line); Line 576
}
for (i = join_field + 1; i < nfields; ++i) Line 578
{
putchar (output_separator); Line 580
prfield (i, line); Line 581
}
} Block 27
/* Print the join of LINE1 and LINE2. */
static void Line 587
prjoin (struct line const *line1, struct line const *line2) Line 588
{
const struct outlist *outlist; Line 590
char output_separator = tab < 0 ? ' ' : tab; Line 591
size_t field; Line 592
struct line const *line; Line 593
outlist = outlist_head.next; Line 595
if (outlist) Line 596
{
const struct outlist *o; Line 598
o = outlist; Line 600
while (1) Line 601
{
if (o->file == 0) Line 603
{
if (line1 == &uni_blank) Line 605
{
line = line2; Line 607
field = join_field_2; Line 608
}
else Line 610
{
line = line1; Line 612
field = join_field_1; Line 613
}
}
else Line 616
{
line = (o->file == 1 ? line1 : line2); Line 618
field = o->field; Line 619
}
prfield (field, line); Line 621
o = o->next; Line 622
if (o == NULL) Line 623
break; Line 624
putchar (output_separator); Line 625
}
putchar (eolchar); Line 627
}
else Line 629
{
if (line1 == &uni_blank) Line 631
{
line = line2; Line 633
field = join_field_2; Line 634
}
else Line 636
{
line = line1; Line 638
field = join_field_1; Line 639
}
/* Output the join field. */
prfield (field, line); Line 643
/* Output other fields. */
prfields (line1, join_field_1, autocount_1); Line 646
prfields (line2, join_field_2, autocount_2); Line 647
putchar (eolchar); Line 649
}
} Block 28
/* Print the join of the files in FP1 and FP2. */
static void Line 655
join (FILE *fp1, FILE *fp2) Line 656
{
struct seq seq1, seq2; Line 658
int diff; Line 659
bool eof1, eof2; Line 660
fadvise (fp1, FADVISE_SEQUENTIAL); Line 662...!syscalls auto-comment...
fadvise (fp2, FADVISE_SEQUENTIAL); Line 663...!syscalls auto-comment...
/* Read the first line of each file. */
initseq (&seq1); Line 666
getseq (fp1, &seq1, 1); Line 667
initseq (&seq2); Line 668
getseq (fp2, &seq2, 2); Line 669
if (autoformat) Line 671
{
autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0; Line 673
autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0; Line 674
}
if (join_header_lines && (seq1.count || seq2.count)) Line 677
{
struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank; Line 679
struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank; Line 680
prjoin (hline1, hline2); Line 681
prevline[0] = NULL; Line 682
prevline[1] = NULL; Line 683
if (seq1.count) Line 684
advance_seq (fp1, &seq1, true, 1); Line 685
if (seq2.count) Line 686
advance_seq (fp2, &seq2, true, 2); Line 687
}
while (seq1.count && seq2.count) Line 690
{
diff = keycmp (seq1.lines[0], seq2.lines[0], Line 692
join_field_1, join_field_2); Line 693
if (diff < 0) Line 694
{
if (print_unpairables_1) Line 696
prjoin (seq1.lines[0], &uni_blank); Line 697
advance_seq (fp1, &seq1, true, 1); Line 698
seen_unpairable = true; Line 699
continue; Line 700
}
if (diff > 0) Line 702
{
if (print_unpairables_2) Line 704
prjoin (&uni_blank, seq2.lines[0]); Line 705
advance_seq (fp2, &seq2, true, 2); Line 706
seen_unpairable = true; Line 707
continue; Line 708
}
/* Keep reading lines from file1 as long as they continue to
match the current line from file2. */
eof1 = false; Line 713
do
if (!advance_seq (fp1, &seq1, false, 1)) Line 715
{
eof1 = true; Line 717
++seq1.count; Line 718
break; Line 719
}
while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0], Line 721
join_field_1, join_field_2)); Line 722
/* Keep reading lines from file2 as long as they continue to
match the current line from file1. */
eof2 = false; Line 726
do
if (!advance_seq (fp2, &seq2, false, 2)) Line 728
{
eof2 = true; Line 730
++seq2.count; Line 731
break; Line 732
}
while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1], Line 734
join_field_1, join_field_2)); Line 735
if (print_pairables) Line 737
{
for (size_t i = 0; i < seq1.count - 1; ++i) Line 739
{
size_t j; Line 741
for (j = 0; j < seq2.count - 1; ++j) Line 742
prjoin (seq1.lines[i], seq2.lines[j]); Line 743
}
}
if (!eof1) Line 747
{
SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]); Line 749
seq1.count = 1; Line 750
}
else Line 752
seq1.count = 0; Line 753
if (!eof2) Line 755
{
SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]); Line 757
seq2.count = 1; Line 758
}
else Line 760
seq2.count = 0; Line 761
}
/* If the user did not specify --nocheck-order, then we read the
tail ends of both inputs to verify that they are in order. We
skip the rest of the tail once we have issued a warning for that
file, unless we actually need to print the unpairable lines. */
struct line *line = NULL; Line 768
bool checktail = false; Line 769
if (check_input_order != CHECK_ORDER_DISABLED Line 771
&& !(issued_disorder_warning[0] && issued_disorder_warning[1])) Line 772
checktail = true; Line 773
if ((print_unpairables_1 || checktail) && seq1.count) Line 775
{
if (print_unpairables_1) Line 777
prjoin (seq1.lines[0], &uni_blank); Line 778
if (seq2.count) Line 779
seen_unpairable = true; Line 780
while (get_line (fp1, &line, 1)) Line 781
{
if (print_unpairables_1) Line 783
prjoin (line, &uni_blank); Line 784
if (issued_disorder_warning[0] && !print_unpairables_1) Line 785
break; Line 786
}
}
if ((print_unpairables_2 || checktail) && seq2.count) Line 790
{
if (print_unpairables_2) Line 792
prjoin (&uni_blank, seq2.lines[0]); Line 793
if (seq1.count) Line 794
seen_unpairable = true; Line 795
while (get_line (fp2, &line, 2)) Line 796
{
if (print_unpairables_2) Line 798
prjoin (&uni_blank, line); Line 799
if (issued_disorder_warning[1] && !print_unpairables_2) Line 800
break; Line 801
}
}
freeline (line); Line 805
free (line); Line 806
delseq (&seq1); Line 808
delseq (&seq2); Line 809
} Block 29
/* Add a field spec for field FIELD of file FILE to 'outlist'. */
static void Line 814
add_field (int file, size_t field) Line 815
{
struct outlist *o; Line 817
assert (file == 0 || file == 1 || file == 2); Line 819
assert (file != 0 || field == 0); Line 820
o = xmalloc (sizeof *o); Line 822
o->file = file; Line 823
o->field = field; Line 824
o->next = NULL; Line 825
/* Add to the end of the list so the fields are in the right order. */
outlist_end->next = o; Line 828
outlist_end = o; Line 829
} Block 30
/* Convert a string of decimal digits, STR (the 1-based join field number),
to an integral value. Upon successful conversion, return one less
(the zero-based field number). Silently convert too-large values
to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
diagnostic and exit. */
static size_t Line 838
string_to_join_field (char const *str) Line 839
{
size_t result; Line 841
unsigned long int val; Line 842
verify (SIZE_MAX <= ULONG_MAX); Line 843
strtol_error s_err = xstrtoul (str, NULL, 10, &val, ""); Line 845
if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val)) Line 846
val = SIZE_MAX; Line 847
else if (s_err != LONGINT_OK || val == 0) Line 848
die (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str)); Line 849
result = val - 1; Line 851
return result; Line 853
} Block 31
/* Decode S, a single field specifier that is either "0" or
   "FILENUM.FIELD" with FILENUM being 1 or 2, into *FILE_INDEX and
   *FIELD_INDEX.  The field number in S is 1-based; *FIELD_INDEX is
   zero-based.  Nothing is returned: an invalid S is diagnosed and
   terminates the program.  */
static void
decode_field_spec (const char *s, int *file_index, size_t *field_index)
{
  char filenum = s[0];

  if (filenum == '0')
    {
      /* '0' must be all alone -- no '.FIELD'.  */
      if (s[1])
        die (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = 0;
      *field_index = 0;
    }
  else if (filenum == '1' || filenum == '2')
    {
      if (s[1] != '.')
        die (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = filenum - '0';
      *field_index = string_to_join_field (s + 2);
    }
  else
    {
      die (EXIT_FAILURE, 0,
           _("invalid file number in field spec: %s"), quote (s));

      /* Tell gcc -W -Wall that we can't get beyond this point.
         This avoids a warning (otherwise legit) that the caller's copies
         of *file_index and *field_index might be used uninitialized.  */
      abort ();
    }
}
/* Split STR at commas, spaces and tabs and append each piece, decoded
   as a field specifier, to the output list.  STR is modified: every
   delimiter found is overwritten with a NUL byte.  */
static void
add_field_list (char *str)
{
  char *item = str;

  do
    {
      /* Terminate this item at the next delimiter, if there is one,
         and remember where the following item starts.  */
      char *rest = strpbrk (item, ", \t");
      if (rest)
        *rest++ = '\0';

      int file_index;
      size_t field_index;
      decode_field_spec (item, &file_index, &field_index);
      add_field (file_index, field_index);

      item = rest;
    }
  while (item);
}
/* Store the zero-based join field VAL in *VAR.  If *VAR was already set
   (that is, differs from SIZE_MAX) to some other value, report the two
   incompatible fields, as 1-based numbers, and exit.  */
static void
set_join_field (size_t *var, size_t val)
{
  bool already_set = (*var != SIZE_MAX);

  if (already_set && *var != val)
    {
      unsigned long int var1 = *var + 1;
      unsigned long int val1 = val + 1;
      die (EXIT_FAILURE, 0,
           _("incompatible join fields %lu, %lu"), var1, val1);
    }

  *var = val;
}
/* How a non-option command-line argument is to be interpreted.  */
enum operand_status
  {
    /* Definitely an operand, i.e. the name of a file to be joined.  */
    MUST_BE_OPERAND,

    /* Either an operand or the argument of a preceding -j1 or -j2
       option.  These two must stay adjacent and in this order: main
       computes the status as MIGHT_BE_J1_ARG + is_j2.  */
    MIGHT_BE_J1_ARG,
    MIGHT_BE_J2_ARG,

    /* Either an operand or the argument of a preceding -o option.  */
    MIGHT_BE_O_ARG
  };
/* Add NAME to the array of input file NAMES with operand statuses
OPERAND_STATUS; currently there are NFILES names in the list. */
static void Line 956
add_file_name (char *name, char *names[2], Line 957
int operand_status[2], int joption_count[2], int *nfiles, Line 958
int *prev_optc_status, int *optc_status) Line 959
{
int n = *nfiles; Line 961
if (n == 2) Line 963
{
bool op0 = (operand_status[0] == MUST_BE_OPERAND); Line 965
char *arg = names[op0]; Line 966
switch (operand_status[op0]) Line 967
{
case MUST_BE_OPERAND: Line 969
error (0, 0, _("extra operand %s"), quoteaf (name)); Line 970
usage (EXIT_FAILURE); Line 971
case MIGHT_BE_J1_ARG: Line 973
joption_count[0]--; Line 974
set_join_field (&join_field_1, string_to_join_field (arg)); Line 975
break; Line 976
case MIGHT_BE_J2_ARG: Line 978
joption_count[1]--; Line 979
set_join_field (&join_field_2, string_to_join_field (arg)); Line 980
break; Line 981
case MIGHT_BE_O_ARG: Line 983
add_field_list (arg); Line 984
break; Line 985
}
if (!op0) Line 987
{
operand_status[0] = operand_status[1]; Line 989
names[0] = names[1]; Line 990
}
n = 1; Line 992
}
operand_status[n] = *prev_optc_status; Line 995
names[n] = name; Line 996
*nfiles = n + 1; Line 997
if (*prev_optc_status == MIGHT_BE_O_ARG) Line 998
*optc_status = MIGHT_BE_O_ARG; Line 999
} Block 36
int
main (int argc, char **argv) Line 1003
{
int optc_status; Line 1005
int prev_optc_status = MUST_BE_OPERAND; Line 1006
int operand_status[2]; Line 1007
int joption_count[2] = { 0, 0 }; Line 1008
FILE *fp1, *fp2; Line 1009
int optc; Line 1010
int nfiles = 0; Line 1011
int i; Line 1012
initialize_main (&argc, &argv); VMS-specific entry point handling wildcard expansion
set_program_name (argv[0]); Retains program name and discards path
setlocale (LC_ALL, ""); Sets up internationalization (i18n)
bindtextdomain (PACKAGE, LOCALEDIR); Assigns i18n directorySets text domain for _() [gettext()] function
textdomain (PACKAGE); Sets text domain for _() [gettext()] function
hard_LC_COLLATE = hard_locale (LC_COLLATE); Line 1019
atexit (close_stdout); Close stdout on exit (see gnulib)
atexit (free_spareline); Close stdout on exit (see gnulib)
print_pairables = true; Line 1024
seen_unpairable = false; Line 1025
issued_disorder_warning[0] = issued_disorder_warning[1] = false; Line 1026
check_input_order = CHECK_ORDER_DEFAULT; Line 1027
while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z", Line 1029
longopts, NULL)) Line 1030
!= -1) Line 1031
{
optc_status = MUST_BE_OPERAND; Line 1033
switch (optc) Line 1035
{
case 'v': Line 1037
print_pairables = false; Line 1038
FALLTHROUGH; Line 1039
case 'a': Line 1041
{
unsigned long int val; Line 1043
if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK Line 1044
|| (val != 1 && val != 2)) Line 1045
die (EXIT_FAILURE, 0, Line 1046
_("invalid field number: %s"), quote (optarg)); Line 1047
if (val == 1) Line 1048
print_unpairables_1 = true; Line 1049
else Line 1050
print_unpairables_2 = true; Line 1051
}
break; Line 1053
case 'e': Line 1055
if (empty_filler && ! STREQ (empty_filler, optarg)) Line 1056
die (EXIT_FAILURE, 0, Line 1057
_("conflicting empty-field replacement strings")); Line 1058
empty_filler = optarg; Line 1059
break; Line 1060
case 'i': Line 1062
ignore_case = true; Line 1063
break; Line 1064
case '1': Line 1066
set_join_field (&join_field_1, string_to_join_field (optarg)); Line 1067
break; Line 1068
case '2': Line 1070
set_join_field (&join_field_2, string_to_join_field (optarg)); Line 1071
break; Line 1072
case 'j': Line 1074
if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1] Line 1075
&& optarg == argv[optind - 1] + 2) Line 1076
{
/* The argument was either "-j1" or "-j2". */
bool is_j2 = (optarg[0] == '2'); Line 1079
joption_count[is_j2]++; Line 1080
optc_status = MIGHT_BE_J1_ARG + is_j2; Line 1081
}
else Line 1083
{
set_join_field (&join_field_1, string_to_join_field (optarg)); Line 1085
set_join_field (&join_field_2, join_field_1); Line 1086
}
break; Line 1088
case 'o': Line 1090
if (STREQ (optarg, "auto")) Line 1091
autoformat = true; Line 1092
else Line 1093
{
add_field_list (optarg); Line 1095
optc_status = MIGHT_BE_O_ARG; Line 1096
}
break; Line 1098
case 't': Line 1100
{
unsigned char newtab = optarg[0]; Line 1102
if (! newtab) Line 1103
newtab = '\n'; /* '' => process the whole line. */ Line 1104
else if (optarg[1]) Line 1105
{
if (STREQ (optarg, "\\0")) Line 1107
newtab = '\0'; Line 1108
else Line 1109
die (EXIT_FAILURE, 0, _("multi-character tab %s"), Line 1110
quote (optarg)); Line 1111
}
if (0 <= tab && tab != newtab) Line 1113
die (EXIT_FAILURE, 0, _("incompatible tabs")); Line 1114
tab = newtab; Line 1115
}
break; Line 1117
case 'z': Line 1119
eolchar = 0; Line 1120
break; Line 1121
case NOCHECK_ORDER_OPTION: Line 1123
check_input_order = CHECK_ORDER_DISABLED; Line 1124
break; Line 1125
case CHECK_ORDER_OPTION: Line 1127
check_input_order = CHECK_ORDER_ENABLED; Line 1128
break; Line 1129
case 1: /* Non-option argument. */ Line 1131
add_file_name (optarg, g_names, operand_status, joption_count, Line 1132
&nfiles, &prev_optc_status, &optc_status); Line 1133
break; Line 1134
case HEADER_LINE_OPTION: Line 1136
join_header_lines = true; Line 1137
break; Line 1138
case_GETOPT_HELP_CHAR; Line 1140
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); Line 1142
default: Line 1144
usage (EXIT_FAILURE); Line 1145
}
prev_optc_status = optc_status; Line 1148
}
/* Process any operands after "--". */
prev_optc_status = MUST_BE_OPERAND; Line 1152
while (optind < argc) Line 1153
add_file_name (argv[optind++], g_names, operand_status, joption_count, Line 1154
&nfiles, &prev_optc_status, &optc_status); Line 1155
if (nfiles != 2) Line 1157
{
if (nfiles == 0) Line 1159
error (0, 0, _("missing operand")); Line 1160
else Line 1161
error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); Line 1162
usage (EXIT_FAILURE); Line 1163
}
/* If "-j1" was specified and it turns out not to have had an argument,
treat it as "-j 1". Likewise for -j2. */
for (i = 0; i < 2; i++) Line 1168
if (joption_count[i] != 0) Line 1169
{
set_join_field (&join_field_1, i); Line 1171
set_join_field (&join_field_2, i); Line 1172
}
if (join_field_1 == SIZE_MAX) Line 1175
join_field_1 = 0; Line 1176
if (join_field_2 == SIZE_MAX) Line 1177
join_field_2 = 0; Line 1178
fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r"); Line 1180...!syscalls auto-comment...
if (!fp1) Line 1181
die (EXIT_FAILURE, errno, "%s", quotef (g_names[0])); Line 1182
fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r"); Line 1183...!syscalls auto-comment...
if (!fp2) Line 1184
die (EXIT_FAILURE, errno, "%s", quotef (g_names[1])); Line 1185
if (fp1 == fp2) Line 1186
die (EXIT_FAILURE, errno, _("both files cannot be standard input")); Line 1187
join (fp1, fp2); Line 1188
if (fclose (fp1) != 0) Line 1190...!syscalls auto-comment...
die (EXIT_FAILURE, errno, "%s", quotef (g_names[0])); Line 1191
if (fclose (fp2) != 0) Line 1192...!syscalls auto-comment...
die (EXIT_FAILURE, errno, "%s", quotef (g_names[1])); Line 1193
if (issued_disorder_warning[0] || issued_disorder_warning[1]) Line 1195
return EXIT_FAILURE; Line 1196
else Line 1197
return EXIT_SUCCESS; Line 1198
} Block 37