/* cut - remove parts of lines of files.  This is the cut utility.
Copyright (C) 1997-2018 Free Software Foundation, Inc.
Copyright (C) 1984 David M. Ihnat
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* The notice above is the GNU GPL, version 3 or later. */
/* Written by David Ihnat. */
/* POSIX changes, bug fixes, long-named options, and cleanup
by David MacKenzie <djm@gnu.ai.mit.edu>.
Rewrite cut_fields and cut_bytes -- Jim Meyering. */
#include <config.h>     /* Provides system specific information */
#include <stdio.h>      /* Provides standard I/O capability */
#include <assert.h>
#include <getopt.h>
#include <sys/types.h>  /* Provides system data types */
#include "system.h"
#include "error.h"
#include "fadvise.h"
#include "getndelim2.h"
#include "hash.h"
#include "xstrndup.h"
#include "set-fields.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "cut"

/* Author credits; expanded in main as the trailing arguments of
   case_GETOPT_VERSION_CHAR.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Jim Meyering")
/* Print MESSAGE as a diagnostic, then leave through usage (EXIT_FAILURE).

   MESSAGE is passed as the argument of an explicit "%s" format rather
   than as the format itself, so a '%' in it (for instance one introduced
   by a translation) is printed literally instead of being interpreted
   as a conversion specification.

   The do/while (0) wrapper makes the expansion a single statement, so
   the macro can be used as the body of an unbraced 'if'.  */
#define FATAL_ERROR(Message) \
  do \
    { \
      error (0, 0, "%s", (Message)); \
      usage (EXIT_FAILURE); \
    } \
  while (0)
/* Cursor into the array of range pairs (reset to FRP at the start of
   every line).  To decide whether a byte or field index is selected by
   a finite range, the index is compared against CURRENT_RP->lo and
   CURRENT_RP->hi.  Once the index exceeds CURRENT_RP->hi, CURRENT_RP is
   advanced to the next range pair.  */
static struct field_range_pair *current_rp;
/* This buffer is used to support the semantics of the -s option
   (or lack of same) when the specified field list includes (does
   not include) the first field.  In both of those cases, the entire
   first field must be read into this buffer to determine whether it
   is followed by a delimiter or a newline before any of it may be
   output.  Otherwise, cut_fields can do the job without using this
   buffer.  */
static char *field_1_buffer;

/* The number of bytes allocated for FIELD_1_BUFFER.  */
static size_t field_1_bufsize;
/* What kind of list the user asked for.  */
enum operating_mode
  {
    /* No -b, -c or -f option has been seen yet.  */
    undefined_mode,

    /* Output characters that are in the given bytes.  */
    byte_mode,

    /* Output the given delimiter-separated fields.  */
    field_mode
  };

/* The mode selected on the command line; set once in main.  */
static enum operating_mode operating_mode;
/* If true, do not output lines containing no delimiter characters.
   Otherwise, all such lines are printed.  This option is valid only
   with field mode.  */
static bool suppress_non_delimited;

/* If true, print all bytes, characters, or fields _except_
   those that were specified.  */
static bool complement;

/* The delimiter character for field mode.  */
static unsigned char delim;

/* The delimiter for each line/record.  */
static unsigned char line_delim = '\n';

/* True if the --output-delimiter=STRING option was specified.  */
static bool output_delimiter_specified;

/* The length of output_delimiter_string.  */
static size_t output_delimiter_length;

/* The output field separator string.  Defaults to the 1-character
   string consisting of the input delimiter.  */
static char *output_delimiter_string;

/* True if we have ever read standard input.  */
static bool have_read_stdin;
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   These values cannot collide with any real option character.  */
enum
{
  OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
  COMPLEMENT_OPTION
};
static struct option const longopts[] = Line 125
{
{"bytes", required_argument, NULL, 'b'}, Line 127
{"characters", required_argument, NULL, 'c'}, Line 128
{"fields", required_argument, NULL, 'f'}, Line 129
{"delimiter", required_argument, NULL, 'd'}, Line 130
{"only-delimited", no_argument, NULL, 's'}, Line 131
{"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION}, Line 132
{"complement", no_argument, NULL, COMPLEMENT_OPTION}, Line 133
{"zero-terminated", no_argument, NULL, 'z'}, Line 134
{GETOPT_HELP_OPTION_DECL}, Line 135
{GETOPT_VERSION_OPTION_DECL}, Line 136
{NULL, 0, NULL, 0} Line 137
}; Block 4
void Line 140
usage (int status) Line 141
{
if (status != EXIT_SUCCESS) Line 143
emit_try_help (); ...!common auto-comment...
else Line 145
{
printf (_("\ Line 147
Usage: %s OPTION... [FILE]...\n\ Line 148
"), Line 149
program_name); Line 150
fputs (_("\ Line 151
Print selected parts of lines from each FILE to standard output.\n\ Line 152
"), stdout); Line 153
emit_stdin_note (); ...!common auto-comment...
emit_mandatory_arg_note (); ...!common auto-comment...
fputs (_("\ Line 158
-b, --bytes=LIST select only these bytes\n\ Line 159
-c, --characters=LIST select only these characters\n\ Line 160
-d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\ Line 161
"), stdout); Line 162
fputs (_("\ Line 163
-f, --fields=LIST select only these fields; also print any line\n\ Line 164
that contains no delimiter character, unless\n\ Line 165
the -s option is specified\n\ Line 166
-n (ignored)\n\ Line 167
"), stdout); Line 168
fputs (_("\ Line 169
--complement complement the set of selected bytes, characters\n\ Line 170
or fields\n\ Line 171
"), stdout); Line 172
fputs (_("\ Line 173
-s, --only-delimited do not print lines not containing delimiters\n\ Line 174
--output-delimiter=STRING use STRING as the output delimiter\n\ Line 175
the default is to use the input delimiter\n\ Line 176
"), stdout); Line 177
fputs (_("\ Line 178
-z, --zero-terminated line delimiter is NUL, not newline\n\ Line 179
"), stdout); Line 180
fputs (HELP_OPTION_DESCRIPTION, stdout); Line 181
fputs (VERSION_OPTION_DESCRIPTION, stdout); Line 182
fputs (_("\ Line 183
\n\
Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\ Line 185
range, or many ranges separated by commas. Selected input is written\n\ Line 186
in the same order that it is read, and is written exactly once.\n\ Line 187
"), stdout); Line 188
fputs (_("\ Line 189
Each range is one of:\n\ Line 190
\n\
N N'th byte, character or field, counted from 1\n\ Line 192
N- from N'th byte, character or field, to end of line\n\ Line 193
N-M from N'th to M'th (included) byte, character or field\n\ Line 194
-M from first to M'th (included) byte, character or field\n\ Line 195
"), stdout); Line 196
emit_ancillary_info (PROGRAM_NAME); Line 197
}
exit (status); Line 199
} Block 5
/* Increment *ITEM_IDX (i.e., a field or byte index),
and if required CURRENT_RP. */
static inline void Line 206
next_item (uintmax_t *item_idx) Line 207
{
(*item_idx)++; Line 209
if ((*item_idx) > current_rp->hi) Line 210
current_rp++; Line 211
} Block 6
/* Return nonzero if the K'th field or byte is printable. */
static inline bool Line 216
print_kth (uintmax_t k) Line 217
{
return current_rp->lo <= k; Line 219
} Block 7
/* Return nonzero if K'th byte is the beginning of a range. */
static inline bool Line 224
is_range_start_index (uintmax_t k) Line 225
{
return k == current_rp->lo; Line 227
} Block 8
/* Read from stream STREAM, printing to standard output any selected bytes. */
static void Line 232
cut_bytes (FILE *stream) Line 233
{
uintmax_t byte_idx; /* Number of bytes in the line so far. */ Line 235
/* Whether to begin printing delimiters between ranges for the current line.
Set after we've begun printing data corresponding to the first range. */
bool print_delimiter; Line 238
byte_idx = 0; Line 240
print_delimiter = false; Line 241
current_rp = frp; Line 242
while (true) Line 243
{
int c; /* Each character from the file. */ Line 245
c = getc (stream); Line 247
if (c == line_delim) Line 249
{
putchar (c); Line 251
byte_idx = 0; Line 252
print_delimiter = false; Line 253
current_rp = frp; Line 254
}
else if (c == EOF) Line 256
{
if (byte_idx > 0) Line 258
putchar (line_delim); Line 259
break; Line 260
}
else Line 262
{
next_item (&byte_idx); Line 264
if (print_kth (byte_idx)) Line 265
{
if (output_delimiter_specified) Line 267
{
if (print_delimiter && is_range_start_index (byte_idx)) Line 269
{
fwrite (output_delimiter_string, sizeof (char), Line 271...!syscalls auto-comment...
output_delimiter_length, stdout); Line 272
}
print_delimiter = true; Line 274
}
putchar (c); Line 277
}
}
}
} Block 9
/* Read from stream STREAM, printing to standard output any selected fields. */
static void Line 285
cut_fields (FILE *stream) Line 286
{
int c; Line 288
uintmax_t field_idx = 1; Line 289
bool found_any_selected_field = false; Line 290
bool buffer_first_field; Line 291
current_rp = frp; Line 293
c = getc (stream); Line 295
if (c == EOF) Line 296
return; Line 297
ungetc (c, stream); Line 299
c = 0; Line 300
/* To support the semantics of the -s flag, we may have to buffer
all of the first field to determine whether it is 'delimited.'
But that is unnecessary if all non-delimited lines must be printed
and the first field has been selected, or if non-delimited lines
must be suppressed and the first field has *not* been selected.
That is because a non-delimited line has exactly one field. */
buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); Line 308
while (1) Line 310
{
if (field_idx == 1 && buffer_first_field) Line 312
{
ssize_t len; Line 314
size_t n_bytes; Line 315
len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, Line 317
GETNLINE_NO_LIMIT, delim, line_delim, stream); Line 318
if (len < 0) Line 319
{
free (field_1_buffer); Line 321
field_1_buffer = NULL; Line 322
if (ferror (stream) || feof (stream)) Line 323
break; Line 324
xalloc_die (); ...!common auto-comment...
}
n_bytes = len; Line 328
assert (n_bytes != 0); Line 329
c = 0; Line 331
/* If the first field extends to the end of line (it is not
delimited) and we are printing all non-delimited lines,
print this one. */
if (to_uchar (field_1_buffer[n_bytes - 1]) != delim) Line 336
{
if (suppress_non_delimited) Line 338
{
/* Empty. */
}
else Line 342
{
fwrite (field_1_buffer, sizeof (char), n_bytes, stdout); Line 344...!syscalls auto-comment...
/* Make sure the output line is newline terminated. */
if (field_1_buffer[n_bytes - 1] != line_delim) Line 346
putchar (line_delim); Line 347
c = line_delim; Line 348
}
continue; Line 350
}
if (print_kth (1)) Line 352
{
/* Print the field, but not the trailing delimiter. */
fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout); Line 355...!syscalls auto-comment...
/* With -d$'\n' don't treat the last '\n' as a delimiter. */
if (delim == line_delim) Line 358
{
int last_c = getc (stream); Line 360
if (last_c != EOF) Line 361
{
ungetc (last_c, stream); Line 363
found_any_selected_field = true; Line 364
}
}
else Line 367
found_any_selected_field = true; Line 368
}
next_item (&field_idx); Line 370
}
int prev_c = c; Line 373
if (print_kth (field_idx)) Line 375
{
if (found_any_selected_field) Line 377
{
fwrite (output_delimiter_string, sizeof (char), Line 379...!syscalls auto-comment...
output_delimiter_length, stdout); Line 380
}
found_any_selected_field = true; Line 382
while ((c = getc (stream)) != delim && c != line_delim && c != EOF) Line 384
{
putchar (c); Line 386
prev_c = c; Line 387
}
}
else Line 390
{
while ((c = getc (stream)) != delim && c != line_delim && c != EOF) Line 392
{
prev_c = c; Line 394
}
}
/* With -d$'\n' don't treat the last '\n' as a delimiter. */
if (delim == line_delim && c == delim) Line 399
{
int last_c = getc (stream); Line 401
if (last_c != EOF) Line 402
ungetc (last_c, stream); Line 403
else Line 404
c = last_c; Line 405
}
if (c == delim) Line 408
next_item (&field_idx); Line 409
else if (c == line_delim || c == EOF) Line 410
{
if (found_any_selected_field Line 412
|| !(suppress_non_delimited && field_idx == 1)) Line 413
{
if (c == line_delim || prev_c != line_delim Line 415
|| delim == line_delim) Line 416
putchar (line_delim); Line 417
}
if (c == EOF) Line 419
break; Line 420
field_idx = 1; Line 421
current_rp = frp; Line 422
found_any_selected_field = false; Line 423
}
}
} Block 10
static void Line 428
cut_stream (FILE *stream) Line 429
{
if (operating_mode == byte_mode) Line 431
cut_bytes (stream); Line 432
else Line 433
cut_fields (stream); Line 434
} Block 11
/* Process file FILE to standard output.
Return true if successful. */
static bool Line 440
cut_file (char const *file) Line 441
{
FILE *stream; Line 443
if (STREQ (file, "-")) Line 445
{
have_read_stdin = true; Line 447
stream = stdin; Line 448
}
else Line 450
{
stream = fopen (file, "r"); Line 452...!syscalls auto-comment...
if (stream == NULL) Line 453
{
error (0, errno, "%s", quotef (file)); Line 455
return false; Line 456
}
}
fadvise (stream, FADVISE_SEQUENTIAL); Line 460...!syscalls auto-comment...
cut_stream (stream); Line 462
if (ferror (stream)) Line 464
{
error (0, errno, "%s", quotef (file)); Line 466
return false; Line 467
}
if (STREQ (file, "-")) Line 469
clearerr (stream); /* Also clear EOF. */ Line 470
else if (fclose (stream) == EOF) Line 471...!syscalls auto-comment...
{
error (0, errno, "%s", quotef (file)); Line 473
return false; Line 474
}
return true; Line 476
} Block 12
int
main (int argc, char **argv) Line 480
{
int optc; Line 482
bool ok; Line 483
bool delim_specified = false; Line 484
char *spec_list_string IF_LINT ( = NULL); Line 485
initialize_main (&argc, &argv); VMS-specific entry point handling wildcard expansion
set_program_name (argv[0]); Retains program name and discards path
setlocale (LC_ALL, ""); Sets up internationalization (i18n)
bindtextdomain (PACKAGE, LOCALEDIR); Assigns i18n directorySets text domain for _() [gettext()] function
textdomain (PACKAGE); Sets text domain for _() [gettext()] function
atexit (close_stdout); Close stdout on exit (see gnulib)
operating_mode = undefined_mode; Line 495
/* By default, all non-delimited lines are printed. */
suppress_non_delimited = false; Line 498
delim = '\0'; Line 500
have_read_stdin = false; Line 501
while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, NULL)) != -1)Line 503
{
switch (optc) Line 505
{
case 'b': Line 507
case 'c': Line 508
/* Build the byte list. */
if (operating_mode != undefined_mode) Line 510
FATAL_ERROR (_("only one type of list may be specified")); Line 511
operating_mode = byte_mode; Line 512
spec_list_string = optarg; Line 513
break; Line 514
case 'f': Line 516
/* Build the field list. */
if (operating_mode != undefined_mode) Line 518
FATAL_ERROR (_("only one type of list may be specified")); Line 519
operating_mode = field_mode; Line 520
spec_list_string = optarg; Line 521
break; Line 522
case 'd': Line 524
/* New delimiter. */
/* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
if (optarg[0] != '\0' && optarg[1] != '\0') Line 527
FATAL_ERROR (_("the delimiter must be a single character")); Line 528
delim = optarg[0]; Line 529
delim_specified = true; Line 530
break; Line 531
case OUTPUT_DELIMITER_OPTION: Line 533
output_delimiter_specified = true; Line 534
/* Interpret --output-delimiter='' to mean
'use the NUL byte as the delimiter.' */
output_delimiter_length = (optarg[0] == '\0' Line 537
? 1 : strlen (optarg)); Line 538
output_delimiter_string = xstrdup (optarg); Line 539
break; Line 540
case 'n': Line 542
break; Line 543
case 's': Line 545
suppress_non_delimited = true; Line 546
break; Line 547
case 'z': Line 549
line_delim = '\0'; Line 550
break; Line 551
case COMPLEMENT_OPTION: Line 553
complement = true; Line 554
break; Line 555
case_GETOPT_HELP_CHAR; Line 557
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); Line 559
default: Line 561
usage (EXIT_FAILURE); Line 562
}
}
if (operating_mode == undefined_mode) Line 566
FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); Line 567
if (delim_specified && operating_mode != field_mode) Line 569
FATAL_ERROR (_("an input delimiter may be specified only\ Line 570
when operating on fields")); Line 571
if (suppress_non_delimited && operating_mode != field_mode) Line 573
FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\ Line 574
\tonly when operating on fields")); Line 575
set_fields (spec_list_string, Line 577
( (operating_mode == field_mode) ? 0 : SETFLD_ERRMSG_USE_POS) Line 578
| (complement ? SETFLD_COMPLEMENT : 0) ); Line 579
if (!delim_specified) Line 581
delim = '\t'; Line 582
if (output_delimiter_string == NULL) Line 584
{
static char dummy[2]; Line 586
dummy[0] = delim; Line 587
dummy[1] = '\0'; Line 588
output_delimiter_string = dummy; Line 589
output_delimiter_length = 1; Line 590
}
if (optind == argc) Line 593
ok = cut_file ("-"); Line 594
else Line 595
for (ok = true; optind < argc; optind++) Line 596
ok &= cut_file (argv[optind]); Line 597
if (have_read_stdin && fclose (stdin) == EOF) Line 600...!syscalls auto-comment...
{
error (0, errno, "-"); Line 602
ok = false; Line 603
}
IF_LINT (reset_fields ()); Line 606
return ok ? EXIT_SUCCESS : EXIT_FAILURE; Line 608
} Block 13