/* csplit - split a file into sections determined by context lines.
   This is the csplit utility.
   Copyright (C) 1991-2018 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
   (The notice above is the GNU GPL, version 3 or later.)  */
/* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
#include <config.h> Provides system specific information
#include <assert.h> ...!includes auto-comment...
#include <getopt.h> ...!includes auto-comment...
#include <sys/types.h> Provides system data types
#include <signal.h> ...!includes auto-comment...
#include "system.h" ...!includes auto-comment...
#include <regex.h> ...!includes auto-comment...
#include "die.h" ...!includes auto-comment...
#include "error.h" ...!includes auto-comment...
#include "fd-reopen.h" ...!includes auto-comment...
#include "quote.h" ...!includes auto-comment...
#include "safe-read.h" ...!includes auto-comment...
#include "stdio--.h" ...!includes auto-comment...
#include "xdectoint.h" ...!includes auto-comment...
#include "xstrtol.h" ...!includes auto-comment...
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "csplit"

/* The credited authors.  Expands to two proper_name () calls separated
   by a comma.  Each continuation backslash must be the very last
   character on its line, otherwise the definition ends early.  */
#define AUTHORS \
  proper_name ("Stuart Kemp"), \
  proper_name ("David MacKenzie")

/* The default prefix for output file names.  */
#define DEFAULT_PREFIX "xx"
/* A compiled pattern argument.  One record describes either a regular
   expression pattern (REGEXPR true) or a line-count pattern, together
   with its repeat information.  */
struct control
{
  intmax_t offset;		/* Offset from regexp to split at.  */
  uintmax_t lines_required;	/* Number of lines required.  */
  uintmax_t repeat;		/* Repeat count.  */
  int argnum;			/* ARGV index.  */
  bool repeat_forever;		/* True if '*' used as a repeat count.  */
  bool ignore;			/* If true, produce no output (for regexp).  */
  bool regexpr;			/* True if regular expression was used.  */
  struct re_pattern_buffer re_compiled;	/* Compiled regular expression.  */
};
/* Buffer sizing parameters.  Defining DEBUG selects deliberately tiny
   values so that the buffer-growth and line-list chaining code paths
   are exercised.  The two sets are mutually exclusive, so no macro is
   ever defined twice with different replacement lists.  */
#ifdef DEBUG
/* Some small values to test the algorithms.  */
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
/* Initial size of data area in buffers.  */
# define START_SIZE 8191

/* Increment size for data area.  */
# define INCR_SIZE 2048

/* Number of lines kept in each node in line list.  */
# define CTRL_SIZE 80
#endif
/* A string with a length count.  LEN is the number of bytes of data
   that STR points to.  */
struct cstring
{
  size_t len;	/* Byte count of the data at STR.  */
  char *str;	/* Start of the data.  */
};
/* Pointers to the beginnings of lines in the buffer area.
These structures are linked together if needed. */
struct line Line 88
{
size_t used; /* Number of offsets used in this struct. */ Line 90
size_t insert_index; /* Next offset to use when inserting line. */ Line 91
size_t retrieve_index; /* Next index to use when retrieving line. */ Line 92
struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */ Line 93
struct line *next; /* Next in linked list. */ Line 94
}; Block 3
/* The structure to hold the input lines.
   Contains a pointer to the data area and a list containing
   pointers to the individual lines.  Records are chained through
   NEXT.  */
struct buffer_record
{
  size_t bytes_alloc;		/* Size of the buffer area.  */
  size_t bytes_used;		/* Bytes used in the buffer area.  */
  uintmax_t start_line;		/* First line number in this buffer.  */
  uintmax_t first_available;	/* First line that can be retrieved.  */
  size_t num_lines;		/* Number of complete lines in this buffer.  */
  char *buffer;			/* Data area.  */
  struct line *line_start;	/* Head of list of pointers to lines.  */
  struct line *curr_line;	/* The line start record currently in use.  */
  struct buffer_record *next;	/* Next buffer record in the list.  */
};
/* Forward declarations of output-file routines that are called before
   their definitions appear.  */
static void close_output_file (void);
static void create_output_file (void);
static void delete_all_files (bool in_signal_handler);
static void save_line_to_file (const struct cstring *line);
/* Start of buffer list. */
static struct buffer_record *head = NULL; Line 119
/* Partially read line. */
static char *hold_area = NULL; Line 122
/* Number of bytes in 'hold_area'. */
static size_t hold_count = 0; Line 125
/* Number of the last line in the buffers. */
static uintmax_t last_line_number = 0; Line 128
/* Number of the line currently being examined. */
static uintmax_t current_line = 0; Line 131
/* If true, we have read EOF. */
static bool have_read_eof = false; Line 134
/* Name of output files. */
static char *volatile filename_space = NULL; Line 137
/* Prefix part of output file names. */
static char const *volatile prefix = NULL; Line 140
/* Suffix part of output file names. */
static char *volatile suffix = NULL; Line 143
/* Number of digits to use in output file names. */
static int volatile digits = 2; Line 146
/* Number of files created so far. */
static unsigned int volatile files_created = 0; Line 149
/* Number of bytes written to current file. */
static uintmax_t bytes_written; Line 152
/* Output file pointer. */
static FILE *output_stream = NULL; Line 155
/* Output file name. */
static char *output_filename = NULL; Line 158
/* Perhaps it would be cleaner to pass arg values instead of indexes. */
static char **global_argv; Line 161
/* If true, do not print the count of bytes in each output file. */
static bool suppress_count; Line 164
/* If true, remove output files on error. */
static bool volatile remove_files; Line 167
/* If true, remove all output files which have a zero length. */
static bool elide_empty_files; Line 170
/* If true, suppress the lines that match the PATTERN */
static bool suppress_matched; Line 173
/* The compiled pattern arguments, which determine how to split
the input file. */
static struct control *controls; Line 177
/* Number of elements in 'controls'. */
static size_t control_used; Line 180
/* The set of signals that are caught. */
static sigset_t caught_signals; Line 183
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   Such a value cannot collide with any option letter.  */
enum
{
  SUPPRESS_MATCHED_OPTION = CHAR_MAX + 1
};
static struct option const longopts[] = Line 192
{
{"digits", required_argument, NULL, 'n'}, Line 194
{"quiet", no_argument, NULL, 'q'}, Line 195
{"silent", no_argument, NULL, 's'}, Line 196
{"keep-files", no_argument, NULL, 'k'}, Line 197
{"elide-empty-files", no_argument, NULL, 'z'}, Line 198
{"prefix", required_argument, NULL, 'f'}, Line 199
{"suffix-format", required_argument, NULL, 'b'}, Line 200
{"suppress-matched", no_argument, NULL, SUPPRESS_MATCHED_OPTION}, Line 201
{GETOPT_HELP_OPTION_DECL}, Line 202
{GETOPT_VERSION_OPTION_DECL}, Line 203
{NULL, 0, NULL, 0} Line 204
}; Block 6
/* Optionally remove files created so far; then exit.
Called when an error detected. */
static void Line 210
cleanup (void) Line 211
{
sigset_t oldset; Line 213
close_output_file (); Line 215
sigprocmask (SIG_BLOCK, &caught_signals, &oldset); Line 217
delete_all_files (false); Line 218
sigprocmask (SIG_SETMASK, &oldset, NULL); Line 219
} Block 7
static void cleanup_fatal (void) ATTRIBUTE_NORETURN; Line 222
static void Line 223
cleanup_fatal (void) Line 224
{
cleanup (); Line 226
exit (EXIT_FAILURE); Line 227
} Block 8
/* Report memory exhaustion and exit through cleanup_fatal, so that the
   usual output-file cleanup still happens.
   NOTE(review): presumably this replaces the allocation-failure hook
   used by the x*alloc helpers -- confirm against the library.  */

extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
/* Signal handler for signal SIG: remove the output files (if
   requested), then let the signal take its default action.  */

static void
interrupt_handler (int sig)
{
  delete_all_files (true);

  /* Restore the default disposition and re-send the signal.  It stays
     blocked while this handler runs, so the default action is taken
     only after the handler returns and the block is lifted.  */
  signal (sig, SIG_DFL);
  raise (sig);
}
/* Keep track of NUM bytes of a partial line in buffer START.
These bytes will be retrieved later when another large buffer is read. */
static void Line 251
save_to_hold_area (char *start, size_t num) Line 252
{
free (hold_area); Line 254
hold_area = start; Line 255
hold_count = num; Line 256
} Block 11
/* Read up to MAX_N_BYTES bytes from the input stream into DEST.
Return the number of bytes read. */
static size_t Line 262
read_input (char *dest, size_t max_n_bytes) Line 263
{
size_t bytes_read; Line 265
if (max_n_bytes == 0) Line 267
return 0; Line 268
bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes); Line 270...!syscalls auto-comment...
if (bytes_read == 0) Line 272
have_read_eof = true; Line 273
if (bytes_read == SAFE_READ_ERROR) Line 275
{
error (0, errno, _("read error")); Line 277
cleanup_fatal (); Line 278
}
return bytes_read; Line 281
} Block 12
/* Initialize existing line record P. */
static void Line 286
clear_line_control (struct line *p) Line 287
{
p->used = 0; Line 289
p->insert_index = 0; Line 290
p->retrieve_index = 0; Line 291
} Block 13
/* Return a new, initialized line record. */
static struct line * Line 296
new_line_control (void) Line 297
{
struct line *p = xmalloc (sizeof *p); Line 299
p->next = NULL; Line 301
clear_line_control (p); Line 302
return p; Line 304
} Block 14
/* Record LINE_START, which is the address of the start of a line
of length LINE_LEN in the large buffer, in the lines buffer of B. */
static void Line 310
keep_new_line (struct buffer_record *b, char *line_start, size_t line_len) Line 311
{
struct line *l; Line 313
/* If there is no existing area to keep line info, get some. */
if (b->line_start == NULL) Line 316
b->line_start = b->curr_line = new_line_control (); Line 317
/* If existing area for lines is full, get more. */
if (b->curr_line->used == CTRL_SIZE) Line 320
{
b->curr_line->next = new_line_control (); Line 322
b->curr_line = b->curr_line->next; Line 323
}
l = b->curr_line; Line 326
/* Record the start of the line, and update counters. */
l->starts[l->insert_index].str = line_start; Line 329
l->starts[l->insert_index].len = line_len; Line 330
l->used++; Line 331
l->insert_index++; Line 332
} Block 15
/* Scan the buffer in B for newline characters
and record the line start locations and lengths in B.
Return the number of lines found in this buffer.
There may be an incomplete line at the end of the buffer;
a pointer is kept to this area, which will be used when
the next buffer is filled. */
static size_t Line 343
record_line_starts (struct buffer_record *b) Line 344
{
char *line_start; /* Start of current line. */ Line 346
char *line_end; /* End of each line found. */ Line 347
size_t bytes_left; /* Length of incomplete last line. */ Line 348
size_t lines; /* Number of lines found. */ Line 349
size_t line_length; /* Length of each line found. */ Line 350
if (b->bytes_used == 0) Line 352
return 0; Line 353
lines = 0; Line 355
line_start = b->buffer; Line 356
bytes_left = b->bytes_used; Line 357
while (true) Line 359
{
line_end = memchr (line_start, '\n', bytes_left); Line 361
if (line_end == NULL) Line 362
break; Line 363
line_length = line_end - line_start + 1; Line 364
keep_new_line (b, line_start, line_length); Line 365
bytes_left -= line_length; Line 366
line_start = line_end + 1; Line 367
lines++; Line 368
}
/* Check for an incomplete last line. */
if (bytes_left) Line 372
{
if (have_read_eof) Line 374
{
keep_new_line (b, line_start, bytes_left); Line 376
lines++; Line 377
}
else Line 379
save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left); Line 380
}
b->num_lines = lines; Line 383
b->first_available = b->start_line = last_line_number + 1; Line 384
last_line_number += lines; Line 385
return lines; Line 387
} Block 16
/* Return a new buffer with room to store SIZE bytes, plus
an extra byte for safety. */
static struct buffer_record * Line 393
create_new_buffer (size_t size) Line 394
{
struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer); Line 396
new_buffer->buffer = xmalloc (size + 1); Line 398
new_buffer->bytes_alloc = size; Line 400
new_buffer->line_start = new_buffer->curr_line = NULL; Line 401
return new_buffer; Line 403
} Block 17
/* Return a new buffer of at least MINSIZE bytes. If a buffer of at
least that size is currently free, use it, otherwise create a new one. */
static struct buffer_record * Line 409
get_new_buffer (size_t min_size) Line 410
{
struct buffer_record *new_buffer; /* Buffer to return. */ Line 412
size_t alloc_size; /* Actual size that will be requested. */ Line 413
alloc_size = START_SIZE; Line 415
if (alloc_size < min_size) Line 416
{
size_t s = min_size - alloc_size + INCR_SIZE - 1; Line 418
alloc_size += s - s % INCR_SIZE; Line 419
}
new_buffer = create_new_buffer (alloc_size); Line 422
new_buffer->num_lines = 0; Line 424
new_buffer->bytes_used = 0; Line 425
new_buffer->start_line = new_buffer->first_available = last_line_number + 1; Line 426
new_buffer->next = NULL; Line 427
return new_buffer; Line 429
} Block 18
static void Line 432
free_buffer (struct buffer_record *buf) Line 433
{
struct line *l; Line 435
for (l = buf->line_start; l;) Line 436
{
struct line *n = l->next; Line 438
free (l); Line 439
l = n; Line 440
}
buf->line_start = NULL; Line 442
free (buf->buffer); Line 443
buf->buffer = NULL; Line 444
} Block 19
/* Append buffer BUF to the linked list of buffers that contain
some data yet to be processed. */
static void Line 450
save_buffer (struct buffer_record *buf) Line 451
{
struct buffer_record *p; Line 453
buf->next = NULL; Line 455
buf->curr_line = buf->line_start; Line 456
if (head == NULL) Line 458
head = buf; Line 459
else Line 460
{
for (p = head; p->next; p = p->next) Line 462
/* Do nothing. */ ;
p->next = buf; Line 464
}
} Block 20
/* Fill a buffer of input.
Set the initial size of the buffer to a default.
Fill the buffer (from the hold area and input stream)
and find the individual lines.
If no lines are found (the buffer is too small to hold the next line),
release the current buffer (whose contents would have been put in the
hold area) and repeat the process with another large buffer until at least
one entire line has been read.
Return true if a new buffer was obtained, otherwise false
(in which case end-of-file must have been encountered). */
static bool Line 481
load_buffer (void) Line 482
{
struct buffer_record *b; Line 484
size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */ Line 485
size_t bytes_avail; /* Size of new buffer created. */ Line 486
size_t lines_found; /* Number of lines in this new buffer. */ Line 487
char *p; /* Place to load into buffer. */ Line 488
if (have_read_eof) Line 490
return false; Line 491
/* We must make the buffer at least as large as the amount of data
in the partial line left over from the last call. */
if (bytes_wanted < hold_count) Line 495
bytes_wanted = hold_count; Line 496
while (1) Line 498
{
b = get_new_buffer (bytes_wanted); Line 500
bytes_avail = b->bytes_alloc; /* Size of buffer returned. */ Line 501
p = b->buffer; Line 502
/* First check the 'holding' area for a partial line. */
if (hold_count) Line 505
{
memcpy (p, hold_area, hold_count); Line 507
p += hold_count; Line 508
b->bytes_used += hold_count; Line 509
bytes_avail -= hold_count; Line 510
hold_count = 0; Line 511
}
b->bytes_used += read_input (p, bytes_avail); Line 514
lines_found = record_line_starts (b); Line 516
if (lines_found || have_read_eof) Line 518
break; Line 519
if (xalloc_oversized (2, b->bytes_alloc)) Line 521
xalloc_die (); ...!common auto-comment...
bytes_wanted = 2 * b->bytes_alloc; Line 523
free_buffer (b); Line 524
free (b); Line 525
}
if (lines_found) Line 528
save_buffer (b); Line 529
else Line 530
{
free_buffer (b); Line 532
free (b); Line 533
}
return lines_found != 0; Line 536
} Block 21
/* Return the line number of the first line that has not yet been retrieved. */
static uintmax_t Line 541
get_first_line_in_buffer (void) Line 542
{
if (head == NULL && !load_buffer ()) Line 544
die (EXIT_FAILURE, errno, _("input disappeared")); Line 545
return head->first_available; Line 547
} Block 22
/* Return a pointer to the logical first line in the buffer and make the
next line the logical first line.
Return NULL if there is no more input. */
static struct cstring * Line 554
remove_line (void) Line 555
{
/* If non-NULL, this is the buffer for which the previous call
returned the final line. So now, presuming that line has been
processed, we can free the buffer and reset this pointer. */
static struct buffer_record *prev_buf = NULL; Line 560
struct cstring *line; /* Return value. */ Line 562
struct line *l; /* For convenience. */ Line 563
if (prev_buf) Line 565
{
free_buffer (prev_buf); Line 567
free (prev_buf); Line 568
prev_buf = NULL; Line 569
}
if (head == NULL && !load_buffer ()) Line 572
return NULL; Line 573
if (current_line < head->first_available) Line 575
current_line = head->first_available; Line 576
++(head->first_available); Line 578
l = head->curr_line; Line 580
line = &l->starts[l->retrieve_index]; Line 582
/* Advance index to next line. */
if (++l->retrieve_index == l->used) Line 585
{
/* Go on to the next line record. */
head->curr_line = l->next; Line 588
if (head->curr_line == NULL || head->curr_line->used == 0) Line 589
{
/* Go on to the next data block.
but first record the current one so we can free it
once the line we're returning has been processed. */
prev_buf = head; Line 594
head = head->next; Line 595
}
}
return line; Line 599
}
/* Search the buffers for line LINENUM, reading more input if necessary.
Return a pointer to the line, or NULL if it is not found in the file. */
static struct cstring * Line 605
find_line (uintmax_t linenum) Line 606
{
struct buffer_record *b; Line 608
if (head == NULL && !load_buffer ()) Line 610
return NULL; Line 611
if (linenum < head->start_line) Line 613
return NULL; Line 614
for (b = head;;) Line 616
{
assert (b); Line 618
if (linenum < b->start_line + b->num_lines) Line 619
{
/* The line is in this buffer. */
struct line *l; Line 622
size_t offset; /* How far into the buffer the line is. */ Line 623
l = b->line_start; Line 625
offset = linenum - b->start_line; Line 626
/* Find the control record. */
while (offset >= CTRL_SIZE) Line 628
{
l = l->next; Line 630
offset -= CTRL_SIZE; Line 631
}
return &l->starts[offset]; Line 633
}
if (b->next == NULL && !load_buffer ()) Line 635
return NULL; Line 636
b = b->next; /* Try the next data block. */ Line 637
}
} Block 24
/* Return true if at least one more line is available for input. */
static bool Line 643
no_more_lines (void) Line 644
{
return find_line (current_line + 1) == NULL; Line 646
} Block 25
/* Make NAME the source of standard input.  The name "-" leaves
   standard input as it is.  Exit with a diagnostic if NAME cannot be
   opened for reading.  */

static void
set_input_file (const char *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
         quoteaf (name));
}
/* Write all lines from the beginning of the buffer up to, but
not including, line LAST_LINE, to the current output file.
If IGNORE is true, do not output lines selected here.
ARGNUM is the index in ARGV of the current pattern. */
static void Line 664
write_to_file (uintmax_t last_line, bool ignore, int argnum) Line 665
{
struct cstring *line; Line 667
uintmax_t first_line; /* First available input line. */ Line 668
uintmax_t lines; /* Number of lines to output. */ Line 669
uintmax_t i; Line 670
first_line = get_first_line_in_buffer (); Line 672
if (first_line > last_line) Line 674
{
error (0, 0, _("%s: line number out of range"), Line 676
quote (global_argv[argnum])); Line 677
cleanup_fatal (); Line 678
}
lines = last_line - first_line; Line 681
for (i = 0; i < lines; i++) Line 683
{
line = remove_line (); Line 685
if (line == NULL) Line 686
{
error (0, 0, _("%s: line number out of range"), Line 688
quote (global_argv[argnum])); Line 689
cleanup_fatal (); Line 690
}
if (!ignore) Line 692
save_line_to_file (line); Line 693
}
} Block 27
/* Write every remaining input line to the current output file.  Used
   once all patterns have been processed, and when a search gives up.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();
      if (line == NULL)
        break;
      save_line_to_file (line);
    }
}
/* Handle an attempt to read beyond EOF under the control of record P,
on iteration REPETITION if nonzero. */
static void handle_line_error (const struct control *, uintmax_t) Line 711
ATTRIBUTE_NORETURN; Line 712
static void Line 713
handle_line_error (const struct control *p, uintmax_t repetition) Line 714
{
char buf[INT_BUFSIZE_BOUND (uintmax_t)]; Line 716
fprintf (stderr, _("%s: %s: line number out of range"), Line 718
program_name, quote (umaxtostr (p->lines_required, buf))); Line 719
if (repetition) Line 720
fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf)); Line 721
else Line 722
fprintf (stderr, "\n"); Line 723
cleanup_fatal (); Line 725
} Block 29
/* Determine the line number that marks the end of this file,
then get those lines and save them to the output file.
P is the control record.
REPETITION is the repetition number. */
static void Line 733
process_line_count (const struct control *p, uintmax_t repetition) Line 734
{
uintmax_t linenum; Line 736
uintmax_t last_line_to_save = p->lines_required * (repetition + 1); Line 737
create_output_file (); Line 739
/* Ensure that the line number specified is not 1 greater than
the number of lines in the file.
When suppressing matched lines, check before the loop. */
if (no_more_lines () && suppress_matched) Line 744
handle_line_error (p, repetition); Line 745
linenum = get_first_line_in_buffer (); Line 747
while (linenum++ < last_line_to_save) Line 748
{
struct cstring *line = remove_line (); Line 750
if (line == NULL) Line 751
handle_line_error (p, repetition); Line 752
save_line_to_file (line); Line 753
}
close_output_file (); Line 756
if (suppress_matched) Line 758
remove_line (); Line 759
/* Ensure that the line number specified is not 1 greater than
the number of lines in the file. */
if (no_more_lines () && !suppress_matched) Line 763
handle_line_error (p, repetition); Line 764
} Block 30
static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;Line 767
static void Line 768
regexp_error (struct control *p, uintmax_t repetition, bool ignore) Line 769
{
fprintf (stderr, _("%s: %s: match not found"), Line 771
program_name, quote (global_argv[p->argnum])); Line 772
if (repetition) Line 774
{
char buf[INT_BUFSIZE_BOUND (uintmax_t)]; Line 776
fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf)); Line 777
}
else Line 779
fprintf (stderr, "\n"); Line 780
if (!ignore) Line 782
{
dump_rest_of_file (); Line 784
close_output_file (); Line 785
}
cleanup_fatal (); Line 787
} Block 31
/* Read the input until a line matches the regexp in P, outputting
it unless P->IGNORE is true.
REPETITION is this repeat-count; 0 means the first time. */
static void Line 794
process_regexp (struct control *p, uintmax_t repetition) Line 795
{
struct cstring *line; /* From input file. */ Line 797
size_t line_len; /* To make "$" in regexps work. */ Line 798
uintmax_t break_line; /* First line number of next file. */ Line 799
bool ignore = p->ignore; /* If true, skip this section. */ Line 800
regoff_t ret; Line 801
if (!ignore) Line 803
create_output_file (); Line 804
if (suppress_matched && current_line > 0) Line 806
remove_line (); Line 807
/* If there is no offset for the regular expression, or
it is positive, then it is not necessary to buffer the lines. */
if (p->offset >= 0) Line 812
{
while (true) Line 814
{
line = find_line (++current_line); Line 816
if (line == NULL) Line 817
{
if (p->repeat_forever) Line 819
{
if (!ignore) Line 821
{
dump_rest_of_file (); Line 823
close_output_file (); Line 824
}
exit (EXIT_SUCCESS); Line 826
}
else Line 828
regexp_error (p, repetition, ignore); Line 829
}
line_len = line->len; Line 831
if (line->str[line_len - 1] == '\n') Line 832
line_len--; Line 833
ret = re_search (&p->re_compiled, line->str, line_len, Line 834
0, line_len, NULL); Line 835
if (ret == -2) Line 836
{
error (0, 0, _("error in regular expression search")); Line 838
cleanup_fatal (); Line 839
}
if (ret == -1) Line 841
{
line = remove_line (); Line 843
if (!ignore) Line 844
save_line_to_file (line); Line 845
}
else Line 847
break; Line 848
}
}
else Line 851
{
/* Buffer the lines. */
while (true) Line 854
{
line = find_line (++current_line); Line 856
if (line == NULL) Line 857
{
if (p->repeat_forever) Line 859
{
if (!ignore) Line 861
{
dump_rest_of_file (); Line 863
close_output_file (); Line 864
}
exit (EXIT_SUCCESS); Line 866
}
else Line 868
regexp_error (p, repetition, ignore); Line 869
}
line_len = line->len; Line 871
if (line->str[line_len - 1] == '\n') Line 872
line_len--; Line 873
ret = re_search (&p->re_compiled, line->str, line_len, Line 874
0, line_len, NULL); Line 875
if (ret == -2) Line 876
{
error (0, 0, _("error in regular expression search")); Line 878
cleanup_fatal (); Line 879
}
if (ret != -1) Line 881
break; Line 882
}
}
/* Account for any offset from this regexp. */
break_line = current_line + p->offset; Line 887
write_to_file (break_line, ignore, p->argnum); Line 889
if (!ignore) Line 891
close_output_file (); Line 892
if (p->offset > 0) Line 894
current_line = break_line; Line 895
} Block 32
/* Split the input file according to the control records we have built. */
static void Line 900
split_file (void) Line 901
{
for (size_t i = 0; i < control_used; i++) Line 903
{
uintmax_t j; Line 905
if (controls[i].regexpr) Line 906
{
for (j = 0; (controls[i].repeat_forever Line 908
|| j <= controls[i].repeat); j++) Line 909
process_regexp (&controls[i], j); Line 910
}
else Line 912
{
for (j = 0; (controls[i].repeat_forever Line 914
|| j <= controls[i].repeat); j++) Line 915
process_line_count (&controls[i], j); Line 916
}
}
create_output_file (); Line 920
dump_rest_of_file (); Line 921
close_output_file (); Line 922
} Block 33
/* Return the name of output file number NUM.
This function is called from a signal handler, so it should invoke
only reentrant functions that are async-signal-safe. POSIX does
not guarantee this for the functions called below, but we don't
know of any hosts where this implementation isn't safe. */
static char * Line 932
make_filename (unsigned int num) Line 933
{
strcpy (filename_space, prefix); Line 935
if (suffix) Line 936
sprintf (filename_space + strlen (prefix), suffix, num); Line 937
else Line 938
sprintf (filename_space + strlen (prefix), "%0*u", digits, num); Line 939
return filename_space; Line 940
} Block 34
/* Create the next output file. */
static void Line 945
create_output_file (void) Line 946
{
bool fopen_ok; Line 948
int fopen_errno; Line 949
output_filename = make_filename (files_created); Line 951
if (files_created == UINT_MAX) Line 953
{
fopen_ok = false; Line 955
fopen_errno = EOVERFLOW; Line 956
}
else Line 958
{
/* Create the output file in a critical section, to avoid races. */
sigset_t oldset; Line 961
sigprocmask (SIG_BLOCK, &caught_signals, &oldset); Line 962
output_stream = fopen (output_filename, "w"); Line 963...!syscalls auto-comment...
fopen_ok = (output_stream != NULL); Line 964
fopen_errno = errno; Line 965
files_created += fopen_ok; Line 966
sigprocmask (SIG_SETMASK, &oldset, NULL); Line 967
}
if (! fopen_ok) Line 970
{
error (0, fopen_errno, "%s", quotef (output_filename)); Line 972
cleanup_fatal (); Line 973
}
bytes_written = 0; Line 975
} Block 35
/* If requested, delete all the files we have created. This function
must be called only from critical sections. */
static void Line 981
delete_all_files (bool in_signal_handler) Line 982
{
if (! remove_files) Line 984
return; Line 985
for (unsigned int i = 0; i < files_created; i++) Line 987
{
const char *name = make_filename (i); Line 989
if (unlink (name) != 0 && !in_signal_handler) Line 990...!syscalls auto-comment......!syscalls auto-comment...
error (0, errno, "%s", quotef (name)); Line 991
}
files_created = 0; Line 994
} Block 36
/* Close the current output file and print the count
of characters in this file. */
static void Line 1000
close_output_file (void) Line 1001
{
if (output_stream) Line 1003
{
if (ferror (output_stream)) Line 1005
{
error (0, 0, _("write error for %s"), quoteaf (output_filename)); Line 1007
output_stream = NULL; Line 1008
cleanup_fatal (); Line 1009
}
if (fclose (output_stream) != 0) Line 1011...!syscalls auto-comment...
{
error (0, errno, "%s", quotef (output_filename)); Line 1013
output_stream = NULL; Line 1014
cleanup_fatal (); Line 1015
}
if (bytes_written == 0 && elide_empty_files) Line 1017
{
sigset_t oldset; Line 1019
bool unlink_ok; Line 1020
int unlink_errno; Line 1021
/* Remove the output file in a critical section, to avoid races. */
sigprocmask (SIG_BLOCK, &caught_signals, &oldset); Line 1024
unlink_ok = (unlink (output_filename) == 0); Line 1025...!syscalls auto-comment......!syscalls auto-comment...
unlink_errno = errno; Line 1026
files_created -= unlink_ok; Line 1027
sigprocmask (SIG_SETMASK, &oldset, NULL); Line 1028
if (! unlink_ok) Line 1030
error (0, unlink_errno, "%s", quotef (output_filename)); Line 1031
}
else Line 1033
{
if (!suppress_count) Line 1035
{
char buf[INT_BUFSIZE_BOUND (uintmax_t)]; Line 1037
fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf)); Line 1038
}
}
output_stream = NULL; Line 1041
}
} Block 37
/* Save line LINE to the output file and
increment the character count for the current file. */
static void Line 1048
save_line_to_file (const struct cstring *line) Line 1049
{
size_t l = fwrite (line->str, sizeof (char), line->len, output_stream); Line 1051...!syscalls auto-comment...
if (l != line->len) Line 1052
{
error (0, errno, _("write error for %s"), quoteaf (output_filename)); Line 1054
output_stream = NULL; Line 1055
cleanup_fatal (); Line 1056
}
bytes_written += line->len; Line 1058
} Block 38
/* Return a new, initialized control record. */
static struct control * Line 1063
new_control_record (void) Line 1064
{
static size_t control_allocated = 0; /* Total space allocated. */ Line 1066
struct control *p; Line 1067
if (control_used == control_allocated) Line 1069
controls = X2NREALLOC (controls, &control_allocated); Line 1070
p = &controls[control_used++]; Line 1071
p->regexpr = false; Line 1072
p->repeat = 0; Line 1073
p->repeat_forever = false; Line 1074
p->lines_required = 0; Line 1075
p->offset = 0; Line 1076
return p; Line 1077
} Block 39
/* Check if there is a numeric offset after a regular expression.
STR is the entire command line argument.
P is the control record for this regular expression.
NUM is the numeric part of STR. */
static void Line 1085
check_for_offset (struct control *p, const char *str, const char *num) Line 1086
{
if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK) Line 1088
die (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), Line 1089
quote (str)); Line 1090
} Block 40
/* Given that the first character of command line arg STR is '{',
make sure that the rest of the string is a valid repeat count
and store its value in P.
ARGNUM is the ARGV index of STR. */
static void Line 1098
parse_repeat_count (int argnum, struct control *p, char *str) Line 1099
{
uintmax_t val; Line 1101
char *end; Line 1102
end = str + strlen (str) - 1; Line 1104
if (*end != '}') Line 1105Block 41
die (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"), Line 1106
quote (str)); Line 1107
*end = '\0'; Line 1108
if (str+1 == end-1 && *(str+1) == '*') Line 1110
p->repeat_forever = true; Line 1111
else Line 1112
{
if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK) Line 1114
{
die (EXIT_FAILURE, 0, Line 1116
_("%s}: integer required between '{' and '}'"), Line 1117Block 43Block 42
quote (global_argv[argnum])); Line 1118
}
p->repeat = val; Line 1120
}
*end = '}'; Line 1123
}
/* Extract the regular expression from STR and check for a numeric offset.
STR should start with the regexp delimiter character.
Return a new control record for the regular expression.
ARGNUM is the ARGV index of STR.
Unless IGNORE is true, mark these lines for output. */
static struct control * Line 1132
extract_regexp (int argnum, bool ignore, char const *str) Line 1133
{
size_t len; /* Number of bytes in this regexp. */ Line 1135
char delim = *str; Line 1136
char const *closing_delim; Line 1137
struct control *p; Line 1138
const char *err; Line 1139
closing_delim = strrchr (str + 1, delim); Line 1141
if (closing_delim == NULL) Line 1142
die (EXIT_FAILURE, 0, Line 1143
_("%s: closing delimiter '%c' missing"), str, delim); Line 1144
len = closing_delim - str - 1; Line 1146
p = new_control_record (); Line 1147
p->argnum = argnum; Line 1148
p->ignore = ignore; Line 1149
p->regexpr = true; Line 1151
p->re_compiled.buffer = NULL; Line 1152
p->re_compiled.allocated = 0; Line 1153
p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1); Line 1154
p->re_compiled.translate = NULL; Line 1155
re_syntax_options = Line 1156
RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES; Line 1157
err = re_compile_pattern (str + 1, len, &p->re_compiled); Line 1158
if (err) Line 1159
{
error (0, 0, _("%s: invalid regular expression: %s"), quote (str), err); Line 1161
cleanup_fatal (); Line 1162
}
if (closing_delim[1]) Line 1165
check_for_offset (p, str, closing_delim + 1); Line 1166
return p; Line 1168
}
/* Extract the break patterns from args START through ARGC - 1 of ARGV.
After each pattern, check if the next argument is a repeat count. */
static void Line 1174
parse_patterns (int argc, int start, char **argv) Line 1175
{
struct control *p; /* New control record created. */ Line 1177
uintmax_t val; Line 1178
static uintmax_t last_val = 0; Line 1179
for (int i = start; i < argc; i++) Line 1181
{
if (*argv[i] == '/' || *argv[i] == '%') Line 1183
{
p = extract_regexp (i, *argv[i] == '%', argv[i]); Line 1185
}
else Line 1187
{
p = new_control_record (); Line 1189
p->argnum = i; Line 1190
if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK) Line 1192
die (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i])); Line 1193
if (val == 0) Line 1194
die (EXIT_FAILURE, 0, Line 1195
_("%s: line number must be greater than zero"), argv[i]); Line 1196
if (val < last_val) Line 1197
{
char buf[INT_BUFSIZE_BOUND (uintmax_t)]; Line 1199
die (EXIT_FAILURE, 0, Line 1200
_("line number %s is smaller than preceding line number, %s"), Line 1201
quote (argv[i]), umaxtostr (last_val, buf)); Line 1202
}
if (val == last_val) Line 1205
error (0, 0, Line 1206
_("warning: line number %s is the same as preceding line number"), Line 1207
quote (argv[i])); Line 1208
last_val = val; Line 1210
p->lines_required = val; Line 1212
}
if (i + 1 < argc && *argv[i + 1] == '{') Line 1215
{
/* We have a repeat count. */
i++; Line 1218
parse_repeat_count (i, p, argv[i]); Line 1219
}
}
}
/* Bit names for the printf format flags ' and #.
   The two values may be ORed together.  */
enum
{
  FLAG_THOUSANDS = 1,
  FLAG_ALTERNATIVE = 2
};

/* Scan the leading run of printf flag characters in FORMAT.
   '-' and '0' are accepted and skipped; '\'' and '#' are accepted
   and recorded as FLAG_THOUSANDS and FLAG_ALTERNATIVE respectively.
   Scanning stops at the first byte that is none of these.
   Store the recorded flags in *FLAGS_PTR and return the number of
   flag characters consumed.  */

static size_t
get_format_flags (char const *format, int *flags_ptr)
{
  int seen = 0;
  size_t n = 0;

  for (;;)
    {
      char c = format[n];

      if (c == '\'')
        seen |= FLAG_THOUSANDS;
      else if (c == '#')
        seen |= FLAG_ALTERNATIVE;
      else if (c != '-' && c != '0')
        break;

      n++;
    }

  *flags_ptr = seen;
  return n;
}
/* Check that the printf format conversion specifier *FORMAT is valid
and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i',
since the format will be used with an unsigned value. */
static void Line 1262
check_format_conv_type (char *format, int flags) Line 1263
{
unsigned char ch = *format; Line 1265
int compatible_flags = FLAG_THOUSANDS; Line 1266
switch (ch) Line 1268
{
case 'd': Line 1270
case 'i': Line 1271
*format = 'u'; Line 1272
break; Line 1273
case 'u': Line 1275
break; Line 1276
case 'o': Line 1278
case 'x': Line 1279
case 'X': Line 1280
compatible_flags = FLAG_ALTERNATIVE; Line 1281
break; Line 1282
case 0: Line 1284
die (EXIT_FAILURE, 0, _("missing conversion specifier in suffix")); Line 1285
default: Line 1287
if (isprint (ch)) Line 1288
die (EXIT_FAILURE, 0, Line 1289
_("invalid conversion specifier in suffix: %c"), ch); Line 1290
else Line 1291
die (EXIT_FAILURE, 0, Line 1292
_("invalid conversion specifier in suffix: \\%.3o"), ch); Line 1293
}
if (flags & ~ compatible_flags) Line 1296
die (EXIT_FAILURE, 0, Line 1297
_("invalid flags in conversion specification: %%%c%c"), Line 1298
(flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch); Line 1299
}
/* Validate the suffix FORMAT and return the number of bytes that
   snprintf reports for applying it to UINT_MAX.
   FORMAT must contain exactly one conversion specification other
   than "%%"; its specifier may be rewritten in place by
   check_format_conv_type.  An invalid format is diagnosed and is
   fatal.  */

static size_t
max_out (char *format)
{
  bool seen_conversion = false;
  char *f = format;

  while (*f)
    {
      /* A '%' followed by anything other than '%' opens a conversion
         specification; "%%" is skipped as a pair.  */
      if (*f == '%' && *++f != '%')
        {
          if (seen_conversion)
            die (EXIT_FAILURE, 0,
                 _("too many %% conversion specifications in suffix"));
          seen_conversion = true;

          int flags;
          f += get_format_flags (f, &flags);

          /* Field width.  */
          while (ISDIGIT (*f))
            f++;

          /* Optional precision: a '.' and the digits after it.  */
          if (*f == '.')
            {
              do
                f++;
              while (ISDIGIT (*f));
            }

          check_format_conv_type (f, flags);
        }

      f++;
    }

  if (! seen_conversion)
    die (EXIT_FAILURE, 0,
         _("missing %% conversion specification in suffix"));

  int maxlen = snprintf (NULL, 0, format, UINT_MAX);
  if (maxlen < 0 || SIZE_MAX < maxlen)
    xalloc_die ();
  return maxlen;
}
int
main (int argc, char **argv) Line 1338
{
int optc; Line 1340
initialize_main (&argc, &argv); VMS-specific entry point handling wildcard expansion
set_program_name (argv[0]); Retains program name and discards path
setlocale (LC_ALL, ""); Sets up internationalization (i18n)
bindtextdomain (PACKAGE, LOCALEDIR); Assigns i18n directorySets text domain for _() [gettext()] function
textdomain (PACKAGE); Sets text domain for _() [gettext()] function
atexit (close_stdout); Close stdout on exit (see gnulib)
global_argv = argv; Line 1350
controls = NULL; Line 1351
control_used = 0; Line 1352
suppress_count = false; Line 1353
remove_files = true; Line 1354
suppress_matched = false; Line 1355
prefix = DEFAULT_PREFIX; Line 1356
while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1) Line 1358
switch (optc) Line 1359
{
case 'f': Line 1361
prefix = optarg; Line 1362
break; Line 1363
case 'b': Line 1365
suffix = optarg; Line 1366
break; Line 1367
case 'k': Line 1369
remove_files = false; Line 1370
break; Line 1371
case 'n': Line 1373
digits = xdectoimax (optarg, 0, MIN (INT_MAX, SIZE_MAX), "", Line 1374
_("invalid number"), 0); Line 1375
break; Line 1376
case 's': Line 1378
case 'q': Line 1379
suppress_count = true; Line 1380
break; Line 1381
case 'z': Line 1383
elide_empty_files = true; Line 1384
break; Line 1385
case SUPPRESS_MATCHED_OPTION: Line 1387
suppress_matched = true; Line 1388
break; Line 1389
case_GETOPT_HELP_CHAR; Line 1391
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); Line 1393
default: Line 1395
usage (EXIT_FAILURE); Line 1396
}
if (argc - optind < 2) Line 1399
{
if (argc <= optind) Line 1401
error (0, 0, _("missing operand")); Line 1402
else Line 1403
error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); Line 1404
usage (EXIT_FAILURE); Line 1405
}
size_t prefix_len = strlen (prefix); Line 1408
size_t max_digit_string_len Line 1409
= (suffix Line 1410
? max_out (suffix) Line 1411
: MAX (INT_STRLEN_BOUND (unsigned int), digits)); Line 1412
if (SIZE_MAX - 1 - prefix_len < max_digit_string_len) Line 1413
xalloc_die (); ...!common auto-comment...
filename_space = xmalloc (prefix_len + max_digit_string_len + 1); Line 1415
set_input_file (argv[optind++]); Line 1417
parse_patterns (argc, optind, argv); Line 1419
{
int i; Line 1422
static int const sig[] = Line 1423
{
/* The usual suspects. */
SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, Line 1426
#ifdef SIGPOLL Line 1427
SIGPOLL, Line 1428
#endif Line 1429
#ifdef SIGPROF Line 1430
SIGPROF, Line 1431
#endif Line 1432
#ifdef SIGVTALRM Line 1433
SIGVTALRM, Line 1434
#endif Line 1435
#ifdef SIGXCPU Line 1436
SIGXCPU, Line 1437
#endif Line 1438
#ifdef SIGXFSZ Line 1439
SIGXFSZ, Line 1440
#endif Line 1441
};
enum { nsigs = ARRAY_CARDINALITY (sig) }; Line 1443
struct sigaction act; Line 1445
sigemptyset (&caught_signals); Line 1447
for (i = 0; i < nsigs; i++) Line 1448
{
sigaction (sig[i], NULL, &act); Line 1450
if (act.sa_handler != SIG_IGN) Line 1451
sigaddset (&caught_signals, sig[i]); Line 1452
}
act.sa_handler = interrupt_handler; Line 1455
act.sa_mask = caught_signals; Line 1456
act.sa_flags = 0; Line 1457
for (i = 0; i < nsigs; i++) Line 1459
if (sigismember (&caught_signals, sig[i])) Line 1460
sigaction (sig[i], &act, NULL); Line 1461
}
split_file (); Line 1464
if (close (STDIN_FILENO) != 0) Line 1466...!syscalls auto-comment...
{
error (0, errno, _("read error")); Line 1468
cleanup_fatal (); Line 1469
}
return EXIT_SUCCESS; Line 1472
}
void Line 1475
usage (int status) Line 1476
{
if (status != EXIT_SUCCESS) Line 1478
emit_try_help (); ...!common auto-comment...
else Line 1480
{
printf (_("\ Line 1482
Usage: %s [OPTION]... FILE PATTERN...\n\ Line 1483
"), Line 1484
program_name); Line 1485
fputs (_("\ Line 1486
Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\ Line 1487
and output byte counts of each piece to standard output.\n\ Line 1488
"), stdout); Line 1489
fputs (_("\ Line 1490
\n\
Read standard input if FILE is -\n\ Line 1492
"), stdout); Line 1493
emit_mandatory_arg_note (); ...!common auto-comment...
fputs (_("\ Line 1497
-b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\ Line 1498
-f, --prefix=PREFIX use PREFIX instead of 'xx'\n\ Line 1499
-k, --keep-files do not remove output files on errors\n\ Line 1500
"), stdout); Line 1501
fputs (_("\ Line 1502
--suppress-matched suppress the lines matching PATTERN\n\ Line 1503
"), stdout); Line 1504
fputs (_("\ Line 1505
-n, --digits=DIGITS use specified number of digits instead of 2\n\ Line 1506
-s, --quiet, --silent do not print counts of output file sizes\n\ Line 1507
-z, --elide-empty-files remove empty output files\n\ Line 1508
"), stdout); Line 1509
fputs (HELP_OPTION_DESCRIPTION, stdout); Line 1510
fputs (VERSION_OPTION_DESCRIPTION, stdout); Line 1511
fputs (_("\ Line 1512
\n\
Each PATTERN may be:\n\ Line 1514
INTEGER copy up to but not including specified line number\n\ Line 1515
/REGEXP/[OFFSET] copy up to but not including a matching line\n\ Line 1516
%REGEXP%[OFFSET] skip to, but not including a matching line\n\ Line 1517
{INTEGER} repeat the previous pattern specified number of times\n\ Line 1518
{*} repeat the previous pattern as many times as possible\n\ Line 1519
\n\
A line OFFSET is a required '+' or '-' followed by a positive integer.\n\ Line 1521
"), stdout); Line 1522
emit_ancillary_info (PROGRAM_NAME); Line 1523
}
exit (status); Line 1525
}