/* Permuted index for GNU, with keywords in their context. This is the ptx utility
Copyright (C) 1990-2018 Free Software Foundation, Inc.
François Pinard <pinard@iro.umontreal.ca>, 1988.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
François Pinard <pinard@iro.umontreal.ca> */ The GNUv3 license
#include <config.h> Provides system specific information
#include <getopt.h> ...!includes auto-comment...
#include <sys/types.h> Provides system data types
#include "system.h" ...!includes auto-comment...
#include "die.h" ...!includes auto-comment...
#include <regex.h> ...!includes auto-comment...
#include "argmatch.h" ...!includes auto-comment...
#include "diacrit.h" ...!includes auto-comment...
#include "error.h" ...!includes auto-comment...
#include "fadvise.h" ...!includes auto-comment...
#include "quote.h" ...!includes auto-comment...
#include "read-file.h" ...!includes auto-comment...
#include "stdio--.h" ...!includes auto-comment...
#include "xstrtol.h" ...!includes auto-comment...
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "ptx" Line 37
/* TRANSLATORS: Please translate "F. Pinard" to "François Pinard"
if "ç" (c-with-cedilla) is available in the translation's character
set and encoding. */
#define AUTHORS proper_name_utf8 ("F. Pinard", "Fran\xc3\xa7ois Pinard") Line 42
/* Number of possible characters in a byte. */
#define CHAR_SET_SIZE 256 Line 45
/* Character classification and digit conversion helpers used when
   decoding backslash escapes.  Each macro may evaluate C more than
   once, so C must be free of side effects.  */

/* Nonzero if C is an octal digit ('0' through '7').  */
#define ISODIGIT(C) ('0' <= (C) && (C) <= '7')

/* Numeric value of C taken as a hexadecimal digit.  Letters in either
   case map to 10..15; anything else is treated as a decimal digit.  */
#define HEXTOBIN(C) \
  ('A' <= (C) && (C) <= 'F' ? (C) - 'A' + 10 \
   : 'a' <= (C) && (C) <= 'f' ? (C) - 'a' + 10 \
   : (C) - '0')

/* Numeric value of C taken as an octal digit.  */
#define OCTTOBIN(C) ((C) - '0')
/* Debugging the memory allocator. */
#if WITH_DMALLOC Line 54
# define MALLOC_FUNC_CHECK 1 Line 55
# include <dmalloc.h> Line 56
#endif Line 57
/* Global definitions. */
/* FIXME: There are many unchecked integer overflows in this file,
and in theory they could cause this command to have undefined
behavior given large inputs or options. This command should
diagnose any such overflow and exit. */
/* Program options. */
/* The output formats this program knows about.  */
enum Format
{
  /* No output format has been chosen yet.  */
  UNKNOWN_FORMAT,

  /* Output for a dumb terminal.  */
  DUMB_FORMAT,

  /* Output for 'troff' or 'nroff'.  */
  ROFF_FORMAT,

  /* Output for 'TeX' or 'LaTeX'.  */
  TEX_FORMAT
};
static bool gnu_extensions = true; /* trigger all GNU extensions */ Line 76
static bool auto_reference = false; /* refs are 'file_name:line_number:' */ Line 77
static bool input_reference = false; /* refs at beginning of input lines */ Line 78
static bool right_reference = false; /* output refs after right context */ Line 79
static ptrdiff_t line_width = 72; /* output line width in characters */ Line 80
static ptrdiff_t gap_size = 3; /* number of spaces between output fields */ Line 81
static const char *truncation_string = "/"; Line 82
/* string used to mark line truncations */
static const char *macro_name = "xx"; /* macro name for roff or TeX output */ Line 84
static enum Format output_format = UNKNOWN_FORMAT; Line 85
/* output format */
static bool ignore_case = false; /* fold lower to upper for sorting */ Line 88
static const char *break_file = NULL; /* name of the 'Break chars' file */ Line 89
static const char *only_file = NULL; /* name of the 'Only words' file */ Line 90
static const char *ignore_file = NULL; /* name of the 'Ignore words' file */ Line 91
/* Options that use regular expressions. */
/* One regular-expression option: the text the expression was given as,
   plus its compiled form.  */
struct regex_data
{
  char const *string;                /* the original expression, as a string */
  struct re_pattern_buffer pattern;  /* the compiled expression */
  char fastmap[UCHAR_MAX + 1];       /* fastmap buffer used by PATTERN */
};
static struct regex_data context_regex; /* end of context */ Line 104
static struct regex_data word_regex; /* keyword */ Line 105
/* A BLOCK delimits a region in memory of arbitrary size, like the copy
   of a whole file.  A WORD is similar, except that it is intended for
   smaller regions and records a length instead of an end pointer.  A
   WORD_TABLE may contain several WORDs.  */

typedef struct
{
  /* First byte of the region.  */
  char *start;
  /* One past the last byte of the region.  */
  char *end;
}
BLOCK;

typedef struct
{
  /* First byte of the region.  */
  char *start;
  /* Length of the region.  */
  ptrdiff_t size;
}
WORD;

typedef struct
{
  /* Array of WORDs.  */
  WORD *start;
  /* Allocated length of the array.  */
  size_t alloc;
  /* Number of entries in use.  */
  ptrdiff_t length;
}
WORD_TABLE;
/* Pattern description tables. */
/* For each character, provide its folded equivalent. */
static unsigned char folded_chars[CHAR_SET_SIZE]; Line 136
/* End of context pattern register indices. */
static struct re_registers context_regs; Line 139
/* Keyword pattern register indices. */
static struct re_registers word_regs; Line 142
/* A word characters fastmap is used only when no word regexp has been
provided. A word is then made up of a sequence of one or more characters
allowed by the fastmap. Contains !0 if character allowed in word. Not
only is this faster in most cases, but it also simplifies the
implementation of the Break files. */
static char word_fastmap[CHAR_SET_SIZE]; Line 149
/* Maximum length of any word read. */
static ptrdiff_t maximum_word_length; Line 152
/* Maximum width of any reference used. */
static ptrdiff_t reference_max_width; Line 155
/* Ignore and Only word tables. */
static WORD_TABLE ignore_table; /* table of words to ignore */ Line 159
static WORD_TABLE only_table; /* table of words to select */ Line 160
/* Source text table, and scanning macros. */
static int number_input_files; /* number of text input files */ Line 164
static intmax_t total_line_count; /* total number of lines seen so far */ Line 165
static const char **input_file_name; /* array of text input file names */ Line 166
static intmax_t *file_line_count; /* array of line count values at end */ Line 167
static BLOCK *text_buffers; /* files to study */ Line 169
/* Cursor-moving helpers.  Each one expands to a single 'while'
   statement that modifies CURSOR in place; the caller supplies the
   terminating semicolon.  SKIP_NON_WHITE is used only for getting or
   skipping the reference.  */

/* Advance CURSOR over non-white characters, never going past LIMIT.  */
#define SKIP_NON_WHITE(cursor, limit) \
  while ((cursor) < (limit) && ! isspace (to_uchar (*(cursor)))) \
    (cursor)++

/* Advance CURSOR over white characters, never going past LIMIT.  */
#define SKIP_WHITE(cursor, limit) \
  while ((cursor) < (limit) && isspace (to_uchar (*(cursor)))) \
    (cursor)++

/* Move CURSOR backwards while the character just before it is white,
   never going before START.  */
#define SKIP_WHITE_BACKWARDS(cursor, start) \
  while ((cursor) > (start) && isspace (to_uchar ((cursor)[-1]))) \
    (cursor)--
/* Advance CURSOR over one item, where LIMIT is the end of the region.

   If a word regexp is in use, try to match it at CURSOR: on a match
   skip the matched text, on no match (-1) skip a single character, and
   on an internal matcher error (-2) call matcher_error.  Otherwise use
   word_fastmap: if the character at CURSOR is a word character, skip
   the whole run of word characters up to LIMIT, else skip a single
   character.

   The fastmap branch reads *CURSOR before comparing against LIMIT, so
   this presumably relies on callers passing CURSOR < LIMIT.

   The body is wrapped in do { } while (0) so that the macro behaves as
   one statement wherever it is used; CURSOR and LIMIT are evaluated
   several times and must be free of side effects.  */
#define SKIP_SOMETHING(cursor, limit) \
  do \
    { \
      if (word_regex.string) \
        { \
          regoff_t count; \
          count = re_match (&word_regex.pattern, (cursor), \
                            (limit) - (cursor), 0, NULL); \
          if (count == -2) \
            matcher_error (); \
          (cursor) += count == -1 ? 1 : count; \
        } \
      else if (word_fastmap[to_uchar (*(cursor))]) \
        while ((cursor) < (limit) && word_fastmap[to_uchar (*(cursor))]) \
          (cursor)++; \
      else \
        (cursor)++; \
    } \
  while (0)
/* Occurrences table.
The 'keyword' pointer provides the central word, which is surrounded
by a left context and a right context. The 'keyword' and 'length'
fields allow full 8-bit character keys, even including NULs. At other
places in this program, the name 'keyafter' refers to the keyword
followed by its right context.
The left context does not extend, towards the beginning of the file,
further than a distance given by the 'left' value. This value is
relative to the keyword beginning, and it is usually negative. This
ensures that, except for white space, we will never have to scan the
source text backwards when it is time to generate the final output
lines.
The right context, indirectly attainable through the keyword end, does
not extend, towards the end of the file, further than a distance given
by the 'right' value. This value is relative to the keyword
beginning, it is usually positive.
When automatic references are used, the 'reference' value is the
overall line number in all input files read so far, in this case, it
is of type intmax_t. When input references are used, the 'reference'
value indicates the distance between the keyword beginning and the
start of the reference field, and it fits in ptrdiff_t and is usually
negative. */
typedef struct Line 227
{
WORD key; /* description of the keyword */ Line 229
ptrdiff_t left; /* distance to left context start */ Line 230
ptrdiff_t right; /* distance to right context end */ Line 231
intmax_t reference; /* reference descriptor */ Line 232
int file_index; /* corresponding file */ Line 233
} Block 7
OCCURS; Line 235
/* The various OCCURS tables are indexed by the language. But for the
time being, there is no such multiple language support. */
static OCCURS *occurs_table[1]; /* all words retained from the read text */ Line 240
static size_t occurs_alloc[1]; /* allocated size of occurs_table */ Line 241
static ptrdiff_t number_of_occurs[1]; /* number of used slots in occurs_table */Line 242
/* Communication among output routines. */
/* Indicate if special output processing is requested for each character. */
static char edited_flag[CHAR_SET_SIZE]; Line 248
/* Half of line width, reference excluded. */
static ptrdiff_t half_line_width; Line 251
/* Maximum width of before field. */
static ptrdiff_t before_max_width; Line 254
/* Maximum width of keyword-and-after field. */
static ptrdiff_t keyafter_max_width; Line 257
/* Length of string that flags truncation. */
static ptrdiff_t truncation_string_length; Line 260
/* When context is limited by lines, wraparound may happen on final output:
the 'head' pointer gives access to some supplementary left context which
will be seen at the end of the output line, the 'tail' pointer gives
access to some supplementary right context which will be seen at the
beginning of the output line. */
static BLOCK tail; /* tail field */ Line 268
static bool tail_truncation; /* flag truncation after the tail field */ Line 269
static BLOCK before; /* before field */ Line 271
static bool before_truncation; /* flag truncation before the before field */ Line 272
static BLOCK keyafter; /* keyword-and-after field */ Line 274
static bool keyafter_truncation; /* flag truncation after the keyafter field */ Line 275
static BLOCK head; /* head field */ Line 277
static bool head_truncation; /* flag truncation before the head field */ Line 278
static BLOCK reference; /* reference field for input reference mode */ Line 280
/* Miscellaneous routines. */
/* Diagnose an error in the regular expression matcher. Then exit. */
static void ATTRIBUTE_NORETURN Line 286
matcher_error (void) Line 287
{
die (EXIT_FAILURE, errno, _("error in regular expression matcher")); Line 289
} Block 8
/*-------------------------------------------------------------------.
| Return a newly allocated copy of STRING in which backslash escapes |
| have been replaced by the characters they stand for.               |
`-------------------------------------------------------------------*/

/* Loosely adapted from GNU sh-utils printf.c code.

   Recognized escapes: \xhhh (up to three hexadecimal digits), \0ooo
   (up to three octal digits after the leading 0), \a \b \f \n \r \t \v,
   and \c, which discards the rest of STRING.  A backslash followed by
   any other character is copied unchanged together with that character;
   "\x" without any hexadecimal digit is copied as is; a lone backslash
   at the end of STRING is dropped.

   The result never holds more characters than STRING, so a buffer of
   strlen (STRING) + 1 bytes is always large enough.  */

static char *
copy_unescaped_string (const char *string)
{
  char *result = xmalloc (strlen (string) + 1);
  char *out = result;           /* next free position in RESULT */

  while (*string)
    {
      int value;                /* numeric value of a \x or \0 escape */
      int digits;               /* digits consumed by that escape */

      /* Ordinary characters are copied verbatim.  */
      if (*string != '\\')
        {
          *out++ = *string++;
          continue;
        }

      /* Step over the backslash and decode what follows it.  */
      string++;
      switch (*string)
        {
        case 'x':               /* \xhhh escape, 3 chars maximum */
          value = 0;
          digits = 0;
          string++;
          while (digits < 3 && isxdigit (to_uchar (*string)))
            {
              value = value * 16 + HEXTOBIN (*string);
              digits++;
              string++;
            }
          if (digits != 0)
            *out++ = value;
          else
            {
              /* No digit at all: keep the two characters as they were.  */
              *out++ = '\\';
              *out++ = 'x';
            }
          break;

        case '0':               /* \0ooo escape, 3 chars maximum */
          value = 0;
          digits = 0;
          string++;
          while (digits < 3 && ISODIGIT (*string))
            {
              value = value * 8 + OCTTOBIN (*string);
              digits++;
              string++;
            }
          *out++ = value;
          break;

        case 'a':               /* alert */
#if __STDC__
          *out++ = '\a';
#else
          *out++ = 7;
#endif
          string++;
          break;

        case 'b':               /* backspace */
          *out++ = '\b';
          string++;
          break;

        case 'c':               /* cancel the rest of the output */
          string += strlen (string);
          break;

        case 'f':               /* form feed */
          *out++ = '\f';
          string++;
          break;

        case 'n':               /* new line */
          *out++ = '\n';
          string++;
          break;

        case 'r':               /* carriage return */
          *out++ = '\r';
          string++;
          break;

        case 't':               /* horizontal tab */
          *out++ = '\t';
          string++;
          break;

        case 'v':               /* vertical tab */
#if __STDC__
          *out++ = '\v';
#else
          *out++ = 11;
#endif
          string++;
          break;

        case '\0':              /* lone backslash at end of string */
          /* Nothing to copy; the outer loop ends on the NUL.  */
          break;

        default:                /* unknown escape: keep both characters */
          *out++ = '\\';
          *out++ = *string++;
          break;
        }
    }

  *out = '\0';
  return result;
}
/*--------------------------------------------------------------------------.
| Compile the regex represented by REGEX, diagnose and abort if any error. |
`--------------------------------------------------------------------------*/
static void Line 410
compile_regex (struct regex_data *regex) Line 411
{
struct re_pattern_buffer *pattern = ®ex->pattern; Line 413
char const *string = regex->string; Line 414
char const *message; Line 415
pattern->buffer = NULL; Line 417
pattern->allocated = 0; Line 418
pattern->fastmap = regex->fastmap; Line 419
pattern->translate = ignore_case ? folded_chars : NULL; Line 420
message = re_compile_pattern (string, strlen (string), pattern); Line 422
if (message) Line 423
die (EXIT_FAILURE, 0, _("%s (for regexp %s)"), message, quote (string)); Line 424
/* The fastmap should be compiled before 're_match'. The following
call is not mandatory, because 're_search' is always called sooner,
and it compiles the fastmap if this has not been done yet. */
re_compile_fastmap (pattern); Line 430
} Block 10
/*------------------------------------------------------------------------.
| This will initialize various tables for pattern match and compiles some |
| regexps. |
`------------------------------------------------------------------------*/
static void Line 438
initialize_regex (void) Line 439
{
int character; /* character value */ Line 441
/* Initialize the case folding table. */
if (ignore_case) Line 445
for (character = 0; character < CHAR_SET_SIZE; character++) Line 446
folded_chars[character] = toupper (character); Line 447
/* Unless the user already provided a description of the end of line or
end of sentence sequence, select an end of line sequence to compile.
If the user provided an empty definition, thus disabling end of line
or sentence feature, make it NULL to speed up tests. If GNU
extensions are enabled, use end of sentence like in GNU emacs. If
disabled, use end of lines. */
if (context_regex.string) Line 456
{
if (!*context_regex.string) Line 458
context_regex.string = NULL; Line 459
}
else if (gnu_extensions && !input_reference) Line 461
context_regex.string = "[.?!][]\"')}]*\\($\\|\t\\| \\)[ \t\n]*"; Line 462Block 11
else Line 463
context_regex.string = "\n"; Line 464
if (context_regex.string) Line 466
compile_regex (&context_regex); Line 467
/* If the user has already provided a non-empty regexp to describe
words, compile it. Else, unless this has already been done through
a user provided Break character file, construct a fastmap of
characters that may appear in a word. If GNU extensions enabled,
include only letters of the underlying character set. If disabled,
include almost everything, even punctuations; stop only on white
space. */
if (word_regex.string) Line 477
compile_regex (&word_regex); Line 478
else if (!break_file) Line 479
{
if (gnu_extensions) Line 481
{
/* Simulate \w+. */
for (character = 0; character < CHAR_SET_SIZE; character++) Line 486
word_fastmap[character] = !! isalpha (character); Line 487
}
else Line 489
{
/* Simulate [^ \t\n]+. */
memset (word_fastmap, 1, CHAR_SET_SIZE); Line 494
word_fastmap[' '] = 0; Line 495
word_fastmap['\t'] = 0; Line 496
word_fastmap['\n'] = 0; Line 497
}
} Block 12
}
/*------------------------------------------------------------------------.
| This routine will attempt to swallow a whole file name FILE_NAME into a |
| contiguous region of memory and return a description of it into BLOCK. |
| Standard input is assumed whenever FILE_NAME is NULL, empty or "-". |
| |
| Previously, in some cases, white space compression was attempted while |
| inputting text. This was defeating some regexps like default end of |
| sentence, which checks for two consecutive spaces. If white space |
| compression is ever reinstated, it should be in output routines. |
`------------------------------------------------------------------------*/
static void Line 513
swallow_file_in_memory (const char *file_name, BLOCK *block) Line 514
{
size_t used_length; /* used length in memory buffer */ Line 516
/* As special cases, a file name which is NULL or "-" indicates standard
input, which is already opened. In all other cases, open the file from
its name. */
bool using_stdin = !file_name || !*file_name || STREQ (file_name, "-"); Line 521
if (using_stdin) Line 522
block->start = fread_file (stdin, &used_length); Line 523
else Line 524
block->start = read_file (file_name, &used_length); Line 525
if (!block->start) Line 527
die (EXIT_FAILURE, errno, "%s", quotef (using_stdin ? "-" : file_name)); Line 528
block->end = block->start + used_length; Line 530
}
/* Sort and search routines. */
/*--------------------------------------------------------------------------.
| Compare two words, FIRST and SECOND, and return 0 if they are identical. |
| Return less than 0 if the first word goes before the second; return |
| greater than 0 if the first word goes after the second. |
| |
| If a word is indeed a prefix of the other, the shorter should go first. |
`--------------------------------------------------------------------------*/
static int Line 543
compare_words (const void *void_first, const void *void_second) Line 544
{
#define first ((const WORD *) void_first) Line 546
#define second ((const WORD *) void_second) Line 547
ptrdiff_t length; /* minimum of two lengths */ Line 548
ptrdiff_t counter; /* cursor in words */ Line 549
int value; /* value of comparison */ Line 550
length = first->size < second->size ? first->size : second->size; Line 552
if (ignore_case) Line 554
{
for (counter = 0; counter < length; counter++) Line 556
{
value = (folded_chars [to_uchar (first->start[counter])] Line 558
- folded_chars [to_uchar (second->start[counter])]); Line 559
if (value != 0) Line 560
return value; Line 561
}
} Block 13
else Line 564
{
for (counter = 0; counter < length; counter++) Line 566
{
value = (to_uchar (first->start[counter]) Line 568
- to_uchar (second->start[counter])); Line 569
if (value != 0) Line 570
return value; Line 571
}
} Block 14
return first->size < second->size ? -1 : first->size > second->size; Line 575
#undef first Line 576
#undef second Line 577
}
/*-----------------------------------------------------------------------.
| Decides which of two OCCURS, FIRST or SECOND, should lexicographically |
| go first. In case of a tie, preserve the original order through a |
| pointer comparison. |
`-----------------------------------------------------------------------*/
static int Line 586
compare_occurs (const void *void_first, const void *void_second) Line 587
{
#define first ((const OCCURS *) void_first) Line 589
#define second ((const OCCURS *) void_second) Line 590
int value; Line 591
value = compare_words (&first->key, &second->key); Line 593
return (value ? value Line 594
: first->key.start < second->key.start ? -1 Line 595
: first->key.start > second->key.start); Line 596
#undef first Line 597
#undef second Line 598
}
/* True if WORD appears in TABLE. Uses a binary search. */
static bool _GL_ATTRIBUTE_PURE Line 603
search_table (WORD *word, WORD_TABLE *table) Line 604
{
ptrdiff_t lowest; /* current lowest possible index */ Line 606
ptrdiff_t highest; /* current highest possible index */ Line 607
ptrdiff_t middle; /* current middle index */ Line 608
int value; /* value from last comparison */ Line 609
lowest = 0; Line 611
highest = table->length - 1; Line 612
while (lowest <= highest) Line 613
{
middle = (lowest + highest) / 2; Line 615
value = compare_words (word, table->start + middle); Line 616
if (value < 0) Line 617
highest = middle - 1; Line 618
else if (value > 0) Line 619
lowest = middle + 1; Line 620
else Line 621
return true; Line 622
} Block 15
return false; Line 624
}
/*---------------------------------------------------------------------.
| Sort the whole occurs table in memory. Presumably, 'qsort' does not |
| take intermediate copies or table elements, so the sort will be |
| stabilized throughout the comparison routine. |
`---------------------------------------------------------------------*/
static void Line 633
sort_found_occurs (void) Line 634
{
/* Only one language for the time being. */
if (number_of_occurs[0]) Line 638
qsort (occurs_table[0], number_of_occurs[0], sizeof **occurs_table, Line 639
compare_occurs); Line 640
}
/* Parameter files reading routines. */
/*----------------------------------------------------------------------.
| Read a file named FILE_NAME, containing a set of break characters. |
| Build a content to the array word_fastmap in which all characters are |
| allowed except those found in the file. Characters may be repeated. |
`----------------------------------------------------------------------*/
static void Line 651
digest_break_file (const char *file_name) Line 652
{
BLOCK file_contents; /* to receive a copy of the file */ Line 654
char *cursor; /* cursor in file copy */ Line 655
swallow_file_in_memory (file_name, &file_contents); Line 657
/* Make the fastmap and record the file contents in it. */
memset (word_fastmap, 1, CHAR_SET_SIZE); Line 661
for (cursor = file_contents.start; cursor < file_contents.end; cursor++) Line 662
word_fastmap[to_uchar (*cursor)] = 0; Line 663
if (!gnu_extensions) Line 665
{
/* If GNU extensions are enabled, the only way to avoid newline as
a break character is to write all the break characters in the
file with no newline at all, not even at the end of the file.
If disabled, spaces, tabs and newlines are always considered as
break characters even if not included in the break file. */
word_fastmap[' '] = 0; Line 674
word_fastmap['\t'] = 0; Line 675
word_fastmap['\n'] = 0; Line 676
}
/* Return the space of the file, which is no more required. */
free (file_contents.start); Line 681
}
/*-----------------------------------------------------------------------.
| Read a file named FILE_NAME, containing one word per line, then |
| construct in TABLE a table of WORD descriptors for them. The routine |
| swallows the whole file in memory; this is at the expense of space |
| needed for newlines, which are useless; however, the reading is fast. |
`-----------------------------------------------------------------------*/
static void Line 691
digest_word_file (const char *file_name, WORD_TABLE *table) Line 692
{
BLOCK file_contents; /* to receive a copy of the file */ Line 694
char *cursor; /* cursor in file copy */ Line 695
char *word_start; /* start of the current word */ Line 696
swallow_file_in_memory (file_name, &file_contents); Line 698
table->start = NULL; Line 700
table->alloc = 0; Line 701
table->length = 0; Line 702
/* Read the whole file. */
cursor = file_contents.start; Line 706
while (cursor < file_contents.end) Line 707
{
/* Read one line, and save the word in contains. */
word_start = cursor; Line 712
while (cursor < file_contents.end && *cursor != '\n') Line 713
cursor++; Line 714
/* Record the word in table if it is not empty. */
if (cursor > word_start) Line 718
{
if (table->length == table->alloc) Line 720
table->start = x2nrealloc (table->start, &table->alloc, Line 721
sizeof *table->start); Line 722
table->start[table->length].start = word_start; Line 723
table->start[table->length].size = cursor - word_start; Line 724
table->length++; Line 725
}
/* This test allows for an incomplete line at end of file. */
if (cursor < file_contents.end) Line 730
cursor++; Line 731
}
/* Finally, sort all the words read. */
qsort (table->start, table->length, sizeof table->start[0], compare_words); Line 736
}
/* Keyword recognition and selection. */
/*----------------------------------------------------------------------.
| For each keyword in the source text, constructs an OCCURS structure. |
`----------------------------------------------------------------------*/
static void Line 745
find_occurs_in_text (int file_index) Line 746
{
char *cursor; /* for scanning the source text */ Line 748
char *scan; /* for scanning the source text also */ Line 749
char *line_start; /* start of the current input line */ Line 750
char *line_scan; /* newlines scanned until this point */ Line 751
ptrdiff_t reference_length; /* length of reference in input mode */ Line 752
WORD possible_key; /* possible key, to ease searches */ Line 753
OCCURS *occurs_cursor; /* current OCCURS under construction */ Line 754
char *context_start; /* start of left context */ Line 756
char *context_end; /* end of right context */ Line 757
char *word_start; /* start of word */ Line 758
char *word_end; /* end of word */ Line 759
char *next_context_start; /* next start of left context */ Line 760
const BLOCK *text_buffer = &text_buffers[file_index]; Line 762
/* reference_length is always used within 'if (input_reference)'.
However, GNU C diagnoses that it may be used uninitialized. The
following assignment is merely to shut it up. */
reference_length = 0; Line 768
/* Tracking where lines start is helpful for reference processing. In
auto reference mode, this allows counting lines. In input reference
mode, this permits finding the beginning of the references.
The first line begins with the file, skip immediately this very first
reference in input reference mode, to help further rejection any word
found inside it. Also, unconditionally assigning these variable has
the happy effect of shutting up lint. */
line_start = text_buffer->start; Line 779
line_scan = line_start; Line 780
if (input_reference) Line 781
{
SKIP_NON_WHITE (line_scan, text_buffer->end); Line 783
reference_length = line_scan - line_start; Line 784
SKIP_WHITE (line_scan, text_buffer->end); Line 785
} Block 18
/* Process the whole buffer, one line or one sentence at a time. */
for (cursor = text_buffer->start; Line 790
cursor < text_buffer->end; Line 791
cursor = next_context_start) Line 792
{
/* 'context_start' gets initialized before the processing of each
line, or once for the whole buffer if no end of line or sentence
sequence separator. */
context_start = cursor; Line 799
/* If an end of line or end of sentence sequence is defined and
non-empty, 'next_context_start' will be recomputed to be the end of
each line or sentence, before each one is processed. If no such
sequence, then 'next_context_start' is set at the end of the whole
buffer, which is then considered to be a single line or sentence.
This test also accounts for the case of an incomplete line or
sentence at the end of the buffer. */
next_context_start = text_buffer->end; Line 809
if (context_regex.string) Line 810
switch (re_search (&context_regex.pattern, cursor, Line 811
text_buffer->end - cursor, Line 812
0, text_buffer->end - cursor, &context_regs)) Line 813
{
case -2: Line 815
matcher_error (); Line 816
case -1: Line 818
break; Line 819
case 0: Line 821
die (EXIT_FAILURE, 0, Line 822
_("error: regular expression has a match of length zero: %s"), Line 823
quote (context_regex.string)); Line 824
default: Line 826
next_context_start = cursor + context_regs.end[0]; Line 827
break; Line 828
}
/* Include the separator into the right context, but not any suffix
white space in this separator; this insures it will be seen in
output and will not take more space than necessary. */
context_end = next_context_start; Line 835
SKIP_WHITE_BACKWARDS (context_end, context_start); Line 836
/* Read and process a single input line or sentence, one word at a
time. */
while (1) Line 841
{
if (word_regex.string) Line 843
/* If a word regexp has been compiled, use it to skip at the
beginning of the next word. If there is no such word, exit
the loop. */
{
regoff_t r = re_search (&word_regex.pattern, cursor, Line 850
context_end - cursor, Line 851
0, context_end - cursor, &word_regs); Line 852
if (r == -2) Line 853
matcher_error (); Line 854
if (r == -1) Line 855
break; Line 856
word_start = cursor + word_regs.start[0]; Line 857
word_end = cursor + word_regs.end[0]; Line 858
}
else Line 860
/* Avoid re_search and use the fastmap to skip to the
beginning of the next word. If there is no more word in
the buffer, exit the loop. */
{
scan = cursor; Line 867
while (scan < context_end Line 868
&& !word_fastmap[to_uchar (*scan)]) Line 869
scan++; Line 870
if (scan == context_end) Line 872
break; Line 873
word_start = scan; Line 875
while (scan < context_end Line 877
&& word_fastmap[to_uchar (*scan)]) Line 878
scan++; Line 879
word_end = scan; Line 881
}
/* Skip right to the beginning of the found word. */
cursor = word_start; Line 886
/* Skip any zero length word. Just advance a single position,
then go fetch the next word. */
if (word_end == word_start) Line 891
{
cursor++; Line 893
continue; Line 894
}
/* This is a genuine, non empty word, so save it as a possible
key. Then skip over it. Also, maintain the maximum length of
all words read so far. It is mandatory to take the maximum
length of all words in the file, without considering if they
are actually kept or rejected, because backward jumps at output
generation time may fall in *any* word. */
possible_key.start = cursor; Line 904
possible_key.size = word_end - word_start; Line 905
cursor += possible_key.size; Line 906
if (possible_key.size > maximum_word_length) Line 908
maximum_word_length = possible_key.size; Line 909
/* In input reference mode, update 'line_start' from its previous
value. Count the lines just in case auto reference mode is
also selected. If it happens that the word just matched is
indeed part of a reference; just ignore it. */
if (input_reference) Line 916
{
while (line_scan < possible_key.start) Line 918
if (*line_scan == '\n') Line 919
{
total_line_count++; Line 921
line_scan++; Line 922
line_start = line_scan; Line 923
SKIP_NON_WHITE (line_scan, text_buffer->end); Line 924
reference_length = line_scan - line_start; Line 925
}
else Line 927
line_scan++; Line 928
if (line_scan > possible_key.start) Line 929
continue; Line 930
}
/* Ignore the word if an 'Ignore words' table exists and if it is
part of it. Also ignore the word if an 'Only words' table and
if it is *not* part of it.
It is allowed that both tables be used at once, even if this
may look strange for now. Just ignore a word that would appear
in both. If regexps are eventually implemented for these
tables, the Ignore table could then reject words that would
have been previously accepted by the Only table. */
if (ignore_file && search_table (&possible_key, &ignore_table)) Line 943
continue; Line 944
if (only_file && !search_table (&possible_key, &only_table)) Line 945
continue; Line 946
/* A non-empty word has been found. First of all, insure
proper allocation of the next OCCURS, and make a pointer to
where it will be constructed. */
if (number_of_occurs[0] == occurs_alloc[0]) Line 952
occurs_table[0] = x2nrealloc (occurs_table[0], Line 953
&occurs_alloc[0], Line 954
sizeof *occurs_table[0]); Line 955
occurs_cursor = occurs_table[0] + number_of_occurs[0]; Line 956
/* Define the reference field, if any. */
if (auto_reference) Line 960
{
/* While auto referencing, update 'line_start' from its
previous value, counting lines as we go. If input
referencing at the same time, 'line_start' has been
advanced earlier, and the following loop is never really
executed. */
while (line_scan < possible_key.start) Line 969
if (*line_scan == '\n') Line 970
{
total_line_count++; Line 972
line_scan++; Line 973
line_start = line_scan; Line 974
SKIP_NON_WHITE (line_scan, text_buffer->end); Line 975
}
else Line 977
line_scan++; Line 978
occurs_cursor->reference = total_line_count; Line 980
}
else if (input_reference) Line 982
{
/* If only input referencing, 'line_start' has been computed
earlier to detect the case the word matched would be part
of the reference. The reference position is simply the
value of 'line_start'. */
occurs_cursor->reference = line_start - possible_key.start; Line 990
if (reference_length > reference_max_width) Line 991
reference_max_width = reference_length; Line 992
}
/* Exclude the reference from the context in simple cases. */
if (input_reference && line_start == context_start) Line 997
{
SKIP_NON_WHITE (context_start, context_end); Line 999
SKIP_WHITE (context_start, context_end); Line 1000
}
/* Completes the OCCURS structure. */
occurs_cursor->key = possible_key; Line 1005
occurs_cursor->left = context_start - possible_key.start; Line 1006
occurs_cursor->right = context_end - possible_key.start; Line 1007
occurs_cursor->file_index = file_index; Line 1008
number_of_occurs[0]++; Line 1010
}
}
}
/* Formatting and actual output - service routines. */
/*-----------------------------------------.
| Prints some NUMBER of spaces on stdout. |
`-----------------------------------------*/
/* A NUMBER of zero or less writes nothing.  */
static void
print_spaces (ptrdiff_t number)
{
  ptrdiff_t remaining = number;

  while (remaining > 0)
    {
      putchar (' ');
      remaining--;
    }
}
/*-------------------------------------.
| Prints the field provided by FIELD. |
`-------------------------------------*/
static void Line 1032
print_field (BLOCK field) Line 1033
{
char *cursor; /* Cursor in field to print */ Line 1035
int base; /* Base character, without diacritic */ Line 1036
int diacritic; /* Diacritic code for the character */ Line 1037
/* Whitespace is not really compressed. Instead, each white space
character (tab, vt, ht etc.) is printed as one single space. */
for (cursor = field.start; cursor < field.end; cursor++) Line 1042
{
unsigned char character = *cursor; Line 1044
if (edited_flag[character]) Line 1045
{
/* First check if this is a diacriticized character.
This works only for TeX. I do not know how diacriticized
letters work with 'roff'. Please someone explain it to me! */
diacritic = todiac (character); Line 1053
if (diacritic != 0 && output_format == TEX_FORMAT) Line 1054
{
base = tobase (character); Line 1056
switch (diacritic) Line 1057
{
case 1: /* Latin diphthongs */ Line 1060
switch (base) Line 1061
{
case 'o': Line 1063
fputs ("\\oe{}", stdout); Line 1064
break; Line 1065
case 'O': Line 1067
fputs ("\\OE{}", stdout); Line 1068
break; Line 1069
case 'a': Line 1071
fputs ("\\ae{}", stdout); Line 1072
break; Line 1073
case 'A': Line 1075
fputs ("\\AE{}", stdout); Line 1076
break; Line 1077
default: Line 1079
putchar (' '); Line 1080
}
break; Line 1082
case 2: /* Acute accent */ Line 1084
printf ("\\'%s%c", (base == 'i' ? "\\" : ""), base); Line 1085
break; Line 1086
case 3: /* Grave accent */ Line 1088
printf ("\\'%s%c", (base == 'i' ? "\\" : ""), base); Line 1089
break; Line 1090
case 4: /* Circumflex accent */ Line 1092
printf ("\\^%s%c", (base == 'i' ? "\\" : ""), base); Line 1093
break; Line 1094
case 5: /* Diaeresis */ Line 1096
printf ("\\\"%s%c", (base == 'i' ? "\\" : ""), base); Line 1097
break; Line 1098
case 6: /* Tilde accent */ Line 1100
printf ("\\~%s%c", (base == 'i' ? "\\" : ""), base); Line 1101
break; Line 1102
case 7: /* Cedilla */ Line 1104
printf ("\\c{%c}", base); Line 1105
break; Line 1106
case 8: /* Small circle beneath */ Line 1108
switch (base) Line 1109
{
case 'a': Line 1111
fputs ("\\aa{}", stdout); Line 1112
break; Line 1113
case 'A': Line 1115
fputs ("\\AA{}", stdout); Line 1116
break; Line 1117
default: Line 1119
putchar (' '); Line 1120
}
break; Line 1122
case 9: /* Strike through */ Line 1124
switch (base) Line 1125
{
case 'o': Line 1127
fputs ("\\o{}", stdout); Line 1128
break; Line 1129
case 'O': Line 1131
fputs ("\\O{}", stdout); Line 1132
break; Line 1133
default: Line 1135
putchar (' '); Line 1136
}
break; Line 1138
}
}
else Line 1141
/* This is not a diacritic character, so handle cases which are
really specific to 'roff' or TeX. All white space processing
is done as the default case of this switch. */
switch (character) Line 1147
{
case '"': Line 1149
/* In roff output format, double any quote. */
putchar ('"'); Line 1151
putchar ('"'); Line 1152
break; Line 1153
case '$': Line 1155
case '%': Line 1156
case '&': Line 1157
case '#': Line 1158
case '_': Line 1159
/* In TeX output format, precede these with a backslash. */
putchar ('\\'); Line 1161
putchar (character); Line 1162
break; Line 1163
case '{': Line 1165
case '}': Line 1166
/* In TeX output format, precede these with a backslash and
force mathematical mode. */
printf ("$\\%c$", character); Line 1169
break; Line 1170
case '\\': Line 1172
/* In TeX output mode, request production of a backslash. */
fputs ("\\backslash{}", stdout); Line 1174
break; Line 1175
default: Line 1177
/* Any other flagged character produces a single space. */
putchar (' '); Line 1179
}
}
else Line 1182
putchar (*cursor); Line 1183
} Block 20
}
/* Formatting and actual output - planning routines. */
/*--------------------------------------------------------------------.
| From information collected from command line options and input file |
| readings, compute and fix some output parameter values. |
`--------------------------------------------------------------------*/
static void Line 1194
fix_output_parameters (void) Line 1195
{
size_t file_index; /* index in text input file arrays */ Line 1197
intmax_t line_ordinal; /* line ordinal value for reference */ Line 1198
ptrdiff_t reference_width; /* width for the whole reference */ Line 1199
int character; /* character ordinal */ Line 1200
const char *cursor; /* cursor in some constant strings */ Line 1201
/* In auto reference mode, the maximum width of this field is
precomputed and subtracted from the overall line width. Add one for
the column which separate the file name from the line number. */
if (auto_reference) Line 1207
{
reference_max_width = 0; Line 1209
for (file_index = 0; file_index < number_input_files; file_index++) Line 1210
{
line_ordinal = file_line_count[file_index] + 1; Line 1212
if (file_index > 0) Line 1213
line_ordinal -= file_line_count[file_index - 1]; Line 1214
char ordinal_string[INT_BUFSIZE_BOUND (intmax_t)]; Line 1215
reference_width = sprintf (ordinal_string, "%"PRIdMAX, line_ordinal); Line 1216
if (input_file_name[file_index]) Line 1217
reference_width += strlen (input_file_name[file_index]); Line 1218
if (reference_width > reference_max_width) Line 1219
reference_max_width = reference_width; Line 1220
}
reference_max_width++; Line 1222
reference.start = xmalloc (reference_max_width + 1); Line 1223
} Block 21
/* If the reference appears to the left of the output line, reserve some
space for it right away, including one gap size. */
if ((auto_reference || input_reference) && !right_reference) Line 1229
line_width -= reference_max_width + gap_size; Line 1230
if (line_width < 0) Line 1231
line_width = 0; Line 1232
/* The output lines, minimally, will contain from left to right a left
context, a gap, and a keyword followed by the right context with no
special intervening gap. Half of the line width is dedicated to the
left context and the gap, the other half is dedicated to the keyword
and the right context; these values are computed once and for all here.
There also are tail and head wrap around fields, used when the keyword
is near the beginning or the end of the line, or when some long word
cannot fit in, but leave place from wrapped around shorter words. The
maximum width of these fields are recomputed separately for each line,
on a case by case basis. It is worth noting that it cannot happen that
both the tail and head fields are used at once. */
half_line_width = line_width / 2; Line 1246
before_max_width = half_line_width - gap_size; Line 1247
keyafter_max_width = half_line_width; Line 1248
/* If truncation_string is the empty string, make it NULL to speed up
tests. In this case, truncation_string_length will never get used, so
there is no need to set it. */
if (truncation_string && *truncation_string) Line 1254
truncation_string_length = strlen (truncation_string); Line 1255
else Line 1256
truncation_string = NULL; Line 1257
if (gnu_extensions) Line 1259
{
/* When flagging truncation at the left of the keyword, the
truncation mark goes at the beginning of the before field,
unless there is a head field, in which case the mark goes at the
left of the head field. When flagging truncation at the right
of the keyword, the mark goes at the end of the keyafter field,
unless there is a tail field, in which case the mark goes at the
end of the tail field. Only eight combination cases could arise
for truncation marks:
. None.
. One beginning the before field.
. One beginning the head field.
. One ending the keyafter field.
. One ending the tail field.
. One beginning the before field, another ending the keyafter field.
. One ending the tail field, another beginning the before field.
. One ending the keyafter field, another beginning the head field.
So, there is at most two truncation marks, which could appear both
on the left side of the center of the output line, both on the
right side, or one on either side. */
before_max_width -= 2 * truncation_string_length; Line 1284
if (before_max_width < 0) Line 1285
before_max_width = 0; Line 1286
keyafter_max_width -= 2 * truncation_string_length; Line 1287
}
else Line 1289
{
/* I never figured out exactly how UNIX' ptx plans the output width
of its various fields. If GNU extensions are disabled, do not
try computing the field widths correctly; instead, use the
following formula, which does not completely imitate UNIX' ptx,
but almost. */
keyafter_max_width -= 2 * truncation_string_length + 1; Line 1298
}
/* Compute which characters need special output processing. Initialize
by flagging any white space character. Some systems do not consider
form feed as a space character, but we do. */
for (character = 0; character < CHAR_SET_SIZE; character++) Line 1305
edited_flag[character] = !! isspace (character); Line 1306
edited_flag['\f'] = 1; Line 1307
/* Complete the special character flagging according to selected output
format. */
switch (output_format) Line 1312
{
case UNKNOWN_FORMAT: Line 1314
/* Should never happen. */
case DUMB_FORMAT: Line 1317
break; Line 1318
case ROFF_FORMAT: Line 1320
/* 'Quote' characters should be doubled. */
edited_flag['"'] = 1; Line 1324
break; Line 1325
case TEX_FORMAT: Line 1327
/* Various characters need special processing. */
for (cursor = "$%_{}\\"; *cursor; cursor++) Line 1331
edited_flag[to_uchar (*cursor)] = 1; Line 1332
/* Any character with 8th bit set will print to a single space, unless
it is diacriticized. */
for (character = 0200; character < CHAR_SET_SIZE; character++) Line 1337
edited_flag[character] = todiac (character) != 0; Line 1338
break; Line 1339
} Block 24
}
/*------------------------------------------------------------------.
| Compute the position and length of all the output fields, given a |
| pointer to some OCCURS. |
`------------------------------------------------------------------*/
/* Set the file-scope output fields 'keyafter', 'before', 'tail' and
   'head' for the occurrence OCCURS, together with the matching
   *_truncation flags, and set 'reference' when auto_reference or
   input_reference is enabled.  occurs->left and occurs->right are
   offsets relative to the keyword start (occurs->key.start).  Nothing
   is returned; the output routines read the fields afterwards.  */
static void Line 1348
define_all_fields (OCCURS *occurs) Line 1349
{
ptrdiff_t tail_max_width; /* allowable width of tail field */ Line 1351
ptrdiff_t head_max_width; /* allowable width of head field */ Line 1352
char *cursor; /* running cursor in source text */ Line 1353
char *left_context_start; /* start of left context */ Line 1354
char *right_context_end; /* end of right context */ Line 1355
char *left_field_start; /* conservative start for 'head'/'before' */ Line 1356
const char *file_name; /* file name for reference */ Line 1357
intmax_t line_ordinal; /* line ordinal for reference */ Line 1358
const char *buffer_start; /* start of buffered file for this occurs */ Line 1359
const char *buffer_end; /* end of buffered file for this occurs */ Line 1360
/* Define 'keyafter', start of left context and end of right context.
'keyafter' starts at the saved position for keyword and extends to the
right from the end of the keyword, eating separators or full words, but
not beyond maximum allowed width for 'keyafter' field or limit for the
right context. Suffix spaces will be removed afterwards. */
keyafter.start = occurs->key.start; Line 1368
keyafter.end = keyafter.start + occurs->key.size; Line 1369
left_context_start = keyafter.start + occurs->left; Line 1370
right_context_end = keyafter.start + occurs->right; Line 1371
buffer_start = text_buffers[occurs->file_index].start; Line 1373
buffer_end = text_buffers[occurs->file_index].end; Line 1374
cursor = keyafter.end; Line 1376
while (cursor < right_context_end Line 1377
&& cursor <= keyafter.start + keyafter_max_width) Line 1378
{
keyafter.end = cursor; Line 1380
SKIP_SOMETHING (cursor, right_context_end); Line 1381
} Block 25
/* If the last step still lies within the allowed width, keep it too. */
if (cursor <= keyafter.start + keyafter_max_width) Line 1383
keyafter.end = cursor; Line 1384
/* Flag truncation only when a truncation string is configured and part
of the right context did not fit into 'keyafter'. */
keyafter_truncation = truncation_string && keyafter.end < right_context_end; Line 1386
SKIP_WHITE_BACKWARDS (keyafter.end, keyafter.start); Line 1388
/* When the left context is wide, it might take some time to catch up from
the left context boundary to the beginning of the 'head' or 'before'
fields. So, in this case, to speed the catchup, we jump back from the
keyword, using some secure distance, possibly falling in the middle of
a word. A secure backward jump would be at least half the maximum
width of a line, plus the size of the longest word met in the whole
input. We conclude this backward jump by a skip forward of at least
one word. In this manner, we should not inadvertently accept only part
of a word. From the reached point, when it will be time to fix the
beginning of 'head' or 'before' fields, we will skip forward words or
delimiters until we get sufficiently near. */
if (-occurs->left > half_line_width + maximum_word_length) Line 1402
{
left_field_start Line 1404
= keyafter.start - (half_line_width + maximum_word_length); Line 1405
SKIP_SOMETHING (left_field_start, keyafter.start); Line 1406
} Block 26
else Line 1408
left_field_start = keyafter.start + occurs->left; Line 1409
/* 'before' certainly ends at the keyword, but not including separating
spaces. It starts at or after the saved value for the left context, by
advancing it until it falls inside the maximum allowed width for the
before field. There will be no prefix spaces either. 'before' only
advances by skipping single separators or whole words. */
before.start = left_field_start; Line 1417
before.end = keyafter.start; Line 1418
SKIP_WHITE_BACKWARDS (before.end, before.start); Line 1419
while (before.start + before_max_width < before.end) Line 1421
SKIP_SOMETHING (before.start, before.end); Line 1422
if (truncation_string) Line 1424
{
/* Step back over white space first, so that 'before' is flagged as
truncated only if the text preceding it is still inside the left
context. */
cursor = before.start; Line 1426
SKIP_WHITE_BACKWARDS (cursor, buffer_start); Line 1427
before_truncation = cursor > left_context_start; Line 1428
} Block 27
else Line 1430
before_truncation = false; Line 1431
SKIP_WHITE (before.start, buffer_end); Line 1433
/* The tail could not take more columns than what has been left in the
left context field, and a gap is mandatory. It starts after the
right context, and does not contain prefixed spaces. It ends at
the end of line, the end of buffer or when the tail field is full,
whichever comes first. It cannot contain only part of a word, and
has no suffixed spaces. */
tail_max_width Line 1442
= before_max_width - (before.end - before.start) - gap_size; Line 1443
if (tail_max_width > 0) Line 1445
{
tail.start = keyafter.end; Line 1447
SKIP_WHITE (tail.start, buffer_end); Line 1448
tail.end = tail.start; Line 1450
cursor = tail.end; Line 1451
while (cursor < right_context_end Line 1452
&& cursor < tail.start + tail_max_width) Line 1453
{
tail.end = cursor; Line 1455
SKIP_SOMETHING (cursor, right_context_end); Line 1456
}
if (cursor < tail.start + tail_max_width) Line 1459
tail.end = cursor; Line 1460
if (tail.end > tail.start) Line 1462
{
/* A non-empty tail takes over the truncation mark from 'keyafter'. */
keyafter_truncation = false; Line 1464
tail_truncation = truncation_string && tail.end < right_context_end; Line 1465
}
else Line 1467
tail_truncation = false; Line 1468
SKIP_WHITE_BACKWARDS (tail.end, tail.start); Line 1470
} Block 28
else Line 1472
{
/* No place left for a tail field. */
tail.start = NULL; Line 1477
tail.end = NULL; Line 1478
tail_truncation = false; Line 1479
}
/* 'head' could not take more columns than what has been left in the right
context field, and a gap is mandatory. It ends before the left
context, and does not contain suffixed spaces. Its pointer is advanced
until the head field has shrunk to its allowed width. It cannot
contain only part of a word, and has no suffixed spaces. */
head_max_width Line 1488
= keyafter_max_width - (keyafter.end - keyafter.start) - gap_size; Line 1489
if (head_max_width > 0) Line 1491
{
head.end = before.start; Line 1493
SKIP_WHITE_BACKWARDS (head.end, buffer_start); Line 1494
head.start = left_field_start; Line 1496
while (head.start + head_max_width < head.end) Line 1497
SKIP_SOMETHING (head.start, head.end); Line 1498
if (head.end > head.start) Line 1500
{
/* A non-empty head takes over the truncation mark from 'before'. */
before_truncation = false; Line 1502
head_truncation = (truncation_string Line 1503
&& head.start > left_context_start); Line 1504
}
else Line 1506
head_truncation = false; Line 1507
SKIP_WHITE (head.start, head.end); Line 1509
} Block 30
else Line 1511
{
/* No place left for a head field. */
head.start = NULL; Line 1516
head.end = NULL; Line 1517
head_truncation = false; Line 1518
}
if (auto_reference) Line 1521
{
/* Construct the reference text in preallocated space from the file
name and the line number. Standard input yields an empty file name.
Ensure line numbers are 1 based, even if they are computed 0 based. */
file_name = input_file_name[occurs->file_index]; Line 1528
if (!file_name) Line 1529
file_name = ""; Line 1530
/* occurs->reference holds a line count here; subtracting the previous
file's entry of file_line_count makes it relative to this file. */
line_ordinal = occurs->reference + 1; Line 1532
if (occurs->file_index > 0) Line 1533
line_ordinal -= file_line_count[occurs->file_index - 1]; Line 1534
/* The text written is FILE_NAME, a colon, then the line number. */
char *file_end = stpcpy (reference.start, file_name); Line 1536
reference.end = file_end + sprintf (file_end, ":%"PRIdMAX, line_ordinal); Line 1537
}
else if (input_reference) Line 1539
{
/* Reference starts at saved position for reference and extends right
until some white space is met. Here occurs->reference is an offset
from the keyword start. */
reference.start = keyafter.start + occurs->reference; Line 1545
reference.end = reference.start; Line 1546
SKIP_NON_WHITE (reference.end, right_context_end); Line 1547
}
}
/* Formatting and actual output - control routines. */
/*----------------------------------------------------------------------.
| Output the current output fields as one line for 'troff' or 'nroff'. |
`----------------------------------------------------------------------*/
static void Line 1557
output_one_roff_line (void) Line 1558
{
/* Output the 'tail' field. */
printf (".%s \"", macro_name); Line 1562
print_field (tail); Line 1563
if (tail_truncation) Line 1564
fputs (truncation_string, stdout); Line 1565
putchar ('"'); Line 1566
/* Output the 'before' field. */
fputs (" \"", stdout); Line 1570
if (before_truncation) Line 1571
fputs (truncation_string, stdout); Line 1572
print_field (before); Line 1573
putchar ('"'); Line 1574
/* Output the 'keyafter' field. */
fputs (" \"", stdout); Line 1578
print_field (keyafter); Line 1579
if (keyafter_truncation) Line 1580
fputs (truncation_string, stdout); Line 1581
putchar ('"'); Line 1582
/* Output the 'head' field. */
fputs (" \"", stdout); Line 1586
if (head_truncation) Line 1587
fputs (truncation_string, stdout); Line 1588
print_field (head); Line 1589
putchar ('"'); Line 1590
/* Conditionally output the 'reference' field. */
if (auto_reference || input_reference) Line 1594
{
fputs (" \"", stdout); Line 1596
print_field (reference); Line 1597
putchar ('"'); Line 1598
} Block 34
putchar ('\n'); Line 1601
}
/*---------------------------------------------------------.
| Output the current output fields as one line for 'TeX'. |
`---------------------------------------------------------*/
static void Line 1608
output_one_tex_line (void) Line 1609
{
BLOCK key; /* key field, isolated */ Line 1611
BLOCK after; /* after field, isolated */ Line 1612
char *cursor; /* running cursor in source text */ Line 1613
printf ("\\%s ", macro_name); Line 1615
putchar ('{'); Line 1616
print_field (tail); Line 1617
fputs ("}{", stdout); Line 1618Block 35
print_field (before); Line 1619
fputs ("}{", stdout); Line 1620Block 36
key.start = keyafter.start; Line 1621
after.end = keyafter.end; Line 1622
cursor = keyafter.start; Line 1623
SKIP_SOMETHING (cursor, keyafter.end); Line 1624
key.end = cursor; Line 1625
after.start = cursor; Line 1626
print_field (key); Line 1627
fputs ("}{", stdout); Line 1628Block 37
print_field (after); Line 1629
fputs ("}{", stdout); Line 1630Block 38
print_field (head); Line 1631
putchar ('}'); Line 1632Block 39
if (auto_reference || input_reference) Line 1633
{
putchar ('{'); Line 1635
print_field (reference); Line 1636
putchar ('}'); Line 1637
} Block 40
putchar ('\n'); Line 1639
}
/*-------------------------------------------------------------------.
| Output the current output fields as one line for a dumb terminal. |
`-------------------------------------------------------------------*/
static void Line 1646
output_one_dumb_line (void) Line 1647
{
if (!right_reference) Line 1649
{
if (auto_reference) Line 1651
{
/* Output the 'reference' field, in such a way that GNU emacs
next-error will handle it. The ending colon is taken from the
gap which follows. */
print_field (reference); Line 1658
putchar (':'); Line 1659
print_spaces (reference_max_width Line 1660
+ gap_size Line 1661
- (reference.end - reference.start) Line 1662
- 1); Line 1663
}
else Line 1665
{
/* Output the 'reference' field and its following gap. */
print_field (reference); Line 1670
print_spaces (reference_max_width Line 1671
+ gap_size Line 1672
- (reference.end - reference.start)); Line 1673
}
} Block 41
if (tail.start < tail.end) Line 1677
{
/* Output the 'tail' field. */
print_field (tail); Line 1681
if (tail_truncation) Line 1682
fputs (truncation_string, stdout); Line 1683
print_spaces (half_line_width - gap_size Line 1685
- (before.end - before.start) Line 1686
- (before_truncation ? truncation_string_length : 0) Line 1687
- (tail.end - tail.start) Line 1688
- (tail_truncation ? truncation_string_length : 0)); Line 1689
}
else Line 1691
print_spaces (half_line_width - gap_size Line 1692
- (before.end - before.start) Line 1693
- (before_truncation ? truncation_string_length : 0)); Line 1694
/* Output the 'before' field. */
if (before_truncation) Line 1698
fputs (truncation_string, stdout); Line 1699
print_field (before); Line 1700
print_spaces (gap_size); Line 1702
/* Output the 'keyafter' field. */
print_field (keyafter); Line 1706
if (keyafter_truncation) Line 1707
fputs (truncation_string, stdout); Line 1708
if (head.start < head.end) Line 1710
{
/* Output the 'head' field. */
print_spaces (half_line_width Line 1714
- (keyafter.end - keyafter.start) Line 1715
- (keyafter_truncation ? truncation_string_length : 0) Line 1716
- (head.end - head.start) Line 1717
- (head_truncation ? truncation_string_length : 0)); Line 1718
if (head_truncation) Line 1719
fputs (truncation_string, stdout); Line 1720
print_field (head); Line 1721
}
else Line 1723
if ((auto_reference || input_reference) && right_reference) Line 1725
print_spaces (half_line_width Line 1726
- (keyafter.end - keyafter.start) Line 1727
- (keyafter_truncation ? truncation_string_length : 0)); Line 1728
if ((auto_reference || input_reference) && right_reference) Line 1730
{
/* Output the 'reference' field. */
print_spaces (gap_size); Line 1734
print_field (reference); Line 1735
}
putchar ('\n'); Line 1738
}
/*------------------------------------------------------------------------.
| Scan the whole occurs table and, for each entry, output one line in the |
| appropriate format. |
`------------------------------------------------------------------------*/
static void Line 1746
generate_all_output (void) Line 1747
{
ptrdiff_t occurs_index; /* index of keyword entry being processed */ Line 1749
OCCURS *occurs_cursor; /* current keyword entry being processed */ Line 1750
/* The following assignments are useful to provide default values in case
line contexts or references are not used, in which case these variables
would never be computed. */
tail.start = NULL; Line 1756
tail.end = NULL; Line 1757
tail_truncation = false; Line 1758
head.start = NULL; Line 1760
head.end = NULL; Line 1761
head_truncation = false; Line 1762
/* Loop over all keyword occurrences. */
occurs_cursor = occurs_table[0]; Line 1766
for (occurs_index = 0; occurs_index < number_of_occurs[0]; occurs_index++) Line 1768
{
/* Compute the exact size of every field and whenever truncation flags
are present or not. */
define_all_fields (occurs_cursor); Line 1773
/* Produce one output line according to selected format. */
switch (output_format) Line 1777
{
case UNKNOWN_FORMAT: Line 1779
/* Should never happen. */
case DUMB_FORMAT: Line 1782
output_one_dumb_line (); Line 1783
break; Line 1784
case ROFF_FORMAT: Line 1786
output_one_roff_line (); Line 1787
break; Line 1788
case TEX_FORMAT: Line 1790
output_one_tex_line (); Line 1791
break; Line 1792
}
/* Advance the cursor into the occurs table. */
occurs_cursor++; Line 1797
}
}
/* Option decoding and main program. */
/*------------------------------------------------------.
| Print program identification and options, then exit. |
`------------------------------------------------------*/
/* When STATUS is not EXIT_SUCCESS, only emit_try_help () is called.
   Otherwise the synopsis, a one-line description and the option list
   are written to stdout.  In both cases the function ends by calling
   exit (STATUS), so it does not return to its caller.  */
void Line 1807
usage (int status) Line 1808
{
/* The help text below is built from translatable string literals; their
   wording and layout are part of the program's output. */
if (status != EXIT_SUCCESS) Line 1810
emit_try_help (); ...!common auto-comment...
else Line 1812
{
printf (_("\ Line 1814
Usage: %s [OPTION]... [INPUT]... (without -G)\n\ Line 1815
or: %s -G [OPTION]... [INPUT [OUTPUT]]\n"), Line 1816
program_name, program_name); Line 1817
fputs (_("\ Line 1818
Output a permuted index, including context, of the words in the input files.\n\ Line 1819
"), stdout); Line 1820
emit_stdin_note (); ...!common auto-comment...
emit_mandatory_arg_note (); ...!common auto-comment...
fputs (_("\ Line 1825
-A, --auto-reference output automatically generated references\n\ Line 1826
-G, --traditional behave more like System V 'ptx'\n\ Line 1827
"), stdout); Line 1828
fputs (_("\ Line 1829
-F, --flag-truncation=STRING use STRING for flagging line truncations.\n\ Line 1830
The default is '/'\n\ Line 1831
"), stdout); Line 1832
fputs (_("\ Line 1833
-M, --macro-name=STRING macro name to use instead of 'xx'\n\ Line 1834
-O, --format=roff generate output as roff directives\n\ Line 1835
-R, --right-side-refs put references at right, not counted in -w\n\ Line 1836
-S, --sentence-regexp=REGEXP for end of lines or end of sentences\n\ Line 1837
-T, --format=tex generate output as TeX directives\n\ Line 1838
"), stdout); Line 1839
fputs (_("\ Line 1840
-W, --word-regexp=REGEXP use REGEXP to match each keyword\n\ Line 1841
-b, --break-file=FILE word break characters in this FILE\n\ Line 1842
-f, --ignore-case fold lower case to upper case for sorting\n\ Line 1843
-g, --gap-size=NUMBER gap size in columns between output fields\n\ Line 1844
-i, --ignore-file=FILE read ignore word list from FILE\n\ Line 1845
-o, --only-file=FILE read only word list from this FILE\n\ Line 1846
"), stdout); Line 1847
fputs (_("\ Line 1848
-r, --references first field of each line is a reference\n\ Line 1849
-t, --typeset-mode - not implemented -\n\ Line 1850
-w, --width=NUMBER output width in columns, reference excluded\n\ Line 1851
"), stdout); Line 1852
fputs (HELP_OPTION_DESCRIPTION, stdout); Line 1853
fputs (VERSION_OPTION_DESCRIPTION, stdout); Line 1854
emit_ancillary_info (PROGRAM_NAME); Line 1855
} Block 46
exit (status); Line 1857
}
/*----------------------------------------------------------------------.
| Main program. Decode ARGC arguments passed through the ARGV array of |
| strings, then launch execution. |
`----------------------------------------------------------------------*/
/* Long options equivalences. */
/* Long option table handed to getopt_long in main.  Each entry gives
   the long name, whether it takes an argument, a NULL flag pointer, and
   the value reported for the option.  For most entries that value is
   the letter of the equivalent short option.  The all-zero entry ends
   the table. */
static struct option const long_options[] = Line 1866
{
{"auto-reference", no_argument, NULL, 'A'}, Line 1868Block 47
{"break-file", required_argument, NULL, 'b'}, Line 1869Block 48
{"flag-truncation", required_argument, NULL, 'F'}, Line 1870Block 49
{"ignore-case", no_argument, NULL, 'f'}, Line 1871Block 50
{"gap-size", required_argument, NULL, 'g'}, Line 1872Block 51
{"ignore-file", required_argument, NULL, 'i'}, Line 1873Block 52
{"macro-name", required_argument, NULL, 'M'}, Line 1874Block 53
{"only-file", required_argument, NULL, 'o'}, Line 1875Block 54
{"references", no_argument, NULL, 'r'}, Line 1876Block 55
{"right-side-refs", no_argument, NULL, 'R'}, Line 1877Block 56
/* --format has no single short-letter equivalent, so it reports the
   value 10 instead of a letter.  NOTE(review): presumably matched by a
   'case 10:' in main's option switch -- confirm. */
{"format", required_argument, NULL, 10}, Line 1878Block 57
{"sentence-regexp", required_argument, NULL, 'S'}, Line 1879Block 58
{"traditional", no_argument, NULL, 'G'}, Line 1880Block 59
{"typeset-mode", no_argument, NULL, 't'}, Line 1881Block 60
{"width", required_argument, NULL, 'w'}, Line 1882Block 61
{"word-regexp", required_argument, NULL, 'W'}, Line 1883Block 62
{GETOPT_HELP_OPTION_DECL}, Line 1884Block 63
{GETOPT_VERSION_OPTION_DECL}, Line 1885Block 64
{NULL, 0, NULL, 0}, Line 1886
};
/* NULL-terminated list of format names.  The entries are in the same
   order as those of format_vals.  NOTE(review): presumably the accepted
   arguments of --format; the code that consumes this table is outside
   this view -- confirm. */
static char const* const format_args[] = Line 1889
{
"roff", "tex", NULL Line 1891
};
/* Format values listed in the same order as the names in format_args
   ("roff", then "tex").  NOTE(review): presumably indexed in parallel
   with format_args by the --format handler -- confirm. */
static enum Format const format_vals[] = Line 1894
{
ROFF_FORMAT, TEX_FORMAT Line 1896
};
int
main (int argc, char **argv) Line 1900
{
int optchar; /* argument character */ Line 1902
int file_index; /* index in text input file arrays */ Line 1903
/* Decode program options. */
initialize_main (&argc, &argv); VMS-specific entry point handling wildcard expansion
set_program_name (argv[0]); Retains program name and discards path
setlocale (LC_ALL, ""); Sets up internationalization (i18n)
bindtextdomain (PACKAGE, LOCALEDIR); Assigns i18n directorySets text domain for _() [gettext()] function
textdomain (PACKAGE); Sets text domain for _() [gettext()] function
atexit (close_stdout); Close stdout on exit (see gnulib)
#if HAVE_SETCHRCLASS Line 1915
setchrclass (NULL); Line 1916
#endif Line 1917
while (optchar = getopt_long (argc, argv, "AF:GM:ORS:TW:b:i:fg:o:trw:", Line 1919
long_options, NULL), Line 1920
optchar != EOF) Line 1921
{
switch (optchar) Line 1923
{
default: Line 1925
usage (EXIT_FAILURE); Line 1926
case 'G': Line 1928
gnu_extensions = false; Line 1929
break; Line 1930
case 'b': Line 1932
break_file = optarg; Line 1933
break; Line 1934
case 'f': Line 1936
ignore_case = true; Line 1937
break; Line 1938
case 'g': Line 1940
{
intmax_t tmp; Line 1942
if (! (xstrtoimax (optarg, NULL, 0, &tmp, NULL) == LONGINT_OK Line 1943
&& 0 < tmp && tmp <= PTRDIFF_MAX)) Line 1944
die (EXIT_FAILURE, 0, _("invalid gap width: %s"), Line 1945
quote (optarg)); Line 1946
gap_size = tmp; Line 1947
break; Line 1948
}
case 'i': Line 1951
ignore_file = optarg; Line 1952
break; Line 1953
case 'o': Line 1955
only_file = optarg; Line 1956
break; Line 1957
case 'r': Line 1959
input_reference = true; Line 1960
break; Line 1961
case 't': Line 1963
/* Yet to understand... */
break; Line 1965
case 'w': Line 1967
{
intmax_t tmp; Line 1969
if (! (xstrtoimax (optarg, NULL, 0, &tmp, NULL) == LONGINT_OK Line 1970
&& 0 < tmp && tmp <= PTRDIFF_MAX)) Line 1971
die (EXIT_FAILURE, 0, _("invalid line width: %s"), Line 1972
quote (optarg)); Line 1973
line_width = tmp; Line 1974
break; Line 1975
}
case 'A': Line 1978
auto_reference = true; Line 1979
break; Line 1980
case 'F': Line 1982
truncation_string = copy_unescaped_string (optarg); Line 1983
break; Line 1984
case 'M': Line 1986
macro_name = optarg; Line 1987
break; Line 1988
case 'O': Line 1990
output_format = ROFF_FORMAT; Line 1991
break; Line 1992
case 'R': Line 1994
right_reference = true; Line 1995
break; Line 1996
case 'S': Line 1998
context_regex.string = copy_unescaped_string (optarg); Line 1999
break; Line 2000
case 'T': Line 2002
output_format = TEX_FORMAT; Line 2003
break; Line 2004
case 'W': Line 2006
word_regex.string = copy_unescaped_string (optarg); Line 2007
if (!*word_regex.string) Line 2008
word_regex.string = NULL; Line 2009
break; Line 2010
case 10: Line 2012
output_format = XARGMATCH ("--format", optarg, Line 2013
format_args, format_vals); Line 2014
break; Line 2015
case_GETOPT_HELP_CHAR; Line 2017
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); Line 2019
}
} Block 66
/* Process remaining arguments. If GNU extensions are enabled, process
all arguments as input parameters. If disabled, accept at most two
arguments, the second of which is an output parameter. */
if (optind == argc) Line 2027
{
/* No more argument simply means: read standard input. */
input_file_name = xmalloc (sizeof *input_file_name); Line 2032
file_line_count = xmalloc (sizeof *file_line_count); Line 2033
text_buffers = xmalloc (sizeof *text_buffers); Line 2034
number_input_files = 1; Line 2035
input_file_name[0] = NULL; Line 2036
}
else if (gnu_extensions) Line 2038
{
number_input_files = argc - optind; Line 2040
input_file_name = xnmalloc (number_input_files, sizeof *input_file_name); Line 2041
file_line_count = xnmalloc (number_input_files, sizeof *file_line_count); Line 2042
text_buffers = xnmalloc (number_input_files, sizeof *text_buffers); Line 2043
for (file_index = 0; file_index < number_input_files; file_index++) Line 2045
{
if (!*argv[optind] || STREQ (argv[optind], "-")) Line 2047
input_file_name[file_index] = NULL; Line 2048
else Line 2049
input_file_name[file_index] = argv[optind]; Line 2050
optind++; Line 2051
}
} Block 68
else Line 2054
{
/* There is one necessary input file. */
number_input_files = 1; Line 2059
input_file_name = xmalloc (sizeof *input_file_name); Line 2060
file_line_count = xmalloc (sizeof *file_line_count); Line 2061
text_buffers = xmalloc (sizeof *text_buffers); Line 2062
if (!*argv[optind] || STREQ (argv[optind], "-")) Line 2063
input_file_name[0] = NULL; Line 2064
else Line 2065
input_file_name[0] = argv[optind]; Line 2066
optind++; Line 2067
/* Redirect standard output, only if requested. */
if (optind < argc) Line 2071
{
if (! freopen (argv[optind], "w", stdout)) Line 2073...!syscalls auto-comment...
die (EXIT_FAILURE, errno, "%s", quotef (argv[optind])); Line 2074
optind++; Line 2075
}
/* Diagnose any other argument as an error. */
if (optind < argc) Line 2080
{
error (0, 0, _("extra operand %s"), quote (argv[optind])); Line 2082
usage (EXIT_FAILURE); Line 2083
}
}
/* If the output format has not been explicitly selected, choose dumb
terminal format if GNU extensions are enabled, else 'roff' format. */
if (output_format == UNKNOWN_FORMAT) Line 2090
output_format = gnu_extensions ? DUMB_FORMAT : ROFF_FORMAT; Line 2091
/* Initialize the main tables. */
initialize_regex (); Line 2095
/* Read 'Break character' file, if any. */
if (break_file) Line 2099
digest_break_file (break_file); Line 2100
/* Read 'Ignore words' file and 'Only words' files, if any. If any of
these files is empty, reset the name of the file to NULL, to avoid
unnecessary calls to search_table. */
if (ignore_file) Line 2106
{
digest_word_file (ignore_file, &ignore_table); Line 2108
if (ignore_table.length == 0) Line 2109
ignore_file = NULL; Line 2110
} Block 70
if (only_file) Line 2113
{
digest_word_file (only_file, &only_table); Line 2115
if (only_table.length == 0) Line 2116
only_file = NULL; Line 2117
} Block 71
/* Prepare to study all the input files. */
number_of_occurs[0] = 0; Line 2122
total_line_count = 0; Line 2123
maximum_word_length = 0; Line 2124
reference_max_width = 0; Line 2125
for (file_index = 0; file_index < number_input_files; file_index++) Line 2127
{
BLOCK *text_buffer = text_buffers + file_index; Line 2129
/* Read the file in core, then study it. */
swallow_file_in_memory (input_file_name[file_index], text_buffer); Line 2133
find_occurs_in_text (file_index); Line 2134
/* Maintain for each file how many lines has been read so far when its
end is reached. Incrementing the count first is a simple kludge to
handle a possible incomplete line at end of file. */
total_line_count++; Line 2140
file_line_count[file_index] = total_line_count; Line 2141
} Block 72
/* Do the output process phase. */
sort_found_occurs (); Line 2146
fix_output_parameters (); Line 2147
generate_all_output (); Line 2148
/* All done. */
return EXIT_SUCCESS; Line 2152
}