.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.43)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
.    if \nF \{\
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{\
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "btparse::doc::btparse 3"
.TH btparse::doc::btparse 3 "2023-01-30" "btparse, version 0.89" "btparse"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
btparse \- C library for parsing and processing BibTeX data files
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 1
\&   #include <btparse.h>
\&
\&   /* Basic library initialization / cleanup */
\&   void bt_initialize (void);
\&   void bt_free_ast (AST *ast);
\&   void bt_cleanup (void);
\&
\&   /* Input / interface to parser */
\&   void   bt_set_stringopts (bt_metatype_t metatype, btshort options);
\&   AST * bt_parse_entry_s (char *    entry_text,
\&                           char *    filename,
\&                           int       line,
\&                           btshort    options,
\&                           boolean * status);
\&   AST * bt_parse_entry   (FILE *    infile,
\&                           char *    filename,
\&                           btshort    options,
\&                           boolean * status);
\&   AST * bt_parse_file    (char *    filename, 
\&                           btshort    options, 
\&                           boolean * overall_status);
\&
\&   /* AST traversal/query */
\&   AST * bt_next_entry (AST * entry_list, 
\&                        AST * prev_entry);
\&   AST * bt_next_field (AST *entry, AST *prev, char **name);
\&   AST * bt_next_value (AST *head, 
\&                        AST *prev,
\&                        bt_nodetype_t *nodetype,
\&                        char **text);
\&
\&   bt_metatype_t bt_entry_metatype (AST *entry);
\&   char *bt_entry_type (AST *entry);
\&   char *bt_entry_key (AST *entry);
\&   char *bt_get_text (AST *node);
\&
\&   /* Splitting names and lists of names */
\&   bt_stringlist * bt_split_list (char *   string,
\&                                  char *   delim,
\&                                  char *   filename,
\&                                  int      line,
\&                                  char *   description);
\&   void bt_free_list (bt_stringlist *list);
\&   bt_name * bt_split_name (char *  name,
\&                            char *  filename, 
\&                            int     line,
\&                            int     name_num);
\&   void bt_free_name (bt_name * name);
\&
\&   /* Formatting names */
\&   bt_name_format * bt_create_name_format (char * parts, boolean abbrev_first);
\&   void bt_free_name_format (bt_name_format * format);
\&   void bt_set_format_text (bt_name_format * format, 
\&                            bt_namepart part,
\&                            char * pre_part,
\&                            char * post_part,
\&                            char * pre_token,
\&                            char * post_token);
\&   void bt_set_format_options (bt_name_format * format, 
\&                               bt_namepart part,
\&                               boolean abbrev,
\&                               bt_joinmethod join_tokens,
\&                               bt_joinmethod join_part);
\&   char * bt_format_name (bt_name * name, bt_name_format * format);
\&
\&   /* Construct tree from TeX groups */
\&   bt_tex_tree * bt_build_tex_tree (char * string);
\&   void          bt_free_tex_tree (bt_tex_tree **top);
\&   void          bt_dump_tex_tree (bt_tex_tree *node, int depth, FILE *stream);
\&   char *        bt_flatten_tex_tree (bt_tex_tree *top);
\&
\&   /* Miscellaneous string utilities */
\&   void bt_purify_string (char * string, btshort options);
\&   void bt_change_case (char transform, char * string, btshort options);
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
\&\fBbtparse\fR is a C library for parsing and processing BibTeX files.  It
provides a lexical scanner and \s-1LL\s0 parser (constructed by \s-1PCCTS\s0), both of
which are efficient and offer good error detection and recovery; a set
of functions for traversing the \s-1AST\s0 (abstract syntax tree) generated by
the parser; and utility functions for manipulating strings according to
BibTeX conventions.  (Note that nothing in the library assumes that
you're using BibTeX files for their original purpose of bibliographic
data for scholarly publications; you could use the file format for any
conceivable purpose that fits it.  However, there is some code in the
library that is really only appropriate for use with strings meant to be
processed in the same way that BibTeX itself does.  This is all entirely
optional, though.)
.PP
Note that the interface provided by \fBbtparse\fR, while complete, is
fairly low-level.  If you have more sophisticated needs, you might be
interested in my \f(CW\*(C`Text::BibTeX\*(C'\fR module for Perl 5 (available on \s-1CPAN\s0).
.SH "CONCEPTS AND TERMINOLOGY"
.IX Header "CONCEPTS AND TERMINOLOGY"
To understand this document and use \fBbtparse\fR, you should already be
familiar with the BibTeX language\-\-\-more specifically, the BibTeX data
description language.  (BibTeX being the complex beast that it is, one
can conceive of the term applying to the program, the data language, the
particular database structure described in the original BibTeX
documentation, the \*(L".bst\*(R" formatting language, and the set of
conventions embodied in the standard styles included with the BibTeX
distribution.  In this document, I'll stick to the first two
meanings\-\-\-the data language because that's what \fBbtparse\fR deals with,
and the program because it's occasionally necessary to explain
differences between my parser and BibTeX's.)
.PP
In particular, you should have a good idea what's going on in the
following:
.PP
.Vb 3
\&   @string{and = { and },
\&           joe = "Blow, Joe",
\&           john = "John Smith"}
\&
\&   @book(ourbook,
\&         author = joe # and # john,
\&         title = {Our Little Book})
.Ve
.PP
If this looks like something you want to parse, but don't want to have
to write your own parser for, you've come to the right place.
.PP
Before going much further, though, you're going to have to learn some of
the terminology I use for describing BibTeX data.  Most of it's the same
as you'll find in any BibTeX documentation, but it's important to be
sure that we're talking about the same things here.  So, some
definitions:
.IP "top-level" 4
.IX Item "top-level"
All text in a BibTeX file from the start of the file to the start of the
first entry, and between entries thereafter.
.IP "name" 4
.IX Item "name"
A string of letters, digits, and the following characters:
.Sp
.Vb 1
\&   ! $ & * + \- . / : ; < > ? [ ] ^ _ \` |
.Ve
.Sp
A \*(L"name\*(R" is a catch-all used for entry types, entry keys, and field and
macro names.  For BibTeX compatibility, there are slightly different
rules for these four entities; currently, the only such rule actually
implemented is that field and macro names may not begin with a digit.
Some names in the above example: \f(CW\*(C`string\*(C'\fR, \f(CW\*(C`and\*(C'\fR.
.IP "entry" 4
.IX Item "entry"
A chunk of text starting with an \*(L"at\*(R" sign (\f(CW\*(C`@\*(C'\fR) at top-level, followed
by a name (the \fIentry type\fR), an \fIentry delimiter\fR (\f(CW\*(C`{\*(C'\fR or \f(CW\*(C`(\*(C'\fR), and
proceeding to the matching closing delimiter.  Also, the data structure
that results from parsing this chunk of text.  There are two entries in
the above example.
.IP "entry type" 4
.IX Item "entry type"
The name that comes right after an \f(CW\*(C`@\*(C'\fR at top-level.  Examples from
above: \f(CW\*(C`string\*(C'\fR, \f(CW\*(C`book\*(C'\fR.
.IP "entry metatype" 4
.IX Item "entry metatype"
A classification of entry types that allows us to group one or more
entry types under the same heading.  With the standard BibTeX database
structure, \f(CW\*(C`article\*(C'\fR, \f(CW\*(C`book\*(C'\fR, \f(CW\*(C`inbook\*(C'\fR, etc. all fall under the
\&\*(L"regular entry\*(R" metatype.  Other metatypes are \*(L"macro definition\*(R" (for
\&\f(CW\*(C`string\*(C'\fR entries), \*(L"preamble\*(R" (for \f(CW\*(C`preamble\*(C'\fR entries), and \*(L"comment\*(R"
(for \f(CW\*(C`comment\*(C'\fR entries).  In fact, any entry whose type is not one of
\&\f(CW\*(C`string\*(C'\fR, \f(CW\*(C`preamble\*(C'\fR, or \f(CW\*(C`comment\*(C'\fR is called a \*(L"regular\*(R" entry.
.IP "entry delimiters" 4
.IX Item "entry delimiters"
\&\f(CW\*(C`{\*(C'\fR and \f(CW\*(C`}\*(C'\fR, or \f(CW\*(C`(\*(C'\fR and \f(CW\*(C`)\*(C'\fR: the pair of characters that (almost)
mark the boundaries of an entry.  \*(L"Almost\*(R" because the start of an entry
is marked by an \f(CW\*(C`@\*(C'\fR, not by the \*(L"entry open\*(R" delimiter.
.IP "entry key" 4
.IX Item "entry key"
(Or just \fIkey\fR when it's clear what we're speaking of.)  The name
immediately following the entry open delimiter in a regular entry, which
uniquely identifies the entry.  Example from above: \f(CW\*(C`ourbook\*(C'\fR.  Only
regular entries have keys.
.IP "field" 4
.IX Item "field"
A name to the left of an equals sign in a regular or macro-definition
entry.  In the latter context, might also be called a macro name.
Examples from above: \f(CW\*(C`joe\*(C'\fR, \f(CW\*(C`author\*(C'\fR.
.IP "field list" 4
.IX Item "field list"
In a regular entry, everything between the entry delimiters except for
the entry key.  In a macro definition entry, everything between the
entry delimiters (possibly also called a macro list).
.IP "compound value" 4
.IX Item "compound value"
(Usually just \*(L"value\*(R".)  The text that follows an equals sign (\f(CW\*(C`=\*(C'\fR) in
a regular or macro definition entry, up to a comma or the entry close
delimiter; a list of one or more simple values joined by hash signs
(\f(CW\*(C`#\*(C'\fR).
.IP "simple value" 4
.IX Item "simple value"
A string, macro, or number.
.IP "string" 4
.IX Item "string"
(Or, sometimes, \*(L"quoted string.\*(R")  A chunk of text between quotes (\f(CW\*(C`"\*(C'\fR)
or braces (\f(CW\*(C`{\*(C'\fR and \f(CW\*(C`}\*(C'\fR).  Braces must balance: \f(CW\*(C`{this is a {string}\*(C'\fR
is not a BibTeX string, but \f(CW\*(C`{this is a {string}}\*(C'\fR is.  
(\f(CW"this is a {string"\fR is also illegal, mainly to avoid the possibility
of generating bogus TeX code\*(--which BibTeX will do in certain cases.)
.IP "macro" 4
.IX Item "macro"
A name that appears on the right-hand side of an equals sign (i.e. as
one simple value in a compound value).  Implies that this name was
defined as a macro in an earlier macro definition entry, but this is
only checked if \fBbtparse\fR is being asked to expand macros to their full
definitions.
.IP "number" 4
.IX Item "number"
An unquoted string of digits.
.PP
Working with \fBbtparse\fR generally consists of passing the library some
BibTeX data (or a source for some BibTeX data, such as a filename or a
file pointer), which it then lexically scans and parses, constructing an
abstract syntax tree (\s-1AST\s0) from it.  It returns this \s-1AST\s0 to you, and you
call other \fBbtparse\fR functions to traverse and query the tree.
.PP
The contents of \s-1AST\s0 nodes are the private domain of the library, and you
shouldn't go poking into them.  This being C, though, there's nothing to
prevent you from doing so except good manners and the possibility that I
might change the \s-1AST\s0 structure in future releases, breaking any
badly-behaved code.  Also, it's not necessary to know the structural
relationships between nodes in the AST\-\-\-that's taken care of by the
query/traversal functions.
.PP
However, it's useful to know some of the things that \fBbtparse\fR deposits
in the \s-1AST\s0 and returns to you through those query/traversal functions.
First off, each node has a \*(L"node type,\*(R" which records the syntactic
element corresponding to each node.  For instance, the entry
.PP
.Vb 1
\&   @book{mybook, author = "Joe Blow", title = "My Little Book"}
.Ve
.PP
is rooted by an \*(L"entry\*(R" node; under this would be found a \*(L"key\*(R" node
(for the entry key), two \*(L"field\*(R" nodes (for the \*(L"author\*(R" and \*(L"title\*(R"
fields); and associated with each field node would be a \*(L"string\*(R" node.
The only time this concerns you is when you ask the library for a simple
value; just looking at the text is not enough to distinguish quoted
strings, numbers, and macro names, so \fBbtparse\fR returns the nodetype as
well.
.PP
In addition to the nodetype, \fBbtparse\fR records the metatype of each
\&\*(L"entry\*(R" node.  This allows you (and the library) to distinguish, say,
regular entries from comment entries.  Not only do they have very
different structures and must therefore be traversed differently by the
library, but certain traversal functions make no sense on certain entry
metatypes\-\-\-thus it's necessary for you to be able to make the
distinction as well.
.PP
That said, everything you need to know to work with the \s-1AST\s0 is explained
in bt_traversal.
.SH "DATA TYPES AND MACROS"
.IX Header "DATA TYPES AND MACROS"
\&\fBbtparse\fR defines several types required for the external interface.
First, it trivially defines a \f(CW\*(C`boolean\*(C'\fR type (along with \f(CW\*(C`TRUE\*(C'\fR and
\&\f(CW\*(C`FALSE\*(C'\fR macros).  This might affect you when including the \fIbtparse.h\fR
header in your own code\-\-\-since it's not possible for the code to detect
if there is already a \f(CW\*(C`boolean\*(C'\fR type defined, you might have to define
the \f(CW\*(C`HAVE_BOOLEAN\*(C'\fR pre-processor token to deactivate \fIbtparse.h\fR's
\&\f(CW\*(C`typedef\*(C'\fR of \f(CW\*(C`boolean\*(C'\fR.
.PP
Next, two enumeration types are defined: \f(CW\*(C`bt_metatype\*(C'\fR and
\&\f(CW\*(C`bt_nodetype\*(C'\fR.  Both of these are used extensively in the library
itself, and are made available to users of the library because they can
be found in nodes of the \f(CW\*(C`btparse\*(C'\fR \s-1AST\s0 (abstract syntax tree).  (I.e.,
querying the \s-1AST\s0 can give you \f(CW\*(C`bt_metatype\*(C'\fR and \f(CW\*(C`bt_nodetype\*(C'\fR
values, so the \f(CW\*(C`typedef\*(C'\fRs must be available to your code.)
.SS "Entry metatype enum"
.IX Subsection "Entry metatype enum"
\&\f(CW\*(C`bt_metatype\*(C'\fR has the following values:
.IP "\(bu" 4
\&\f(CW\*(C`BTE_UNKNOWN\*(C'\fR
.IP "\(bu" 4
\&\f(CW\*(C`BTE_REGULAR\*(C'\fR
.IP "\(bu" 4
\&\f(CW\*(C`BTE_COMMENT\*(C'\fR
.IP "\(bu" 4
\&\f(CW\*(C`BTE_PREAMBLE\*(C'\fR
.IP "\(bu" 4
\&\f(CW\*(C`BTE_MACRODEF\*(C'\fR
.PP
which are determined by the \*(L"entry type\*(R" token.  (\f(CW@string\fR entries
have the \f(CW\*(C`BTE_MACRODEF\*(C'\fR metatype; \f(CW@comment\fR and \f(CW@preamble\fR
correspond to \f(CW\*(C`BTE_COMMENT\*(C'\fR and \f(CW\*(C`BTE_PREAMBLE\*(C'\fR; and any other entry
type has the \f(CW\*(C`BTE_REGULAR\*(C'\fR metatype.)
.SS "\s-1AST\s0 nodetype enum"
.IX Subsection "AST nodetype enum"
\&\f(CW\*(C`bt_nodetype\*(C'\fR has the following values:
.IP "\(bu" 4
\&\f(CW\*(C`BTAST_UNKNOWN\*(C'\fR
.IP "\(bu" 4
\&\f(CW\*(C`BTAST_ENTRY\*(C'\fR
.IP "\(bu" 4
\&\f(CW\*(C`BTAST_KEY\*(C'\fR
.IP "\(bu" 4
\&\f(CW\*(C`BTAST_FIELD\*(C'\fR
.IP "\(bu" 4
\&\f(CW\*(C`BTAST_STRING\*(C'\fR
.IP "\(bu" 4
\&\f(CW\*(C`BTAST_NUMBER\*(C'\fR
.IP "\(bu" 4
\&\f(CW\*(C`BTAST_MACRO\*(C'\fR
.PP
Of these, you'll only ever deal with the last three.  They are returned
when you query the \s-1AST\s0 for a simple value\-\-\-just seeing the text isn't
enough to distinguish between a quoted string, a number, and a macro, so
the \s-1AST\s0 nodetype is supplied along with the text.
.SS "String processing option macros"
.IX Subsection "String processing option macros"
Since BibTeX is essentially a system for glueing strings together in a
wide variety of ways, the processing done to its strings is fairly
important.  Most of the string transformations are done outside of the
lexer/parser; this reduces their complexity, and makes it easier to
switch different transformations on and off.  This switching is done
with an \*(L"options\*(R" bitmap which can be specified on a per-entry-metatype
basis.  (That is, you can have one set of transformations done to the
strings in all regular entries, another set done to the strings in all
macro definition entries, and so on.)  If you need finer control than
that, it's currently unavailable outside of the library (but it's just a
matter of making a couple functions available and documenting them\-\-\-so
bug me if you need this feature).
.PP
There are four basic macros for constructing this bitmap:
.ie n .IP """BTO_CONVERT""" 4
.el .IP "\f(CWBTO_CONVERT\fR" 4
.IX Item "BTO_CONVERT"
Convert \*(L"number\*(R" values to strings.  (The conversion is trivial,
involving changing the type of the \s-1AST\s0 node representing the number from
\&\f(CW\*(C`BTAST_NUMBER\*(C'\fR to \f(CW\*(C`BTAST_STRING\*(C'\fR.  \*(L"Number\*(R" values are stored as
strings of digits, just as they are in the input data.)
.ie n .IP """BTO_EXPAND""" 4
.el .IP "\f(CWBTO_EXPAND\fR" 4
.IX Item "BTO_EXPAND"
Expand macro invocations to the full macro text.
.ie n .IP """BTO_PASTE""" 4
.el .IP "\f(CWBTO_PASTE\fR" 4
.IX Item "BTO_PASTE"
Paste simple values together.
.ie n .IP """BTO_COLLAPSE""" 4
.el .IP "\f(CWBTO_COLLAPSE\fR" 4
.IX Item "BTO_COLLAPSE"
Collapse whitespace according to the BibTeX rules.
.PP
For instance, supplying \f(CW\*(C`BTO_CONVERT | BTO_EXPAND\*(C'\fR as the string
options bitmap for the \f(CW\*(C`BTE_REGULAR\*(C'\fR metatype means that all simple
values in \*(L"regular\*(R" entries will be converted to strings: numbers will
simply have their \*(L"nodetype\*(R" changed, and macros will be expanded.
Nothing else will be done to the simple values, though\-\-\-they will not
be concatenated, nor will whitespace be collapsed.  See the
\&\f(CW\*(C`bt_set_stringopts()\*(C'\fR and \f(CW\*(C`bt_parse_*()\*(C'\fR functions in bt_input for
more information on the various options for parsing; see
bt_postprocess for details on the post-processing.
.SH "USING THE LIBRARY"
.IX Header "USING THE LIBRARY"
The following code is a skeletal example of using the \fBbtparse\fR
library:
.PP
.Vb 1
\&    #include <btparse.h>
\&
\&    int main (void)
\&    {
\&       bt_initialize ();
\&
\&       /* process some data */
\&
\&       bt_cleanup ();
\&       exit (0);
\&    }
.Ve
.PP
Please note the call to \f(CW\*(C`bt_initialize()\*(C'\fR; this is very important!
Without it, the library may crash or fail mysteriously.  You \fImust\fR
call \f(CW\*(C`bt_initialize()\*(C'\fR before calling any other \fBbtparse\fR functions.
\&\f(CW\*(C`bt_cleanup()\*(C'\fR just frees the memory allocated by \f(CW\*(C`bt_initialize()\*(C'\fR;
if you are careful to call it before exiting, and \f(CW\*(C`bt_free_ast()\*(C'\fR on
any abstract syntax trees generated by \fBbtparse\fR when you are done with
them, then your program shouldn't have any memory leaks.  (Unless
they're due to your own code, of course!)
.SH "BUGS AND LIMITATIONS"
.IX Header "BUGS AND LIMITATIONS"
\&\fBbtparse\fR has several inherent limitations that are due to the lexical
scanner and parser generated by \s-1PCCTS 1\s0.x.  In short, the scanner and
parser are both heavily dependent on global variables, meaning that
thread safety \*(-- or even the ability to have two files open and being
parsed at the same time \*(-- is well-nigh impossible.  This will not
change until I get with the times and adopt \s-1ANTLR 2.0,\s0 the successor to
\&\s-1PCCTS\s0 \*(-- presuming of course that it can generate more modular C
scanners and parsers.
.PP
Another limitation that is due to \s-1PCCTS:\s0 entries with a large number of
fields (more than about 90, if each field value is just a single string)
will cause the parser to crash.  This is unavoidable due to the parser
using statically-allocated stacks for attributes and abstract-syntax
tree nodes.  I could increase the static allocation, but that would just
decrease the likelihood of encountering the problem, not make it go
away.  Again, the chances of this changing as long as I'm using \s-1PCCTS
1\s0.x are nil.
.PP
Apart from those inherent limitations, there are no known bugs in
\&\fBbtparse\fR.  Any segmentation faults or bus errors from the library
should be considered bugs.  They probably result from using the library
incorrectly (e.g., attempting to interleave the parsing of two files), but
I do make an attempt to catch all such mistakes, and if I've missed any
I'd like to know about it.
.PP
Any memory leaks from the library are also a concern; as long as you are
conscientious about calling the cleanup functions (\f(CW\*(C`bt_free_ast()\*(C'\fR and
\&\f(CW\*(C`bt_cleanup()\*(C'\fR), then the library shouldn't leak.
.SH "SEE ALSO"
.IX Header "SEE ALSO"
To read and parse BibTeX data files, see bt_input.
.PP
To traverse the syntax tree that results, see bt_traversal.
.PP
To learn what is done to values in parsed entries, and how to customize
that munging, see bt_postprocess.
.PP
To learn how \fBbtparse\fR deals with strings, see bt_strings (oops, I
haven't written this one yet!).
.PP
To manipulate and access the \fBbtparse\fR macro table, see bt_macros.
.PP
For splitting author names and lists \*(L"the BibTeX way\*(R" using \fBbtparse\fR,
see bt_split_names.
.PP
To put author names back together again, see bt_format_names.
.PP
Miscellaneous functions for processing strings \*(L"the BibTeX way\*(R":
bt_misc.
.PP
A semi-formal language definition is in bt_language.
.SH "AUTHOR"
.IX Header "AUTHOR"
Greg Ward <gward@python.net>
.SH "COPYRIGHT"
.IX Header "COPYRIGHT"
Copyright (c) 1996\-97 by Gregory P. Ward.
.PP
This library is free software; you can redistribute it and/or modify it
under the terms of the \s-1GNU\s0 Library General Public License as published
by the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
.PP
This library is distributed in the hope that it will be useful, but
\&\s-1WITHOUT ANY WARRANTY\s0; without even the implied warranty of
\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS FOR A PARTICULAR PURPOSE.\s0  See the \s-1GNU\s0
Library General Public License for more details.
.PP
You should have received a copy of the \s-1GNU\s0 Library General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, \s-1MA 02139, USA.\s0
.SH "AVAILABILITY"
.IX Header "AVAILABILITY"
The btOOL home page, where you can get up-to-date information about
\&\fBbtparse\fR (and download the latest version), is
.PP
.Vb 1
\&   http://starship.python.net/~gward/btOOL/
.Ve
.PP
You will also find the latest version of \fBText::BibTeX\fR, the Perl
library that provides a high-level front-end to \fBbtparse\fR, there.
\&\fBbtparse\fR is needed to build \f(CW\*(C`Text::BibTeX\*(C'\fR, and must be downloaded
separately.
.PP
Both libraries are also available on \s-1CTAN\s0 (the Comprehensive TeX Archive
Network, \f(CW\*(C`http://www.ctan.org/tex\-archive/\*(C'\fR) and \s-1CPAN\s0 (the Comprehensive
Perl Archive Network, \f(CW\*(C`http://www.cpan.org/\*(C'\fR).  Look in
\&\fIbiblio/bibtex/utils/btOOL/\fR on \s-1CTAN,\s0 and \fIauthors/Greg_Ward/\fR on
\&\s-1CPAN.\s0  For example,
.PP
.Vb 2
\&   http://www.ctan.org/tex\-archive/biblio/bibtex/utils/btOOL/
\&   http://www.cpan.org/authors/Greg_Ward
.Ve
.PP
will both get you to the latest version of \f(CW\*(C`Text::BibTeX\*(C'\fR and \fBbtparse\fR
\&\*(-- but of course, you should always access busy sites like \s-1CTAN\s0 and \s-1CPAN\s0
through a mirror.