'\" t .\" Title: unicode::bidi .\" Author: Sam Varshavchik .\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/> .\" Date: 05/18/2024 .\" Manual: Courier Unicode Library .\" Source: Courier Unicode Library .\" Language: English .\" .TH "UNICODE::BIDI" "3" "05/18/2024" "Courier Unicode Library" "Courier Unicode Library" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" http://bugs.debian.org/507673 .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" unicode::bidi, unicode::bidi_calc, unicode::bidi_calc_types, unicode::bidi_reorder, unicode::bidi_cleanup, unicode::bidi_logical_order, unicode::bidi_combinings, unicode::bidi_needs_embed, unicode::bidi_embed, unicode::bidi_embed_paragraph_level, unicode::bidi_get_direction, unicode::bidi_override \- unicode bi\-directional algorithm .SH "SYNOPSIS" .sp .nf #include <courier\-unicode\&.h> .fi .sp .nf struct unicode::bidi_calc_types\ \&{ . \ \&\ \&bidi_calc_types(const std::u32string &\ \&string); . \ \&\ \&std::vector<unicode_bidi_type_t>\ \&types\ \&; . \ \&\ \&void\ \&setbnl(std::u32string &\ \&string); . 
}; .fi .HP \w'std::tuple<std::vector<unicode_bidi_level_t>,\ struct\ unicode_bidi_direction>\ unicode::bidi_calc('u .BI "std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> unicode::bidi_calc(const\ unicode::bidi_calc_types\ &" "ustring" ");" .HP \w'std::tuple<std::vector<unicode_bidi_level_t>,\ struct\ unicode_bidi_direction>\ unicode::bidi_calc('u .BI "std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> unicode::bidi_calc(const\ unicode::bidi_calc_types\ &" "ustring" ", unicode_bidi_level_t\ " "embedding_level" ");" .HP \w'int\ unicode::bidi_reorder('u .BI "int unicode::bidi_reorder(std::u32string\ &" "string" ", std::vector<unicode_bidi_level_t>\ &" "embedding_level" ", const\ std::function<void\ (size_t,\ size_t)>\ &" "reorder_callback" "=[](size_t,\ size_t){}, size_t\ " "starting_pos" "=0, size_t\ " "n" "=(size_t)\-1);" .HP \w'void\ unicode::bidi_reorder('u .BI "void unicode::bidi_reorder(std::vector<unicode_bidi_level_t>\ &" "embedding_level" ", const\ std::function<void\ (size_t,\ size_t)>\ &" "reorder_callback" "=[](size_t,\ size_t){}, size_t\ " "starting_pos" "=0, size_t\ " "n" "=(size_t)\-1);" .HP \w'void\ unicode::bidi_cleanup('u .BI "void unicode::bidi_cleanup(std::u32string\ &" "string" ", const\ std::function<void\ (size_t)>\ &" "removed_callback" "=[](size_t){}, int\ " "cleanup_options" "=0);" .HP \w'int\ unicode::bidi_cleanup('u .BI "int unicode::bidi_cleanup(std::u32string\ &" "string" ", std::vector\ <unicode_bidi_level_t>\ &" "levels" ", const\ std::function<void\ (size_t)>\ &" "removed_callback" "=[](size_t){}, int\ " "cleanup_options" "=0);" .HP \w'int\ unicode::bidi_cleanup('u .BI "int unicode::bidi_cleanup(std::u32string\ &" "string" ", std::vector\ <unicode_bidi_level_t>\ &" "levels" ", const\ std::function<void\ (size_t)>\ &" "removed_callback" ", int\ " "cleanup_options" ", size_t\ " "starting_pos" ", size_t\ " "n" ");" .HP \w'int\ unicode::bidi_logical_order('u .BI "int 
unicode::bidi_logical_order(std::u32string\ &" "string" ", std::vector\ <unicode_bidi_level_t>\ &" "levels" ", unicode_bidi_level_t\ " "paragraph_embedding" ", const\ std::function<void\ (size_t,\ size_t)>\ &" "reorder_callback" "=[](size_t,\ size_t){}, size_t\ " "starting_pos" "=0, size_t\ " "n" "=(size_t)\-1);" .HP \w'void\ unicode::bidi_combinings('u .BI "void unicode::bidi_combinings(const\ std::u32string\ &" "string" ", const\ std::vector\ <unicode_bidi_level_t>\ &" "levels" ", const\ std::function\ <void\ (unicode_bidi_level_t\ level,\ size_t\ level_start,\ size_t\ n_chars,\ size_t\ comb_start,\ size_t\ n_comb_chars)>\ &" "callback" ");" .HP \w'void\ unicode::bidi_combinings('u .BI "void unicode::bidi_combinings(const\ std::u32string\ &" "string" ", const\ std::function\ <void\ (unicode_bidi_level_t\ level,\ size_t\ level_start,\ size_t\ n_chars,\ size_t\ comb_start,\ size_t\ n_comb_chars)>\ &" "callback" ");" .HP \w'void\ unicode::bidi_logical_order('u .BI "void unicode::bidi_logical_order(std::vector\ <unicode_bidi_level_t>\ &" "levels" ", unicode_bidi_level_t\ " "paragraph_embedding" ", const\ std::function<void\ (size_t,\ size_t)>\ &" "reorder_callback" ", size_t\ " "starting_pos" "=0, size_t\ " "n" "=(size_t)\-1);" .HP \w'bool\ unicode::bidi_needs_embed('u .BI "bool unicode::bidi_needs_embed(const\ std::u32string\ &" "string" ", const\ std::vector\ <unicode_bidi_level_t>\ &" "levels" ", const\ unicode_bidi_level_t\ *" "paragraph_embedding" "=NULL, size_t\ " "starting_pos" "=0, size_t\ " "n" "=(size_t)\-1);" .HP \w'int\ unicode::bidi_embed('u .BI "int unicode::bidi_embed(const\ std::u32string\ &" "string" ", const\ std::vector\ <unicode_bidi_level_t>\ &" "levels" ", unicode_bidi_level_t\ " "paragraph_embedding" ", const\ std::function<void\ (const\ char32_t\ *,\ size_t,\ bool)>\ &" "callback" ");" .HP \w'std::u32string\ unicode::bidi_embed('u .BI "std::u32string unicode::bidi_embed(const\ std::u32string\ &" "string" ", const\ std::vector\ 
<unicode_bidi_level_t>\ &" "levels" ", unicode_bidi_level_t\ " "paragraph_embedding" ");" .HP \w'char32_t\ unicode::bidi_embed_paragraph_level('u .BI "char32_t unicode::bidi_embed_paragraph_level(const\ std::u32string\ &" "string" ", unicode_bidi_level_t\ " "paragraph_embedding" ");" .HP \w'unicode_bidi_direction\ unicode::bidi_get_direction('u .BI "unicode_bidi_direction unicode::bidi_get_direction(const\ std::u32string\ &" "string" ", size_t\ " "starting_pos" "=0, size_t\ " "n" "=(size_t)\-1);" .HP \w'std::u32string\ unicode::bidi_override('u .BI "std::u32string unicode::bidi_override(const\ std::u32string\ &" "string" ", unicode_bidi_level_t\ " "direction" ", int\ " "cleanup_options" "=0);" .SH "DESCRIPTION" .PP These functions implement the C++ interface for the \m[blue]\fBUnicode Bi\-Directional algorithm\fR\m[]\&\s-2\u[1]\d\s+2\&. See the description of the underlying \fBunicode_bidi\fR(3) C library API for more information\&. C++ specific notes: .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} \fBunicode::bidi_calc\fR returns the directional embedding value buffer and the calculated paragraph embedding level\&. Its \fIustring\fR is implicitly converted from a std::u32string: .sp .if n \{\ .RS 4 .\} .nf std::u32string text; auto [levels, direction]=unicode::bidi_calc(text); .fi .if n \{\ .RE .\} Alternatively a unicode::bidi_calc_types object gets constructed from the same std::u32string and then passed directly to \fBunicode::bidi_calc\fR: .sp .if n \{\ .RS 4 .\} .nf std::u32string text; unicode::bidi_calc_types types{text}; types\&.setbnl(text); // Optional // types\&.types is a std::vector of enum_bidi_types_t values auto [levels, direction]=unicode::bidi_calc(types); .fi .if n \{\ .RE .\} This provides the means to access the intermediate enum_bidi_types_t values that get calculated from the Unicode text string\&. 
.if n \{\ .sp .\} .RS 4 .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br .ps +1 \fBNote\fR .ps -1 .br In all cases the std::u32string cannot be a temporary object, and it must remain in scope until \fBunicode::bidi_calc\fR() returns\&. .sp .5v .RE The optional setbnl() method uses \fBunicode_bidi_setbnl\fR(3) to replace paragraph separators with newline characters, in the unicode string\&. It requires the same unicode string that was passed to the constructor as a parameter (because the constructor takes a constant reference, but this method modifies the string)\&. .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} Several C functions provide a \(lqdry\-run\(rq mode by passing a NULL pointer\&. The C++ API provides separate overloads, with and without the nullable parameter\&. .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} Several C functions accept a nullable function pointer, with the NULL function pointer specifying no callback\&. The C++ functions have a std::function parameter with a default do\-nothing closure\&. .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} Several C functions accept two parameters, a Unicode character pointer and the embedding level buffer, and a single parameter that specifies the size of both\&. The equivalent C++ function takes two discrete parameters, a std::u32string and a std::vector and returns an int; a negative value if their sizes differ, and 0 if their sizes match, and the requested function completes\&. The \fBunicode::bidi_embed\fR overload that returns a std::u32string returns an empty string in case of a mismatch\&. .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} \fBunicode::bidi_reorder\fR reorders the entire \fIstring\fR and its \fIembedding_level\fRs by default\&. 
The optional \fIstarting_pos\fR and \fIn\fR parameters limit the reordering to the indicated subset of the original string (specified as the starting position offset index, and the number of characters)\&. .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} \fBunicode::bidi_reorder\fR, \fBunicode::bidi_cleanup\fR, \fBunicode::bidi_logical_order\fR, \fBunicode::bidi_needs_embed\fR and \fBunicode::bidi_get_direction\fR take two optional parameters (defaulted values or overloaded) specifying an optional starting position and number of characters that define a subset of the original string that gets reordered, cleaned up, or has its direction determined\&. .sp This \fBunicode::bidi_cleanup\fR does not trim off the passed in string and embedding level buffer, since it affects only a subset of the string\&. The number of times the removed character callback gets invoked indicates how much the substring should be trimmed off\&. .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} \fBunicode::bidi_override\fR modifies the passed\-in \fIstring\fR as follows: .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} \fBunicode::bidi_cleanup\fR() is applied with the specified, or defaulted, \fIcleanup_options\fR .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} Either an LRO or an RLO override marker gets prepended to the Unicode string, forcing the entire string to be interpreted in a single rendering direction, when processed by the Unicode bi\-directional algorithm\&. .RE .sp \fBunicode::bidi_override\fR makes it possible to use a Unicode\-aware application or algorithm in a context that only works with text that\*(Aqs always displayed in a fixed direction, allowing graceful handling of input containing bi\-directional text\&. 
.RE .SS "unicode::literals namespace" .sp .if n \{\ .RS 4 .\} .nf using namespace unicode::literals; std::u32string foo(std::u32string bar) { return bar + LRO; } .fi .if n \{\ .RE .\} .PP This namespace contains the following constexpr definitions: .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} char32_t arrays with literal Unicode character strings containing Unicode directional, isolate, and override markers, like LRO, RLO and others\&. .RE .sp .RS 4 .ie n \{\ \h'-04'\(bu\h'+03'\c .\} .el \{\ .sp -1 .IP \(bu 2.3 .\} CLEANUP_EXTRA, CLEANUP_BNL, and CLEANUP_CANONICAL options for \fBunicode::bidi_cleanup\fR()\&. .RE .SH "SEE ALSO" .PP \fBcourier-unicode\fR(7), \fBunicode_bidi\fR(3)\&. .SH "AUTHOR" .PP \fBSam Varshavchik\fR .RS 4 Author .RE .SH "NOTES" .IP " 1." 4 Unicode Bi-Directional algorithm .RS 4 \%https://www.unicode.org/reports/tr9/tr9-48.html .RE