diff options
author | Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com> | 2016-05-27 14:28:30 +0100 |
---|---|---|
committer | Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com> | 2016-05-27 14:28:30 +0100 |
commit | a9a31b1de5776a3b08a82101a4fa711294f0dd1d (patch) | |
tree | 159134a624e51509f40ed8823249f09a70d1dda3 /lib/uniwbrk/u-wordbreaks.h | |
parent | 5f2b09982312c98863eb9a8dfe2c608b81f58259 (diff) |
Imported Upstream version 0.9.6+really0.9.3 (ref: upstream/0.9.6+really0.9.3)
Diffstat (limited to 'lib/uniwbrk/u-wordbreaks.h')
-rw-r--r-- | lib/uniwbrk/u-wordbreaks.h | 85 |
1 file changed, 28 insertions, 57 deletions
```diff
diff --git a/lib/uniwbrk/u-wordbreaks.h b/lib/uniwbrk/u-wordbreaks.h
index cdeab0ba..b0fd301e 100644
--- a/lib/uniwbrk/u-wordbreaks.h
+++ b/lib/uniwbrk/u-wordbreaks.h
@@ -1,5 +1,5 @@
 /* Word breaks in UTF-8/UTF-16/UTF-32 strings.
-   Copyright (C) 2009-2015 Free Software Foundation, Inc.
+   Copyright (C) 2009-2010 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2009.
 
    This program is free software: you can redistribute it and/or modify it
@@ -55,12 +55,16 @@ FUNC (const UNIT *s, size_t n, char *p)
               if (last_char_prop == WBP_CR && prop == WBP_LF)
                 /* *p = 0 */;
               /* Break before and after newlines. */
-              else if ((last_char_prop == WBP_CR
-                        || last_char_prop == WBP_LF
-                        || last_char_prop == WBP_NEWLINE)
-                       || (prop == WBP_CR
-                           || prop == WBP_LF
-                           || prop == WBP_NEWLINE))
+              else if (last_char_prop >= WBP_NEWLINE
+                       /* same as:
+                          last_char_prop == WBP_CR
+                          || last_char_prop == WBP_LF
+                          || last_char_prop == WBP_NEWLINE */
+                       || prop >= WBP_NEWLINE
+                          /* same as:
+                             prop == WBP_CR
+                             || prop == WBP_LF
+                             || prop == WBP_NEWLINE */)
                 *p = 1;
               /* Ignore Format and Extend characters. */
               else if (!(prop == WBP_EXTEND || prop == WBP_FORMAT))
@@ -69,66 +73,38 @@ FUNC (const UNIT *s, size_t n, char *p)
 
                       secondlast          last             current
 
-           (ALetter | HL)   (MidLetter | MidNumLet | SQ) × (ALetter | HL)      (WB7)
-           (ALetter | HL) × (MidLetter | MidNumLet | SQ)   (ALetter | HL)      (WB6)
-                  Numeric   (MidNum | MidNumLet | SQ)    × Numeric             (WB11)
-                  Numeric × (MidNum | MidNumLet | SQ)      Numeric             (WB12)
-                       HL × DQ                             HL                  (WB7b)
-                       HL   DQ                           × HL                  (WB7c)
-                                          (ALetter | HL) × (ALetter | HL)      (WB5)
-                                          (ALetter | HL) × Numeric             (WB9)
-                                                 Numeric × (ALetter | HL)      (WB10)
+                       ALetter   (MidLetter | MidNumLet) × ALetter      (WB7)
+                       ALetter × (MidLetter | MidNumLet)   ALetter      (WB6)
+                       Numeric   (MidNum | MidNumLet)    × Numeric      (WB11)
+                       Numeric × (MidNum | MidNumLet)      Numeric      (WB12)
+                                                 ALetter × ALetter      (WB5)
+                                                 ALetter × Numeric      (WB9)
+                                                 Numeric × ALetter      (WB10)
                                                  Numeric × Numeric      (WB8)
-                                                      HL × SQ           (WB7a)
                                                 Katakana × Katakana     (WB13)
-                     (ALetter | HL | Numeric | Katakana) × ExtendNumLet (WB13a)
+                          (ALetter | Numeric | Katakana) × ExtendNumLet (WB13a)
                                             ExtendNumLet × ExtendNumLet (WB13a)
-                     ExtendNumLet × (ALetter | HL | Numeric | Katakana) (WB13b)
-                                 Regional_Indicator × Regional_Indicator (WB13c)
+                         ExtendNumLet × (ALetter | Numeric | Katakana)  (WB13b)
                    */
                   /* No break across certain punctuation. Also, disable word
                      breaks that were recognized earlier (due to lookahead of
                      only one complex character). */
-                  if (((prop == WBP_ALETTER
-                        || prop == WBP_HL)
+                  if ((prop == WBP_ALETTER
                        && (last_compchar_prop == WBP_MIDLETTER
-                           || last_compchar_prop == WBP_MIDNUMLET
-                           || last_compchar_prop == WBP_SQ)
-                       && (secondlast_compchar_prop == WBP_ALETTER
-                           || secondlast_compchar_prop == WBP_HL))
+                           || last_compchar_prop == WBP_MIDNUMLET)
+                       && secondlast_compchar_prop == WBP_ALETTER)
                       || (prop == WBP_NUMERIC
                           && (last_compchar_prop == WBP_MIDNUM
-                              || last_compchar_prop == WBP_MIDNUMLET
-                              || last_compchar_prop == WBP_SQ)
-                          && secondlast_compchar_prop == WBP_NUMERIC)
-                      || (prop == WBP_HL
-                          && last_compchar_prop == WBP_DQ
-                          && secondlast_compchar_prop == WBP_HL))
+                              || last_compchar_prop == WBP_MIDNUMLET)
+                          && secondlast_compchar_prop == WBP_NUMERIC))
                     {
                       *last_compchar_ptr = 0;
                       /* *p = 0; */
                     }
-                  /* Break after Format and Extend characters. */
-                  else if (last_compchar_prop == WBP_EXTEND
-                           || last_compchar_prop == WBP_FORMAT)
-                    *p = 1;
                   else
                     {
-                      /* Normalize property value to table index,
-                         skipping 5 properties: WBP_EXTEND,
-                         WBP_FORMAT, WBP_NEWLINE, WBP_CR, and
-                         WBP_LF. */
-                      int last_compchar_prop_index = last_compchar_prop;
-                      int prop_index = prop;
-
-                      if (last_compchar_prop_index >= WBP_EXTEND)
-                        last_compchar_prop_index -= 5;
-
-                      if (prop_index >= WBP_EXTEND)
-                        prop_index -= 5;
-
                       /* Perform a single table lookup. */
-                      if (uniwbrk_table[last_compchar_prop_index][prop_index])
+                      if (uniwbrk_table[last_compchar_prop][prop])
                         *p = 1;
                       /* else *p = 0; */
                     }
@@ -136,13 +112,8 @@ FUNC (const UNIT *s, size_t n, char *p)
             }
 
           last_char_prop = prop;
-          /* Ignore Format and Extend characters, except at the start
-             of the line. */
-          if (last_compchar_prop < 0
-              || last_compchar_prop == WBP_CR
-              || last_compchar_prop == WBP_LF
-              || last_compchar_prop == WBP_NEWLINE
-              || !(prop == WBP_EXTEND || prop == WBP_FORMAT))
+          /* Ignore Format and Extend characters, except at the start of the string. */
+          if (last_compchar_prop < 0 || !(prop == WBP_EXTEND || prop == WBP_FORMAT))
             {
               secondlast_compchar_prop = last_compchar_prop;
               last_compchar_prop = prop;
```