diff options
Diffstat (limited to 'doc')
-rw-r--r-- | doc/API | 4 | ||||
-rw-r--r-- | doc/API.ja | 4 | ||||
-rw-r--r-- | doc/CALLOUTS.BUILTIN | 11 | ||||
-rw-r--r-- | doc/CALLOUTS.BUILTIN.ja | 10 | ||||
-rw-r--r-- | doc/RE | 25 | ||||
-rw-r--r-- | doc/RE.ja | 25 | ||||
-rw-r--r-- | doc/SYNTAX.md | 369 | ||||
-rw-r--r-- | doc/UNICODE_PROPERTIES | 28 | ||||
-rw-r--r-- | doc/onig_syn_md.c | 667 |
9 files changed, 945 insertions, 198 deletions
@@ -1,4 +1,4 @@ -Oniguruma API Version 6.9.9 2022/10/28 +Oniguruma API Version 6.9.10 2024/06/26 #include <oniguruma.h> @@ -277,6 +277,7 @@ Oniguruma API Version 6.9.9 2022/10/28 # int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* mp, unsigned long limit) Set a retry limit count of a match process. + 0 means unlimited. arguments 1 mp: match-param pointer @@ -985,6 +986,7 @@ Oniguruma API Version 6.9.9 2022/10/28 # int onig_set_retry_limit_in_match(unsigned long limit) Set the limit of retry counts in matching process. + 0 means unlimited. normal return: ONIG_NORMAL @@ -1,4 +1,4 @@ -鬼車インターフェース Version 6.9.9 2022/11/16 +鬼車インターフェース Version 6.9.10 2024/05/26 #include <oniguruma.h> @@ -275,6 +275,7 @@ # int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* mp, unsigned long limit) 一回のマッチでのリトライ数の制限値をセットする。 + 0は無制限を意味する。 引数 1 mp: マッチパラメタオブジェクトアドレス @@ -987,6 +988,7 @@ # int onig_set_retry_limit_in_match(unsigned long limit) 一回のマッチング内でのリトライ数の制限値を指定する。 + 0は無制限を意味する。 正常終了戻り値: ONIG_NORMAL diff --git a/doc/CALLOUTS.BUILTIN b/doc/CALLOUTS.BUILTIN index 26840e7..3c3c40b 100644 --- a/doc/CALLOUTS.BUILTIN +++ b/doc/CALLOUTS.BUILTIN @@ -1,4 +1,4 @@ -CALLOUTS.BUILTIN 2018/03/26 +CALLOUTS.BUILTIN 2024/07/04 * FAIL (progress) @@ -92,4 +92,13 @@ CALLOUTS.BUILTIN 2018/03/26 [callout data] slot 0: op value (enum OP_CMP in src/regexec.c) + +* SKIP (progress) + + (*SKIP) + + Advance the position where the current matching fails and the next search + begins to the current position. + It has no effect on the current matching. 
+ //END diff --git a/doc/CALLOUTS.BUILTIN.ja b/doc/CALLOUTS.BUILTIN.ja index d371beb..dabadcd 100644 --- a/doc/CALLOUTS.BUILTIN.ja +++ b/doc/CALLOUTS.BUILTIN.ja @@ -1,4 +1,4 @@ -CALLOUTS.BUILTIN.ja 2018/03/26 +CALLOUTS.BUILTIN.ja 2024/07/04 * FAIL (前進) @@ -90,4 +90,12 @@ CALLOUTS.BUILTIN.ja 2018/03/26 [callout data] slot 0: op値 (src/regexec.c の中の enum OP_CMP) + +* SKIP (前進) + + (*SKIP) + + 現在のマッチングが失敗して次の検索を開始する位置を、現在位置まで前進させる + 現在のマッチングには何の影響も与えない + //END @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.9.9 2023/03/27 +Oniguruma Regular Expressions Version 6.9.9 2024/06/10 syntax: ONIG_SYNTAX_ONIGURUMA (default syntax) @@ -237,22 +237,21 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default syntax) Unicode Case: - alnum Letter | Mark | Decimal_Number - alpha Letter | Mark - ascii 0000 - 007F - blank Space_Separator | 0009 - cntrl Control | Format | Unassigned | Private_Use | Surrogate + alnum Alphabetic | Decimal_Number + alpha Alphabetic + ascii U+0000 - U+007F + blank Space_Separator | U+0009 + cntrl U+0000 - U+001F, U+007F - U+009F digit Decimal_Number - graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate - lower Lowercase_Letter - print [[:graph:]] | [[:space:]] + graph ^White_Space && ^[[:cntrl:]] && ^Unassigned && ^Surrogate + lower Lowercase + print [[:graph:]] | Space_Separator punct Punctuation | Symbol - space Space_Separator | Line_Separator | Paragraph_Separator | - U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085 - upper Uppercase_Letter + space White_Space + upper Uppercase xdigit U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066 (0-9, a-f, A-F) - word Letter | Mark | Decimal_Number | Connector_Punctuation + word Alphabetic | Mark | Decimal_Number | Connector_Punctuation @@ -1,4 +1,4 @@ -鬼車 正規表現 Version 6.9.9 2022/08/28 +鬼車 正規表現 Version 6.9.9 2024/06/10 使用文法: ONIG_SYNTAX_ONIGURUMA (既定値) @@ -234,22 +234,21 @@ Unicodeの場合: - alnum Letter | Mark | Decimal_Number - alpha Letter | Mark - ascii 0000 - 007F - blank Space_Separator | 0009 - cntrl 
Control | Format | Unassigned | Private_Use | Surrogate + alnum Alphabetic | Decimal_Number + alpha Alphabetic + ascii U+0000 - U+007F + blank Space_Separator | U+0009 + cntrl U+0000 - U+001F, U+007F - U+009F digit Decimal_Number - graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate - lower Lowercase_Letter - print [[:graph:]] | [[:space:]] + graph ^White_Space && ^[[:cntrl:]] && ^Unassigned && ^Surrogate + lower Lowercase + print [[:graph:]] | Space_Separator punct Punctuation | Symbol - space Space_Separator | Line_Separator | Paragraph_Separator | - U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085 - upper Uppercase_Letter + space White_Space + upper Uppercase xdigit U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066 (0-9, a-f, A-F) - word Letter | Mark | Decimal_Number | Connector_Punctuation + word Alphabetic | Mark | Decimal_Number | Connector_Punctuation diff --git a/doc/SYNTAX.md b/doc/SYNTAX.md index c38e5c8..5ec7e87 100644 --- a/doc/SYNTAX.md +++ b/doc/SYNTAX.md @@ -1,7 +1,7 @@ # Oniguruma syntax (operator) configuration -_Documented for Oniguruma 6.9.5 (2020/01/23)_ +_Documented for Oniguruma 6.9.10 (2024/12/21)_ ---------- @@ -38,7 +38,7 @@ follow. The `options` field describes the default compile options to use if the caller does not specify any options when invoking `onig_new()`. -The `meta_char_table` field is used exclusively by the ONIG_SYN_OP_VARIABLE_META_CHARACTERS +The `meta_char_table` field is used exclusively by the `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` option, which allows the various regex metacharacters, like `*` and `?`, to be replaced with alternates (for example, SQL typically uses `%` instead of `.*` and `_` instead of `?`). @@ -75,7 +75,7 @@ data set by `onig_set_meta_char()` will be ignored. ### 1. 
ONIG_SYN_OP_DOT_ANYCHAR (enable `.`) -_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep, Emacs, PosixExtended, PosixBasic_ Enables support for the standard `.` metacharacter, meaning "any one character." You usually want this flag on unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -84,7 +84,7 @@ so that you can use a metacharacter other than `.` instead. ### 2. ONIG_SYN_OP_ASTERISK_ZERO_INF (enable `r*`) -_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep, Emacs, PosixExtended, PosixBasic_ Enables support for the standard `r*` metacharacter, meaning "zero or more r's." You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -103,7 +103,7 @@ behavior. ### 4. ONIG_SYN_OP_PLUS_ONE_INF (enable `r+`) -_Set in: Oniguruma, PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Emacs, PosixExtended_ Enables support for the standard `r+` metacharacter, meaning "one or more r's." You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -122,7 +122,7 @@ behavior. ### 6. ONIG_SYN_OP_QMARK_ZERO_ONE (enable `r?`) -_Set in: Oniguruma, PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Emacs, PosixExtended_ Enables support for the standard `r?` metacharacter, meaning "zero or one r" or "an optional r." You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -141,7 +141,7 @@ you want `?` to simply match a literal `?` character, but you still want some wa ### 8. 
ONIG_SYN_OP_BRACE_INTERVAL (enable `r{l,u}`) -_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_ Enables support for the `r{lower,upper}` range form, common to more advanced regex engines, which lets you specify precisely a minimum and maximum range on how many r's @@ -158,7 +158,7 @@ this form also allows `r{,upper}` to be equivalent to `r{0,upper}`; otherwise, ### 9. ONIG_SYN_OP_ESC_BRACE_INTERVAL (enable `\{` and `\}`) -_Set in: PosixBasic, Emacs, Grep_ +_Set in: Grep, Emacs, PosixBasic_ Enables support for an escaped `r\{lower,upper\}` range form. This is useful if you have disabled support for the normal `r{...}` range form and want curly braces to simply @@ -168,7 +168,7 @@ match literal curly brace characters, but you still want some way of activating ### 10. ONIG_SYN_OP_VBAR_ALT (enable `r|s`) -_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_ Enables support for the common `r|s` alternation operator. You usually want this flag set. @@ -176,7 +176,7 @@ flag set. ### 11. ONIG_SYN_OP_ESC_VBAR_ALT (enable `\|`) -_Set in: Emacs, Grep_ +_Set in: Grep, Emacs_ Enables support for an escaped `r\|s` alternation form. This is useful if you have disabled support for the normal `r|s` alternation form and want `|` to simply @@ -185,7 +185,7 @@ match a literal `|` character, but you still want some way of activating "altern ### 12. ONIG_SYN_OP_LPAREN_SUBEXP (enable `(r)`) -_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_ Enables support for the common `(...)` grouping-and-capturing operators. You usually want this flag set. @@ -193,7 +193,7 @@ want this flag set. ### 13. 
ONIG_SYN_OP_ESC_LPAREN_SUBEXP (enable `\(` and `\)`) -_Set in: PosixBasic, Emacs, Grep_ +_Set in: Grep, Emacs, PosixBasic_ Enables support for escaped `\(...\)` grouping-and-capturing operators. This is useful if you have disabled support for the normal `(...)` grouping-and-capturing operators and want @@ -203,7 +203,7 @@ activating "grouping" or "capturing" behavior. ### 14. ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (enable `\A` and `\Z` and `\z`) -_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_ Enables support for the anchors `\A` (start-of-string), `\Z` (end-of-string or newline-at-end-of-string), and `\z` (end-of-string) escapes. @@ -214,7 +214,7 @@ option will recognize that metacharacter instead.) ### 15. ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (enable `\G`) -_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_ Enables support for the special anchor `\G` (start-of-previous-match). @@ -231,7 +231,7 @@ exactly the same as `\A`. ### 16. ONIG_SYN_OP_DECIMAL_BACKREF (enable `\num`) -_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep, Emacs, PosixExtended, PosixBasic_ Enables support for subsequent matches to back references to prior capture groups `(...)` using the common `\num` syntax (like `\3`). @@ -244,7 +244,7 @@ You usually want this enabled, and it is enabled by default in every built-in sy ### 17. ONIG_SYN_OP_BRACKET_CC (enable `[...]`) -_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep, Emacs, PosixExtended, PosixBasic_ Enables support for recognizing character classes, like `[a-z]`. If this flag is not set, `[` and `]` will be treated as ordinary literal characters instead of as metacharacters. 
@@ -254,7 +254,7 @@ You usually want this enabled, and it is enabled by default in every built-in sy ### 18. ONIG_SYN_OP_ESC_W_WORD (enable `\w` and `\W`) -_Set in: Oniguruma, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep_ Enables support for the common `\w` and `\W` shorthand forms. These match "word characters," whose meaning varies depending on the encoding being used. @@ -272,7 +272,7 @@ considered "word characters.") ### 19. ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (enable `\<` and `\>`) -_Set in: Grep, GnuRegex_ +_Set in: GnuRegex, Grep_ Enables support for the GNU-specific `\<` and `\>` word-boundary metacharacters. These work like the `\b` word-boundary metacharacter, but only match at one end of the word or the other: `\<` @@ -285,7 +285,7 @@ Most regex syntaxes do _not_ support these metacharacters. ### 20. ONIG_SYN_OP_ESC_B_WORD_BOUND (enable `\b` and `\B`) -_Set in: Oniguruma, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep_ Enables support for the common `\b` and `\B` word-boundary metacharacters. The `\b` metacharacter matches a zero-width position at a transition from word-characters to non-word-characters, or vice @@ -297,7 +297,7 @@ are considered "word characters." ### 21. ONIG_SYN_OP_ESC_S_WHITE_SPACE (enable `\s` and `\S`) -_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_ Enables support for the common `\s` and `\S` whitespace-matching metacharacters. @@ -319,7 +319,7 @@ Unicode-equivalent code points, and then matching according to Unicode rules. ### 22. ONIG_SYN_OP_ESC_D_DIGIT (enable `\d` and `\D`) -_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_ Enables support for the common `\d` and `\D` digit-matching metacharacters. 
@@ -337,7 +337,7 @@ Unicode-equivalent code points, and then matching according to Unicode rules. ### 23. ONIG_SYN_OP_LINE_ANCHOR (enable `^r` and `r$`) -_Set in: Oniguruma, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep, Emacs, PosixExtended, PosixBasic_ Enables support for the common `^` and `$` line-anchor metacharacters. @@ -352,7 +352,7 @@ and not any other form.) ### 24. ONIG_SYN_OP_POSIX_BRACKET (enable POSIX `[:xxxx:]`) -_Set in: Oniguruma, PosixBasic, PosixExtended, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl, GnuRegex, Grep, PosixExtended, PosixBasic_ Enables support for the POSIX `[:xxxx:]` character classes, like `[:alpha:]` and `[:digit:]`. The supported POSIX character classes are `alnum`, `alpha`, `blank`, `cntrl`, `digit`, @@ -361,7 +361,7 @@ The supported POSIX character classes are `alnum`, `alpha`, `blank`, `cntrl`, `d ### 25. ONIG_SYN_OP_QMARK_NON_GREEDY (enable `r??`, `r*?`, `r+?`, and `r{n,m}?`) -_Set in: Oniguruma, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java_ Enables support for lazy (non-greedy) quantifiers: That is, if you append a `?` after another quantifier such as `?`, `*`, `+`, or `{n,m}`, Oniguruma will try to match @@ -370,17 +370,17 @@ as _little_ as possible instead of as _much_ as possible. ### 26. ONIG_SYN_OP_ESC_CONTROL_CHARS (enable `\n`, `\r`, `\t`, etc.) -_Set in: Oniguruma, PosixBasic, PosixExtended, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, Emacs, PosixExtended, PosixBasic_ Enables support for C-style control-code escapes, like `\n` and `\r`. Specifically, this recognizes `\a` (7), `\b` (8), `\t` (9), `\n` (10), `\f` (12), `\r` (13), and -`\e` (27). If ONIG_SYN_OP2_ESC_V_VTAB is enabled (see below), this also enables +`\e` (27). 
If `ONIG_SYN_OP2_ESC_V_VTAB` is enabled (see below), this also enables support for recognizing `\v` as code point 11. ### 27. ONIG_SYN_OP_ESC_C_CONTROL (enable `\cx` control codes) -_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java_ Enables support for named control-code escapes, like `\cm` or `\cM` for code-point 13. In this shorthand form, control codes may be specified by `\c` (for "Control") @@ -390,7 +390,7 @@ followed by an alphabetic letter, a-z or A-Z, indicating which code point to rep ### 28. ONIG_SYN_OP_ESC_OCTAL3 (enable `\OOO` octal codes) -_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java_ Enables support for octal-style escapes of up to three digits, like `\1` for code point 1, and `\177` for code point 127. Octal values greater than 255 will result @@ -399,7 +399,7 @@ in an error message. ### 29. ONIG_SYN_OP_ESC_X_HEX2 (enable `\xHH` hex codes) -_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java_ Enables support for hexadecimal-style escapes of up to two digits, like `\x1` for code point 1, and `\x7F` for code point 127. @@ -407,7 +407,7 @@ point 1, and `\x7F` for code point 127. ### 30. ONIG_SYN_OP_ESC_X_BRACE_HEX8 (enable `\x{7HHHHHHH}` hex codes) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl_ Enables support for brace-wrapped hexadecimal-style escapes of up to eight digits, like `\x{1}` for code point 1, and `\x{FFFE}` for code point 65534. @@ -415,7 +415,7 @@ like `\x{1}` for code point 1, and `\x{FFFE}` for code point 65534. ### 31. ONIG_SYN_OP_ESC_O_BRACE_OCTAL (enable `\o{1OOOOOOOOOO}` octal codes) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl_ Enables support for brace-wrapped octal-style escapes of up to eleven digits, like `\o{1}` for code point 1, and `\o{177776}` for code point 65534. 
@@ -434,7 +434,7 @@ This group contains support for lesser-known regex syntax constructs. ### 0. ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (enable `\Q...\E`) -_Set in: Java, Perl, Perl_NG_ +_Set in: Perl_NG, Perl, Java_ Enables support for "quoted" parts of a pattern: Between `\Q` and `\E`, all syntax parsing is turned off, so that metacharacters like `*` and `+` will no @@ -444,7 +444,7 @@ longer be treated as metacharacters, and instead will be matched as literal ### 1. ONIG_SYN_OP2_QMARK_GROUP_EFFECT (enable `(?...)`) -_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, Emacs_ Enables support for the fairly-common `(?...)` grouping operator, which controls precedence but which does _not_ capture its contents. @@ -452,7 +452,7 @@ controls precedence but which does _not_ capture its contents. ### 2. ONIG_SYN_OP2_OPTION_PERL (enable options `(?imsx)` and `(?-imsx)`) -_Set in: Java, Perl, Perl_NG_ +_Set in: Python, Perl_NG, Perl, Java_ Enables support of regex options. (i,m,s,x) The supported toggle-able options for this flag are: @@ -465,7 +465,7 @@ The supported toggle-able options for this flag are: ### 3. ONIG_SYN_OP2_OPTION_RUBY (enable options `(?imx)` and `(?-imx)`) -_Set in: Oniguruma, Ruby_ +_Set in: Ruby_ Enables support of regex options. (i,m,x) The supported toggle-able options for this flag are: @@ -477,7 +477,7 @@ The supported toggle-able options for this flag are: ### 4. ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (enable `r?+`, `r*+`, and `r++`) -_Set in: Oniguruma, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl, Java_ Enables support for the _possessive_ quantifiers `?+`, `*+`, and `++`, which work similarly to `?` and `*` and `+`, respectively, but which do not backtrack @@ -488,7 +488,7 @@ extent if subsequent parts of the pattern fail to match. ### 5. 
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (enable `r{n,m}+`) -_Set in: Java_ +_Set in: Perl_NG, Perl, Java_ Enables support for the _possessive_ quantifier `{n,m}+`, which works similarly to `{n,m}`, but which does not backtrack @@ -499,7 +499,7 @@ extent if subsequent parts of the pattern fail to match. ### 6. ONIG_SYN_OP2_CCLASS_SET_OP (enable `&&` within `[...]`) -_Set in: Oniguruma, Java, Ruby_ +_Set in: Oniguruma, Ruby, Java_ Enables support for character-class _intersection_. For example, with this feature enabled, you can write `[a-z&&[^aeiou]]` to produce a character class @@ -509,7 +509,7 @@ all control codes _except_ newlines. ### 7. ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (enable named captures `(?<name>...)`) -_Set in: Oniguruma, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG_ Enables support for _naming_ capture groups, so that instead of having to refer to captures by position (like `\3` or `$3`), you can refer to them by names @@ -519,7 +519,7 @@ and `(?'name'...)`, but not the Python `(?P<name>...)` syntax. ### 8. ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (enable named backreferences `\k<name>`) -_Set in: Oniguruma, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG_ Enables support for substituted backreferences by name, not just by position. This supports using `\k'name'` in addition to supporting `\k<name>`. This also @@ -530,7 +530,7 @@ the match, if the capture matched multiple times, by writing `\k<name+n>` or ### 9. ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (enable backreferences `\g<name>` and `\g<n>`) -_Set in: Oniguruma, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG_ Enables support for substituted backreferences by both name and position using the same syntax. This supports using `\g'name'` and `\g'1'` in addition to @@ -562,7 +562,7 @@ followed by a single character (or equivalent), indicating which code point to r based on that character's lowest five bits. 
So, like `\c`, you can represent code-point 10 with `\C-j`, but you can also represent it with `\C-*` as well. -See also ONIG_SYN_OP_ESC_C_CONTROL, which enables the more-common `\cx` syntax. +See also `ONIG_SYN_OP_ESC_C_CONTROL`, which enables the more-common `\cx` syntax. ### 12. ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (enable `\M-x`) @@ -577,7 +577,7 @@ with `0x80`). So, for example, you can match `\x81` using `\x81`, or you can wr ### 13. ONIG_SYN_OP2_ESC_V_VTAB (enable `\v` as vertical tab) -_Set in: Oniguruma, Java, Ruby_ +_Set in: Oniguruma, Python, Ruby, Java_ Enables support for a C-style `\v` escape code, meaning "vertical tab." If enabled, `\v` will be equivalent to ASCII code point 11. @@ -585,7 +585,7 @@ Enables support for a C-style `\v` escape code, meaning "vertical tab." If enab ### 14. ONIG_SYN_OP2_ESC_U_HEX4 (enable `\uHHHH` for Unicode) -_Set in: Oniguruma, Java, Ruby_ +_Set in: Oniguruma, Python, Ruby, Java_ Enables support for a Java-style `\uHHHH` escape code for representing Unicode code-points by number, using up to four hexadecimal digits (up to `\uFFFF`). So, @@ -593,8 +593,8 @@ for example, `\u221E` will match an infinity symbol, `∞`. For code points larger than four digits, like the emoji `🚡` (aerial tramway, or code point U+1F6A1), you must either represent the character directly using an encoding like -UTF-8, or you must enable support for ONIG_SYN_OP_ESC_X_BRACE_HEX8 or -ONIG_SYN_OP_ESC_O_BRACE_OCTAL, which support more than four digits. +UTF-8, or you must enable support for `ONIG_SYN_OP_ESC_X_BRACE_HEX8` or +`ONIG_SYN_OP_ESC_O_BRACE_OCTAL`, which support more than four digits. (New feature as of Oniguruma 6.7.) @@ -604,29 +604,29 @@ ONIG_SYN_OP_ESC_O_BRACE_OCTAL, which support more than four digits. _Set in: Emacs_ This flag makes the ``\` `` and `\'` escapes function identically to -`\A` and `\z`, respectively (when ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR is enabled). 
+`\A` and `\z`, respectively (when `ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR` is enabled). These anchor forms are very obscure, and rarely supported by other regex libraries. ### 16. ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (enable `\p{...}` and `\P{...}`) -_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java_ Enables support for an alternate syntax for POSIX character classes; instead of writing `[:alpha:]` when this is enabled, you can instead write `\p{alpha}`. -See also ONIG_SYN_OP_POSIX_BRACKET for the classic POSIX form. +See also `ONIG_SYN_OP_POSIX_BRACKET` for the classic POSIX form. ### 17. ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (enable `\p{^...}` and `\P{^...}`) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl_ Enables support for an alternate syntax for POSIX character classes; instead of writing `[:^alpha:]` when this is enabled, you can instead write `\p{^alpha}`. -See also ONIG_SYN_OP_POSIX_BRACKET for the classic POSIX form. +See also `ONIG_SYN_OP_POSIX_BRACKET` for the classic POSIX form. ### 18. ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS @@ -647,7 +647,7 @@ characters in `[0-9a-fA-F]`. ### 20. ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (disable `\`) -_Set in: As-is_ +_Set in: ASIS_ If set, this disables all escape codes, shorthands, and metacharacters that start with `\` (or whatever the configured escape character is), allowing `\` to be treated @@ -658,7 +658,7 @@ You usually do not want this flag to be enabled. ### 21. ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE (enable `(?(...)then|else)`) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl_ Enables support for conditional inclusion of subsequent regex patterns based on whether a prior named or numbered capture matched, or based on whether a pattern will @@ -676,7 +676,7 @@ match. This supports many different forms, including: ### 22. 
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (enable `\K`) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl_ Enables support for `\K`, which excludes all content before it from the overall regex match (i.e., capture #0). So, for example, pattern `foo\Kbar` would match @@ -687,7 +687,7 @@ regex match (i.e., capture #0). So, for example, pattern `foo\Kbar` would match ### 23. ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (enable `\R`) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl_ Enables support for `\R`, the "general newline" shorthand, which matches `(\r\n|[\n\v\f\r\u0085\u2028\u2029])` (obviously, the Unicode values cannot be @@ -698,7 +698,7 @@ matched in ASCII encodings). ### 24. ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (enable `\N` and `\O`) -_Set in: Oniguruma, Perl, Perl_NG_ +_Set in: Oniguruma, Perl_NG, Perl_ Enables support for `\N` and `\O`. `\N` is "not a line break," which is much like the standard `.` metacharacter, except that while `.` can be affected by @@ -713,7 +713,7 @@ multi-line mode are enabled or disabled. ### 25. ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (enable `(?~...)`) -_Set in: Oniguruma, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl_ Enables support for the `(?~r)` "absent operator" syntax, which matches as much as possible as long as the result _doesn't_ match pattern `r`. This is @@ -731,7 +731,7 @@ excellent article about it is [available on Medium](https://medium.com/rubyinsid ### 26. ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT (enable `\X` and `\Y` and `\y`) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl_ `\X` is another variation on `.`, designed to support Unicode, in that it matches a full _grapheme cluster_. In Unicode, `à` can be encoded as one code point, @@ -764,7 +764,7 @@ backreferences. ### 28. 
ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (enable `(?{...})`) -_Set in: Oniguruma, Perl, Perl_NG_ +_Set in: Oniguruma, Perl_NG, Perl_ Enables support for Perl-style "callouts" — pattern substitutions that result from invoking a callback method. When `(?{foo})` is reached in a pattern, the callback @@ -779,7 +779,7 @@ Full documentation for this advanced feature can be found in the Oniguruma ### 29. ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (enable `(*name)`) -_Set in: Oniguruma, Perl, Perl_NG_ +_Set in: Oniguruma, Python, Perl_NG, Perl_ Enables support for Perl-style "callouts" — pattern substitutions that result from invoking a callback method. When `(*foo)` is reached in a pattern, the callback @@ -809,6 +809,13 @@ Enables support of regex options. (i,m,x,W,S,D,P,y) - `S` - ASCII only space. - `P` - ASCII only POSIX properties. (includes W,D,S) + +### 31. ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME (enable `(?P<name>...)` and `(?P=name)`) + +_Set in: Python_ + +(New feature as of Oniguruma 6.9.7) + ---------- @@ -820,19 +827,19 @@ some syntaxes but not in others. ### 0. ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (independent `?`, `*`, `+`, `{n,m}`) -_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_ This flag specifies how to handle operators like `?` and `*` when they aren't directly attached to an operand, as in `^*` or `(*)`: Are they an error, are they discarded, or are they taken as literals? If this flag is clear, they -are taken as literals; otherwise, the ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS flag +are taken as literals; otherwise, the `ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS` flag determines if they are errors or if they are discarded. ### 1. 
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (error or ignore independent operators) -_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_ -If ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS is set, this flag controls what happens when +If `ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS` is set, this flag controls what happens when independent operators appear in a pattern: If this flag is set, then independent operators produce an error message; if this flag is clear, then independent operators are silently discarded. @@ -847,7 +854,7 @@ character will produce an error message. ### 3. ONIG_SYN_ALLOW_INVALID_INTERVAL (allow `{???`) -_Set in: Oniguruma, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_ This flag, if set, causes an invalid range, like `foo{bar}` or `foo{}`, to be silently discarded, as if `foo` had been written instead. If clear, an invalid @@ -855,13 +862,13 @@ range will produce an error message. ### 4. ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (allow `{,n}` to mean `{0,n}`) -_Set in: Oniguruma, Ruby_ +_Set in: Oniguruma, Python, Ruby_ If this flag is set, then `r{,n}` will be treated as equivalent to writing `{0,n}`. If this flag is clear, then `r{,n}` will produce an error message. Note that regardless of whether this flag is set or clear, if -ONIG_SYN_OP_BRACE_INTERVAL is enabled, then `r{n,}` will always be legal: This +`ONIG_SYN_OP_BRACE_INTERVAL` is enabled, then `r{n,}` will always be legal: This flag *only* controls the behavior of the opposite form, `r{,n}`. ### 5. ONIG_SYN_STRICT_CHECK_BACKREF (error on invalid backrefs) @@ -876,7 +883,7 @@ No built-in syntax has this flag enabled. ### 6. ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (allow `(?<=a|bc)`) -_Set in: Oniguruma, Java, Ruby_ +_Set in: Oniguruma, Ruby, Java_ If this flag is set, lookbehind patterns with alternate options may have differing lengths among those options. 
If this flag is clear, lookbehind patterns with options @@ -888,15 +895,15 @@ depend on this rule. ### 7. ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (prefer `\k<name>` over `\3`) -_Set in: Oniguruma, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG_ -If this flag is set on the syntax *and* ONIG_OPTION_CAPTURE_GROUP is set when calling +If this flag is set on the syntax *and* `ONIG_OPTION_CAPTURE_GROUP` is set when calling Oniguruma, then if a name is used on any capture, all captures must also use names: A single use of a named capture prohibits the use of numbered captures. ### 8. ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (allow `(?<x>)...(?<x>)`) -_Set in: Oniguruma, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG_ If this flag is set, multiple capture groups may use the same name. If this flag is clear, then reuse of a name will produce an error message. @@ -912,10 +919,10 @@ then `r{n}?` will mean the same as `r{n}`, and the useless `?` will be discarded ### 10. ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH (`..(?i)..`) -_Set in: Perl, Perl_NG, Java_ +_Set in: Python, Perl_NG, Perl, Java_ If this flag is set, then an isolated option doesn't break the branch and stays in effect until the end of the group (or end of the pattern). -If this flag is not set, then an isolated option is interpreted as the starting point of a new branch. /a(?i)b|c/ ==> /a(?i:b|c)/ +If this flag is not set, then an isolated option is interpreted as the starting point of a new branch. `/a(?i)b|c/` ==> `/a(?i:b|c)/` ### 11. ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND (`(?<=...a+...)`) @@ -923,6 +930,24 @@ _Set in: Oniguruma, Java_ If this flag is set, then variable-length expressions are allowed in look-behind. +### 12. ONIG_SYN_PYTHON (enable `\UHHHHHHHH` for Unicode) + +_Set in: Python_ + +(New feature as of Oniguruma 6.9.7) + +### 13. ONIG_SYN_WHOLE_OPTIONS (enable options `(?CLI)`) + +_Set in: Oniguruma_ + +(New feature as of Oniguruma 6.9.8) + +### 14. 
ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP (enable `\(^abc$\)`) + +_Set in: Grep, PosixBasic_ + +(New feature as of Oniguruma 6.9.9) + ### 20. ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (add `\n` to `[^...]`) _Set in: Grep_ @@ -934,7 +959,7 @@ only exclude those characters and ranges written in them. ### 21. ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (allow `[...\w...]`) -_Set in: Oniguruma, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_ If this flag is set, shorthands like `\w` are allowed to describe characters in character classes. If this flag is clear, shorthands like `\w` are treated as a redundantly-escaped @@ -942,7 +967,7 @@ literal `w`. ### 22. ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (silently discard `[z-a]`) -_Set in: Emacs, Grep_ +_Set in: Grep, Emacs_ If this flag is set, then character ranges like `[z-a]` that are broken or contain no characters will be silently ignored. If this flag is clear, then broken or empty @@ -950,7 +975,7 @@ character ranges will produce an error message. ### 23. ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (treat `[0-9-a]` as `[0-9\-a]`) -_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_ If this flag is set, then a trailing `-` after a character range will be taken as a literal `-`, as if it had been escaped as `\-`. If this flag is clear, then a trailing @@ -973,15 +998,21 @@ _Set in: Oniguruma, Ruby_ If this flag is set, Oniguruma will warn about nested repeat operators that have no meaning, like `(?:a*)+`. If this flag is clear, Oniguruma will allow the nested repeat operators without warning about them. -### 26. ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (allow [a-\x{7fffffff}]) +### 26. ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (allow `[a-\x{7fffffff}]`) _Set in: Oniguruma_ If this flag is set, then invalid code points at the end of a range in a character class are allowed. +### 27.
ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC (allow `[\w-%]` to mean `[\w\-%]`) + +_Set in: Perl_NG, Perl, Java_ + +(New feature as of Oniguruma 6.9.10) + ### 31. ONIG_SYN_CONTEXT_INDEP_ANCHORS -_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_ Not currently used, and does nothing. (But still set in several syntaxes for some reason.) @@ -994,98 +1025,102 @@ These tables show which of the built-in syntaxes use which flags and options, fo ### Group One Flags (op) -| ID | Option | PosB | PosEx | Emacs | Grep | Gnu | Java | Perl | PeNG | Ruby | Onig | -| ----- | --------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | -| 0 | `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` | - | - | - | - | - | - | - | - | - | - | -| 1 | `ONIG_SYN_OP_DOT_ANYCHAR` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | -| 2 | `ONIG_SYN_OP_ASTERISK_ZERO_INF` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | -| 3 | `ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF` | - | - | - | - | - | - | - | - | - | - | -| 4 | `ONIG_SYN_OP_PLUS_ONE_INF` | - | Yes | Yes | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 5 | `ONIG_SYN_OP_ESC_PLUS_ONE_INF` | - | - | - | Yes | - | - | - | - | - | - | -| 6 | `ONIG_SYN_OP_QMARK_ZERO_ONE` | - | Yes | Yes | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 7 | `ONIG_SYN_OP_ESC_QMARK_ZERO_ONE` | - | - | - | Yes | - | - | - | - | - | - | -| 8 | `ONIG_SYN_OP_BRACE_INTERVAL` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 9 | `ONIG_SYN_OP_ESC_BRACE_INTERVAL` | Yes | - | Yes | Yes | - | - | - | - | - | - | -| 10 | `ONIG_SYN_OP_VBAR_ALT` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 11 | `ONIG_SYN_OP_ESC_VBAR_ALT` | - | - | Yes | Yes | - | - | - | - | - | - | -| 12 | `ONIG_SYN_OP_LPAREN_SUBEXP` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 13 | 
`ONIG_SYN_OP_ESC_LPAREN_SUBEXP` | Yes | - | Yes | Yes | - | - | - | - | - | - | -| 14 | `ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 15 | `ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 16 | `ONIG_SYN_OP_DECIMAL_BACKREF` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | -| 17 | `ONIG_SYN_OP_BRACKET_CC` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | -| 18 | `ONIG_SYN_OP_ESC_W_WORD` | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | Yes | -| 19 | `ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END` | - | - | - | Yes | Yes | - | - | - | - | - | -| 20 | `ONIG_SYN_OP_ESC_B_WORD_BOUND` | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | Yes | -| 21 | `ONIG_SYN_OP_ESC_S_WHITE_SPACE` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 22 | `ONIG_SYN_OP_ESC_D_DIGIT` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 23 | `ONIG_SYN_OP_LINE_ANCHOR` | - | - | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | -| 24 | `ONIG_SYN_OP_POSIX_BRACKET` | Yes | Yes | Yes | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 25 | `ONIG_SYN_OP_QMARK_NON_GREEDY` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | -| 26 | `ONIG_SYN_OP_ESC_CONTROL_CHARS` | Yes | Yes | - | - | - | Yes | Yes | Yes | Yes | Yes | -| 27 | `ONIG_SYN_OP_ESC_C_CONTROL` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | -| 28 | `ONIG_SYN_OP_ESC_OCTAL3` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | -| 29 | `ONIG_SYN_OP_ESC_X_HEX2` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | -| 30 | `ONIG_SYN_OP_ESC_X_BRACE_HEX8` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | -| 31 | `ONIG_SYN_OP_ESC_O_BRACE_OCTAL` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | +| ID | Option | Onig | Pythn | Ruby | PeNG | Perl | Java | Gnu | Grep | Emacs | PosEx | PosB | ASIS | +| ----- | ------------------------------------------ | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | +| 
0 | `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` | - | - | - | - | - | - | - | - | - | - | - | - | +| 1 | `ONIG_SYN_OP_DOT_ANYCHAR` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | +| 2 | `ONIG_SYN_OP_ASTERISK_ZERO_INF` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | +| 3 | `ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF` | - | - | - | - | - | - | - | - | - | - | - | - | +| 4 | `ONIG_SYN_OP_PLUS_ONE_INF` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | Yes | Yes | - | - | +| 5 | `ONIG_SYN_OP_ESC_PLUS_ONE_INF` | - | - | - | - | - | - | - | Yes | - | - | - | - | +| 6 | `ONIG_SYN_OP_QMARK_ZERO_ONE` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | Yes | Yes | - | - | +| 7 | `ONIG_SYN_OP_ESC_QMARK_ZERO_ONE` | - | - | - | - | - | - | - | Yes | - | - | - | - | +| 8 | `ONIG_SYN_OP_BRACE_INTERVAL` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | Yes | - | - | +| 9 | `ONIG_SYN_OP_ESC_BRACE_INTERVAL` | - | - | - | - | - | - | - | Yes | Yes | - | Yes | - | +| 10 | `ONIG_SYN_OP_VBAR_ALT` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | Yes | - | - | +| 11 | `ONIG_SYN_OP_ESC_VBAR_ALT` | - | - | - | - | - | - | - | Yes | Yes | - | - | - | +| 12 | `ONIG_SYN_OP_LPAREN_SUBEXP` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | Yes | - | - | +| 13 | `ONIG_SYN_OP_ESC_LPAREN_SUBEXP` | - | - | - | - | - | - | - | Yes | Yes | - | Yes | - | +| 14 | `ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | +| 15 | `ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | +| 16 | `ONIG_SYN_OP_DECIMAL_BACKREF` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | +| 17 | `ONIG_SYN_OP_BRACKET_CC` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | +| 18 | `ONIG_SYN_OP_ESC_W_WORD` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | +| 19 | `ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END` | - | - | - | - | - | - | Yes | Yes | - | - | - | 
- | +| 20 | `ONIG_SYN_OP_ESC_B_WORD_BOUND` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | +| 21 | `ONIG_SYN_OP_ESC_S_WHITE_SPACE` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | +| 22 | `ONIG_SYN_OP_ESC_D_DIGIT` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | +| 23 | `ONIG_SYN_OP_LINE_ANCHOR` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | +| 24 | `ONIG_SYN_OP_POSIX_BRACKET` | Yes | - | Yes | Yes | Yes | - | Yes | Yes | - | Yes | Yes | - | +| 25 | `ONIG_SYN_OP_QMARK_NON_GREEDY` | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | - | +| 26 | `ONIG_SYN_OP_ESC_CONTROL_CHARS` | Yes | Yes | Yes | Yes | Yes | Yes | - | - | Yes | Yes | Yes | - | +| 27 | `ONIG_SYN_OP_ESC_C_CONTROL` | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | - | +| 28 | `ONIG_SYN_OP_ESC_OCTAL3` | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | - | +| 29 | `ONIG_SYN_OP_ESC_X_HEX2` | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | - | +| 30 | `ONIG_SYN_OP_ESC_X_BRACE_HEX8` | Yes | - | Yes | Yes | Yes | - | - | - | - | - | - | - | +| 31 | `ONIG_SYN_OP_ESC_O_BRACE_OCTAL` | Yes | - | Yes | Yes | Yes | - | - | - | - | - | - | - | ### Group Two Flags (op2) -| ID | Option | PosB | PosEx | Emacs | Grep | Gnu | Java | Perl | PeNG | Ruby | Onig | -| ----- | --------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | -| 0 | `ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE` | - | - | - | - | - | Yes | Yes | Yes | - | - | -| 1 | `ONIG_SYN_OP2_QMARK_GROUP_EFFECT` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | -| 2 | `ONIG_SYN_OP2_OPTION_PERL` | - | - | - | - | - | Yes | Yes | Yes | - | - | -| 3 | `ONIG_SYN_OP2_OPTION_RUBY` | - | - | - | - | - | - | - | - | Yes | - | -| 4 | `ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT` | - | - | - | - | - | - | - | - | Yes | Yes | -| 5 | `ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL` | - | - | - | - | - | Yes | - | - | - | - | -| 6 | 
`ONIG_SYN_OP2_CCLASS_SET_OP` | - | - | - | - | - | - | - | Yes | Yes | Yes | -| 7 | `ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP` | - | - | - | - | - | - | - | Yes | Yes | Yes | -| 8 | `ONIG_SYN_OP2_ESC_K_NAMED_BACKREF` | - | - | - | - | - | - | - | Yes | Yes | Yes | -| 9 | `ONIG_SYN_OP2_ESC_G_SUBEXP_CALL` | - | - | - | - | - | - | - | Yes | Yes | Yes | -| 10 | `ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY` | - | - | - | - | - | - | - | - | - | - | -| 11 | `ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL` | - | - | - | - | - | - | - | - | Yes | Yes | -| 12 | `ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META` | - | - | - | - | - | - | - | - | Yes | Yes | -| 13 | `ONIG_SYN_OP2_ESC_V_VTAB` | - | - | - | - | - | Yes | - | - | Yes | Yes | -| 14 | `ONIG_SYN_OP2_ESC_U_HEX4` | - | - | - | - | - | Yes | - | - | Yes | Yes | -| 15 | `ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR` | - | - | Yes | - | - | - | - | - | - | - | -| 16 | `ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | -| 17 | `ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | -| 18 | `ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS` | - | - | - | - | - | - | - | - | - | - | -| 19 | `ONIG_SYN_OP2_ESC_H_XDIGIT` | - | - | - | - | - | - | - | - | Yes | Yes | -| 20 | `ONIG_SYN_OP2_INEFFECTIVE_ESCAPE` | - | - | - | - | - | - | - | - | - | - | -| 21 | `ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | -| 22 | `ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | -| 23 | `ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | -| 24 | `ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT` | - | - | - | - | - | - | Yes | Yes | - | Yes | -| 25 | `ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP` | - | - | - | - | - | - | - | - | Yes | Yes | -| 26 | `ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | -| 27 | `ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL` | - | - | - | - | - | - | - | Yes | - | - | -| 28 | 
`ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS` | - | - | - | - | - | - | Yes | Yes | Yes | - | -| 29 | `ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME` | - | - | - | - | - | - | Yes | Yes | Yes | - | -| 30 | `ONIG_SYN_OP2_OPTION_ONIGURUMA` | - | - | - | - | - | - | - | - | - | Yes | +| ID | Option | Onig | Pythn | Ruby | PeNG | Perl | Java | Gnu | Grep | Emacs | PosEx | PosB | ASIS | +| ----- | ---------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | +| 0 | `ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE` | - | - | - | Yes | Yes | Yes | - | - | - | - | - | - | +| 1 | `ONIG_SYN_OP2_QMARK_GROUP_EFFECT` | Yes | Yes | Yes | Yes | Yes | Yes | - | - | Yes | - | - | - | +| 2 | `ONIG_SYN_OP2_OPTION_PERL` | - | Yes | - | Yes | Yes | Yes | - | - | - | - | - | - | +| 3 | `ONIG_SYN_OP2_OPTION_RUBY` | - | - | Yes | - | - | - | - | - | - | - | - | - | +| 4 | `ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT` | Yes | - | Yes | Yes | Yes | Yes | - | - | - | - | - | - | +| 5 | `ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL` | - | - | - | Yes | Yes | Yes | - | - | - | - | - | - | +| 6 | `ONIG_SYN_OP2_CCLASS_SET_OP` | Yes | - | Yes | - | - | Yes | - | - | - | - | - | - | +| 7 | `ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP` | Yes | - | Yes | Yes | - | - | - | - | - | - | - | - | +| 8 | `ONIG_SYN_OP2_ESC_K_NAMED_BACKREF` | Yes | - | Yes | Yes | - | - | - | - | - | - | - | - | +| 9 | `ONIG_SYN_OP2_ESC_G_SUBEXP_CALL` | Yes | - | Yes | Yes | - | - | - | - | - | - | - | - | +| 10 | `ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY` | - | - | - | - | - | - | - | - | - | - | - | - | +| 11 | `ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL` | Yes | - | Yes | - | - | - | - | - | - | - | - | - | +| 12 | `ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META` | Yes | - | Yes | - | - | - | - | - | - | - | - | - | +| 13 | `ONIG_SYN_OP2_ESC_V_VTAB` | Yes | Yes | Yes | - | - | Yes | - | - | - | - | - | - | +| 14 | `ONIG_SYN_OP2_ESC_U_HEX4` | Yes | Yes | Yes | - | - | Yes | - | - | - | - | - | - | +| 15 | 
`ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR` | - | - | - | - | - | - | - | - | Yes | - | - | - | +| 16 | `ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY` | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | - | +| 17 | `ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT` | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | - | - | +| 19 | `ONIG_SYN_OP2_ESC_H_XDIGIT` | Yes | - | Yes | - | - | - | - | - | - | - | - | - | +| 20 | `ONIG_SYN_OP2_INEFFECTIVE_ESCAPE` | - | - | - | - | - | - | - | - | - | - | - | Yes | +| 21 | `ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE` | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | - | - | +| 22 | `ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP` | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | - | - | +| 23 | `ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE` | Yes | - | Yes | Yes | Yes | - | - | - | - | - | - | - | +| 24 | `ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT` | Yes | - | - | Yes | Yes | - | - | - | - | - | - | - | +| 25 | `ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP` | Yes | - | Yes | Yes | Yes | - | - | - | - | - | - | - | +| 26 | `ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT` | Yes | - | Yes | Yes | Yes | - | - | - | - | - | - | - | +| 27 | `ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL` | - | - | - | Yes | - | - | - | - | - | - | - | - | +| 28 | `ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS` | Yes | - | - | Yes | Yes | - | - | - | - | - | - | - | +| 29 | `ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME` | Yes | Yes | - | Yes | Yes | - | - | - | - | - | - | - | +| 30 | `ONIG_SYN_OP2_OPTION_ONIGURUMA` | Yes | - | - | - | - | - | - | - | - | - | - | - | +| 31 | `ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME` | - | Yes | - | - | - | - | - | - | - | - | - | - | ### Syntax Flags (syn) -| ID | Option | PosB | PosEx | Emacs | Grep | Gnu | Java | Perl | PeNG | Ruby | Onig | -| ----- | --------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | -| 0 | `ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 1 | 
`ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 2 | `ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP` | - | Yes | - | - | - | - | - | - | - | - | -| 3 | `ONIG_SYN_ALLOW_INVALID_INTERVAL` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 4 | `ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV` | - | - | - | - | - | - | - | - | Yes | Yes | -| 5 | `ONIG_SYN_STRICT_CHECK_BACKREF` | - | - | - | - | - | - | - | - | - | - | -| 6 | `ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND` | - | - | - | - | - | Yes | - | - | Yes | Yes | -| 7 | `ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP` | - | - | - | - | - | - | - | Yes | Yes | Yes | -| 8 | `ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME` | - | - | - | - | - | - | - | Yes | Yes | Yes | -| 9 | `ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY` | - | - | - | - | - | - | - | - | Yes | Yes | -| 10 | `ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH` | - | - | - | - | - | Yes | Yes | Yes | - | - | -| 11 | `ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND` | - | - | - | - | - | Yes | - | - | - | Yes | -| 20 | `ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC` | - | - | - | Yes | - | - | - | - | - | - | -| 21 | `ONIG_SYN_BACKSLASH_ESCAPE_IN_CC` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 22 | `ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC` | - | - | Yes | Yes | - | - | - | - | - | - | -| 23 | `ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | -| 24 | `ONIG_SYN_WARN_CC_OP_NOT_ESCAPED` | - | - | - | - | - | - | - | - | Yes | Yes | -| 25 | `ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT` | - | - | - | - | - | - | - | - | Yes | Yes | -| 26 | `ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC` | - | - | - | - | - | - | - | - | - | Yes | -| 31 | `ONIG_SYN_CONTEXT_INDEP_ANCHORS` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| ID | Option | Onig | Pythn | Ruby | PeNG | Perl | Java | Gnu | Grep | Emacs | PosEx | PosB | ASIS | +| ----- | ---------------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | 
----- | ----- | ----- | ----- | ----- | +| 0 | `ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | Yes | - | - | +| 1 | `ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | Yes | - | - | +| 2 | `ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP` | - | - | - | - | - | - | - | - | - | Yes | - | - | +| 3 | `ONIG_SYN_ALLOW_INVALID_INTERVAL` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | +| 4 | `ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV` | Yes | Yes | Yes | - | - | - | - | - | - | - | - | - | +| 5 | `ONIG_SYN_STRICT_CHECK_BACKREF` | - | - | - | - | - | - | - | - | - | - | - | - | +| 6 | `ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND` | Yes | - | Yes | - | - | Yes | - | - | - | - | - | - | +| 7 | `ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP` | Yes | - | Yes | Yes | - | - | - | - | - | - | - | - | +| 8 | `ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME` | Yes | - | Yes | Yes | - | - | - | - | - | - | - | - | +| 9 | `ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY` | Yes | - | Yes | - | - | - | - | - | - | - | - | - | +| 10 | `ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH` | - | Yes | - | Yes | Yes | Yes | - | - | - | - | - | - | +| 11 | `ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND` | Yes | - | - | - | - | Yes | - | - | - | - | - | - | +| 12 | `ONIG_SYN_PYTHON` | - | Yes | - | - | - | - | - | - | - | - | - | - | +| 13 | `ONIG_SYN_WHOLE_OPTIONS` | Yes | - | - | - | - | - | - | - | - | - | - | - | +| 14 | `ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP` | - | - | - | - | - | - | - | Yes | - | - | Yes | - | +| 20 | `ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC` | - | - | - | - | - | - | - | Yes | - | - | - | - | +| 21 | `ONIG_SYN_BACKSLASH_ESCAPE_IN_CC` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | - | - | - | +| 22 | `ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC` | - | - | - | - | - | - | - | Yes | Yes | - | - | - | +| 23 | `ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | Yes | - | - | +| 24 | `ONIG_SYN_WARN_CC_OP_NOT_ESCAPED` | Yes | 
- | Yes | - | - | - | - | - | - | - | - | - | +| 25 | `ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT` | Yes | - | Yes | - | - | - | - | - | - | - | - | - | +| 26 | `ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC` | Yes | - | - | - | - | - | - | - | - | - | - | - | +| 27 | `ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC` | - | - | - | Yes | Yes | Yes | - | - | - | - | - | - | +| 31 | `ONIG_SYN_CONTEXT_INDEP_ANCHORS` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | - | - | Yes | - | - | diff --git a/doc/UNICODE_PROPERTIES b/doc/UNICODE_PROPERTIES index 3d2dc09..7ab6d78 100644 --- a/doc/UNICODE_PROPERTIES +++ b/doc/UNICODE_PROPERTIES @@ -1,4 +1,4 @@ -Unicode Properties (Unicode Version: 15.1.0, Emoji: 15.1) +Unicode Properties (Unicode Version: 16.0.0, Emoji: 16.0) ASCII_Hex_Digit Adlam @@ -68,6 +68,7 @@ Emoji_Presentation Ethiopic Extended_Pictographic Extender +Garay Georgian Glagolitic Gothic @@ -79,6 +80,7 @@ Greek Gujarati Gunjala_Gondi Gurmukhi +Gurung_Khema Han Hangul Hanifi_Rohingya @@ -113,6 +115,7 @@ Khitan_Small_Script Khmer Khojki Khudawadi +Kirat_Rai L LC Lao @@ -150,6 +153,7 @@ Meroitic_Hieroglyphs Miao Mn Modi +Modifier_Combining_Mark Mongolian Mro Multani @@ -169,6 +173,7 @@ Nushu Nyiakeng_Puachue_Hmong Ogham Ol_Chiki +Ol_Onal Old_Hungarian Old_Italic Old_North_Arabian @@ -229,6 +234,7 @@ Sogdian Sora_Sompeng Soyombo Sundanese +Sunuwar Syloti_Nagri Syriac Tagalog @@ -247,7 +253,9 @@ Thai Tibetan Tifinagh Tirhuta +Todhri Toto +Tulu_Tigalari Ugaritic Unified_Ideograph Unknown @@ -330,6 +338,7 @@ Ext ExtPict Final_Punctuation Format +Gara Geor Glag Gong @@ -341,6 +350,7 @@ Grek Gr_Ext Gr_Link Gujr +Gukh Guru Hang Hani @@ -370,6 +380,7 @@ Khmr Khoj Kits Knda +Krai Kthi Lana Laoo @@ -392,6 +403,7 @@ Mani Marc Mark Math_Symbol +MCM Medf Mend Merc @@ -422,6 +434,7 @@ OIDS Olck OLower OMath +Onao Open_Punctuation Orkh Orya @@ -476,6 +489,7 @@ Space_Separator Spacing_Mark STerm Sund +Sunu Surrogate Sylo Symbol @@ -496,6 +510,8 @@ Tibt Tirh Titlecase_Letter Tnsa +Todr 
+Tutg Ugar UIdeo Unassigned @@ -701,6 +717,7 @@ In_Osage In_Elbasan In_Caucasian_Albanian In_Vithkuqi +In_Todhri In_Linear_A In_Latin_Extended_F In_Cypriot_Syllabary @@ -723,6 +740,7 @@ In_Psalter_Pahlavi In_Old_Turkic In_Old_Hungarian In_Hanifi_Rohingya +In_Garay In_Rumi_Numeral_Symbols In_Yezidi In_Arabic_Extended_C @@ -742,12 +760,14 @@ In_Khojki In_Multani In_Khudawadi In_Grantha +In_Tulu_Tigalari In_Newa In_Tirhuta In_Siddham In_Modi In_Mongolian_Supplement In_Takri +In_Myanmar_Extended_C In_Ahom In_Dogra In_Warang_Citi @@ -758,6 +778,7 @@ In_Soyombo In_Unified_Canadian_Aboriginal_Syllabics_Extended_A In_Pau_Cin_Hau In_Devanagari_Extended_A +In_Sunuwar In_Bhaiksuki In_Marchen In_Masaram_Gondi @@ -772,12 +793,15 @@ In_Early_Dynastic_Cuneiform In_Cypro_Minoan In_Egyptian_Hieroglyphs In_Egyptian_Hieroglyph_Format_Controls +In_Egyptian_Hieroglyphs_Extended_A In_Anatolian_Hieroglyphs +In_Gurung_Khema In_Bamum_Supplement In_Mro In_Tangsa In_Bassa_Vah In_Pahawh_Hmong +In_Kirat_Rai In_Medefaidrin In_Miao In_Ideographic_Symbols_and_Punctuation @@ -792,6 +816,7 @@ In_Small_Kana_Extension In_Nushu In_Duployan In_Shorthand_Format_Controls +In_Symbols_for_Legacy_Computing_Supplement In_Znamenny_Musical_Notation In_Byzantine_Musical_Symbols In_Musical_Symbols @@ -809,6 +834,7 @@ In_Nyiakeng_Puachue_Hmong In_Toto In_Wancho In_Nag_Mundari +In_Ol_Onal In_Ethiopic_Extended_B In_Mende_Kikakui In_Adlam diff --git a/doc/onig_syn_md.c b/doc/onig_syn_md.c new file mode 100644 index 0000000..6e29e5a --- /dev/null +++ b/doc/onig_syn_md.c @@ -0,0 +1,667 @@ +/* + * onig_syn_md.c + * Copyright (c) 2024 K.Kosako + * + * Oniguruma Owner: K.Kosako https://github.com/kkos/oniguruma + * SYNTAX.md : seanofw https://github.com/seanofw + * onig_syn_md.c : tonco-miyazawa https://github.com/tonco-miyazawa + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "oniguruma.h" + +#define ONIG_SYN_MD_VERSION_INT (00002) +#define TOTAL_NUM_OF_BITS (32) + +#define PRINT_SEPARATOR
(printf("===================================================\n")) + +#define INPUT_SYNTAX(syn, abb, set_in) { (syn), (#syn), (abb), (set_in) } +#define INPUT_FLAG(arg) { (arg), (#arg) } + + + +/************************************* Settings *********************************************/ +/* NOW_MODE +1: (OP) +2: (OP2) +3: (BEHAVIOR) */ +#define NOW_MODE (1) + +/* #define PRINT_UNDEFINED_FLAG */ + +/* #define PRINT_SYNTAX_FORWARD_ORDER */ + +/* #define USE_YOUR_OWN_SYNTAX */ + +/* #define PRINT_DEBUG_INFO */ + +#define PRINT_VERSION_INFO +#define PRINT_SET_IN_INFO +#define PRINT_TABLE_INFO +#define WARN_UNDEFINED_FLAG_USED +/************************************* Settings *********************************************/ + + +/************************ Switch between OP, OP2, BEHAVIOR **********************************/ +#if NOW_MODE == 1 + +#define SYNTAX_MEMBER_NAME ("op") +#define TITLE_STRING ("Group One Flags (op)") +#define SYNTAX_MEMBER(syn) ((syn)->op) +#define IS_SYNTAX_MEMBER(syn, opm) (((syn)->op & (opm)) != 0) + +#elif NOW_MODE == 2 + +#define SYNTAX_MEMBER_NAME ("op2") +#define TITLE_STRING ("Group Two Flags (op2)") +#define SYNTAX_MEMBER(syn) ((syn)->op2) +#define IS_SYNTAX_MEMBER(syn, opm) (((syn)->op2 & (opm)) != 0) + +#elif NOW_MODE == 3 + +#define SYNTAX_MEMBER_NAME ("behavior") +#define TITLE_STRING ("Syntax Flags (syn)") +#define SYNTAX_MEMBER(syn) ((syn)->behavior) +#define IS_SYNTAX_MEMBER(syn, opm) (((syn)->behavior & (opm)) != 0) + +#else +#error "Check 'NOW_MODE' value." 
+#endif +/************************ Switch between OP, OP2, BEHAVIOR **********************************/ + + +/*********************************** Your own syntax ****************************************/ +#ifdef USE_YOUR_OWN_SYNTAX +static OnigSyntaxType OnigSyntaxYourOwn; +#define ONIG_SYNTAX_YOUROWN (&OnigSyntaxYourOwn) + +static OnigSyntaxType OnigSyntaxYourOwn = { + 0xf0f0f0f0 /* Group One Flags (op) */ + , 0xffff0000 /* Group Two Flags (op2) */ + , 0x00ff00ff /* Syntax Flags (syn) */ + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; +#endif +/*********************************** Your own syntax ****************************************/ + + +/************************************** syntax data *****************************************/ +typedef struct { + OnigSyntaxType* syn; + char *name; + char *abb; + char *set_in; +} syn_data; + + +static syn_data syn_data_list[] = +{ + /* INPUT_SYNTAX(syn, abb, set_in) ===> { (syn), (#syn), (abb), (set_in) } */ + + INPUT_SYNTAX( ONIG_SYNTAX_ASIS , "ASIS" , "ASIS" ) + , INPUT_SYNTAX( ONIG_SYNTAX_POSIX_BASIC , "PosB" , "PosixBasic" ) + , INPUT_SYNTAX( ONIG_SYNTAX_POSIX_EXTENDED , "PosEx", "PosixExtended") + , INPUT_SYNTAX( ONIG_SYNTAX_EMACS , "Emacs", "Emacs") + , INPUT_SYNTAX( ONIG_SYNTAX_GREP , "Grep" , "Grep" ) + , INPUT_SYNTAX( ONIG_SYNTAX_GNU_REGEX , "Gnu" , "GnuRegex" ) + , INPUT_SYNTAX( ONIG_SYNTAX_JAVA , "Java" , "Java" ) + , INPUT_SYNTAX( ONIG_SYNTAX_PERL , "Perl" , "Perl" ) + , INPUT_SYNTAX( ONIG_SYNTAX_PERL_NG , "PeNG" , "Perl_NG" ) + , INPUT_SYNTAX( ONIG_SYNTAX_RUBY , "Ruby" , "Ruby" ) + , INPUT_SYNTAX( ONIG_SYNTAX_PYTHON , "Pythn", "Python") + , 
INPUT_SYNTAX( ONIG_SYNTAX_ONIGURUMA , "Onig" , "Oniguruma" ) + +#ifdef USE_YOUR_OWN_SYNTAX + , INPUT_SYNTAX( ONIG_SYNTAX_YOUROWN , "Your" , "YourOwn" ) +#endif +}; + +static const int num_of_syntax_types = (sizeof syn_data_list /sizeof syn_data_list[0]); + + +static void print_syn_data_list() +{ + int y; + PRINT_SEPARATOR; + for (y = 0; y < num_of_syntax_types; y++) + { + printf( "\nsyn_data_list[%d]\n", y); + printf( "name='%s'\n" , syn_data_list[y].name); + + printf( "syn->"); + printf( SYNTAX_MEMBER_NAME ); + printf( "= 0x%08x\n", SYNTAX_MEMBER(syn_data_list[y].syn) ); + + printf( "abb='%s'\n" , syn_data_list[y].abb); + printf( "set_in='%s'\n" , syn_data_list[y].set_in); + }; + return ; +} +/************************************** syntax data *****************************************/ + + +/************************************** flag data *******************************************/ +typedef struct { + unsigned int num; + char *name; +} flag_data; + + +/* OP */ +#if NOW_MODE == 1 +static flag_data flag_data_list[] = +{ + +/* The following are no need to sort them in bit order. 
*/ + +/* INPUT_FLAG(arg) ===> { (arg), (#arg) } */ + + INPUT_FLAG( ONIG_SYN_OP_VARIABLE_META_CHARACTERS ) + , INPUT_FLAG( ONIG_SYN_OP_DOT_ANYCHAR ) + , INPUT_FLAG( ONIG_SYN_OP_ASTERISK_ZERO_INF ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF ) + , INPUT_FLAG( ONIG_SYN_OP_PLUS_ONE_INF ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_PLUS_ONE_INF ) + , INPUT_FLAG( ONIG_SYN_OP_QMARK_ZERO_ONE ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_QMARK_ZERO_ONE ) + , INPUT_FLAG( ONIG_SYN_OP_BRACE_INTERVAL ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_BRACE_INTERVAL ) + , INPUT_FLAG( ONIG_SYN_OP_VBAR_ALT ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_VBAR_ALT ) + , INPUT_FLAG( ONIG_SYN_OP_LPAREN_SUBEXP ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_LPAREN_SUBEXP ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR ) + , INPUT_FLAG( ONIG_SYN_OP_DECIMAL_BACKREF ) + , INPUT_FLAG( ONIG_SYN_OP_BRACKET_CC ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_W_WORD ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_B_WORD_BOUND ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_S_WHITE_SPACE ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_D_DIGIT ) + , INPUT_FLAG( ONIG_SYN_OP_LINE_ANCHOR ) + , INPUT_FLAG( ONIG_SYN_OP_POSIX_BRACKET ) + , INPUT_FLAG( ONIG_SYN_OP_QMARK_NON_GREEDY ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_CONTROL_CHARS ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_C_CONTROL ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_OCTAL3 ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_X_HEX2 ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_X_BRACE_HEX8 ) + , INPUT_FLAG( ONIG_SYN_OP_ESC_O_BRACE_OCTAL ) +}; +#endif + + +/* OP2 */ +#if NOW_MODE == 2 +static flag_data flag_data_list[] = +{ + INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE ) + , INPUT_FLAG( ONIG_SYN_OP2_QMARK_GROUP_EFFECT ) + , INPUT_FLAG( ONIG_SYN_OP2_OPTION_PERL ) + , INPUT_FLAG( ONIG_SYN_OP2_OPTION_RUBY ) + , INPUT_FLAG( ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT ) + , INPUT_FLAG( ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL ) + , INPUT_FLAG( ONIG_SYN_OP2_CCLASS_SET_OP ) + , INPUT_FLAG( 
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_K_NAMED_BACKREF ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) + , INPUT_FLAG( ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_V_VTAB ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_U_HEX4 ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) + /* , INPUT_FLAG( ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS ) */ + , INPUT_FLAG( ONIG_SYN_OP2_ESC_H_XDIGIT ) + , INPUT_FLAG( ONIG_SYN_OP2_INEFFECTIVE_ESCAPE ) + , INPUT_FLAG( ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE ) + , INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT ) + , INPUT_FLAG( ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP ) + /* , INPUT_FLAG( ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER ) */ /* obsoleted: use next */ + , INPUT_FLAG( ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT ) + , INPUT_FLAG( ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL ) + , INPUT_FLAG( ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS ) + , INPUT_FLAG( ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME ) + , INPUT_FLAG( ONIG_SYN_OP2_OPTION_ONIGURUMA ) + , INPUT_FLAG( ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME ) +}; +#endif + + +/* BEHAVIOR */ +#if NOW_MODE == 3 +static flag_data flag_data_list[] = +{ + INPUT_FLAG( ONIG_SYN_CONTEXT_INDEP_ANCHORS ) + , INPUT_FLAG( ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS ) + , INPUT_FLAG( ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS ) + , INPUT_FLAG( ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP ) + , INPUT_FLAG( ONIG_SYN_ALLOW_INVALID_INTERVAL ) + , INPUT_FLAG( ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV ) + , INPUT_FLAG( ONIG_SYN_STRICT_CHECK_BACKREF ) + , INPUT_FLAG( ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) + , INPUT_FLAG( ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP ) + , INPUT_FLAG( 
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) + , INPUT_FLAG( ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY ) + , INPUT_FLAG( ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH ) + , INPUT_FLAG( ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND ) + , INPUT_FLAG( ONIG_SYN_PYTHON ) + , INPUT_FLAG( ONIG_SYN_WHOLE_OPTIONS ) + , INPUT_FLAG( ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP ) + + /* syntax (behavior) in char class [...] */ + , INPUT_FLAG( ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) + , INPUT_FLAG( ONIG_SYN_BACKSLASH_ESCAPE_IN_CC ) + , INPUT_FLAG( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC ) + , INPUT_FLAG( ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + , INPUT_FLAG( ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC ) + , INPUT_FLAG( ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC ) + + /* syntax (behavior) warning */ + , INPUT_FLAG( ONIG_SYN_WARN_CC_OP_NOT_ESCAPED ) + , INPUT_FLAG( ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) +}; +#endif + + +static const int num_of_flags = (sizeof flag_data_list /sizeof flag_data_list[0]); + +static int flag_name_max_len = 0; + + +static int flag_name_max_len_int() +{ + int i, n, max_len; + max_len = 0; + + for (i = 0; i < num_of_flags; i++) + { + n = strlen(flag_data_list[i].name); + if ( max_len < n ) + { + max_len = n; + } + } + + /* Debug: printf("max_len=%d\n", max_len); */ + return max_len; +} + + +static int convert_bit_shift_num_to_flag_data_list_element( int bit_shift_num ) +{ + int i; + for (i = 0; i < num_of_flags; i++) + { + if ( flag_data_list[i].num == (1U << bit_shift_num) ){ + +#ifdef PRINT_DEBUG_INFO + printf("(1U << %d) ===> flag_data_list[%d]\n", bit_shift_num, i ); +#endif + return i; + } + } + +#ifdef PRINT_DEBUG_INFO + printf("(1U << %d) ===> ( none )\n", bit_shift_num ); +#endif + + /* If not found, returns "-1". This is not Error. 
*/ + return -1; +} + + +static int convert_num_to_bit_shift_num(unsigned int arg_num){ + int x; + for (x = 0; x < TOTAL_NUM_OF_BITS; x++) + { + if ( arg_num == (1U << x) ) + { + return x; + } + } + + /* Error */ + printf("<Error:line%d> '0x%08x' is not bit flag.\n", __LINE__, arg_num ); + exit(-1); + return -1; +} + + +static int check_flag_data_duplication() +{ + int i, shift_num; + unsigned int used_bits = 0; + + for (i = 0; i < num_of_flags; i++) + { + if ( (used_bits & flag_data_list[i].num) != 0 ){ + + shift_num = convert_num_to_bit_shift_num(flag_data_list[i].num); + + PRINT_SEPARATOR; + fprintf(stderr, "\n<Error:line%d> The following bit has already been used.\n", __LINE__ ); + fprintf(stderr, "flag_data_list[%d] : '%s' ", i, flag_data_list[i].name); + fprintf(stderr, "(1U << %d)\n", shift_num); + +#ifndef PRINT_DEBUG_INFO + fprintf(stderr, "\nPlease use '#define PRINT_DEBUG_INFO'.\n"); +#endif + exit(-1); + return -1; + } + used_bits |= flag_data_list[i].num; + } + return 0; +} + + +static void print_flag_data_list() +{ + int i, shift_num; + + PRINT_SEPARATOR; + for (i = 0; i < num_of_flags; i++) + { + shift_num = convert_num_to_bit_shift_num( flag_data_list[i].num ); + + printf( "\nflag_data_list[%d]\n", i); + printf( "name='%s' " , flag_data_list[i].name); + printf( "(1U << %d)\n" , shift_num); + printf( "num=0x%08x\n" , flag_data_list[i].num); + } + return ; +} +/************************************** flag data *******************************************/ + + +/************************************** print table *****************************************/ +static void print_table_head() +{ + int i, y; + + printf("\n### "); + printf( TITLE_STRING ); + printf("\n\n| ID | Option"); + for (i = 0; i < (flag_name_max_len - 1); i++) + { + printf(" "); + } + printf("|"); + + /* ex. 
print "PeNG " */ +#ifdef PRINT_SYNTAX_FORWARD_ORDER + for (y = 0; y < num_of_syntax_types; y++) +#else + for (y = num_of_syntax_types -1; y > -1; y--) +#endif + { + printf(" %-5.5s |", syn_data_list[y].abb ); + } + + printf("\n| ----- | "); + for (i = 0; i < (flag_name_max_len + 4); i++) + { + printf("-"); + } + printf(" |"); + + for (y = 0; y < num_of_syntax_types; y++) + { + printf(" ----- |"); + } + printf("\n"); + return ; +} + + +static void print_table_body_one_line( int shift_num ) +{ + int i, y, elem, name_chars; + + elem = convert_bit_shift_num_to_flag_data_list_element(shift_num); + + printf("| %2d | ", shift_num); + + if (elem < 0){ + name_chars = printf(" `( Undefined )`"); /* elem == -1 */ + } else { + name_chars = printf("`%s`", flag_data_list[elem].name); + } + + for ( i=0; i < (flag_name_max_len - name_chars + 5); i++){ printf(" "); } + printf("|"); + + /* ex. print ' Yes |' */ +#ifdef PRINT_SYNTAX_FORWARD_ORDER + for (y = 0; y < num_of_syntax_types; y++) +#else + for (y = num_of_syntax_types -1; y > -1; y--) +#endif + { + if ( IS_SYNTAX_MEMBER(syn_data_list[y].syn, (1U << shift_num)) ) + { + printf(" Yes |"); + } else { + printf(" - |"); + } + } + printf("\n"); + return ; +} + + +static void print_table_body() +{ + int x, elem; + for (x = 0; x < TOTAL_NUM_OF_BITS; x++) + { + elem = convert_bit_shift_num_to_flag_data_list_element( x ); + + if (elem < 0) + { +#if !(defined( PRINT_UNDEFINED_FLAG )) + continue; +#endif + } + print_table_body_one_line(x); + } + return ; +} + +/************************************** print table *****************************************/ + + +/************************************* print Set_in *****************************************/ +static void print_set_in_one_line(int shift_num) +{ + int y, elem, count; + + elem = convert_bit_shift_num_to_flag_data_list_element(shift_num); + + if (elem < 0) + { + printf("### %d. ( Undefined )\n", shift_num ); /* elem == -1 */ + } else { + printf("### %d. 
%s\n", shift_num, flag_data_list[elem].name ); + } + + count = 0; + printf("_Set in: "); +#ifdef PRINT_SYNTAX_FORWARD_ORDER + for (y = 0; y < num_of_syntax_types; y++) +#else + for (y = num_of_syntax_types -1; y > -1; y--) +#endif + { + if ( IS_SYNTAX_MEMBER(syn_data_list[y].syn, (1U << shift_num)) ) + { + if (count > 0){ printf(", "); }; + printf("%s", syn_data_list[y].set_in); + count++; + } + } /* for y */ + + if (count==0){ printf("none"); }; + + printf("_\n\n"); + return ; +} + + +static void print_set_in() +{ + int x, elem; + + PRINT_SEPARATOR; + printf("The following are 'Set in' for oniguruma/doc/SYNTAX.md\n\n## "); + printf( TITLE_STRING ); + printf("\n\n"); + for (x = 0; x < TOTAL_NUM_OF_BITS; x++) + { + elem = convert_bit_shift_num_to_flag_data_list_element( x ); + + if (elem < 0) + { +#if !(defined( PRINT_UNDEFINED_FLAG )) + continue; +#endif + } + + print_set_in_one_line(x); + } + return ; +} + +/************************************* print Set_in *****************************************/ + + +/************************************** main() *****************************************/ + +static void print_version() +{ + printf( "[ onig_syn_md.c ver.%05d ] ", ONIG_SYN_MD_VERSION_INT ); + printf("The loaded oniguruma is '%d.%d.%d'.\n" + , ONIGURUMA_VERSION_MAJOR + , ONIGURUMA_VERSION_MINOR + , ONIGURUMA_VERSION_TEENY ); + return ; +} + + +static void print_debug() +{ + int x; + + print_version(); + print_syn_data_list(); + print_flag_data_list(); + check_flag_data_duplication(); + + /* print: (1U << x) ===> flag_data_list[elem] */ + PRINT_SEPARATOR; + printf("\nThe following are the array elements that correspond to each bit.\n\n"); + for (x = 0; x < TOTAL_NUM_OF_BITS; x++) + { + convert_bit_shift_num_to_flag_data_list_element(x); + } + return ; +} + + +static void warn_undefined_flag_used() +{ + int x, y, elem; + + for (x = 0; x < TOTAL_NUM_OF_BITS; x++) + { + elem = convert_bit_shift_num_to_flag_data_list_element(x); + + if (elem < 0){ + for (y = 0; y < 
num_of_syntax_types; y++) + { + if ( IS_SYNTAX_MEMBER( syn_data_list[y].syn, (1U << x) ) ) + { + PRINT_SEPARATOR; + printf("warning: An undefined bit flag is used."); + printf(" (1U << %d)\n\n", x); + print_set_in_one_line( x ); + +#ifndef PRINT_UNDEFINED_FLAG + printf("Please use '#define PRINT_UNDEFINED_FLAG'.\n"); +#endif + return ; + } + } /* for y */ + } /* if elem */ + } /* for x */ + return ; +} + + +extern int main(int argc, char* argv[]) +{ +#ifdef PRINT_DEBUG_INFO + print_debug(); + return 0; +#endif + +#ifdef PRINT_VERSION_INFO + print_version(); +#endif + + flag_name_max_len = flag_name_max_len_int(); + check_flag_data_duplication(); + +#ifdef PRINT_SET_IN_INFO + print_set_in(); +#endif + +#ifdef PRINT_TABLE_INFO + print_table_head(); + print_table_body(); +#endif + +#ifdef WARN_UNDEFINED_FLAG_USED + warn_undefined_flag_used(); +#endif + + /* test */ + /* + printf("\n------ test -------\n"); + + int test_x = 17; + print_set_in_one_line( test_x ); + print_table_body_one_line( test_x ); + printf("--------------------\n"); + */ + + /* To avoid 'gcc -Wall' warnings. */ + if (0) + { + print_debug(); + print_version(); + print_set_in(); + print_table_head(); + print_table_body(); + warn_undefined_flag_used(); + } + + return 0; +} |