diff options
Diffstat (limited to 'doc')
| -rw-r--r-- | doc/API | 4 | ||||
| -rw-r--r-- | doc/API.ja | 4 | ||||
| -rw-r--r-- | doc/CALLOUTS.BUILTIN | 11 | ||||
| -rw-r--r-- | doc/CALLOUTS.BUILTIN.ja | 10 | ||||
| -rw-r--r-- | doc/RE | 25 | ||||
| -rw-r--r-- | doc/RE.ja | 25 | ||||
| -rw-r--r-- | doc/SYNTAX.md | 369 | ||||
| -rw-r--r-- | doc/UNICODE_PROPERTIES | 28 | ||||
| -rw-r--r-- | doc/onig_syn_md.c | 667 | 
9 files changed, 945 insertions, 198 deletions
| @@ -1,4 +1,4 @@ -Oniguruma API  Version 6.9.9  2022/10/28 +Oniguruma API  Version 6.9.10  2024/06/26  #include <oniguruma.h> @@ -277,6 +277,7 @@ Oniguruma API  Version 6.9.9  2022/10/28  # int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* mp, unsigned long limit)    Set a retry limit count of a match process. +  0 means unlimited.    arguments    1 mp: match-param pointer @@ -985,6 +986,7 @@ Oniguruma API  Version 6.9.9  2022/10/28  # int onig_set_retry_limit_in_match(unsigned long limit)    Set the limit of retry counts in matching process. +  0 means unlimited.    normal return: ONIG_NORMAL @@ -1,4 +1,4 @@ -鬼車インターフェース Version 6.9.9   2022/11/16 +鬼車インターフェース Version 6.9.10   2024/05/26  #include <oniguruma.h> @@ -275,6 +275,7 @@  # int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* mp, unsigned long limit)    一回のマッチでのリトライ数の制限値をセットする。 +  0は無制限を意味する。    引数    1 mp: マッチパラメタオブジェクトアドレス @@ -987,6 +988,7 @@  # int onig_set_retry_limit_in_match(unsigned long limit)    一回のマッチング内でのリトライ数の制限値を指定する。 +  0は無制限を意味する。    正常終了戻り値: ONIG_NORMAL diff --git a/doc/CALLOUTS.BUILTIN b/doc/CALLOUTS.BUILTIN index 26840e7..3c3c40b 100644 --- a/doc/CALLOUTS.BUILTIN +++ b/doc/CALLOUTS.BUILTIN @@ -1,4 +1,4 @@ -CALLOUTS.BUILTIN               2018/03/26 +CALLOUTS.BUILTIN               2024/07/04  * FAIL    (progress) @@ -92,4 +92,13 @@ CALLOUTS.BUILTIN               2018/03/26    [callout data]    slot 0: op value (enum OP_CMP in src/regexec.c) + +* SKIP    (progress) + +  (*SKIP) + +  Advance the position where the current matching fails and the next search +  begins to the current position. +  It has no effect on the current matching. 
+  //END diff --git a/doc/CALLOUTS.BUILTIN.ja b/doc/CALLOUTS.BUILTIN.ja index d371beb..dabadcd 100644 --- a/doc/CALLOUTS.BUILTIN.ja +++ b/doc/CALLOUTS.BUILTIN.ja @@ -1,4 +1,4 @@ -CALLOUTS.BUILTIN.ja               2018/03/26 +CALLOUTS.BUILTIN.ja               2024/07/04  * FAIL    (前進) @@ -90,4 +90,12 @@ CALLOUTS.BUILTIN.ja               2018/03/26    [callout data]    slot 0: op値 (src/regexec.c の中の enum OP_CMP) + +* SKIP    (前進) + +  (*SKIP) + +  現在のマッチングが失敗して次の検索を開始する位置を、現在位置まで前進させる +  現在のマッチングには何の影響も与えない +  //END @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.9.9    2023/03/27 +Oniguruma Regular Expressions Version 6.9.9    2024/06/10  syntax: ONIG_SYNTAX_ONIGURUMA (default syntax) @@ -237,22 +237,21 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default syntax)      Unicode Case: -      alnum    Letter | Mark | Decimal_Number -      alpha    Letter | Mark -      ascii    0000 - 007F -      blank    Space_Separator | 0009 -      cntrl    Control | Format | Unassigned | Private_Use | Surrogate +      alnum    Alphabetic | Decimal_Number +      alpha    Alphabetic +      ascii    U+0000 - U+007F +      blank    Space_Separator | U+0009 +      cntrl    U+0000 - U+001F, U+007F - U+009F        digit    Decimal_Number -      graph    [[:^space:]] && ^Control && ^Unassigned && ^Surrogate -      lower    Lowercase_Letter -      print    [[:graph:]] | [[:space:]] +      graph    ^White_Space && ^[[:cntrl:]] && ^Unassigned && ^Surrogate +      lower    Lowercase +      print    [[:graph:]] | Space_Separator        punct    Punctuation | Symbol -      space    Space_Separator | Line_Separator | Paragraph_Separator | -               U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085 -      upper    Uppercase_Letter +      space    White_Space +      upper    Uppercase        xdigit   U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066                 (0-9, a-f, A-F) -      word     Letter | Mark | Decimal_Number | Connector_Punctuation +      word     Alphabetic | Mark | 
Decimal_Number | Connector_Punctuation @@ -1,4 +1,4 @@ -鬼車 正規表現 Version 6.9.9    2022/08/28 +鬼車 正規表現 Version 6.9.9    2024/06/10  使用文法: ONIG_SYNTAX_ONIGURUMA (既定値) @@ -234,22 +234,21 @@      Unicodeの場合: -      alnum    Letter | Mark | Decimal_Number -      alpha    Letter | Mark -      ascii    0000 - 007F -      blank    Space_Separator | 0009 -      cntrl    Control | Format | Unassigned | Private_Use | Surrogate +      alnum    Alphabetic | Decimal_Number +      alpha    Alphabetic +      ascii    U+0000 - U+007F +      blank    Space_Separator | U+0009 +      cntrl    U+0000 - U+001F, U+007F - U+009F        digit    Decimal_Number -      graph    [[:^space:]] && ^Control && ^Unassigned && ^Surrogate -      lower    Lowercase_Letter -      print    [[:graph:]] | [[:space:]] +      graph    ^White_Space && ^[[:cntrl:]] && ^Unassigned && ^Surrogate +      lower    Lowercase +      print    [[:graph:]] | Space_Separator        punct    Punctuation | Symbol -      space    Space_Separator | Line_Separator | Paragraph_Separator | -               U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085 -      upper    Uppercase_Letter +      space    White_Space +      upper    Uppercase        xdigit   U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066                 (0-9, a-f, A-F) -      word     Letter | Mark | Decimal_Number | Connector_Punctuation +      word     Alphabetic | Mark | Decimal_Number | Connector_Punctuation diff --git a/doc/SYNTAX.md b/doc/SYNTAX.md index c38e5c8..5ec7e87 100644 --- a/doc/SYNTAX.md +++ b/doc/SYNTAX.md @@ -1,7 +1,7 @@  # Oniguruma syntax (operator) configuration -_Documented for Oniguruma 6.9.5 (2020/01/23)_ +_Documented for Oniguruma 6.9.10 (2024/12/21)_  ---------- @@ -38,7 +38,7 @@ follow.  The `options` field describes the default compile options to use if the caller does  not specify any options when invoking `onig_new()`. 
-The `meta_char_table` field is used exclusively by the ONIG_SYN_OP_VARIABLE_META_CHARACTERS +The `meta_char_table` field is used exclusively by the `ONIG_SYN_OP_VARIABLE_META_CHARACTERS`  option, which allows the various regex metacharacters, like `*` and `?`, to be replaced  with alternates (for example, SQL typically uses `%` instead of `.*` and `_` instead of `?`). @@ -75,7 +75,7 @@ data set by `onig_set_meta_char()` will be ignored.  ### 1. ONIG_SYN_OP_DOT_ANYCHAR (enable `.`) -_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep, Emacs, PosixExtended, PosixBasic_  Enables support for the standard `.` metacharacter, meaning "any one character."  You  usually want this flag on unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -84,7 +84,7 @@ so that you can use a metacharacter other than `.` instead.  ### 2. ONIG_SYN_OP_ASTERISK_ZERO_INF (enable `r*`) -_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep, Emacs, PosixExtended, PosixBasic_  Enables support for the standard `r*` metacharacter, meaning "zero or more r's."  You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -103,7 +103,7 @@ behavior.  ### 4. ONIG_SYN_OP_PLUS_ONE_INF (enable `r+`) -_Set in: Oniguruma, PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Emacs, PosixExtended_  Enables support for the standard `r+` metacharacter, meaning "one or more r's."  You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -122,7 +122,7 @@ behavior.  ### 6. 
ONIG_SYN_OP_QMARK_ZERO_ONE (enable `r?`) -_Set in: Oniguruma, PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Emacs, PosixExtended_  Enables support for the standard `r?` metacharacter, meaning "zero or one r" or "an optional r."  You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -141,7 +141,7 @@ you want `?` to simply match a literal `?` character, but you still want some wa  ### 8. ONIG_SYN_OP_BRACE_INTERVAL (enable `r{l,u}`) -_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_  Enables support for the `r{lower,upper}` range form, common to more advanced  regex engines, which lets you specify precisely a minimum and maximum range on how many r's @@ -158,7 +158,7 @@ this form also allows `r{,upper}` to be equivalent to `r{0,upper}`; otherwise,  ### 9. ONIG_SYN_OP_ESC_BRACE_INTERVAL (enable `\{` and `\}`) -_Set in: PosixBasic, Emacs, Grep_ +_Set in: Grep, Emacs, PosixBasic_  Enables support for an escaped `r\{lower,upper\}` range form.  This is useful if you  have disabled support for the normal `r{...}` range form and want curly braces to simply @@ -168,7 +168,7 @@ match literal curly brace characters, but you still want some way of activating  ### 10. ONIG_SYN_OP_VBAR_ALT (enable `r|s`) -_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_  Enables support for the common `r|s` alternation operator.  You usually want this  flag set. @@ -176,7 +176,7 @@ flag set.  ### 11. ONIG_SYN_OP_ESC_VBAR_ALT (enable `\|`) -_Set in: Emacs, Grep_ +_Set in: Grep, Emacs_  Enables support for an escaped `r\|s` alternation form.  
This is useful if you  have disabled support for the normal `r|s` alternation form and want `|` to simply @@ -185,7 +185,7 @@ match a literal `|` character, but you still want some way of activating "altern  ### 12. ONIG_SYN_OP_LPAREN_SUBEXP (enable `(r)`) -_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_  Enables support for the common `(...)` grouping-and-capturing operators.  You usually  want this flag set. @@ -193,7 +193,7 @@ want this flag set.  ### 13. ONIG_SYN_OP_ESC_LPAREN_SUBEXP (enable `\(` and `\)`) -_Set in: PosixBasic, Emacs, Grep_ +_Set in: Grep, Emacs, PosixBasic_  Enables support for escaped `\(...\)` grouping-and-capturing operators.  This is useful if you  have disabled support for the normal `(...)` grouping-and-capturing operators and want @@ -203,7 +203,7 @@ activating "grouping" or "capturing" behavior.  ### 14. ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (enable `\A` and `\Z` and `\z`) -_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_  Enables support for the anchors `\A` (start-of-string), `\Z` (end-of-string or  newline-at-end-of-string), and `\z` (end-of-string) escapes. @@ -214,7 +214,7 @@ option will recognize that metacharacter instead.)  ### 15. ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (enable `\G`) -_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_  Enables support for the special anchor `\G` (start-of-previous-match). @@ -231,7 +231,7 @@ exactly the same as `\A`.  ### 16. 
ONIG_SYN_OP_DECIMAL_BACKREF (enable `\num`) -_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep, Emacs, PosixExtended, PosixBasic_  Enables support for subsequent matches to back references to prior capture groups `(...)` using  the common `\num` syntax (like `\3`). @@ -244,7 +244,7 @@ You usually want this enabled, and it is enabled by default in every built-in sy  ### 17. ONIG_SYN_OP_BRACKET_CC (enable `[...]`) -_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep, Emacs, PosixExtended, PosixBasic_  Enables support for recognizing character classes, like `[a-z]`.  If this flag is not set, `[`  and `]` will be treated as ordinary literal characters instead of as metacharacters. @@ -254,7 +254,7 @@ You usually want this enabled, and it is enabled by default in every built-in sy  ### 18. ONIG_SYN_OP_ESC_W_WORD (enable `\w` and `\W`) -_Set in: Oniguruma, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep_  Enables support for the common `\w` and `\W` shorthand forms.  These match "word characters,"  whose meaning varies depending on the encoding being used. @@ -272,7 +272,7 @@ considered "word characters.")  ### 19. ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (enable `\<` and `\>`) -_Set in: Grep, GnuRegex_ +_Set in: GnuRegex, Grep_  Enables support for the GNU-specific `\<` and `\>` word-boundary metacharacters.  These work like  the `\b` word-boundary metacharacter, but only match at one end of the word or the other:  `\<` @@ -285,7 +285,7 @@ Most regex syntaxes do _not_ support these metacharacters.  ### 20. 
ONIG_SYN_OP_ESC_B_WORD_BOUND (enable `\b` and `\B`) -_Set in: Oniguruma, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep_  Enables support for the common `\b` and `\B` word-boundary metacharacters.  The `\b` metacharacter  matches a zero-width position at a transition from word-characters to non-word-characters, or vice @@ -297,7 +297,7 @@ are considered "word characters."  ### 21. ONIG_SYN_OP_ESC_S_WHITE_SPACE (enable `\s` and `\S`) -_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_  Enables support for the common `\s` and `\S` whitespace-matching metacharacters. @@ -319,7 +319,7 @@ Unicode-equivalent code points, and then matching according to Unicode rules.  ### 22. ONIG_SYN_OP_ESC_D_DIGIT (enable `\d` and `\D`) -_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_  Enables support for the common `\d` and `\D` digit-matching metacharacters. @@ -337,7 +337,7 @@ Unicode-equivalent code points, and then matching according to Unicode rules.  ### 23. ONIG_SYN_OP_LINE_ANCHOR (enable `^r` and `r$`) -_Set in: Oniguruma, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, Grep, Emacs, PosixExtended, PosixBasic_  Enables support for the common `^` and `$` line-anchor metacharacters. @@ -352,7 +352,7 @@ and not any other form.)  ### 24. ONIG_SYN_OP_POSIX_BRACKET (enable POSIX `[:xxxx:]`) -_Set in: Oniguruma, PosixBasic, PosixExtended, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl, GnuRegex, Grep, PosixExtended, PosixBasic_  Enables support for the POSIX `[:xxxx:]` character classes, like `[:alpha:]` and `[:digit:]`.  
The supported POSIX character classes are `alnum`, `alpha`, `blank`, `cntrl`, `digit`, @@ -361,7 +361,7 @@ The supported POSIX character classes are `alnum`, `alpha`, `blank`, `cntrl`, `d  ### 25. ONIG_SYN_OP_QMARK_NON_GREEDY (enable `r??`, `r*?`, `r+?`, and `r{n,m}?`) -_Set in: Oniguruma, Perl, Java, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java_  Enables support for lazy (non-greedy) quantifiers: That is, if you append a `?` after  another quantifier such as `?`, `*`, `+`, or `{n,m}`, Oniguruma will try to match @@ -370,17 +370,17 @@ as _little_ as possible instead of as _much_ as possible.  ### 26. ONIG_SYN_OP_ESC_CONTROL_CHARS (enable `\n`, `\r`, `\t`, etc.) -_Set in: Oniguruma, PosixBasic, PosixExtended, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, Emacs, PosixExtended, PosixBasic_  Enables support for C-style control-code escapes, like `\n` and `\r`.  Specifically,  this recognizes `\a` (7), `\b` (8), `\t` (9), `\n` (10), `\f` (12), `\r` (13), and -`\e` (27).  If ONIG_SYN_OP2_ESC_V_VTAB is enabled (see below), this also enables +`\e` (27).  If `ONIG_SYN_OP2_ESC_V_VTAB` is enabled (see below), this also enables  support for recognizing `\v` as code point 11.  ### 27. ONIG_SYN_OP_ESC_C_CONTROL (enable `\cx` control codes) -_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java_  Enables support for named control-code escapes, like `\cm` or `\cM` for code-point  13.  In this shorthand form, control codes may be specified by `\c` (for "Control") @@ -390,7 +390,7 @@ followed by an alphabetic letter, a-z or A-Z, indicating which code point to rep  ### 28. ONIG_SYN_OP_ESC_OCTAL3 (enable `\OOO` octal codes) -_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java_  Enables support for octal-style escapes of up to three digits, like `\1` for code  point 1, and `\177` for code point 127.  
Octal values greater than 255 will result @@ -399,7 +399,7 @@ in an error message.  ### 29. ONIG_SYN_OP_ESC_X_HEX2 (enable `\xHH` hex codes) -_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java_  Enables support for hexadecimal-style escapes of up to two digits, like `\x1` for code  point 1, and `\x7F` for code point 127. @@ -407,7 +407,7 @@ point 1, and `\x7F` for code point 127.  ### 30. ONIG_SYN_OP_ESC_X_BRACE_HEX8 (enable `\x{7HHHHHHH}` hex codes) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl_  Enables support for brace-wrapped hexadecimal-style escapes of up to eight digits,  like `\x{1}` for code point 1, and `\x{FFFE}` for code point 65534. @@ -415,7 +415,7 @@ like `\x{1}` for code point 1, and `\x{FFFE}` for code point 65534.  ### 31. ONIG_SYN_OP_ESC_O_BRACE_OCTAL (enable `\o{1OOOOOOOOOO}` octal codes) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl_  Enables support for brace-wrapped octal-style escapes of up to eleven digits,  like `\o{1}` for code point 1, and `\o{177776}` for code point 65534. @@ -434,7 +434,7 @@ This group contains support for lesser-known regex syntax constructs.  ### 0. ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (enable `\Q...\E`) -_Set in: Java, Perl, Perl_NG_ +_Set in: Perl_NG, Perl, Java_  Enables support for "quoted" parts of a pattern:  Between `\Q` and `\E`, all  syntax parsing is turned off, so that metacharacters like `*` and `+` will no @@ -444,7 +444,7 @@ longer be treated as metacharacters, and instead will be matched as literal  ### 1. ONIG_SYN_OP2_QMARK_GROUP_EFFECT (enable `(?...)`) -_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, Emacs_  Enables support for the fairly-common `(?...)` grouping operator, which  controls precedence but which does _not_ capture its contents. @@ -452,7 +452,7 @@ controls precedence but which does _not_ capture its contents.  
### 2. ONIG_SYN_OP2_OPTION_PERL (enable options `(?imsx)` and `(?-imsx)`) -_Set in: Java, Perl, Perl_NG_ +_Set in: Python, Perl_NG, Perl, Java_  Enables support of regex options. (i,m,s,x)  The supported toggle-able options for this flag are: @@ -465,7 +465,7 @@ The supported toggle-able options for this flag are:  ### 3. ONIG_SYN_OP2_OPTION_RUBY (enable options `(?imx)` and `(?-imx)`) -_Set in: Oniguruma, Ruby_ +_Set in: Ruby_  Enables support of regex options. (i,m,x)  The supported toggle-able options for this flag are: @@ -477,7 +477,7 @@ The supported toggle-able options for this flag are:  ### 4. ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (enable `r?+`, `r*+`, and `r++`) -_Set in: Oniguruma, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl, Java_  Enables support for the _possessive_ quantifiers `?+`, `*+`, and `++`, which  work similarly to `?` and `*` and `+`, respectively, but which do not backtrack @@ -488,7 +488,7 @@ extent if subsequent parts of the pattern fail to match.  ### 5. ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (enable `r{n,m}+`) -_Set in: Java_ +_Set in: Perl_NG, Perl, Java_  Enables support for the _possessive_ quantifier `{n,m}+`, which  works similarly to `{n,m}`, but which does not backtrack @@ -499,7 +499,7 @@ extent if subsequent parts of the pattern fail to match.  ### 6. ONIG_SYN_OP2_CCLASS_SET_OP (enable `&&` within `[...]`) -_Set in: Oniguruma, Java, Ruby_ +_Set in: Oniguruma, Ruby, Java_  Enables support for character-class _intersection_.  For example, with this  feature enabled, you can write `[a-z&&[^aeiou]]` to produce a character class @@ -509,7 +509,7 @@ all control codes _except_ newlines.  ### 7. 
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (enable named captures `(?<name>...)`) -_Set in: Oniguruma, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG_  Enables support for _naming_ capture groups, so that instead of having to  refer to captures by position (like `\3` or `$3`), you can refer to them by names @@ -519,7 +519,7 @@ and `(?'name'...)`, but not the Python `(?P<name>...)` syntax.  ### 8. ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (enable named backreferences `\k<name>`) -_Set in: Oniguruma, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG_  Enables support for substituted backreferences by name, not just by position.  This supports using `\k'name'` in addition to supporting `\k<name>`.  This also @@ -530,7 +530,7 @@ the match, if the capture matched multiple times, by writing `\k<name+n>` or  ### 9. ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (enable backreferences `\g<name>` and `\g<n>`) -_Set in: Oniguruma, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG_  Enables support for substituted backreferences by both name and position using  the same syntax.  This supports using `\g'name'` and `\g'1'` in addition to @@ -562,7 +562,7 @@ followed by a single character (or equivalent), indicating which code point to r  based on that character's lowest five bits.  So, like `\c`, you can represent code-point  10 with `\C-j`, but you can also represent it with `\C-*` as well. -See also ONIG_SYN_OP_ESC_C_CONTROL, which enables the more-common `\cx` syntax. +See also `ONIG_SYN_OP_ESC_C_CONTROL`, which enables the more-common `\cx` syntax.  ### 12. ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (enable `\M-x`) @@ -577,7 +577,7 @@ with `0x80`).  So, for example, you can match `\x81` using `\x81`, or you can wr  ### 13. ONIG_SYN_OP2_ESC_V_VTAB (enable `\v` as vertical tab) -_Set in: Oniguruma, Java, Ruby_ +_Set in: Oniguruma, Python, Ruby, Java_  Enables support for a C-style `\v` escape code, meaning "vertical tab."  If enabled,  `\v` will be equivalent to ASCII code point 11. 
@@ -585,7 +585,7 @@ Enables support for a C-style `\v` escape code, meaning "vertical tab."  If enab  ### 14. ONIG_SYN_OP2_ESC_U_HEX4 (enable `\uHHHH` for Unicode) -_Set in: Oniguruma, Java, Ruby_ +_Set in: Oniguruma, Python, Ruby, Java_  Enables support for a Java-style `\uHHHH` escape code for representing Unicode  code-points by number, using up to four hexadecimal digits (up to `\uFFFF`).  So, @@ -593,8 +593,8 @@ for example, `\u221E` will match an infinity symbol, `∞`.  For code points larger than four digits, like the emoji `🚡` (aerial tramway, or code  point U+1F6A1), you must either represent the character directly using an encoding like -UTF-8, or you must enable support for ONIG_SYN_OP_ESC_X_BRACE_HEX8 or -ONIG_SYN_OP_ESC_O_BRACE_OCTAL, which support more than four digits. +UTF-8, or you must enable support for `ONIG_SYN_OP_ESC_X_BRACE_HEX8` or +`ONIG_SYN_OP_ESC_O_BRACE_OCTAL`, which support more than four digits.  (New feature as of Oniguruma 6.7.) @@ -604,29 +604,29 @@ ONIG_SYN_OP_ESC_O_BRACE_OCTAL, which support more than four digits.  _Set in: Emacs_  This flag makes the ``\` `` and `\'` escapes function identically to -`\A` and `\z`, respectively (when ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR is enabled). +`\A` and `\z`, respectively (when `ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR` is enabled).  These anchor forms are very obscure, and rarely supported by other regex libraries.  ### 16. ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (enable `\p{...}` and `\P{...}`) -_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java_  Enables support for an alternate syntax for POSIX character classes; instead of  writing `[:alpha:]` when this is enabled, you can instead write `\p{alpha}`. -See also ONIG_SYN_OP_POSIX_BRACKET for the classic POSIX form. +See also `ONIG_SYN_OP_POSIX_BRACKET` for the classic POSIX form.  ### 17. 
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (enable `\p{^...}` and `\P{^...}`) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl_  Enables support for an alternate syntax for POSIX character classes; instead of  writing `[:^alpha:]` when this is enabled, you can instead write `\p{^alpha}`. -See also ONIG_SYN_OP_POSIX_BRACKET for the classic POSIX form. +See also `ONIG_SYN_OP_POSIX_BRACKET` for the classic POSIX form.  ### 18. ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS @@ -647,7 +647,7 @@ characters in `[0-9a-fA-F]`.  ### 20. ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (disable `\`) -_Set in: As-is_ +_Set in: ASIS_  If set, this disables all escape codes, shorthands, and metacharacters that start  with `\` (or whatever the configured escape character is), allowing `\` to be treated @@ -658,7 +658,7 @@ You usually do not want this flag to be enabled.  ### 21. ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE (enable `(?(...)then|else)`) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl_  Enables support for conditional inclusion of subsequent regex patterns based on whether  a prior named or numbered capture matched, or based on whether a pattern will @@ -676,7 +676,7 @@ match.  This supports many different forms, including:  ### 22. ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (enable `\K`) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl_  Enables support for `\K`, which excludes all content before it from the overall  regex match (i.e., capture #0).  So, for example, pattern `foo\Kbar` would match @@ -687,7 +687,7 @@ regex match (i.e., capture #0).  So, for example, pattern `foo\Kbar` would match  ### 23. 
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (enable `\R`) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl_  Enables support for `\R`, the "general newline" shorthand, which matches  `(\r\n|[\n\v\f\r\u0085\u2028\u2029])` (obviously, the Unicode values are cannot be @@ -698,7 +698,7 @@ matched in ASCII encodings).  ### 24. ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (enable `\N` and `\O`) -_Set in: Oniguruma, Perl, Perl_NG_ +_Set in: Oniguruma, Perl_NG, Perl_  Enables support for `\N` and `\O`.  `\N` is "not a line break," which is much  like the standard `.` metacharacter, except that while `.` can be affected by @@ -713,7 +713,7 @@ multi-line mode are enabled or disabled.  ### 25. ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (enable `(?~...)`) -_Set in: Oniguruma, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl_  Enables support for the `(?~r)` "absent operator" syntax, which matches  as much as possible as long as the result _doesn't_ match pattern `r`.  This is @@ -731,7 +731,7 @@ excellent article about it is [available on Medium](https://medium.com/rubyinsid  ### 26. ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT (enable `\X` and `\Y` and `\y`) -_Set in: Oniguruma, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG, Perl_  `\X` is another variation on `.`, designed to support Unicode, in that it matches  a full _grapheme cluster_.  In Unicode, `à` can be encoded as one code point, @@ -764,7 +764,7 @@ backreferences.  ### 28. ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (enable `(?{...})`) -_Set in: Oniguruma, Perl, Perl_NG_ +_Set in: Oniguruma, Perl_NG, Perl_  Enables support for Perl-style "callouts" — pattern substitutions that result from  invoking a callback method.  When `(?{foo})` is reached in a pattern, the callback @@ -779,7 +779,7 @@ Full documentation for this advanced feature can be found in the Oniguruma  ### 29. 
ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (enable `(*name)`) -_Set in: Oniguruma, Perl, Perl_NG_ +_Set in: Oniguruma, Python, Perl_NG, Perl_  Enables support for Perl-style "callouts" — pattern substitutions that result from  invoking a callback method.  When `(*foo)` is reached in a pattern, the callback @@ -809,6 +809,13 @@ Enables support of regex options. (i,m,x,W,S,D,P,y)    - `S` - ASCII only space.    - `P` - ASCII only POSIX properties. (includes W,D,S) + +### 31. ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME (enable `(?P<name>...)` and `(?P=name)`) + +_Set in: Python_ + +(New feature as of Oniguruma 6.9.7) +  ---------- @@ -820,19 +827,19 @@ some syntaxes but not in others.  ### 0. ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (independent `?`, `*`, `+`, `{n,m}`) -_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_  This flag specifies how to handle operators like `?` and `*` when they aren't  directly attached to an operand, as in `^*` or `(*)`:  Are they an error, are  they discarded, or are they taken as literals?  If this flag is clear, they -are taken as literals; otherwise, the ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS flag +are taken as literals; otherwise, the `ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS` flag  determines if they are errors or if they are discarded.  ### 1. ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (error or ignore independent operators) -_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_ -If ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS is set, this flag controls what happens when +If `ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS` is set, this flag controls what happens when  independent operators appear in a pattern:  If this flag is set, then independent  operators produce an error message; if this flag is clear, then independent  operators are silently discarded. 
@@ -847,7 +854,7 @@ character will produce an error message.  ### 3. ONIG_SYN_ALLOW_INVALID_INTERVAL (allow `{???`) -_Set in: Oniguruma, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_  This flag, if set, causes an invalid range, like `foo{bar}` or `foo{}`, to be  silently discarded, as if `foo` had been written instead.  If clear, an invalid @@ -855,13 +862,13 @@ range will produce an error message.  ### 4. ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (allow `{,n}` to mean `{0,n}`) -_Set in: Oniguruma, Ruby_ +_Set in: Oniguruma, Python, Ruby_  If this flag is set, then `r{,n}` will be treated as equivalent to writing  `{0,n}`.  If this flag is clear, then `r{,n}` will produce an error message.  Note that regardless of whether this flag is set or clear, if -ONIG_SYN_OP_BRACE_INTERVAL is enabled, then `r{n,}` will always be legal:  This +`ONIG_SYN_OP_BRACE_INTERVAL` is enabled, then `r{n,}` will always be legal:  This  flag *only* controls the behavior of the opposite form, `r{,n}`.  ### 5. ONIG_SYN_STRICT_CHECK_BACKREF (error on invalid backrefs) @@ -876,7 +883,7 @@ No built-in syntax has this flag enabled.  ### 6. ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (allow `(?<=a|bc)`) -_Set in: Oniguruma, Java, Ruby_ +_Set in: Oniguruma, Ruby, Java_  If this flag is set, lookbehind patterns with alternate options may have differing  lengths among those options.  If this flag is clear, lookbehind patterns with options @@ -888,15 +895,15 @@ depend on this rule.  ### 7. ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (prefer `\k<name>` over `\3`) -_Set in: Oniguruma, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG_ -If this flag is set on the syntax *and* ONIG_OPTION_CAPTURE_GROUP is set when calling +If this flag is set on the syntax *and* `ONIG_OPTION_CAPTURE_GROUP` is set when calling  Oniguruma, then if a name is used on any capture, all captures must also use names:  A  single use of a named capture prohibits the use of numbered captures.  
### 8. ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (allow `(?<x>)...(?<x>)`) -_Set in: Oniguruma, Perl_NG, Ruby_ +_Set in: Oniguruma, Ruby, Perl_NG_  If this flag is set, multiple capture groups may use the same name.  If this flag is  clear, then reuse of a name will produce an error message. @@ -912,10 +919,10 @@ then `r{n}?` will mean the same as `r{n}`, and the useless `?` will be discarded  ### 10. ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH (`..(?i)..`) -_Set in: Perl, Perl_NG, Java_ +_Set in: Python, Perl_NG, Perl, Java_  If this flag is set, then an isolated option doesn't break the branch and affects until the end of the group (or end of the pattern). -If this flag is not set, then an isolated option is interpreted as the starting point of a new branch. /a(?i)b|c/ ==> /a(?i:b|c)/ +If this flag is not set, then an isolated option is interpreted as the starting point of a new branch. `/a(?i)b|c/` ==> `/a(?i:b|c)/`  ### 11. ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND (`(?<=...a+...)`) @@ -923,6 +930,24 @@ _Set in: Oniguruma, Java_  If this flag is set, then variable-length expressions are allowed in look-behind. +### 12. ONIG_SYN_PYTHON (enable `\UHHHHHHHH` for Unicode) + +_Set in: Python_ + +(New feature as of Oniguruma 6.9.7) + +### 13. ONIG_SYN_WHOLE_OPTIONS (enable options `(?CLI)`) + +_Set in: Oniguruma_ + +(New feature as of Oniguruma 6.9.8) + +### 14. ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP (enable `\(^abc$\)`) + +_Set in: Grep, PosixBasic_ + +(New feature as of Oniguruma 6.9.9) +  ### 20. ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (add `\n` to `[^...]`)  _Set in: Grep_ @@ -934,7 +959,7 @@ only exclude those characters and ranges written in them.  ### 21. ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (allow `[...\w...]`) -_Set in: Oniguruma, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex_  If this flag is set, shorthands like `\w` are allowed to describe characters in character  classes.  
If this flag is clear, shorthands like `\w` are treated as a redundantly-escaped @@ -942,7 +967,7 @@ literal `w`.  ### 22. ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (silently discard `[z-a]`) -_Set in: Emacs, Grep_ +_Set in: Grep, Emacs_  If this flag is set, then character ranges like `[z-a]` that are broken or contain no  characters will be silently ignored.  If this flag is clear, then broken or empty @@ -950,7 +975,7 @@ character ranges will produce an error message.  ### 23. ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (treat `[0-9-a]` as `[0-9\-a]`) -_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_  If this flag is set, then a trailing `-` after a character range will be taken as a  literal `-`, as if it had been escaped as `\-`.  If this flag is clear, then a trailing @@ -973,15 +998,21 @@ _Set in: Oniguruma, Ruby_  If this flag is set, Oniguruma will warn about nested repeat operators those have no meaning, like `(?:a*)+`.  If this flag is clear, Oniguruma will allow the nested repeat operators without warning about them. -### 26. ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (allow [a-\x{7fffffff}]) +### 26. ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (allow `[a-\x{7fffffff}]`)  _Set in: Oniguruma_  If this flag is set, then invalid code points at the end of range in character class are allowed. +### 27. ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC (allow `[\w-%]` to mean `[\w\-%]`)  + +_Set in: Perl_NG, Perl, Java_ + +(New feature as of Oniguruma 6.9.10) +  ### 31. ONIG_SYN_CONTEXT_INDEP_ANCHORS -_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ +_Set in: Oniguruma, Python, Ruby, Perl_NG, Perl, Java, GnuRegex, PosixExtended_  Not currently used, and does nothing.  (But still set in several syntaxes for some  reason.) 
@@ -994,98 +1025,102 @@ These tables show which of the built-in syntaxes use which flags and options, fo  ### Group One Flags (op) -| ID    | Option                                        | PosB  | PosEx | Emacs | Grep  | Gnu   | Java  | Perl  | PeNG  | Ruby  | Onig  | -| ----- | --------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | -| 0     | `ONIG_SYN_OP_VARIABLE_META_CHARACTERS`        | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -| 1     | `ONIG_SYN_OP_DOT_ANYCHAR`                     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 2     | `ONIG_SYN_OP_ASTERISK_ZERO_INF`               | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 3     | `ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF`           | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -| 4     | `ONIG_SYN_OP_PLUS_ONE_INF`                    | -     | Yes   | Yes   | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 5     | `ONIG_SYN_OP_ESC_PLUS_ONE_INF`                | -     | -     | -     | Yes   | -     | -     | -     | -     | -     | -     | -| 6     | `ONIG_SYN_OP_QMARK_ZERO_ONE`                  | -     | Yes   | Yes   | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 7     | `ONIG_SYN_OP_ESC_QMARK_ZERO_ONE`              | -     | -     | -     | Yes   | -     | -     | -     | -     | -     | -     | -| 8     | `ONIG_SYN_OP_BRACE_INTERVAL`                  | -     | Yes   | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 9     | `ONIG_SYN_OP_ESC_BRACE_INTERVAL`              | Yes   | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -| 10    | `ONIG_SYN_OP_VBAR_ALT`                        | -     | Yes   | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 11    | `ONIG_SYN_OP_ESC_VBAR_ALT`                    | -  
   | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -| 12    | `ONIG_SYN_OP_LPAREN_SUBEXP`                   | -     | Yes   | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 13    | `ONIG_SYN_OP_ESC_LPAREN_SUBEXP`               | Yes   | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -| 14    | `ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR`               | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 15    | `ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR`      | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 16    | `ONIG_SYN_OP_DECIMAL_BACKREF`                 | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 17    | `ONIG_SYN_OP_BRACKET_CC`                      | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 18    | `ONIG_SYN_OP_ESC_W_WORD`                      | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 19    | `ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END`         | -     | -     | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -| 20    | `ONIG_SYN_OP_ESC_B_WORD_BOUND`                | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 21    | `ONIG_SYN_OP_ESC_S_WHITE_SPACE`               | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 22    | `ONIG_SYN_OP_ESC_D_DIGIT`                     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 23    | `ONIG_SYN_OP_LINE_ANCHOR`                     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 24    | `ONIG_SYN_OP_POSIX_BRACKET`                   | Yes   | Yes   | Yes   | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 25    | `ONIG_SYN_OP_QMARK_NON_GREEDY`                | -     | -     | -     | -     | -     | Yes   | Yes   | 
Yes   | Yes   | Yes   | -| 26    | `ONIG_SYN_OP_ESC_CONTROL_CHARS`               | Yes   | Yes   | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | -| 27    | `ONIG_SYN_OP_ESC_C_CONTROL`                   | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | -| 28    | `ONIG_SYN_OP_ESC_OCTAL3`                      | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | -| 29    | `ONIG_SYN_OP_ESC_X_HEX2`                      | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | -| 30    | `ONIG_SYN_OP_ESC_X_BRACE_HEX8`                | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | -| 31    | `ONIG_SYN_OP_ESC_O_BRACE_OCTAL`               | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | +| ID    | Option                                     | Onig  | Pythn | Ruby  | PeNG  | Perl  | Java  | Gnu   | Grep  | Emacs | PosEx | PosB  | ASIS  | +| ----- | ------------------------------------------ | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | +|  0    | `ONIG_SYN_OP_VARIABLE_META_CHARACTERS`     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | +|  1    | `ONIG_SYN_OP_DOT_ANYCHAR`                  | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | +|  2    | `ONIG_SYN_OP_ASTERISK_ZERO_INF`            | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | +|  3    | `ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF`        | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | +|  4    | `ONIG_SYN_OP_PLUS_ONE_INF`                 | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | Yes   | Yes   | -     | -     | +|  5    | `ONIG_SYN_OP_ESC_PLUS_ONE_INF`             | -     | -     | -     
| -     | -     | -     | -     | Yes   | -     | -     | -     | -     | +|  6    | `ONIG_SYN_OP_QMARK_ZERO_ONE`               | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | Yes   | Yes   | -     | -     | +|  7    | `ONIG_SYN_OP_ESC_QMARK_ZERO_ONE`           | -     | -     | -     | -     | -     | -     | -     | Yes   | -     | -     | -     | -     | +|  8    | `ONIG_SYN_OP_BRACE_INTERVAL`               | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | Yes   | -     | -     | +|  9    | `ONIG_SYN_OP_ESC_BRACE_INTERVAL`           | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -     | Yes   | -     | +| 10    | `ONIG_SYN_OP_VBAR_ALT`                     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | Yes   | -     | -     | +| 11    | `ONIG_SYN_OP_ESC_VBAR_ALT`                 | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -     | -     | -     | +| 12    | `ONIG_SYN_OP_LPAREN_SUBEXP`                | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | Yes   | -     | -     | +| 13    | `ONIG_SYN_OP_ESC_LPAREN_SUBEXP`            | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -     | Yes   | -     | +| 14    | `ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR`            | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | +| 15    | `ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR`   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | +| 16    | `ONIG_SYN_OP_DECIMAL_BACKREF`              | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | +| 17    | `ONIG_SYN_OP_BRACKET_CC`                   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | +| 18    | `ONIG_SYN_OP_ESC_W_WORD`                   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   
| Yes   | Yes   | -     | -     | -     | -     | +| 19    | `ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END`      | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -     | -     | -     | -     | +| 20    | `ONIG_SYN_OP_ESC_B_WORD_BOUND`             | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | +| 21    | `ONIG_SYN_OP_ESC_S_WHITE_SPACE`            | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | +| 22    | `ONIG_SYN_OP_ESC_D_DIGIT`                  | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | +| 23    | `ONIG_SYN_OP_LINE_ANCHOR`                  | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | +| 24    | `ONIG_SYN_OP_POSIX_BRACKET`                | Yes   | -     | Yes   | Yes   | Yes   | -     | Yes   | Yes   | -     | Yes   | Yes   | -     | +| 25    | `ONIG_SYN_OP_QMARK_NON_GREEDY`             | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +| 26    | `ONIG_SYN_OP_ESC_CONTROL_CHARS`            | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | Yes   | Yes   | Yes   | -     | +| 27    | `ONIG_SYN_OP_ESC_C_CONTROL`                | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +| 28    | `ONIG_SYN_OP_ESC_OCTAL3`                   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +| 29    | `ONIG_SYN_OP_ESC_X_HEX2`                   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +| 30    | `ONIG_SYN_OP_ESC_X_BRACE_HEX8`             | Yes   | -     | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | +| 31    | `ONIG_SYN_OP_ESC_O_BRACE_OCTAL`            | Yes   | -     | Yes   | Yes   | Yes   | -     | -     | -     | -     
| -     | -     | -     |  ### Group Two Flags (op2) -| ID    | Option                                        | PosB  | PosEx | Emacs | Grep  | Gnu   | Java  | Perl  | PeNG  | Ruby  | Onig  | -| ----- | --------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | -| 0     | `ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE`            | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -     | -     | -| 1     | `ONIG_SYN_OP2_QMARK_GROUP_EFFECT`             | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | -| 2     | `ONIG_SYN_OP2_OPTION_PERL`                    | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -     | -     | -| 3     | `ONIG_SYN_OP2_OPTION_RUBY`                    | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | -     | -| 4     | `ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT`         | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -| 5     | `ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL`       | -     | -     | -     | -     | -     | Yes   | -     | -     | -     | -     | -| 6     | `ONIG_SYN_OP2_CCLASS_SET_OP`                  | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -| 7     | `ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP`           | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -| 8     | `ONIG_SYN_OP2_ESC_K_NAMED_BACKREF`            | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -| 9     | `ONIG_SYN_OP2_ESC_G_SUBEXP_CALL`              | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -| 10    | `ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY`         | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -| 11    | `ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL`      | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | 
-| 12    | `ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META`         | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -| 13    | `ONIG_SYN_OP2_ESC_V_VTAB`                     | -     | -     | -     | -     | -     | Yes   | -     | -     | Yes   | Yes   | -| 14    | `ONIG_SYN_OP2_ESC_U_HEX4`                     | -     | -     | -     | -     | -     | Yes   | -     | -     | Yes   | Yes   | -| 15    | `ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR`             | -     | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -| 16    | `ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY`      | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | -| 17    | `ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT`     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | -| 18    | `ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS`        | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -| 19    | `ONIG_SYN_OP2_ESC_H_XDIGIT`                   | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -| 20    | `ONIG_SYN_OP2_INEFFECTIVE_ESCAPE`             | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -| 21    | `ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE`           | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | -| 22    | `ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP`             | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | -| 23    | `ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE`  | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | -| 24    | `ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT`      | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -     | Yes   | -| 25    | `ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP`       | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -| 26    | `ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT`        
   | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | -| 27    | `ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL`         | -     | -     | -     | -     | -     | -     | -     | Yes   | -     | -     | -| 28    | `ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS`   | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -     | -| 29    | `ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME`          | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -     | -| 30    | `ONIG_SYN_OP2_OPTION_ONIGURUMA`               | -     | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | +| ID    | Option                                         | Onig  | Pythn | Ruby  | PeNG  | Perl  | Java  | Gnu   | Grep  | Emacs | PosEx | PosB  | ASIS  | +| ----- | ---------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | +|  0    | `ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE`             | -     | -     | -     | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +|  1    | `ONIG_SYN_OP2_QMARK_GROUP_EFFECT`              | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | Yes   | -     | -     | -     | +|  2    | `ONIG_SYN_OP2_OPTION_PERL`                     | -     | Yes   | -     | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +|  3    | `ONIG_SYN_OP2_OPTION_RUBY`                     | -     | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | +|  4    | `ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT`          | Yes   | -     | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +|  5    | `ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL`        | -     | -     | -     | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +|  6    | `ONIG_SYN_OP2_CCLASS_SET_OP`                   | Yes   | -     | Yes   | -     | -     | Yes   | -  
   | -     | -     | -     | -     | -     | +|  7    | `ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP`            | Yes   | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | +|  8    | `ONIG_SYN_OP2_ESC_K_NAMED_BACKREF`             | Yes   | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | +|  9    | `ONIG_SYN_OP2_ESC_G_SUBEXP_CALL`               | Yes   | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | +| 10    | `ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY`          | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 11    | `ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL`       | Yes   | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 12    | `ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META`          | Yes   | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 13    | `ONIG_SYN_OP2_ESC_V_VTAB`                      | Yes   | Yes   | Yes   | -     | -     | Yes   | -     | -     | -     | -     | -     | -     | +| 14    | `ONIG_SYN_OP2_ESC_U_HEX4`                      | Yes   | Yes   | Yes   | -     | -     | Yes   | -     | -     | -     | -     | -     | -     | +| 15    | `ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR`              | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | -     | -     | -     | +| 16    | `ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY`       | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +| 17    | `ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT`      | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | +| 19    | `ONIG_SYN_OP2_ESC_H_XDIGIT`                    | Yes   | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 20    | `ONIG_SYN_OP2_INEFFECTIVE_ESCAPE`              | -     | -     | -     
| -     | -     | -     | -     | -     | -     | -     | -     | Yes   | +| 21    | `ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE`            | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | +| 22    | `ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP`              | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | +| 23    | `ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE`   | Yes   | -     | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | +| 24    | `ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT`       | Yes   | -     | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | +| 25    | `ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP`        | Yes   | -     | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | +| 26    | `ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT`            | Yes   | -     | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | +| 27    | `ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL`          | -     | -     | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | +| 28    | `ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS`    | Yes   | -     | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | +| 29    | `ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME`           | Yes   | Yes   | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | +| 30    | `ONIG_SYN_OP2_OPTION_ONIGURUMA`                | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 31    | `ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME`            | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     |  ### Syntax Flags (syn) -| ID    | Option                                        | PosB  | PosEx | Emacs | Grep  | Gnu   | Java  | Perl  | PeNG  | Ruby  | Onig  | -| ----- | 
--------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | -| 0     | `ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS`           | -     | Yes   | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 1     | `ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS`         | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 2     | `ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP`       | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -| 3     | `ONIG_SYN_ALLOW_INVALID_INTERVAL`             | -     | -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 4     | `ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV`          | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -| 5     | `ONIG_SYN_STRICT_CHECK_BACKREF`               | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -| 6     | `ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND`      | -     | -     | -     | -     | -     | Yes   | -     | -     | Yes   | Yes   | -| 7     | `ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP`           | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -| 8     | `ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME`    | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -| 9     | `ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY`      | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -| 10    | `ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH`    | -     | -     | -     | -     | -     | Yes   | Yes   | Yes   | -     | -     | -| 11    | `ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND`           | -     | -     | -     | -     | -     | Yes   | -     | -     | -     | Yes   | -| 20    | `ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC`         | -     | -     | -     | Yes   | -     | -     | -     | -     | -     | -     | -| 21    | `ONIG_SYN_BACKSLASH_ESCAPE_IN_CC`             | -     
| -     | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 22    | `ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC`            | -     | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -| 23    | `ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC`        | -     | Yes   | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -| 24    | `ONIG_SYN_WARN_CC_OP_NOT_ESCAPED`             | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -| 25    | `ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT`       | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -| 26    | `ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC` | -     | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | -| 31    | `ONIG_SYN_CONTEXT_INDEP_ANCHORS`              | -     | Yes   | -     | -     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | +| ID    | Option                                               | Onig  | Pythn | Ruby  | PeNG  | Perl  | Java  | Gnu   | Grep  | Emacs | PosEx | PosB  | ASIS  | +| ----- | ---------------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | +|  0    | `ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS`                  | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | Yes   | -     | -     | +|  1    | `ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS`                | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | Yes   | -     | -     | +|  2    | `ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP`              | -     | -     | -     | -     | -     | -     | -     | -     | -     | Yes   | -     | -     | +|  3    | `ONIG_SYN_ALLOW_INVALID_INTERVAL`                    | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | +|  4    | `ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV`                 | Yes   | Yes   | Yes   | -     | -     | -     | -  
   | -     | -     | -     | -     | -     | +|  5    | `ONIG_SYN_STRICT_CHECK_BACKREF`                      | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | +|  6    | `ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND`             | Yes   | -     | Yes   | -     | -     | Yes   | -     | -     | -     | -     | -     | -     | +|  7    | `ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP`                  | Yes   | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | +|  8    | `ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME`           | Yes   | -     | Yes   | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | +|  9    | `ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY`             | Yes   | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 10    | `ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH`           | -     | Yes   | -     | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +| 11    | `ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND`                  | Yes   | -     | -     | -     | -     | Yes   | -     | -     | -     | -     | -     | -     | +| 12    | `ONIG_SYN_PYTHON`                                    | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 13    | `ONIG_SYN_WHOLE_OPTIONS`                             | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 14    | `ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP`              | -     | -     | -     | -     | -     | -     | -     | Yes   | -     | -     | Yes   | -     | +| 20    | `ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC`                | -     | -     | -     | -     | -     | -     | -     | Yes   | -     | -     | -     | -     | +| 21    | `ONIG_SYN_BACKSLASH_ESCAPE_IN_CC`                    | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | +| 22    | 
`ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC`                   | -     | -     | -     | -     | -     | -     | -     | Yes   | Yes   | -     | -     | -     | +| 23    | `ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC`               | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | Yes   | -     | -     | +| 24    | `ONIG_SYN_WARN_CC_OP_NOT_ESCAPED`                    | Yes   | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 25    | `ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT`              | Yes   | -     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 26    | `ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC`     | Yes   | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | -     | +| 27    | `ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC`   | -     | -     | -     | Yes   | Yes   | Yes   | -     | -     | -     | -     | -     | -     | +| 31    | `ONIG_SYN_CONTEXT_INDEP_ANCHORS`                     | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | Yes   | -     | -     | Yes   | -     | -     | diff --git a/doc/UNICODE_PROPERTIES b/doc/UNICODE_PROPERTIES index 3d2dc09..7ab6d78 100644 --- a/doc/UNICODE_PROPERTIES +++ b/doc/UNICODE_PROPERTIES @@ -1,4 +1,4 @@ -Unicode Properties (Unicode Version: 15.1.0,  Emoji: 15.1) +Unicode Properties (Unicode Version: 16.0.0,  Emoji: 16.0)  ASCII_Hex_Digit  Adlam @@ -68,6 +68,7 @@ Emoji_Presentation  Ethiopic  Extended_Pictographic  Extender +Garay  Georgian  Glagolitic  Gothic @@ -79,6 +80,7 @@ Greek  Gujarati  Gunjala_Gondi  Gurmukhi +Gurung_Khema  Han  Hangul  Hanifi_Rohingya @@ -113,6 +115,7 @@ Khitan_Small_Script  Khmer  Khojki  Khudawadi +Kirat_Rai  L  LC  Lao @@ -150,6 +153,7 @@ Meroitic_Hieroglyphs  Miao  Mn  Modi +Modifier_Combining_Mark  Mongolian  Mro  Multani @@ -169,6 +173,7 @@ Nushu  Nyiakeng_Puachue_Hmong  Ogham  Ol_Chiki +Ol_Onal  Old_Hungarian  Old_Italic  Old_North_Arabian @@ -229,6 +234,7 @@ Sogdian  
Sora_Sompeng  Soyombo  Sundanese +Sunuwar  Syloti_Nagri  Syriac  Tagalog @@ -247,7 +253,9 @@ Thai  Tibetan  Tifinagh  Tirhuta +Todhri  Toto +Tulu_Tigalari  Ugaritic  Unified_Ideograph  Unknown @@ -330,6 +338,7 @@ Ext  ExtPict  Final_Punctuation  Format +Gara  Geor  Glag  Gong @@ -341,6 +350,7 @@ Grek  Gr_Ext  Gr_Link  Gujr +Gukh  Guru  Hang  Hani @@ -370,6 +380,7 @@ Khmr  Khoj  Kits  Knda +Krai  Kthi  Lana  Laoo @@ -392,6 +403,7 @@ Mani  Marc  Mark  Math_Symbol +MCM  Medf  Mend  Merc @@ -422,6 +434,7 @@ OIDS  Olck  OLower  OMath +Onao  Open_Punctuation  Orkh  Orya @@ -476,6 +489,7 @@ Space_Separator  Spacing_Mark  STerm  Sund +Sunu  Surrogate  Sylo  Symbol @@ -496,6 +510,8 @@ Tibt  Tirh  Titlecase_Letter  Tnsa +Todr +Tutg  Ugar  UIdeo  Unassigned @@ -701,6 +717,7 @@ In_Osage  In_Elbasan  In_Caucasian_Albanian  In_Vithkuqi +In_Todhri  In_Linear_A  In_Latin_Extended_F  In_Cypriot_Syllabary @@ -723,6 +740,7 @@ In_Psalter_Pahlavi  In_Old_Turkic  In_Old_Hungarian  In_Hanifi_Rohingya +In_Garay  In_Rumi_Numeral_Symbols  In_Yezidi  In_Arabic_Extended_C @@ -742,12 +760,14 @@ In_Khojki  In_Multani  In_Khudawadi  In_Grantha +In_Tulu_Tigalari  In_Newa  In_Tirhuta  In_Siddham  In_Modi  In_Mongolian_Supplement  In_Takri +In_Myanmar_Extended_C  In_Ahom  In_Dogra  In_Warang_Citi @@ -758,6 +778,7 @@ In_Soyombo  In_Unified_Canadian_Aboriginal_Syllabics_Extended_A  In_Pau_Cin_Hau  In_Devanagari_Extended_A +In_Sunuwar  In_Bhaiksuki  In_Marchen  In_Masaram_Gondi @@ -772,12 +793,15 @@ In_Early_Dynastic_Cuneiform  In_Cypro_Minoan  In_Egyptian_Hieroglyphs  In_Egyptian_Hieroglyph_Format_Controls +In_Egyptian_Hieroglyphs_Extended_A  In_Anatolian_Hieroglyphs +In_Gurung_Khema  In_Bamum_Supplement  In_Mro  In_Tangsa  In_Bassa_Vah  In_Pahawh_Hmong +In_Kirat_Rai  In_Medefaidrin  In_Miao  In_Ideographic_Symbols_and_Punctuation @@ -792,6 +816,7 @@ In_Small_Kana_Extension  In_Nushu  In_Duployan  In_Shorthand_Format_Controls +In_Symbols_for_Legacy_Computing_Supplement  In_Znamenny_Musical_Notation  
In_Byzantine_Musical_Symbols  In_Musical_Symbols @@ -809,6 +834,7 @@ In_Nyiakeng_Puachue_Hmong  In_Toto  In_Wancho  In_Nag_Mundari +In_Ol_Onal  In_Ethiopic_Extended_B  In_Mende_Kikakui  In_Adlam diff --git a/doc/onig_syn_md.c b/doc/onig_syn_md.c new file mode 100644 index 0000000..6e29e5a --- /dev/null +++ b/doc/onig_syn_md.c @@ -0,0 +1,667 @@ +/* + * onig_syn_md.c + * Copyright (c) 2024  K.Kosako + * + * Oniguruma OWner: K.Kosako       https://github.com/kkos/oniguruma + * SYNTAX.md      : seanofw        https://github.com/seanofw + * onig_syn_md.c  : tonco-miyazawa https://github.com/tonco-miyazawa + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "oniguruma.h" + +#define ONIG_SYN_MD_VERSION_INT           (00002) +#define TOTAL_NUM_OF_BITS                 (32) + +#define PRINT_SEPARATOR                   (printf("===================================================\n")) + +#define INPUT_SYNTAX(syn, abb, set_in)    { (syn), (#syn), (abb), (set_in) } +#define INPUT_FLAG(arg)                   { (arg), (#arg) } + + + +/************************************* Settings *********************************************/ +/* NOW_MODE +1: (OP) +2: (OP2) +3: (BEHAVIOR) */ +#define NOW_MODE                          (1) + +/*  #define PRINT_UNDEFINED_FLAG  */ + +/*  #define PRINT_SYNTAX_FORWARD_ORDER  */ + +/*  #define USE_YOUR_OWN_SYNTAX  */ + +/*  #define PRINT_DEBUG_INFO  */ + +#define PRINT_VERSION_INFO +#define PRINT_SET_IN_INFO +#define PRINT_TABLE_INFO +#define WARN_UNDEFINED_FLAG_USED +/************************************* Settings *********************************************/ + + +/************************ Switch between OP, OP2, BEHAVIOR **********************************/ +#if   NOW_MODE == 1 + +#define SYNTAX_MEMBER_NAME                ("op") +#define TITLE_STRING                      ("Group One Flags (op)") +#define SYNTAX_MEMBER(syn)                ((syn)->op) +#define IS_SYNTAX_MEMBER(syn, opm)        (((syn)->op & (opm)) != 0) + 
+#elif NOW_MODE == 2 + +#define SYNTAX_MEMBER_NAME                ("op2") +#define TITLE_STRING                      ("Group Two Flags (op2)") +#define SYNTAX_MEMBER(syn)                ((syn)->op2) +#define IS_SYNTAX_MEMBER(syn, opm)        (((syn)->op2 & (opm)) != 0) + +#elif NOW_MODE == 3 + +#define SYNTAX_MEMBER_NAME                ("behavior") +#define TITLE_STRING                      ("Syntax Flags (syn)") +#define SYNTAX_MEMBER(syn)                ((syn)->behavior) +#define IS_SYNTAX_MEMBER(syn, opm)        (((syn)->behavior & (opm)) != 0) + +#else +#error "Check 'NOW_MODE' value." +#endif +/************************ Switch between OP, OP2, BEHAVIOR **********************************/ + + +/*********************************** Your own syntax ****************************************/ +#ifdef USE_YOUR_OWN_SYNTAX +static OnigSyntaxType OnigSyntaxYourOwn; +#define ONIG_SYNTAX_YOUROWN    (&OnigSyntaxYourOwn) + +static OnigSyntaxType OnigSyntaxYourOwn = { +    0xf0f0f0f0      /*  Group One Flags (op)   */ +  , 0xffff0000      /*  Group Two Flags (op2)  */ +  , 0x00ff00ff      /*  Syntax Flags    (syn)  */ +  , ONIG_OPTION_NONE +  , +  { +      (OnigCodePoint )'\\'                       /* esc */ +    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */ +    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */ +    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ +    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ +    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ +  } +}; +#endif +/*********************************** Your own syntax ****************************************/ + + +/************************************** syntax data *****************************************/ +typedef struct { +    OnigSyntaxType* syn; +    char *name; +    char *abb; +    char *set_in; +} syn_data; + + +static syn_data  syn_data_list[] = +{ +   /*  INPUT_SYNTAX(syn, abb, set_in)  ===>   { (syn), (#syn), (abb), (set_in) }  */ + +    INPUT_SYNTAX( ONIG_SYNTAX_ASIS           , "ASIS" , "ASIS" ) +  , INPUT_SYNTAX( ONIG_SYNTAX_POSIX_BASIC    , "PosB" , "PosixBasic" ) +  , INPUT_SYNTAX( ONIG_SYNTAX_POSIX_EXTENDED , "PosEx", "PosixExtended") +  , INPUT_SYNTAX( ONIG_SYNTAX_EMACS          , "Emacs", "Emacs") +  , INPUT_SYNTAX( ONIG_SYNTAX_GREP           , "Grep" , "Grep" ) +  , INPUT_SYNTAX( ONIG_SYNTAX_GNU_REGEX      , "Gnu"  , "GnuRegex" ) +  , INPUT_SYNTAX( ONIG_SYNTAX_JAVA           , "Java" , "Java" ) +  , INPUT_SYNTAX( ONIG_SYNTAX_PERL           , "Perl" , "Perl" ) +  , INPUT_SYNTAX( ONIG_SYNTAX_PERL_NG        , "PeNG" , "Perl_NG" ) +  , INPUT_SYNTAX( ONIG_SYNTAX_RUBY           , "Ruby" , "Ruby" ) +  , INPUT_SYNTAX( ONIG_SYNTAX_PYTHON         , "Pythn", "Python") +  , INPUT_SYNTAX( ONIG_SYNTAX_ONIGURUMA      , "Onig" , "Oniguruma" ) + +#ifdef USE_YOUR_OWN_SYNTAX +  , INPUT_SYNTAX( ONIG_SYNTAX_YOUROWN        , "Your" , "YourOwn" ) +#endif +}; + +static const int num_of_syntax_types = (sizeof syn_data_list /sizeof syn_data_list[0]); + + +static void print_syn_data_list() +{ +  int y; +  PRINT_SEPARATOR; +  for (y = 0; y < num_of_syntax_types; y++) +  { +    printf( "\nsyn_data_list[%d]\n", y); +    printf( "name='%s'\n"   , syn_data_list[y].name); + +    printf( "syn->"); +    printf( SYNTAX_MEMBER_NAME ); +    printf( "= 0x%08x\n", SYNTAX_MEMBER(syn_data_list[y].syn) ); + +    printf( "abb='%s'\n"  
  , syn_data_list[y].abb); +    printf( "set_in='%s'\n"    , syn_data_list[y].set_in);         +  }; +  return ; +} +/************************************** syntax data *****************************************/ + + +/************************************** flag data *******************************************/ +typedef struct { +    unsigned int num; +    char *name; +} flag_data; + + +/*  OP  */ +#if NOW_MODE == 1 +static flag_data  flag_data_list[] = +{ + +/*  The following are no need to sort them in bit order.  */ + +/*  INPUT_FLAG(arg)  ===>    { (arg), (#arg) }   */ + +    INPUT_FLAG( ONIG_SYN_OP_VARIABLE_META_CHARACTERS ) +  , INPUT_FLAG( ONIG_SYN_OP_DOT_ANYCHAR ) +  , INPUT_FLAG( ONIG_SYN_OP_ASTERISK_ZERO_INF ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF ) +  , INPUT_FLAG( ONIG_SYN_OP_PLUS_ONE_INF ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_PLUS_ONE_INF ) +  , INPUT_FLAG( ONIG_SYN_OP_QMARK_ZERO_ONE ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_QMARK_ZERO_ONE ) +  , INPUT_FLAG( ONIG_SYN_OP_BRACE_INTERVAL ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_BRACE_INTERVAL ) +  , INPUT_FLAG( ONIG_SYN_OP_VBAR_ALT ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_VBAR_ALT ) +  , INPUT_FLAG( ONIG_SYN_OP_LPAREN_SUBEXP ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_LPAREN_SUBEXP ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR ) +  , INPUT_FLAG( ONIG_SYN_OP_DECIMAL_BACKREF ) +  , INPUT_FLAG( ONIG_SYN_OP_BRACKET_CC ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_W_WORD ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_B_WORD_BOUND ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_S_WHITE_SPACE ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_D_DIGIT ) +  , INPUT_FLAG( ONIG_SYN_OP_LINE_ANCHOR ) +  , INPUT_FLAG( ONIG_SYN_OP_POSIX_BRACKET ) +  , INPUT_FLAG( ONIG_SYN_OP_QMARK_NON_GREEDY ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_CONTROL_CHARS ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_C_CONTROL ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_OCTAL3 ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_X_HEX2 ) +  , 
INPUT_FLAG( ONIG_SYN_OP_ESC_X_BRACE_HEX8 ) +  , INPUT_FLAG( ONIG_SYN_OP_ESC_O_BRACE_OCTAL ) +}; +#endif + + +/*  OP2 */ +#if NOW_MODE == 2 +static flag_data  flag_data_list[] = +{ +    INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE ) +  , INPUT_FLAG( ONIG_SYN_OP2_QMARK_GROUP_EFFECT ) +  , INPUT_FLAG( ONIG_SYN_OP2_OPTION_PERL ) +  , INPUT_FLAG( ONIG_SYN_OP2_OPTION_RUBY ) +  , INPUT_FLAG( ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT ) +  , INPUT_FLAG( ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL ) +  , INPUT_FLAG( ONIG_SYN_OP2_CCLASS_SET_OP ) +  , INPUT_FLAG( ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_K_NAMED_BACKREF ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) +  , INPUT_FLAG( ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_V_VTAB ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_U_HEX4 ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) +  /*  , INPUT_FLAG( ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )  */ +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_H_XDIGIT ) +  , INPUT_FLAG( ONIG_SYN_OP2_INEFFECTIVE_ESCAPE ) +  , INPUT_FLAG( ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE ) +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT ) +  , INPUT_FLAG( ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP ) +  /*  , INPUT_FLAG( ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER ) */   /* obsoleted: use next */ +  , INPUT_FLAG( ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT ) +  , INPUT_FLAG( ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL ) +  , INPUT_FLAG( ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS ) +  , INPUT_FLAG( ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME ) +  , INPUT_FLAG( ONIG_SYN_OP2_OPTION_ONIGURUMA ) +  , INPUT_FLAG( ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME ) +}; +#endif + + +/*  BEHAVIOR  */ 
+#if NOW_MODE == 3 +static flag_data  flag_data_list[] = +{ +    INPUT_FLAG( ONIG_SYN_CONTEXT_INDEP_ANCHORS ) +  , INPUT_FLAG( ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS ) +  , INPUT_FLAG( ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS ) +  , INPUT_FLAG( ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP ) +  , INPUT_FLAG( ONIG_SYN_ALLOW_INVALID_INTERVAL ) +  , INPUT_FLAG( ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV ) +  , INPUT_FLAG( ONIG_SYN_STRICT_CHECK_BACKREF ) +  , INPUT_FLAG( ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) +  , INPUT_FLAG( ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP ) +  , INPUT_FLAG( ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) +  , INPUT_FLAG( ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY ) +  , INPUT_FLAG( ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH ) +  , INPUT_FLAG( ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND ) +  , INPUT_FLAG( ONIG_SYN_PYTHON ) +  , INPUT_FLAG( ONIG_SYN_WHOLE_OPTIONS ) +  , INPUT_FLAG( ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP ) + +  /* syntax (behavior) in char class [...] */ +  , INPUT_FLAG( ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) +  , INPUT_FLAG( ONIG_SYN_BACKSLASH_ESCAPE_IN_CC ) +  , INPUT_FLAG( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC ) +  , INPUT_FLAG( ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) +  , INPUT_FLAG( ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC ) +  , INPUT_FLAG( ONIG_SYN_ALLOW_CHAR_TYPE_FOLLOWED_BY_MINUS_IN_CC ) + +  /* syntax (behavior) warning */ +  , INPUT_FLAG( ONIG_SYN_WARN_CC_OP_NOT_ESCAPED ) +  , INPUT_FLAG( ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) +}; +#endif + + +static const int num_of_flags = (sizeof flag_data_list /sizeof flag_data_list[0]); + +static int flag_name_max_len = 0; + + +static int flag_name_max_len_int() +{ +  int i, n, max_len; +  max_len = 0; + +  for (i = 0; i < num_of_flags; i++) +  { +    n = strlen(flag_data_list[i].name); +    if ( max_len < n ) +    { +      max_len = n; +    } +  } + +  /*  Debug: printf("max_len=%d\n", max_len);  */ +  return max_len; +} + + +static int convert_bit_shift_num_to_flag_data_list_element( int bit_shift_num ) +{ +  int i; +  for (i = 
0; i < num_of_flags; i++) +  { +    if ( flag_data_list[i].num == (1U << bit_shift_num) ){ + +#ifdef PRINT_DEBUG_INFO +      printf("(1U << %d) ===> flag_data_list[%d]\n", bit_shift_num, i ); +#endif +      return i; +    } +  } + +#ifdef PRINT_DEBUG_INFO +  printf("(1U << %d) ===>     ( none )\n", bit_shift_num ); +#endif + +  /*  If not found, returns "-1". This is not Error. */ +  return -1; +} + + +static int convert_num_to_bit_shift_num(unsigned int arg_num){ +  int x; +  for (x = 0; x < TOTAL_NUM_OF_BITS; x++) +  { +     if ( arg_num == (1U << x) ) +     { +       return x; +     } +  } + +  /* Error */ +  printf("<Error:line%d> '0x%08x' is not bit flag.\n", __LINE__, arg_num ); +  exit(-1); +  return -1; +} + + +static int check_flag_data_duplication() +{ +  int i, shift_num; +  unsigned int used_bits = 0; + +  for (i = 0; i < num_of_flags; i++) +  { +    if ( (used_bits & flag_data_list[i].num) != 0 ){ + +       shift_num = convert_num_to_bit_shift_num(flag_data_list[i].num); + +       PRINT_SEPARATOR; +       fprintf(stderr, "\n<Error:line%d>  The following bit has already been used.\n", __LINE__ ); +       fprintf(stderr, "flag_data_list[%d] : '%s' ", i, flag_data_list[i].name); +       fprintf(stderr, "(1U << %d)\n", shift_num); + +#ifndef PRINT_DEBUG_INFO +       fprintf(stderr, "\nPlease use '#define PRINT_DEBUG_INFO'.\n"); +#endif +       exit(-1); +       return -1; +    } +    used_bits |= flag_data_list[i].num; +  } +  return 0; +} + + +static void print_flag_data_list() +{ +  int i, shift_num; + +  PRINT_SEPARATOR; +  for (i = 0; i < num_of_flags; i++) +  { +    shift_num = convert_num_to_bit_shift_num( flag_data_list[i].num ); + +    printf( "\nflag_data_list[%d]\n", i); +    printf( "name='%s' "   , flag_data_list[i].name); +    printf( "(1U << %d)\n" , shift_num); +    printf( "num=0x%08x\n"   , flag_data_list[i].num); +  } +  return ; +} +/************************************** flag data *******************************************/ + + 
+/************************************** print table *****************************************/ +static void print_table_head() +{ +  int i, y; + +  printf("\n### "); +  printf( TITLE_STRING ); +  printf("\n\n| ID    | Option"); +  for (i = 0; i < (flag_name_max_len - 1); i++) +  { +    printf(" "); +  } +  printf("|"); + +  /*  ex. print "PeNG "  */ +#ifdef PRINT_SYNTAX_FORWARD_ORDER +  for (y = 0; y < num_of_syntax_types; y++) +#else +  for (y = num_of_syntax_types -1; y > -1; y--) +#endif +  { +    printf(" %-5.5s |", syn_data_list[y].abb ); +  } + +  printf("\n| ----- | "); +  for (i = 0; i < (flag_name_max_len + 4); i++) +  { +    printf("-"); +  } +  printf(" |"); + +  for (y = 0; y < num_of_syntax_types; y++) +  { +    printf(" ----- |"); +  } +  printf("\n"); +  return ; +} + + +static void print_table_body_one_line( int shift_num ) +{ +  int i, y, elem, name_chars; + +  elem = convert_bit_shift_num_to_flag_data_list_element(shift_num); + +  printf("| %2d    | ", shift_num); + +  if (elem < 0){ +    name_chars = printf("     `( Undefined )`");    /*  elem == -1  */ +  } else { +    name_chars = printf("`%s`", flag_data_list[elem].name); +  } + +  for ( i=0; i < (flag_name_max_len - name_chars + 5); i++){ printf(" "); } +  printf("|"); + +  /*  ex. 
print  ' Yes   |'   */ +#ifdef PRINT_SYNTAX_FORWARD_ORDER +  for (y = 0; y < num_of_syntax_types; y++) +#else +  for (y = num_of_syntax_types -1; y > -1; y--) +#endif +  { +    if ( IS_SYNTAX_MEMBER(syn_data_list[y].syn, (1U << shift_num)) ) +    { +      printf(" Yes   |"); +    } else { +      printf(" -     |"); +    } +  } +  printf("\n"); +  return ; +} + + +static void print_table_body() +{ +  int x, elem; +  for (x = 0; x < TOTAL_NUM_OF_BITS; x++) +  { +    elem = convert_bit_shift_num_to_flag_data_list_element( x ); + +    if (elem < 0) +    { +#if !(defined( PRINT_UNDEFINED_FLAG )) +      continue; +#endif +    } +    print_table_body_one_line(x); +  } +  return ; +} + +/************************************** print table *****************************************/ + + +/************************************* print Set_in *****************************************/ +static void print_set_in_one_line(int shift_num) +{ +  int y, elem, count; + +  elem = convert_bit_shift_num_to_flag_data_list_element(shift_num); + +  if (elem < 0) +  { +    printf("### %d.       ( Undefined )\n", shift_num );    /*  elem == -1  */ +  } else { +    printf("### %d. 
%s\n", shift_num, flag_data_list[elem].name ); +  } + +  count = 0; +  printf("_Set in: "); +#ifdef PRINT_SYNTAX_FORWARD_ORDER +  for (y = 0; y < num_of_syntax_types; y++) +#else +  for (y = num_of_syntax_types -1; y > -1; y--) +#endif +  { +    if ( IS_SYNTAX_MEMBER(syn_data_list[y].syn, (1U << shift_num)) ) +    { +      if (count > 0){ printf(", "); }; +      printf("%s", syn_data_list[y].set_in); +      count++; +    } +  }   /* for y */ + +  if (count==0){ printf("none"); }; + +  printf("_\n\n"); +  return ; +} + + +static void print_set_in() +{ +  int x, elem; + +  PRINT_SEPARATOR; +  printf("The following are 'Set in' for oniguruma/doc/SYNTAX.md\n\n## "); +  printf( TITLE_STRING ); +  printf("\n\n"); +  for (x = 0; x < TOTAL_NUM_OF_BITS; x++) +  { +    elem = convert_bit_shift_num_to_flag_data_list_element( x ); + +    if (elem < 0) +    { +#if !(defined( PRINT_UNDEFINED_FLAG )) +      continue; +#endif +    } + +    print_set_in_one_line(x); +  } +  return ; +} + +/************************************* print Set_in *****************************************/ + + +/**************************************    main()   *****************************************/ + +static void print_version() +{ +  printf( "[ onig_syn_md.c ver.%05d ] ", ONIG_SYN_MD_VERSION_INT ); +  printf("The loaded oniguruma is '%d.%d.%d'.\n" +    , ONIGURUMA_VERSION_MAJOR +    , ONIGURUMA_VERSION_MINOR +    , ONIGURUMA_VERSION_TEENY ); +  return ; +} + + +static void print_debug() +{ +  int x; + +  print_version(); +  print_syn_data_list(); +  print_flag_data_list(); +  check_flag_data_duplication(); + +  /*  print: (1U << x) ===> flag_data_list[elem] */ +  PRINT_SEPARATOR; +  printf("\nThe following are the array elements that correspond to each bit.\n\n"); +  for (x = 0; x < TOTAL_NUM_OF_BITS; x++) +  { +    convert_bit_shift_num_to_flag_data_list_element(x); +  } +  return ; +} + + +static void warn_undefined_flag_used() +{ +  int x, y, elem; + +  for (x = 0; x < TOTAL_NUM_OF_BITS; x++) +  
{ +    elem = convert_bit_shift_num_to_flag_data_list_element(x); + +    if (elem < 0){ +      for (y = 0; y < num_of_syntax_types; y++) +      { +        if ( IS_SYNTAX_MEMBER( syn_data_list[y].syn, (1U << x) ) ) +        { +          PRINT_SEPARATOR; +          printf("warning: An undefined bit flag is used."); +          printf("  (1U << %d)\n\n", x); +          print_set_in_one_line( x ); + +#ifndef PRINT_UNDEFINED_FLAG +          printf("Please use '#define PRINT_UNDEFINED_FLAG'.\n"); +#endif +          return ; +        } +      }   /*  for y  */ +    }   /*  if elem */ +  }   /*  for x  */ +  return ; +} + + +extern int main(int argc, char* argv[]) +{ +#ifdef PRINT_DEBUG_INFO +  print_debug(); +  return 0; +#endif + +#ifdef PRINT_VERSION_INFO +  print_version(); +#endif + +  flag_name_max_len = flag_name_max_len_int(); +  check_flag_data_duplication(); + +#ifdef PRINT_SET_IN_INFO +  print_set_in(); +#endif + +#ifdef PRINT_TABLE_INFO +  print_table_head(); +  print_table_body(); +#endif + +#ifdef WARN_UNDEFINED_FLAG_USED +  warn_undefined_flag_used(); +#endif + +  /*  test  */ +  /* +  printf("\n------  test -------\n"); + +  int test_x = 17; +  print_set_in_one_line( test_x ); +  print_table_body_one_line( test_x ); +  printf("--------------------\n"); +  */ + +  /*  To avoid 'gcc -Wall' warnings.  */ +  if (0) +  { +    print_debug(); +    print_version(); +    print_set_in(); +    print_table_head(); +    print_table_body(); +    warn_undefined_flag_used(); +  } + +  return 0; +} | 
