Imported Upstream version 6.1.0 (tag: upstream/6.1.0)

author: Jörg Frings-Fürst <debian@jff-webhosting.net> 2016-08-31 03:42:05 +0200
committer: Jörg Frings-Fürst <debian@jff-webhosting.net> 2016-08-31 03:42:05 +0200
commit: a76fa337cc657dbe669ffb8dbdac606d4d6616f1 (patch)
tree: a6f004237df60876d087f79ac369fdc2545697c9 /doc
parent: 5e01a4852b31d537307994248869caf38b4023cc (diff)
3 files changed, 164 insertions, 115 deletions
diff --git a/doc/API b/doc/API
index 9904a06..8e824f5 100644
--- a/doc/API
+++ b/doc/API
@@ -1,4 +1,4 @@
-Oniguruma API  Version 6.0.0  2016/05/06
+Oniguruma API  Version 6.1.0  2016/08/22
 
 #include <oniguruma.h>
 
@@ -256,6 +256,27 @@ Oniguruma API  Version 6.0.0  2016/05/06
     ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API.
 
 
+# int onig_scan(regex_t* reg, const UChar* str, const UChar* end,
+                OnigRegion* region, OnigOptionType option,
+                int (*scan_callback)(int, int, OnigRegion*, void*),
+                void* callback_arg)
+
+  Scan string and callback with matching region.
+
+  normal return: number of matching times
+  error:         error code
+  interruption:  return value of callback function (!= 0)
+
+  arguments
+  1 reg:    regex object
+  2 str:    target string
+  3 end:    terminate address of target string
+  4 region: address for return group match range info (NULL is allowed)
+  5 option: search time option
+  6 scan_callback: callback function (defined by user)
+  7 callback_arg:  optional argument passed to callback
+
+
 # OnigRegion* onig_region_new(void)
 
   Create a region.
@@ -601,6 +622,10 @@ Oniguruma API  Version 6.0.0  2016/05/06
   2 ranges:  property code point ranges
              (first element is number of ranges.)
 
+    [num-of-ranges, 1st-range-start, 1st-range-end, 2nd-range-start... ]
+
+    * Don't destroy the ranges after having called this function.
+
   normal return: ONIG_NORMAL
 
 
diff --git a/doc/API.ja b/doc/API.ja
index ac8cc6a..f617a1c 100644
--- a/doc/API.ja
+++ b/doc/API.ja
@@ -1,4 +1,4 @@
-鬼車インターフェース Version 6.0.0   2016/05/06
+鬼車インターフェース Version 6.1.0   2016/08/22
 
 #include <oniguruma.h>
 
@@ -256,6 +256,27 @@
     ONIG_OPTION_POSIX_REGION  region引数をPOSIX APIのregmatch_t[]にする
 
 
+# int onig_scan(regex_t* reg, const UChar* str, const UChar* end,
+                OnigRegion* region, OnigOptionType option,
+                int (*scan_callback)(int, int, OnigRegion*, void*),
+                void* callback_arg)
+
+  正規表現で文字列をスキャンして、マッチングする毎にコールバック関数を呼び出す。
+
+  正常終了: マッチ回数 (0回も含める)
+  エラー:   エラーコード (< 0)
+  中断: コールバック関数が０以外の戻り値を返したとき、その値を戻り値として中断
+
+  引数
+  1 reg:    正規表現オブジェクト
+  2 str:    検索対象文字列
+  3 end:    検索対象文字列の終端アドレス
+  4 region: マッチ領域情報(region)  (NULLも許される)
+  5 option: 検索時オプション
+  6 scan_callback: コールバック関数
+  7 callback_arg:  コールバック関数に渡される付加引数値
+
+
 # OnigRegion* onig_region_new(void)
 
   マッチ領域情報(region)を作成する。
@@ -608,6 +629,10 @@
   2 ranges:  プロパティコードポイント範囲
              (最初の要素は範囲の数)
 
+    [num-of-ranges, 1st-range-start, 1st-range-end, 2nd-range-start... ]
+
+    * この関数を呼んだ後で、rangesを変更/破壊しないこと
+
   正常終了戻り値: ONIG_NORMAL
 
 
diff --git a/doc/RE b/doc/RE
index b4bf536..e8a6aa4 100644
--- a/doc/RE
+++ b/doc/RE
@@ -1,35 +1,35 @@
-Oniguruma Regular Expressions Version 6.0.0    2016/05/02
+Oniguruma Regular Expressions Version 6.0.0    2016/08/18
 
 syntax: ONIG_SYNTAX_RUBY (default)
 
 
 1. Syntax elements
 
-  \       escape (enable or disable meta character meaning)
+  \       escape (enable or disable meta character)
   |       alternation
   (...)   group
-  [...]   character class  
+  [...]   character class
 
 
 2. Characters
 
-  \t           horizontal tab (0x09)
-  \v           vertical tab   (0x0B)
-  \n           newline        (0x0A)
-  \r           return         (0x0D)
-  \b           back space     (0x08)
-  \f           form feed      (0x0C)
-  \a           bell           (0x07)
-  \e           escape         (0x1B)
-  \nnn         octal char            (encoded byte value)
-  \xHH         hexadecimal char      (encoded byte value)
-  \x{7HHHHHHH} wide hexadecimal char (character code point value)
-  \cx          control char          (character code point value)
-  \C-x         control char          (character code point value)
-  \M-x         meta  (x|0x80)        (character code point value)
-  \M-\C-x      meta control char     (character code point value)
-
- (* \b is effective in character class [...] only)
+  \t           horizontal tab         (0x09)
+  \v           vertical tab           (0x0B)
+  \n           newline (line feed)    (0x0A)
+  \r           carriage return        (0x0D)
+  \b           backspace              (0x08)
+  \f           form feed              (0x0C)
+  \a           bell                   (0x07)
+  \e           escape                 (0x1B)
+  \nnn         octal char             (encoded byte value)
+  \xHH         hexadecimal char       (encoded byte value)
+  \x{7HHHHHHH} wide hexadecimal char  (character code point value)
+  \cx          control char           (character code point value)
+  \C-x         control char           (character code point value)
+  \M-x         meta  (x|0x80)         (character code point value)
+  \M-\C-x      meta control char      (character code point value)
+
+ (* \b as backspace is effective in character class only)
 
 
 3. Character types
@@ -39,12 +39,12 @@ syntax: ONIG_SYNTAX_RUBY (default)
   \w       word character
 
            Not Unicode:
-             alphanumeric, "_" and multibyte char. 
+             alphanumeric, "_" and multibyte char.
 
            Unicode:
              General_Category -- (Letter|Mark|Number|Connector_Punctuation)
 
-  \W       non word char
+  \W       non-word char
 
   \s       whitespace char
 
@@ -52,22 +52,22 @@ syntax: ONIG_SYNTAX_RUBY (default)
              \t, \n, \v, \f, \r, \x20
 
            Unicode:
-             0009, 000A, 000B, 000C, 000D, 0085(NEL), 
+             0009, 000A, 000B, 000C, 000D, 0085(NEL),
              General_Category -- Line_Separator
                               -- Paragraph_Separator
                               -- Space_Separator
 
-  \S       non whitespace char
+  \S       non-whitespace char
 
   \d       decimal digit char
 
            Unicode: General_Category -- Decimal_Number
 
-  \D       non decimal digit char
+  \D       non-decimal-digit char
 
   \h       hexadecimal digit char   [0-9a-fA-F]
 
-  \H       non hexadecimal digit char
+  \H       non-hexdigit char
 
 
   Character Property
@@ -80,7 +80,7 @@ syntax: ONIG_SYNTAX_RUBY (default)
 
      + works on all encodings
        Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
-       Print, Punct, Space, Upper, XDigit, Word, ASCII,
+       Print, Punct, Space, Upper, XDigit, Word, ASCII
 
      + works on EUC_JP, Shift_JIS
        Hiragana, Katakana
@@ -97,9 +97,9 @@ syntax: ONIG_SYNTAX_RUBY (default)
     ?       1 or 0 times
     *       0 or more times
     +       1 or more times
-    {n,m}   at least n but not more than m times
+    {n,m}   at least n but no more than m times
     {n,}    at least n times
-    {,n}    at least 0 but not more than n times ({0,n})
+    {,n}    at least 0 but no more than n times ({0,n})
     {n}     n times
 
   reluctant
@@ -107,11 +107,11 @@ syntax: ONIG_SYNTAX_RUBY (default)
     ??      1 or 0 times
     *?      0 or more times
     +?      1 or more times
-    {n,m}?  at least n but not more than m times  
+    {n,m}?  at least n but not more than m times
     {n,}?   at least n times
     {,n}?   at least 0 but not more than n times (== {0,n}?)
 
-  possessive (greedy and does not backtrack after repeated)
+  possessive (greedy and does not backtrack once match)
 
     ?+      1 or 0 times
     *+      0 or more times
@@ -127,24 +127,24 @@ syntax: ONIG_SYNTAX_RUBY (default)
   ^       beginning of the line
   $       end of the line
   \b      word boundary
-  \B      not word boundary
+  \B      non-word boundary
   \A      beginning of string
   \Z      end of string, or before newline at the end
   \z      end of string
-  \G      matching start position 
+  \G      where the current search attempt begins
 
 
 6. Character class
 
-  ^...    negative class (lowest precedence operator)
+  ^...    negative class (lowest precedence)
   x-y     range from x to y
   [...]   set (character class in character class)
-  ..&&..  intersection (low precedence at the next of ^)
-          
+  ..&&..  intersection (low precedence, only higher than ^)
+
     ex. [a-w&&[^c-g]z] ==> ([a-w] AND ([^c-g] OR z)) ==> [abh-w]
 
-  * If you want to use '[', '-', ']' as a normal character
-    in a character class, you should escape these characters by '\'.
+  * If you want to use '[', '-', or ']' as a normal character
+    in character class, you should escape them with '\'.
 
 
   POSIX bracket ([:xxxxx:], negate [:^xxxxx:])
@@ -196,79 +196,75 @@ syntax: ONIG_SYNTAX_RUBY (default)
 
   (?imx-imx)         option on/off
                          i: ignore case
-                         m: multi-line (dot(.) match newline)
+                         m: multi-line (dot (.) also matches newline)
                          x: extended form
   (?imx-imx:subexp)  option on/off for subexp
 
-  (?:subexp)         not captured group
-  (subexp)           captured group
+  (?:subexp)         non-capturing group
+  (subexp)           capturing group
 
   (?=subexp)         look-ahead
   (?!subexp)         negative look-ahead
   (?<=subexp)        look-behind
   (?<!subexp)        negative look-behind
 
-                     Subexp of look-behind must be fixed character length.
-                     But different character length is allowed in top level
-                     alternatives only.
+                     Subexp of look-behind must be fixed-width.
+                     But top-level alternatives can be of various lengths.
                      ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed.
 
-                     In negative-look-behind, captured group isn't allowed, 
-                     but shy group(?:) is allowed.
+                     In negative look-behind, capturing group isn't allowed,
+                     but non-capturing group (?:) is allowed.
 
   (?>subexp)         atomic group
-                     don't backtrack in subexp.
+                     no backtracks in subexp.
 
   (?<name>subexp), (?'name'subexp)
                      define named group
-                     (All characters of the name must be a word character.)
+                     (Each character of the name must be a word character.)
 
-                     Not only a name but a number is assigned like a captured
+                     Not only a name but a number is assigned like a capturing
                      group.
 
-                     Assigning the same name as two or more subexps is allowed.
-                     In this case, a subexp call can not be performed although
-                     the back reference is possible.
+                     Assigning the same name to two or more subexps is allowed.
+
 
+8. Backreferences
 
-8. Back reference
+  When we say "backreference a group," it actually means, "re-match the same
+  text matched by the subexp in that group."
 
-  \n          back reference by group number (n >= 1)
-  \k<n>       back reference by group number (n >= 1)
-  \k'n'       back reference by group number (n >= 1)
-  \k<-n>      back reference by relative group number (n >= 1)
-  \k'-n'      back reference by relative group number (n >= 1)
-  \k<name>    back reference by group name
-  \k'name'    back reference by group name
+  \n  \k<n>     \k'n'     (n >= 1) backreference the nth group in the regexp
+      \k<-n>    \k'-n'    (n >= 1) backreference the nth group counting
+                          backwards from the referring position
+      \k<name>  \k'name'  backreference a group with the specified name
 
-  In the back reference by the multiplex definition name,
-  a subexp with a large number is referred to preferentially.
-  (When not matched, a group of the small number is referred to.)
+  When backreferencing with a name that is assigned to more than one groups,
+  the last group with the name is checked first, if not matched then the
+  previous one with the name, and so on, until there is a match.
 
-  * Back reference by group number is forbidden if named group is defined 
-    in the pattern and ONIG_OPTION_CAPTURE_GROUP is not setted.
+  * Backreference by number is forbidden if any named group is defined and
+    ONIG_OPTION_CAPTURE_GROUP is not set.
 
 
-  back reference with nest level
+  backreference with recursion level
 
-    level: 0, 1, 2, ...
+    (n >= 1, level >= 0)
 
-    \k<n+level>     (n >= 1)
-    \k<n-level>     (n >= 1)
-    \k'n+level'     (n >= 1)
-    \k'n-level'     (n >= 1)
+    \k<n+level> \k'n+level'
+    \k<n-level> \k'n-level'
 
-    \k<name+level>
-    \k<name-level>
-    \k'name+level'
-    \k'name-level'
+    \k<name+level> \k'name+level'
+    \k<name-level> \k'name-level'
 
-    Destinate relative nest level from back reference position.    
+    Destine a group on the recursion level relative to the referring position.
 
     ex 1.
 
+      /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b>))\z/.match("reee")
       /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer")
 
+      \k<b+0> refers to the (?<b>.) on the same recursion level with it.
+
     ex 2.
 
       r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED)
@@ -280,53 +276,56 @@ syntax: ONIG_SYNTAX_RUBY (default)
       \g<element>
       __REGEXP__
 
-      p r.match('<foo>f<bar>bbb</bar>f</foo>').captures
+      p r.match("<foo>f<bar>bbb</bar>f</foo>").captures
+
+
+9. Subexp calls ("Tanaka Akira special")
 
+  When we say "call a group," it actually means, "re-execute the subexp in
+  that group."
 
+  \g<n>     \g'n'     (n >= 1) call the nth group
+  \g<-n>    \g'-n'    (n >= 1) call the nth group counting backwards from
+                      the calling position
+  \g<name>  \g'name'  call the group with the specified name
 
-9. Subexp call ("Tanaka Akira special")
+  * Left-most recursive calls are not allowed.
 
-  \g<name>    call by group name
-  \g'name'    call by group name
-  \g<n>       call by group number (n >= 1)
-  \g'n'       call by group number (n >= 1)
-  \g<-n>      call by relative group number (n >= 1)
-  \g'-n'      call by relative group number (n >= 1)
+    ex. (?<name>a|\g<name>b)    => error
+        (?<name>a|b\g<name>c)   => OK
 
-  * left-most recursive call is not allowed.
-     ex. (?<name>a|\g<name>b)   => error
-         (?<name>a|b\g<name>c)  => OK
+  * Calls with a name that is assigned to more than one groups are not
+    allowed.
 
-  * Call by group number is forbidden if named group is defined in the pattern
-    and ONIG_OPTION_CAPTURE_GROUP is not setted.
+  * Call by number is forbidden if any named group is defined and
+    ONIG_OPTION_CAPTURE_GROUP is not set.
 
-  * If the option status of called group is different from calling position
-    then the group's option is effective.
+  * The option status of the called group is always effective.
 
-    ex. (?-i:\g<name>)(?i:(?<name>a)){0}  match to "A"
+    ex. /(?-i:\g<name>)(?i:(?<name>a)){0}/.match("A")
 
 
 10. Captured group
 
-  Behavior of the no-named group (...) changes with the following conditions.
+  Behavior of an unnamed group (...) changes with the following conditions.
   (But named group is not changed.)
 
   case 1. /.../     (named group is not used, no option)
 
-     (...) is treated as a captured group.
+     (...) is treated as a capturing group.
 
   case 2. /.../g    (named group is not used, 'g' option)
 
-     (...) is treated as a no-captured group (?:...).
+     (...) is treated as a non-capturing group (?:...).
 
   case 3. /..(?<name>..)../   (named group is used, no option)
 
-     (...) is treated as a no-captured group (?:...).
+     (...) is treated as a non-capturing group.
      numbered-backref/call is not allowed.
 
   case 4. /..(?<name>..)../G  (named group is used, 'G' option)
 
-     (...) is treated as a captured group.
+     (...) is treated as a capturing group.
      numbered-backref/call is allowed.
 
   where
@@ -338,14 +337,14 @@ syntax: ONIG_SYNTAX_RUBY (default)
 
 
 -----------------------------
-A-1. Syntax depend options
+A-1. Syntax-dependent options
 
    + ONIG_SYNTAX_RUBY
-     (?m): dot(.) match newline
+     (?m): dot (.) also matches newline
 
    + ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA
-     (?s): dot(.) match newline
-     (?m): ^ match after newline, $ match before newline
+     (?s): dot (.) also matches newline
+     (?m): ^ matches after newline, $ matches before newline
 
 
 A-2. Original extensions
@@ -356,7 +355,7 @@ A-2. Original extensions
    + subexp call                  \g<name>, \g<group-num>
 
 
-A-3. Lacked features compare with perl 5.8.0
+A-3. Missing features compared with perl 5.8.0
 
    + \N{name}
    + \l,\u,\L,\U, \X, \C
@@ -373,12 +372,12 @@ A-4. Differences with Japanized GNU regex(version 0.12) of Ruby 1.8
    + add character property (\p{property}, \P{property})
    + add hexadecimal digit char type (\h, \H)
    + add look-behind
-     (?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern)
+     (?<=fixed-width-pattern), (?<!fixed-width-pattern)
    + add possessive quantifier. ?+, *+, ++
    + add operations in character class. [], &&
      ('[' must be escaped as an usual char in character class.)
    + add named group and subexp call.
-   + octal or hexadecimal number sequence can be treated as 
+   + octal or hexadecimal number sequence can be treated as
      a multibyte code char in character class if multibyte encoding
      is specified.
      (ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
@@ -389,29 +388,29 @@ A-4. Differences with Japanized GNU regex(version 0.12) of Ruby 1.8
      ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b).
    + isolated option is not transparent to previous pattern.
      ex. a(?i)* is a syntax error pattern.
-   + allowed incompleted left brace as an usual string.
+   + allowed unpaired left brace as a normal character.
      ex. /{/, /({)/, /a{2,3/ etc...
    + negative POSIX bracket [:^xxxx:] is supported.
    + POSIX bracket [:ascii:] is added.
    + repeat of look-ahead is not allowed.
      ex. /(?=a)*/, /(?!b){5}/
-   + Ignore case option is effective to numbered character.
+   + Ignore case option is effective to escape sequence.
      ex. /\x61/i =~ "A"
-   + In the range quantifier, the number of the minimum is omissible.
+   + In the range quantifier, the number of the minimum is optional.
      /a{,n}/ == /a{0,n}/
-     The simultanious abbreviation of the number of times of the minimum
-     and the maximum is not allowed. (/a{,}/)
-   + /a{n}?/ is not a non-greedy operator.
+     The omission of both minimum and maximum values is not allowed.
+     /a{,}/
+   + /{n}?/ is not a reluctant quantifier.
      /a{n}?/ == /(?:a{n})?/
-   + invalid back reference is checked and cause error.
+   + invalid back reference is checked and raises error.
      /\1/, /(a)\2/
-   + Zero-length match in infinite repeat stops the repeat,
+   + Zero-width match in an infinite loop stops the repeat,
      then changes of the capture group status are checked as stop condition.
      /(?:()|())*\1\2/ =~ ""
      /(?:\1a|())*/ =~ "a"
 
 
-A-5. Disabled functions by default syntax
+A-5. Features disabled in default syntax
 
    + capture history
author	Jörg Frings-Fürst <debian@jff-webhosting.net>	2016-08-31 03:42:05 +0200
committer	Jörg Frings-Fürst <debian@jff-webhosting.net>	2016-08-31 03:42:05 +0200
commit	a76fa337cc657dbe669ffb8dbdac606d4d6616f1 (patch)
tree	a6f004237df60876d087f79ac369fdc2545697c9 /doc
parent	5e01a4852b31d537307994248869caf38b4023cc (diff)