From 3e0814cd9862b89c7a39672672937477bd87ddfb Mon Sep 17 00:00:00 2001
From: Andreas Rottmann <a.rottmann@gmx.at>
Date: Thu, 27 May 2010 18:23:15 +0200
Subject: Imported Upstream version 0.9.3

---
 lib/uniwbrk/u-wordbreaks.h       | 166 +++++++++++++++++++-------------------
 lib/uniwbrk/u16-wordbreaks.c     |   2 +-
 lib/uniwbrk/u32-wordbreaks.c     |   2 +-
 lib/uniwbrk/u8-wordbreaks.c      |  68 ++++++++--------
 lib/uniwbrk/ulc-wordbreaks.c     | 168 +++++++++++++++++++--------------------
 lib/uniwbrk/wbrkprop.h           |   2 +-
 lib/uniwbrk/wbrktable.c          |  22 ++---
 lib/uniwbrk/wbrktable.h          |   2 +-
 lib/uniwbrk/wordbreak-property.c |  20 ++---
 9 files changed, 226 insertions(+), 226 deletions(-)

(limited to 'lib/uniwbrk')

diff --git a/lib/uniwbrk/u-wordbreaks.h b/lib/uniwbrk/u-wordbreaks.h
index 5ef4e8c1..b0fd301e 100644
--- a/lib/uniwbrk/u-wordbreaks.h
+++ b/lib/uniwbrk/u-wordbreaks.h
@@ -1,5 +1,5 @@
 /* Word breaks in UTF-8/UTF-16/UTF-32 strings.
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009-2010 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2009.
 
    This program is free software: you can redistribute it and/or modify it
@@ -23,105 +23,105 @@ FUNC (const UNIT *s, size_t n, char *p)
       const UNIT *s_end = s + n;
 
       /* Word break property of the last character.
-	 -1 at the very beginning of the string.  */
+         -1 at the very beginning of the string.  */
       int last_char_prop = -1;
 
       /* Format and Extend characters are ignored; this means, the mostly used
-	 unit is the complex character (= character with subsequent ignored
-	 characters).
-	 Word break property of the last complex character.
-	 -1 at the very beginning of the string.  */
+         unit is the complex character (= character with subsequent ignored
+         characters).
+         Word break property of the last complex character.
+         -1 at the very beginning of the string.  */
       int last_compchar_prop = -1;
       char *last_compchar_ptr = NULL;
 
       /* For recognizing rules involving 3 complex characters:
-	 Word break property of the second-to-last complex character.
-	 -1 at the very beginning of the string.  */
+         Word break property of the second-to-last complex character.
+         -1 at the very beginning of the string.  */
       int secondlast_compchar_prop = -1;
 
       /* Don't break inside multibyte characters.  */
       memset (p, 0, n);
 
       while (s < s_end)
-	{
-	  ucs4_t uc;
-	  int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s);
-	  int prop = uc_wordbreak_property (uc);
+        {
+          ucs4_t uc;
+          int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s);
+          int prop = uc_wordbreak_property (uc);
 
-	  /* No break at the start of the string.  */
-	  if (last_char_prop >= 0)
-	    {
-	      /* No break between CR and LF.  */
-	      if (last_char_prop == WBP_CR && prop == WBP_LF)
-		/* *p = 0 */;
-	      /* Break before and after newlines.  */
-	      else if (last_char_prop >= WBP_NEWLINE
-		       /* same as:
-			  last_char_prop == WBP_CR
-			  || last_char_prop == WBP_LF
-			  || last_char_prop == WBP_NEWLINE */
-		       || prop >= WBP_NEWLINE
-			  /* same as:
-			     prop == WBP_CR
-			     || prop == WBP_LF
-			     || prop == WBP_NEWLINE */)
-		*p = 1;
-	      /* Ignore Format and Extend characters.  */
-	      else if (!(prop == WBP_EXTEND || prop == WBP_FORMAT))
-		{
-		  /* No break in these situations (see UAX #29):
+          /* No break at the start of the string.  */
+          if (last_char_prop >= 0)
+            {
+              /* No break between CR and LF.  */
+              if (last_char_prop == WBP_CR && prop == WBP_LF)
+                /* *p = 0 */;
+              /* Break before and after newlines.  */
+              else if (last_char_prop >= WBP_NEWLINE
+                       /* same as:
+                          last_char_prop == WBP_CR
+                          || last_char_prop == WBP_LF
+                          || last_char_prop == WBP_NEWLINE */
+                       || prop >= WBP_NEWLINE
+                          /* same as:
+                             prop == WBP_CR
+                             || prop == WBP_LF
+                             || prop == WBP_NEWLINE */)
+                *p = 1;
+              /* Ignore Format and Extend characters.  */
+              else if (!(prop == WBP_EXTEND || prop == WBP_FORMAT))
+                {
+                  /* No break in these situations (see UAX #29):
 
-		      secondlast          last             current
+                      secondlast          last             current
 
-		       ALetter   (MidLetter | MidNumLet) × ALetter      (WB7)
-		       ALetter × (MidLetter | MidNumLet)   ALetter      (WB6)
-		       Numeric   (MidNum | MidNumLet)    × Numeric      (WB11)
-		       Numeric × (MidNum | MidNumLet)      Numeric      (WB12)
-						 ALetter × ALetter      (WB5)
-						 ALetter × Numeric      (WB9)
-						 Numeric × ALetter      (WB10)
-						 Numeric × Numeric      (WB8)
-						Katakana × Katakana     (WB13)
-			  (ALetter | Numeric | Katakana) × ExtendNumLet (WB13a)
-					    ExtendNumLet × ExtendNumLet (WB13a)
-			 ExtendNumLet × (ALetter | Numeric | Katakana)  (WB13b)
-		   */
-		  /* No break across certain punctuation.  Also, disable word
-		     breaks that were recognized earlier (due to lookahead of
-		     only one complex character).  */
-		  if ((prop == WBP_ALETTER
-		       && (last_compchar_prop == WBP_MIDLETTER
-			   || last_compchar_prop == WBP_MIDNUMLET)
-		       && secondlast_compchar_prop == WBP_ALETTER)
-		      || (prop == WBP_NUMERIC
-			  && (last_compchar_prop == WBP_MIDNUM
-			      || last_compchar_prop == WBP_MIDNUMLET)
-			  && secondlast_compchar_prop == WBP_NUMERIC))
-		    {
-		      *last_compchar_ptr = 0;
-		      /* *p = 0; */
-		    }
-		  else
-		    {
-		      /* Perform a single table lookup.  */
-		      if (uniwbrk_table[last_compchar_prop][prop])
-			*p = 1;
-		      /* else *p = 0; */
-		    }
-		}
-	    }
+                       ALetter   (MidLetter | MidNumLet) × ALetter      (WB7)
+                       ALetter × (MidLetter | MidNumLet)   ALetter      (WB6)
+                       Numeric   (MidNum | MidNumLet)    × Numeric      (WB11)
+                       Numeric × (MidNum | MidNumLet)      Numeric      (WB12)
+                                                 ALetter × ALetter      (WB5)
+                                                 ALetter × Numeric      (WB9)
+                                                 Numeric × ALetter      (WB10)
+                                                 Numeric × Numeric      (WB8)
+                                                Katakana × Katakana     (WB13)
+                          (ALetter | Numeric | Katakana) × ExtendNumLet (WB13a)
+                                            ExtendNumLet × ExtendNumLet (WB13a)
+                         ExtendNumLet × (ALetter | Numeric | Katakana)  (WB13b)
+                   */
+                  /* No break across certain punctuation.  Also, disable word
+                     breaks that were recognized earlier (due to lookahead of
+                     only one complex character).  */
+                  if ((prop == WBP_ALETTER
+                       && (last_compchar_prop == WBP_MIDLETTER
+                           || last_compchar_prop == WBP_MIDNUMLET)
+                       && secondlast_compchar_prop == WBP_ALETTER)
+                      || (prop == WBP_NUMERIC
+                          && (last_compchar_prop == WBP_MIDNUM
+                              || last_compchar_prop == WBP_MIDNUMLET)
+                          && secondlast_compchar_prop == WBP_NUMERIC))
+                    {
+                      *last_compchar_ptr = 0;
+                      /* *p = 0; */
+                    }
+                  else
+                    {
+                      /* Perform a single table lookup.  */
+                      if (uniwbrk_table[last_compchar_prop][prop])
+                        *p = 1;
+                      /* else *p = 0; */
+                    }
+                }
+            }
 
-	  last_char_prop = prop;
-	  /* Ignore Format and Extend characters, except at the start of the string.  */
-	  if (last_compchar_prop < 0 || !(prop == WBP_EXTEND || prop == WBP_FORMAT))
-	    {
-	      secondlast_compchar_prop = last_compchar_prop;
-	      last_compchar_prop = prop;
-	      last_compchar_ptr = p;
-	    }
+          last_char_prop = prop;
+          /* Ignore Format and Extend characters, except at the start of the string.  */
+          if (last_compchar_prop < 0 || !(prop == WBP_EXTEND || prop == WBP_FORMAT))
+            {
+              secondlast_compchar_prop = last_compchar_prop;
+              last_compchar_prop = prop;
+              last_compchar_ptr = p;
+            }
 
-	  s += count;
-	  p += count;
-	}
+          s += count;
+          p += count;
+        }
     }
 }
diff --git a/lib/uniwbrk/u16-wordbreaks.c b/lib/uniwbrk/u16-wordbreaks.c
index 3398fd3a..ea2a53d2 100644
--- a/lib/uniwbrk/u16-wordbreaks.c
+++ b/lib/uniwbrk/u16-wordbreaks.c
@@ -1,5 +1,5 @@
 /* Word breaks in UTF-16 strings.
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009-2010 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2009.
 
    This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uniwbrk/u32-wordbreaks.c b/lib/uniwbrk/u32-wordbreaks.c
index 6763fb9e..86a26160 100644
--- a/lib/uniwbrk/u32-wordbreaks.c
+++ b/lib/uniwbrk/u32-wordbreaks.c
@@ -1,5 +1,5 @@
 /* Word breaks in UTF-32 strings.
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009-2010 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2009.
 
    This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uniwbrk/u8-wordbreaks.c b/lib/uniwbrk/u8-wordbreaks.c
index 59d2076d..c7edfe9e 100644
--- a/lib/uniwbrk/u8-wordbreaks.c
+++ b/lib/uniwbrk/u8-wordbreaks.c
@@ -1,5 +1,5 @@
 /* Word breaks in UTF-8 strings.
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009-2010 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2009.
 
    This program is free software: you can redistribute it and/or modify it
@@ -50,28 +50,28 @@ read_file (FILE *stream)
   while (! feof (stream))
     {
       if (size + BUFSIZE > alloc)
-	{
-	  alloc = alloc + alloc / 2;
-	  if (alloc < size + BUFSIZE)
-	    alloc = size + BUFSIZE;
-	  buf = realloc (buf, alloc);
-	  if (buf == NULL)
-	    {
-	      fprintf (stderr, "out of memory\n");
-	      exit (1);
-	    }
-	}
+        {
+          alloc = alloc + alloc / 2;
+          if (alloc < size + BUFSIZE)
+            alloc = size + BUFSIZE;
+          buf = realloc (buf, alloc);
+          if (buf == NULL)
+            {
+              fprintf (stderr, "out of memory\n");
+              exit (1);
+            }
+        }
       count = fread (buf + size, 1, BUFSIZE, stream);
       if (count == 0)
-	{
-	  if (ferror (stream))
-	    {
-	      perror ("fread");
-	      exit (1);
-	    }
-	}
+        {
+          if (ferror (stream))
+            {
+              perror ("fread");
+              exit (1);
+            }
+        }
       else
-	size += count;
+        size += count;
     }
   buf = realloc (buf, size + 1);
   if (buf == NULL)
@@ -98,20 +98,20 @@ main (int argc, char * argv[])
       u8_wordbreaks ((uint8_t *) input, length, breaks);
 
       for (i = 0; i < length; i++)
-	{
-	  switch (breaks[i])
-	    {
-	    case 1:
-	      /* U+2027 in UTF-8 encoding */
-	      putc (0xe2, stdout); putc (0x80, stdout); putc (0xa7, stdout);
-	      break;
-	    case 0:
-	      break;
-	    default:
-	      abort ();
-	    }
-	  putc (input[i], stdout);
-	}
+        {
+          switch (breaks[i])
+            {
+            case 1:
+              /* U+2027 in UTF-8 encoding */
+              putc (0xe2, stdout); putc (0x80, stdout); putc (0xa7, stdout);
+              break;
+            case 0:
+              break;
+            default:
+              abort ();
+            }
+          putc (input[i], stdout);
+        }
 
       free (breaks);
 
diff --git a/lib/uniwbrk/ulc-wordbreaks.c b/lib/uniwbrk/ulc-wordbreaks.c
index cb6e131c..6e17026b 100644
--- a/lib/uniwbrk/ulc-wordbreaks.c
+++ b/lib/uniwbrk/ulc-wordbreaks.c
@@ -1,5 +1,5 @@
 /* Word breaks in strings.
-   Copyright (C) 2001-2003, 2006-2009 Free Software Foundation, Inc.
+   Copyright (C) 2001-2003, 2006-2010 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2009.
 
    This program is free software: you can redistribute it and/or modify it
@@ -49,60 +49,60 @@ ulc_wordbreaks (const char *s, size_t n, char *p)
       const char *encoding = locale_charset ();
 
       if (is_utf8_encoding (encoding))
-	u8_wordbreaks ((const uint8_t *) s, n, p);
+        u8_wordbreaks ((const uint8_t *) s, n, p);
       else
-	{
-	  /* Convert the string to UTF-8 and build a translation table
-	     from offsets into s to offsets into the translated string.  */
-	  size_t *offsets = (size_t *) malloc (n * sizeof (size_t));
-
-	  if (offsets != NULL)
-	    {
-	      uint8_t *t;
-	      size_t m;
-
-	      t = u8_conv_from_encoding (encoding, iconveh_question_mark,
-					 s, n, offsets, NULL, &m);
-	      if (t != NULL)
-		{
-		  char *q = (char *) (m > 0 ? malloc (m) : NULL);
-
-		  if (m == 0 || q != NULL)
-		    {
-		      size_t i;
-
-		      /* Determine the word breaks of the UTF-8 string.  */
-		      u8_wordbreaks (t, m, q);
-
-		      /* Translate the result back to the original string.  */
-		      memset (p, 0, n);
-		      for (i = 0; i < n; i++)
-			if (offsets[i] != (size_t)(-1))
-			  p[i] = q[offsets[i]];
-
-		      free (q);
-		      free (t);
-		      free (offsets);
-		      return;
-		    }
-		  free (t);
-		}
-	      free (offsets);
-	    }
-
-	  /* Impossible to convert.  */
+        {
+          /* Convert the string to UTF-8 and build a translation table
+             from offsets into s to offsets into the translated string.  */
+          size_t *offsets = (size_t *) malloc (n * sizeof (size_t));
+
+          if (offsets != NULL)
+            {
+              uint8_t *t;
+              size_t m;
+
+              t = u8_conv_from_encoding (encoding, iconveh_question_mark,
+                                         s, n, offsets, NULL, &m);
+              if (t != NULL)
+                {
+                  char *q = (char *) (m > 0 ? malloc (m) : NULL);
+
+                  if (m == 0 || q != NULL)
+                    {
+                      size_t i;
+
+                      /* Determine the word breaks of the UTF-8 string.  */
+                      u8_wordbreaks (t, m, q);
+
+                      /* Translate the result back to the original string.  */
+                      memset (p, 0, n);
+                      for (i = 0; i < n; i++)
+                        if (offsets[i] != (size_t)(-1))
+                          p[i] = q[offsets[i]];
+
+                      free (q);
+                      free (t);
+                      free (offsets);
+                      return;
+                    }
+                  free (t);
+                }
+              free (offsets);
+            }
+
+          /* Impossible to convert.  */
 #if C_CTYPE_ASCII
-	  if (is_all_ascii (s, n))
-	    {
-	      /* ASCII is a subset of UTF-8.  */
-	      u8_wordbreaks ((const uint8_t *) s, n, p);
-	      return;
-	    }
+          if (is_all_ascii (s, n))
+            {
+              /* ASCII is a subset of UTF-8.  */
+              u8_wordbreaks ((const uint8_t *) s, n, p);
+              return;
+            }
 #endif
-	  /* We have a non-ASCII string and cannot convert it.
-	     Don't produce any word breaks.  */
-	  memset (p, 0, n);
-	}
+          /* We have a non-ASCII string and cannot convert it.
+             Don't produce any word breaks.  */
+          memset (p, 0, n);
+        }
     }
 }
 
@@ -127,28 +127,28 @@ read_file (FILE *stream)
   while (! feof (stream))
     {
       if (size + BUFSIZE > alloc)
-	{
-	  alloc = alloc + alloc / 2;
-	  if (alloc < size + BUFSIZE)
-	    alloc = size + BUFSIZE;
-	  buf = realloc (buf, alloc);
-	  if (buf == NULL)
-	    {
-	      fprintf (stderr, "out of memory\n");
-	      exit (1);
-	    }
-	}
+        {
+          alloc = alloc + alloc / 2;
+          if (alloc < size + BUFSIZE)
+            alloc = size + BUFSIZE;
+          buf = realloc (buf, alloc);
+          if (buf == NULL)
+            {
+              fprintf (stderr, "out of memory\n");
+              exit (1);
+            }
+        }
       count = fread (buf + size, 1, BUFSIZE, stream);
       if (count == 0)
-	{
-	  if (ferror (stream))
-	    {
-	      perror ("fread");
-	      exit (1);
-	    }
-	}
+        {
+          if (ferror (stream))
+            {
+              perror ("fread");
+              exit (1);
+            }
+        }
       else
-	size += count;
+        size += count;
     }
   buf = realloc (buf, size + 1);
   if (buf == NULL)
@@ -176,19 +176,19 @@ main (int argc, char * argv[])
       ulc_wordbreaks (input, length, breaks);
 
       for (i = 0; i < length; i++)
-	{
-	  switch (breaks[i])
-	    {
-	    case 1:
-	      putc ('|', stdout);
-	      break;
-	    case 0:
-	      break;
-	    default:
-	      abort ();
-	    }
-	  putc (input[i], stdout);
-	}
+        {
+          switch (breaks[i])
+            {
+            case 1:
+              putc ('|', stdout);
+              break;
+            case 0:
+              break;
+            default:
+              abort ();
+            }
+          putc (input[i], stdout);
+        }
 
       free (breaks);
 
diff --git a/lib/uniwbrk/wbrkprop.h b/lib/uniwbrk/wbrkprop.h
index 3b50e17e..77fd61de 100644
--- a/lib/uniwbrk/wbrkprop.h
+++ b/lib/uniwbrk/wbrkprop.h
@@ -2,7 +2,7 @@
 /* Line breaking properties of Unicode characters.  */
 /* Generated automatically by gen-uni-tables for Unicode 5.1.0.  */
 
-/* Copyright (C) 2000-2002, 2004, 2007-2009 Free Software Foundation, Inc.
+/* Copyright (C) 2000-2002, 2004, 2007-2010 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
diff --git a/lib/uniwbrk/wbrktable.c b/lib/uniwbrk/wbrktable.c
index 81a2323e..ff25fb31 100644
--- a/lib/uniwbrk/wbrktable.c
+++ b/lib/uniwbrk/wbrktable.c
@@ -1,5 +1,5 @@
 /* Word break auxiliary table.
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009-2010 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2009.
 
    This program is free software: you can redistribute it and/or modify it
@@ -22,22 +22,22 @@
 
 /* This table contains the following rules (see UAX #29):
 
-		       last         current
+                       last         current
 
-			 ALetter × ALetter                         (WB5)
-			 ALetter × Numeric                         (WB9)
-			 Numeric × ALetter                         (WB10)
-			 Numeric × Numeric                         (WB8)
-			Katakana × Katakana                        (WB13)
+                         ALetter × ALetter                         (WB5)
+                         ALetter × Numeric                         (WB9)
+                         Numeric × ALetter                         (WB10)
+                         Numeric × Numeric                         (WB8)
+                        Katakana × Katakana                        (WB13)
   (ALetter | Numeric | Katakana) × ExtendNumLet                    (WB13a)
-		    ExtendNumLet × ExtendNumLet                    (WB13a)
-		    ExtendNumLet × (ALetter | Numeric | Katakana)  (WB13b)
+                    ExtendNumLet × ExtendNumLet                    (WB13a)
+                    ExtendNumLet × (ALetter | Numeric | Katakana)  (WB13b)
  */
 
 const unsigned char uniwbrk_table[10][8] =
 {        /* current:      OTHER            MIDNUMLET         NUMERIC         */
-	 /*                   KATAKANA           MIDLETTER      EXTENDNUMLET */
-	 /*                          ALETTER            MIDNUM               */
+         /*                   KATAKANA           MIDLETTER      EXTENDNUMLET */
+         /*                          ALETTER            MIDNUM               */
   /* last */
   /* WBP_OTHER */        {  1,    1,    1,    1,    1,    1,    1,    1 },
   /* WBP_KATAKANA */     {  1,    0,    1,    1,    1,    1,    1,    0 },
diff --git a/lib/uniwbrk/wbrktable.h b/lib/uniwbrk/wbrktable.h
index 14efee90..8a13378b 100644
--- a/lib/uniwbrk/wbrktable.h
+++ b/lib/uniwbrk/wbrktable.h
@@ -1,5 +1,5 @@
 /* Word break auxiliary table.
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009-2010 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2009.
 
    This program is free software: you can redistribute it and/or modify it
diff --git a/lib/uniwbrk/wordbreak-property.c b/lib/uniwbrk/wordbreak-property.c
index 4d0a212d..9d98b0b5 100644
--- a/lib/uniwbrk/wordbreak-property.c
+++ b/lib/uniwbrk/wordbreak-property.c
@@ -1,5 +1,5 @@
 /* Word break property.
-   Copyright (C) 2001-2003, 2006-2009 Free Software Foundation, Inc.
+   Copyright (C) 2001-2003, 2006-2010 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2009.
 
    This program is free software: you can redistribute it and/or modify it
@@ -30,15 +30,15 @@ uc_wordbreak_property (ucs4_t uc)
     {
       int lookup1 = uniwbrkprop.level1[index1];
       if (lookup1 >= 0)
-	{
-	  unsigned int index2 = (uc >> wbrkprop_header_2) & wbrkprop_header_3;
-	  int lookup2 = uniwbrkprop.level2[lookup1 + index2];
-	  if (lookup2 >= 0)
-	    {
-	      unsigned int index3 = uc & wbrkprop_header_4;
-	      return uniwbrkprop.level3[lookup2 + index3];
-	    }
-	}
+        {
+          unsigned int index2 = (uc >> wbrkprop_header_2) & wbrkprop_header_3;
+          int lookup2 = uniwbrkprop.level2[lookup1 + index2];
+          if (lookup2 >= 0)
+            {
+              unsigned int index3 = uc & wbrkprop_header_4;
+              return uniwbrkprop.level3[lookup2 + index3];
+            }
+        }
     }
   return WBP_OTHER;
 }
-- 
cgit v1.2.3