Imported Upstream version 0.9.3 (upstream/0.9.3)

author: Andreas Rottmann <a.rottmann@gmx.at> 2010-05-27 18:23:15 +0200
committer: Andreas Rottmann <a.rottmann@gmx.at> 2010-05-27 18:23:15 +0200
commit: 3e0814cd9862b89c7a39672672937477bd87ddfb (patch)
tree: 159134a624e51509f40ed8823249f09a70d1dda3 /lib/uninorm/u-normalize-internal.h
parent: 3bebb94360f1c2333feb8d504a9fa4f96984d8b7 (diff)
1 files changed, 283 insertions, 283 deletions
diff --git a/lib/uninorm/u-normalize-internal.h b/lib/uninorm/u-normalize-internal.h
index 70c32551..43b7ec32 100644
--- a/lib/uninorm/u-normalize-internal.h
+++ b/lib/uninorm/u-normalize-internal.h
@@ -1,5 +1,5 @@
 /* Decomposition and composition of Unicode strings.
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009-2010 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2009.
 
    This program is free software: you can redistribute it and/or modify it
@@ -56,293 +56,293 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
 
     for (;;)
       {
-	int count;
-	ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
-	int decomposed_count;
-	int i;
-
-	if (s < s_end)
-	  {
-	    /* Fetch the next character.  */
-	    count = U_MBTOUC_UNSAFE (&decomposed[0], s, s_end - s);
-	    decomposed_count = 1;
-
-	    /* Decompose it, recursively.
-	       It would be possible to precompute the recursive decomposition
-	       and store it in a table.  But this would significantly increase
-	       the size of the decomposition tables, because for example for
-	       U+1FC1 the recursive canonical decomposition and the recursive
-	       compatibility decomposition are different.  */
-	    {
-	      int curr;
-
-	      for (curr = 0; curr < decomposed_count; )
-		{
-		  /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
-		     all elements are atomic.  */
-		  ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
-		  int curr_decomposed_count;
-
-		  curr_decomposed_count = decomposer (decomposed[curr], curr_decomposed);
-		  if (curr_decomposed_count >= 0)
-		    {
-		      /* Move curr_decomposed[0..curr_decomposed_count-1] over
-			 decomposed[curr], making room.  It's not worth using
-			 memcpy() here, since the counts are so small.  */
-		      int shift = curr_decomposed_count - 1;
-
-		      if (shift < 0)
-			abort ();
-		      if (shift > 0)
-			{
-			  int j;
-
-			  decomposed_count += shift;
-			  if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
-			    abort ();
-			  for (j = decomposed_count - 1 - shift; j > curr; j--)
-			    decomposed[j + shift] = decomposed[j];
-			}
-		      for (; shift >= 0; shift--)
-			decomposed[curr + shift] = curr_decomposed[shift];
-		    }
-		  else
-		    {
-		      /* decomposed[curr] is atomic.  */
-		      curr++;
-		    }
-		}
-	    }
-	  }
-	else
-	  {
-	    count = 0;
-	    decomposed_count = 0;
-	  }
-
-	i = 0;
-	for (;;)
-	  {
-	    ucs4_t uc;
-	    int ccc;
-
-	    if (s < s_end)
-	      {
-		/* Fetch the next character from the decomposition.  */
-		if (i == decomposed_count)
-		  break;
-		uc = decomposed[i];
-		ccc = uc_combining_class (uc);
-	      }
-	    else
-	      {
-		/* End of string reached.  */
-		uc = 0;
-		ccc = 0;
-	      }
-
-	    if (ccc == 0)
-	      {
-		size_t j;
-
-		/* Apply the canonical ordering algorithm to the accumulated
-		   sequence of characters.  */
-		if (sortbuf_count > 1)
-		  gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
-							   sortbuf + sortbuf_count);
-
-		if (composer != NULL)
-		  {
-		    /* Attempt to combine decomposed characters, as specified
-		       in the Unicode Standard Annex #15 "Unicode Normalization
-		       Forms".  We need to check
-			 1. whether the first accumulated character is a
-			    "starter" (i.e. has ccc = 0).  This is usually the
-			    case.  But when the string starts with a
-			    non-starter, the sortbuf also starts with a
-			    non-starter.  Btw, this check could also be
-			    omitted, because the composition table has only
-			    entries (code1, code2) for which code1 is a
-			    starter; if the first accumulated character is not
-			    a starter, no lookup will succeed.
-			 2. If the sortbuf has more than one character, check
-			    for each of these characters that are not "blocked"
-			    from the starter (i.e. have a ccc that is higher
-			    than the ccc of the previous character) whether it
-			    can be combined with the first character.
-			 3. If only one character is left in sortbuf, check
-			    whether it can be combined with the next character
-			    (also a starter).  */
-		    if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
-		      {
-			for (j = 1; j < sortbuf_count; )
-			  {
-			    if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
-			      {
-				ucs4_t combined =
-				  composer (sortbuf[0].code, sortbuf[j].code);
-				if (combined)
-				  {
-				    size_t k;
-
-				    sortbuf[0].code = combined;
-				    /* sortbuf[0].ccc = 0, still valid.  */
-				    for (k = j + 1; k < sortbuf_count; k++)
-				      sortbuf[k - 1] = sortbuf[k];
-				    sortbuf_count--;
-				    continue;
-				  }
-			      }
-			    j++;
-			  }
-			if (s < s_end && sortbuf_count == 1)
-			  {
-			    ucs4_t combined =
-			      composer (sortbuf[0].code, uc);
-			    if (combined)
-			      {
-				uc = combined;
-				ccc = 0;
-				/* uc could be further combined with subsequent
-				   characters.  So don't put it into sortbuf[0] in
-				   this round, only in the next round.  */
-				sortbuf_count = 0;
-			      }
-			  }
-		      }
-		  }
-
-		for (j = 0; j < sortbuf_count; j++)
-		  {
-		    ucs4_t muc = sortbuf[j].code;
-
-		    /* Append muc to the result accumulator.  */
-		    if (length < allocated)
-		      {
-			int ret =
-			  U_UCTOMB (result + length, muc, allocated - length);
-			if (ret == -1)
-			  {
-			    errno = EINVAL;
-			    goto fail;
-			  }
-			if (ret >= 0)
-			  {
-			    length += ret;
-			    goto done_appending;
-			  }
-		      }
-		    {
-		      size_t old_allocated = allocated;
-		      size_t new_allocated = 2 * old_allocated;
-		      if (new_allocated < 64)
-			new_allocated = 64;
-		      if (new_allocated < old_allocated) /* integer overflow? */
-			abort ();
-		      {
-			UNIT *larger_result;
-			if (result == NULL)
-			  {
-			    larger_result =
-			      (UNIT *) malloc (new_allocated * sizeof (UNIT));
-			    if (larger_result == NULL)
-			      {
-				errno = ENOMEM;
-				goto fail;
-			      }
-			  }
-			else if (result == resultbuf)
-			  {
-			    larger_result =
-			      (UNIT *) malloc (new_allocated * sizeof (UNIT));
-			    if (larger_result == NULL)
-			      {
-				errno = ENOMEM;
-				goto fail;
-			      }
-			    U_CPY (larger_result, resultbuf, length);
-			  }
-			else
-			  {
-			    larger_result =
-			      (UNIT *) realloc (result, new_allocated * sizeof (UNIT));
-			    if (larger_result == NULL)
-			      {
-				errno = ENOMEM;
-				goto fail;
-			      }
-			  }
-			result = larger_result;
-			allocated = new_allocated;
-			{
-			  int ret =
-			    U_UCTOMB (result + length, muc, allocated - length);
-			  if (ret == -1)
-			    {
-			      errno = EINVAL;
-			      goto fail;
-			    }
-			  if (ret < 0)
-			    abort ();
-			  length += ret;
-			  goto done_appending;
-			}
-		      }
-		    }
-		   done_appending: ;
-		  }
-
-		/* sortbuf is now empty.  */
-		sortbuf_count = 0;
-	      }
-
-	    if (!(s < s_end))
-	      /* End of string reached.  */
-	      break;
-
-	    /* Append (uc, ccc) to sortbuf.  */
-	    if (sortbuf_count == sortbuf_allocated)
-	      {
-		struct ucs4_with_ccc *new_sortbuf;
-
-		sortbuf_allocated = 2 * sortbuf_allocated;
-		if (sortbuf_allocated < sortbuf_count) /* integer overflow? */
-		  abort ();
-		new_sortbuf =
-		  (struct ucs4_with_ccc *) malloc (2 * sortbuf_allocated * sizeof (struct ucs4_with_ccc));
-		memcpy (new_sortbuf, sortbuf,
-			sortbuf_count * sizeof (struct ucs4_with_ccc));
-		if (sortbuf != sortbuf_preallocated)
-		  free (sortbuf);
-		sortbuf = new_sortbuf;
-	      }
-	    sortbuf[sortbuf_count].code = uc;
-	    sortbuf[sortbuf_count].ccc = ccc;
-	    sortbuf_count++;
-
-	    i++;
-	  }
-
-	if (!(s < s_end))
-	  /* End of string reached.  */
-	  break;
-
-	s += count;
+        int count;
+        ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
+        int decomposed_count;
+        int i;
+
+        if (s < s_end)
+          {
+            /* Fetch the next character.  */
+            count = U_MBTOUC_UNSAFE (&decomposed[0], s, s_end - s);
+            decomposed_count = 1;
+
+            /* Decompose it, recursively.
+               It would be possible to precompute the recursive decomposition
+               and store it in a table.  But this would significantly increase
+               the size of the decomposition tables, because for example for
+               U+1FC1 the recursive canonical decomposition and the recursive
+               compatibility decomposition are different.  */
+            {
+              int curr;
+
+              for (curr = 0; curr < decomposed_count; )
+                {
+                  /* Invariant: decomposed[0..curr-1] is fully decomposed, i.e.
+                     all elements are atomic.  */
+                  ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
+                  int curr_decomposed_count;
+
+                  curr_decomposed_count = decomposer (decomposed[curr], curr_decomposed);
+                  if (curr_decomposed_count >= 0)
+                    {
+                      /* Move curr_decomposed[0..curr_decomposed_count-1] over
+                         decomposed[curr], making room.  It's not worth using
+                         memcpy() here, since the counts are so small.  */
+                      int shift = curr_decomposed_count - 1;
+
+                      if (shift < 0)
+                        abort ();
+                      if (shift > 0)
+                        {
+                          int j;
+
+                          decomposed_count += shift;
+                          if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
+                            abort ();
+                          for (j = decomposed_count - 1 - shift; j > curr; j--)
+                            decomposed[j + shift] = decomposed[j];
+                        }
+                      for (; shift >= 0; shift--)
+                        decomposed[curr + shift] = curr_decomposed[shift];
+                    }
+                  else
+                    {
+                      /* decomposed[curr] is atomic.  */
+                      curr++;
+                    }
+                }
+            }
+          }
+        else
+          {
+            count = 0;
+            decomposed_count = 0;
+          }
+
+        i = 0;
+        for (;;)
+          {
+            ucs4_t uc;
+            int ccc;
+
+            if (s < s_end)
+              {
+                /* Fetch the next character from the decomposition.  */
+                if (i == decomposed_count)
+                  break;
+                uc = decomposed[i];
+                ccc = uc_combining_class (uc);
+              }
+            else
+              {
+                /* End of string reached.  */
+                uc = 0;
+                ccc = 0;
+              }
+
+            if (ccc == 0)
+              {
+                size_t j;
+
+                /* Apply the canonical ordering algorithm to the accumulated
+                   sequence of characters.  */
+                if (sortbuf_count > 1)
+                  gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
+                                                           sortbuf + sortbuf_count);
+
+                if (composer != NULL)
+                  {
+                    /* Attempt to combine decomposed characters, as specified
+                       in the Unicode Standard Annex #15 "Unicode Normalization
+                       Forms".  We need to check
+                         1. whether the first accumulated character is a
+                            "starter" (i.e. has ccc = 0).  This is usually the
+                            case.  But when the string starts with a
+                            non-starter, the sortbuf also starts with a
+                            non-starter.  Btw, this check could also be
+                            omitted, because the composition table has only
+                            entries (code1, code2) for which code1 is a
+                            starter; if the first accumulated character is not
+                            a starter, no lookup will succeed.
+                         2. If the sortbuf has more than one character, check
+                            for each of these characters that are not "blocked"
+                            from the starter (i.e. have a ccc that is higher
+                            than the ccc of the previous character) whether it
+                            can be combined with the first character.
+                         3. If only one character is left in sortbuf, check
+                            whether it can be combined with the next character
+                            (also a starter).  */
+                    if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
+                      {
+                        for (j = 1; j < sortbuf_count; )
+                          {
+                            if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
+                              {
+                                ucs4_t combined =
+                                  composer (sortbuf[0].code, sortbuf[j].code);
+                                if (combined)
+                                  {
+                                    size_t k;
+
+                                    sortbuf[0].code = combined;
+                                    /* sortbuf[0].ccc = 0, still valid.  */
+                                    for (k = j + 1; k < sortbuf_count; k++)
+                                      sortbuf[k - 1] = sortbuf[k];
+                                    sortbuf_count--;
+                                    continue;
+                                  }
+                              }
+                            j++;
+                          }
+                        if (s < s_end && sortbuf_count == 1)
+                          {
+                            ucs4_t combined =
+                              composer (sortbuf[0].code, uc);
+                            if (combined)
+                              {
+                                uc = combined;
+                                ccc = 0;
+                                /* uc could be further combined with subsequent
+                                   characters.  So don't put it into sortbuf[0] in
+                                   this round, only in the next round.  */
+                                sortbuf_count = 0;
+                              }
+                          }
+                      }
+                  }
+
+                for (j = 0; j < sortbuf_count; j++)
+                  {
+                    ucs4_t muc = sortbuf[j].code;
+
+                    /* Append muc to the result accumulator.  */
+                    if (length < allocated)
+                      {
+                        int ret =
+                          U_UCTOMB (result + length, muc, allocated - length);
+                        if (ret == -1)
+                          {
+                            errno = EINVAL;
+                            goto fail;
+                          }
+                        if (ret >= 0)
+                          {
+                            length += ret;
+                            goto done_appending;
+                          }
+                      }
+                    {
+                      size_t old_allocated = allocated;
+                      size_t new_allocated = 2 * old_allocated;
+                      if (new_allocated < 64)
+                        new_allocated = 64;
+                      if (new_allocated < old_allocated) /* integer overflow? */
+                        abort ();
+                      {
+                        UNIT *larger_result;
+                        if (result == NULL)
+                          {
+                            larger_result =
+                              (UNIT *) malloc (new_allocated * sizeof (UNIT));
+                            if (larger_result == NULL)
+                              {
+                                errno = ENOMEM;
+                                goto fail;
+                              }
+                          }
+                        else if (result == resultbuf)
+                          {
+                            larger_result =
+                              (UNIT *) malloc (new_allocated * sizeof (UNIT));
+                            if (larger_result == NULL)
+                              {
+                                errno = ENOMEM;
+                                goto fail;
+                              }
+                            U_CPY (larger_result, resultbuf, length);
+                          }
+                        else
+                          {
+                            larger_result =
+                              (UNIT *) realloc (result, new_allocated * sizeof (UNIT));
+                            if (larger_result == NULL)
+                              {
+                                errno = ENOMEM;
+                                goto fail;
+                              }
+                          }
+                        result = larger_result;
+                        allocated = new_allocated;
+                        {
+                          int ret =
+                            U_UCTOMB (result + length, muc, allocated - length);
+                          if (ret == -1)
+                            {
+                              errno = EINVAL;
+                              goto fail;
+                            }
+                          if (ret < 0)
+                            abort ();
+                          length += ret;
+                          goto done_appending;
+                        }
+                      }
+                    }
+                   done_appending: ;
+                  }
+
+                /* sortbuf is now empty.  */
+                sortbuf_count = 0;
+              }
+
+            if (!(s < s_end))
+              /* End of string reached.  */
+              break;
+
+            /* Append (uc, ccc) to sortbuf.  */
+            if (sortbuf_count == sortbuf_allocated)
+              {
+                struct ucs4_with_ccc *new_sortbuf;
+
+                sortbuf_allocated = 2 * sortbuf_allocated;
+                if (sortbuf_allocated < sortbuf_count) /* integer overflow? */
+                  abort ();
+                new_sortbuf =
+                  (struct ucs4_with_ccc *) malloc (2 * sortbuf_allocated * sizeof (struct ucs4_with_ccc));
+                memcpy (new_sortbuf, sortbuf,
+                        sortbuf_count * sizeof (struct ucs4_with_ccc));
+                if (sortbuf != sortbuf_preallocated)
+                  free (sortbuf);
+                sortbuf = new_sortbuf;
+              }
+            sortbuf[sortbuf_count].code = uc;
+            sortbuf[sortbuf_count].ccc = ccc;
+            sortbuf_count++;
+
+            i++;
+          }
+
+        if (!(s < s_end))
+          /* End of string reached.  */
+          break;
+
+        s += count;
       }
   }
 
   if (length == 0)
     {
       if (result == NULL)
-	{
-	  /* Return a non-NULL value.  NULL means error.  */
-	  result = (UNIT *) malloc (1);
-	  if (result == NULL)
-	    {
-	      errno = ENOMEM;
-	      goto fail;
-	    }
-	}
+        {
+          /* Return a non-NULL value.  NULL means error.  */
+          result = (UNIT *) malloc (1);
+          if (result == NULL)
+            {
+              errno = ENOMEM;
+              goto fail;
+            }
+        }
     }
   else if (result != resultbuf && length < allocated)
     {
@@ -351,7 +351,7 @@ FUNC (uninorm_t nf, const UNIT *s, size_t n,
 
       memory = (UNIT *) realloc (result, length * sizeof (UNIT));
       if (memory != NULL)
-	result = memory;
+        result = memory;
     }
 
   if (sortbuf_count > 0)
author	Andreas Rottmann <a.rottmann@gmx.at>	2010-05-27 18:23:15 +0200
committer	Andreas Rottmann <a.rottmann@gmx.at>	2010-05-27 18:23:15 +0200
commit	3e0814cd9862b89c7a39672672937477bd87ddfb (patch)
tree	159134a624e51509f40ed8823249f09a70d1dda3 /lib/uninorm/u-normalize-internal.h
parent	3bebb94360f1c2333feb8d504a9fa4f96984d8b7 (diff)