Merge pull request #2213 from RincewindsHat/update/gnulib

Sync with Gnulib stable-202507 code (a8ac9f9ce5)
author: Lorenz Kästle <12514511+RincewindsHat@users.noreply.github.com> 2025-12-28 12:50:39 +0100
committer: GitHub <noreply@github.com> 2025-12-28 12:50:39 +0100
commit: e7dd07c8025b169b7b43b955066a7200d9cdf244 (patch)
tree: 6e8c927cfc67f3708b91ac79df07707af26e4929 /gl/uniwidth/width.c
parent: 828a9720b10814c5836d03aa35af05d196c4104b (diff)
parent: b0afb8fe0ff1d87165af9df61501197a06240dda (diff)
download: monitoring-plugins-e7dd07c8025b169b7b43b955066a7200d9cdf244.tar.gz
1 files changed, 95 insertions, 0 deletions
diff --git a/gl/uniwidth/width.c b/gl/uniwidth/width.c
new file mode 100644
index 00000000..c99a74cb
--- /dev/null
+++ b/gl/uniwidth/width.c
@@ -0,0 +1,95 @@
+/* Determine display width of Unicode character.
+   Copyright (C) 2001-2002, 2006-2025 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2002.
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+#include <config.h>
+/* Specification.  */
+#include "uniwidth.h"
+#include "cjk.h"
+/* The non-spacing attribute table consists of:
+   * Non-spacing characters; generated from PropList.txt or
+     "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt"
+   * Format control characters; generated from
+     "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt"
+   * Zero width characters; generated from
+     "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt"
+   * Hangul Jamo characters that have conjoining behaviour:
+       - jungseong = syllable-middle vowels
+       - jongseong = syllable-final consonants
+     Rationale:
+     1) These characters act like combining characters. They have no
+     equivalent in legacy character sets. Therefore the EastAsianWidth.txt
+     file does not really matter for them; UAX #11 East Asian Width
+     <https://www.unicode.org/reports/tr11/> makes it clear that its focus
+     is on compatibility with traditional Japanese layout.
+     By contrast, the same glyphs without conjoining behaviour are available
+     in the U+3130..U+318F block, and these characters are mapped to legacy
+     character sets, and traditional Japanese layout matters for them.
+     2) glibc does the same thing, see
+     <https://sourceware.org/bugzilla/show_bug.cgi?id=21750>
+     <https://sourceware.org/bugzilla/show_bug.cgi?id=26120>
+ */
+#include "uniwidth/width0.h"
+#include "uniwidth/width2.h"
+#include "unictype/bitmap.h"
+#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) /* Number of elements in array A.  */
+/* Determine number of column positions required for UC.
+   Returns -1 for a code point in U+0001..U+009F that is flagged in the
+   non-spacing table, 0 for any other flagged code point or one matched by
+   the U+E0xxx range checks below, 2 for a double-width character (or, when
+   is_cjk_encoding (ENCODING) holds, for most of U+00A1..U+FF60), and 1
+   otherwise.  */
+int
+uc_width (ucs4_t uc, const char *encoding)
+{
+  /* Test for non-spacing or control character.
+     nonspacing_table_ind is indexed by uc >> 9, i.e. one entry per run of
+     512 code points; the comparison keeps that index inside the array.
+     A non-negative entry IND selects 64 consecutive elements of
+     nonspacing_table_data; element (uc >> 3) & 63, bit uc & 7 is the flag
+     for UC.  */
+  if ((uc >> 9) < SIZEOF (nonspacing_table_ind))
+    {
+      int ind = nonspacing_table_ind[uc >> 9];
+      if (ind >= 0) /* A negative entry skips the bit test for this run.  */
+        if ((nonspacing_table_data[64*ind + ((uc >> 3) & 63)] >> (uc & 7)) & 1)
+          {
+            if (uc > 0 && uc < 0xa0)
+              return -1;
+            else
+              return 0;
+          }
+    }
+  else if ((uc >> 9) == (0xe0000 >> 9)) /* Past the index table: U+E0000..U+E01FF.  */
+    {
+      if (uc >= 0xe0100)
+        {
+          if (uc <= 0xe01ef) /* U+E0100..U+E01EF get width 0.  */
+            return 0;
+        }
+      else
+        {
+          if (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001) /* U+E0020..U+E007F and U+E0001 get width 0.  */
+            return 0;
+        }
+    }
+  /* Test for double-width character.  Every code point that was not given
+     a width above falls through to here.  */
+  if (bitmap_lookup (&u_width2, uc))
+    return 2;
+  /* In ancient CJK encodings, Cyrillic and most other characters are
+     double-width as well.  The range tested is U+00A1..U+FF60, with U+20A9
+     excluded.  */
+  if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9
+      && is_cjk_encoding (encoding))
+    return 2;
+  return 1;
+}
author	Lorenz Kästle <12514511+RincewindsHat@users.noreply.github.com>	2025-12-28 12:50:39 +0100
committer	GitHub <noreply@github.com>	2025-12-28 12:50:39 +0100
commit	e7dd07c8025b169b7b43b955066a7200d9cdf244 (patch)
tree	6e8c927cfc67f3708b91ac79df07707af26e4929 /gl/uniwidth/width.c
parent	828a9720b10814c5836d03aa35af05d196c4104b (diff)
parent	b0afb8fe0ff1d87165af9df61501197a06240dda (diff)
download	monitoring-plugins-e7dd07c8025b169b7b43b955066a7200d9cdf244.tar.gz

diff --git a/gl/uniwidth/width.c b/gl/uniwidth/width.c new file mode 100644 index 00000000..c99a74cb --- /dev/null +++ b/gl/uniwidth/width.c
@@ -0,0 +1,95 @@
	1	/* Determine display width of Unicode character.
	2	Copyright (C) 2001-2002, 2006-2025 Free Software Foundation, Inc.
	3	Written by Bruno Haible <bruno@clisp.org>, 2002.
	4
	5	This file is free software: you can redistribute it and/or modify
	6	it under the terms of the GNU Lesser General Public License as
	7	published by the Free Software Foundation; either version 2.1 of the
	8	License, or (at your option) any later version.
	9
	10	This file is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	GNU Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public License
	16	along with this program. If not, see <https://www.gnu.org/licenses/>. */
	17
	18	#include <config.h>
	19
	20	/* Specification. */
	21	#include "uniwidth.h"
	22
	23	#include "cjk.h"
	24
	25	/* The non-spacing attribute table consists of:
	26	* Non-spacing characters; generated from PropList.txt or
	27	"grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt"
	28	* Format control characters; generated from
	29	"grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt"
	30	* Zero width characters; generated from
	31	"grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt"
	32	* Hangul Jamo characters that have conjoining behaviour:
	33	- jungseong = syllable-middle vowels
	34	- jongseong = syllable-final consonants
	35	Rationale:
	36	1) These characters act like combining characters. They have no
	37	equivalent in legacy character sets. Therefore the EastAsianWidth.txt
	38	file does not really matter for them; UAX #11 East Asian Width
	39	<https://www.unicode.org/reports/tr11/> makes it clear that its focus
	40	is on compatibility with traditional Japanese layout.
	41	By contrast, the same glyphs without conjoining behaviour are available
	42	in the U+3130..U+318F block, and these characters are mapped to legacy
	43	character sets, and traditional Japanese layout matters for them.
	44	2) glibc does the same thing, see
	45	<https://sourceware.org/bugzilla/show_bug.cgi?id=21750>
	46	<https://sourceware.org/bugzilla/show_bug.cgi?id=26120>
	47	*/
	48	#include "uniwidth/width0.h"
	49
	50	#include "uniwidth/width2.h"
	51	#include "unictype/bitmap.h"
	52
	53	#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) /* Number of elements in array A. */
	54
	55
	56	/* Determine number of column positions required for UC; the result is -1, 0, 1 or 2. */
	57	int
	58	uc_width (ucs4_t uc, const char *encoding)
	59	{
	60	/* Test for non-spacing or control character. The index table is keyed by uc >> 9, i.e. one entry per run of 512 code points. */
	61	if ((uc >> 9) < SIZEOF (nonspacing_table_ind))
	62	{
	63	int ind = nonspacing_table_ind[uc >> 9];
	64	if (ind >= 0) /* A negative entry skips the bit test for this run. */
	65	if ((nonspacing_table_data[64*ind + ((uc >> 3) & 63)] >> (uc & 7)) & 1)
	66	{
	67	if (uc > 0 && uc < 0xa0)
	68	return -1;
	69	else
	70	return 0;
	71	}
	72	}
	73	else if ((uc >> 9) == (0xe0000 >> 9)) /* Past the index table: U+E0000..U+E01FF. */
	74	{
	75	if (uc >= 0xe0100)
	76	{
	77	if (uc <= 0xe01ef) /* U+E0100..U+E01EF get width 0. */
	78	return 0;
	79	}
	80	else
	81	{
	82	if (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001) /* U+E0020..U+E007F and U+E0001 get width 0. */
	83	return 0;
	84	}
	85	}
	86	/* Test for double-width character. Every code point not given a width above falls through to here. */
	87	if (bitmap_lookup (&u_width2, uc))
	88	return 2;
	89	/* In ancient CJK encodings, Cyrillic and most other characters are
	90	double-width as well. The range tested is U+00A1..U+FF60, with U+20A9 excluded. */
	91	if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9
	92	&& is_cjk_encoding (encoding))
	93	return 2;
	94	return 1;
	95	}