summaryrefslogtreecommitdiffstats
path: root/gl/uniwidth/width.c
diff options
context:
space:
mode:
authorLorenz Kästle <12514511+RincewindsHat@users.noreply.github.com>2025-12-28 12:50:39 +0100
committerGitHub <noreply@github.com>2025-12-28 12:50:39 +0100
commite7dd07c8025b169b7b43b955066a7200d9cdf244 (patch)
tree6e8c927cfc67f3708b91ac79df07707af26e4929 /gl/uniwidth/width.c
parent828a9720b10814c5836d03aa35af05d196c4104b (diff)
parentb0afb8fe0ff1d87165af9df61501197a06240dda (diff)
downloadmonitoring-plugins-e7dd07c8025b169b7b43b955066a7200d9cdf244.tar.gz
Merge pull request #2213 from RincewindsHat/update/gnulib
Sync with Gnulib stable-202507 code (a8ac9f9ce5)
Diffstat (limited to 'gl/uniwidth/width.c')
-rw-r--r--gl/uniwidth/width.c95
1 files changed, 95 insertions, 0 deletions
diff --git a/gl/uniwidth/width.c b/gl/uniwidth/width.c
new file mode 100644
index 00000000..c99a74cb
--- /dev/null
+++ b/gl/uniwidth/width.c
@@ -0,0 +1,95 @@
1/* Determine display width of Unicode character.
2 Copyright (C) 2001-2002, 2006-2025 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2002.
4
5 This file is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation; either version 2.1 of the
8 License, or (at your option) any later version.
9
10 This file is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18#include <config.h>
19
20/* Specification. */
21#include "uniwidth.h"
22
23#include "cjk.h"
24
25/* The non-spacing attribute table consists of:
26 * Non-spacing characters; generated from PropList.txt or
27 "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt"
28 * Format control characters; generated from
29 "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt"
30 * Zero width characters; generated from
31 "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt"
32 * Hangul Jamo characters that have conjoining behaviour:
33 - jungseong = syllable-middle vowels
34 - jongseong = syllable-final consonants
35 Rationale:
36 1) These characters act like combining characters. They have no
37 equivalent in legacy character sets. Therefore the EastAsianWidth.txt
38 file does not really matter for them; UAX #11 East Asian Width
39 <https://www.unicode.org/reports/tr11/> makes it clear that its focus
40 is on compatibility with traditional Japanese layout.
41 By contrast, the same glyphs without conjoining behaviour are available
42 in the U+3130..U+318F block, and these characters are mapped to legacy
43 character sets, and traditional Japanese layout matters for them.
44 2) glibc does the same thing, see
45 <https://sourceware.org/bugzilla/show_bug.cgi?id=21750>
46 <https://sourceware.org/bugzilla/show_bug.cgi?id=26120>
47 */
48#include "uniwidth/width0.h"
49
50#include "uniwidth/width2.h"
51#include "unictype/bitmap.h"
52
/* Number of elements in the array A.  A must be an actual array, not a
   pointer; the argument is parenthesized so that any array-valued
   expression can be passed safely.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
54
55
56/* Determine number of column positions required for UC. */
57int
58uc_width (ucs4_t uc, const char *encoding)
59{
60 /* Test for non-spacing or control character. */
61 if ((uc >> 9) < SIZEOF (nonspacing_table_ind))
62 {
63 int ind = nonspacing_table_ind[uc >> 9];
64 if (ind >= 0)
65 if ((nonspacing_table_data[64*ind + ((uc >> 3) & 63)] >> (uc & 7)) & 1)
66 {
67 if (uc > 0 && uc < 0xa0)
68 return -1;
69 else
70 return 0;
71 }
72 }
73 else if ((uc >> 9) == (0xe0000 >> 9))
74 {
75 if (uc >= 0xe0100)
76 {
77 if (uc <= 0xe01ef)
78 return 0;
79 }
80 else
81 {
82 if (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001)
83 return 0;
84 }
85 }
86 /* Test for double-width character. */
87 if (bitmap_lookup (&u_width2, uc))
88 return 2;
89 /* In ancient CJK encodings, Cyrillic and most other characters are
90 double-width as well. */
91 if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9
92 && is_cjk_encoding (encoding))
93 return 2;
94 return 1;
95}