diff options
| author | Lorenz Kästle <12514511+RincewindsHat@users.noreply.github.com> | 2025-12-28 12:50:39 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-12-28 12:50:39 +0100 |
| commit | e7dd07c8025b169b7b43b955066a7200d9cdf244 (patch) | |
| tree | 6e8c927cfc67f3708b91ac79df07707af26e4929 /gl/uniwidth/width.c | |
| parent | 828a9720b10814c5836d03aa35af05d196c4104b (diff) | |
| parent | b0afb8fe0ff1d87165af9df61501197a06240dda (diff) | |
| download | monitoring-plugins-e7dd07c8025b169b7b43b955066a7200d9cdf244.tar.gz | |
Merge pull request #2213 from RincewindsHat/update/gnulib
Sync with Gnulib stable-202507 code (a8ac9f9ce5)
Diffstat (limited to 'gl/uniwidth/width.c')
| -rw-r--r-- | gl/uniwidth/width.c | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/gl/uniwidth/width.c b/gl/uniwidth/width.c new file mode 100644 index 00000000..c99a74cb --- /dev/null +++ b/gl/uniwidth/width.c | |||
| @@ -0,0 +1,95 @@ | |||
| 1 | /* Determine display width of Unicode character. | ||
| 2 | Copyright (C) 2001-2002, 2006-2025 Free Software Foundation, Inc. | ||
| 3 | Written by Bruno Haible <bruno@clisp.org>, 2002. | ||
| 4 | |||
| 5 | This file is free software: you can redistribute it and/or modify | ||
| 6 | it under the terms of the GNU Lesser General Public License as | ||
| 7 | published by the Free Software Foundation; either version 2.1 of the | ||
| 8 | License, or (at your option) any later version. | ||
| 9 | |||
| 10 | This file is distributed in the hope that it will be useful, | ||
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | GNU Lesser General Public License for more details. | ||
| 14 | |||
| 15 | You should have received a copy of the GNU Lesser General Public License | ||
| 16 | along with this program. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 17 | |||
| 18 | #include <config.h> | ||
| 19 | |||
| 20 | /* Specification. */ | ||
| 21 | #include "uniwidth.h" | ||
| 22 | |||
| 23 | #include "cjk.h" | ||
| 24 | |||
| 25 | /* The non-spacing attribute table consists of: | ||
| 26 | * Non-spacing characters; generated from PropList.txt or | ||
| 27 | "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt" | ||
| 28 | * Format control characters; generated from | ||
| 29 | "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt" | ||
| 30 | * Zero width characters; generated from | ||
| 31 | "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt" | ||
| 32 | * Hangul Jamo characters that have conjoining behaviour: | ||
| 33 | - jungseong = syllable-middle vowels | ||
| 34 | - jongseong = syllable-final consonants | ||
| 35 | Rationale: | ||
| 36 | 1) These characters act like combining characters. They have no | ||
| 37 | equivalent in legacy character sets. Therefore the EastAsianWidth.txt | ||
| 38 | file does not really matter for them; UAX #11 East Asian Width | ||
| 39 | <https://www.unicode.org/reports/tr11/> makes it clear that its focus | ||
| 40 | is on compatibility with traditional Japanese layout. | ||
| 41 | By contrast, the same glyphs without conjoining behaviour are available | ||
| 42 | in the U+3130..U+318F block, and these characters are mapped to legacy | ||
| 43 | character sets, and traditional Japanese layout matters for them. | ||
| 44 | 2) glibc does the same thing, see | ||
| 45 | <https://sourceware.org/bugzilla/show_bug.cgi?id=21750> | ||
| 46 | <https://sourceware.org/bugzilla/show_bug.cgi?id=26120> | ||
| 47 | */ | ||
| 48 | #include "uniwidth/width0.h" | ||
| 49 | |||
| 50 | #include "uniwidth/width2.h" | ||
| 51 | #include "unictype/bitmap.h" | ||
| 52 | |||
| 53 | #define SIZEOF(a) (sizeof(a) / sizeof(a[0])) /* Number of elements of A; A must be an actual array, not a pointer. */ | ||
| 54 | |||
| 55 | |||
| 56 | /* Determine number of column positions required for UC. Returns -1 for a control character, 0 for a non-spacing character, 2 for a double-width character, and 1 otherwise. ENCODING is only passed on to is_cjk_encoding. */ | ||
| 57 | int | ||
| 58 | uc_width (ucs4_t uc, const char *encoding) | ||
| 59 | { | ||
| 60 | /* Test for non-spacing or control character. Two-level lookup: nonspacing_table_ind is indexed by uc >> 9, i.e. by block of 512 code points. */ | ||
| 61 | if ((uc >> 9) < SIZEOF (nonspacing_table_ind)) | ||
| 62 | { | ||
| 63 | int ind = nonspacing_table_ind[uc >> 9]; | ||
| 64 | if (ind >= 0) /* A negative index means the block is not looked up in the data table. */ | ||
| 65 | if ((nonspacing_table_data[64*ind + ((uc >> 3) & 63)] >> (uc & 7)) & 1) /* 64 entries per block, one bit per code point, 8 code points per entry. */ | ||
| 66 | { | ||
| 67 | if (uc > 0 && uc < 0xa0) /* Flagged code point in 0x01..0x9f; uc == 0 is excluded and yields 0 below. */ | ||
| 68 | return -1; | ||
| 69 | else | ||
| 70 | return 0; | ||
| 71 | } | ||
| 72 | } | ||
| 73 | else if ((uc >> 9) == (0xe0000 >> 9)) /* uc is in 0xe0000..0xe01ff and lies beyond the end of nonspacing_table_ind. */ | ||
| 74 | { | ||
| 75 | if (uc >= 0xe0100) | ||
| 76 | { | ||
| 77 | if (uc <= 0xe01ef) /* 0xe0100..0xe01ef have width 0. */ | ||
| 78 | return 0; | ||
| 79 | } | ||
| 80 | else | ||
| 81 | { | ||
| 82 | if (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001) /* 0xe0020..0xe007f and 0xe0001 have width 0. */ | ||
| 83 | return 0; | ||
| 84 | } | ||
| 85 | } | ||
| 86 | /* Test for double-width character. Reached by every code point for which no value was returned above. */ | ||
| 87 | if (bitmap_lookup (&u_width2, uc)) | ||
| 88 | return 2; | ||
| 89 | /* In ancient CJK encodings, Cyrillic and most other characters are | ||
| 90 | double-width as well. */ | ||
| 91 | if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9 | ||
| 92 | && is_cjk_encoding (encoding)) | ||
| 93 | return 2; | ||
| 94 | return 1; /* Everything else occupies a single column. */ | ||
| 95 | } | ||
