Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
unicode.py
Go to the documentation of this file.
1
# unicode.py
2
3
import
sys
4
from
itertools
import
filterfalse
5
from
typing
import
List, Tuple, Union
6
7
8
class
_lazyclassproperty
:
9
def
__init__
(self, fn):
10
self.
fn
= fn
11
self.
__doc__
=
fn.__doc__
12
self.
__name__
=
fn.__name__
13
14
def
__get__
(self, obj, cls):
15
if
cls
is
None
:
16
cls = type(obj)
17
if
not
hasattr
(cls,
"_intern"
)
or
any
(
18
cls.
_intern
is
getattr
(superclass,
"_intern"
, [])
19
for
superclass
in
cls.__mro__[1:]
20
):
21
cls.
_intern
= {}
22
attrname = self.
fn
.__name__
23
if
attrname
not
in
cls.
_intern
:
24
cls.
_intern
[attrname] = self.
fn
(cls)
25
return
cls.
_intern
[attrname]
26
27
28
# Type alias for a list of code point ranges, where each entry is either a
# 2-tuple of ints or a 1-tuple holding a single int.
UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
29
30
31
class unicode_set:
    """
    A set of Unicode characters, for language-specific strings for
    ``alphas``, ``nums``, ``alphanums``, and ``printables``.
    A unicode_set is defined by a list of ranges in the Unicode character
    set, in a class attribute ``_ranges``. Ranges can be specified using
    2-tuples or a 1-tuple, such as::

        _ranges = [
            (0x0020, 0x007e),
            (0x00a0, 0x00ff),
            (0x0100,),
            ]

    Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).

    A unicode set can also be defined using multiple inheritance of other unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """

    _ranges: UnicodeRangeList = []

    @_lazyclassproperty
    def _chars_for_ranges(cls):
        """sorted list of the distinct characters covered by this class's ranges"""
        ret = []
        # Collect ``_ranges`` from this class and its bases in MRO order,
        # stopping once the ``unicode_set`` base itself is reached.
        for cc in cls.__mro__:
            if cc is unicode_set:
                break
            for rr in getattr(cc, "_ranges", ()):
                # rr[-1] is rr[0] for a 1-tuple, so (x,) expands to just x
                ret.extend(range(rr[0], rr[-1] + 1))
        return [chr(c) for c in sorted(set(ret))]

    @_lazyclassproperty
    def printables(cls):
        """all non-whitespace characters in this range"""
        return "".join(filterfalse(str.isspace, cls._chars_for_ranges))

    @_lazyclassproperty
    def alphas(cls):
        """all alphabetic characters in this range"""
        return "".join(filter(str.isalpha, cls._chars_for_ranges))

    @_lazyclassproperty
    def nums(cls):
        """all numeric digit characters in this range"""
        return "".join(filter(str.isdigit, cls._chars_for_ranges))

    @_lazyclassproperty
    def alphanums(cls):
        """all alphanumeric characters in this range"""
        return cls.alphas + cls.nums

    @_lazyclassproperty
    def identchars(cls):
        """all characters in this range that are valid identifier characters, plus underscore '_'"""
        return "".join(
            sorted(
                set(
                    "".join(filter(str.isidentifier, cls._chars_for_ranges))
                    + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"
                    + "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
                    + "_"
                )
            )
        )

    @_lazyclassproperty
    def identbodychars(cls):
        """
        all characters in this range that are valid identifier body characters,
        plus the digits 0-9, and · (Unicode MIDDLE DOT)
        """
        return "".join(
            sorted(
                set(
                    cls.identchars
                    + "0123456789·"
                    + "".join(
                        # prefixing "_" tests whether c is valid after the
                        # first character of an identifier
                        [c for c in cls._chars_for_ranges if ("_" + c).isidentifier()]
                    )
                )
            )
        )

    @_lazyclassproperty
    def identifier(cls):
        """
        a pyparsing Word expression for an identifier using this range's definitions for
        identchars and identbodychars
        """
        from pip._vendor.pyparsing import Word

        return Word(cls.identchars, cls.identbodychars)
126
127
128
class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.
    """

    # fmt: off

    # define ranges in language character sets
    _ranges: UnicodeRangeList = [
        (0x0020, sys.maxunicode),
    ]

    class BasicMultilingualPlane(unicode_set):
        """Unicode set for the Basic Multilingual Plane"""
        _ranges: UnicodeRangeList = [
            (0x0020, 0xFFFF),
        ]

    class Latin1(unicode_set):
        """Unicode set for Latin-1 Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0020, 0x007E),
            (0x00A0, 0x00FF),
        ]

    class LatinA(unicode_set):
        """Unicode set for Latin-A Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0100, 0x017F),
        ]

    class LatinB(unicode_set):
        """Unicode set for Latin-B Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0180, 0x024F),
        ]

    class Greek(unicode_set):
        """Unicode set for Greek Unicode Character Ranges"""
        _ranges: UnicodeRangeList = [
            (0x0342, 0x0345),
            (0x0370, 0x0377),
            (0x037A, 0x037F),
            (0x0384, 0x038A),
            (0x038C,),
            (0x038E, 0x03A1),
            (0x03A3, 0x03E1),
            (0x03F0, 0x03FF),
            (0x1D26, 0x1D2A),
            (0x1D5E,),
            (0x1D60,),
            (0x1D66, 0x1D6A),
            (0x1F00, 0x1F15),
            (0x1F18, 0x1F1D),
            (0x1F20, 0x1F45),
            (0x1F48, 0x1F4D),
            (0x1F50, 0x1F57),
            (0x1F59,),
            (0x1F5B,),
            (0x1F5D,),
            (0x1F5F, 0x1F7D),
            (0x1F80, 0x1FB4),
            (0x1FB6, 0x1FC4),
            (0x1FC6, 0x1FD3),
            (0x1FD6, 0x1FDB),
            (0x1FDD, 0x1FEF),
            (0x1FF2, 0x1FF4),
            (0x1FF6, 0x1FFE),
            (0x2129,),
            (0x2719, 0x271A),
            (0xAB65,),
            (0x10140, 0x1018D),
            (0x101A0,),
            (0x1D200, 0x1D245),
            (0x1F7A1, 0x1F7A7),
        ]

    class Cyrillic(unicode_set):
        """Unicode set for Cyrillic Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0400, 0x052F),
            (0x1C80, 0x1C88),
            (0x1D2B,),
            (0x1D78,),
            (0x2DE0, 0x2DFF),
            (0xA640, 0xA672),
            (0xA674, 0xA69F),
            (0xFE2E, 0xFE2F),
        ]

    class Chinese(unicode_set):
        """Unicode set for Chinese Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x2E80, 0x2E99),
            (0x2E9B, 0x2EF3),
            (0x31C0, 0x31E3),
            (0x3400, 0x4DB5),
            (0x4E00, 0x9FEF),
            (0xA700, 0xA707),
            (0xF900, 0xFA6D),
            (0xFA70, 0xFAD9),
            (0x16FE2, 0x16FE3),
            (0x1F210, 0x1F212),
            (0x1F214, 0x1F23B),
            (0x1F240, 0x1F248),
            (0x20000, 0x2A6D6),
            (0x2A700, 0x2B734),
            (0x2B740, 0x2B81D),
            (0x2B820, 0x2CEA1),
            (0x2CEB0, 0x2EBE0),
            (0x2F800, 0x2FA1D),
        ]

    class Japanese(unicode_set):
        """Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"""

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x4E00, 0x9FBF),
                (0x3000, 0x303F),
            ]

        class Hiragana(unicode_set):
            """Unicode set for Hiragana Unicode Character Range"""
            _ranges: UnicodeRangeList = [
                (0x3041, 0x3096),
                (0x3099, 0x30A0),
                (0x30FC,),
                (0xFF70,),
                (0x1B001,),
                (0x1B150, 0x1B152),
                (0x1F200,),
            ]

        class Katakana(unicode_set):
            """Unicode set for Katakana Unicode Character Range"""
            _ranges: UnicodeRangeList = [
                (0x3099, 0x309C),
                (0x30A0, 0x30FF),
                (0x31F0, 0x31FF),
                (0x32D0, 0x32FE),
                (0xFF65, 0xFF9F),
                (0x1B000,),
                (0x1B164, 0x1B167),
                (0x1F201, 0x1F202),
                (0x1F213,),
            ]

        漢字 = Kanji
        カタカナ = Katakana
        ひらがな = Hiragana

        # Japanese itself covers the concatenation of its three nested sets
        _ranges = (
            Kanji._ranges
            + Hiragana._ranges
            + Katakana._ranges
        )

    class Hangul(unicode_set):
        """Unicode set for Hangul (Korean) Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x1100, 0x11FF),
            (0x302E, 0x302F),
            (0x3131, 0x318E),
            (0x3200, 0x321C),
            (0x3260, 0x327B),
            (0x327E,),
            (0xA960, 0xA97C),
            (0xAC00, 0xD7A3),
            (0xD7B0, 0xD7C6),
            (0xD7CB, 0xD7FB),
            (0xFFA0, 0xFFBE),
            (0xFFC2, 0xFFC7),
            (0xFFCA, 0xFFCF),
            (0xFFD2, 0xFFD7),
            (0xFFDA, 0xFFDC),
        ]

    Korean = Hangul

    class CJK(Chinese, Japanese, Hangul):
        """Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"""

    class Thai(unicode_set):
        """Unicode set for Thai Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0E01, 0x0E3A),
            (0x0E3F, 0x0E5B)
        ]

    class Arabic(unicode_set):
        """Unicode set for Arabic Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0600, 0x061B),
            (0x061E, 0x06FF),
            (0x0700, 0x077F),
        ]

    class Hebrew(unicode_set):
        """Unicode set for Hebrew Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0591, 0x05C7),
            (0x05D0, 0x05EA),
            (0x05EF, 0x05F4),
            (0xFB1D, 0xFB36),
            (0xFB38, 0xFB3C),
            (0xFB3E,),
            (0xFB40, 0xFB41),
            (0xFB43, 0xFB44),
            (0xFB46, 0xFB4F),
        ]

    class Devanagari(unicode_set):
        """Unicode set for Devanagari Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0900, 0x097F),
            (0xA8E0, 0xA8FF)
        ]

    BMP = BasicMultilingualPlane

    # add language identifiers using language Unicode
    العربية = Arabic
    中文 = Chinese
    кириллица = Cyrillic
    Ελληνικά = Greek
    עִברִית = Hebrew
    日本語 = Japanese
    한국어 = Korean
    ไทย = Thai
    देवनागरी = Devanagari

    # fmt: on
pip._vendor.pyparsing.core.Word
Definition
core.py:2736
pip._vendor.pyparsing.unicode._lazyclassproperty
Definition
unicode.py:8
pip._vendor.pyparsing.unicode._lazyclassproperty.fn
fn
Definition
unicode.py:10
pip._vendor.pyparsing.unicode._lazyclassproperty._intern
_intern
Definition
unicode.py:21
pip._vendor.pyparsing.unicode._lazyclassproperty.__init__
__init__(self, fn)
Definition
unicode.py:9
pip._vendor.pyparsing.unicode._lazyclassproperty.__get__
__get__(self, obj, cls)
Definition
unicode.py:14
pip._vendor.pyparsing.unicode._lazyclassproperty.__name__
__name__
Definition
unicode.py:12
pip._vendor.pyparsing.unicode._lazyclassproperty.__doc__
__doc__
Definition
unicode.py:11
pip._vendor.pyparsing.unicode.pyparsing_unicode.Arabic
Definition
unicode.py:319
pip._vendor.pyparsing.unicode.pyparsing_unicode.BasicMultilingualPlane
Definition
unicode.py:140
pip._vendor.pyparsing.unicode.pyparsing_unicode.CJK
Definition
unicode.py:309
pip._vendor.pyparsing.unicode.pyparsing_unicode.Chinese
Definition
unicode.py:218
pip._vendor.pyparsing.unicode.pyparsing_unicode.Cyrillic
Definition
unicode.py:205
pip._vendor.pyparsing.unicode.pyparsing_unicode.Devanagari
Definition
unicode.py:341
pip._vendor.pyparsing.unicode.pyparsing_unicode.Greek
Definition
unicode.py:165
pip._vendor.pyparsing.unicode.pyparsing_unicode.Hangul
Definition
unicode.py:287
pip._vendor.pyparsing.unicode.pyparsing_unicode.Hebrew
Definition
unicode.py:327
pip._vendor.pyparsing.unicode.pyparsing_unicode.Japanese.Hiragana
Definition
unicode.py:251
pip._vendor.pyparsing.unicode.pyparsing_unicode.Japanese.Kanji
Definition
unicode.py:244
pip._vendor.pyparsing.unicode.pyparsing_unicode.Japanese.Katakana
Definition
unicode.py:263
pip._vendor.pyparsing.unicode.pyparsing_unicode.Japanese
Definition
unicode.py:241
pip._vendor.pyparsing.unicode.pyparsing_unicode.Latin1
Definition
unicode.py:146
pip._vendor.pyparsing.unicode.pyparsing_unicode.LatinA
Definition
unicode.py:153
pip._vendor.pyparsing.unicode.pyparsing_unicode.LatinB
Definition
unicode.py:159
pip._vendor.pyparsing.unicode.pyparsing_unicode
Definition
unicode.py:128
pip._vendor.pyparsing.unicode.unicode_set
Definition
unicode.py:31
pip._vendor.pyparsing.unicode.unicode_set.identchars
identchars(cls)
Definition
unicode.py:86
pip._vendor.pyparsing.unicode.unicode_set._chars_for_ranges
_chars_for_ranges(cls)
Definition
unicode.py:56
pip._vendor.pyparsing.unicode.unicode_set.identbodychars
identbodychars(cls)
Definition
unicode.py:100
pip._vendor.pyparsing.unicode.unicode_set.alphas
alphas(cls)
Definition
unicode.py:71
pip._vendor.pyparsing.unicode.unicode_set.identifier
identifier(cls)
Definition
unicode.py:118
pip._vendor.pyparsing.unicode.unicode_set.identbodychars
identbodychars
Definition
unicode.py:125
pip._vendor.pyparsing.unicode.unicode_set.identchars
identchars
Definition
unicode.py:125
pip._vendor.pyparsing.unicode.unicode_set.nums
nums(cls)
Definition
unicode.py:76
pip._vendor.pyparsing.unicode.unicode_set._chars_for_ranges
_chars_for_ranges
Definition
unicode.py:68
pip._vendor.pyparsing.core.alphanums
str alphanums
Definition
core.py:263
pip._vendor.pyparsing.core.alphas
alphas
Definition
core.py:258
pip._vendor.pyparsing.core.nums
str nums
Definition
core.py:261
pip._vendor.pyparsing.core.printables
str printables
Definition
core.py:264
pip._vendor.pyparsing
Definition
__init__.py:1
i
for i
Definition
prime_search.m:10
venv
lib
python3.12
site-packages
pip
_vendor
pyparsing
unicode.py
Generated by
1.9.8