Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
mbcssm.py
Go to the documentation of this file.
27
28from .codingstatemachinedict import CodingStateMachineDict
29from .enums import MachineState
30
# BIG5

# fmt: off
# Byte-class table for the Big5 state machine: 256 entries, one class
# number per byte value. The trailing comment on each row gives the byte
# range (hex) that the row covers.
BIG5_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07 (0x00 is allowed as a legal value)
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 1,  # 78 - 7f
    4, 4, 4, 4, 4, 4, 4, 4,  # 80 - 87
    4, 4, 4, 4, 4, 4, 4, 4,  # 88 - 8f
    4, 4, 4, 4, 4, 4, 4, 4,  # 90 - 97
    4, 4, 4, 4, 4, 4, 4, 4,  # 98 - 9f
    4, 3, 3, 3, 3, 3, 3, 3,  # a0 - a7
    3, 3, 3, 3, 3, 3, 3, 3,  # a8 - af
    3, 3, 3, 3, 3, 3, 3, 3,  # b0 - b7
    3, 3, 3, 3, 3, 3, 3, 3,  # b8 - bf
    3, 3, 3, 3, 3, 3, 3, 3,  # c0 - c7
    3, 3, 3, 3, 3, 3, 3, 3,  # c8 - cf
    3, 3, 3, 3, 3, 3, 3, 3,  # d0 - d7
    3, 3, 3, 3, 3, 3, 3, 3,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 3, 3, 3,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 3, 3, 0,  # f8 - ff
)
68
69BIG5_ST = (
73)
74# fmt: on
75
# Character length associated with each Big5 byte class, indexed by class
# number (classes 0-4).
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
77
# Coding state machine model for Big5: bundles the byte-class table, the
# state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in BIG5_CHAR_LEN_TABLE.
BIG5_SM_MODEL: CodingStateMachineDict = {
    "class_table": BIG5_CLS,
    "class_factor": 5,
    "state_table": BIG5_ST,
    "char_len_table": BIG5_CHAR_LEN_TABLE,
    "name": "Big5",
}
85
# CP949
# fmt: off
# Byte-class table for the CP949 state machine: 256 entries, one class
# number per byte value, sixteen bytes per row. The trailing comment on each
# row gives the byte range (hex) that the row covers.
CP949_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,  # 00 - 0f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,  # 10 - 1f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 2f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 3f
    1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  # 40 - 4f
    4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1,  # 50 - 5f
    1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,  # 60 - 6f
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1,  # 70 - 7f
    0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  # 80 - 8f
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  # 90 - 9f
    6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8,  # a0 - af
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  # b0 - bf
    7, 7, 7, 7, 7, 7, 9, 2, 2, 3, 2, 2, 2, 2, 2, 2,  # c0 - cf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  # d0 - df
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  # e0 - ef
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0,  # f0 - ff
)
106
107CP949_ST = (
108#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
116)
117# fmt: on
118
# Character length associated with each CP949 byte class, indexed by class
# number (classes 0-9).
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
120
# Coding state machine model for CP949: bundles the byte-class table, the
# state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in CP949_CHAR_LEN_TABLE.
CP949_SM_MODEL: CodingStateMachineDict = {
    "class_table": CP949_CLS,
    "class_factor": 10,
    "state_table": CP949_ST,
    "char_len_table": CP949_CHAR_LEN_TABLE,
    "name": "CP949",
}
128
# EUC-JP
# fmt: off
# Byte-class table for the EUC-JP state machine: 256 entries, one class
# number per byte value. The trailing comment on each row gives the byte
# range (hex) that the row covers.
EUCJP_CLS = (
    4, 4, 4, 4, 4, 4, 4, 4,  # 00 - 07
    4, 4, 4, 4, 4, 4, 5, 5,  # 08 - 0f
    4, 4, 4, 4, 4, 4, 4, 4,  # 10 - 17
    4, 4, 4, 5, 4, 4, 4, 4,  # 18 - 1f
    4, 4, 4, 4, 4, 4, 4, 4,  # 20 - 27
    4, 4, 4, 4, 4, 4, 4, 4,  # 28 - 2f
    4, 4, 4, 4, 4, 4, 4, 4,  # 30 - 37
    4, 4, 4, 4, 4, 4, 4, 4,  # 38 - 3f
    4, 4, 4, 4, 4, 4, 4, 4,  # 40 - 47
    4, 4, 4, 4, 4, 4, 4, 4,  # 48 - 4f
    4, 4, 4, 4, 4, 4, 4, 4,  # 50 - 57
    4, 4, 4, 4, 4, 4, 4, 4,  # 58 - 5f
    4, 4, 4, 4, 4, 4, 4, 4,  # 60 - 67
    4, 4, 4, 4, 4, 4, 4, 4,  # 68 - 6f
    4, 4, 4, 4, 4, 4, 4, 4,  # 70 - 77
    4, 4, 4, 4, 4, 4, 4, 4,  # 78 - 7f
    5, 5, 5, 5, 5, 5, 5, 5,  # 80 - 87
    5, 5, 5, 5, 5, 5, 1, 3,  # 88 - 8f
    5, 5, 5, 5, 5, 5, 5, 5,  # 90 - 97
    5, 5, 5, 5, 5, 5, 5, 5,  # 98 - 9f
    5, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 0, 5,  # f8 - ff
)
165
166EUCJP_ST = (
172)
173# fmt: on
174
# Character length associated with each EUC-JP byte class, indexed by class
# number (classes 0-5).
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
176
# Coding state machine model for EUC-JP: bundles the byte-class table, the
# state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in EUCJP_CHAR_LEN_TABLE.
EUCJP_SM_MODEL: CodingStateMachineDict = {
    "class_table": EUCJP_CLS,
    "class_factor": 6,
    "state_table": EUCJP_ST,
    "char_len_table": EUCJP_CHAR_LEN_TABLE,
    "name": "EUC-JP",
}
184
# EUC-KR
# fmt: off
# Byte-class table for the EUC-KR state machine: 256 entries, one class
# number per byte value. The trailing comment on each row gives the byte
# range (hex) that the row covers.
EUCKR_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    1, 1, 1, 1, 1, 1, 1, 1,  # 40 - 47
    1, 1, 1, 1, 1, 1, 1, 1,  # 48 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 57
    1, 1, 1, 1, 1, 1, 1, 1,  # 58 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 67
    1, 1, 1, 1, 1, 1, 1, 1,  # 68 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 77
    1, 1, 1, 1, 1, 1, 1, 1,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 3, 3, 3,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 3, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    2, 2, 2, 2, 2, 2, 2, 2,  # e0 - e7
    2, 2, 2, 2, 2, 2, 2, 2,  # e8 - ef
    2, 2, 2, 2, 2, 2, 2, 2,  # f0 - f7
    2, 2, 2, 2, 2, 2, 2, 0,  # f8 - ff
)
221
222EUCKR_ST = (
225)
226# fmt: on
227
# Character length associated with each EUC-KR byte class, indexed by class
# number (classes 0-3).
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
229
# Coding state machine model for EUC-KR: bundles the byte-class table, the
# state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in EUCKR_CHAR_LEN_TABLE.
EUCKR_SM_MODEL: CodingStateMachineDict = {
    "class_table": EUCKR_CLS,
    "class_factor": 4,
    "state_table": EUCKR_ST,
    "char_len_table": EUCKR_CHAR_LEN_TABLE,
    "name": "EUC-KR",
}
237
# JOHAB
# fmt: off
# Byte-class table for the Johab state machine: 256 entries, one class
# number per byte value. The trailing comment on each row gives the byte
# range (hex) that the row covers.
JOHAB_CLS = (
    4, 4, 4, 4, 4, 4, 4, 4,  # 00 - 07
    4, 4, 4, 4, 4, 4, 0, 0,  # 08 - 0f
    4, 4, 4, 4, 4, 4, 4, 4,  # 10 - 17
    4, 4, 4, 0, 4, 4, 4, 4,  # 18 - 1f
    4, 4, 4, 4, 4, 4, 4, 4,  # 20 - 27
    4, 4, 4, 4, 4, 4, 4, 4,  # 28 - 2f
    4, 3, 3, 3, 3, 3, 3, 3,  # 30 - 37
    3, 3, 3, 3, 3, 3, 3, 3,  # 38 - 3f
    3, 1, 1, 1, 1, 1, 1, 1,  # 40 - 47
    1, 1, 1, 1, 1, 1, 1, 1,  # 48 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 57
    1, 1, 1, 1, 1, 1, 1, 1,  # 58 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 67
    1, 1, 1, 1, 1, 1, 1, 1,  # 68 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 77
    1, 1, 1, 1, 1, 1, 1, 2,  # 78 - 7f
    6, 6, 6, 6, 8, 8, 8, 8,  # 80 - 87
    8, 8, 8, 8, 8, 8, 8, 8,  # 88 - 8f
    8, 7, 7, 7, 7, 7, 7, 7,  # 90 - 97
    7, 7, 7, 7, 7, 7, 7, 7,  # 98 - 9f
    7, 7, 7, 7, 7, 7, 7, 7,  # a0 - a7
    7, 7, 7, 7, 7, 7, 7, 7,  # a8 - af
    7, 7, 7, 7, 7, 7, 7, 7,  # b0 - b7
    7, 7, 7, 7, 7, 7, 7, 7,  # b8 - bf
    7, 7, 7, 7, 7, 7, 7, 7,  # c0 - c7
    7, 7, 7, 7, 7, 7, 7, 7,  # c8 - cf
    7, 7, 7, 7, 5, 5, 5, 5,  # d0 - d7
    5, 9, 9, 9, 9, 9, 9, 5,  # d8 - df
    9, 9, 9, 9, 9, 9, 9, 9,  # e0 - e7
    9, 9, 9, 9, 9, 9, 9, 9,  # e8 - ef
    9, 9, 9, 9, 9, 9, 9, 9,  # f0 - f7
    9, 9, 5, 5, 5, 5, 5, 0,  # f8 - ff
)
274
275JOHAB_ST = (
276# cls = 0 1 2 3 4 5 6 7 8 9
282)
283# fmt: on
284
# Character length associated with each Johab byte class, indexed by class
# number (classes 0-9).
JOHAB_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 0, 0, 2, 2, 2)
286
# Coding state machine model for Johab: bundles the byte-class table, the
# state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in JOHAB_CHAR_LEN_TABLE.
JOHAB_SM_MODEL: CodingStateMachineDict = {
    "class_table": JOHAB_CLS,
    "class_factor": 10,
    "state_table": JOHAB_ST,
    "char_len_table": JOHAB_CHAR_LEN_TABLE,
    "name": "Johab",
}
294
# EUC-TW
# fmt: off
# Byte-class table for the EUC-TW state machine: 256 entries, one class
# number per byte value. The trailing comment on each row gives the byte
# range (hex) that the row covers.
EUCTW_CLS = (
    2, 2, 2, 2, 2, 2, 2, 2,  # 00 - 07
    2, 2, 2, 2, 2, 2, 0, 0,  # 08 - 0f
    2, 2, 2, 2, 2, 2, 2, 2,  # 10 - 17
    2, 2, 2, 0, 2, 2, 2, 2,  # 18 - 1f
    2, 2, 2, 2, 2, 2, 2, 2,  # 20 - 27
    2, 2, 2, 2, 2, 2, 2, 2,  # 28 - 2f
    2, 2, 2, 2, 2, 2, 2, 2,  # 30 - 37
    2, 2, 2, 2, 2, 2, 2, 2,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 2,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 6, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 3, 4, 4, 4, 4, 4, 4,  # a0 - a7
    5, 5, 1, 1, 1, 1, 1, 1,  # a8 - af
    1, 1, 1, 1, 1, 1, 1, 1,  # b0 - b7
    1, 1, 1, 1, 1, 1, 1, 1,  # b8 - bf
    1, 1, 3, 1, 3, 3, 3, 3,  # c0 - c7
    3, 3, 3, 3, 3, 3, 3, 3,  # c8 - cf
    3, 3, 3, 3, 3, 3, 3, 3,  # d0 - d7
    3, 3, 3, 3, 3, 3, 3, 3,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 3, 3, 3,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 3, 3, 0,  # f8 - ff
)
331
332EUCTW_ST = (
339)
340# fmt: on
341
# Character length associated with each EUC-TW byte class, indexed by class
# number (classes 0-6).
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
343
# Coding state machine model for EUC-TW: bundles the byte-class table, the
# state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in EUCTW_CHAR_LEN_TABLE.
EUCTW_SM_MODEL: CodingStateMachineDict = {
    "class_table": EUCTW_CLS,
    "class_factor": 7,
    "state_table": EUCTW_ST,
    "char_len_table": EUCTW_CHAR_LEN_TABLE,
    "name": "x-euc-tw",
}
351
# GB2312
# fmt: off
# Byte-class table for the GB2312 state machine: 256 entries, one class
# number per byte value. The trailing comment on each row gives the byte
# range (hex) that the row covers.
GB2312_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    3, 3, 3, 3, 3, 3, 3, 3,  # 30 - 37
    3, 3, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 4,  # 78 - 7f
    5, 6, 6, 6, 6, 6, 6, 6,  # 80 - 87
    6, 6, 6, 6, 6, 6, 6, 6,  # 88 - 8f
    6, 6, 6, 6, 6, 6, 6, 6,  # 90 - 97
    6, 6, 6, 6, 6, 6, 6, 6,  # 98 - 9f
    6, 6, 6, 6, 6, 6, 6, 6,  # a0 - a7
    6, 6, 6, 6, 6, 6, 6, 6,  # a8 - af
    6, 6, 6, 6, 6, 6, 6, 6,  # b0 - b7
    6, 6, 6, 6, 6, 6, 6, 6,  # b8 - bf
    6, 6, 6, 6, 6, 6, 6, 6,  # c0 - c7
    6, 6, 6, 6, 6, 6, 6, 6,  # c8 - cf
    6, 6, 6, 6, 6, 6, 6, 6,  # d0 - d7
    6, 6, 6, 6, 6, 6, 6, 6,  # d8 - df
    6, 6, 6, 6, 6, 6, 6, 6,  # e0 - e7
    6, 6, 6, 6, 6, 6, 6, 6,  # e8 - ef
    6, 6, 6, 6, 6, 6, 6, 6,  # f0 - f7
    6, 6, 6, 6, 6, 6, 6, 0,  # f8 - ff
)
388
389GB2312_ST = (
396)
397# fmt: on
398
# Character length associated with each GB2312 byte class, indexed by class
# number (classes 0-6).
#
# To be accurate, the length of class 6 can be either 2 or 4. It is not
# necessary to discriminate between the two, though, since the value is used
# for frequency analysis only and each code range is validated there as
# well. So it is safe to set it to 2 here.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
405
# Coding state machine model for GB2312: bundles the byte-class table, the
# state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in GB2312_CHAR_LEN_TABLE.
GB2312_SM_MODEL: CodingStateMachineDict = {
    "class_table": GB2312_CLS,
    "class_factor": 7,
    "state_table": GB2312_ST,
    "char_len_table": GB2312_CHAR_LEN_TABLE,
    "name": "GB2312",
}
413
# Shift_JIS
# fmt: off
# Byte-class table for the Shift_JIS state machine: 256 entries, one class
# number per byte value. The trailing comment on each row gives the byte
# range (hex) that the row covers.
SJIS_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 1,  # 78 - 7f
    3, 3, 3, 3, 3, 2, 2, 3,  # 80 - 87
    3, 3, 3, 3, 3, 3, 3, 3,  # 88 - 8f
    3, 3, 3, 3, 3, 3, 3, 3,  # 90 - 97
    3, 3, 3, 3, 3, 3, 3, 3,  # 98 - 9f
    # 0xa0 is illegal in the Shift_JIS encoding, but some pages do contain
    # this byte, so the table is deliberately forgiving here.
    2, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 4, 4, 4,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 0, 0, 0,  # f8 - ff
)
452
453SJIS_ST = (
457)
458# fmt: on
459
# Character length associated with each Shift_JIS byte class, indexed by
# class number (six entries).
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
461
# Coding state machine model for Shift_JIS: bundles the byte-class table,
# the state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in SJIS_CHAR_LEN_TABLE.
SJIS_SM_MODEL: CodingStateMachineDict = {
    "class_table": SJIS_CLS,
    "class_factor": 6,
    "state_table": SJIS_ST,
    "char_len_table": SJIS_CHAR_LEN_TABLE,
    "name": "Shift_JIS",
}
469
# UCS2-BE
# fmt: off
# Byte-class table for the UTF-16BE state machine: 256 entries, one class
# number per byte value. The trailing comment on each row gives the byte
# range (hex) that the row covers.
UCS2BE_CLS = (
    0, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07
    0, 0, 1, 0, 0, 2, 0, 0,  # 08 - 0f
    0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
    0, 0, 0, 3, 0, 0, 0, 0,  # 18 - 1f
    0, 0, 0, 0, 0, 0, 0, 0,  # 20 - 27
    0, 3, 3, 3, 3, 3, 0, 0,  # 28 - 2f
    0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
    0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
    0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 47
    0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
    0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
    0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
    0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
    0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
    0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
    0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 0, 0, 0, 0, 0, 0, 0,  # a0 - a7
    0, 0, 0, 0, 0, 0, 0, 0,  # a8 - af
    0, 0, 0, 0, 0, 0, 0, 0,  # b0 - b7
    0, 0, 0, 0, 0, 0, 0, 0,  # b8 - bf
    0, 0, 0, 0, 0, 0, 0, 0,  # c0 - c7
    0, 0, 0, 0, 0, 0, 0, 0,  # c8 - cf
    0, 0, 0, 0, 0, 0, 0, 0,  # d0 - d7
    0, 0, 0, 0, 0, 0, 0, 0,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 4, 5,  # f8 - ff
)
506
507UCS2BE_ST = (
511 6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
512 6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
513 5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
515)
516# fmt: on
517
# Character length associated with each UTF-16BE byte class, indexed by
# class number (classes 0-5).
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
519
# Coding state machine model for UTF-16BE: bundles the byte-class table, the
# state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in UCS2BE_CHAR_LEN_TABLE.
UCS2BE_SM_MODEL: CodingStateMachineDict = {
    "class_table": UCS2BE_CLS,
    "class_factor": 6,
    "state_table": UCS2BE_ST,
    "char_len_table": UCS2BE_CHAR_LEN_TABLE,
    "name": "UTF-16BE",
}
527
# UCS2-LE
# fmt: off
# Byte-class table for the UTF-16LE state machine: 256 entries, one class
# number per byte value. The trailing comment on each row gives the byte
# range (hex) that the row covers.
UCS2LE_CLS = (
    0, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07
    0, 0, 1, 0, 0, 2, 0, 0,  # 08 - 0f
    0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
    0, 0, 0, 3, 0, 0, 0, 0,  # 18 - 1f
    0, 0, 0, 0, 0, 0, 0, 0,  # 20 - 27
    0, 3, 3, 3, 3, 3, 0, 0,  # 28 - 2f
    0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
    0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
    0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 47
    0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
    0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
    0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
    0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
    0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
    0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
    0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 0, 0, 0, 0, 0, 0, 0,  # a0 - a7
    0, 0, 0, 0, 0, 0, 0, 0,  # a8 - af
    0, 0, 0, 0, 0, 0, 0, 0,  # b0 - b7
    0, 0, 0, 0, 0, 0, 0, 0,  # b8 - bf
    0, 0, 0, 0, 0, 0, 0, 0,  # c0 - c7
    0, 0, 0, 0, 0, 0, 0, 0,  # c8 - cf
    0, 0, 0, 0, 0, 0, 0, 0,  # d0 - d7
    0, 0, 0, 0, 0, 0, 0, 0,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 4, 5,  # f8 - ff
)
564
565UCS2LE_ST = (
566 6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
569 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
570 7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
573)
574# fmt: on
575
# Character length associated with each UTF-16LE byte class, indexed by
# class number (classes 0-5).
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
577
# Coding state machine model for UTF-16LE: bundles the byte-class table, the
# state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in UCS2LE_CHAR_LEN_TABLE.
UCS2LE_SM_MODEL: CodingStateMachineDict = {
    "class_table": UCS2LE_CLS,
    "class_factor": 6,
    "state_table": UCS2LE_ST,
    "char_len_table": UCS2LE_CHAR_LEN_TABLE,
    "name": "UTF-16LE",
}
585
# UTF-8
# fmt: off
# Byte-class table for the UTF-8 state machine: 256 entries, one class
# number per byte value. The trailing comment on each row gives the byte
# range (hex) that the row covers.
UTF8_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07 (0x00 is allowed as a legal value)
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    1, 1, 1, 1, 1, 1, 1, 1,  # 40 - 47
    1, 1, 1, 1, 1, 1, 1, 1,  # 48 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 57
    1, 1, 1, 1, 1, 1, 1, 1,  # 58 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 67
    1, 1, 1, 1, 1, 1, 1, 1,  # 68 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 77
    1, 1, 1, 1, 1, 1, 1, 1,  # 78 - 7f
    2, 2, 2, 2, 3, 3, 3, 3,  # 80 - 87
    4, 4, 4, 4, 4, 4, 4, 4,  # 88 - 8f
    4, 4, 4, 4, 4, 4, 4, 4,  # 90 - 97
    4, 4, 4, 4, 4, 4, 4, 4,  # 98 - 9f
    5, 5, 5, 5, 5, 5, 5, 5,  # a0 - a7
    5, 5, 5, 5, 5, 5, 5, 5,  # a8 - af
    5, 5, 5, 5, 5, 5, 5, 5,  # b0 - b7
    5, 5, 5, 5, 5, 5, 5, 5,  # b8 - bf
    0, 0, 6, 6, 6, 6, 6, 6,  # c0 - c7
    6, 6, 6, 6, 6, 6, 6, 6,  # c8 - cf
    6, 6, 6, 6, 6, 6, 6, 6,  # d0 - d7
    6, 6, 6, 6, 6, 6, 6, 6,  # d8 - df
    7, 8, 8, 8, 8, 8, 8, 8,  # e0 - e7
    8, 8, 8, 8, 8, 9, 8, 8,  # e8 - ef
    10, 11, 11, 11, 11, 11, 11, 11,  # f0 - f7
    12, 13, 13, 13, 14, 15, 0, 0,  # f8 - ff
)
622
623UTF8_ST = (
625 9, 11, 8, 7, 6, 5, 4, 3,#08-0f
650)
651# fmt: on
652
# Character length associated with each UTF-8 byte class, indexed by class
# number (classes 0-15).
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
654
# Coding state machine model for UTF-8: bundles the byte-class table, the
# state table and the per-class character lengths with the charset name.
# class_factor equals the number of entries in UTF8_CHAR_LEN_TABLE.
UTF8_SM_MODEL: CodingStateMachineDict = {
    "class_table": UTF8_CLS,
    "class_factor": 16,
    "state_table": UTF8_ST,
    "char_len_table": UTF8_CHAR_LEN_TABLE,
    "name": "UTF-8",
}
for i