28from typing
import Tuple, Union
30from .big5freq
import (
31 BIG5_CHAR_TO_FREQ_ORDER,
33 BIG5_TYPICAL_DISTRIBUTION_RATIO,
35from .euckrfreq
import (
36 EUCKR_CHAR_TO_FREQ_ORDER,
38 EUCKR_TYPICAL_DISTRIBUTION_RATIO,
40from .euctwfreq
import (
41 EUCTW_CHAR_TO_FREQ_ORDER,
43 EUCTW_TYPICAL_DISTRIBUTION_RATIO,
45from .gb2312freq
import (
46 GB2312_CHAR_TO_FREQ_ORDER,
48 GB2312_TYPICAL_DISTRIBUTION_RATIO,
51 JIS_CHAR_TO_FREQ_ORDER,
53 JIS_TYPICAL_DISTRIBUTION_RATIO,
55from .johabfreq
import JOHAB_TO_EUCKR_ORDER_TABLE
59 ENOUGH_DATA_THRESHOLD = 1024
62 MINIMUM_DATA_THRESHOLD = 3
67 self._char_to_freq_order: Tuple[int, ...] = tuple()
80 """reset analyser, clear any state"""
88 def feed(self, char: Union[bytes, bytearray], char_len: int) ->
None:
89 """feed a character with known length"""
99 if 512 > self._char_to_freq_order[order]:
103 """return confidence based on existing data"""
139 def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
144 first_char = byte_str[0]
145 if first_char >= 0xC4:
146 return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1
157 def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
162 first_char = byte_str[0]
163 if first_char >= 0xB0:
164 return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1
175 def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
176 first_char = byte_str[0]
177 if 0x88 <= first_char < 0xD4:
178 code = first_char * 256 + byte_str[1]
190 def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
195 first_char, second_char = byte_str[0], byte_str[1]
196 if (first_char >= 0xB0)
and (second_char >= 0xA1):
197 return 94 * (first_char - 0xB0) + second_char - 0xA1
208 def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
213 first_char, second_char = byte_str[0], byte_str[1]
214 if first_char >= 0xA4:
215 if second_char >= 0xA1:
216 return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
217 return 157 * (first_char - 0xA4) + second_char - 0x40
228 def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
233 first_char, second_char = byte_str[0], byte_str[1]
234 if 0x81 <= first_char <= 0x9F:
235 order = 188 * (first_char - 0x81)
236 elif 0xE0 <= first_char <= 0xEF:
237 order = 188 * (first_char - 0xE0 + 31)
240 order = order + second_char - 0x40
241 if second_char > 0x7F:
253 def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
260 return 94 * (char - 0xA1) + byte_str[1] - 0xA1
int get_order(self, Union[bytes, bytearray] byte_str)
typical_distribution_ratio
None feed(self, Union[bytes, bytearray] char, int char_len)
typical_distribution_ratio
bool got_enough_data(self)
int ENOUGH_DATA_THRESHOLD
float get_confidence(self)
int get_order(self, Union[bytes, bytearray] _)
int MINIMUM_DATA_THRESHOLD
int get_order(self, Union[bytes, bytearray] byte_str)
typical_distribution_ratio
int get_order(self, Union[bytes, bytearray] byte_str)
typical_distribution_ratio
int get_order(self, Union[bytes, bytearray] byte_str)
typical_distribution_ratio
int get_order(self, Union[bytes, bytearray] byte_str)
typical_distribution_ratio
int get_order(self, Union[bytes, bytearray] byte_str)
typical_distribution_ratio
int get_order(self, Union[bytes, bytearray] byte_str)
typical_distribution_ratio