Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
eucjpprober.py
Go to the documentation of this file.
27
28from typing import Union
29
30from .chardistribution import EUCJPDistributionAnalysis
31from .codingstatemachine import CodingStateMachine
32from .enums import MachineState, ProbingState
33from .jpcntx import EUCJPContextAnalysis
34from .mbcharsetprober import MultiByteCharSetProber
35from .mbcssm import EUCJP_SM_MODEL
36
37
def reset(self) -> None:
    """Reset the prober by delegating to the parent class, then the context analyzer."""
    super().reset()
    # NOTE(review): the scraped listing skips its line 48 at this point; the
    # statement below is restored from upstream chardet -- confirm against the
    # packaged eucjpprober.py.
    self.context_analyzer.reset()
49
@property
def charset_name(self) -> str:
    """Return the charset name this prober reports, ``"EUC-JP"``."""
    return "EUC-JP"
53
@property
def language(self) -> str:
    """Return the language this prober reports, ``"Japanese"``."""
    return "Japanese"
57
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
    """Run ``byte_str`` through the coding state machine and both analyzers.

    Each byte is passed to ``self.coding_sm.next_state``.  The loop stops on
    ``MachineState.ERROR`` (after a debug log entry) and on
    ``MachineState.ITS_ME``.  On ``MachineState.START`` the current character
    length is read from the state machine and a two-byte window ending at the
    current byte is handed to the context and distribution analyzers.

    NOTE(review): the scraped listing this block came from dropped several
    lines (its numbers 68-69, 72, 75, 81-82, 89, 91 and 93) and duplicated
    some identifiers.  Those lines are restored here from upstream chardet;
    confirm against the packaged eucjpprober.py before relying on them.

    Args:
        byte_str: The chunk of raw bytes to examine.

    Returns:
        The value of ``self.state`` after the chunk has been processed.
    """
    assert self.coding_sm is not None
    assert self.distribution_analyzer is not None

    for i, byte in enumerate(byte_str):
        # PY3K: byte_str is a byte array, so byte is an int, not a byte
        coding_state = self.coding_sm.next_state(byte)
        if coding_state == MachineState.ERROR:
            # The format string has three placeholders; the first two
            # arguments were missing from the scraped listing.
            self.logger.debug(
                "%s %s prober hit error at byte %s",
                self.charset_name,
                self.language,
                i,
            )
            self._state = ProbingState.NOT_ME
            break
        if coding_state == MachineState.ITS_ME:
            self._state = ProbingState.FOUND_IT
            break
        if coding_state == MachineState.START:
            char_len = self.coding_sm.get_current_charlen()
            if i == 0:
                # First byte of this chunk: pair it with the byte saved in
                # slot 0 of self._last_char at the end of the previous call.
                self._last_char[1] = byte
                self.context_analyzer.feed(self._last_char, char_len)
                self.distribution_analyzer.feed(self._last_char, char_len)
            else:
                self.context_analyzer.feed(byte_str[i - 1 : i + 1], char_len)
                self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len)

    # Save the final byte so the next call can form a window across chunks.
    self._last_char[0] = byte_str[-1]

    if self.state == ProbingState.DETECTING:
        if self.context_analyzer.got_enough_data() and (
            self.get_confidence() > self.SHORTCUT_THRESHOLD
        ):
            self._state = ProbingState.FOUND_IT

    return self.state
96
def get_confidence(self) -> float:
    """Return the larger of the context and distribution confidences.

    Returns:
        ``max`` of ``self.context_analyzer.get_confidence()`` and
        ``self.distribution_analyzer.get_confidence()``.
    """
    assert self.distribution_analyzer is not None

    context_conf = self.context_analyzer.get_confidence()
    distrib_conf = self.distribution_analyzer.get_confidence()
    return max(context_conf, distrib_conf)
ProbingState feed(self, Union[bytes, bytearray] byte_str)
for i