Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
escprober.py
Go to the documentation of this file.
27
28from typing import Optional, Union
29
30from .charsetprober import CharSetProber
31from .codingstatemachine import CodingStateMachine
32from .enums import LanguageFilter, MachineState, ProbingState
33from .escsm import (
34 HZ_SM_MODEL,
35 ISO2022CN_SM_MODEL,
36 ISO2022JP_SM_MODEL,
37 ISO2022KR_SM_MODEL,
38)
39
40
42 """
43 This CharSetProber uses a "code scheme" approach for detecting encodings,
44 whereby easily recognizable escape or shift sequences are relied on to
45 identify these encodings.
46 """
47
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
    """Create the escape-sequence state machines and put the prober in its start state.

    Args:
        lang_filter: Language filter forwarded unchanged to the parent
            initializer.
    """
    super().__init__(lang_filter=lang_filter)
    # NOTE(review): the listing this was recovered from skips original
    # lines 51, 54, 56, 58 and 61 inside this method, so statements
    # (possibly guards around the appends below) may be missing --
    # confirm against the real file before relying on this body.
    self.coding_sm = []
    self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
    self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
    self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
    self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
    # Filled in once an encoding has been identified; None until then.
    self._detected_charset: Optional[str] = None
    self._detected_language: Optional[str] = None
    # The listing read `self.resetreset()`, which names no existing method;
    # the method defined on this class is `reset`.
    self.reset()
63
def reset(self) -> None:
    """Reset the parent state and mark every coding state machine active again."""
    super().reset()
    for coding_sm in self.coding_sm:
        coding_sm.active = True
        # NOTE(review): original line 68 is absent from the listing; a
        # per-machine statement may be missing here -- confirm.
    self.active_sm_count = len(self.coding_sm)
    # NOTE(review): original lines 70-71 are absent from the listing;
    # further resets may follow here -- confirm against the real file.
72
@property
def charset_name(self) -> Optional[str]:
    """Return the value stored in ``_detected_charset`` (``None`` if unset)."""
    return self._detected_charset
76
@property
def language(self) -> Optional[str]:
    """Return the value stored in ``_detected_language`` (``None`` if unset)."""
    return self._detected_language
80
def get_confidence(self) -> float:
    """Return 0.99 when a charset has been recorded, otherwise 0.0.

    The check is on the truthiness of ``_detected_charset``, so both
    ``None`` and an empty string yield 0.0.
    """
    return 0.99 if self._detected_charset else 0.00
83
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
    """Push each byte of *byte_str* through every still-active state machine.

    A machine that reports ``MachineState.ERROR`` is deactivated and
    ``active_sm_count`` is decremented; once no machine is left active the
    method returns early.  A machine that reports ``MachineState.ITS_ME``
    also ends processing immediately.

    Args:
        byte_str: The bytes to examine.

    Returns:
        The current value of ``self.state`` (defined outside this block).
    """
    for c in byte_str:
        for coding_sm in self.coding_sm:
            if not coding_sm.active:
                continue
            coding_state = coding_sm.next_state(c)
            if coding_state == MachineState.ERROR:
                coding_sm.active = False
                self.active_sm_count -= 1
                if self.active_sm_count <= 0:
                    # NOTE(review): original line 94 is absent from the
                    # listing; a state update probably belongs here --
                    # confirm against the real file.
                    return self.state
            elif coding_state == MachineState.ITS_ME:
                # NOTE(review): original lines 97-99 are absent from the
                # listing; the statements recording the detection result
                # are presumably missing -- confirm against the real file.
                return self.state

    return self.state
None __init__(self, LanguageFilter lang_filter=LanguageFilter.NONE)
Definition escprober.py:48
ProbingState feed(self, Union[bytes, bytearray] byte_str)
Definition escprober.py:84
for i