29from typing
import Dict, List, NamedTuple, Optional, Union
31from .charsetprober
import CharSetProber
32from .enums
import CharacterCategory, ProbingState, SequenceLikelihood
38 char_to_order_map: Dict[int, int]
39 language_model: Dict[int, Dict[int, int]]
40 typical_positive_ratio: float
41 keep_ascii_letters: bool
47 SB_ENOUGH_REL_THRESHOLD = 1024
48 POSITIVE_SHORTCUT_THRESHOLD = 0.95
49 NEGATIVE_SHORTCUT_THRESHOLD = 0.05
53 model: SingleByteCharSetModel,
54 is_reversed: bool =
False,
55 name_prober: Optional[CharSetProber] =
None,
86 return self.
_model.charset_name
92 return self.
_model.language
94 def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
96 if not self.
_model.keep_ascii_letters:
102 char_to_order_map = self.
_model.char_to_order_map
103 language_model = self.
_model.language_model
104 for char
in byte_str:
124 charset_name = self.
_model.charset_name
130 "%s confidence = %s, we have a winner", charset_name, confidence
135 "%s confidence = %s, below negative shortcut threshold %s",
153 / self.
_model.typical_positive_ratio
bytearray filter_international_words(Union[bytes, bytearray] buf)
bytes remove_xml_tags(Union[bytes, bytearray] buf)
float get_confidence(self)
float NEGATIVE_SHORTCUT_THRESHOLD
None __init__(self, SingleByteCharSetModel model, bool is_reversed=False, Optional[CharSetProber] name_prober=None)
int SB_ENOUGH_REL_THRESHOLD
NEGATIVE_SHORTCUT_THRESHOLD
Optional[str] language(self)
float POSITIVE_SHORTCUT_THRESHOLD
Optional[str] charset_name(self)
float get_confidence(self)
ProbingState feed(self, Union[bytes, bytearray] byte_str)