Use a regex that more closely matches the characters actually translated,
and list them in the order in which they appear in the signals table.
def sanitize_text(self, text):
sanitized = text.lower()
- sanitized = re.sub(r"[^a-z0-9.,?\'\"/() \-=\+@]", "", sanitized)
+ sanitized = re.sub(r"[^a-z0-9.,:?\'-/()\"=\+@ ]", "", sanitized)
sanitized = re.sub(r"\s+", " ", sanitized)
sanitized = re.sub(r"^\s+", "", sanitized)
sanitized = re.sub(r"\s+$", "", sanitized)