Use a regex that more closely matches the characters actually translated, and
list them in the order in which they appear in the signals table.
 
     def sanitize_text(self, text):
         sanitized = text.lower()
-        sanitized = re.sub(r"[^a-z0-9.,?\'\"/() \-=\+@]", "", sanitized)
+        sanitized = re.sub(r"[^a-z0-9.,:?\'-/()\"=\+@ ]", "", sanitized)
         sanitized = re.sub(r"\s+", " ", sanitized)
         sanitized = re.sub(r"^\s+", "", sanitized)
         sanitized = re.sub(r"\s+$", "", sanitized)