You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

word_finder.py 1.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. from datetime import date
  2. import re
  3. import million.analyze.message_evaluation as msg_val
  4. from typing import List
  5. from million.model.message import Message
  6. def filter_words(messages: List[Message], words: List[str]) -> List[Message]:
  7. """
  8. Return every message containg the given words, you can use regex syntax inside your words
  9. i.e. find_words(messages, ["dogs?","m(ous|ic)e"])
  10. will search for : dog, dogs, mouse, mice
  11. """
  12. r_words = [rf"\b{w}\b" for w in words]
  13. rgx = "(" + "|".join(r_words) + ")"
  14. return [m for m in messages if m.content and re.search(rgx, m.content, re.I)]
  15. def filter_value(messages: List[Message], val: int) -> List[Message]:
  16. """
  17. Return every message whose value is evaluated to the given val
  18. """
  19. return [m for m in messages if msg_val.get(m) == val]
  20. def filter_date(messages: List[Message], other: date) -> List[Message]:
  21. """
  22. Return every message posted on the given date
  23. """
  24. return [m for m in messages if m.date_time.date() == other]
  25. def filter_neighbours(
  26. messages: List[Message], msg: Message, amplitude: int = 10
  27. ) -> List[Message]:
  28. """
  29. Return the messages posted just before and after the given one.
  30. amplitude will indicate how many messages to look for in each direction
  31. """
  32. idx = messages.index(msg)
  33. start_index = max(0, idx - amplitude)
  34. end_index = min(len(messages), idx + amplitude + 1)
  35. return messages[start_index:end_index]
  36. def retain_counts(messages: List[Message]) -> List[Message]:
  37. """
  38. Retain only the messages for which are considered having a counted value
  39. """
  40. return [msg for msg in messages if msg_val.get(msg)]