Somewhat fancy voice command recognition software
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

numbers.py 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. #!/usr/bin/env python
  2. # This is part of Kaylee
  3. # -- this code is licensed GPLv3
  4. # Copyright 2015-2016 Clayton G. Hobbs
  5. # Portions Copyright 2013 Jezra
  6. import re
  7. class NumberParser:
  8. """Parses integers from English strings"""
  9. zero = {
  10. 'zero': 0
  11. }
  12. ones = {
  13. 'one': 1,
  14. 'two': 2,
  15. 'three': 3,
  16. 'four': 4,
  17. 'five': 5,
  18. 'six': 6,
  19. 'seven': 7,
  20. 'eight': 8,
  21. 'nine': 9
  22. }
  23. special_ones = {
  24. 'ten': 10,
  25. 'eleven': 11,
  26. 'twelve': 12,
  27. 'thirteen': 13,
  28. 'fourteen': 14,
  29. 'fifteen': 15,
  30. 'sixteen': 16,
  31. 'seventeen': 17,
  32. 'eighteen': 18,
  33. 'ninteen': 19
  34. }
  35. tens = {
  36. 'twenty': 20,
  37. 'thirty': 30,
  38. 'fourty': 40,
  39. 'fifty': 50,
  40. 'sixty': 60,
  41. 'seventy': 70,
  42. 'eighty': 80,
  43. 'ninty': 90
  44. }
  45. hundred = {
  46. 'hundred': 100
  47. }
  48. exp = {
  49. 'thousand': 1000,
  50. 'million': 1000000,
  51. 'billion': 1000000000
  52. }
  53. allowed = [
  54. 'and'
  55. ]
  56. def __init__(self):
  57. self.number_words = []
  58. for word in sorted(self.zero.keys()):
  59. self.number_words.append(word)
  60. for word in sorted(self.ones.keys()):
  61. self.number_words.append(word)
  62. for word in sorted(self.special_ones.keys()):
  63. self.number_words.append(word)
  64. for word in sorted(self.tens.keys()):
  65. self.number_words.append(word)
  66. for word in sorted(self.hundred.keys()):
  67. self.number_words.append(word)
  68. for word in sorted(self.exp.keys()):
  69. self.number_words.append(word)
  70. self.mandatory_number_words = self.number_words.copy()
  71. for word in sorted(self.allowed):
  72. self.number_words.append(word)
  73. def parse_number(self, text_line):
  74. """Parse a number from English into an int"""
  75. value = 0
  76. partial_value = 0
  77. last_list = None
  78. # Split text_line by commas, whitespace, and hyphens
  79. text_line = text_line.strip()
  80. text_words = re.split(r'[,\s-]+', text_line)
  81. # Parse the number
  82. for word in text_words:
  83. if word in self.zero:
  84. if last_list is not None:
  85. raise ValueError('Invalid number')
  86. value = 0
  87. last_list = self.zero
  88. elif word in self.ones:
  89. if last_list in (self.zero, self.ones, self.special_ones):
  90. raise ValueError('Invalid number')
  91. value += self.ones[word]
  92. last_list = self.ones
  93. elif word in self.special_ones:
  94. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  95. raise ValueError('Invalid number')
  96. value += self.special_ones[word]
  97. last_list = self.special_ones
  98. elif word in self.tens:
  99. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  100. raise ValueError('Invalid number')
  101. value += self.tens[word]
  102. last_list = self.tens
  103. elif word in self.hundred:
  104. if last_list not in (self.ones, self.special_ones, self.tens):
  105. raise ValueError('Invalid number')
  106. value *= self.hundred[word]
  107. last_list = self.hundred
  108. elif word in self.exp:
  109. if last_list in (self.zero, self.exp):
  110. raise ValueError('Invalid number')
  111. partial_value += value * self.exp[word]
  112. value = 0
  113. last_list = self.exp
  114. elif word not in self.allowed:
  115. raise ValueError('Invalid number')
  116. # Debugging information
  117. #print(word, value, partial_value)
  118. # Finish parsing the number
  119. value += partial_value
  120. return value
  121. def parse_all_numbers(self, text_line):
  122. """
  123. Parse all numbers from English to ints
  124. Returns a tuple whose first element is text_line with all English
  125. numbers replaced with "%d", and whose second element is a list
  126. containing all the parsed numbers as ints.
  127. """
  128. nums = []
  129. t_numless = ''
  130. # Split text_line by commas, whitespace, and hyphens
  131. text_words = re.split(r'[,\s-]+', text_line.strip())
  132. # Get a string of word classes
  133. tw_classes = ''
  134. for word in text_words:
  135. if word in self.mandatory_number_words:
  136. tw_classes += 'm'
  137. elif word in self.allowed:
  138. tw_classes += 'a'
  139. else:
  140. tw_classes += 'w'
  141. # For each string of number words:
  142. last_end = 0
  143. for m in re.finditer('m[am]*m|m', tw_classes):
  144. # Get the number words
  145. num_words = ' '.join(text_words[m.start():m.end()])
  146. # Parse the number and store the value
  147. try:
  148. nums.append(self.parse_number(num_words))
  149. except ValueError:
  150. nums.append(-1)
  151. # Add words to t_numless
  152. t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d '
  153. last_end = m.end()
  154. t_numless += ' '.join(text_words[last_end:])
  155. return (t_numless.strip(), nums)
  156. if __name__ == '__main__':
  157. np = NumberParser()
  158. # Get the words to translate
  159. text_line = input('Enter a string: ')
  160. # Parse it to an integer
  161. value = np.parse_all_numbers(text_line)
  162. # Print the result
  163. print(value)