Somewhat fancy voice command recognition software
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

numbers.py 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. #!/usr/bin/env python
  2. # This is part of Kaylee
  3. # -- this code is licensed GPLv3
  4. # Copyright 2015-2017 Clayton G. Hobbs
  5. # Portions Copyright 2013 Jezra
  6. import re
  7. class NumberParser:
  8. """Parses integers from English strings"""
  9. zero = {
  10. 'zero': 0
  11. }
  12. ones = {
  13. 'one': 1,
  14. 'two': 2,
  15. 'three': 3,
  16. 'four': 4,
  17. 'five': 5,
  18. 'six': 6,
  19. 'seven': 7,
  20. 'eight': 8,
  21. 'nine': 9
  22. }
  23. special_ones = {
  24. 'ten': 10,
  25. 'eleven': 11,
  26. 'twelve': 12,
  27. 'thirteen': 13,
  28. 'fourteen': 14,
  29. 'fifteen': 15,
  30. 'sixteen': 16,
  31. 'seventeen': 17,
  32. 'eighteen': 18,
  33. 'ninteen': 19
  34. }
  35. tens = {
  36. 'twenty': 20,
  37. 'thirty': 30,
  38. 'forty': 40,
  39. 'fifty': 50,
  40. 'sixty': 60,
  41. 'seventy': 70,
  42. 'eighty': 80,
  43. 'ninty': 90
  44. }
  45. hundred = {
  46. 'hundred': 100
  47. }
  48. exp = {
  49. 'thousand': 1000,
  50. 'million': 1000000,
  51. 'billion': 1000000000
  52. }
  53. allowed = [
  54. 'and'
  55. ]
  56. number_words = None
  57. mandatory_number_words = None
  58. def __init__(self):
  59. if NumberParser.number_words is None:
  60. NumberParser.number_words = []
  61. for word in sorted(self.zero.keys()):
  62. NumberParser.number_words.append(word)
  63. for word in sorted(self.ones.keys()):
  64. NumberParser.number_words.append(word)
  65. for word in sorted(self.special_ones.keys()):
  66. NumberParser.number_words.append(word)
  67. for word in sorted(self.tens.keys()):
  68. NumberParser.number_words.append(word)
  69. for word in sorted(self.hundred.keys()):
  70. NumberParser.number_words.append(word)
  71. for word in sorted(self.exp.keys()):
  72. NumberParser.number_words.append(word)
  73. NumberParser.mandatory_number_words = self.number_words.copy()
  74. for word in sorted(self.allowed):
  75. NumberParser.number_words.append(word)
  76. def parse_number(self, text_line):
  77. """Parse a number from English into an int"""
  78. value = 0
  79. partial_value = 0
  80. last_list = None
  81. # Split text_line by commas, whitespace, and hyphens
  82. text_line = text_line.strip()
  83. text_words = re.split(r'[,\s-]+', text_line)
  84. # Parse the number
  85. for word in text_words:
  86. if word in self.zero:
  87. if last_list is not None:
  88. raise ValueError('Invalid number')
  89. value = 0
  90. last_list = self.zero
  91. elif word in self.ones:
  92. if last_list in (self.zero, self.ones, self.special_ones):
  93. raise ValueError('Invalid number')
  94. value += self.ones[word]
  95. last_list = self.ones
  96. elif word in self.special_ones:
  97. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  98. raise ValueError('Invalid number')
  99. value += self.special_ones[word]
  100. last_list = self.special_ones
  101. elif word in self.tens:
  102. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  103. raise ValueError('Invalid number')
  104. value += self.tens[word]
  105. last_list = self.tens
  106. elif word in self.hundred:
  107. if last_list not in (self.ones, self.special_ones, self.tens):
  108. raise ValueError('Invalid number')
  109. value *= self.hundred[word]
  110. last_list = self.hundred
  111. elif word in self.exp:
  112. if last_list in (self.zero, self.exp):
  113. raise ValueError('Invalid number')
  114. partial_value += value * self.exp[word]
  115. value = 0
  116. last_list = self.exp
  117. elif word not in self.allowed:
  118. raise ValueError('Invalid number')
  119. # Debugging information
  120. #print(word, value, partial_value)
  121. # Finish parsing the number
  122. value += partial_value
  123. return value
  124. def parse_all_numbers(self, text_line):
  125. """
  126. Parse all numbers from English to ints
  127. Returns a tuple whose first element is text_line with all English
  128. numbers replaced with "%d", and whose second element is a list
  129. containing all the parsed numbers as ints.
  130. """
  131. nums = []
  132. t_numless = ''
  133. # Split text_line by commas, whitespace, and hyphens
  134. text_words = re.split(r'[,\s-]+', text_line.strip())
  135. # Get a string of word classes
  136. tw_classes = ''
  137. for word in text_words:
  138. if word in self.mandatory_number_words:
  139. tw_classes += 'm'
  140. elif word in self.allowed:
  141. tw_classes += 'a'
  142. else:
  143. tw_classes += 'w'
  144. # For each string of number words:
  145. last_end = 0
  146. for m in re.finditer('m[am]*m|m', tw_classes):
  147. # Get the number words
  148. num_words = ' '.join(text_words[m.start():m.end()])
  149. # Parse the number and store the value
  150. try:
  151. nums.append(self.parse_number(num_words))
  152. except ValueError:
  153. nums.append(-1)
  154. # Add words to t_numless
  155. t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d '
  156. last_end = m.end()
  157. t_numless += ' '.join(text_words[last_end:])
  158. return (t_numless.strip(), nums)
  159. if __name__ == '__main__':
  160. np = NumberParser()
  161. # Get the words to translate
  162. text_line = input('Enter a string: ')
  163. # Parse it to an integer
  164. value = np.parse_all_numbers(text_line)
  165. # Print the result
  166. print(value)