Somewhat fancy voice command recognition software
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

numberparser.py 5.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. #!/usr/bin/env python
  2. # numberparser.py - Translate words to decimal
  3. # This is part of Kaylee
  4. # -- this code is licensed GPLv3
  5. # Copyright 2015 Clayton G. Hobbs
  6. # Portions Copyright 2013 Jezra
  7. import re
  8. # Define the mappings from words to numbers
  9. class NumberParser:
  10. zero = {
  11. 'zero': 0
  12. }
  13. ones = {
  14. 'one': 1,
  15. 'two': 2,
  16. 'three': 3,
  17. 'four': 4,
  18. 'five': 5,
  19. 'six': 6,
  20. 'seven': 7,
  21. 'eight': 8,
  22. 'nine': 9
  23. }
  24. special_ones = {
  25. 'ten': 10,
  26. 'eleven': 11,
  27. 'twelve': 12,
  28. 'thirteen': 13,
  29. 'fourteen': 14,
  30. 'fifteen': 15,
  31. 'sixteen': 16,
  32. 'seventeen': 17,
  33. 'eighteen': 18,
  34. 'ninteen': 19
  35. }
  36. tens = {
  37. 'twenty': 20,
  38. 'thirty': 30,
  39. 'fourty': 40,
  40. 'fifty': 50,
  41. 'sixty': 60,
  42. 'seventy': 70,
  43. 'eighty': 80,
  44. 'ninty': 90
  45. }
  46. hundred = {
  47. 'hundred': 100
  48. }
  49. exp = {
  50. 'thousand': 1000,
  51. 'million': 1000000,
  52. 'billion': 1000000000
  53. }
  54. allowed = [
  55. 'and'
  56. ]
  57. def __init__(self):
  58. self.number_words = []
  59. for word in self.zero:
  60. self.number_words.append(word)
  61. for word in self.ones:
  62. self.number_words.append(word)
  63. for word in self.special_ones:
  64. self.number_words.append(word)
  65. for word in self.tens:
  66. self.number_words.append(word)
  67. for word in self.hundred:
  68. self.number_words.append(word)
  69. for word in self.exp:
  70. self.number_words.append(word)
  71. self.mandatory_number_words = self.number_words.copy()
  72. for word in self.allowed:
  73. self.number_words.append(word)
  74. def parse_number(self, text_line):
  75. """
  76. Parse numbers from natural language into ints
  77. TODO: Throw more exceptions when invalid numbers are detected. Only
  78. allow certian valueless words within numbers. Support zero.
  79. """
  80. value = 0
  81. partial_value = 0
  82. last_list = None
  83. # Split text_line by commas, whitespace, and hyphens
  84. text_line = text_line.strip()
  85. text_words = re.split(r'[,\s-]+', text_line)
  86. # Parse the number
  87. for word in text_words:
  88. if word in self.zero:
  89. if last_list is not None:
  90. raise ValueError('Invalid number')
  91. value = 0
  92. last_list = self.zero
  93. elif word in self.ones:
  94. if last_list in (self.zero, self.ones, self.special_ones):
  95. raise ValueError('Invalid number')
  96. value += self.ones[word]
  97. last_list = self.ones
  98. elif word in self.special_ones:
  99. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  100. raise ValueError('Invalid number')
  101. value += self.special_ones[word]
  102. last_list = self.special_ones
  103. elif word in self.tens:
  104. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  105. raise ValueError('Invalid number')
  106. value += self.tens[word]
  107. last_list = self.tens
  108. elif word in self.hundred:
  109. if last_list not in (self.ones, self.special_ones, self.tens):
  110. raise ValueError('Invalid number')
  111. value *= self.hundred[word]
  112. last_list = self.hundred
  113. elif word in self.exp:
  114. if last_list in (self.zero, self.exp):
  115. raise ValueError('Invalid number')
  116. partial_value += value * self.exp[word]
  117. value = 0
  118. last_list = self.exp
  119. elif word not in self.allowed:
  120. raise ValueError('Invalid number')
  121. # Debugging information
  122. #print(word, value, partial_value)
  123. # Finish parsing the number
  124. value += partial_value
  125. return value
  126. def parse_all_numbers(self, text_line):
  127. nums = []
  128. t_numless = ''
  129. current_num = ''
  130. # Split text_line by commas, whitespace, and hyphens
  131. text_line = text_line.strip()
  132. text_words = re.split(r'[,\s-]+', text_line)
  133. for word in text_words:
  134. # If we aren't starting a number, add the word to the result string
  135. if word not in self.mandatory_number_words:
  136. if current_num:
  137. if word in self.number_words:
  138. current_num += word + ' '
  139. else:
  140. try:
  141. nums.append(self.parse_number(current_num))
  142. except ValueError:
  143. nums.append(-1)
  144. current_num = ''
  145. t_numless += '%d' + ' '
  146. if not current_num:
  147. t_numless += word + ' '
  148. else:
  149. # We're parsing a number now
  150. current_num += word + ' '
  151. if current_num:
  152. try:
  153. nums.append(self.parse_number(current_num))
  154. except ValueError:
  155. nums.append(-1)
  156. current_num = ''
  157. t_numless += '%d' + ' '
  158. return (t_numless.strip(), nums)
  159. if __name__ == '__main__':
  160. np = NumberParser()
  161. # Get the words to translate
  162. text_line = input('Enter a number: ')
  163. # Parse it to an integer
  164. value = np.parse_all_numbers(text_line)
  165. # Print the result
  166. print('I claim that you meant the decimal number', value)