Somewhat fancy voice command recognition software
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

numberparser.py 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. #!/usr/bin/env python
  2. # numberparser.py - Translate words to decimal
  3. # This is part of Kaylee
  4. # -- this code is licensed GPLv3
  5. # Copyright 2015-2016 Clayton G. Hobbs
  6. # Portions Copyright 2013 Jezra
  7. import re
  8. # Define the mappings from words to numbers
  9. class NumberParser:
  10. zero = {
  11. 'zero': 0
  12. }
  13. ones = {
  14. 'one': 1,
  15. 'two': 2,
  16. 'three': 3,
  17. 'four': 4,
  18. 'five': 5,
  19. 'six': 6,
  20. 'seven': 7,
  21. 'eight': 8,
  22. 'nine': 9
  23. }
  24. special_ones = {
  25. 'ten': 10,
  26. 'eleven': 11,
  27. 'twelve': 12,
  28. 'thirteen': 13,
  29. 'fourteen': 14,
  30. 'fifteen': 15,
  31. 'sixteen': 16,
  32. 'seventeen': 17,
  33. 'eighteen': 18,
  34. 'ninteen': 19
  35. }
  36. tens = {
  37. 'twenty': 20,
  38. 'thirty': 30,
  39. 'fourty': 40,
  40. 'fifty': 50,
  41. 'sixty': 60,
  42. 'seventy': 70,
  43. 'eighty': 80,
  44. 'ninty': 90
  45. }
  46. hundred = {
  47. 'hundred': 100
  48. }
  49. exp = {
  50. 'thousand': 1000,
  51. 'million': 1000000,
  52. 'billion': 1000000000
  53. }
  54. allowed = [
  55. 'and'
  56. ]
  57. def __init__(self):
  58. self.number_words = []
  59. for word in sorted(self.zero.keys()):
  60. self.number_words.append(word)
  61. for word in sorted(self.ones.keys()):
  62. self.number_words.append(word)
  63. for word in sorted(self.special_ones.keys()):
  64. self.number_words.append(word)
  65. for word in sorted(self.tens.keys()):
  66. self.number_words.append(word)
  67. for word in sorted(self.hundred.keys()):
  68. self.number_words.append(word)
  69. for word in sorted(self.exp.keys()):
  70. self.number_words.append(word)
  71. self.mandatory_number_words = self.number_words.copy()
  72. for word in sorted(self.allowed):
  73. self.number_words.append(word)
  74. def parse_number(self, text_line):
  75. """
  76. Parse numbers from natural language into ints
  77. TODO: Throw more exceptions when invalid numbers are detected. Only
  78. allow certian valueless words within numbers. Support zero.
  79. """
  80. value = 0
  81. partial_value = 0
  82. last_list = None
  83. # Split text_line by commas, whitespace, and hyphens
  84. text_line = text_line.strip()
  85. text_words = re.split(r'[,\s-]+', text_line)
  86. # Parse the number
  87. for word in text_words:
  88. if word in self.zero:
  89. if last_list is not None:
  90. raise ValueError('Invalid number')
  91. value = 0
  92. last_list = self.zero
  93. elif word in self.ones:
  94. if last_list in (self.zero, self.ones, self.special_ones):
  95. raise ValueError('Invalid number')
  96. value += self.ones[word]
  97. last_list = self.ones
  98. elif word in self.special_ones:
  99. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  100. raise ValueError('Invalid number')
  101. value += self.special_ones[word]
  102. last_list = self.special_ones
  103. elif word in self.tens:
  104. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  105. raise ValueError('Invalid number')
  106. value += self.tens[word]
  107. last_list = self.tens
  108. elif word in self.hundred:
  109. if last_list not in (self.ones, self.special_ones, self.tens):
  110. raise ValueError('Invalid number')
  111. value *= self.hundred[word]
  112. last_list = self.hundred
  113. elif word in self.exp:
  114. if last_list in (self.zero, self.exp):
  115. raise ValueError('Invalid number')
  116. partial_value += value * self.exp[word]
  117. value = 0
  118. last_list = self.exp
  119. elif word not in self.allowed:
  120. raise ValueError('Invalid number')
  121. # Debugging information
  122. #print(word, value, partial_value)
  123. # Finish parsing the number
  124. value += partial_value
  125. return value
  126. def parse_all_numbers(self, text_line):
  127. nums = []
  128. t_numless = ''
  129. # Split text_line by commas, whitespace, and hyphens
  130. text_words = re.split(r'[,\s-]+', text_line.strip())
  131. # Get a string of word classes
  132. tw_classes = ''
  133. for word in text_words:
  134. if word in self.mandatory_number_words:
  135. tw_classes += 'm'
  136. elif word in self.allowed:
  137. tw_classes += 'a'
  138. else:
  139. tw_classes += 'w'
  140. # For each string of number words:
  141. last_end = 0
  142. for m in re.finditer('m[am]*m|m', tw_classes):
  143. # Get the number words
  144. num_words = ' '.join(text_words[m.start():m.end()])
  145. # Parse the number and store the value
  146. try:
  147. nums.append(self.parse_number(num_words))
  148. except ValueError:
  149. nums.append(-1)
  150. # Add words to t_numless
  151. t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d '
  152. last_end = m.end()
  153. t_numless += ' '.join(text_words[last_end:])
  154. return (t_numless.strip(), nums)
  155. if __name__ == '__main__':
  156. np = NumberParser()
  157. # Get the words to translate
  158. text_line = input('Enter a string: ')
  159. # Parse it to an integer
  160. value = np.parse_all_numbers(text_line)
  161. # Print the result
  162. print(value)