Somewhat fancy voice command recognition software
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

numbers.py 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. #!/usr/bin/env python
  2. # This is part of Kaylee
  3. # -- this code is licensed GPLv3
  4. # Copyright 2015-2016 Clayton G. Hobbs
  5. # Portions Copyright 2013 Jezra
  6. import re
  7. class NumberParser:
  8. """Parses integers from English strings"""
  9. zero = {
  10. 'zero': 0
  11. }
  12. ones = {
  13. 'one': 1,
  14. 'two': 2,
  15. 'three': 3,
  16. 'four': 4,
  17. 'five': 5,
  18. 'six': 6,
  19. 'seven': 7,
  20. 'eight': 8,
  21. 'nine': 9
  22. }
  23. special_ones = {
  24. 'ten': 10,
  25. 'eleven': 11,
  26. 'twelve': 12,
  27. 'thirteen': 13,
  28. 'fourteen': 14,
  29. 'fifteen': 15,
  30. 'sixteen': 16,
  31. 'seventeen': 17,
  32. 'eighteen': 18,
  33. 'ninteen': 19
  34. }
  35. tens = {
  36. 'twenty': 20,
  37. 'thirty': 30,
  38. 'fourty': 40,
  39. 'fifty': 50,
  40. 'sixty': 60,
  41. 'seventy': 70,
  42. 'eighty': 80,
  43. 'ninty': 90
  44. }
  45. hundred = {
  46. 'hundred': 100
  47. }
  48. exp = {
  49. 'thousand': 1000,
  50. 'million': 1000000,
  51. 'billion': 1000000000
  52. }
  53. allowed = [
  54. 'and'
  55. ]
  56. def __init__(self):
  57. self.number_words = []
  58. for word in sorted(self.zero.keys()):
  59. self.number_words.append(word)
  60. for word in sorted(self.ones.keys()):
  61. self.number_words.append(word)
  62. for word in sorted(self.special_ones.keys()):
  63. self.number_words.append(word)
  64. for word in sorted(self.tens.keys()):
  65. self.number_words.append(word)
  66. for word in sorted(self.hundred.keys()):
  67. self.number_words.append(word)
  68. for word in sorted(self.exp.keys()):
  69. self.number_words.append(word)
  70. self.mandatory_number_words = self.number_words.copy()
  71. for word in sorted(self.allowed):
  72. self.number_words.append(word)
  73. def parse_number(self, text_line):
  74. """Parse a number from English into an int"""
  75. value = 0
  76. partial_value = 0
  77. last_list = None
  78. # Split text_line by commas, whitespace, and hyphens
  79. text_line = text_line.strip()
  80. text_words = re.split(r'[,\s-]+', text_line)
  81. # Parse the number
  82. for word in text_words:
  83. if word in self.zero:
  84. if last_list is not None:
  85. raise ValueError('Invalid number')
  86. value = 0
  87. last_list = self.zero
  88. elif word in self.ones:
  89. if last_list in (self.zero, self.ones, self.special_ones):
  90. raise ValueError('Invalid number')
  91. value += self.ones[word]
  92. last_list = self.ones
  93. elif word in self.special_ones:
  94. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  95. raise ValueError('Invalid number')
  96. value += self.special_ones[word]
  97. last_list = self.special_ones
  98. elif word in self.tens:
  99. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  100. raise ValueError('Invalid number')
  101. value += self.tens[word]
  102. last_list = self.tens
  103. elif word in self.hundred:
  104. if last_list not in (self.ones, self.special_ones, self.tens):
  105. raise ValueError('Invalid number')
  106. value *= self.hundred[word]
  107. last_list = self.hundred
  108. elif word in self.exp:
  109. if last_list in (self.zero, self.exp):
  110. raise ValueError('Invalid number')
  111. partial_value += value * self.exp[word]
  112. value = 0
  113. last_list = self.exp
  114. elif word not in self.allowed:
  115. raise ValueError('Invalid number')
  116. # Debugging information
  117. #print(word, value, partial_value)
  118. # Finish parsing the number
  119. value += partial_value
  120. return value
  121. def parse_all_numbers(self, text_line):
  122. """
  123. Parse all numbers from English to ints
  124. Returns a tuple whose first element is text_line with all English
  125. numbers replaced with "%d", and whose second element is a list
  126. containing all the parsed numbers as ints.
  127. """
  128. nums = []
  129. t_numless = ''
  130. # Split text_line by commas, whitespace, and hyphens
  131. text_words = re.split(r'[,\s-]+', text_line.strip())
  132. # Get a string of word classes
  133. tw_classes = ''
  134. for word in text_words:
  135. if word in self.mandatory_number_words:
  136. tw_classes += 'm'
  137. elif word in self.allowed:
  138. tw_classes += 'a'
  139. else:
  140. tw_classes += 'w'
  141. # For each string of number words:
  142. last_end = 0
  143. for m in re.finditer('m[am]*m|m', tw_classes):
  144. # Get the number words
  145. num_words = ' '.join(text_words[m.start():m.end()])
  146. # Parse the number and store the value
  147. try:
  148. nums.append(self.parse_number(num_words))
  149. except ValueError:
  150. nums.append(-1)
  151. # Add words to t_numless
  152. t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d '
  153. last_end = m.end()
  154. t_numless += ' '.join(text_words[last_end:])
  155. return (t_numless.strip(), nums)
  156. if __name__ == '__main__':
  157. np = NumberParser()
  158. # Get the words to translate
  159. text_line = input('Enter a string: ')
  160. # Parse it to an integer
  161. value = np.parse_all_numbers(text_line)
  162. # Print the result
  163. print(value)