Browse Source

Added number parsing capabilities

See commands.tmp for an example.  It's pretty neat, but it could still
use some work.  I thought of a really clever way to parse numbers, better
than the one I came up with last night, but since I have a working
implementation now I figure I'd better commit it.

We have a new bug which causes the dictionary to be updated every time the
program starts.  I hope I didn't force that to happen last night or
something, but I have a vague feeling I did.
Clara Hobbs 8 years ago
parent
commit
e19d76f051
8 changed files with 216 additions and 15 deletions
  1. 6
    4
      commands.tmp
  2. 1
    1
      config.py
  3. 1
    1
      gtktrayui.py
  4. 1
    1
      gtkui.py
  5. 22
    5
      kaylee.py
  6. 2
    2
      languageupdater.py
  7. 182
    0
      numberparser.py
  8. 1
    1
      recognizer.py

+ 6
- 4
commands.tmp View File

@@ -1,5 +1,7 @@
1
-# commands are key:value pairs 
2
-# key is the sentence to listen for
3
-# value is the command to run when the key is spoken
1
+# commands are pairs of the form:
2
+#     KEY: VALUE
3
+# KEY is the sentence to listen for
4
+# VALUE is the command to run when the key is spoken
4 5
 
5
-hello world:echo "hello world"
6
+hello world: echo "hello world"
7
+start a %d minute timer: (echo {0} minute timer started && sleep {0}m && echo {0} minute timer ended) &

+ 1
- 1
config.py View File

@@ -1,7 +1,7 @@
1 1
 # This is part of Kaylee
2 2
 # -- this code is licensed GPLv3
3
-# Copyright 2013 Jezra
4 3
 # Copyright 2015 Clayton G. Hobbs
4
+# Portions Copyright 2013 Jezra
5 5
 
6 6
 import json
7 7
 import os

+ 1
- 1
gtktrayui.py View File

@@ -1,7 +1,7 @@
1 1
 # This is part of Kaylee
2 2
 # -- this code is licensed GPLv3
3
-# Copyright 2013 Jezra
4 3
 # Copyright 2015 Clayton G. Hobbs
4
+# Portions Copyright 2013 Jezra
5 5
 
6 6
 import sys
7 7
 import gi

+ 1
- 1
gtkui.py View File

@@ -1,7 +1,7 @@
1 1
 # This is part of Kaylee
2 2
 # -- this code is licensed GPLv3
3
-# Copyright 2013 Jezra
4 3
 # Copyright 2015 Clayton G. Hobbs
4
+# Portions Copyright 2013 Jezra
5 5
 
6 6
 import sys
7 7
 import gi

+ 22
- 5
kaylee.py View File

@@ -2,8 +2,8 @@
2 2
 
3 3
 # This is part of Kaylee
4 4
 # -- this code is licensed GPLv3
5
-# Copyright 2013 Jezra
6 5
 # Copyright 2015 Clayton G. Hobbs
6
+# Portions Copyright 2013 Jezra
7 7
 
8 8
 from __future__ import print_function
9 9
 import sys
@@ -17,6 +17,7 @@ import json
17 17
 from recognizer import Recognizer
18 18
 from config import Config
19 19
 from languageupdater import LanguageUpdater
20
+from numberparser import NumberParser
20 21
 
21 22
 
22 23
 class Kaylee:
@@ -33,6 +34,9 @@ class Kaylee:
33 34
         self.config = Config()
34 35
         self.options = vars(self.config.options)
35 36
 
37
+        # Create number parser for later use
38
+        self.number_parser = NumberParser()
39
+
36 40
         # Read the commands
37 41
         self.read_commands()
38 42
 
@@ -79,7 +83,10 @@ class Kaylee:
79 83
                 # This is a parsible line
80 84
                 (key, value) = line.split(":", 1)
81 85
                 self.commands[key.strip().lower()] = value.strip()
82
-                strings.write(key.strip() + "\n")
86
+                strings.write(key.strip().replace('%d', '') + "\n")
87
+        # Add number words to the corpus
88
+        for word in self.number_parser.number_words:
89
+            strings.write(word + "\n")
83 90
         # Close the strings file
84 91
         strings.close()
85 92
 
@@ -104,6 +111,7 @@ class Kaylee:
104 111
 
105 112
     def recognizer_finished(self, recognizer, text):
106 113
         t = text.lower()
114
+        numt, nums = self.number_parser.parse_all_numbers(t)
107 115
         # Is there a matching command?
108 116
         if t in self.commands:
109 117
             # Run the valid_sentence_command if there is a valid sentence command
@@ -113,9 +121,18 @@ class Kaylee:
113 121
             # Should we be passing words?
114 122
             if self.options['pass_words']:
115 123
                 cmd += " " + t
116
-                self.run_command(cmd)
117
-            else:
118
-                self.run_command(cmd)
124
+            self.run_command(cmd)
125
+            self.log_history(text)
126
+        elif numt in self.commands:
127
+            # Run the valid_sentence_command if there is a valid sentence command
128
+            if self.options['valid_sentence_command']:
129
+                subprocess.call(self.options['valid_sentence_command'], shell=True)
130
+            cmd = self.commands[numt]
131
+            cmd = cmd.format(*nums)
132
+            # Should we be passing words?
133
+            if self.options['pass_words']:
134
+                cmd += " " + t
135
+            self.run_command(cmd)
119 136
             self.log_history(text)
120 137
         else:
121 138
             # Run the invalid_sentence_command if there is an invalid sentence command

+ 2
- 2
languageupdater.py View File

@@ -1,7 +1,7 @@
1 1
 # This is part of Kaylee
2 2
 # -- this code is licensed GPLv3
3
-# Copyright 2013 Jezra
4 3
 # Copyright 2015 Clayton G. Hobbs
4
+# Portions Copyright 2013 Jezra
5 5
 
6 6
 import hashlib
7 7
 import json
@@ -56,7 +56,7 @@ class LanguageUpdater:
56 56
 
57 57
         # Parse response to get URLs of the files we need
58 58
         path_re = r'.*<title>Index of (.*?)</title>.*'
59
-        number_re = r'.*TAR[0-9]*?\.tgz.*'
59
+        number_re = r'.*TAR([0-9]*?)\.tgz.*'
60 60
         for line in r.text.split('\n'):
61 61
             # If we found the directory, keep it and don't break
62 62
             if re.search(path_re, line):

+ 182
- 0
numberparser.py View File

@@ -0,0 +1,182 @@
1
+#!/usr/bin/env python
2
+# numberparser.py - Translate words to decimal
3
+
4
+# This is part of Kaylee
5
+# -- this code is licensed GPLv3
6
+# Copyright 2015 Clayton G. Hobbs
7
+# Portions Copyright 2013 Jezra
8
+import re
9
+
10
+# Define the mappings from words to numbers
11
class NumberParser:
    """
    Translate English number words into integers.

    The class attributes below map each recognised word to its value,
    grouped by the grammatical role the word plays inside a number.  After
    construction, ``number_words`` lists every word that may appear inside
    a spoken number (including connectors such as "and"), while
    ``mandatory_number_words`` lists only the words that carry a value.
    """

    zero = {
        'zero': 0
    }

    ones = {
        'one': 1,
        'two': 2,
        'three': 3,
        'four': 4,
        'five': 5,
        'six': 6,
        'seven': 7,
        'eight': 8,
        'nine': 9
    }

    special_ones = {
        'ten': 10,
        'eleven': 11,
        'twelve': 12,
        'thirteen': 13,
        'fourteen': 14,
        'fifteen': 15,
        'sixteen': 16,
        'seventeen': 17,
        'eighteen': 18,
        'nineteen': 19
    }

    tens = {
        'twenty': 20,
        'thirty': 30,
        'forty': 40,
        'fifty': 50,
        'sixty': 60,
        'seventy': 70,
        'eighty': 80,
        'ninety': 90
    }

    hundred = {
        'hundred': 100
    }

    exp = {
        'thousand': 1000,
        'million': 1000000,
        'billion': 1000000000
    }

    # Valueless words that are tolerated inside a number
    allowed = [
        'and'
    ]

    # Words are separated by commas, whitespace, and hyphens
    _separator_re = re.compile(r'[,\s-]+')

    def __init__(self):
        """Build the word lists from the mapping tables."""
        value_tables = (self.zero, self.ones, self.special_ones, self.tens,
                        self.hundred, self.exp)
        # Words that carry a value; only these can start a number
        self.mandatory_number_words = [
            word for table in value_tables for word in table
        ]
        # Every word that may occur inside a number.  Built as a new list
        # (not list.copy(), which Python 2 lacks) so that the two
        # attributes never alias each other.
        self.number_words = self.mandatory_number_words + list(self.allowed)

    @staticmethod
    def _is_any(table, candidates):
        """Return True if table is (by identity) one of candidates."""
        return any(table is candidate for candidate in candidates)

    def _split_words(self, text_line):
        """Split text_line into words, dropping empty fragments."""
        fragments = self._separator_re.split(text_line.strip())
        return [fragment for fragment in fragments if fragment]

    def _parse_or_flag(self, words):
        """Parse the given number words, returning -1 if they are invalid."""
        try:
            return self.parse_number(' '.join(words))
        except ValueError:
            return -1

    def parse_number(self, text_line):
        """
        Parse a number from natural language into an int

        text_line must consist only of number words and the connector
        words in ``allowed``, separated by commas, whitespace, or hyphens.

        Returns the integer value of the number.

        Raises ValueError('Invalid number') if text_line contains a word
        that is not part of a number, contains no number word at all, or
        combines number words in a way that does not form a number.
        """
        value = 0              # Value of the group currently being read
        partial_value = 0      # Sum of the groups closed by a magnitude word
        last_list = None       # Table the previous number word came from
        hundred_in_group = False

        for word in self._split_words(text_line):
            if word in self.zero:
                # "zero" is only valid as the first number word
                if last_list is not None:
                    raise ValueError('Invalid number')
                value = 0
                last_list = self.zero
            elif word in self.ones:
                if self._is_any(last_list, (self.zero, self.ones,
                                            self.special_ones)):
                    raise ValueError('Invalid number')
                value += self.ones[word]
                last_list = self.ones
            elif word in self.special_ones:
                if self._is_any(last_list, (self.zero, self.ones,
                                            self.special_ones, self.tens)):
                    raise ValueError('Invalid number')
                value += self.special_ones[word]
                last_list = self.special_ones
            elif word in self.tens:
                if self._is_any(last_list, (self.zero, self.ones,
                                            self.special_ones, self.tens)):
                    raise ValueError('Invalid number')
                value += self.tens[word]
                last_list = self.tens
            elif word in self.hundred:
                # "hundred" needs a multiplier directly before it, and may
                # appear only once per group; otherwise something like
                # "one hundred two hundred" would multiply twice.
                if hundred_in_group or not self._is_any(
                        last_list,
                        (self.ones, self.special_ones, self.tens)):
                    raise ValueError('Invalid number')
                value *= self.hundred[word]
                hundred_in_group = True
                last_list = self.hundred
            elif word in self.exp:
                # A magnitude word needs a value before it: it may not
                # open the number, nor follow "zero" or another magnitude.
                if last_list is None or self._is_any(
                        last_list, (self.zero, self.exp)):
                    raise ValueError('Invalid number')
                partial_value += value * self.exp[word]
                value = 0
                hundred_in_group = False
                last_list = self.exp
            elif word not in self.allowed:
                raise ValueError('Invalid number')

        # Text with no value-carrying word at all is not a number
        if last_list is None:
            raise ValueError('Invalid number')
        return value + partial_value

    def parse_all_numbers(self, text_line):
        """
        Replace every spoken number in text_line with a '%d' placeholder

        Returns a tuple (template, nums).  template is text_line with its
        words joined by single spaces and each run of number words replaced
        by '%d'; nums holds the parsed value of each run, in order.  A run
        that does not form a valid number is reported as -1.

        Connector words such as "and" are absorbed into a number only when
        another number word follows them; otherwise they are kept in the
        template as ordinary words.
        """
        nums = []
        template_words = []
        current = []    # Words of the number being collected
        pending = []    # Connectors seen since the last number word

        for word in self._split_words(text_line):
            if word in self.mandatory_number_words:
                # Connectors between two number words belong to the number
                current.extend(pending)
                pending = []
                current.append(word)
            elif current and word in self.number_words:
                # Possibly part of the number; decided by the next word
                pending.append(word)
            else:
                if current:
                    nums.append(self._parse_or_flag(current))
                    template_words.append('%d')
                    # Connectors that did not lead to more number words
                    # are ordinary text
                    template_words.extend(pending)
                    current = []
                    pending = []
                template_words.append(word)

        # The line may end while a number is still being collected
        if current:
            nums.append(self._parse_or_flag(current))
            template_words.append('%d')
            template_words.extend(pending)

        return (' '.join(template_words), nums)
174
+
175
if __name__ == '__main__':
    # Interactive check: read one line and show what the parser extracts
    parser = NumberParser()
    spoken = input('Enter a number: ')
    # parse_all_numbers returns the placeholder template plus the numbers
    parsed = parser.parse_all_numbers(spoken)
    print('I claim that you meant the decimal number', parsed)

+ 1
- 1
recognizer.py View File

@@ -1,7 +1,7 @@
1 1
 # This is part of Kaylee
2 2
 # -- this code is licensed GPLv3
3
-# Copyright 2013 Jezra
4 3
 # Copyright 2015 Clayton G. Hobbs
4
+# Portions Copyright 2013 Jezra
5 5
 
6 6
 import os.path
7 7
 import sys

Loading…
Cancel
Save