Pārlūkot izejas kodu

Added number parsing capabilities

See commands.tmp for an example.  It's pretty neat, but it could still
use some work.  I thought of a really clever way to parse numbers, better
than the one I came up with last night, but since I have a working
implementation now I figure I'd better commit it.

We have a new bug which causes the dictionary to be updated every time the
program starts.  I hope I didn't force that to happen last night or
something, but I have a vague feeling I did.
Clara Hobbs 8 gadus atpakaļ
vecāks
revīzija
e19d76f051
8 mainītis faili ar 216 papildinājumiem un 15 dzēšanām
  1. 6
    4
      commands.tmp
  2. 1
    1
      config.py
  3. 1
    1
      gtktrayui.py
  4. 1
    1
      gtkui.py
  5. 22
    5
      kaylee.py
  6. 2
    2
      languageupdater.py
  7. 182
    0
      numberparser.py
  8. 1
    1
      recognizer.py

+ 6
- 4
commands.tmp Parādīt failu

1
-# commands are key:value pairs 
2
-# key is the sentence to listen for
3
-# value is the command to run when the key is spoken
1
+# commands are pairs of the form:
2
+#     KEY: VALUE
3
+# KEY is the sentence to listen for
4
+# VALUE is the command to run when the key is spoken
4
 
5
 
5
-hello world:echo "hello world"
6
+hello world: echo "hello world"
7
+start a %d minute timer: (echo {0} minute timer started && sleep {0}m && echo {0} minute timer ended) &

+ 1
- 1
config.py Parādīt failu

1
 # This is part of Kaylee
1
 # This is part of Kaylee
2
 # -- this code is licensed GPLv3
2
 # -- this code is licensed GPLv3
3
-# Copyright 2013 Jezra
4
 # Copyright 2015 Clayton G. Hobbs
3
 # Copyright 2015 Clayton G. Hobbs
4
+# Portions Copyright 2013 Jezra
5
 
5
 
6
 import json
6
 import json
7
 import os
7
 import os

+ 1
- 1
gtktrayui.py Parādīt failu

1
 # This is part of Kaylee
1
 # This is part of Kaylee
2
 # -- this code is licensed GPLv3
2
 # -- this code is licensed GPLv3
3
-# Copyright 2013 Jezra
4
 # Copyright 2015 Clayton G. Hobbs
3
 # Copyright 2015 Clayton G. Hobbs
4
+# Portions Copyright 2013 Jezra
5
 
5
 
6
 import sys
6
 import sys
7
 import gi
7
 import gi

+ 1
- 1
gtkui.py Parādīt failu

1
 # This is part of Kaylee
1
 # This is part of Kaylee
2
 # -- this code is licensed GPLv3
2
 # -- this code is licensed GPLv3
3
-# Copyright 2013 Jezra
4
 # Copyright 2015 Clayton G. Hobbs
3
 # Copyright 2015 Clayton G. Hobbs
4
+# Portions Copyright 2013 Jezra
5
 
5
 
6
 import sys
6
 import sys
7
 import gi
7
 import gi

+ 22
- 5
kaylee.py Parādīt failu

2
 
2
 
3
 # This is part of Kaylee
3
 # This is part of Kaylee
4
 # -- this code is licensed GPLv3
4
 # -- this code is licensed GPLv3
5
-# Copyright 2013 Jezra
6
 # Copyright 2015 Clayton G. Hobbs
5
 # Copyright 2015 Clayton G. Hobbs
6
+# Portions Copyright 2013 Jezra
7
 
7
 
8
 from __future__ import print_function
8
 from __future__ import print_function
9
 import sys
9
 import sys
17
 from recognizer import Recognizer
17
 from recognizer import Recognizer
18
 from config import Config
18
 from config import Config
19
 from languageupdater import LanguageUpdater
19
 from languageupdater import LanguageUpdater
20
+from numberparser import NumberParser
20
 
21
 
21
 
22
 
22
 class Kaylee:
23
 class Kaylee:
33
         self.config = Config()
34
         self.config = Config()
34
         self.options = vars(self.config.options)
35
         self.options = vars(self.config.options)
35
 
36
 
37
+        # Create number parser for later use
38
+        self.number_parser = NumberParser()
39
+
36
         # Read the commands
40
         # Read the commands
37
         self.read_commands()
41
         self.read_commands()
38
 
42
 
79
                 # This is a parsible line
83
                 # This is a parsible line
80
                 (key, value) = line.split(":", 1)
84
                 (key, value) = line.split(":", 1)
81
                 self.commands[key.strip().lower()] = value.strip()
85
                 self.commands[key.strip().lower()] = value.strip()
82
-                strings.write(key.strip() + "\n")
86
+                strings.write(key.strip().replace('%d', '') + "\n")
87
+        # Add number words to the corpus
88
+        for word in self.number_parser.number_words:
89
+            strings.write(word + "\n")
83
         # Close the strings file
90
         # Close the strings file
84
         strings.close()
91
         strings.close()
85
 
92
 
104
 
111
 
105
     def recognizer_finished(self, recognizer, text):
112
     def recognizer_finished(self, recognizer, text):
106
         t = text.lower()
113
         t = text.lower()
114
+        numt, nums = self.number_parser.parse_all_numbers(t)
107
         # Is there a matching command?
115
         # Is there a matching command?
108
         if t in self.commands:
116
         if t in self.commands:
109
             # Run the valid_sentence_command if there is a valid sentence command
117
             # Run the valid_sentence_command if there is a valid sentence command
113
             # Should we be passing words?
121
             # Should we be passing words?
114
             if self.options['pass_words']:
122
             if self.options['pass_words']:
115
                 cmd += " " + t
123
                 cmd += " " + t
116
-                self.run_command(cmd)
117
-            else:
118
-                self.run_command(cmd)
124
+            self.run_command(cmd)
125
+            self.log_history(text)
126
+        elif numt in self.commands:
127
+            # Run the valid_sentence_command if there is a valid sentence command
128
+            if self.options['valid_sentence_command']:
129
+                subprocess.call(self.options['valid_sentence_command'], shell=True)
130
+            cmd = self.commands[numt]
131
+            cmd = cmd.format(*nums)
132
+            # Should we be passing words?
133
+            if self.options['pass_words']:
134
+                cmd += " " + t
135
+            self.run_command(cmd)
119
             self.log_history(text)
136
             self.log_history(text)
120
         else:
137
         else:
121
             # Run the invalid_sentence_command if there is an invalid sentence command
138
             # Run the invalid_sentence_command if there is an invalid sentence command

+ 2
- 2
languageupdater.py Parādīt failu

1
 # This is part of Kaylee
1
 # This is part of Kaylee
2
 # -- this code is licensed GPLv3
2
 # -- this code is licensed GPLv3
3
-# Copyright 2013 Jezra
4
 # Copyright 2015 Clayton G. Hobbs
3
 # Copyright 2015 Clayton G. Hobbs
4
+# Portions Copyright 2013 Jezra
5
 
5
 
6
 import hashlib
6
 import hashlib
7
 import json
7
 import json
56
 
56
 
57
         # Parse response to get URLs of the files we need
57
         # Parse response to get URLs of the files we need
58
         path_re = r'.*<title>Index of (.*?)</title>.*'
58
         path_re = r'.*<title>Index of (.*?)</title>.*'
59
-        number_re = r'.*TAR[0-9]*?\.tgz.*'
59
+        number_re = r'.*TAR([0-9]*?)\.tgz.*'
60
         for line in r.text.split('\n'):
60
         for line in r.text.split('\n'):
61
             # If we found the directory, keep it and don't break
61
             # If we found the directory, keep it and don't break
62
             if re.search(path_re, line):
62
             if re.search(path_re, line):

+ 182
- 0
numberparser.py Parādīt failu

1
+#!/usr/bin/env python
2
+# numberparser.py - Translate words to decimal
3
+
4
+# This is part of Kaylee
5
+# -- this code is licensed GPLv3
6
+# Copyright 2015 Clayton G. Hobbs
7
+# Portions Copyright 2013 Jezra
8
+import re
9
+
10
+# Define the mappings from words to numbers
11
class NumberParser:
    """Translate English number words into integers.

    The class-level dictionaries map each recognised word to its value and
    double as "categories": while parsing, the category of the previous
    number word decides which words may legally follow it.

    Instance attributes (built in ``__init__``):
        mandatory_number_words: every word that carries a numeric value.
        number_words: ``mandatory_number_words`` plus the valueless filler
            words from ``allowed`` that may appear inside a number.
    """

    zero = {
        'zero': 0
    }

    ones = {
        'one': 1,
        'two': 2,
        'three': 3,
        'four': 4,
        'five': 5,
        'six': 6,
        'seven': 7,
        'eight': 8,
        'nine': 9
    }

    special_ones = {
        'ten': 10,
        'eleven': 11,
        'twelve': 12,
        'thirteen': 13,
        'fourteen': 14,
        'fifteen': 15,
        'sixteen': 16,
        'seventeen': 17,
        'eighteen': 18,
        'nineteen': 19
    }

    tens = {
        'twenty': 20,
        'thirty': 30,
        'forty': 40,
        'fifty': 50,
        'sixty': 60,
        'seventy': 70,
        'eighty': 80,
        'ninety': 90
    }

    hundred = {
        'hundred': 100
    }

    exp = {
        'thousand': 1000,
        'million': 1000000,
        'billion': 1000000000
    }

    # Valueless words that are tolerated inside a number ("one hundred AND
    # five") but can never start one.
    allowed = [
        'and'
    ]

    def __init__(self):
        """Build the flat word lists used for corpus generation and lookup."""
        self.mandatory_number_words = []
        for group in (self.zero, self.ones, self.special_ones, self.tens,
                      self.hundred, self.exp):
            self.mandatory_number_words.extend(group)
        # list(...) instead of list.copy() so this also works on Python 2.
        self.number_words = (list(self.mandatory_number_words)
                             + list(self.allowed))

    @staticmethod
    def _split_words(text_line):
        """Split text on commas, whitespace and hyphens, dropping empties."""
        return [w for w in re.split(r'[,\s-]+', text_line.strip()) if w]

    def parse_number(self, text_line):
        """Parse a single number written in words into an int.

        Args:
            text_line: The words of one number, e.g.
                ``'two hundred and forty-one'``.  Words may be separated by
                whitespace, commas or hyphens.

        Returns:
            The integer value of the number.

        Raises:
            ValueError: If the text contains a word that is not part of a
                number, if the words appear in an order that does not form
                a valid number, or if no number word is present at all.
        """
        value = 0            # value of the group currently being built
        partial_value = 0    # sum of groups already closed by an exp word
        last_list = None     # category of the previous number word

        for word in self._split_words(text_line):
            if word in self.zero:
                # "zero" is only valid as the very first number word
                if last_list is not None:
                    raise ValueError('Invalid number')
                value = 0
                last_list = self.zero
            elif word in self.ones:
                if last_list in (self.zero, self.ones, self.special_ones):
                    raise ValueError('Invalid number')
                value += self.ones[word]
                last_list = self.ones
            elif word in self.special_ones:
                if last_list in (self.zero, self.ones, self.special_ones,
                                 self.tens):
                    raise ValueError('Invalid number')
                value += self.special_ones[word]
                last_list = self.special_ones
            elif word in self.tens:
                if last_list in (self.zero, self.ones, self.special_ones,
                                 self.tens):
                    raise ValueError('Invalid number')
                value += self.tens[word]
                last_list = self.tens
            elif word in self.hundred:
                # "hundred" multiplies the value spoken just before it
                if last_list not in (self.ones, self.special_ones, self.tens):
                    raise ValueError('Invalid number')
                value *= self.hundred[word]
                last_list = self.hundred
            elif word in self.exp:
                # A magnitude word needs a non-zero quantity in front of it;
                # a bare "thousand" is not a number.
                if last_list is None or last_list in (self.zero, self.exp):
                    raise ValueError('Invalid number')
                partial_value += value * self.exp[word]
                value = 0
                last_list = self.exp
            elif word not in self.allowed:
                raise ValueError('Invalid number')

        # Only filler words (or nothing at all) were given
        if last_list is None:
            raise ValueError('Invalid number')
        return value + partial_value

    def _parse_or_flag(self, words):
        """Parse a run of number words, returning -1 if it is not valid."""
        try:
            return self.parse_number(' '.join(words))
        except ValueError:
            return -1

    def parse_all_numbers(self, text_line):
        """Replace every number in a sentence with ``%d`` and collect values.

        A number starts at the first word from ``mandatory_number_words`` and
        continues through any following words from ``number_words``.

        Args:
            text_line: The sentence to scan.

        Returns:
            A tuple ``(template, nums)``: ``template`` is the sentence with
            each run of number words replaced by ``'%d'`` and words joined by
            single spaces; ``nums`` lists the parsed value of each run in
            order, with ``-1`` standing in for a run that could not be parsed.
        """
        nums = []
        template_words = []
        pending = []    # words of the number currently being collected

        for word in self._split_words(text_line):
            if word in self.mandatory_number_words:
                pending.append(word)
            elif pending and word in self.number_words:
                # Filler such as "and" only counts once a number has started
                pending.append(word)
            else:
                if pending:
                    nums.append(self._parse_or_flag(pending))
                    template_words.append('%d')
                    pending = []
                template_words.append(word)

        # The sentence may end while a number is still being collected
        if pending:
            nums.append(self._parse_or_flag(pending))
            template_words.append('%d')

        return (' '.join(template_words), nums)
174
+
175
if __name__ == '__main__':
    # Interactive smoke test: read one sentence from the terminal and show
    # the template and values that parse_all_numbers extracts from it.
    parser = NumberParser()
    spoken = input('Enter a number: ')
    result = parser.parse_all_numbers(spoken)
    print('I claim that you meant the decimal number', result)

+ 1
- 1
recognizer.py Parādīt failu

1
 # This is part of Kaylee
1
 # This is part of Kaylee
2
 # -- this code is licensed GPLv3
2
 # -- this code is licensed GPLv3
3
-# Copyright 2013 Jezra
4
 # Copyright 2015 Clayton G. Hobbs
3
 # Copyright 2015 Clayton G. Hobbs
4
+# Portions Copyright 2013 Jezra
5
 
5
 
6
 import os.path
6
 import os.path
7
 import sys
7
 import sys

Notiek ielāde…
Atcelt
Saglabāt