Browse Source

Patched language_updater.sh to generate the sentences corpus from the commands file

Jezra 11 years ago
parent
commit
40b219723f
1 changed file with 9 additions and 2 deletions
  1. 9
    2
      language_updater.sh

+ 9
- 2
language_updater.sh View File

1
 #!/bin/bash
1
 #!/bin/bash
2
 
2
 
3
 blatherdir=~/.config/blather
3
 blatherdir=~/.config/blather
4
-sourcefile=$blatherdir/sentences.corpus
4
+sentences=$blatherdir/sentences.corpus
5
+sourcefile=$blatherdir/commands
5
 langdir=$blatherdir/language
6
 langdir=$blatherdir/language
6
 tempfile=$blatherdir/url.txt
7
 tempfile=$blatherdir/url.txt
7
 lmtoolurl=http://www.speech.cs.cmu.edu/cgi-bin/tools/lmtool/run
8
 lmtoolurl=http://www.speech.cs.cmu.edu/cgi-bin/tools/lmtool/run
8
 
9
 
9
 cd $blatherdir
10
 cd $blatherdir
10
 
11
 
12
+sed -f - $sourcefile > $sentences <<EOFcommands
13
+  /^$/d
14
+  /^#/d
15
+  s/\:.*$//
16
+EOFcommands
17
+
11
 # upload corpus file, find the resulting dictionary file url
18
 # upload corpus file, find the resulting dictionary file url
12
-curl -L -F corpus=@"$sourcefile" -F formtype=simple $lmtoolurl \
19
+curl -L -F corpus=@"$sentences" -F formtype=simple $lmtoolurl \
13
   |grep -A 1 "base name" |grep http \
20
   |grep -A 1 "base name" |grep http \
14
   | sed -e 's/^.*\="//' | sed -e 's/\.tgz.*$//' | sed -e 's/TAR//' > $tempfile
21
   | sed -e 's/^.*\="//' | sed -e 's/\.tgz.*$//' | sed -e 's/TAR//' > $tempfile
15
 
22
 

Loading…
Cancel
Save