User Tools


analyse.sh
#!/bin/sh
#
# analyse.sh - add an analysis column to a gzip-compressed line list.
#
# Usage: analyse.sh FILE.gz
#
# Decompresses FILE.gz, sorts and de-duplicates its lines and, for every
# remaining line, runs the first whitespace-separated field through
# ls-gertwol and gertwolscore (presumably the GERTWOL morphological
# analyser and a scoring filter for its output -- confirm against the tool
# documentation). Each input line is written back out followed by a TAB and
# the cleaned-up result, gzip-compressed, to "FILE.gz+morph.tsv.gz".
#
# Requirements: zcat, iconv, ls-gertwol and gertwolscore on PATH, plus GNU
# sed (-r, \s, \S, \w, \t) and GNU head (negative line count).

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  printf 'Usage: %s FILE.gz\n' "${0##*/}" >&2
  exit 2
fi

#######################################
# Print the selected, normalised analysis for one input line.
# Arguments: $1 - the input line; only its first whitespace-separated
#                 field is analysed.
# Outputs:   the cleaned analysis on stdout (may be empty).
#
# Pipeline stages, in order:
#   1. awk keeps the first field of the line.
#   2. iconv converts UTF-8 to LATIN1, dropping unconvertible characters
#      (-c), before ls-gertwol; its output is converted back to UTF-8.
#   3. tail/head drop the first and the last line of gertwolscore's output.
#   4. sed rewrites each remaining line to "<last token><TAB><quoted text>".
#   5. sort + head keep the line that sorts first; cut drops its first
#      column again.
#      NOTE(review): the sort is lexical, not numeric -- if the first column
#      is a numeric score, `sort -n` may be what is intended; confirm.
#   6. '#' becomes a space, every backslash followed by word characters is
#      removed, '|' and '~' are deleted, the text is lower-cased and runs of
#      whitespace are squeezed to a single space.
#######################################
best_analysis() {
  # printf instead of an unquoted echo: no word splitting, no glob
  # expansion and no escape interpretation of the input data.
  printf '%s\n' "$1" \
    | awk '{print $1}' \
    | iconv -c -f UTF-8 -t LATIN1 \
    | ls-gertwol \
    | iconv -f LATIN1 -t UTF-8 \
    | gertwolscore \
    | tail -n +2 | head -n -1 \
    | sed -r 's/\s+"(.*)"\s+.*\s+(\S+)$/\2\t\1/' \
    | uniq | sort -k 1 | head -n 1 | cut -f 2- \
    | sed -r -e 's/#/ /g' -e 's/\\\w+//g' \
    | tr -d '|~' | tr '[:upper:]' '[:lower:]' \
    | sed -r 's/\s+/ /g'
}

# `sort | uniq` is kept (rather than `sort -u`) so that lines which merely
# collate as equal in the current locale are still treated as distinct.
zcat -- "$1" \
  | sort | uniq \
  | while IFS= read -r line; do
      # IFS= and -r keep each line verbatim (whitespace and backslashes).
      m=$(best_analysis "$line")
      # printf guarantees a real TAB separator; `echo "...\t..."` only
      # produces one in shells whose echo interprets backslash escapes.
      printf '%s\t%s\n' "$line" "$m"
    done \
  | gzip > "${1}+morph.tsv.gz"

CL Wiki

Institute of Computational Linguistics – University of Zurich