CL Wiki

Institute of Computational Linguistics – University of Zurich

User Tools

Site Tools


public:paste:analyse.sh
analyse.sh
#!/bin/sh
#
# Usage: analyse.sh FILE.gz
#
# For every distinct line of the gzip-compressed FILE.gz, take the first
# whitespace-separated field, run it through ls-gertwol and gertwolscore,
# and append the cleaned-up top-ranked analysis as an additional
# tab-separated column.  The result is written gzip-compressed to
# "FILE.gz+morph.tsv.gz" (the suffix is appended to the argument as given).
#
# Needs GNU sed/head (sed -r with \s \w \t, head -n -1) and the external
# commands ls-gertwol and gertwolscore on PATH.

set -u

if [ "$#" -lt 1 ]; then
        printf 'Usage: %s FILE.gz\n' "${0##*/}" >&2
        exit 2
fi

input=$1

# best_analysis: read one word on stdin and print its top-ranked, normalised
# analysis on stdout.  Prints nothing when no analysis line survives.
best_analysis() {
        # The word is converted to LATIN1 for ls-gertwol and its output back
        # to UTF-8 (presumably the tool works in Latin-1 -- confirm); -c
        # drops characters that cannot be represented.
        iconv -c -f UTF-8 -t LATIN1 \
        | ls-gertwol \
        | iconv -f LATIN1 -t UTF-8 \
        | gertwolscore \
        | tail -n +2 | head -n -1 \
        | sed -r 's/\s+"(.*)"\s+.*\s+(\S+)$/\2\t\1/' \
        | uniq | sort -k 1 | head -n 1 | cut -f 2- \
        | sed -r -e 's/#/ /g' -e 's/\\\w+//g' \
        | tr -d '|~' | tr '[:upper:]' '[:lower:]' \
        | sed -r 's/\s+/ /g'
        # Stage by stage, after gertwolscore:
        #   tail/head  discard the first and the last output line;
        #   sed        rewrite each line to "<last token>TAB<quoted text>";
        #   sort/head  keep the line that sorts first, cut drops the token.
        #              NOTE(review): this is a lexical sort of the whole
        #              line -- confirm that suits gertwolscore's scores;
        #   sed/tr/sed turn '#' into a space, delete backslash-prefixed
        #              tags and the characters '|' and '~', lower-case,
        #              and squeeze whitespace runs to a single space.
}

zcat -- "$input" \
| sort | uniq \
| while IFS= read -r line
do
        # IFS= and -r keep the input line byte-for-byte; printf (rather than
        # echo) avoids globbing, word splitting and shell-dependent
        # backslash handling.
        m=$(printf '%s\n' "$line" | awk '{print $1}' | best_analysis)
        printf '%s\t%s\n' "$line" "$m"
done | gzip > "${input}+morph.tsv.gz"
public/paste/analyse.sh.txt · Last modified: 2023-09-15 20:33 by 127.0.0.1

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki