Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
public:costep:rules [2014-11-04 10:53] – created Johannes Graën
public:costep:rules [2023-09-15 20:33] (current) – external edit 127.0.0.1
Line 1: Line 1:
 +====== Replacement rules ======
 +
 +===== Global =====
 +
 +==== First run ====
 +
 +  /\xad/u -> ""
 +  /\xa0/u -> " "
 +  /–/ -> "-"
 +  /ʼ/ -> "'"
 +  /č/ -> "č"
 +  /&amp;/ -> "&"
 +  /\(\d+\)\.$/ -> "."
 +  /\.\(\d+\)$/ -> "."
 +  /^\s*\(\d+\) / -> ""
 +  /\((\w+)$/ -> "(\1)"
 +  /<[.]{3}|…>/ -> "<ellipsis />"
 +  /[.]{3}|…/ -> "<ellipsis />"
 +  /\(\)/ -> ""
 +  /\($/ -> ""
 +  /^<$/ -> ""  
 +  /(\d+º)/ -> "<ord>\1</ord>"  
 +  /(http:\/\/[a-z.-]+)"?\s*(\.| \w)/ -> "<url>\1</url>\2"  
 +  /\((A\d-\d{4}\/(?:\d{2})?\d{2})\)/ -> "<report>\1</report>"
 +  /(?<!>)(A\d-\d{4}(?:\/(?:\d{2})?\d{2})?)/ -> "<report>\1</report>"
 +  /\[(\d{4}\/\d{4}\([A-Z]+\))\]/ -> "<procedure>\1</procedure>"  
 +  /\(((?:[A-Z]+\(\d+\))?[A-Z\d\/ ]*\d{4} - C\d-\d{4})[7\/]((?:\d{2})?\d{2}(?: - \d{4}\/\d{4}\/?(?:\([A-Z]+\))?)?)\)/ -> "<ref>\1/\2</ref>"
 +  / ,/ -> ','
 +  /(\([^)]+)$/ -> '\1)'
 +  /\s\s+/ -> ' '
 +
 +
 +==== Second run ====
 +
 +  /" ?([^"”]+)["”]/u -> "<quote>\1</quote>"
 +  /« ?([^»]+?) ?»/u -> "<quote>\1</quote>"
 +  /(?<=>) ([:;.!?])/ -> "\1"
 +  /<\/quote> ,/ -> "</quote>,"
 +  /^"\s+/ -> ""
 +  /^\)\s+/ -> ""
 +  /&/ -> "&amp;"
 +  /,,+/ -> ","
 +
 +
 +===== Local (language specific) =====
 +
 +==== First run ====
 +
 +=== Danish ===
 +  /[»]([^«]+)«/u -> "<quote>\1</quote>"
 +
 +=== German ===
 +  /„([^“]+)(["”“])([^„]+„[^\2]+\2)*/u -> "<quote>\1</quote>"
 +  /(?:,,)([\w ]+)"/u -> "<quote>\1</quote>"
 +
 +=== English ===
 +  /\' ?s\b/u -> "<gen>’s</gen>"
 +  /["]([^\']+)\'/u -> "<quote>\1</quote>"
 +  
 +=== Spanish ===
 +  / -(?=\w)/u -> " —"
 +  /(?<=\w)-(\W)/u -> "—\1"
 +
 +=== Greek ===
 +  /\' \b/u -> "’ "
 +
 +=== Finnish ===
 +  /\b(\p{Lu}+): ?n\b/u -> "\1<gen>:n</gen>"
 +  
 +=== French ===  
 +  /\b(l|d|n|j|t|m|qu|c|s|jusqu|lorsqu|aujourd|puisqu|quelqu|quoiqu)\' ?/iu -> "\1"
 +  /»([^»]+?)»/u -> "<quote>\1</quote>"
 +
 +=== Italian ===
 +  /\be[\'’]/u -> "è"
 +  /\bE[\'’]/u -> "È"
 +  /\bpò\b/u -> "po’"
 +  /\bpo\'\b/u -> "po’"
 +  /\bperchè\b/u -> "perché"
 +  /\bpoichè\b/u -> "poiché"
 +  /\baffinchè\b/u -> "affinché"
 +  /\bpero\b/u -> "però"
 +  /\b(un|l|d|dell|nell|all|dall|sull)\' ?/iu -> "\1’"
 +  /[“"]([^”]+)”/u -> "<quote>\1</quote>"
 +  /["]([^»]+)»/u -> "<quote>\1</quote>"
 +  /»([^»]+?)»/u -> "<quote>\1</quote>"
 +
 +=== Portuguese ===
 +  /["]([^»]+)»/u -> "<quote>\1</quote>"
 +  
 +=== Swedish ===
 +  /\b(\p{Lu}+): ?s\b/u -> "\1<gen>:s</gen>"
 +
 +
 +==== Second run ====
 +
 +=== Bulgarian ===
 +  /<quote>/ -> "<quote start="„" end="“">"
 +
 +=== Czech ===
 +  /<quote>/ -> "<quote start="„" end="“">"
 +
 +=== Danish ===
 +  /<quote>/ -> "<quote start="»" end="«">"
 +
 +=== German ===
 +  /<quote>/ -> "<quote start="„" end="“">"
 +  /<\/quote>(\w)/ -> "</quote>-\1"
 +
 +=== Greek ===
 +  /<quote>/ -> "<quote start="«" end="»">"
 +
 +=== English ===
 +  /<quote>/ -> "<quote start="‘" end="’">"
 +
 +=== Spanish ===
 +  /<quote>/ -> "<quote start="«" end="»">"
 +
 +=== Estonian ===
 +  /<quote>/ -> "<quote start="„" end="”">"
 +
 +=== Finnish ===
 +  /<quote>/ -> "<quote start="”" end="”">"
 +
 +=== French ===
 +  /(?<! |&amp|&[lg]t)([?!:;])/u -> " \1"
 +  /<quote>/ -> "<quote start="« " end=" »">"
 +
 +=== Hungarian ===
 +  /<quote>/ -> "<quote start="„" end="”">"
 +
 +=== Italian ===
 +  /<quote>/ -> "<quote start="«" end="»">"
 +
 +=== Lithuanian ===
 +  /<quote>/ -> "<quote start="„" end="“">"
 +
 +=== Latvian ===
 +  /<quote>/ -> "<quote start="„" end="“">"
 +
 +=== Dutch ===
 +  /<quote>/ -> "<quote start="“" end="”">"
 +
 +=== Polish ===
 +  /<quote>/ -> "<quote start="„" end="”">"
 +
 +=== Portuguese ===
 +  /<quote>/ -> "<quote start="«" end="»">"
 +
 +=== Romanian ===
 +  /<quote>/ -> "<quote start="„" end="”">"
 +
 +=== Slovak ===
 +  /<quote>/ -> "<quote start="„" end="“">"
 +
 +=== Slovenian ===
 +  /<quote>/ -> "<quote start="„" end="“">"
 +
 +=== Swedish ===
 +  /<quote>/ -> "<quote start="”" end="”">"
  

CL Wiki

Institute of Computational Linguistics – University of Zurich