Replacement rules
Global
First run
/\xad/u -> ""
/\xa0/u -> " "
/–/ -> "-"
/ʼ/ -> "'"
/č/ -> "č"
/&amp;/ -> "&"
/\(\d+\)\.$/ -> "."
/\.\(\d+\)$/ -> "."
/^\s*\(\d+\) / -> ""
/\((\w+)$/ -> "(\1)"
/<[.]{3}|…>/ -> "<ellipsis />"
/[.]{3}|…/ -> "<ellipsis />"
/\(\)/ -> ""
/\($/ -> ""
/^<$/ -> ""
/(\d+º)/ -> "<ord>\1</ord>"
/(http:\/\/[a-z.-]+)"?\s*(\.| \w)/ -> "<url>\1</url>\2"
/\((A\d-\d{4}\/(?:\d{2})?\d{2})\)/ -> "<report>\1</report>"
/(?<!>)(A\d-\d{4}(?:\/(?:\d{2})?\d{2})?)/ -> "<report>\1</report>"
/\[(\d{4}\/\d{4}\([A-Z]+\))\]/ -> "<procedure>\1</procedure>"
/\(((?:[A-Z]+\(\d+\))?[A-Z\d\/ ]*\d{4} - C\d-\d{4})[7\/]((?:\d{2})?\d{2}(?: - \d{4}\/\d{4}\/?(?:\([A-Z]+\))?)?)\)/ -> "<ref>\1/\2</ref>"
/ ,/ -> ','
/(\([^)]+)$/ -> '\1)'
/\s\s+/ -> ' '
Second run
/" ?([^"”]+)["”]/u -> "<quote>\1</quote>"
/« ?([^»]+?) ?»/u -> "<quote>\1</quote>"
/(?<=>) ([:;.!?])/ -> "\1"
/<\/quote> ,/ -> "</quote>,"
/^"\s+/ -> ""
/^\)\s+/ -> ""
/&/ -> "&amp;"
/,,+/ -> ","
Local (language specific)
First run
Danish
/[»]([^«]+)«/u -> "<quote>\1</quote>"
German
/„([^“]+)(["”“])([^„]+„[^\2]+\2)*/u -> "<quote>\1</quote>"
/(?:,,)([\w ]+)"/u -> "<quote>\1</quote>"
English
/\' ?s\b/u -> "<gen>’s</gen>"
/["]([^\']+)\'/u -> "<quote>\1</quote>"
Spanish
/ -(?=\w)/u -> " —"
/(?<=\w)-(\W)/u -> "—\1"
Greek
/\' \b/u -> "’ "
Finish
/\b(\p{Lu}+): ?n\b/u -> "\1<gen>:n</gen>"
French
/\b(l|d|n|j|t|m|qu|c|s|jusqu|lorsqu|aujourd|puisqu|quelqu|quoiqu)\' ?/iu -> "\1"
/»([^»]+?)»/u -> "<quote>\1</quote>"
Italian
/\be[\'’]/u -> "è"
/\bE[\'’]/u -> "È"
/\bpò\b/u -> "po’"
/\bpo\'\b/u -> "po’"
/\bperchè\b/u -> "perché"
/\bpoichè\b/u -> "poiché"
/\baffinchè\b/u -> "affinché"
/\bpero\b/u -> "però"
/\b(un|l|d|dell|nell|all|dall|sull)\' ?/iu -> "\1’"
/[“"]([^”]+)”/u -> "<quote>\1</quote>"
/["]([^»]+)»/u -> "<quote>\1</quote>"
/»([^»]+?)»/u -> "<quote>\1</quote>"
Portuguese
/["]([^»]+)»/u -> "<quote>\1</quote>"
Swedish
/\b(\p{Lu}+): ?s\b/u -> "\1<gen>:s</gen>"
Second run
Bulgarian
/<quote>/ -> "<quote start="„" end="“">"
Czech
/<quote>/ -> "<quote start="„" end="“">"
Danish
/<quote>/ -> "<quote start="»" end="«">"
German
/<quote>/ -> "<quote start="„" end="“">"
/<\/quote>(\w)/ -> "</quote>-\1"
Greek
/<quote>/ -> "<quote start="«" end="»">"
English
/<quote>/ -> "<quote start="‘" end="’">"
Spanish
/<quote>/ -> "<quote start="«" end="»">"
Estonian
/<quote>/ -> "<quote start="„" end="”">"
Finnish
/<quote>/ -> "<quote start="”" end="”">"
French
/(?<! |&amp|&[lg]t)([?!:;])/u -> " \1"
/<quote>/ -> "<quote start="« " end=" »">"
Hungarian
/<quote>/ -> "<quote start="„" end="”">"
Italian
/<quote>/ -> "<quote start="«" end="»">"
Lithuanian
/<quote>/ -> "<quote start="„" end="“">"
Latvian
/<quote>/ -> "<quote start="„" end="“">"
Dutch
/<quote>/ -> "<quote start="“" end="”">"
Polish
/<quote>/ -> "<quote start="„" end="”">"
Portuguese
/<quote>/ -> "<quote start="«" end="»">"
Romanian
/<quote>/ -> "<quote start="„" end="”">"
Slovak
/<quote>/ -> "<quote start="„" end="“">"
Slovenian
/<quote>/ -> "<quote start="„" end="“">"
Swedish
/<quote>/ -> "<quote start="”" end="”">"