package de.frankfiedler.cms.client.transformer;
/**
* The default transformer used by makeYourNet. Does some really basic
* conversion.
*/
public class DefaultTransformer implements Transformer {
public String getAsciiFromHtml(String html) {
// System.out.println("html ascii before:"+html);
// cut off all whitespace except space
String result = html.replaceAll("[\\x00-\\x1F]", "");
// cut off space before
result = result.replaceAll(" +
", "
");
// cut off before
result = result.replaceAll("( )+
", "
");
// reduce all repeated space to one
result = result.replaceAll("\\s+", " ");
// replace every appearence of
with newline
result = result.replaceAll("
", "\n");
// replace all with space
result = result.replaceAll(" ", " ");
// special letters
result = result.replaceAll(""", "\"");
result = result.replaceAll("ä", "\u00e4");
result = result.replaceAll("Ä", "\u00c4");
result = result.replaceAll("ü", "\u00fc");
result = result.replaceAll("Ü", "\u00dc");
result = result.replaceAll("ö", "\u00f6");
result = result.replaceAll("Ö", "\u00d6");
result = result.replaceAll("ß", "\u00df");// \u00df
result = result.replaceAll("€", "\u20ac"); // \u0080
// System.out.println("html ascii after:"+html);
return result;
}
public String getHtmlFromAscii(String ascii) {
// System.out.println("ascii ->html before: " + ascii);
// Quotations are replaced only outside tags, to leave the user with the
// possibility to enter html
String result = replaceQuotsOutsideTags(ascii);
// newline is replaced by
plus newline
// (The newline is for readability)
// ascii=ascii.replaceAll("\"", "quot");
result = result.replaceAll("\\n", "
\n");
// Replace
after a newline with
// So it is not surpressed by the browser
result = result.replaceAll("\\n
", "\n
");
// Replace tabs with 6
result = result.replaceAll("\\t", " ");
// Replace 2 spaces by 2
result = result.replaceAll(" {2}", " ");
// special letters
result = result.replaceAll("\u00e4", "ä");
result = result.replaceAll("\u00c4", "Ä");
result = result.replaceAll("\u00fc", "ü");
result = result.replaceAll("\u00dc", "Ü");
result = result.replaceAll("\u00f6", "ö");
result = result.replaceAll("\u00d6", "Ö");
result = result.replaceAll("\u00df", "ß"); // \u00df
result = result.replaceAll("\u20ac", "€"); // \u0080
// System.out.println("ascii -> html after: " + ascii);
return result;
}
/**
* An algorithm to replace the double quotes with " but not
* inside html tags
*
* @param s the text to process
* @return the result
*/
private String replaceQuotsOutsideTags(String s) {
StringBuilder r = new StringBuilder();
String m = s;
while (m.indexOf("<") != -1) {
// There is at least one <
int p = m.indexOf("<");
r.append(replaceQuotes(m.substring(0, p))); // add the part before < to
// result
m = m.substring(p); // cut off the part before <
p = m.indexOf(">");
if (p != -1) {
// There is a closing >
r.append(m.substring(0, p)); // add part inside <> without change to
// result
m = m.substring(p); // cut off the processed part
} else {
// There is no closing >, so we assume that it is not valid html and
// replace the quots
r.append(replaceQuotes(m));
m = "";
}
}
// add rest
r.append(replaceQuotes(m));
return r.toString();
}
protected String replaceQuotes(String string) {
String result = string.replaceAll("\"", """);
return result;
}
}