]+(?:mso-list:|:\s*symbol)[^>]+>)/gi, '$1__MCE_ITEM__'], // Convert mso-list and symbol spans to item markers
[/(]+(?:MsoListParagraph)[^>]+>)/gi, '$1__MCE_ITEM__'] // Convert mso-list and symbol paragraphs to item markers (FF)
// Word comments like conditional comments etc
// Remove comments, scripts (e.g., msoShowComment), XML tag, VML content, MS Office namespaced tags, and a few other tags
// Convert <s> into <strike> for line-through
[/<(\/?)s>/gi, "<$1strike>"],
// Replace nbsp entities with the character since it's easier to handle
[/ /gi, "\u00a0"]
// Remove bad attributes, with or without quotes, ensuring that attribute text is really inside a tag.
// If JavaScript had a RegExp look-behind, we could have integrated this with the last process() array and got rid of the loop. But alas, it does not, so we cannot.
do {
len = h.length;
// Don't remove the type attribute for lists so that non-default list types display correctly.
h = h.replace(/(!(ol|ul)[^>]*\s)(?:id|name|language|type|on\w+|\w+:\w+)=(?:"[^"]*"|\w+)\s?/gi, "$1");
h = h.replace(/(<(ol|ul)[^>]*\s)(?:id|name|language|on\w+|\w+:\w+)=(?:"[^"]*"|\w+)\s?/gi, "$1");
} while (len != h.length);
// Remove all spans if no styles are to be retained
if (getParam(ed, "paste_retain_style_properties").replace(/^none$/i, "").length == 0) {
h = h.replace(/<\/?span[^>]*>/gi, "");
} else {
// We're keeping styles, so at least clean them up.
// CSS Reference: http://msdn.microsoft.com/en-us/library/aa155477.aspx
// Convert ___ to string of alternating breaking/non-breaking spaces of same length
function(str, spaces) {
return (spaces.length > 0)? spaces.replace(/./, " ").slice(Math.floor(spaces.length/2)).split("").join("\u00a0") : "";
// Examine all styles: delete junk, transform some, and keep the rest
function(str, tag, style) {
var n = [],
i = 0,
s = explode(trim(style).replace(/"/gi, "'"), ";");
// Examine each style definition within the tag's style attribute
each(s, function(v) {
var name, value,
parts = explode(v, ":");
/**
 * Appends a "px" unit to a CSS value that ends in a digit.
 *
 * The value is returned unchanged when it is exactly "0" or when it does
 * not end in a digit (for example it already carries a unit, or is a keyword).
 *
 * @param {String} v Style value to normalise.
 * @return {String} The value, with "px" appended when it was a bare number.
 */
function ensureUnits(v) {
	// The conditional must be parenthesised: `+` binds tighter than `?:`, so
	// without the parentheses the concatenation `v + (boolean)` becomes the
	// condition (always a non-empty, truthy string) and the function returns
	// just "px", discarding the value.
	return v + (((v !== "0") && (/\d$/.test(v))) ? "px" : "");
}
if (parts.length == 2) {
name = parts[0].toLowerCase();
value = parts[1].toLowerCase();
// Translate certain MS Office styles into their CSS equivalents
switch (name) {
case "mso-padding-alt":
case "mso-padding-top-alt":
case "mso-padding-right-alt":
case "mso-padding-bottom-alt":
case "mso-padding-left-alt":
case "mso-margin-alt":
case "mso-margin-top-alt":
case "mso-margin-right-alt":
case "mso-margin-bottom-alt":
case "mso-margin-left-alt":
case "mso-table-layout-alt":
case "mso-height":
case "mso-width":
case "mso-vertical-align-alt":
n[i++] = name.replace(/^mso-|-alt$/g, "") + ":" + ensureUnits(value);
case "horiz-align":
n[i++] = "text-align:" + value;
case "vert-align":
n[i++] = "vertical-align:" + value;
case "font-color":
case "mso-foreground":
n[i++] = "color:" + value;
case "mso-background":
case "mso-highlight":
n[i++] = "background:" + value;
case "mso-default-height":
n[i++] = "min-height:" + ensureUnits(value);
case "mso-default-width":
n[i++] = "min-width:" + ensureUnits(value);
case "mso-padding-between-alt":
n[i++] = "border-collapse:separate;border-spacing:" + ensureUnits(value);
case "text-line-through":
if ((value == "single") || (value == "double")) {
n[i++] = "text-decoration:line-through";
case "mso-zero-height":
if (value == "yes") {
n[i++] = "display:none";
// Eliminate all MS Office style definitions that have no CSS equivalent by examining the first characters in the name
if (/^(mso|column|font-emph|lang|layout|line-break|list-image|nav|panose|punct|row|ruby|sep|size|src|tab-|table-border|text-(?!align|decor|indent|trans)|top-bar|version|vnd|word-break)/.test(name)) {
// If it reached this point, it must be a valid CSS style
n[i++] = name + ":" + parts[1]; // Lower-case name, but keep value case
// If the style attribute contained any valid styles then re-write it; otherwise delete the style attribute.
if (i > 0) {
return tag + ' style="' + n.join(';') + '"';
} else {
return tag;
// Replace headers with
if (getParam(ed, "paste_convert_headers_to_strong")) {
[/]*>/gi, ""],
[/<\/h[1-6][^>]*>/gi, "
// Copy paste from Java like Open Office will produce this junk on FF
[/Version:[\d.]+\nStartHTML:\d+\nEndHTML:\d+\nStartFragment:\d+\nEndFragment:\d+/gi, '']
// Class attribute options are: leave all as-is ("none"), remove all ("all"), or remove only those starting with mso ("mso").
// Note:- paste_strip_class_attributes: "none", verify_css_classes: true is also a good variation.
stripClass = getParam(ed, "paste_strip_class_attributes");
if (stripClass !== "none") {
function removeClasses(match, g1) {
if (stripClass === "all")
return '';
var cls = grep(explode(g1.replace(/^(["'])(.*)\1$/, "$2"), " "),
function(v) {
return (/^(?!mso)/i.test(v));
return cls.length ? ' class="' + cls.join(" ") + '"' : '';
h = h.replace(/ class="([^"]+)"/gi, removeClasses);
h = h.replace(/ class=([\-\w]+)/gi, removeClasses);
// Remove spans option
if (getParam(ed, "paste_remove_spans")) {
h = h.replace(/<\/?span[^>]*>/gi, "");
//console.log('After preprocess:' + h);
o.content = h;
* Various post process items.
// Post-processes pasted content on the DOM held in o.node.
//
// Steps visible in this method:
//   1. For Word content (o.wordContent): remove anchors without an href or
//      pointing at a '#_Toc' target, optionally convert middot lists, and
//      reduce inline styles to the "paste_retain_style_properties" subset.
//   2. Depending on "paste_remove_styles" / "paste_remove_styles_if_webkit"
//      and tinymce.isWebKit, walk the styled elements again.
//
// @param pl  passed through unchanged to _convertLists
// @param o   paste object; this method reads o.wordContent and o.node
_postProcess : function(pl, o) {
var t = this, ed = t.editor, dom = ed.dom, styleProps;
// Guard on the "paste_enable_default_filters" setting.
// NOTE(review): the body of this branch is not visible in this excerpt.
if (ed.settings.paste_enable_default_filters == false) {
if (o.wordContent) {
// Remove named anchors or TOC links
each(dom.select('a', o.node), function(a) {
// Matches anchors with no href, or whose href contains '#_Toc'
if (!a.href || a.href.indexOf('#_Toc') != -1)
dom.remove(a, 1);
if (getParam(ed, "paste_convert_middot_lists")) {
t._convertLists(pl, o);
// Process styles
styleProps = getParam(ed, "paste_retain_style_properties"); // retained properties
// Process only if a string was specified and not equal to "all" or "*"
if ((tinymce.is(styleProps, "string")) && (styleProps !== "all") && (styleProps !== "*")) {
// A value of "none" (any case) is turned into the empty string before being exploded
styleProps = tinymce.explode(styleProps.replace(/^none$/i, ""));
// Retains some style properties
each(dom.select('*', o.node), function(el) {
// newStyle collects the retained name/value pairs for this element.
// NOTE(review): no increment of npc is visible in this excerpt, yet it is
// tested with `npc > 0` below — confirm against the full file.
var newStyle = {}, npc = 0, i, sp, sv;
// Store a subset of the existing styles
if (styleProps) {
for (i = 0; i < styleProps.length; i++) {
sp = styleProps[i];
sv = dom.getStyle(el, sp);
// Only properties with a truthy current value are kept
if (sv) {
newStyle[sp] = sv;
// Remove all of the existing styles
dom.setAttrib(el, 'style', '');
if (styleProps && npc > 0)
dom.setStyles(el, newStyle); // Add back the stored subset of styles
else // Remove empty span tags that do not have class attributes
if (el.nodeName == 'SPAN' && !el.className)
dom.remove(el, true);
// Remove all style information or only specifically on WebKit to avoid the style bug on that browser
if (getParam(ed, "paste_remove_styles") || (getParam(ed, "paste_remove_styles_if_webkit") && tinymce.isWebKit)) {
// Visits every element under o.node that carries a style attribute.
// NOTE(review): the callback body is not visible in this excerpt.
each(dom.select('*[style]', o.node), function(el) {
} else {
if (tinymce.isWebKit) {
// We need to compress the styles on WebKit since if you paste it will become
// Removing the mce_style that contains the real value will force the Serializer engine to compress the styles
each(dom.select('*', o.node), function(el) {
* Converts the most common bullet and number formats in Office into a real semantic UL/LI list.
// Converts paragraphs under o.node that start with the __MCE_ITEM__ marker
// followed by a bullet glyph or a "1." / "a." style label into UL/OL lists
// with LI children, then strips any remaining __MCE_ITEM__ markers from
// o.node.
//
// State carried across paragraphs: listElm (list currently being filled),
// li (last item created), lastMargin / lastType (left margin and list type
// of the previous list paragraph) and levels (margins used to locate a
// parent level when the indentation decreases).
//
// @param pl  plugin instance; only pl.editor.dom is read here
// @param o   paste object; only o.node is read and written here
_convertLists : function(pl, o) {
var dom = pl.editor.dom, listElm, li, lastMargin = -1, margin, levels = [], lastType, html;
// Convert middot lists into real semantic lists
each(dom.select('p', o.node), function(p) {
var sib, val = '', type, html, idx, parents;
// Get text node value at beginning of paragraph
for (sib = p.firstChild; sib && sib.nodeType == 3; sib = sib.nextSibling)
val += sib.nodeValue;
// The value accumulated by the loop above is overwritten here with the
// paragraph's innerHTML stripped of tags.
val = p.innerHTML.replace(/<\/?\w+[^>]*>/gi, '').replace(/ /g, '\u00a0');
// Detect unordered lists look for bullets
if (/^(__MCE_ITEM__)+[\u2022\u00b7\u00a7\u00d8o\u25CF]\s*\u00a0*/.test(val))
type = 'ul';
// Detect ordered lists 1., a. or ixv.
// This test runs after the bullet test, so it wins when both patterns match.
if (/^__MCE_ITEM__\s*\w+\.\s*\u00a0+/.test(val))
type = 'ol';
// Check if node value matches the list pattern: o
if (type) {
// Left margin of the paragraph is used as the nesting indicator
margin = parseFloat(p.style.marginLeft || 0);
// NOTE(review): the statement guarded by this condition is not visible in
// this excerpt (presumably it records the margin in `levels`) — confirm.
if (margin > lastMargin)
// Start a new list when none is open or the list type changed
if (!listElm || type != lastType) {
listElm = dom.create(type);
dom.insertAfter(listElm, p);
} else {
// Nested list element
if (margin > lastMargin) {
listElm = li.appendChild(dom.create(type));
} else if (margin < lastMargin) {
// Find parent level based on margin value
idx = tinymce.inArray(levels, margin);
parents = dom.getParents(listElm.parentNode, type);
// Falls back to the current list when no parent is found at that index
listElm = parents[parents.length - 1 - idx] || listElm;
// Remove middot or number spans if they exists
each(dom.select('span', p), function(span) {
var html = span.innerHTML.replace(/<\/?\w+[^>]*>/gi, '');
// Remove span with the middot or the number
// NOTE(review): the statements run by these two branches are not visible in this excerpt.
if (type == 'ul' && /^__MCE_ITEM__[\u2022\u00b7\u00a7\u00d8o\u25CF]/.test(html))
else if (/^__MCE_ITEM__[\s\S]*\w+\.( |\u00a0)*\s*/.test(html))
html = p.innerHTML;
// Remove middot/list items
if (type == 'ul')
html = p.innerHTML.replace(/__MCE_ITEM__/g, '').replace(/^[\u2022\u00b7\u00a7\u00d8o\u25CF]\s*( |\u00a0)+\s*/, '');
html = p.innerHTML.replace(/__MCE_ITEM__/g, '').replace(/^\s*[\w|'<'|'>']+\.( |\u00a0)+\s*/, '');;
// Create li and add paragraph data into the new li
li = listElm.appendChild(dom.create('li', 0, html));
// Remember this paragraph's margin and type for the next iteration
lastMargin = margin;
lastType = type;
} else
listElm = lastMargin = 0; // End list element
// Remove any leftover markers
html = o.node.innerHTML;
// innerHTML is only rewritten when a marker is actually present
if (html.indexOf('__MCE_ITEM__') != -1)
o.node.innerHTML = html.replace(/__MCE_ITEM__/g, '');
* Inserts the specified contents at the caret position.
// Inserts `h` into the editor by executing the mceInsertContent command.
//
// @param h          content handed to mceInsertContent
// @param skip_undo  forwarded as the `skip_undo` option of mceInsertContent
_insert : function(h, skip_undo) {
var ed = this.editor, r = ed.selection.getRng();
// First delete the contents seems to work better on WebKit when the selection spans multiple list items or multiple table cells.
// The delete only runs for a non-collapsed selection whose range starts and ends in different containers.
if (!ed.selection.isCollapsed() && r.startContainer != r.endContainer)
ed.getDoc().execCommand('Delete', false, null);
ed.execCommand('mceInsertContent', false, h, {skip_undo : skip_undo});
* Instead of the old plain text method which tried to re-create a paste operation, the
* new approach adds a plain text mode toggle switch that changes the behavior of paste.
* This function is passed the same input that the regular paste plugin produces.
* It performs additional scrubbing and produces (and inserts) the plain text.
* This approach leverages all of the great existing functionality in the paste
* plugin, and requires minimal changes to add the new functionality.
* Speednet - June 2009
// Scrubs `content` down to plain text and inserts it with mceInsertContent.
//
// Settings read: "paste_text_linebreaktype" (linebr), "paste_text_replacements"
// (rl) and "paste_max_consecutive_linebreaks".
//
// @param content  the pasted content; nothing is processed unless it is a
//                 non-empty string
_insertPlainText : function(content) {
var ed = this.editor,
linebr = getParam(ed, "paste_text_linebreaktype"),
rl = getParam(ed, "paste_text_replacements"),
is = tinymce.is;
// Applies a list of replacement rules to `content` in order. An item whose
// constructor is RegExp has its matches deleted; v[0] / v[1] are used as a
// pattern / replacement pair.
// NOTE(review): presumably an `else` separates the two replace calls; it is
// not visible in this excerpt — confirm against the full file.
function process(items) {
each(items, function(v) {
if (v.constructor == RegExp)
content = content.replace(v, "");
content = content.replace(v[0], v[1]);
// Only non-empty strings are processed
if ((typeof(content) === "string") && (content.length > 0)) {
// If HTML content with line-breaking tags, then remove all cr/lf chars because only tags will break a line
if (/<(?:p|br|h[1-6]|ul|ol|dl|table|t[rdh]|div|blockquote|fieldset|pre|address|center)[^>]*>/i.test(content)) {
} else {
// Otherwise just get rid of carriage returns (only need linefeeds)
[/<\/(?:p|h[1-6]|ul|ol|dl|table|div|blockquote|fieldset|pre|address|center)>/gi, "\n\n"], // Block tags get a blank line after them
]*>|<\/tr>/gi, "\n"], // Single linebreak for
tags and table rows
[/<\/t[dh]>\s*]*>/gi, "\t"], // Table cells get tabs betweem them
/<[a-z!\/?][^>]*>/gi, // Delete all remaining tags
[/ /gi, " "], // Convert non-break spaces to regular spaces (remember, *plain text*)
[/(?:(?!\n)\s)*(\n+)(?:(?!\n)\s)*/gi, "$1"] // Cool little RegExp deletes whitespace around linebreak chars.
var maxLinebreaks = Number(getParam(ed, "paste_max_consecutive_linebreaks"));
if (maxLinebreaks > -1) {
var maxLinebreaksRegex = new RegExp("\n{" + (maxLinebreaks + 1) + ",}", "g");
var linebreakReplacement = "";
while (linebreakReplacement.length < maxLinebreaks) {
linebreakReplacement += "\n";
[maxLinebreaksRegex, linebreakReplacement] // Limit max consecutive linebreaks
content = ed.dom.decode(tinymce.html.Entities.encodeRaw(content));
// Perform default or custom replacements
if (is(rl, "array")) {
} else if (is(rl, "string")) {
process(new RegExp(rl, "gi"));
// Treat paragraphs as specified in the config
if (linebr == "none") {
// Convert all line breaks to space
[/\n+/g, " "]
} else if (linebr == "br") {
// Convert all line breaks to
[/\n/g, "
} else if (linebr == "p") {
// Convert all line breaks to ...
[/\n+/g, "
)$/, '
} else {
// defaults to "combined"
// Convert single line breaks to
and double line breaks to
[/\n\n/g, ""],
)$/, '
[/\n/g, "
ed.execCommand('mceInsertContent', false, content);
* This method will open the old style paste dialogs. Some users might want the old behavior but still use the new cleanup engine.
// Registers the legacy paste commands and toolbar button on the editor:
//   - command "mcePasteWord" (always)
//   - command "mcePasteText" (only when "paste_text_use_dialog" is set)
//   - button "pasteword", bound to the "mcePasteWord" command
// Takes no arguments; reads this.editor and this.url.
_legacySupport : function() {
var t = this, ed = t.editor;
// Register command(s) for backwards compatibility
ed.addCommand("mcePasteWord", function() {
// Dialog options: page relative to the plugin URL, size taken from the
// "paste_dialog_width" / "paste_dialog_height" settings.
// NOTE(review): the call these options are passed to is not visible in this excerpt.
file: t.url + "/pasteword.htm",
width: parseInt(getParam(ed, "paste_dialog_width")),
height: parseInt(getParam(ed, "paste_dialog_height")),
inline: 1
// The plain-text dialog command is only registered when the setting is enabled
if (getParam(ed, "paste_text_use_dialog")) {
ed.addCommand("mcePasteText", function() {
file : t.url + "/pastetext.htm",
width: parseInt(getParam(ed, "paste_dialog_width")),
height: parseInt(getParam(ed, "paste_dialog_height")),
inline : 1
// Register button for backwards compatibility
ed.addButton("pasteword", {title : "paste.paste_word_desc", cmd : "mcePasteWord"});
// Register the plugin class with the TinyMCE plugin manager under the name "paste"
tinymce.PluginManager.add("paste", tinymce.plugins.PastePlugin);