Context Navigation

source: Dev/trunk/src/client/dojox/html/format.js

Last change on this file was 483, checked in by hendrikvanantwerpen, 11 years ago
Added Dojo 1.9.3 release.
Property svn:executable set to *
File size: 13.3 KB

Rev	Line
[483]	1	define(["dojo/_base/kernel", "./entities", "dojo/_base/array", "dojo/_base/window", "dojo/_base/sniff"],
	2	function(lang, Entities, ArrayUtil, Window, has) {
	3	var dhf = lang.getObject("dojox.html.format",true);
	4
	5	dhf.prettyPrint = function(html /*String*/, indentBy /*Integer?*/, maxLineLength /*Integer?*/, map /*Array?*/, /*boolean*/ xhtml){
	6	// summary:
	7	// Function for providing a 'pretty print' version of HTML content from
	8	// the provided string. It's not perfect by any means, but it does
	9	// a 'reasonable job'.
	10	// html: String
	11	// The string of HTML to try and generate a 'pretty' formatting.
	12	// indentBy: Integer
	13	// Optional input for the number of spaces to use when indenting.
	14	// If not defined, zero, negative, or greater than 10, will just use tab
	15	// as the indent.
	16	// maxLineLength: Integer
	17	// Optional input for the number of characters a text line should use in
	18	// the document, including the indent if possible.
	19	// map: Array
	20	// Optional array of entity mapping characters to use when processing the
	21	// HTML Text content. By default it uses the default set used by the
	22	// dojox.html.entities.encode function.
	23	// xhtml: boolean
	24	// Optional parameter that declares that the returned HTML should try to be 'xhtml' compatible.
	25	// This means normally unclosed tags are terminated with /> instead of >. Example: `<hr>` -> `<hr />`
	// Mutable state shared by the formatter closures defined below.
	var content = [];		// Output fragments; joined into the result string at the end.
	var indentDepth = 0;	// Current nesting level used when indenting.
	var closeTags = [];		// Stack: tag name to emit on close, or false when nothing is emitted.
	var iTxt = "\t";		// One unit of indentation.
	var textContent = "";	// Text (and inline markup) buffered until the next non-inline boundary.
	var inlineStyle = [];	// Stack: whether each currently open element is an inline formatter.
	var i;

	// Compile regexps once for this call.
	// An unquoted attribute value: "=" followed by quote-free text up to
	// whitespace or the closing ">".
	var rgxp_fixIEAttrs = /[=]([^"']+?)(\s|>)/g;
	// A style attribute whose value is double-quoted, single-quoted or bare.
	var rgxp_styleMatch = /style=("[^"]*"|'[^']*'|\S*)/gi;
	// Any attribute whose value is double-quoted, single-quoted or bare.
	var rgxp_attrsMatch = /[\w-]+=("[^"]*"|'[^']*'|\S*)/gi;

	// Check to see if we want to use spaces for indent instead
	// of tab. Only values from 1 through 9 switch to spaces.
	if(indentBy && indentBy > 0 && indentBy < 10){
		iTxt = "";
		for(i = 0; i < indentBy; i++){
			iTxt += " ";
		}
	}
	47
	// Build the content outside of the editor so we can walk
	// via DOM and build a 'pretty' output.
	// NOTE(review): the caller-supplied markup is parsed by assigning it to
	// innerHTML of an element created from the live document. If `html` can be
	// untrusted, confirm that parsing it this way is acceptable (a browser may
	// still act on the markup even though the div is never attached).
	var contentDiv = Window.doc.createElement("div");
	contentDiv.innerHTML = html;

	// Use the entity encode/decode functions, they cache on the map,
	// so it won't multiprocess a map.
	var encode = Entities.encode;
	var decode = Entities.decode;
	57
	58	/* Define a bunch of formatters to format the output. */
	// Lower-case names of the inline formatting elements.
	var inlineFormatTags = {
		a: true, b: true, strong: true, s: true, strike: true, i: true,
		u: true, em: true, sup: true, sub: true, span: true, font: true,
		big: true, cite: true, q: true, small: true
	};
	var isInlineFormat = function(tag){
		// summary:
		//		Function to determine if the current tag is an inline
		//		element that does formatting, as we don't want to
		//		break/indent around it, as it can screw up text.
		// tag:
		//		The tag name to examine. It is compared as-is, so it must
		//		already be lower case to match.
		// returns:
		//		true when tag is one of the listed inline tags, false otherwise.

		// Own-property check so names such as "constructor" are not
		// mistaken for tags; typeof check keeps the match strict.
		return typeof tag === "string" &&
			Object.prototype.hasOwnProperty.call(inlineFormatTags, tag);
	};
	88
	// Create less divs: a single scratch element is reused for every call.
	var div = contentDiv.ownerDocument.createElement("div");
	var outerHTML = function(node){
		// summary:
		//		Function to return the outer HTML of a node.
		//		Yes, IE has a function like this, but using cloneNode
		//		allows avoiding looking at any child nodes, because in this
		//		case, we don't want them.
		// node:
		//		The node to serialize; only a shallow clone of it is used.
		// returns:
		//		The innerHTML of the scratch div while it holds the clone.
		var clone = node.cloneNode(false);
		div.appendChild(clone);
		// Named 'markup' so it does not shadow prettyPrint's 'html' argument.
		var markup = div.innerHTML;
		// Empty the scratch div again so it is ready for the next call.
		div.innerHTML = "";
		return markup;
	};
	103
	var sizeIndent = function(){
		// summary:
		//		Function to compute how many characters the indent at the
		//		current depth occupies.
		// returns:
		//		indentDepth times the length of one indent unit, or 0 when
		//		the depth is not positive.
		return indentDepth > 0 ? indentDepth * iTxt.length : 0;
	};
	111
	var indent = function(){
		// summary:
		//		Function to handle indent depth: appends one indent unit to
		//		the output for each level of the current depth.
		var level;
		for(level = 0; level < indentDepth; level++){
			content.push(iTxt);
		}
	};
	var newline = function(){
		// summary:
		//		Function to handle newlining: appends a single "\n" to the
		//		output.
		content.push("\n");
	};
	125
	var processTextNode = function(n){
		// summary:
		//		Function to process the text content for doc
		//		insertion. The node's value is entity-encoded with the
		//		caller's map and buffered; it is written out later, when the
		//		buffered text is formatted.
		// n:
		//		The text node to process.
		textContent += encode(n.nodeValue, map);
	};
	134
	var formatText = function(txt){
		// summary:
		//		Function for processing the text content encountered up to a
		//		point and inserting it into the formatted document output.
		// txt:
		//		The text to format.
		// returns:
		//		The text collapsed to single-spaced lines, each prefixed with
		//		the current indent and terminated by "\n"; "" when the text
		//		is empty after trimming.
		var i;

		// Clean up any indention organization since we're going to rework it
		// anyway: trim every source line and join them with single spaces.
		var pieces = txt.split("\n");
		for(i = 0; i < pieces.length; i++){
			pieces[i] = lang.trim(pieces[i]);
		}
		txt = lang.trim(pieces.join(" "));
		if(txt === ""){
			return "";
		}

		// The indent prefix is identical for every emitted line, so build
		// it once.
		var prefix = "";
		for(i = 0; i < indentDepth; i++){
			prefix += iTxt;
		}

		if(!(maxLineLength && maxLineLength > 0)){
			// No wrapping requested: emit everything on one line.
			return prefix + txt + "\n";
		}

		// Leave room for the indent when it fits inside the limit.
		var indentSize = sizeIndent();
		var maxLine = maxLineLength;
		if(maxLineLength > indentSize){
			maxLine -= indentSize;
		}

		var lines = [];
		while(txt){
			// NOTE(review): the remaining text is compared against
			// maxLineLength, not the indent-adjusted maxLine, so a final
			// line plus its indent can exceed the limit. Kept as-is.
			if(txt.length <= maxLineLength){
				// Line is shorter than our desired length, so use it as-is.
				lines.push(prefix + txt + "\n");
				break;
			}
			for(i = maxLine; (i > 0 && txt.charAt(i) !== " "); i--){
				// Do nothing, we're just looking for a space to split at.
			}
			if(!i){
				// Couldn't find a split going back, so go forward.
				for(i = maxLine; (i < txt.length && txt.charAt(i) !== " "); i++){
					// Do nothing, we're just looking for a space to split at.
				}
			}
			var line = lang.trim(txt.substring(0, i));
			// Shift up the text string to the next chunk, skipping the
			// space we split at (if any).
			txt = lang.trim(txt.substring((i === txt.length) ? txt.length : i + 1, txt.length));
			if(line){
				line = prefix + line + "\n";
			}
			lines.push(line);
		}
		return lines.join("");
	};
	208
	var processScriptText = function(txt){
		// summary:
		//		Function to clean up potential escapes in the script code:
		//		turns the entities &quot; &gt; &lt; and &amp; back into the
		//		characters they stand for.
		// txt:
		//		The script text; falsy values are returned unchanged.
		if(txt){
			txt = txt.replace(/&quot;/gi, "\"");
			txt = txt.replace(/&gt;/gi, ">");
			txt = txt.replace(/&lt;/gi, "<");
			// &amp; goes last so that "&amp;lt;" ends up as the literal
			// text "&lt;" instead of being decoded twice.
			txt = txt.replace(/&amp;/gi, "&");
		}
		return txt;
	};
	220
	var formatScript = function(txt){
		// summary:
		//		Function to rudimentary formatting of script text.
		//		Not perfect, but it helps get some level of organization
		//		in there.
		// description:
		//		Each non-blank line is trimmed and re-indented by the current
		//		document depth plus the number of "{" not yet matched by a
		//		"}". Blank lines are dropped.
		// txt:
		//		The script text to try to format a bit. Falsy values are
		//		returned unchanged.
		if(!txt){
			return txt;
		}
		txt = processScriptText(txt);

		var i, c, t, _iTxt;
		// Named braceDepth so it does not shadow the sibling indent() helper.
		var braceDepth = 0;
		var scriptLines = txt.split("\n");
		var newLines = [];
		for(i = 0; i < scriptLines.length; i++){
			var line = lang.trim(scriptLines[i]);
			if(!line){
				// A blank line. The pieces come from split("\n") and so can
				// never contain a newline themselves; nothing is emitted.
				continue;
			}
			// Level this line is written at; brace counting below may
			// lower it before the line is emitted.
			var iLevel = braceDepth;
			for(c = 0; c < line.length; c++){
				var ch = line.charAt(c);
				if(ch === "{"){
					braceDepth++;
				}else if(ch === "}"){
					braceDepth--;
					// We want to back up a bit before the
					// line is written.
					iLevel = braceDepth;
				}
			}
			_iTxt = "";
			for(t = 0; t < indentDepth + iLevel; t++){
				_iTxt += iTxt;
			}
			newLines.push(_iTxt + line + "\n");
		}
		// Okay, create the script text, hopefully reasonably
		// formatted.
		return newLines.join("");
	};
	271
	var openTag = function(node){
		// summary:
		//		Function to open a new tag for writing content.
		// description:
		//		Rebuilds the node's start tag with quoted, sorted attributes
		//		and a normalized style attribute, records how it must be
		//		closed, and either writes it on its own indented line
		//		(non-inline elements) or appends it to the buffered text
		//		(inline elements).
		// node:
		//		The element node to open.
		var name = node.nodeName.toLowerCase();
		// Generate the outer node content (tag with attrs)
		var nText = lang.trim(outerHTML(node));
		var tag = nText.substring(0, nText.indexOf(">") + 1);

		// Also thanks to IE, we need to check for quotes around
		// attributes and insert if missing.
		tag = tag.replace(rgxp_fixIEAttrs, '="$1"$2');

		// And lastly, thanks IE for changing style casing and end
		// semi-colon and webkit adds spaces, so lets clean it up by
		// sorting, etc, while we're at it.
		tag = tag.replace(rgxp_styleMatch, function(match){
			// 'style=' is six characters; the rest is the value including
			// its surrounding quote characters.
			var sL = match.substring(0, 6);
			var style = match.substring(6, match.length);
			var closure = style.charAt(0);
			style = lang.trim(style.substring(1, style.length - 1));
			style = style.split(";");
			var trimmedStyles = [];
			ArrayUtil.forEach(style, function(s){
				s = lang.trim(s);
				if(s){
					// Lower case the style name, leave the value alone. Mainly a fixup for IE.
					s = s.substring(0, s.indexOf(":")).toLowerCase() + s.substring(s.indexOf(":"), s.length);
					trimmedStyles.push(s);
				}
			});
			trimmedStyles = trimmedStyles.sort();

			// Reassemble and return the styles in sorted order.
			style = trimmedStyles.join("; ");
			var ts = lang.trim(style);
			if(!ts || ts === ";"){
				// Just remove any style attrs that are empty.
				return "";
			}
			style += ";";
			return sL + closure + style + closure;
		});

		// Try and sort the attributes while we're at it.
		var attrs = [];
		tag = tag.replace(rgxp_attrsMatch, function(attr){
			attrs.push(lang.trim(attr));
			return "";
		});
		attrs = attrs.sort();

		// Reassemble the tag with sorted attributes!
		tag = "<" + name;
		if(attrs.length){
			tag += " " + attrs.join(" ");
		}

		// Determine closure status. If xhtml,
		// then close the tag properly as needed.
		if(nText.indexOf("</") !== -1){
			// The serialized node has an end tag: remember the name to close.
			closeTags.push(name);
			tag += ">";
		}else{
			if(xhtml){
				tag += " />";
			}else{
				tag += ">";
			}
			// false tells closeTag() that there is no end tag to write.
			closeTags.push(false);
		}

		var inline = isInlineFormat(name);
		inlineStyle.push(inline);
		if(textContent && !inline){
			// Process any text content we have that occurred
			// before the open tag of a non-inline.
			content.push(formatText(textContent));
			textContent = "";
		}

		if(!inline){
			// Non-inline elements get their own indented line and nest
			// their children one level deeper.
			indent();
			content.push(tag);
			newline();
			indentDepth++;
		}else{
			// Inline elements stay inside the running text.
			textContent += tag;
		}
	};
	364
	var closeTag = function(){
		// summary:
		//		Function to close out a tag if necessary.
		// description:
		//		Pops the bookkeeping pushed by openTag(). Non-inline elements
		//		first flush any buffered text and then write their end tag on
		//		its own line one level shallower; inline elements append
		//		their end tag to the buffered text.
		var inline = inlineStyle.pop();
		if(textContent && !inline){
			// Process any text content we have that occurred
			// before the close tag.
			content.push(formatText(textContent));
			textContent = "";
		}
		var ct = closeTags.pop();
		if(ct){
			ct = "</" + ct + ">";
			if(!inline){
				indentDepth--;
				indent();
				content.push(ct);
				newline();
			}else{
				textContent += ct;
			}
		}else if(!inline){
			// No end tag to write, but the depth that openTag() added for
			// this non-inline element still has to be undone. Inline
			// elements never changed the depth, so they are left alone.
			indentDepth--;
		}
	};
	390
	var processCommentNode = function(n){
		// summary:
		//		Function to handle processing a comment node. The opening
		//		and closing markers each go on their own indented line, with
		//		the comment text formatted one level deeper between them.
		// n:
		//		The comment node to process.

		// Make sure contents aren't double-encoded.
		var commentText = decode(n.nodeValue, map);
		indent();
		content.push("<!--");
		newline();
		// The body is formatted one level deeper than the markers.
		indentDepth++;
		content.push(formatText(commentText));
		indentDepth--;
		indent();
		content.push("-->");
		newline();
	};
	409
	var processNode = function(node){
		// summary:
		//		Entrypoint for processing all the text! Walks the children of
		//		node in order and dispatches on node type: elements are
		//		opened, have their content handled and are closed; text and
		//		CDATA nodes are buffered; comments are written out.
		// node:
		//		The node whose child nodes should be processed.
		var children = node.childNodes;
		if(!children){
			return;
		}
		var i;
		for(i = 0; i < children.length; i++){
			var n = children[i];
			if(n.nodeType === 1){
				var tg = lang.trim(n.tagName.toLowerCase());
				if(has("ie") && n.parentNode !== node){
					// IE is broken. DOMs are supposed to be a tree.
					// But in the case of malformed HTML, IE generates a graph
					// meaning one node ends up with multiple references
					// (multiple parents). This is totally wrong and invalid, but
					// such is what it is. We have to keep track and check for
					// this because otherwise the source output HTML will have dups.
					continue;
				}
				if(tg && tg.charAt(0) === "/"){
					// IE oddity. Malformed HTML can put in odd tags like:
					// </ >, </span>. It treats a mismatched closure as a new
					// start tag. So, remove them.
					continue;
				}
				// Process non-dup, seemingly wellformed elements!
				openTag(n);
				if(tg === "script"){
					content.push(formatScript(n.innerHTML));
				}else if(tg === "pre"){
					// Preformatted content is emitted as-is, not reformatted.
					var preTxt = n.innerHTML;
					if(has("mozilla")){
						// Mozilla screws this up, so fix it up.
						// A string pattern would only replace the first
						// match, so use a global regexp for the line breaks.
						preTxt = preTxt.replace(/<br>/g, "\n");
						preTxt = preTxt.replace("<pre>", "");
						preTxt = preTxt.replace("</pre>", "");
					}
					// Add ending newline, if needed.
					if(preTxt.charAt(preTxt.length - 1) !== "\n"){
						preTxt += "\n";
					}
					content.push(preTxt);
				}else{
					processNode(n);
				}
				closeTag();
			}else if(n.nodeType === 3 || n.nodeType === 4){
				// Text and CDATA sections.
				processTextNode(n);
			}else if(n.nodeType === 8){
				processCommentNode(n);
			}
		}
	};
	465
	466	//Okay, finally process the input string.
	467	processNode(contentDiv);
	468	if(textContent){
	469	// Insert any trailing text. See: #10854
	470	content.push(formatText(textContent));
	471	textContent = "";
	472	}
	473	return content.join(""); //String
	474	};
	475	return dhf;
	476	});
	477

Note: See TracBrowser for help on using the repository browser.

Download in other formats: