Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: Dev/trunk/src/client/dojox/html/format.js @ 532

Last change on this file since 532 was 483, checked in by hendrikvanantwerpen, 11 years ago
Added Dojo 1.9.3 release.
Property svn:executable set to `*`
File size: 13.3 KB

Line
define(["dojo/_base/kernel", "./entities", "dojo/_base/array", "dojo/_base/window", "dojo/_base/sniff"],
	function(lang, Entities, ArrayUtil, Window, has) {
	// NOTE: `lang` is bound to the first dependency, "dojo/_base/kernel"; this
	// module only calls getObject() and trim() on it.
	var dhf = lang.getObject("dojox.html.format",true);

	dhf.prettyPrint = function(html/*String*/, indentBy /*Integer?*/, maxLineLength /*Integer?*/, map/*Array?*/, /*boolean*/ xhtml){
		// summary:
		//		Function for providing a 'pretty print' version of HTML content from
		//		the provided string. It's not perfect by any means, but it does
		//		a 'reasonable job'.
		// description:
		//		The string is parsed by assigning it to the innerHTML of a detached
		//		div, and the resulting DOM is walked. Non-inline elements are put on
		//		their own indented lines, inline formatting elements are kept in the
		//		text flow, attributes are quoted and sorted, and style declarations
		//		are normalized and sorted.
		// html: String
		//		The string of HTML to try and generate a 'pretty' formatting.
		// indentBy: Integer
		//		Optional input for the number of spaces to use when indenting.
		//		If not defined, zero, negative, or 10 or greater, will just use tab
		//		as the indent.
		// maxLineLength: Integer
		//		Optional input for the number of characters a text line should use in
		//		the document, including the indent if possible.
		// map: Array
		//		Optional array of entity mapping characters to use when processing the
		//		HTML Text content. By default it uses the default set used by the
		//		dojox.html.entities.encode function.
		// xhtml: boolean
		//		Optional parameter that declares that the returned HTML should try to be 'xhtml' compatible.
		//		This means normally unclosed tags are terminated with /> instead of >. Example: `<hr>` -> `<hr />`
		// returns: String
		//		The re-formatted HTML.
		var content = [];		// Output fragments, joined at the very end.
		var indentDepth = 0;	// Current nesting level of non-inline elements.
		var closeTags = [];		// Stack: tag name to close, or false when no close tag is needed.
		var iTxt = "\t";		// Text of a single indent level.
		var textContent = "";	// Text (and inline markup) collected but not yet flushed.
		var inlineStyle = [];	// Stack: whether each open element is an inline formatter.
		var i;

		// Compile regexps once for this call.
		// An unquoted attribute value: '=' followed by quote-free text up to whitespace or '>'.
		var rgxp_fixIEAttrs = /[=]([^"']+?)(\s|>)/g;
		// A style attribute with a double-quoted, single-quoted or bare value.
		var rgxp_styleMatch = /style=("[^"]*"|'[^']*'|\S*)/gi;
		// Any name=value attribute with a double-quoted, single-quoted or bare value.
		var rgxp_attrsMatch = /[\w-]+=("[^"]*"|'[^']*'|\S*)/gi;

		// Check to see if we want to use spaces for indent instead
		// of tab.
		if(indentBy && indentBy > 0 && indentBy < 10){
			iTxt = "";
			for(i = 0; i < indentBy; i++){
				iTxt += " ";
			}
		}

		//Build the content outside of the editor so we can walk
		//via DOM and build a 'pretty' output.
		// NOTE(review): assigning caller-supplied markup to innerHTML parses it in
		// the live document's context, and the div being detached may not prevent
		// resource loads or inline event handlers (e.g. <img onerror>) from firing.
		// Only pass trusted or pre-sanitized HTML -- confirm against callers.
		var contentDiv = Window.doc.createElement("div");
		contentDiv.innerHTML = html;

		// Use the entity encode/decode functions, they cache on the map,
		// so it won't multiprocess a map.
		var encode = Entities.encode;
		var decode = Entities.decode;

		/** Define a bunch of formatters to format the output. **/
		var isInlineFormat = function(tag){
			// summary:
			//		Function to determine if the current tag is an inline
			//		element that does formatting, as we don't want to
			//		break/indent around it, as it can screw up text.
			// tag:
			//		The (lower-cased) tag name to examine.
			switch(tag){
				case "a":
				case "b":
				case "strong":
				case "s":
				case "strike":
				case "i":
				case "u":
				case "em":
				case "sup":
				case "sub":
				case "span":
				case "font":
				case "big":
				case "cite":
				case "q":
				case "small":
					return true;
				default:
					return false;
			}
		};

		//Create less divs.
		var div = contentDiv.ownerDocument.createElement("div");
		var outerHTML = function(node){
			// summary:
			//		Function to return the outer HTML of a node.
			//		Yes, IE has a function like this, but using cloneNode
			//		allows avoiding looking at any child nodes, because in this
			//		case, we don't want them.
			var clone = node.cloneNode(false);
			div.appendChild(clone);
			var html = div.innerHTML;
			div.innerHTML = "";
			return html;
		};

		var indentText = function(depth){
			// summary:
			//		Function to build the indent string for the given depth
			//		(the indent unit repeated 'depth' times).
			var txt = "";
			var d;
			for(d = 0; d < depth; d++){
				txt += iTxt;
			}
			return txt;
		};

		var indent = function(){
			// summary:
			//		Function to handle indent depth: writes the indent for the
			//		current depth to the output.
			content.push(indentText(indentDepth));
		};
		var newline = function(){
			// summary:
			//		Function to handle newlining.
			content.push("\n");
		};

		var processTextNode = function(n){
			// summary:
			//		Function to process the text content for doc
			//		insertion. The text is entity-encoded and buffered; it is
			//		written out later by formatText.
			// n:
			//		The text node to process.
			textContent += encode(n.nodeValue, map);
		};

		var formatText = function(txt){
			// summary:
			//		Function for processing the text content encountered up to a
			//		point and inserting it into the formatted document output.
			// txt:
			//		The text to format.
			// returns:
			//		The indented, newline-terminated text, or "" when txt is blank.
			var j;

			// Clean up any indention organization since we're going to rework it
			// anyway.
			var _lines = txt.split("\n");
			for(j = 0; j < _lines.length; j++){
				_lines[j] = lang.trim(_lines[j]);
			}
			txt = lang.trim(_lines.join(" "));
			if(txt === ""){
				return "";
			}

			var prefix = indentText(indentDepth);
			if(!(maxLineLength && maxLineLength > 0)){
				// No wrapping requested: emit everything on one line.
				return prefix + txt + "\n";
			}

			// Wrapping: only text longer than the full maxLineLength is split; the
			// split point is reduced by the indent width when the indent fits.
			var lines = [];
			var indentSize = prefix.length;
			var maxLine = maxLineLength;
			if(maxLineLength > indentSize){
				maxLine -= indentSize;
			}
			while(txt){
				if(txt.length > maxLineLength){
					for(j = maxLine; (j > 0 && txt.charAt(j) !== " "); j--){
						// Do nothing, we're just looking for a space to split at.
					}
					if(!j){
						// Couldn't find a split going back, so go forward.
						for(j = maxLine; (j < txt.length && txt.charAt(j) !== " "); j++){
							// Do nothing, we're just looking for a space to split at.
						}
					}
					var line = lang.trim(txt.substring(0, j));
					// Shift up the text string to the next chunk.
					txt = lang.trim(txt.substring((j === txt.length) ? txt.length : j + 1, txt.length));
					if(line){
						line = prefix + line + "\n";
					}
					lines.push(line);
				}else{
					// Line is shorter than our desired length, so use it
					// as-is.
					lines.push(prefix + txt + "\n");
					txt = null;
				}
			}
			return lines.join("");
		};

		var processScriptText = function(txt){
			// summary:
			//		Function to clean up potential escapes in the script code.
			// txt:
			//		The script text, possibly containing &quot; &gt; &lt; &amp;.
			// returns:
			//		The text with those four entities turned back into characters.
			if(txt){
				txt = txt.replace(/&quot;/gi, "\"");
				txt = txt.replace(/&gt;/gi, ">");
				txt = txt.replace(/&lt;/gi, "<");
				// Ampersands go last so "&amp;lt;" becomes "&lt;" and is not decoded twice.
				txt = txt.replace(/&amp;/gi, "&");
			}
			return txt;
		};

		var formatScript = function(txt){
			// summary:
			//		Function to rudimentary formatting of script text.
			//		Not perfect, but it helps get some level of organization
			//		in there.
			// txt:
			//		The script text to try to format a bit.
			if(txt){
				txt = processScriptText(txt);
				var k, c;
				var braceDepth = 0;
				var scriptLines = txt.split("\n");
				var newLines = [];
				for(k = 0; k < scriptLines.length; k++){
					var line = lang.trim(scriptLines[k]);
					if(!line){
						// Blank lines are dropped. (The text was split on "\n", so a
						// line can never itself contain a newline.)
						continue;
					}
					var iLevel = braceDepth;
					// Not all blank, so we need to process.
					for(c = 0; c < line.length; c++){
						var ch = line.charAt(c);
						if(ch === "{"){
							braceDepth++;
						}else if(ch === "}"){
							braceDepth--;
							// We want to back up a bit before the
							// line is written.
							iLevel = braceDepth;
						}
					}
					newLines.push(indentText(indentDepth + iLevel) + line + "\n");
				}
				// Okay, create the script text, hopefully reasonably
				// formatted.
				txt = newLines.join("");
			}
			return txt;
		};

		var openTag = function(node){
			// summary:
			//		Function to open a new tag for writing content.
			// node:
			//		The element whose start tag should be emitted.
			var name = node.nodeName.toLowerCase();
			// Generate the outer node content (tag with attrs)
			var nText = lang.trim(outerHTML(node));
			var tag = nText.substring(0, nText.indexOf(">") + 1);

			// Also thanks to IE, we need to check for quotes around
			// attributes and insert if missing.
			tag = tag.replace(rgxp_fixIEAttrs,'="$1"$2');

			// And lastly, thanks IE for changing style casing and end
			// semi-colon and webkit adds spaces, so lets clean it up by
			// sorting, etc, while we're at it.
			tag = tag.replace(rgxp_styleMatch, function(match){
				var sL = match.substring(0,6);	// The 'style=' prefix as written.
				var style = match.substring(6, match.length);
				var closure = style.charAt(0);	// The quote character in use.
				style = lang.trim(style.substring(1,style.length -1));
				style = style.split(";");
				var trimmedStyles = [];
				ArrayUtil.forEach(style, function(s){
					s = lang.trim(s);
					if(s){
						// Lower case the style name, leave the value alone. Mainly a fixup for IE.
						s = s.substring(0, s.indexOf(":")).toLowerCase() + s.substring(s.indexOf(":"), s.length);
						trimmedStyles.push(s);
					}
				});
				trimmedStyles = trimmedStyles.sort();

				// Reassemble and return the styles in sorted order.
				style = trimmedStyles.join("; ");
				var ts = lang.trim(style);
				if(!ts || ts === ";"){
					// Just remove any style attrs that are empty.
					return "";
				}else{
					style += ";";
					return sL + closure + style + closure;
				}
			});

			// Try and sort the attributes while we're at it.
			var attrs = [];
			tag.replace(rgxp_attrsMatch, function(attr){
				attrs.push(lang.trim(attr));
				return "";
			});
			attrs = attrs.sort();

			// Reassemble the tag with sorted attributes!
			tag = "<" + name;
			if(attrs.length){
				tag += " " + attrs.join(" ");
			}

			// Determine closure status. If xhtml,
			// then close the tag properly as needed.
			if(nText.indexOf("</") !== -1){
				closeTags.push(name);
				tag += ">";
			}else{
				if(xhtml){
					tag += " />";
				}else{
					tag += ">";
				}
				closeTags.push(false);
			}

			var inline = isInlineFormat(name);
			inlineStyle.push(inline);
			if(textContent && !inline){
				// Process any text content we have that occurred
				// before the open tag of a non-inline.
				content.push(formatText(textContent));
				textContent = "";
			}

			// Non-inline tags get their own line and deepen the indent;
			// inline tags stay inside the buffered text.
			if(!inline){
				indent();
				content.push(tag);
				newline();
				indentDepth++;
			}else{
				textContent += tag;
			}
		};

		var closeTag = function(){
			// summary:
			//		Function to close out a tag if necessary.
			var inline = inlineStyle.pop();
			if(textContent && !inline){
				// Process any text content we have that occurred
				// before the close tag.
				content.push(formatText(textContent));
				textContent = "";
			}
			var ct = closeTags.pop();
			if(ct){
				ct = "</" + ct + ">";
				if(!inline){
					indentDepth--;
					indent();
					content.push(ct);
					newline();
				}else{
					textContent += ct;
				}
			}else{
				// No close tag to write; just undo the indent added by openTag.
				indentDepth--;
			}
		};

		var processCommentNode = function(n){
			// summary:
			//		Function to handle processing a comment node.
			// n:
			//		The comment node to process.

			//Make sure contents aren't double-encoded.
			var commentText = decode(n.nodeValue, map);
			indent();
			content.push("<!--");
			newline();
			indentDepth++;
			content.push(formatText(commentText));
			indentDepth--;
			indent();
			content.push("-->");
			newline();
		};

		var processNode = function(node) {
			// summary:
			//		Entrypoint for processing all the text!
			// node:
			//		The node whose children should be walked (recursively).
			var children = node.childNodes;
			if(children){
				var idx;
				for(idx = 0; idx < children.length; idx++){
					var n = children[idx];
					if(n.nodeType === 1){
						var tg = lang.trim(n.tagName.toLowerCase());
						if(has("ie") && n.parentNode !== node){
							// IE is broken. DOMs are supposed to be a tree.
							// But in the case of malformed HTML, IE generates a graph
							// meaning one node ends up with multiple references
							// (multiple parents). This is totally wrong and invalid, but
							// such is what it is. We have to keep track and check for
							// this because otherwise the source output HTML will have dups.
							continue;
						}
						if(tg && tg.charAt(0) === "/"){
							// IE oddity. Malformed HTML can put in odd tags like:
							// </ >, </span>. It treats a mismatched closure as a new
							// start tag. So, remove them.
							continue;
						}else{
							//Process non-dup, seemingly wellformed elements!
							openTag(n);
							if(tg === "script"){
								content.push(formatScript(n.innerHTML));
							}else if(tg === "pre"){
								var preTxt = n.innerHTML;
								if(has("mozilla")){
									//Mozilla screws this up, so fix it up.
									// Every <br> is a line break, so replace them all,
									// not only the first one.
									preTxt = preTxt.replace(/<br>/g, "\n");
									preTxt = preTxt.replace("<pre>", "");
									preTxt = preTxt.replace("</pre>", "");
								}
								// Add ending newline, if needed.
								if(preTxt.charAt(preTxt.length - 1) !== "\n"){
									preTxt += "\n";
								}
								content.push(preTxt);
							}else{
								processNode(n);
							}
							closeTag();
						}
					}else if(n.nodeType === 3 || n.nodeType === 4){
						// Text and CDATA nodes.
						processTextNode(n);
					}else if(n.nodeType === 8){
						processCommentNode(n);
					}
				}
			}
		};

		//Okay, finally process the input string.
		processNode(contentDiv);
		if(textContent){
			// Insert any trailing text. See: #10854
			content.push(formatText(textContent));
			textContent = "";
		}
		return content.join(""); //String
	};
	return dhf;
});

Note: See TracBrowser for help on using the repository browser.

Download in other formats: