1 | var sys; |
---|
2 | try { |
---|
3 | sys = require('util'); |
---|
4 | } catch (e) { |
---|
5 | sys = require('sys'); |
---|
6 | } |
---|
7 | var events = require('events'), |
---|
8 | fs = require('fs'); |
---|
9 | |
---|
10 | var csv = exports; |
---|
11 | |
---|
12 | /** |
---|
13 | * Provides Base CSV Reading capabilities |
---|
14 | * @class CsvReader |
---|
15 | * @extends EventEmitter |
---|
16 | */ |
---|
17 | |
---|
/**
 * Creates a CSV reader and, when a stream is supplied, wires it to that stream.
 * @constructor
 * @param readStream {ReadStream} optional readable stream. When given, its
 *     'data' chunks are passed to parse(), its 'error' events are re-emitted
 *     by the reader and its 'end' event triggers end(). The stream control
 *     methods (pause, resume, destroy, destroySoon) are only installed on the
 *     reader in that case.
 * @param options {Object} optional parameters for the reader <br/>
 *     - separator {String}
 *     - quote {String}
 *     - escape {String}
 *     - comment {String}
 *     - columnNames {Array}
 *     - columnsFromHeader {Boolean}
 *     - nestedQuotes {Boolean}
 */
var CsvReader = csv.CsvReader = function(readStream, options) {
    var self = this;
    _setOptions(self, options);

    // State carried between parse() calls so records may span several chunks
    self.parsingStatus = {
        rows: 0,
        openRecord: [],
        openField: '',
        lastChar: '',
        quotedField: false,
        commentedLine: false
    };

    if (readStream) {
        readStream.addListener('data', this.parse.bind(this));
        readStream.addListener('error', this.emit.bind(this, 'error'));
        readStream.addListener('end', this.end.bind(this));

        /**
         * Pauses the readStream
         * @method pause
         * @return {CsvReader} this reader, to allow chaining
         */
        self.pause = function() {
            readStream.pause();
            return self;
        };

        /**
         * Resumes the readStream
         * @method resume
         * @return {CsvReader} this reader, to allow chaining
         */
        self.resume = function() {
            readStream.resume();
            return self;
        };

        /**
         * Closes the readStream
         * @method destroy
         * @return {CsvReader} this reader, to allow chaining
         */
        self.destroy = function() {
            readStream.destroy();
            return self;
        };

        /**
         * Closes the readStream when its file stream has been drained.
         * Falls back to destroy() for streams that do not provide destroySoon().
         * @method destroySoon
         * @return {CsvReader} this reader, to allow chaining
         */
        self.destroySoon = function() {
            if (typeof readStream.destroySoon === 'function') {
                readStream.destroySoon();
            } else {
                readStream.destroy();
            }
            return self;
        };
    }

};
---|
92 | sys.inherits(CsvReader, events.EventEmitter); |
---|
93 | |
---|
/**
 * Parses a chunk of CSV text one character at a time. Completed fields and
 * records are handed to _addField() and _addRecord(); unfinished state is kept
 * in this.parsingStatus so the next call can continue where this one stopped.
 * Look-ahead for escapes and closing quotes only inspects the current chunk.
 * @method parse
 * @param data {String} the next chunk of text from the incoming file
 * @throws {Error} when a closing quote is followed by something other than a
 *     separator or a line break and the nestedQuotes option is not set
 */
CsvReader.prototype.parse = function(data) {
    var ps = this.parsingStatus;
    var nextChar;
    // drop a byte order mark from a chunk that starts while no record is open
    if (ps.openRecord.length === 0) {
        if (data.charCodeAt(0) === 0xFEFF) {
            data = data.slice(1);
        }
    }
    for (var i = 0; i < data.length; i++) {
        var c = data.charAt(i);
        switch (c) {
            // escape and quote may be the same char, typically '"'
            case this.escapechar:
            case this.quotechar:
                if (ps.commentedLine) break;
                var isEscape = false;
                if (c === this.escapechar) {
                    // double-quote at the field beginning does not count as an escape string
                    if (c !== this.quotechar || ps.openField || ps.quotedField) {
                        nextChar = data.charAt(i + 1);
                        if (this._isEscapable(nextChar)) {
                            // keep the escaped character and skip over it
                            this._addCharacter(nextChar);
                            i++;
                            isEscape = true;
                        }
                    }
                }
                if (!isEscape && (c === this.quotechar)) {
                    // a quote after unquoted content switches the field to quoted mode
                    if (ps.openField && !ps.quotedField) {
                        ps.quotedField = true;
                        break;
                    }
                    if (ps.quotedField) {
                        // closing quote should be followed by separator unless the nested quotes option is set
                        nextChar = data.charAt(i + 1);
                        if (nextChar && nextChar != '\r' && nextChar != '\n' && nextChar !== this.separator && this.nestedQuotes != true) {
                            throw new Error("separator expected after a closing quote; found " + nextChar);
                        } else {
                            ps.quotedField = false;
                        }
                    } else if (ps.openField === '') {
                        ps.quotedField = true;
                    }
                }
                break;
            case this.separator:
                if (ps.commentedLine) break;
                if (ps.quotedField) {
                    this._addCharacter(c);
                } else {
                    this._addField();
                }
                break;
            case '\n':
                // handle CRLF sequence
                if (!ps.quotedField && (ps.lastChar === '\r')) {
                    break;
                }
                // intentional fall-through: a lone LF ends the line just like CR
            case '\r':
                if (ps.commentedLine) {
                    ps.commentedLine = false;
                } else if (ps.quotedField) {
                    this._addCharacter(c);
                } else {
                    this._addField();
                    this._addRecord();
                }
                break;
            case this.commentchar:
                if (ps.commentedLine) break;
                if (ps.openRecord.length === 0 && ps.openField === '' && !ps.quotedField) {
                    // comment character at the very start of a record: ignore the line
                    ps.commentedLine = true;
                } else {
                    this._addCharacter(c);
                }
                // without this break the character would be appended a second time by the default case
                break;
            default:
                if (ps.commentedLine) break;
                this._addCharacter(c);
        }
        ps.lastChar = c;
    }
};
---|
180 | |
---|
181 | |
---|
/**
 * Finishes parsing once no more input will arrive. Emits 'error' if the input
 * stopped inside a quoted field; otherwise flushes the pending field and
 * record, if any, and emits 'end'.
 * @method end
 */
CsvReader.prototype.end = function() {
    var ps = this.parsingStatus;
    if (ps.quotedField) {
        this.emit('error', new Error('Input stream ended but closing quotes expected'));
        return;
    }
    // dump open record: once a separator has closed at least one field, the
    // field after it belongs to the record even when it is empty, exactly as
    // it would if the input had ended with a line break
    if (ps.openField || ps.openRecord.length > 0) {
        this._addField();
    }
    if (ps.openRecord.length > 0) {
        this._addRecord();
    }
    this.emit('end');
};
---|
/**
 * Tells whether a character may follow the escape character, i.e. whether it
 * is the escape character itself or the quote character.
 * @method _isEscapable
 * @param c {String} the character to test
 * @return {Boolean} true when c equals the escape or the quote character
 */
CsvReader.prototype._isEscapable = function(c) {
    return c === this.escapechar || c === this.quotechar;
};
---|
203 | |
---|
/**
 * Appends a character to the field that is currently being read.
 * @method _addCharacter
 * @param c {String} the character to append
 */
CsvReader.prototype._addCharacter = function(c) {
    var status = this.parsingStatus;
    status.openField = status.openField + c;
};
---|
207 | |
---|
/**
 * Closes the field being read: stores it as the next value of the open record
 * and resets the per-field state.
 * @method _addField
 */
CsvReader.prototype._addField = function() {
    var status = this.parsingStatus;
    var finishedField = status.openField;
    status.openRecord.push(finishedField);
    status.quotedField = false;
    status.openField = '';
};
---|
214 | |
---|
/**
 * Replaces the column names stored on the reader. _addRecord() reads them to
 * key the values of each emitted record.
 * @method setColumnNames
 * @param names {Array} the column names, in column order
 */
CsvReader.prototype.setColumnNames = function(names) {
    var reader = this;
    reader.columnNames = names;
};
---|
218 | |
---|
/**
 * Closes the open record and resets the per-record parsing state.
 * - With columnsFromHeader set, the first record is consumed as the header:
 *   it is not emitted, and it becomes the column names unless names were
 *   already supplied.
 * - When column names are known, a 'data' event carries an object keyed by
 *   those names.
 * - Otherwise a 'data' event carries the array of field values.
 * @method _addRecord
 */
CsvReader.prototype._addRecord = function() {
    var ps = this.parsingStatus;
    // a missing (null/undefined) list of names is treated like an empty one in every branch
    var hasColumnNames = this.columnNames != null && this.columnNames.length > 0;
    if (this.columnsFromHeader && ps.rows === 0) {
        // names passed through the options take precedence over the header row
        if (!hasColumnNames) {
            this.setColumnNames(ps.openRecord);
        }
    } else if (hasColumnNames) {
        var objResult = {};
        for (var i = 0; i < this.columnNames.length; i++) {
            objResult[this.columnNames[i]] = ps.openRecord[i];
        }
        this.emit('data', objResult);
    } else {
        this.emit('data', ps.openRecord);
    }
    ps.rows++;
    ps.openRecord = [];
    ps.openField = '';
    ps.quotedField = false;
};
---|
239 | |
---|
/**
 * Opens a file for reading and returns a CsvReader attached to it.
 * @method createCsvFileReader
 * @param path {String} path of the file to read
 * @param options {Object} optional reader options; `flags` (default 'r') is
 *     passed to fs.createReadStream and `encoding` (default 'utf8') is set on
 *     the stream
 * @return {CsvReader} the reader bound to the file stream
 */
csv.createCsvFileReader = function(path, options) {
    var opts = options || {};
    var fileStream = fs.createReadStream(path, {
        'flags': opts.flags || 'r'
    });
    var encoding = opts.encoding || 'utf8';
    fileStream.setEncoding(encoding);
    return new CsvReader(fileStream, opts);
};
---|
248 | |
---|
/**
 * Creates a CsvReader, optionally attached to an existing readable stream.
 * Accepted call forms: (readStream, options), (readStream), (options), ().
 * @method createCsvStreamReader
 * @param readStream {ReadStream} optional stream to read from; a plain
 *     options object may be passed in this position instead
 * @param options {Object} optional reader options; `encoding` (default
 *     'utf8') is set on the stream
 * @return {CsvReader} the new reader
 */
csv.createCsvStreamReader = function(readStream, options) {
    // A stream is an object too, so typeof alone cannot tell it apart from an
    // options object; the reader attaches through addListener, so use that to
    // recognise a stream
    var isStream = readStream != null && typeof readStream.addListener === 'function';
    if (options === undefined && typeof readStream === 'object' && !isStream) {
        options = readStream;
        readStream = undefined;
    }
    options = options || {};
    if (readStream) readStream.setEncoding(options.encoding || 'utf8');
    return new CsvReader(readStream, options);
};
---|
258 | |
---|
/**
 * Creates a CSV writer on top of a writable stream. The stream's 'drain',
 * 'error' and 'close' events are re-emitted by the writer.
 * @constructor
 * @param writeStream {WriteStream} the stream records are written to
 * @param options {Object} optional parameters; the common CSV options are
 *     applied through _setOptions and `encoding` defaults to 'utf8'
 */
var CsvWriter = csv.CsvWriter = function(writeStream, options) {
    var writer = this;
    var opts = options || {};

    writer.writeStream = writeStream;
    _setOptions(writer, opts);
    writer.encoding = opts.encoding || 'utf8';

    // not every stream implementation offers setEncoding
    if (typeof writeStream.setEncoding === 'function') {
        writeStream.setEncoding(writer.encoding);
    }

    // forward the stream's life-cycle events to listeners of the writer
    ['drain', 'error', 'close'].forEach(function(eventName) {
        writeStream.addListener(eventName, writer.emit.bind(writer, eventName));
    });
};
---|
274 | sys.inherits(CsvWriter, events.EventEmitter); |
---|
275 | |
---|
/**
 * Writes one record to the underlying stream.
 * @method writeRecord
 * @param rec {Array} the field values of the record; a falsy value is
 *     silently ignored
 * @throws {Error} when rec is truthy but not an array
 */
CsvWriter.prototype.writeRecord = function(rec) {
    // ignore empty records
    if (rec) {
        if (Array.isArray(rec)) {
            _writeArray(this, rec);
        } else {
            throw new Error("CsvWriter.writeRecord only takes an array as an argument");
        }
    }
};
---|
283 | |
---|
/**
 * Serialises one record and writes it to the writer's stream. Every field is
 * wrapped in the quote character, fields are joined with the separator and the
 * line is terminated with CRLF.
 * @param writer {CsvWriter} supplies separator, quotechar, encoding and writeStream
 * @param arr {Array} the field values of the record
 */
function _writeArray(writer, arr) {
    var out = [];
    for (var i = 0; i < arr.length; i++) {
        if (i !== 0) out.push(writer.separator);
        out.push(writer.quotechar);
        _appendField(out, writer, arr[i]);
        out.push(writer.quotechar);
    }
    out.push("\r\n");
    // the encoding lives on the writer; `this` is not the writer inside this plain function
    writer.writeStream.write(out.join(''), writer.encoding);
}
---|
295 | |
---|
/**
 * Appends the text of one field to the output buffer, character by character,
 * prefixing every quote or escape character with the escape character.
 * @param outArr {Array} buffer of string pieces that is appended to
 * @param writer {CsvWriter} supplies quotechar and escapechar
 * @param field {*} the value to write; null and undefined produce an empty
 *     string, other non-string values are converted with String()
 */
function _appendField(outArr, writer, field) {
    // We are not interested in outputting "null" or "undefined"
    if (field === null || typeof field === 'undefined') {
        outArr.push('');
        return;
    }

    // Make sure field is a string
    var text = (typeof field === 'string') ? field : String(field);

    for (var pos = 0; pos < text.length; pos++) {
        var ch = text.charAt(pos);
        var needsEscape = (ch === writer.quotechar) || (ch === writer.escapechar);
        if (needsEscape) {
            outArr.push(writer.escapechar);
        }
        outArr.push(ch);
    }
}
---|
315 | |
---|
/**
 * Opens a file for writing and returns a CsvWriter attached to it.
 * @method createCsvFileWriter
 * @param path {String} path of the file to write
 * @param options {Object} optional writer options; `flags` (default 'w') is
 *     passed to fs.createWriteStream
 * @return {CsvWriter} the writer bound to the file stream
 */
csv.createCsvFileWriter = function(path, options) {
    var opts = options || {'flags': 'w'};
    var streamOptions = {
        'flags': opts.flags || 'w'
    };
    var fileStream = fs.createWriteStream(path, streamOptions);
    return new CsvWriter(fileStream, opts);
};
---|
323 | |
---|
/**
 * Creates a CsvWriter on top of an existing writable stream.
 * @method createCsvStreamWriter
 * @param writeStream {WriteStream} the stream to write to
 * @param options {Object} optional writer options, passed on unchanged
 * @return {CsvWriter} the new writer
 */
csv.createCsvStreamWriter = function(writeStream, options) {
    var writer = new CsvWriter(writeStream, options);
    return writer;
};
---|
327 | |
---|
328 | // =============== |
---|
329 | // = utils = |
---|
330 | // =============== |
---|
331 | |
---|
/**
 * Copies the CSV settings from an options object onto a reader or writer,
 * substituting the default for every option that is undefined.
 * Defaults: separator ',', quote '"', escape '"', comment '' (disabled),
 * columnNames [], columnsFromHeader false, nestedQuotes false.
 * @param obj {Object} the reader or writer that receives the settings
 * @param options {Object} optional user supplied options
 */
function _setOptions(obj, options) {
    var given = options || {};

    // only `undefined` selects the default; null, '' and false are kept as passed
    function pick(key, fallback) {
        return (typeof given[key] === 'undefined') ? fallback : given[key];
    }

    obj.separator = pick('separator', ',');
    obj.quotechar = pick('quote', '"');
    obj.escapechar = pick('escape', '"');
    obj.commentchar = pick('comment', '');
    obj.columnNames = pick('columnNames', []);
    obj.columnsFromHeader = pick('columnsFromHeader', false);
    obj.nestedQuotes = pick('nestedQuotes', false);
}
---|