1 | var sys; |
---|
2 | try { |
---|
3 | sys = require('util'); |
---|
4 | } catch (e) { |
---|
5 | sys = require('sys'); |
---|
6 | } |
---|
7 | var events = require('events'), |
---|
8 | fs = require('fs'); |
---|
9 | |
---|
10 | var csv = exports; |
---|
11 | |
---|
/**
 * Provides base CSV reading capabilities.
 *
 * Emits 'data' once per parsed record, 'end' when the input is exhausted and
 * 'error' when the underlying stream fails or the input ends inside a quoted field.
 * @class CsvReader
 * @extends EventEmitter
 */

/**
 * The constructor.
 * @constructor
 * @param readStream {ReadStream} optional stream to read from; when omitted, chunks
 *     must be fed manually through parse() and finished with end()
 * @param options {Object} optional parameters for the reader <br/>
 *     - separator {String} field separator, defaults to ','
 *     - quote {String} quote character, defaults to '"'
 *     - escape {String} escape character, defaults to '"'
 *     - comment {String} comment character, defaults to '' (comments disabled)
 *     - columnNames {Array} column names used to turn each record into an object
 *     - columnsFromHeader {Boolean} take the column names from the first row
 *     - nestedQuotes {Boolean} tolerate quotes that are not followed by a separator
 */
var CsvReader = csv.CsvReader = function(readStream, options) {
    var self = this;
    _setOptions(self, options);

    // Mutable parser state carried across successive parse() calls.
    self.parsingStatus = {
        rows: 0,
        openRecord: [],
        openField: '',
        lastChar: '',
        quotedField: false,
        commentedLine: false
    };

    if (readStream) {
        readStream.addListener('data', this.parse.bind(this));
        readStream.addListener('error', this.emit.bind(this, 'error'));
        readStream.addListener('end', this.end.bind(this));

        // The stream-control methods below only exist when a stream was supplied.

        /**
         * Pauses the readStream
         * @method pause
         * @return {CsvReader} this reader, to allow chaining
         */
        self.pause = function() {
            readStream.pause();
            return self;
        };

        /**
         * Resumes the readStream
         * @method resume
         * @return {CsvReader} this reader, to allow chaining
         */
        self.resume = function() {
            readStream.resume();
            return self;
        };

        /**
         * Closes the readStream
         * @method destroy
         * @return {CsvReader} this reader, to allow chaining
         */
        self.destroy = function() {
            readStream.destroy();
            return self;
        };

        // The method used to be published under this misspelled name only;
        // keep it as an alias so existing callers do not break.
        self.destoy = self.destroy;

        /**
         * Closes the readStream when its file stream has been drained
         * @method destroySoon
         * @return {CsvReader} this reader, to allow chaining
         */
        self.destroySoon = function() {
            // Not every stream implements destroySoon(); fall back to destroy().
            if (typeof readStream.destroySoon === 'function') {
                readStream.destroySoon();
            } else {
                readStream.destroy();
            }
            return self;
        };
    }

};
sys.inherits(CsvReader, events.EventEmitter);
---|
93 | |
---|
/**
 * Parses a chunk of CSV text, emitting a 'data' event (through _addRecord) for
 * every record completed by the chunk. Parser state is kept in
 * this.parsingStatus, so a record may span several calls.
 *
 * NOTE(review): the one-character look-ahead used for escapes and closing
 * quotes only sees the current chunk, so an escape sequence split exactly on a
 * chunk boundary is not recognised.
 *
 * @method parse
 * @param data {String} chunk of decoded CSV text from the incoming stream
 * @throws {Error} when a closing quote is followed by something other than a
 *     separator or a line break and the nestedQuotes option is not set
 */
CsvReader.prototype.parse = function(data) {
    var ps = this.parsingStatus;
    var nextChar;
    // Drop a leading byte-order mark; only checked while no record is open.
    if (ps.openRecord.length === 0) {
        if (data.charCodeAt(0) === 0xFEFF) {
            data = data.slice(1);
        }
    }
    for (var i = 0; i < data.length; i++) {
        var c = data.charAt(i);
        switch (c) {
            // escape and quote may be the same char, typically '"'
            case this.escapechar:
            case this.quotechar:
                if (ps.commentedLine) break;
                var isEscape = false;
                // A quote at the very start of an empty, unquoted field opens the
                // field. When the escape char doubles as the quote char it must not
                // be read as an escape there, otherwise an empty quoted field ("")
                // would be parsed as a literal quote.
                var opensField = (c === this.quotechar) && !ps.quotedField && ps.openField === '';
                if (c === this.escapechar && !opensField) {
                    nextChar = data.charAt(i + 1);
                    if (this._isEscapable(nextChar)) {
                        // Emit the escaped character and skip over it.
                        this._addCharacter(nextChar);
                        i++;
                        isEscape = true;
                    }
                }
                if (!isEscape && (c === this.quotechar)) {
                    if (ps.openField && !ps.quotedField) {
                        // Quote after unquoted content: switch to quoted mode.
                        ps.quotedField = true;
                        break;
                    }
                    if (ps.quotedField) {
                        // closing quote should be followed by separator unless the nested quotes option is set
                        nextChar = data.charAt(i + 1);
                        if (nextChar && nextChar != '\r' && nextChar != '\n' && nextChar !== this.separator && this.nestedQuotes != true) {
                            throw new Error("separator expected after a closing quote; found " + nextChar);
                        } else {
                            ps.quotedField = false;
                        }
                    } else if (ps.openField === '') {
                        // Opening quote of a new field.
                        ps.quotedField = true;
                    }
                }
                break;
            case this.separator:
                if (ps.commentedLine) break;
                if (ps.quotedField) {
                    this._addCharacter(c);
                } else {
                    this._addField();
                }
                break;
            case '\n':
                // handle CRLF sequence: the '\r' already terminated the record
                if (!ps.quotedField && (ps.lastChar === '\r')) {
                    break;
                }
                // intentional fall-through: a bare '\n' ends the line like '\r'
            case '\r':
                if (ps.commentedLine) {
                    ps.commentedLine = false;
                } else if (ps.quotedField) {
                    this._addCharacter(c);
                } else {
                    this._addField();
                    this._addRecord();
                }
                break;
            case this.commentchar:
                if (ps.commentedLine) break;
                if (ps.openRecord.length === 0 && ps.openField === '' && !ps.quotedField) {
                    // Comment char at the very start of a line: skip the whole line.
                    ps.commentedLine = true;
                } else {
                    this._addCharacter(c);
                }
                // Must not fall through to default, which would append the
                // character a second time.
                break;
            default:
                if (ps.commentedLine) break;
                this._addCharacter(c);
        }
        ps.lastChar = c;
    }
};
---|
177 | |
---|
178 | |
---|
/**
 * Finishes parsing: flushes a last record that was not terminated by a line
 * break and then emits 'end'. If the input stops inside a quoted field an
 * 'error' event is emitted instead and 'end' is not emitted.
 * @method end
 */
CsvReader.prototype.end = function() {
    var ps = this.parsingStatus;
    if (ps.quotedField) {
        this.emit('error', new Error('Input stream ended but closing quotes expected'));
        return;
    }
    // dump open record; a record that already has fields also gets its (possibly
    // empty) last field, so "a,b," yields three fields just like "a,b,\n" does
    if (ps.openField || ps.openRecord.length > 0) {
        this._addField();
    }
    if (ps.openRecord.length > 0) {
        this._addRecord();
    }
    this.emit('end');
};
---|
/**
 * Tells whether a character may be escaped, i.e. whether it is the escape
 * character or the quote character of this reader.
 * @param c {String} the character following an escape character
 * @return {Boolean} true when the character can be escaped
 */
CsvReader.prototype._isEscapable = function(c) {
    return c === this.escapechar || c === this.quotechar;
};
---|
200 | |
---|
/**
 * Appends a character to the field currently being read.
 * @param c {String} the character to append
 */
CsvReader.prototype._addCharacter = function(c) {
    var status = this.parsingStatus;
    status.openField = status.openField + c;
};
---|
204 | |
---|
/**
 * Closes the field currently being read: stores it in the open record and
 * resets the field buffer and the quoted-field flag.
 */
CsvReader.prototype._addField = function() {
    var status = this.parsingStatus;
    var finishedField = status.openField;
    status.openRecord.push(finishedField);
    status.quotedField = false;
    status.openField = '';
};
---|
211 | |
---|
/**
 * Sets the column names used by _addRecord to emit each record as an object
 * keyed by column name instead of as an array.
 * @method setColumnNames
 * @param names {Array} the column names, in column order
 */
CsvReader.prototype.setColumnNames = function(names) {
    var reader = this;
    reader.columnNames = names;
};
---|
215 | |
---|
/**
 * Closes the record currently being read and resets the parser for the next one.
 *
 * With columnsFromHeader set, the first row becomes the column names and is
 * not emitted. Otherwise the record is emitted through a 'data' event: as an
 * object keyed by column name when column names are known, else as an array.
 */
CsvReader.prototype._addRecord = function() {
    var status = this.parsingStatus;
    var record = status.openRecord;
    var isHeaderRow = this.columnsFromHeader && status.rows === 0;

    if (isHeaderRow) {
        this.setColumnNames(record);
    } else {
        var names = this.columnNames;
        var hasNames = names != null && names.length > 0;
        if (hasNames) {
            // Map each configured column name to the value at the same position.
            var keyed = {};
            var col = 0;
            while (col < names.length) {
                keyed[names[col]] = record[col];
                col += 1;
            }
            this.emit('data', keyed);
        } else {
            this.emit('data', record);
        }
    }

    // The header row counts as a row too.
    status.rows += 1;
    status.openRecord = [];
    status.openField = '';
    status.quotedField = false;
};
---|
234 | |
---|
/**
 * Creates a CsvReader that reads from the file at the given path.
 * @param path {String} path of the CSV file
 * @param options {Object} optional reader options; in addition to the
 *     CsvReader options, `flags` (default 'r') is passed to fs.createReadStream
 *     and `encoding` (default 'utf8') is set on the stream
 * @return {CsvReader} a reader attached to the file stream
 */
csv.createCsvFileReader = function(path, options) {
    var opts = options || {};
    var streamOptions = {
        'flags': opts.flags || 'r'
    };
    var fileStream = fs.createReadStream(path, streamOptions);
    fileStream.setEncoding(opts.encoding || 'utf8');
    return new CsvReader(fileStream, opts);
};
---|
243 | |
---|
/**
 * Creates a CsvReader on top of an existing readable stream, or a stream-less
 * reader that is fed manually through parse() and end().
 *
 * Accepted call shapes: (readStream, options), (readStream), (options) and ().
 * @param readStream {ReadStream} optional stream to read from
 * @param options {Object} optional reader options; `encoding` (default 'utf8')
 *     is set on the stream when the stream supports setEncoding
 * @return {CsvReader} the reader
 */
csv.createCsvStreamReader = function(readStream, options) {
    // A lone object argument is an options object only if it is not a stream.
    // Streams are objects too, so tell them apart by the addListener method the
    // CsvReader constructor relies on.
    var looksLikeStream = readStream != null && typeof readStream.addListener === 'function';
    if (options === undefined && typeof readStream === 'object' && !looksLikeStream) {
        options = readStream;
        readStream = undefined;
    }
    options = options || {};
    // Not every readable stream implements setEncoding.
    if (readStream && typeof readStream.setEncoding === 'function') {
        readStream.setEncoding(options.encoding || 'utf8');
    }
    return new CsvReader(readStream, options);
};
---|
253 | |
---|
/**
 * Writes records as CSV lines to a writable stream and re-emits the stream's
 * 'drain', 'error' and 'close' events.
 * @class CsvWriter
 * @extends EventEmitter
 * @constructor
 * @param writeStream {WriteStream} the stream to write to
 * @param options {Object} optional separator/quote/escape settings plus
 *     `encoding` (default 'utf8')
 */
var CsvWriter = csv.CsvWriter = function(writeStream, options) {
    var writer = this;
    var opts = options || {};

    writer.writeStream = writeStream;
    _setOptions(writer, opts);
    writer.encoding = opts.encoding || 'utf8';

    // Only some streams expose setEncoding.
    if (typeof writeStream.setEncoding === 'function') {
        writeStream.setEncoding(writer.encoding);
    }

    // Forward the stream's lifecycle events to listeners of this writer.
    ['drain', 'error', 'close'].forEach(function(eventName) {
        writeStream.addListener(eventName, writer.emit.bind(writer, eventName));
    });
};
sys.inherits(CsvWriter, events.EventEmitter);
---|
270 | |
---|
/**
 * Writes one record as a CSV line.
 * @method writeRecord
 * @param rec {Array} the field values; a falsy value is silently ignored
 * @throws {Error} when rec is truthy but not an array
 */
CsvWriter.prototype.writeRecord = function(rec) {
    var nothingToWrite = !rec;
    if (nothingToWrite) {
        // ignore empty records
        return;
    }
    if (Array.isArray(rec)) {
        _writeArray(this, rec);
        return;
    }
    throw new Error("CsvWriter.writeRecord only takes an array as an argument");
};
---|
278 | |
---|
/**
 * Serialises an array of values into one CSV line and writes it to the
 * writer's stream. Every field is wrapped in the writer's quote character,
 * fields are joined with the writer's separator and the line ends with CRLF.
 * @param writer {CsvWriter} supplies separator, quotechar, encoding and writeStream
 * @param arr {Array} the field values of the record
 */
function _writeArray(writer, arr) {
    var out = [];
    for (var i = 0; i < arr.length; i++) {
        if (i !== 0) out.push(writer.separator);
        out.push(writer.quotechar);
        _appendField(out, writer, arr[i]);
        out.push(writer.quotechar);
    }
    out.push("\r\n");
    // This is a plain function, so `this` is not the writer; the encoding has
    // to be read from the writer argument.
    writer.writeStream.write(out.join(''), writer.encoding);
}
---|
290 | |
---|
/**
 * Appends the characters of one field to the output array, putting the
 * writer's escape character in front of every quote or escape character.
 * Non-string values are converted with String(); null and undefined produce
 * an empty field.
 * @param outArr {Array} output fragments, joined later by the caller
 * @param writer {CsvWriter} supplies quotechar and escapechar
 * @param field {any} the field value
 */
function _appendField(outArr, writer, field) {
    var text;
    if (typeof field === 'string') {
        text = field;
    } else if (field === null || typeof field === 'undefined') {
        // We are not interested in outputting "null" or "undefined"
        outArr.push('');
        return;
    } else {
        text = String(field);
    }

    var pos = 0;
    while (pos < text.length) {
        var ch = text.charAt(pos);
        var needsEscape = ch === writer.quotechar || ch === writer.escapechar;
        if (needsEscape) {
            outArr.push(writer.escapechar);
        }
        outArr.push(ch);
        pos += 1;
    }
}
---|
310 | |
---|
/**
 * Creates a CsvWriter that writes to the file at the given path.
 * @param path {String} path of the file to write
 * @param options {Object} optional writer options; `flags` (default 'w') is
 *     passed to fs.createWriteStream
 * @return {CsvWriter} a writer attached to the file stream
 */
csv.createCsvFileWriter = function(path, options) {
    var opts = options ? options : {'flags': 'w'};
    var streamOptions = {
        'flags': opts.flags || 'w'
    };
    var fileStream = fs.createWriteStream(path, streamOptions);
    return new CsvWriter(fileStream, opts);
};
---|
318 | |
---|
/**
 * Creates a CsvWriter on top of an existing writable stream.
 * @param writeStream {WriteStream} the stream to write to
 * @param options {Object} optional writer options
 * @return {CsvWriter} the writer
 */
csv.createCsvStreamWriter = function(writeStream, options) {
    var writer = new CsvWriter(writeStream, options);
    return writer;
};
---|
322 | |
---|
323 | // =============== |
---|
324 | // = utils = |
---|
325 | // =============== |
---|
326 | |
---|
/**
 * Copies the CSV dialect options onto a reader or writer, falling back to the
 * default for every option that is missing or falsy.
 * @param obj {Object} the object that receives the settings
 * @param options {Object} optional user-supplied options
 */
function _setOptions(obj, options) {
    var opts = options || {};
    // [property set on obj, option name, default value]
    var settings = [
        ['separator', 'separator', ','],
        ['quotechar', 'quote', '"'],
        ['escapechar', 'escape', '"'],
        ['commentchar', 'comment', ''],
        ['columnNames', 'columnNames', []],
        ['columnsFromHeader', 'columnsFromHeader', false],
        ['nestedQuotes', 'nestedQuotes', false]
    ];
    for (var idx = 0; idx < settings.length; idx++) {
        var entry = settings[idx];
        obj[entry[0]] = opts[entry[1]] || entry[2];
    }
}
---|