// Utility module that provides `inherits`. Prefer `util`; if requiring it
// throws, fall back to the module named `sys`.
var sys;
try {
    sys = require('util');
} catch (e) {
    sys = require('sys');
}
var events = require('events'),
    fs = require('fs');

// Alias for the CommonJS `exports` object; the public API is attached to it.
var csv = exports;

/**
 * Provides base CSV reading capabilities.
 * @class CsvReader
 * @extends EventEmitter
 */

/**
 * Creates a reader and, when a stream is supplied, wires the stream's
 * `data`, `error` and `end` events to the reader.
 * @constructor
 * @param readStream {ReadStream} optional stream to read from; when omitted,
 *     no listeners are attached and no pause/resume/destroy/destroySoon
 *     methods are defined on the instance
 * @param options {Object} optional parameters for the reader <br/>
 *     - separator {String}
 *     - quote {String}
 *     - escape {String}
 *     - comment {String}
 *     - columnNames {Boolean}
 *     - columnsFromHeader {Boolean}
 *     - nestedQuotes {Boolean}
 */
var CsvReader = csv.CsvReader = function(readStream, options) {
    var self = this;
    events.EventEmitter.call(self);
    _setOptions(self, options);

    // Parser state carried across successive chunks of input.
    self.parsingStatus = {
        rows: 0,
        openRecord: [],
        openField: '',
        lastChar: '',
        quotedField: false,
        commentedLine: false
    };

    if (readStream) {
        readStream.addListener('data', this.parse.bind(this));
        // Stream errors are re-emitted on the reader itself.
        readStream.addListener('error', this.emit.bind(this, 'error'));
        readStream.addListener('end', this.end.bind(this));

        /**
         * Pauses the readStream
         * @method pause
         * @return {CsvReader} this reader, to allow chaining
         */
        self.pause = function() {
            readStream.pause();
            return self;
        };

        /**
         * Resumes the readStream
         * @method resume
         * @return {CsvReader} this reader, to allow chaining
         */
        self.resume = function() {
            readStream.resume();
            return self;
        };

        /**
         * Closes the readStream
         * @method destroy
         * @return {CsvReader} this reader, to allow chaining
         */
        self.destroy = function() {
            readStream.destroy();
            return self;
        };

        /**
         * Closes the readStream once it has been drained. Uses the stream's
         * own `destroySoon` when it has one and falls back to `destroy`
         * otherwise.
         * @method destroySoon
         * @return {CsvReader} this reader, to allow chaining
         */
        self.destroySoon = function() {
            if (typeof readStream.destroySoon === 'function') {
                readStream.destroySoon();
            } else {
                readStream.destroy();
            }
            return self;
        };
    }

};
sys.inherits(CsvReader, events.EventEmitter);

/**
 * Parses one chunk of CSV text. Parser state lives in `this.parsingStatus`
 * and is kept between calls; finished fields and records are handed to
 * `_addField` and `_addRecord`.
 *
 * Note: the escape and closing-quote checks look only at the next character
 * of the current chunk (`data.charAt(i + 1)`), which is the empty string at
 * the end of a chunk.
 * @method parse
 * @param data {String} chunk of text from the incoming file
 * @throws {Error} when a closing quote is followed by a character other than
 *     the separator or a line break and the `nestedQuotes` option is not set
 */
CsvReader.prototype.parse = function(data) {
    var ps = this.parsingStatus;
    var nextChar;
    // Drop a leading byte order mark when no record is currently open.
    if (ps.openRecord.length === 0) {
        if (data.charCodeAt(0) === 0xFEFF) {
            data = data.slice(1);
        }
    }
    for (var i = 0; i < data.length; i++) {
        var c = data.charAt(i);
        switch (c) {
            // escape and quote may be the same char, typically '"'
            case this.escapechar:
            case this.quotechar:
                if (ps.commentedLine) break;
                var isEscape = false;
                if (c === this.escapechar) {
                    // a quote at the very beginning of a field does not count as an escape
                    if (c !== this.quotechar || ps.openField || ps.quotedField) {
                        nextChar = data.charAt(i + 1);
                        if (this._isEscapable(nextChar)) {
                            this._addCharacter(nextChar);
                            i++; // the escaped character has been consumed
                            isEscape = true;
                        }
                    }
                }
                if (!isEscape && (c === this.quotechar)) {
                    if (ps.openField && !ps.quotedField) {
                        // a quote inside an unquoted field switches to quoted mode
                        ps.quotedField = true;
                        break;
                    }
                    if (ps.quotedField) {
                        // closing quote should be followed by separator unless the nested quotes option is set
                        nextChar = data.charAt(i + 1);
                        if (nextChar && nextChar != '\r' && nextChar != '\n' && nextChar !== this.separator && this.nestedQuotes != true) {
                            throw new Error("separator expected after a closing quote; found " + nextChar);
                        } else {
                            ps.quotedField = false;
                        }
                    } else if (ps.openField === '') {
                        ps.quotedField = true;
                    }
                }
                break;
            case this.separator:
                if (ps.commentedLine) break;
                if (ps.quotedField) {
                    this._addCharacter(c);
                } else {
                    this._addField();
                }
                break;
            case '\n':
                // handle CRLF sequence: the record was already closed by the '\r'
                if (!ps.quotedField && (ps.lastChar === '\r')) {
                    break;
                }
                // intentional fall through: a lone '\n' ends the line like '\r'
            case '\r':
                if (ps.commentedLine) {
                    ps.commentedLine = false;
                } else if (ps.quotedField) {
                    this._addCharacter(c);
                } else {
                    this._addField();
                    this._addRecord();
                }
                break;
            case this.commentchar:
                if (ps.commentedLine) break;
                if (ps.openRecord.length === 0 && ps.openField === '' && !ps.quotedField) {
                    // comment character in first position: skip the rest of the line
                    ps.commentedLine = true;
                } else {
                    this._addCharacter(c);
                }
                // without this break the character would be added a second time by `default`
                break;
            default:
                if (ps.commentedLine) break;
                this._addCharacter(c);
        }
        ps.lastChar = c;
    }
};

/**
 * Finishes parsing once the input is exhausted. Emits `error` if a quoted
 * field is still open; otherwise flushes the pending field and record and
 * emits `end`.
 * @method end
 */
CsvReader.prototype.end = function() {
    var ps = this.parsingStatus;
    if (ps.quotedField) {
        this.emit('error', new Error('Input stream ended but closing quotes expected'));
        return;
    }
    // dump open record; a non-empty openRecord means a separator was seen on
    // the last line, so the final field is pending even when it is empty
    if (ps.openField || ps.openRecord.length > 0) {
        this._addField();
    }
    if (ps.openRecord.length > 0) {
        this._addRecord();
    }
    this.emit('end');
};
/**
 * Tells whether a character may follow the escape character, i.e. whether it
 * is the escape character or the quote character itself.
 * @method _isEscapable
 * @param c {String} character to test
 * @return {Boolean} true when `c` equals `escapechar` or `quotechar`
 */
CsvReader.prototype._isEscapable = function(c) {
    return (c === this.escapechar) || (c === this.quotechar);
};

/**
 * Appends a character to the field currently being read.
 * @method _addCharacter
 * @param c {String} character to append to `parsingStatus.openField`
 */
CsvReader.prototype._addCharacter = function(c) {
    this.parsingStatus.openField += c;
};

/**
 * Closes the field currently being read: pushes it onto the open record and
 * resets the per-field state.
 * @method _addField
 */
CsvReader.prototype._addField = function() {
    var ps = this.parsingStatus;
    ps.openRecord.push(ps.openField);
    ps.openField = '';
    ps.quotedField = false;
};

/**
 * Sets the column names that `_addRecord` uses as keys when it emits records
 * as objects.
 * @method setColumnNames
 * @param names {Array} column names, in column order
 */
CsvReader.prototype.setColumnNames = function(names) {
    this.columnNames = names;
};

/**
 * Closes the record currently being read and resets the per-record state.
 * - With `columnsFromHeader` set, the first row is not emitted; it becomes
 *   the column names unless names were already supplied.
 * - With column names available, the record is emitted as an object keyed by
 *   column name (missing fields are `undefined`).
 * - Otherwise the record is emitted as an array of field strings.
 * @method _addRecord
 */
CsvReader.prototype._addRecord = function() {
    var ps = this.parsingStatus;
    var hasNames = this.columnNames != null && this.columnNames.length > 0;
    if (this.columnsFromHeader && ps.rows === 0) {
        // names passed through the options win over the header row
        if (!hasNames) {
            this.setColumnNames(ps.openRecord);
        }
    } else if (hasNames) {
        var objResult = {};
        for (var i = 0; i < this.columnNames.length; i++) {
            objResult[this.columnNames[i]] = ps.openRecord[i];
        }
        this.emit('data', objResult);
    } else {
        this.emit('data', ps.openRecord);
    }
    ps.rows++;
    ps.openRecord = [];
    ps.openField = '';
    ps.quotedField = false;
};

/**
 * Creates a CsvReader that reads from a file.
 * @method createCsvFileReader
 * @param path {String} path handed to `fs.createReadStream`
 * @param options {Object} optional reader options; `flags` (default 'r') and
 *     `encoding` (default 'utf8') are applied to the file stream, and the
 *     whole object is passed on to the CsvReader
 * @return {CsvReader} reader attached to the file stream
 */
csv.createCsvFileReader = function(path, options) {
    options = options || {};
    var readStream = fs.createReadStream(path, {
        'flags': options.flags || 'r'
    });
    readStream.setEncoding(options.encoding || 'utf8');
    return new CsvReader(readStream, options);
};

/**
 * Creates a CsvReader, optionally attached to an existing stream. May be
 * called as `(readStream, options)`, `(readStream)`, `(options)` or `()`.
 * @method createCsvStreamReader
 * @param readStream {ReadStream} optional stream to read from; a lone object
 *     without an `addListener` method is treated as the options instead
 * @param options {Object} optional reader options; `encoding` (default
 *     'utf8') is applied to the stream when it has a `setEncoding` method
 * @return {CsvReader} the new reader
 */
csv.createCsvStreamReader = function(readStream, options) {
    // A stream is an object too, so it is told apart from an options hash by
    // the `addListener` method the reader needs in order to subscribe to it.
    var isStream = readStream != null && typeof readStream.addListener === 'function';
    if (options === undefined && typeof readStream === 'object' && !isStream) {
        options = readStream;
        readStream = undefined;
    }
    options = options || {};
    if (readStream && typeof readStream.setEncoding === 'function') {
        readStream.setEncoding(options.encoding || 'utf8');
    }
    return new CsvReader(readStream, options);
};

/**
 * Writes records as CSV to a stream and re-emits the stream's `drain`,
 * `error` and `close` events.
 * @class CsvWriter
 * @extends EventEmitter
 * @constructor
 * @param writeStream {WriteStream} stream the records are written to
 * @param options {Object} optional settings; the CSV options are applied via
 *     `_setOptions`, plus `encoding` (default 'utf8')
 */
var CsvWriter = csv.CsvWriter = function(writeStream, options) {
    var self = this;
    events.EventEmitter.call(self);
    self.writeStream = writeStream;
    options = options || {};
    _setOptions(self, options);
    self.encoding = options.encoding || 'utf8';

    // Not every stream has setEncoding, so only call it when present.
    if (typeof writeStream.setEncoding === 'function') {
        writeStream.setEncoding(self.encoding);
    }

    writeStream.addListener('drain', this.emit.bind(this, 'drain'));
    writeStream.addListener('error', this.emit.bind(this, 'error'));
    writeStream.addListener('close', this.emit.bind(this, 'close'));
};
sys.inherits(CsvWriter, events.EventEmitter);

/**
 * Writes one record as a line of CSV.
 * @method writeRecord
 * @param rec {Array} field values; a falsy argument is ignored
 * @return whatever `_writeArray` returns, or undefined when `rec` is ignored
 * @throws {Error} when `rec` is truthy but not an array
 */
CsvWriter.prototype.writeRecord = function(rec) {
    if (!rec) return; // ignore empty records
    if (!Array.isArray(rec)) {
        throw new Error("CsvWriter.writeRecord only takes an array as an argument");
    }
    return _writeArray(this, rec);
};

/**
 * Serializes an array of values as one CSV line and writes it to the
 * writer's stream. Every field is wrapped in the quote character, fields are
 * joined with the separator, and the line ends with CRLF.
 * @param writer {CsvWriter} supplies separator, quotechar, encoding and writeStream
 * @param arr {Array} field values
 * @return the value returned by `writeStream.write`
 */
function _writeArray(writer, arr) {
    var out = [];
    for (var i = 0; i < arr.length; i++) {
        if (i !== 0) out.push(writer.separator);
        out.push(writer.quotechar);
        _appendField(out, writer, arr[i]);
        out.push(writer.quotechar);
    }
    out.push("\r\n");
    // `this` is not the writer inside a plain function; read the encoding from `writer`
    return writer.writeStream.write(out.join(''), writer.encoding);
}

/**
 * Appends the escaped text of one field to the output buffer. `null` and
 * `undefined` become an empty string, other non-strings are converted with
 * `String()`, and every quote or escape character is preceded by the
 * writer's escape character.
 * @param outArr {Array} buffer of string pieces that the caller joins
 * @param writer {CsvWriter} supplies quotechar and escapechar
 * @param field value to append
 */
function _appendField(outArr, writer, field) {
    // Make sure field is a string
    if (typeof field !== 'string') {
        // We are not interested in outputting "null" or "undefined"
        if (typeof field === 'undefined' || field === null) {
            outArr.push('');
            return;
        }
        field = String(field);
    }

    for (var i = 0; i < field.length; i++) {
        var ch = field.charAt(i);
        if (ch === writer.quotechar || ch === writer.escapechar) {
            outArr.push(writer.escapechar);
        }
        outArr.push(ch);
    }
}

/**
 * Creates a CsvWriter that writes to a file.
 * @method createCsvFileWriter
 * @param path {String} path handed to `fs.createWriteStream`
 * @param options {Object} optional writer options; `flags` (default 'w') is
 *     applied to the file stream, and the whole object is passed on to the
 *     CsvWriter
 * @return {CsvWriter} writer attached to the file stream
 */
csv.createCsvFileWriter = function(path, options) {
    options = options || {'flags': 'w'};
    var writeStream = fs.createWriteStream(path, {
        'flags': options.flags || 'w'
    });
    return new CsvWriter(writeStream, options);
};

/**
 * Creates a CsvWriter on top of an existing stream.
 * @method createCsvStreamWriter
 * @param writeStream {WriteStream} stream the records are written to
 * @param options {Object} optional writer options, passed on to the CsvWriter
 * @return {CsvWriter} the new writer
 */
csv.createCsvStreamWriter = function(writeStream, options) {
    return new CsvWriter(writeStream, options);
};

// ===============
// =    utils    =
// ===============

/**
 * Copies the CSV options onto a reader or writer, applying a default for
 * every option that is undefined. Option names differ from the property
 * names they set: `quote` -> `quotechar`, `escape` -> `escapechar`,
 * `comment` -> `commentchar`.
 * @param obj {Object} object that receives the settings
 * @param options {Object} optional; separator (','), quote ('"'),
 *     escape ('"'), comment (''), columnNames ([]),
 *     columnsFromHeader (false), nestedQuotes (false)
 */
function _setOptions(obj, options) {
    options = options || {};

    // Only `undefined` selects the default, so '' and false can be set explicitly.
    function pick(name, fallback) {
        return (typeof options[name] !== 'undefined') ? options[name] : fallback;
    }

    obj.separator = pick('separator', ',');
    obj.quotechar = pick('quote', '"');
    obj.escapechar = pick('escape', '"');
    obj.commentchar = pick('comment', '');
    obj.columnNames = pick('columnNames', []);
    obj.columnsFromHeader = pick('columnsFromHeader', false);
    obj.nestedQuotes = pick('nestedQuotes', false);
}