source: sipes/modules_contrib/feeds/libraries/ParserCSV.inc @ 1e95969

stableversion-3.0
Last change on this file since 1e95969 was 177a560, checked in by José Gregorio Puentes <jpuentes@…>, 8 años ago

se agrego el directorio de modulos contribuidos de drupal

  • Propiedad mode establecida a 100755
File size: 9.9 KB
Línea 
1<?php
2
3/**
4 * Contains CSV Parser.
5 * Functions in this file are independent of the Feeds specific implementation.
6 * Thanks to jpetso http://drupal.org/user/56020 for most of the code in this
7 * file.
8 */
9
10/**
11 * Text lines from file iterator.
12 */
class ParserCSVIterator implements Iterator {
  /**
   * File handle returned by fopen(), or FALSE if the file could not be opened.
   */
  private $handle;

  /**
   * The line most recently read, or NULL when there is no current line.
   */
  private $currentLine;

  /**
   * Byte offset of the file pointer right after the current line was read.
   */
  private $currentPos;

  /**
   * Opens the given file for reading.
   *
   * @param $filepath
   *   Path of the file to iterate over, passed as-is to fopen().
   */
  public function __construct($filepath) {
    $this->handle = fopen($filepath, 'r');
    $this->currentLine = NULL;
    $this->currentPos = NULL;
  }

  /**
   * Closes the file handle if one was opened.
   */
  public function __destruct() {
    if ($this->handle) {
      fclose($this->handle);
    }
  }

  /**
   * Moves the file pointer to a byte offset and reads the line found there.
   *
   * @param $pos
   *   Byte offset to seek to; defaults to the beginning of the file.
   */
  #[\ReturnTypeWillChange]
  public function rewind($pos = 0) {
    if ($this->handle) {
      fseek($this->handle, $pos);
      $this->next();
    }
  }

  /**
   * Reads the next line from the file.
   *
   * @return
   *   The line that was read (including its line ending), or NULL when the
   *   end of the file has been reached or no file is open.
   */
  #[\ReturnTypeWillChange]
  public function next() {
    if ($this->handle) {
      // feof() only turns TRUE after a read has hit the end of the file, so
      // for a file that ends with a newline the last fgets() call returns
      // FALSE. Normalize that to NULL, otherwise valid() (which relies on
      // isset()) would report one extra, bogus element.
      $line = feof($this->handle) ? FALSE : fgets($this->handle);
      $this->currentLine = ($line === FALSE) ? NULL : $line;
      $this->currentPos = ftell($this->handle);
      return $this->currentLine;
    }
  }

  /**
   * Tells whether there is a current line.
   */
  #[\ReturnTypeWillChange]
  public function valid() {
    return isset($this->currentLine);
  }

  /**
   * Returns the current line, or NULL if there is none.
   */
  #[\ReturnTypeWillChange]
  public function current() {
    return $this->currentLine;
  }

  /**
   * Returns the byte offset of the file pointer after the current line.
   */
  public function currentPos() {
    return $this->currentPos;
  }

  /**
   * Returns the iteration key, which is the constant string 'line'.
   */
  #[\ReturnTypeWillChange]
  public function key() {
    return 'line';
  }
}
61
62/**
63 * Functionality to parse CSV files into a two dimensional array.
64 */
class ParserCSV {
  private $delimiter;
  private $skipFirstLine;
  private $columnNames;
  private $timeout;
  private $timeoutReached;
  private $startByte;
  private $lineLimit;
  private $lastLinePos;

  /**
   * Initializes the parser with its defaults: comma delimiter, first line not
   * skipped, no column names, no timeout, start at byte 0 and no line limit.
   */
  public function __construct() {
    $this->delimiter = ',';
    $this->skipFirstLine = FALSE;
    $this->columnNames = FALSE;
    $this->timeout = FALSE;
    $this->timeoutReached = FALSE;
    $this->startByte = 0;
    $this->lineLimit = 0;
    $this->lastLinePos = 0;
    ini_set('auto_detect_line_endings', TRUE);
  }

  /**
   * Set the column delimiter string.
   * By default, the comma (',') is used as delimiter.
   */
  public function setDelimiter($delimiter) {
    $this->delimiter = $delimiter;
  }

  /**
   * Set this to TRUE if the parser should skip the first line of the CSV text,
   * which might be desired if the first line contains the column names.
   * By default, this is set to FALSE and the first line is not skipped.
   */
  public function setSkipFirstLine($skipFirstLine) {
    $this->skipFirstLine = $skipFirstLine;
  }

  /**
   * Specify an array of column names if you know them in advance, or FALSE
   * (which is the default) to unset any prior column names. If no column names
   * are set, the parser will put each row into a simple numerically indexed
   * array. If column names are given, the parser will create arrays with
   * these column names as array keys instead.
   */
  public function setColumnNames($columnNames) {
    $this->columnNames = $columnNames;
  }

  /**
   * Define the time (in milliseconds) after which the parser stops parsing,
   * even if it has not yet finished processing the CSV data. If the timeout
   * has been reached before parsing is done, the parse() method will return
   * an incomplete list of rows - a single row will never be cut off in the
   * middle, though. By default, no timeout (@p $timeout == FALSE) is defined.
   *
   * You can check if the timeout has been reached by calling the
   * timeoutReached() method after parse() has been called.
   */
  public function setTimeout($timeout) {
    $this->timeout = $timeout;
  }

  /**
   * After calling the parse() method, determine if the timeout (set by the
   * setTimeout() method) has been reached.
   *
   * @deprecated Use lastLinePos() instead to determine whether a file has
   *   finished parsing.
   */
  public function timeoutReached() {
    return $this->timeoutReached;
  }

  /**
   * Define the number of lines to parse in one parsing operation.
   *
   * By default, all lines of a file are being parsed.
   */
  public function setLineLimit($lines) {
    $this->lineLimit = $lines;
  }

  /**
   * Get the byte number where the parser left off after last parse() call.
   *
   * @return
   *  0 if parse() ran to the end of its input or has not been called, the
   *  position reported by the iterator at the point where the timeout or the
   *  line limit has been reached otherwise. This position can be used to set
   *  the start byte for the next iteration after parse() has reached the
   *  timeout set with setTimeout() or the line limit set with setLineLimit().
   *
   * @see ParserCSV::setStartByte($start);
   */
  public function lastLinePos() {
    return $this->lastLinePos;
  }

  /**
   * Set the byte where file should be started to read.
   *
   * Useful when parsing a file in batches.
   *
   * @return
   *   The start byte that was just set.
   */
  public function setStartByte($start) {
    return $this->startByte = $start;
  }

  /**
   * Parse CSV files into a two dimensional array.
   *
   * Parsing starts at the byte set with setStartByte() and stops at the end
   * of the input, after the number of lines set with setLineLimit(), or once
   * the timeout set with setTimeout() has elapsed, whichever comes first.
   *
   * @param Iterator $lineIterator
   *   An Iterator object that yields line strings, e.g. ParserCSVIterator.
   *   Besides the Iterator methods, it must accept a byte offset as argument
   *   to rewind() and provide a currentPos() method.
   * @return
   *   Two dimensional array that contains the data in the CSV file. Rows are
   *   numerically indexed unless column names have been set, in which case
   *   each row is keyed by column name and missing fields are empty strings.
   */
  public function parse(Iterator $lineIterator) {
    $skipLine = $this->skipFirstLine;
    $rows = array();

    $this->timeoutReached = FALSE;
    $this->lastLinePos = 0;
    // microtime(TRUE) yields float seconds (without the argument it returns a
    // "msec sec" string that cannot be used in arithmetic); the timeout is
    // documented in milliseconds, hence the conversion.
    $maxTime = empty($this->timeout) ? FALSE : (microtime(TRUE) + $this->timeout / 1000);
    $linesParsed = 0;
    $delimiterLength = strlen($this->delimiter);

    for ($lineIterator->rewind($this->startByte); $lineIterator->valid(); $lineIterator->next()) {

      // Make really sure we've got lines without trailing newlines.
      $line = trim($lineIterator->current(), "\r\n");

      // Skip empty lines. A strict comparison is required here: empty() would
      // also discard a row whose whole content is "0".
      if ($line === '') {
        continue;
      }
      // If the first line contains column names, skip it.
      if ($skipLine) {
        $skipLine = FALSE;
        continue;
      }

      // The actual parser. explode() is unfortunately not suitable because the
      // delimiter might be located inside a quoted field, and that would break
      // the field and/or require additional effort to re-join the fields.
      $quoted = FALSE;
      $currentIndex = 0;
      $currentField = '';
      $fields = array();

      // "<=" rather than "<" so that a line ending in a delimiter or a closing
      // quote still gets its last (possibly empty) field recorded.
      while ($currentIndex <= strlen($line)) {
        if ($quoted) {
          $nextQuoteIndex = strpos($line, '"', $currentIndex);

          if ($nextQuoteIndex === FALSE) {
            // There's a line break before the quote is closed, so fetch the
            // next line and start from there.
            $currentField .= substr($line, $currentIndex);
            $lineIterator->next();

            if (!$lineIterator->valid()) {
              // Whoa, an unclosed quote! Well whatever, let's just ignore
              // that shortcoming and record it nevertheless.
              $fields[] = $currentField;
              break;
            }
            // Ok, so, on with fetching the next line, as mentioned above.
            $currentField .= "\n";
            $line = trim($lineIterator->current(), "\r\n");
            $currentIndex = 0;
            continue;
          }

          // There's actually another quote in this line...
          // find out whether it's escaped or not.
          $currentField .= substr($line, $currentIndex, $nextQuoteIndex - $currentIndex);

          if (isset($line[$nextQuoteIndex + 1]) && $line[$nextQuoteIndex + 1] === '"') {
            // Escaped quote, add a single one to the field and proceed quoted.
            $currentField .= '"';
            $currentIndex = $nextQuoteIndex + 2;
          }
          else {
            // End of the quoted section, close the quote and let the
            // $quoted == FALSE block finalize the field.
            $quoted = FALSE;
            $currentIndex = $nextQuoteIndex + 1;
          }
        }
        else { // $quoted == FALSE
          // First, let's find out where the next character of interest is.
          $nextQuoteIndex = strpos($line, '"', $currentIndex);
          $nextDelimiterIndex = strpos($line, $this->delimiter, $currentIndex);

          if ($nextQuoteIndex === FALSE) {
            $nextIndex = $nextDelimiterIndex;
          }
          elseif ($nextDelimiterIndex === FALSE) {
            $nextIndex = $nextQuoteIndex;
          }
          else {
            $nextIndex = min($nextQuoteIndex, $nextDelimiterIndex);
          }

          if ($nextIndex === FALSE) {
            // This line is done, add the rest of it as last field.
            $currentField .= substr($line, $currentIndex);
            $fields[] = $currentField;
            break;
          }
          elseif ($nextIndex === $nextDelimiterIndex) {
            // The field ends right before the delimiter; the delimiter itself
            // (which may be longer than one character) is skipped entirely so
            // that none of its bytes leak into the field.
            $currentField .= substr($line, $currentIndex, $nextIndex - $currentIndex);
            $fields[] = $currentField;
            $currentField = '';
            $currentIndex = $nextIndex + $delimiterLength;
            // Continue with the next field.
          }
          else { // $line[$nextIndex] == '"'
            $quoted = TRUE;
            $currentField .= substr($line, $currentIndex, $nextIndex - $currentIndex);
            $currentIndex = $nextIndex + 1;
            // Continue this field in the $quoted == TRUE block.
          }
        }
      }
      // End of CSV parser. We've now got all the fields of the line as strings
      // in the $fields array.

      if (empty($this->columnNames)) {
        $row = $fields;
      }
      else {
        // Map fields to the configured column names by position; columns
        // without a matching field get an empty string, surplus fields are
        // dropped.
        $row = array();
        foreach ($this->columnNames as $columnName) {
          $field = array_shift($fields);
          $row[$columnName] = isset($field) ? $field : '';
        }
      }
      $rows[] = $row;

      // Quit parsing if timeout has been reached or requested lines have been
      // reached.
      if (!empty($maxTime) && microtime(TRUE) > $maxTime) {
        $this->timeoutReached = TRUE;
        $this->lastLinePos = $lineIterator->currentPos();
        break;
      }
      $linesParsed++;
      if ($this->lineLimit && $linesParsed >= $this->lineLimit) {
        $this->lastLinePos = $lineIterator->currentPos();
        break;
      }
    }
    return $rows;
  }
}
Nota: Vea TracBrowser para ayuda de uso del navegador del repositorio.