source: Dev/branches/rest-dojo-ui/server/rdfapi/rdql/RdqlParser.php @ 256

Last change on this file since 256 was 256, checked in by hendrikvanantwerpen, 13 years ago

Reworked project structure based on REST interaction and Dojo library. As
soon as this is stable, the old jQueryUI branch can be removed (it's
kept for reference).

File size: 37.7 KB
Line 
1<?php
2
3// ----------------------------------------------------------------------------------
4// Class: RdqlParser
5// ----------------------------------------------------------------------------------
6
7/**
8 * This class contains methods for parsing an Rdql query string into PHP variables.
9 * The output of the RdqlParser is an array with variables and constraints
10 * of each query clause (Select, From, Where, And, Using).
11 * To perform an RDQL query this array has to be passed to the RdqlEngine.
12 *
13 * @version  $Id: RdqlParser.php 282 2006-06-08 06:25:14Z tgauss $
14 * @author   Radoslaw Oldakowski <radol@gmx.de>
15 *
16 * @package rdql
17 * @access public
18 */
19
20
21Class RdqlParser extends Object{
22
23/**
24 * Parsed query variables and constraints.
25 * { } are only used within the parser class and are not returned as parsed query.
26 * ( [] stands for an integer index - 0..N ) 
27 *
28 * @var     array   ['selectVars'][] = ?VARNAME
29 *                  ['sources'][]{['value']} = URI | QName
30 *                                                               {['is_qname'] = boolean}
31 *                  ['patterns'][]['subject']['value'] = VARorURI
32 *                                                                                      {['is_qname'] = boolean}
33 *                                ['predicate']['value'] = VARorURI
34 *                                                                                        {['is_qname'] = boolean}
35 *                                ['object']['value'] = VARorURIorLiteral
36 *                                                                         {['is_qname'] = boolean}
37 *                                          ['is_literal'] = boolean
38 *                                          ['l_lang'] = string
39 *                                          ['l_dtype'] = string
40 *                                         {['l_dtype_is_qname'] = boolean}
41 *                  ['filters'][]['string'] = string
42 *                               ['evalFilterStr'] = string
43 *                               ['regexEqExprs'][]['var'] = ?VARNAME
44 *                                                 ['operator'] = (eq | ne)
45 *                                                 ['regex'] = string
46 *                               ['strEqExprs'][]['var'] = ?VARNAME
47 *                                               ['operator'] = (eq | ne)
48 *                                               ['value'] = string
49 *                                               ['value_type'] = ('variable' | 'URI' | 'QName' | 'Literal')
50 *                                               ['value_lang'] = string
51 *                                               ['value_dtype'] = string
52 *                                              {['value_dtype_is_qname'] = boolean}
53 *                               ['numExprVars'][] = ?VARNAME
54 *                 {['ns'][PREFIX] = NAMESPACE}   
55 * @access      private
56 */
57 var $parsedQuery;
58
59
60/**
61 * Query string divided into a sequence of tokens.
62 * A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
63 * or a string containing any characters except from the above.
64 *
65 * @var     array
66 * @access      private
67 */
68 var $tokens;
69
70
71/**
72 * Parse the given RDQL query string and return an array with query variables and constraints.
73 *
74 * @param   string  $queryString
75 * @return  array   $this->parsedQuery
76 * @access      public
77 */
78 function & parseQuery($queryString) {
79
80   $cleanQueryString = $this->removeComments($queryString);
81   $this->tokenize($cleanQueryString);
82   $this->startParsing();
83   if ($this->parsedQuery['selectVars'][0] == '*')
84      $this->parsedQuery['selectVars'] = $this->findAllQueryVariables();
85   else
86      $this->_checkSelectVars();
87   $this->replaceNamespacePrefixes();
88
89   return $this->parsedQuery;
90 }
91
92
93/**
94 *  Remove comments from the passed query string.
95 *
96 *  @param  string  $query
97 *  @return string
98 *  @throws PHPError
99 *  @access private
100 */
101 function removeComments($query) {
102 
103   $last = strlen($query)-1;
104   $query .= ' ';
105   $clean = '';
106   for ($i=0; $i<=$last; $i++) {
107     // don't search for comments inside a 'literal'@lang^^dtype or "literal"@lang^^dtype
108     if ($query{$i} == "'" || $query{$i} == '"') {
109        $quotMark = $query{$i};
110        do
111          $clean .= $query{$i++};
112        while($i < $last && $query{$i} != $quotMark);
113        $clean .= $query{$i};
114        // language
115        if ($query{$i+1} == '@') {
116           do{
117             if ($query{$i+1} == '^' && $query{$i+2} == '^')
118                break;
119             $clean .= $query{++$i};
120           }while ($i < $last && $query{$i} != ' '  && $query{$i} != "\t"
121                              && $query{$i} != "\n" && $query{$i} != "\r");
122        }
123        // datatype
124        if ($query{$i+1} == '^' && $query{$i+2} == '^') {
125            do
126              $clean .= $query{++$i};
127            while ($i < $last && $query{$i} != ' '  && $query{$i} != "\t"
128                             && $query{$i} != "\n" && $query{$i} != "\r" );
129        }
130     // don't search for comments inside an <URI> either
131     }elseif ($query{$i} == '<') {
132        do{
133           $clean .= $query{$i++};
134        }while($i < $last && $query{$i} != '>');
135        $clean .= $query{$i};
136     }elseif ($query{$i} == '/') {
137        // clear: // comment
138        if ($i < $last && $query{$i+1} == '/') {
139            while($i < $last && $query{$i} != "\n" && $query{$i} != "\r")
140              ++$i;
141            $clean .= ' ';
142        // clear: /*comment*/
143        }elseif ($i < $last-2 && $query{$i+1} == '*') {
144            $i += 2;
145            while($i < $last  && ($query{$i} != '*' || $query{$i+1} != '/'))
146              ++$i;
147            if ($i >= $last && ($query{$last-1} != '*' || $query{$last} != '/'))
148               trigger_error(RDQL_SYN_ERR .": unterminated comment - '*/' missing", E_USER_ERROR);
149            ++$i;
150        }else
151          $clean .= $query{$i};
152     }else
153        $clean .= $query{$i};
154   }
155   return $clean;
156 }
157
158
159/**
160 * Divide the query string into tokens.
161 * A token is either: ' ' or "\n" or "\r" or '\t' or ',' or '(' or ')'
162 * or a string containing any character except from the above.
163 *
164 * @param   string  $queryString
165 * @access      private
166 */
167 function tokenize($queryString) {
168
169   $queryString = trim($queryString, " \r\n\t");
170   $specialChars = array (" ", "\t", "\r", "\n", ",", "(", ")");
171   $len = strlen($queryString);
172   $this->tokens[0]='';
173   $n = 0;
174
175   for ($i=0; $i<$len; ++$i) {
176       if (!in_array($queryString{$i}, $specialChars))
177          $this->tokens[$n] .= $queryString{$i};
178       else {
179          if ($this->tokens[$n] != '')
180             ++$n;
181          $this->tokens[$n] = $queryString{$i};
182          $this->tokens[++$n] = '';
183       }
184   }
185 }
186
187
188/**
189 * Start parsing of the tokenized query string.
190 *
191 * @access private
192 */
193 function startParsing() {
194
195   $this->parseSelect();
196 }
197
198
199/**
200 * Parse the SELECT clause of an Rdql query.
201 * When the parsing of the SELECT clause is finished, this method will call
202 * a suitable method to parse the subsequent clause.
203 *
204 * @throws      PhpError
205 * @access      private
206 */
207  function parseSelect() {
208
209   $this->_clearWhiteSpaces();
210
211   // Check if the queryString contains a "SELECT" token
212   if (strcasecmp('SELECT', current($this->tokens)))
213      trigger_error(RDQL_SEL_ERR  ."'" .current($this->tokens)
214                                  ."' - SELECT keyword expected", E_USER_ERROR);
215   unset($this->tokens[key($this->tokens)]);
216   $this->_clearWhiteSpaces();
217
218   // Parse SELECT *
219   if (current($this->tokens) == '*') {
220      unset($this->tokens[key($this->tokens)]);
221      $this->parsedQuery['selectVars'][0] = '*';
222      $this->_clearWhiteSpaces();
223      if (strcasecmp('FROM', current($this->tokens))
224          && strcasecmp('SOURCE', current($this->tokens))
225          && strcasecmp('WHERE', current($this->tokens)))
226        trigger_error(RDQL_SYN_ERR .": '" .htmlspecialchars(current($this->tokens))
227                               ."' - SOURCE or WHERE clause expected", E_USER_ERROR);
228   }
229
230   // Parse SELECT ?Var (, ?Var)*
231   $commaExpected = FALSE;
232   $comma = FALSE;
233   while (current($this->tokens) != NULL) {
234     $k = key($this->tokens);
235     $token = $this->tokens[$k];
236
237     switch ($token) {
238        case ',': if (!$commaExpected)
239                     trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
240                  $comma = TRUE;
241                  $commaExpected = FALSE;
242                  break;
243        case '(':
244        case ')': trigger_error(RDQL_SEL_ERR ." '$token' - illegal input", E_USER_ERROR);
245                  break;
246        default :
247                  if (!strcasecmp('FROM', $token) || !strcasecmp('SOURCE', $token)) {
248                     if ($comma)
249                        trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
250                     unset($this->tokens[$k]);
251                     return $this->parseFrom();
252                  }elseif (!strcasecmp('WHERE', $token) && !$comma) {
253                     if ($comma)
254                        trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
255                     unset($this->tokens[$k]);
256                     return $this->parseWhere();
257                  }
258                  if ($token{0} == '?') {
259                     $this->parsedQuery['selectVars'][] = $this->_validateVar($token, RDQL_SEL_ERR);
260                     $commaExpected = TRUE;
261                     $comma = FALSE;
262                  }else
263                     trigger_error(RDQL_SEL_ERR ." '$token' - '?' missing", E_USER_ERROR);
264     }
265     unset($this->tokens[$k]);
266     $this->_clearWhiteSpaces();
267   }
268   trigger_error(RDQL_SYN_ERR . ': WHERE clause missing', E_USER_ERROR);
269 }
270
271
272/**
273 * Parse the FROM/SOURCES clause of an Rdql query
274 * When the parsing of this clause is finished, parseWhere() will be called.
275 *
276 * @throws      PhpError
277 * @access      private
278 */
279 function parseFrom() {
280
281   $comma = FALSE;
282   $commaExpected = FALSE;
283   $i = -1;
284   while (current($this->tokens) != NULL) {
285
286      $this->_clearWhiteSpaces();
287      if (!strcasecmp('WHERE', current($this->tokens)) && count($this->parsedQuery['sources']) != 0) {
288         if ($comma)
289            trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
290         unset($this->tokens[key($this->tokens)]);
291         return $this->parseWhere();
292      }
293      if (current($this->tokens) == ',') {
294         if ($commaExpected) {
295            $comma = TRUE;
296            $commaExpected = FALSE;
297            unset($this->tokens[key($this->tokens)]);
298         }else
299            trigger_error(RDQL_SRC_ERR ."',' - unecpected comma", E_USER_ERROR);
300      }else{
301        $token = current($this->tokens);
302        $this->parsedQuery['sources'][++$i]['value'] = $this->_validateURI($token, RDQL_SRC_ERR);
303        if ($token{0} != '<')
304                $this->parsedQuery['sources'][$i]['is_qname'] = TRUE;
305        $commaExpected = TRUE;
306        $comma = FALSE;
307      }
308   }
309   trigger_error(RDQL_SYN_ERR .': WHERE clause missing', E_USER_ERROR);
310 }
311
312
313/**
314 * Parse the WHERE clause of an Rdql query.
315 * When the parsing of the WHERE clause is finished, this method will call
316 * a suitable method to parse the subsequent clause if provided.
317 *
318 * @throws      PhpError
319 * @access      private
320 */
321 function parseWhere() {
322
323   $comma = FALSE;
324   $commaExpected = FALSE;
325   $i=0;
326
327   do {
328     $this->_clearWhiteSpaces();
329     if (!strcasecmp('AND', current($this->tokens))
330         && count($this->parsedQuery['patterns']) != 0){
331        if ($comma)
332            trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
333        unset($this->tokens[key($this->tokens)]);
334        return $this->parseAnd();
335     }elseif (!strcasecmp('USING', current($this->tokens))
336              && count($this->parsedQuery['patterns']) != 0) {
337        if ($comma)
338            trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
339        unset($this->tokens[key($this->tokens)]);
340        return $this->parseUsing();
341     }
342
343     if (current($this->tokens) == ',') {
344        $comma = TRUE;
345        $this->_checkComma($commaExpected, RDQL_WHR_ERR);
346
347     }else{
348
349        if (current($this->tokens) != '(')
350           trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens)
351                                      ."' - '(' expected", E_USER_ERROR);
352        unset($this->tokens[key($this->tokens)]);
353        $this->_clearWhiteSpaces();
354
355        $this->parsedQuery['patterns'][$i]['subject'] = $this->_validateVarUri(current($this->tokens));
356        $this->_checkComma(TRUE, RDQL_WHR_ERR);
357        $this->parsedQuery['patterns'][$i]['predicate'] = $this->_validateVarUri(current($this->tokens));
358        $this->_checkComma(TRUE, RDQL_WHR_ERR);
359        $this->parsedQuery['patterns'][$i++]['object'] = $this->_validateVarUriLiteral(current($this->tokens));
360        $this->_clearWhiteSpaces();
361
362       if (current($this->tokens) != ')')
363          trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens) ."' - ')' expected", E_USER_ERROR);
364       unset($this->tokens[key($this->tokens)]);
365       $this->_clearWhiteSpaces();
366       $commaExpected = TRUE;
367       $comma = FALSE;
368     }
369   }while(current($this->tokens) != NULL);
370
371   if ($comma)
372      trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
373 }
374
375
376/**
377 * Parse the AND clause of an Rdql query
378 *
379 * @throws      PhpError
380 * @access      private
381 * @todo clear comments
382 */
383 function parseAnd() {
384
385   $this->_clearWhiteSpaces();
386   $n = 0;
387   $filterStr = '';
388
389   while(current($this->tokens) != NULL) {
390     $k = key($this->tokens);
391     $token = $this->tokens[$k];
392
393     if (!strcasecmp('USING', $token)) {
394        $this->parseFilter($n, $filterStr);
395        unset($this->tokens[$k]);
396        return $this->parseUsing();
397     }elseif ($token == ',') {
398        $this->parseFilter($n, $filterStr);
399        $filterStr = '';
400        $token = '';
401        ++$n;
402     }
403     $filterStr .= $token;
404     unset($this->tokens[$k]);
405   }
406   $this->parseFilter($n, $filterStr);
407 }
408 
409 
410/**
411 * Parse the USING clause of an Rdql query
412 *
413 * @throws      PhpError
414 * @access      private
415 */
416 function parseUsing() {
417
418  $commaExpected = FALSE;
419  $comma = FALSE;
420
421  do {
422    $this->_clearWhiteSpaces();
423    if (current($this->tokens) == ',') {
424        $comma = TRUE;
425        $this->_checkComma($commaExpected, RDQL_USG_ERR);
426    }else{
427       $prefix = $this->_validatePrefix(current($this->tokens));
428       $this->_clearWhiteSpaces();
429
430       if (strcasecmp('FOR', current($this->tokens)))
431          trigger_error(RDQL_USG_ERR ." keyword: 'FOR' missing in the namespace declaration: '", E_USER_ERROR);
432       unset($this->tokens[key($this->tokens)]);
433       $this->_clearWhiteSpaces();
434
435       $this->parsedQuery['ns'][$prefix] = $this->_validateUri(current($this->tokens), RDQL_USG_ERR);
436       $this->_clearWhiteSpaces();
437       $commaExpected = TRUE;
438       $comma = FALSE;
439    }
440  }while(current($this->tokens) != NULL);
441
442  if ($comma)
443      trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
444 }
445
446
447/**
448 * Check if a filter from the AND clause contains an equal number of '(' and ')'
449 * and parse filter expressions.
450 *
451 * @param   integer $n
452 * @param   string  $filter
453 * @throws  PHPError
454 * @access      private
455 */
456 function parseFilter($n, $filter) {
457
458   if ($filter == NULL)
459      trigger_error(RDQL_AND_ERR ." ',' - unexpected comma", E_USER_ERROR);
460   $paren = substr_count($filter, '(') - substr_count($filter, ')');
461   if ($paren != 0) {
462      if ($paren > 0)
463         $errorMsg = "'" .htmlspecialchars($filter) ."' - ')' missing ";
464      elseif ($paren < 0)
465         $errorMsg = "'" .htmlspecialchars($filter) ."' - too many ')' ";
466      trigger_error(RDQL_AND_ERR .$errorMsg, E_USER_ERROR);
467   }
468
469   $this->parsedQuery['filters'][$n] = $this->parseExpressions($filter);
470 }
471
472
473/**
474 * Parse expressions inside the passed filter:
475 * 1)  regex equality expressions:    ?var [~~ | =~ | !~ ] REG_EX
476 * 2a) string equality expressions:   ?var  [eq | ne] "literal"@lang^^dtype.
477 * 2b) string equality expressions:   ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
478 * 3)  numerical expressions: e.g.    (?var1 - ?var2)*4 >= 20
479 *
480 * In cases 1-2 parse each expression of the given filter into an array of variables.
481 * For each parsed expression put a place holder (e.g. ##RegEx_1##) into the filterStr.
482 * The RDQLengine will then replace each place holder with the resulting boolean value
483 * of the corresponding expression.
484 * The remaining filterStr contains only numerical expressions and place holders.
485 *
486 * @param   string  $filterStr
487 * @return  array   ['string'] = string
488 *                  ['evalFilterStr'] = string
489 *                  ['regexEqExprs'][]['var'] = ?VARNAME
490 *                                    ['operator'] = (eq | ne)
491 *                                    ['regex'] = string
492 *                  ['strEqExprs'][]['var'] = ?VARNAME
493 *                                 ['operator'] = (eq | ne)
494 *                                 ['value'] = string
495 *                                 ['value_type'] = ('variable' | 'URI' | 'QName'| 'Literal')
496 *                                 ['value_lang'] = string
497 *                                 ['value_dtype'] = string
498 *                                                                 ['value_dtype_is_qname'] = boolean
499 *                  ['numExprVars'][] = ?VARNAME
500 * @access      private
501 */
502 function parseExpressions($filterStr) {
503
504   $parsedFilter['string'] = $filterStr;
505   $parsedFilter['regexEqExprs'] = array();
506   $parsedFilter['strEqExprs'] = array();
507   $parsedFilter['numExprVars'] = array();
508
509   // parse regex string equality expressions, e.g. ?x ~~ !//foo.com/r!i
510   $reg_ex  = "/(\?[a-zA-Z0-9_]+)\s+([~!=]~)\s+(['|\"])?([^\s'\"]+)(['|\"])?/";
511   preg_match_all($reg_ex, $filterStr, $eqExprs);
512   foreach ($eqExprs[0] as $i => $eqExpr) {
513     $this->_checkRegExQuotation($filterStr, $eqExprs[3][$i], $eqExprs[5][$i]);
514     $parsedFilter['regexEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
515     $parsedFilter['regexEqExprs'][$i]['operator'] = $eqExprs[2][$i];
516     $parsedFilter['regexEqExprs'][$i]['regex'] = $eqExprs[4][$i];
517
518     $filterStr = str_replace($eqExpr, " ##RegEx_$i## ", $filterStr);
519   }
520
521   // parse ?var  [eq | ne] "literal"@lang^^dtype
522   $reg_ex  = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\'[^\']*\'|\"[^\"]*\")";
523   $reg_ex .= "(@[a-zA-Z]+)?(\^{2}\S+:?\S+)?/i";
524   preg_match_all($reg_ex, $filterStr, $eqExprs);
525   foreach ($eqExprs[0] as $i => $eqExpr) {
526     $parsedFilter['strEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);#
527     $parsedFilter['strEqExprs'][$i]['operator'] = strtolower($eqExprs[2][$i]);
528     $parsedFilter['strEqExprs'][$i]['value'] = trim($eqExprs[3][$i],"'\"");
529     $parsedFilter['strEqExprs'][$i]['value_type'] = 'Literal';
530     $parsedFilter['strEqExprs'][$i]['value_lang'] = substr($eqExprs[4][$i], 1);     
531     $dtype = substr($eqExprs[5][$i], 2);
532     if ($dtype) {
533        $parsedFilter['strEqExprs'][$i]['value_dtype'] = $this->_validateUri($dtype, RDQL_AND_ERR);
534        if ($dtype{0} != '<')
535               $parsedFilter['strEqExprs'][$i]['value_dtype_is_qname'] = TRUE;   
536     }else
537        $parsedFilter['strEqExprs'][$i]['value_dtype'] = '';
538
539     $filterStr = str_replace($eqExprs[0][$i], " ##strEqExpr_$i## ", $filterStr);
540   }
541   
542   // parse ?var [eq | ne] ?var
543   $ii = count($parsedFilter['strEqExprs']);
544   $reg_ex  = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\?[a-zA-Z0-9_]+)/i";
545   preg_match_all($reg_ex, $filterStr, $eqExprs);
546   foreach ($eqExprs[0] as $i => $eqExpr) {
547     $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
548     $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
549     $parsedFilter['strEqExprs'][$ii]['value'] = $this->_isDefined($eqExprs[3][$i]);
550     $parsedFilter['strEqExprs'][$ii]['value_type'] = 'variable';
551
552     $filterStr = str_replace($eqExprs[0][$i], " ##strEqExpr_$ii## ", $filterStr);
553     $ii++;
554   }
555
556   // parse ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
557   $reg_ex  = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+((<\S+>)|(\S+:\S*))/i";
558   preg_match_all($reg_ex, $filterStr, $eqExprs);
559   foreach ($eqExprs[0] as $i => $eqExpr) {
560     $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
561     $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
562     if ($eqExprs[4][$i]) {
563        $parsedFilter['strEqExprs'][$ii]['value'] = trim($eqExprs[4][$i], "<>");
564        $parsedFilter['strEqExprs'][$ii]['value_type'] = 'URI';
565     }else if($eqExprs[5][$i]){
566        $this->_validateQName($eqExprs[5][$i], RDQL_AND_ERR);
567        $parsedFilter['strEqExprs'][$ii]['value'] = $eqExprs[5][$i];
568        $parsedFilter['strEqExprs'][$ii]['value_type'] = 'QName';
569     }
570
571     $filterStr = str_replace($eqExprs[0][$i], " ##strEqExpr_$ii## ", $filterStr);
572     $ii++;
573   } 
574   
575   $parsedFilter['evalFilterStr'] = $filterStr;
576
577   // all that is left are numerical expressions and place holders for the above expressions
578   preg_match_all("/\?[a-zA-Z0-9_]+/", $filterStr, $vars);
579   foreach ($vars[0] as $var) {
580     $parsedFilter['numExprVars'][] = $this->_isDefined($var);
581   }
582
583   return $parsedFilter;
584 }
585
586
587/**
588 * Find all query variables used in the WHERE clause.
589 *
590 * @return  array [] = ?VARNAME
591 * @access      private
592 */
593 function findAllQueryVariables() {
594
595   $vars = array();
596   foreach ($this->parsedQuery['patterns'] as $pattern) {
597     $count = 0;
598     foreach ($pattern as $v) {
599       if ($v['value'] && $v['value']{0} == '?') {
600          ++$count;
601          if (!in_array($v['value'], $vars))
602             $vars[] = $v['value'];
603       }
604     }
605     if (!$count)
606        trigger_error(RDQL_WHR_ERR .'pattern contains no variables', E_USER_ERROR);
607   }
608
609   return $vars;
610 }
611
612
613/**
614 * Replace all namespace prefixes in the pattern and constraint clause of an rdql query
615 * with the namespaces declared in the USING clause and default namespaces.
616 *
617 * @access      private
618 */
619 function replaceNamespacePrefixes() {
620
621   global $default_prefixes;
622
623   if (!isset($this->parsedQuery['ns']))
624      $this->parsedQuery['ns'] = array();
625
626   // add default namespaces
627   // if in an rdql query a reserved prefix (e.g. rdf: rdfs:) is used
628   // it will be overridden by the default namespace defined in constants.php
629   $this->parsedQuery['ns'] = array_merge($this->parsedQuery['ns'], $default_prefixes);
630
631   // replace namespace prefixes in the FROM clause
632   if (isset($this->parsedQuery['sources']))
633          foreach ($this->parsedQuery['sources'] as $n => $source) {
634                if (isset($source['is_qname']))
635                   $this->parsedQuery['sources'][$n] = $this->_replaceNamespacePrefix($source['value'], RDQL_SRC_ERR);
636                else {
637                   foreach ($this->parsedQuery['ns'] as $prefix => $uri)
638                     $source['value'] = preg_replace("$prefix:", $uri, $source['value']);                                                   
639                   $this->parsedQuery['sources'][$n] = $source['value'];
640                }                 
641                   
642          }
643   
644   // replace namespace prefixes in the where clause
645   foreach ($this->parsedQuery['patterns'] as $n => $pattern) {
646     foreach ($pattern as $key => $v)
647       if ($v['value'] && $v['value']{0} != '?') {
648          if (isset($v['is_qname'])) {                                 
649                 $this->parsedQuery['patterns'][$n][$key]['value']
650                        = $this->_replaceNamespacePrefix($v['value'], RDQL_WHR_ERR);
651                 unset($this->parsedQuery['patterns'][$n][$key]['is_qname']);   
652          } else { // is quoted URI (== <URI>) or Literal               
653              if (isset($this->parsedQuery['patterns'][$n][$key]['is_literal'])) {
654                 if (isset($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname'])) {   
655                                $this->parsedQuery['patterns'][$n][$key]['l_dtype']
656                                        = $this->_replaceNamespacePrefix($v['l_dtype'], RDQL_WHR_ERR);                                         
657                                unset($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname']);   
658                 }else {
659                   foreach ($this->parsedQuery['ns'] as $prefix => $uri)
660                     $this->parsedQuery['patterns'][$n][$key]['l_dtype']
661                       = preg_replace("'$prefix:'", $uri, $this->parsedQuery['patterns'][$n][$key]['l_dtype']);
662                 }     
663              }else {
664                 foreach ($this->parsedQuery['ns'] as $prefix => $uri)
665                   $this->parsedQuery['patterns'][$n][$key]['value']
666                     = preg_replace("'$prefix:'", $uri, $this->parsedQuery['patterns'][$n][$key]['value']);
667              }
668          }
669       }
670   }
671
672   // replace prefixes in the constraint clause
673   if (isset($this->parsedQuery['filters']))
674      foreach ($this->parsedQuery['filters'] as $n => $filter)
675        foreach ($filter['strEqExprs'] as $i => $expr) {
676          if ($expr['value_type'] == 'QName') {                 
677                 $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
678                        = $this->_replaceNamespacePrefix($expr['value'], RDQL_AND_ERR);                 
679                         $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_type'] = 'URI';                             
680          }
681          if ($expr['value_type'] == 'URI')
682             foreach ($this->parsedQuery['ns'] as $prefix => $uri)
683               $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
684                 = preg_replace("'$prefix:'", $uri,
685                    $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']);
686          elseif ($expr['value_type'] == 'Literal') {
687                 if (isset($expr['value_dtype_is_qname'])) {
688                        $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
689                           = $this->_replaceNamespacePrefix($expr['value_dtype'], RDQL_AND_ERR);                           
690                        unset($this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype_is_qname']);             
691                 } else {
692               foreach ($this->parsedQuery['ns'] as $prefix => $uri)
693                 $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
694                   = preg_replace("$prefix:", $uri,
695                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']);
696                 }
697          }
698        }
699
700   unset($this->parsedQuery['ns']);
701 }
702
703
704// =============================================================================
705// *************************** helper functions ********************************
706// =============================================================================
707
708
709/**
710 * Remove whitespace-tokens from the array $this->tokens
711 *
712 * @access      private
713 */
714 function _clearWhiteSpaces() {
715
716   while (current($this->tokens) == ' '  ||
717          current($this->tokens) == "\n" ||
718          current($this->tokens) == "\t" ||
719          current($this->tokens) == "\r")
720
721      unset($this->tokens[key($this->tokens)]);
722 }
723
724
725/**
726 * Check if the query string of the given clause contains an undesired ','.
727 * If a comma was correctly placed then remove it and clear all whitespaces.
728 *
729 * @param   string  $commaExpected
730 * @param   string  $clause_error
731 * @throws  PHPError
732 * @access  private
733 */
734 function _checkComma($commaExpected, $clause_error) {
735
736   $this->_clearWhiteSpaces();
737   if (current($this->tokens) == ',') {
738      if (!$commaExpected)
739         trigger_error($clause_error ."',' - unexpected comma", E_USER_ERROR);
740      else {
741         unset($this->tokens[key($this->tokens)]);
742         $this->_checkComma(FALSE, $clause_error);
743      }
744   }
745 }
746 
747
748/**
749 * Check if the given token is either a variable (?var) or the first token of an URI (<URI>).
750 * In case of an URI this function returns the whole URI string.
751 *
752 * @param   string  $token
753 * @return  array ['value'] = string
754 * @throws  PHPError
755 * @access      private
756 */
757 function _validateVarUri($token) {
758       
759   if ($token{0} == '?') {
760      $token_res['value'] = $this->_validateVar($token, RDQL_WHR_ERR);
761   } else {
762          $token_res['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
763          if ($token{0} != '<')
764                $token_res['is_qname'] = TRUE;           
765   }           
766   return $token_res;
767 }
768
769
/**
 * Check if the given token is either a variable (?var) or the first token
 * of an URI (<URI>), a QName (prefix:local) or a literal ("Literal" / 'Literal').
 * In case of a literal return an array with literal properties (value, language, datatype).
 * In case of a variable, an URI or a QName return ['value'] = string
 * (plus ['is_qname'] = TRUE for a QName).
 *
 * @param   string  $token
 * @return  array ['value'] = string
 *                ['is_qname'] = boolean
 *                ['is_literal'] = boolean
 *                ['l_lang'] = string
 *                ['l_dtype'] = string
 *                NULL if the token matches none of the accepted forms and the
 *                installed error handler lets execution continue
 * @throws  PHPError
 * @access      private
 */
 function _validateVarUriLiteral($token) {

   // Explicit default so the error path returns NULL without touching an
   // undefined variable.
   $statement_object = NULL;

   // [0] replaces the {0} string-offset form that was removed in PHP 8.0.
   $first_char = $token[0];

   if ($first_char == '?')
      $statement_object['value'] = $this->_validateVar($token, RDQL_WHR_ERR);
   elseif ($first_char == "'" || $first_char == '"')
      $statement_object = $this->_validateLiteral($token);
   elseif ($first_char == '<')
      $statement_object['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
   // strpos() !== FALSE replaces ereg(':', ...), which was removed in PHP 7.0;
   // both are true whenever the token contains a colon at any position.
   elseif (strpos($token, ':') !== FALSE) {
      $statement_object['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
      $statement_object['is_qname'] = TRUE;
   } else
      trigger_error(RDQL_WHR_ERR ." '$token' - ?Variable, &lt;URI&gt;, QName, or \"LITERAL\" expected", E_USER_ERROR);

   return $statement_object;
 }
800 
801
/**
 * Verify that the whole token is a variable name: a question mark followed
 * by one or more letters, digits or underscores.
 * The token at the current position of $this->tokens is removed afterwards.
 *
 * @param   string  $token          token to check
 * @param   string  $clause_error   message prefix identifying the clause being parsed
 * @return  string  the unchanged token
 * @throws  PHPError
 * @access      private
 */
 function _validateVar($token, $clause_error) {

   $found = array();
   preg_match("/\?[a-zA-Z0-9_]+/", $token, $found);

   // The pattern is not anchored, so the match only counts when it covers
   // the entire token.
   $coversWholeToken = isset($found[0]) && $found[0] == $token;

   if (!$coversWholeToken) {
      $message = $clause_error ."'" .htmlspecialchars($token)
                ."' - variable name contains illegal characters";
      trigger_error($message, E_USER_ERROR);
   }

   unset($this->tokens[key($this->tokens)]);
   return $token;
 }
820
821
/**
 * Check if $token is the first token of a valid URI (<URI>) or a QName and
 * return the whole URI string (without the enclosing angle brackets) or the
 * QName (without trailing colons).
 * All tokens belonging to the URI / QName are removed from $this->tokens.
 *
 * @param   string  $token
 * @param   string  $clause_error   message prefix identifying the clause being parsed
 * @return  string
 * @throws  PHPError
 * @access      private
 */
 function _validateUri($token, $clause_error) {

   // [0] replaces the {0} string-offset form that was removed in PHP 8.0.
   if ($token[0] != '<') {
      // Not a <URI>: accept it only as a QName. strpos() is deliberately used
      // as a truth value here, so a colon at position 0 (empty prefix) is
      // rejected with the generic message below.
      if (strpos($token, ':') && $this->_validateQName($token, $clause_error)) {
        unset($this->tokens[key($this->tokens)]);
        return rtrim($token, ':');
      }
      $errmsg = $clause_error .'\'' .htmlspecialchars($token) .'\' ';
      if ($clause_error == RDQL_WHR_ERR)
         $errmsg .= "- ?Variable or &lt;URI&gt; or QName expected";
      else
         $errmsg .= "- &lt;URI&gt; or QName expected";
      trigger_error($errmsg, E_USER_ERROR);
   } else {
      // The URI may have been split over several tokens: keep appending
      // tokens until one ends with '>'. The end-of-input test comes first so
      // the last-character test never runs on an empty / FALSE token.
      $token_res = $token;
      while ($token != NULL && substr($token, -1) != '>') {
        if ($token == '(' || $token == ')' || $token == ',' ||
            $token == ' ' || $token == "\n" || $token == "\r") {
           trigger_error($clause_error .'\'' .htmlspecialchars($token_res)
                          ."' - illegal input: '$token' - '>' missing", E_USER_ERROR);
        }
        unset($this->tokens[key($this->tokens)]);
        $token = current($this->tokens);
        $token_res .= $token;
      }
      // Ran out of tokens before a closing '>' was seen.
      if ($token == NULL)
         trigger_error($clause_error .'\'' .htmlspecialchars($token_res) ."' - '>' missing", E_USER_ERROR);
      unset($this->tokens[key($this->tokens)]);
      return trim($token_res, '<>');
   }
 }
862
863
/**
 * Check if $token is the first token of a valid literal ("LITERAL" or 'LITERAL')
 * and return an array with literal properties (value, language, datatype).
 * The literal may span several tokens; every token consumed is removed from
 * $this->tokens. Accepted endings are: closing quotation mark,
 * quote@language, quote@language^^datatype and quote^^datatype.
 *
 * @param   string  $token
 * @return  array   ['value'] = string
 *                  ['is_literal'] = boolean
 *                  ['l_lang'] = string
 *                  ['l_dtype'] = string
 *                  ['l_dtype_is_qname'] = boolean (only set when the datatype
 *                                                  does not start with '<')
 * @throws  PHPError
 * @access      private
 */
 function _validateLiteral($token) {

   // [0] replaces the {0} string-offset form that was removed in PHP 8.0.
   $quotation_mark = $token[0];
   $statement_object = array ('value' => '',
                              'is_literal' => TRUE,
                              'l_lang' => '',
                              'l_dtype' => '');
   // Drop the opening quotation mark from the current token in place, so the
   // loop below sees only literal content and a possible closing mark.
   $this->tokens[key($this->tokens)] = substr($token,1);

   $return = FALSE;
   foreach ($this->tokens as $k => $token) {

     // token ends with the closing quotation mark: plain literal
     if ($token != NULL && substr($token, -1) == $quotation_mark) {
         $token = rtrim($token, $quotation_mark);
         $return = TRUE;

      // parse @language(^^datatype)?
      // (the substr() test covers the case where strpos() returns 0)
     }elseif (strpos($token, $quotation_mark .'@') || substr($token, 0, 2)  == $quotation_mark .'@') {
        $lang = substr($token, strpos($token, $quotation_mark .'@')+2);
        if (strpos($lang, '^^') || substr($lang, 0,2) == '^^') {
           $dtype = substr($lang, strpos($lang, '^^')+2);
           if (!$dtype)
              trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                                         .$token  ." - datatype expected" ,E_USER_ERROR);

           $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
           if ($dtype[0] != '<')
              $statement_object['l_dtype_is_qname'] = TRUE;

           // keep only the language tag in front of '^^'
           $lang = substr($lang, 0, strpos($lang, '^^'));
        }
        if (!$lang)
           trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                                      .$token ." - language expected" ,E_USER_ERROR);
        $statement_object['l_lang'] = $lang;
        // keep only the literal text in front of the closing quotation mark
        $token = substr($token, 0, strpos($token, $quotation_mark .'@'));
        $return = TRUE;

     // parse ^^datatype
     }elseif (strpos($token, $quotation_mark .'^^') || substr($token, 0, 3)  == $quotation_mark .'^^') {
        $dtype = substr($token, strpos($token, $quotation_mark .'^^')+3);
        if (!$dtype)
           trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                                      .$token  ." - datatype expected" ,E_USER_ERROR);

        $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
        if ($dtype[0] != '<')
           $statement_object['l_dtype_is_qname'] = TRUE;

        $token = substr($token, 0, strpos($token, $quotation_mark .'^^'));
        $return = TRUE;

     // a quotation mark inside the token that matched none of the forms above
     }elseif (strpos($token, $quotation_mark))
        trigger_error(RDQL_WHR_ERR ."'$token' - illegal input", E_USER_ERROR);

     // accumulate the literal text and consume the token
     $statement_object['value'] .= $token;
     unset($this->tokens[$k]);
     if ($return)
        return $statement_object;
   }
   // all tokens consumed without finding the end of the literal
   trigger_error(RDQL_WHR_ERR ."quotation end mark: $quotation_mark missing", E_USER_ERROR);
 }
937
938 
/**
 * Check if the given token is a valid QName (prefix:local).
 * The prefix must be an NCName. The local part may be missing or empty
 * ("prefix:"); when present it must be an NCName as well.
 * Every violation is reported through trigger_error(); the function itself
 * always returns TRUE when execution continues.
 *
 * @param   string  $token
 * @param   string  $clause_error   message prefix identifying the clause being parsed
 * @return  boolean
 * @throws  PHPError
 * @access      private
 */
function _validateQName($token, $clause_error) {

  $parts = explode(':', $token);
  if (count($parts) > 2)
        trigger_error($clause_error ."illegal QName: '$token'", E_USER_ERROR);
  if (!$this->_validateNCName($parts[0]))
        trigger_error($clause_error ."illegal prefix in QName: '$token'", E_USER_ERROR);

  // Test for presence and non-emptiness explicitly: a plain truthiness test
  // raised an undefined-index notice for tokens without a colon and let the
  // local part "0" (not an NCName) slip through unvalidated.
  $has_local_part = isset($parts[1]) && $parts[1] !== '';
  if ($has_local_part && !$this->_validateNCName($parts[1]))
        trigger_error($clause_error ."illegal local part in QName: '$token'", E_USER_ERROR);

  return TRUE;
}
960
961
/**
 * Tell whether the whole token is an NCName as understood by this parser:
 * one or more letters or underscores, optionally followed by letters,
 * digits, underscores, dots or hyphens.
 *
 * @param   string  $token
 * @return  boolean TRUE if the entire token matches, FALSE otherwise
 * @access      private
 */
function _validateNCName($token) {

  $hits = array();
  preg_match("/[a-zA-Z_]+[a-zA-Z_0-9.\-]*/", $token, $hits);

  // The pattern is unanchored; the token is valid only if the first match
  // spans the complete token.
  return (isset($hits[0]) && $hits[0] == $token);
}
976
977
/**
 * Verify that the given token is usable as a namespace prefix, i.e. that it
 * passes _validateNCName(). The token at the current position of
 * $this->tokens is removed afterwards.
 *
 * @param   string  $token
 * @return  string  the unchanged token
 * @throws  PHPError
 * @access      private
 */
 function _validatePrefix($token) {

   $isLegalPrefix = $this->_validateNCName($token);

   if (!$isLegalPrefix) {
      $message = RDQL_USG_ERR ."'" .htmlspecialchars($token)
                ."' - illegal input, namespace prefix expected";
      trigger_error($message, E_USER_ERROR);
   }

   unset($this->tokens[key($this->tokens)]);
   return $token;
 }
994
/**
 * Expand a QName to a full URI by substituting its prefix with the namespace
 * registered under that prefix in $this->parsedQuery['ns'].
 *
 * @param   string  $qName          QName of the form prefix:local
 * @param   string  $clause_error   message prefix identifying the clause being parsed
 * @return  string  namespace followed by the local part
 * @throws  PHPError  if the prefix is not registered
 * @access      private
 */
 function _replaceNamespacePrefix($qName, $clause_error) {

   $pieces = explode(':', $qName);
   $prefix = $pieces[0];

   if (!array_key_exists($prefix, $this->parsedQuery['ns'])) {
      trigger_error($clause_error ."undefined prefix: '" .$prefix ."' in: '" .$qName ."'", E_USER_ERROR);
   }

   return $this->parsedQuery['ns'][$prefix] .$pieces[1];
 }
1011                       
1012                       
/**
 * Run _isDefined() on every variable listed in the SELECT clause, so that a
 * variable missing from the rest of the query is reported as an error.
 *
 * @access private
 */
 function _checkSelectVars() {

   $selected = $this->parsedQuery['selectVars'];

   foreach ($selected as $selectVar) {
      $this->_isDefined($selectVar);
   }
 }
1023
1024
/**
 * Report an error unless the given variable is among the variables returned
 * by findAllQueryVariables().
 *
 * @param $var string
 * @return     string  the unchanged variable name
 * @throws PHPError
 * @access private
 */
 function _isDefined($var) {

   $isKnown = in_array($var, $this->findAllQueryVariables());

   if (!$isKnown) {
      trigger_error(RDQL_SYN_ERR .": '$var' - variable must be defined in the WHERE clause", E_USER_ERROR);
   }

   return $var;
 }
1041
1042
/**
 * Report an error if the regular expression from the AND clause has no
 * opening quotation mark, or if its opening and closing marks differ.
 * Both conditions are checked independently.
 *
 * @param  string $filterString  filter text quoted in the error message
 * @param  string $lQuotMark     opening quotation mark (empty if absent)
 * @param  string $rQuotMark     closing quotation mark
 * @throws PHPError
 * @access private
 */
 function _checkRegExQuotation($filterString, $lQuotMark, $rQuotMark) {

   $hasOpeningMark = (bool) $lQuotMark;
   if (!$hasOpeningMark) {
      trigger_error(RDQL_AND_ERR ."'$filterString' - regular expressions must be quoted", E_USER_ERROR);
   }

   $marksAgree = ($lQuotMark == $rQuotMark);
   if (!$marksAgree) {
      trigger_error(RDQL_AND_ERR ."'$filterString' - quotation end mark in the regular expression missing", E_USER_ERROR);
   }
 }
1060
1061} // end: Class RdqlParser
1062
1063?>
Note: See TracBrowser for help on using the repository browser.