[12] | 1 | <?php
|
---|
| 2 |
|
---|
| 3 | // ----------------------------------------------------------------------------------
|
---|
| 4 | // Class: RdqlParser
|
---|
| 5 | // ----------------------------------------------------------------------------------
|
---|
| 6 |
|
---|
| 7 | /**
|
---|
| 8 | * This class contains methods for parsing an Rdql query string into PHP variables.
|
---|
| 9 | * The output of the RdqlParser is an array with variables and constraints
|
---|
| 10 | * of each query clause (Select, From, Where, And, Using).
|
---|
| 11 | * To perform an RDQL query this array has to be passed to the RdqlEngine.
|
---|
| 12 | *
|
---|
| 13 | * @version $Id: RdqlParser.php 282 2006-06-08 06:25:14Z tgauss $
|
---|
| 14 | * @author Radoslaw Oldakowski <radol@gmx.de>
|
---|
| 15 | *
|
---|
| 16 | * @package rdql
|
---|
| 17 | * @access public
|
---|
| 18 | */
|
---|
| 19 |
|
---|
| 20 |
|
---|
| 21 | Class RdqlParser extends Object{
|
---|
| 22 |
|
---|
| 23 | /**
|
---|
| 24 | * Parsed query variables and constraints.
|
---|
| 25 | * { } are only used within the parser class and are not returned as parsed query.
|
---|
| 26 | * ( [] stands for an integer index - 0..N )
|
---|
| 27 | *
|
---|
| 28 | * @var array ['selectVars'][] = ?VARNAME
|
---|
| 29 | * ['sources'][]{['value']} = URI | QName
|
---|
| 30 | * {['is_qname'] = boolean}
|
---|
| 31 | * ['patterns'][]['subject']['value'] = VARorURI
|
---|
| 32 | * {['is_qname'] = boolean}
|
---|
| 33 | * ['predicate']['value'] = VARorURI
|
---|
| 34 | * {['is_qname'] = boolean}
|
---|
| 35 | * ['object']['value'] = VARorURIorLiteral
|
---|
| 36 | * {['is_qname'] = boolean}
|
---|
| 37 | * ['is_literal'] = boolean
|
---|
| 38 | * ['l_lang'] = string
|
---|
| 39 | * ['l_dtype'] = string
|
---|
| 40 | * {['l_dtype_is_qname'] = boolean}
|
---|
| 41 | * ['filters'][]['string'] = string
|
---|
| 42 | * ['evalFilterStr'] = string
|
---|
| 43 | * ['regexEqExprs'][]['var'] = ?VARNAME
|
---|
| 44 | * ['operator'] = (~~ | =~ | !~)
|
---|
| 45 | * ['regex'] = string
|
---|
| 46 | * ['strEqExprs'][]['var'] = ?VARNAME
|
---|
| 47 | * ['operator'] = (eq | ne)
|
---|
| 48 | * ['value'] = string
|
---|
| 49 | * ['value_type'] = ('variable' | 'URI' | 'QName' | 'Literal')
|
---|
| 50 | * ['value_lang'] = string
|
---|
| 51 | * ['value_dtype'] = string
|
---|
| 52 | * {['value_dtype_is_qname'] = boolean}
|
---|
| 53 | * ['numExprVars'][] = ?VARNAME
|
---|
| 54 | * {['ns'][PREFIX] = NAMESPACE}
|
---|
| 55 | * @access private
|
---|
| 56 | */
|
---|
| 57 | var $parsedQuery;
|
---|
| 58 |
|
---|
| 59 |
|
---|
| 60 | /**
|
---|
| 61 | * Query string divided into a sequence of tokens.
|
---|
| 62 | * A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
|
---|
| 63 | * or a string containing any characters except from the above.
|
---|
| 64 | *
|
---|
| 65 | * @var array
|
---|
| 66 | * @access private
|
---|
| 67 | */
|
---|
| 68 | var $tokens;
|
---|
| 69 |
|
---|
| 70 |
|
---|
/**
 * Parse the given RDQL query string and return an array with query variables and constraints.
 *
 * The query is processed in this order: comments are removed, the string is
 * tokenized, the clauses are parsed (starting with SELECT), the selected
 * variables are resolved ('*' is expanded to all pattern variables, an explicit
 * list is checked) and finally namespace prefixes are replaced.
 *
 * State left over from a previous call is discarded first, so that one parser
 * instance can be used for several queries without results leaking between them.
 *
 * @param   string  $queryString
 * @return  array   $this->parsedQuery (returned by reference)
 * @access  public
 */
function & parseQuery($queryString) {

    // 'selectVars' is filled by appending and 'patterns' by index, so data of an
    // earlier query would otherwise survive in the result of this one
    $this->parsedQuery = array();
    $this->tokens = array();

    $cleanQueryString = $this->removeComments($queryString);
    $this->tokenize($cleanQueryString);
    $this->startParsing();

    if (isset($this->parsedQuery['selectVars'][0]) && $this->parsedQuery['selectVars'][0] == '*')
        $this->parsedQuery['selectVars'] = $this->findAllQueryVariables();
    else
        $this->_checkSelectVars();
    $this->replaceNamespacePrefixes();

    return $this->parsedQuery;
}
|
---|
| 91 |
|
---|
| 92 |
|
---|
/**
 * Remove comments from the passed query string.
 *
 * Two comment forms are recognized: "// ..." up to the end of the line and
 * "/* ... *" . "/" blocks. Each removed comment is replaced by a single blank so
 * that the tokens on either side of it stay separated.
 * Quoted literals (including a trailing @lang and ^^datatype) and <URI>s are
 * copied verbatim and are never searched for comments.
 *
 * @param   string  $query
 * @return  string  the query string without comments
 * @throws  PHPError  if a block comment is not terminated
 * @access  private
 */
function removeComments($query) {

    $last = strlen($query)-1;
    // two sentinel blanks: the look-aheads at $i+1 and $i+2 below must never
    // read past the end of the string, even when the query ends with a quote
    $query .= '  ';
    $clean = '';
    for ($i=0; $i<=$last; $i++) {
        $char = $query[$i];

        // don't search for comments inside a 'literal'@lang^^dtype or "literal"@lang^^dtype
        if ($char == "'" || $char == '"') {
            $quotMark = $char;
            do
                $clean .= $query[$i++];
            while($i < $last && $query[$i] != $quotMark);
            $clean .= $query[$i];
            // language
            if ($query[$i+1] == '@') {
                do{
                    // the datatype part is copied by the next block
                    if ($query[$i+1] == '^' && $query[$i+2] == '^')
                        break;
                    $clean .= $query[++$i];
                }while ($i < $last && $query[$i] != ' '  && $query[$i] != "\t"
                                   && $query[$i] != "\n" && $query[$i] != "\r");
            }
            // datatype
            if ($query[$i+1] == '^' && $query[$i+2] == '^') {
                do
                    $clean .= $query[++$i];
                while ($i < $last && $query[$i] != ' '  && $query[$i] != "\t"
                                  && $query[$i] != "\n" && $query[$i] != "\r" );
            }
        // don't search for comments inside an <URI> either
        }elseif ($char == '<') {
            do{
                $clean .= $query[$i++];
            }while($i < $last && $query[$i] != '>');
            $clean .= $query[$i];
        }elseif ($char == '/') {
            // clear: // comment
            if ($i < $last && $query[$i+1] == '/') {
                while($i < $last && $query[$i] != "\n" && $query[$i] != "\r")
                    ++$i;
                $clean .= ' ';
            // clear: /*comment*/
            }elseif ($i < $last-2 && $query[$i+1] == '*') {
                $i += 2;
                while($i < $last && ($query[$i] != '*' || $query[$i+1] != '/'))
                    ++$i;
                if ($i >= $last && ($query[$last-1] != '*' || $query[$last] != '/'))
                    trigger_error(RDQL_SYN_ERR .": unterminated comment - '*/' missing", E_USER_ERROR);
                // step onto the closing '/', the for-loop then moves past it
                ++$i;
                // a comment separates tokens just like whitespace does
                $clean .= ' ';
            }else
                $clean .= $char;
        }else
            $clean .= $char;
    }
    return $clean;
}
|
---|
| 157 |
|
---|
| 158 |
|
---|
/**
 * Divide the query string into tokens and store them in $this->tokens.
 * A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
 * or a string containing any character except from the above.
 *
 * The query string is trimmed first. Any tokens left from a previous run are
 * discarded. The list may end with an empty string token, which the clause
 * parsers treat as the end of input.
 *
 * @param   string  $queryString
 * @access  private
 */
function tokenize($queryString) {

    $queryString = trim($queryString, " \r\n\t");
    $specialChars = array (" ", "\t", "\r", "\n", ",", "(", ")");
    $len = strlen($queryString);
    // fresh list, so that no stale tokens of an earlier query remain
    $this->tokens = array(0 => '');
    $n = 0;

    for ($i=0; $i<$len; ++$i) {
        $char = $queryString[$i];
        if (!in_array($char, $specialChars, true))
            // ordinary character: extend the token that is being collected
            $this->tokens[$n] .= $char;
        else {
            // a special character closes the collected token (if there is one),
            // forms a token of its own and opens a new empty token
            if ($this->tokens[$n] !== '')
                ++$n;
            $this->tokens[$n] = $char;
            $this->tokens[++$n] = '';
        }
    }
}
|
---|
| 186 |
|
---|
| 187 |
|
---|
/**
 * Entry point for parsing the tokenized query string.
 *
 * Only the SELECT clause parser is invoked here; the parsers of the
 * following clauses are called from within the clause parsers themselves.
 *
 * @access  private
 */
function startParsing() {

    // SELECT has to be the first clause of the query
    $this->parseSelect();
}
|
---|
| 197 |
|
---|
| 198 |
|
---|
/**
 * Parse the SELECT clause of an Rdql query.
 * When the parsing of the SELECT clause is finished, this method will call
 * a suitable method to parse the subsequent clause: parseFrom() on a
 * FROM/SOURCE keyword, parseWhere() on a WHERE keyword.
 *
 * Accepted forms are "SELECT *" and "SELECT ?var (, ?var)*". Consumed tokens
 * are removed from $this->tokens.
 *
 * @throws  PhpError
 * @access  private
 */
function parseSelect() {

    $this->_clearWhiteSpaces();

    // Check if the queryString contains a "SELECT" token
    if (strcasecmp('SELECT', current($this->tokens)))
        trigger_error(RDQL_SEL_ERR  ."'" .current($this->tokens)
                                    ."' - SELECT keyword expected", E_USER_ERROR);
    unset($this->tokens[key($this->tokens)]);
    $this->_clearWhiteSpaces();

    // Parse SELECT *
    if (current($this->tokens) == '*') {
        unset($this->tokens[key($this->tokens)]);
        $this->parsedQuery['selectVars'][0] = '*';
        $this->_clearWhiteSpaces();
        if (strcasecmp('FROM', current($this->tokens))
            && strcasecmp('SOURCE', current($this->tokens))
            && strcasecmp('WHERE', current($this->tokens)))
            trigger_error(RDQL_SYN_ERR .": '" .htmlspecialchars(current($this->tokens))
                                       ."' - SOURCE or WHERE clause expected", E_USER_ERROR);
    }

    // Parse SELECT ?Var (, ?Var)*
    $commaExpected = FALSE;   // a variable was just read, a comma may follow
    $comma = FALSE;           // a comma was just read, a variable has to follow
    while (current($this->tokens) != NULL) {
        $k = key($this->tokens);
        $token = $this->tokens[$k];

        switch ($token) {
            case ',':
                if (!$commaExpected)
                    trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                $comma = TRUE;
                $commaExpected = FALSE;
                break;
            case '(':
            case ')':
                trigger_error(RDQL_SEL_ERR ." '$token' - illegal input", E_USER_ERROR);
                break;
            default :
                if (!strcasecmp('FROM', $token) || !strcasecmp('SOURCE', $token)) {
                    if ($comma)
                        trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                    unset($this->tokens[$k]);
                    return $this->parseFrom();
                }elseif (!strcasecmp('WHERE', $token)) {
                    // a comma directly before WHERE is reported as such
                    if ($comma)
                        trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                    unset($this->tokens[$k]);
                    return $this->parseWhere();
                }
                if ($token[0] == '?') {
                    $this->parsedQuery['selectVars'][] = $this->_validateVar($token, RDQL_SEL_ERR);
                    $commaExpected = TRUE;
                    $comma = FALSE;
                }else
                    trigger_error(RDQL_SEL_ERR ." '$token' - '?' missing", E_USER_ERROR);
        }
        unset($this->tokens[$k]);
        $this->_clearWhiteSpaces();
    }
    // the tokens ran out before a FROM/SOURCE or WHERE keyword was found
    trigger_error(RDQL_SYN_ERR . ': WHERE clause missing', E_USER_ERROR);
}
|
---|
| 270 |
|
---|
| 271 |
|
---|
/**
 * Parse the FROM/SOURCE clause of an Rdql query.
 * When the parsing of this clause is finished, parseWhere() will be called.
 *
 * Each source is stored in $this->parsedQuery['sources'][] with its 'value'
 * and, if the token does not start with '<', the flag 'is_qname'.
 * A WHERE keyword ends the clause only after at least one source was read.
 *
 * @throws  PhpError
 * @access  private
 */
function parseFrom() {

    $comma = FALSE;           // a comma was just read, a source has to follow
    $commaExpected = FALSE;   // a source was just read, a comma may follow
    $i = -1;
    while (current($this->tokens) != NULL) {

        $this->_clearWhiteSpaces();
        // !empty() instead of count(): 'sources' does not exist before the first source
        if (!strcasecmp('WHERE', current($this->tokens)) && !empty($this->parsedQuery['sources'])) {
            if ($comma)
                trigger_error(RDQL_SRC_ERR ." ',' - unexpected comma", E_USER_ERROR);
            unset($this->tokens[key($this->tokens)]);
            return $this->parseWhere();
        }
        if (current($this->tokens) == ',') {
            if ($commaExpected) {
                $comma = TRUE;
                $commaExpected = FALSE;
                unset($this->tokens[key($this->tokens)]);
            }else
                trigger_error(RDQL_SRC_ERR ."',' - unexpected comma", E_USER_ERROR);
        }else{
            $token = current($this->tokens);
            // NOTE(review): the token is not removed here, so _validateUri() is
            // presumably responsible for consuming it - confirm against that method
            $this->parsedQuery['sources'][++$i]['value'] = $this->_validateUri($token, RDQL_SRC_ERR);
            if ($token[0] != '<')
                $this->parsedQuery['sources'][$i]['is_qname'] = TRUE;
            $commaExpected = TRUE;
            $comma = FALSE;
        }
    }
    trigger_error(RDQL_SYN_ERR .': WHERE clause missing', E_USER_ERROR);
}
|
---|
| 311 |
|
---|
| 312 |
|
---|
/**
 * Parse the WHERE clause of an Rdql query.
 * When the parsing of the WHERE clause is finished, this method will call
 * a suitable method to parse the subsequent clause if provided:
 * parseAnd() on an AND keyword, parseUsing() on a USING keyword.
 *
 * Every triple pattern "(subject, predicate, object)" is stored in
 * $this->parsedQuery['patterns'][]. AND and USING are only accepted as
 * clause keywords after at least one pattern was read.
 *
 * @throws  PhpError
 * @access  private
 */
function parseWhere() {

    $comma = FALSE;           // a comma was just read, a pattern has to follow
    $commaExpected = FALSE;   // a pattern was just read, a comma may follow
    $i=0;

    do {
        $this->_clearWhiteSpaces();
        // !empty() instead of count(): 'patterns' does not exist before the first pattern
        if (!strcasecmp('AND', current($this->tokens))
            && !empty($this->parsedQuery['patterns'])){
            if ($comma)
                trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
            unset($this->tokens[key($this->tokens)]);
            return $this->parseAnd();
        }elseif (!strcasecmp('USING', current($this->tokens))
                 && !empty($this->parsedQuery['patterns'])) {
            if ($comma)
                trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
            unset($this->tokens[key($this->tokens)]);
            return $this->parseUsing();
        }

        if (current($this->tokens) == ',') {
            $comma = TRUE;
            $this->_checkComma($commaExpected, RDQL_WHR_ERR);

        }else{

            if (current($this->tokens) != '(')
                trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens)
                                           ."' - '(' expected", E_USER_ERROR);
            unset($this->tokens[key($this->tokens)]);
            $this->_clearWhiteSpaces();

            // subject, predicate and object; the commas between them are optional
            $this->parsedQuery['patterns'][$i]['subject'] = $this->_validateVarUri(current($this->tokens));
            $this->_checkComma(TRUE, RDQL_WHR_ERR);
            $this->parsedQuery['patterns'][$i]['predicate'] = $this->_validateVarUri(current($this->tokens));
            $this->_checkComma(TRUE, RDQL_WHR_ERR);
            $this->parsedQuery['patterns'][$i++]['object'] = $this->_validateVarUriLiteral(current($this->tokens));
            $this->_clearWhiteSpaces();

            if (current($this->tokens) != ')')
                trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens) ."' - ')' expected", E_USER_ERROR);
            unset($this->tokens[key($this->tokens)]);
            $this->_clearWhiteSpaces();
            $commaExpected = TRUE;
            $comma = FALSE;
        }
    }while(current($this->tokens) != NULL);

    // the query ended right after a comma
    if ($comma)
        trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
}
|
---|
| 374 |
|
---|
| 375 |
|
---|
/**
 * Parse the AND clause of an Rdql query.
 *
 * The remaining tokens (whitespace tokens included) are concatenated into
 * filter strings. A ',' token closes the current filter and starts the next
 * one; a USING keyword closes the current filter and hands over to
 * parseUsing(). Each closed filter is passed to parseFilter() together with
 * its running index.
 *
 * @throws  PhpError
 * @access  private
 * @todo    clear comments
 */
function parseAnd() {

    $this->_clearWhiteSpaces();
    $filterIndex = 0;
    $buffer = '';

    while (current($this->tokens) != NULL) {
        $pos = key($this->tokens);
        $tok = $this->tokens[$pos];

        if (strcasecmp('USING', $tok) == 0) {
            // end of the AND clause
            $this->parseFilter($filterIndex, $buffer);
            unset($this->tokens[$pos]);
            return $this->parseUsing();
        }

        if ($tok == ',') {
            // the comma itself is not part of any filter
            $this->parseFilter($filterIndex, $buffer);
            ++$filterIndex;
            $buffer = '';
        }else {
            $buffer .= $tok;
        }
        unset($this->tokens[$pos]);
    }

    // last filter of a query without USING clause
    $this->parseFilter($filterIndex, $buffer);
}
|
---|
| 408 |
|
---|
| 409 |
|
---|
/**
 * Parse the USING clause of an Rdql query.
 *
 * The clause is a list of declarations "prefix FOR <namespace>", optionally
 * separated by commas. Each declaration is stored as
 * $this->parsedQuery['ns'][prefix] = namespace.
 *
 * @throws  PhpError
 * @access  private
 */
function parseUsing() {

    $commaExpected = FALSE;   // a declaration was just read, a comma may follow
    $comma = FALSE;           // a comma was just read, a declaration has to follow

    do {
        $this->_clearWhiteSpaces();
        if (current($this->tokens) == ',') {
            $comma = TRUE;
            $this->_checkComma($commaExpected, RDQL_USG_ERR);
        }else{
            // NOTE(review): the prefix and URI tokens are not removed here, so the
            // _validate*() helpers presumably consume them - confirm
            $prefix = $this->_validatePrefix(current($this->tokens));
            $this->_clearWhiteSpaces();

            if (strcasecmp('FOR', current($this->tokens)))
                trigger_error(RDQL_USG_ERR ." keyword: 'FOR' missing in the namespace declaration: '", E_USER_ERROR);
            unset($this->tokens[key($this->tokens)]);
            $this->_clearWhiteSpaces();

            $this->parsedQuery['ns'][$prefix] = $this->_validateUri(current($this->tokens), RDQL_USG_ERR);
            $this->_clearWhiteSpaces();
            $commaExpected = TRUE;
            $comma = FALSE;
        }
    }while(current($this->tokens) != NULL);

    // the query ended right after a comma - report it for the USING clause
    if ($comma)
        trigger_error(RDQL_USG_ERR ." ',' - unexpected comma", E_USER_ERROR);
}
|
---|
| 445 |
|
---|
| 446 |
|
---|
/**
 * Check if a filter from the AND clause contains an equal number of '(' and ')'
 * and parse filter expressions.
 *
 * An empty filter, or one that consists of whitespace only, means that a comma
 * was not followed by an expression and is reported as an unexpected comma.
 * The parsed filter is stored in $this->parsedQuery['filters'][$n].
 *
 * @param   integer $n       index of the filter within the AND clause
 * @param   string  $filter  the filter string
 * @throws  PHPError
 * @access  private
 */
function parseFilter($n, $filter) {

    if (trim((string) $filter) === '')
        trigger_error(RDQL_AND_ERR ." ',' - unexpected comma", E_USER_ERROR);

    // positive: more '(' than ')', negative: more ')' than '('
    $paren = substr_count($filter, '(') - substr_count($filter, ')');
    if ($paren > 0)
        trigger_error(RDQL_AND_ERR ."'" .htmlspecialchars($filter) ."' - ')' missing ", E_USER_ERROR);
    elseif ($paren < 0)
        trigger_error(RDQL_AND_ERR ."'" .htmlspecialchars($filter) ."' - too many ')' ", E_USER_ERROR);

    $this->parsedQuery['filters'][$n] = $this->parseExpressions($filter);
}
|
---|
| 471 |
|
---|
| 472 |
|
---|
/**
 * Parse expressions inside the passed filter:
 * 1)  regex equality expressions:   ?var [~~ | =~ | !~ ] REG_EX
 * 2a) string equality expressions:  ?var [eq | ne] "literal"@lang^^dtype
 * 2b) string equality expressions:  ?var [eq | ne] ?var
 * 2c) string equality expressions:  ?var [eq | ne] <URI>  or  ?var [eq | ne] prefix:local_name
 * 3)  numerical expressions:        e.g. (?var1 - ?var2)*4 >= 20
 *
 * In cases 1-2 each expression of the given filter is parsed into an array and
 * replaced in the filter string by a place holder (" ##RegEx_N## " or
 * " ##strEqExpr_N## "). What remains in 'evalFilterStr' are numerical
 * expressions and place holders; the variables still found in it are collected
 * in 'numExprVars'.
 *
 * @param   string  $filterStr
 * @return  array   ['string']                         = string (the unmodified filter)
 *                  ['evalFilterStr']                  = string
 *                  ['regexEqExprs'][]['var']          = ?VARNAME
 *                                    ['operator']     = (~~ | =~ | !~)
 *                                    ['regex']        = string
 *                  ['strEqExprs'][]['var']            = ?VARNAME
 *                                  ['operator']       = (eq | ne)
 *                                  ['value']          = string
 *                                  ['value_type']     = ('variable' | 'URI' | 'QName' | 'Literal')
 *                                  ['value_lang']     = string  (literals only)
 *                                  ['value_dtype']    = string  (literals only)
 *                                  ['value_dtype_is_qname'] = boolean (literals only)
 *                  ['numExprVars'][]                  = ?VARNAME
 * @throws  PHPError
 * @access  private
 */
function parseExpressions($filterStr) {

    $parsedFilter['string'] = $filterStr;
    $parsedFilter['regexEqExprs'] = array();
    $parsedFilter['strEqExprs'] = array();
    $parsedFilter['numExprVars'] = array();

    // parse regex string equality expressions, e.g. ?x ~~ !//foo.com/r!i
    $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+([~!=]~)\s+(['|\"])?([^\s'\"]+)(['|\"])?/";
    preg_match_all($reg_ex, $filterStr, $eqExprs);
    foreach ($eqExprs[0] as $i => $eqExpr) {
        $this->_checkRegExQuotation($filterStr, $eqExprs[3][$i], $eqExprs[5][$i]);
        $parsedFilter['regexEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
        $parsedFilter['regexEqExprs'][$i]['operator'] = $eqExprs[2][$i];
        $parsedFilter['regexEqExprs'][$i]['regex'] = $eqExprs[4][$i];

        $filterStr = str_replace($eqExpr, " ##RegEx_$i## ", $filterStr);
    }

    // parse ?var  [eq | ne] "literal"@lang^^dtype
    $reg_ex  = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\'[^\']*\'|\"[^\"]*\")";
    $reg_ex .= "(@[a-zA-Z]+)?(\^{2}\S+:?\S+)?/i";
    preg_match_all($reg_ex, $filterStr, $eqExprs);
    foreach ($eqExprs[0] as $i => $eqExpr) {
        $parsedFilter['strEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
        $parsedFilter['strEqExprs'][$i]['operator'] = strtolower($eqExprs[2][$i]);
        // remove exactly the one enclosing pair of quotation marks; quote
        // characters at the edges of the literal itself are kept
        $parsedFilter['strEqExprs'][$i]['value'] = (string) substr($eqExprs[3][$i], 1, -1);
        $parsedFilter['strEqExprs'][$i]['value_type'] = 'Literal';
        // drop the leading '@'
        $parsedFilter['strEqExprs'][$i]['value_lang'] = (string) substr($eqExprs[4][$i], 1);
        // drop the leading '^^'
        $dtype = substr($eqExprs[5][$i], 2);
        if ($dtype) {
            $parsedFilter['strEqExprs'][$i]['value_dtype'] = $this->_validateUri($dtype, RDQL_AND_ERR);
            if ($dtype[0] != '<')
                $parsedFilter['strEqExprs'][$i]['value_dtype_is_qname'] = TRUE;
        }else
            $parsedFilter['strEqExprs'][$i]['value_dtype'] = '';

        $filterStr = str_replace($eqExpr, " ##strEqExpr_$i## ", $filterStr);
    }

    // parse ?var [eq | ne] ?var
    // $ii continues the numbering of the string equality expressions
    $ii = count($parsedFilter['strEqExprs']);
    $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\?[a-zA-Z0-9_]+)/i";
    preg_match_all($reg_ex, $filterStr, $eqExprs);
    foreach ($eqExprs[0] as $i => $eqExpr) {
        $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
        $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
        $parsedFilter['strEqExprs'][$ii]['value'] = $this->_isDefined($eqExprs[3][$i]);
        $parsedFilter['strEqExprs'][$ii]['value_type'] = 'variable';

        $filterStr = str_replace($eqExpr, " ##strEqExpr_$ii## ", $filterStr);
        $ii++;
    }

    // parse ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
    $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+((<\S+>)|(\S+:\S*))/i";
    preg_match_all($reg_ex, $filterStr, $eqExprs);
    foreach ($eqExprs[0] as $i => $eqExpr) {
        $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
        $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
        if ($eqExprs[4][$i]) {
            $parsedFilter['strEqExprs'][$ii]['value'] = trim($eqExprs[4][$i], "<>");
            $parsedFilter['strEqExprs'][$ii]['value_type'] = 'URI';
        }else if($eqExprs[5][$i]){
            $this->_validateQName($eqExprs[5][$i], RDQL_AND_ERR);
            $parsedFilter['strEqExprs'][$ii]['value'] = $eqExprs[5][$i];
            $parsedFilter['strEqExprs'][$ii]['value_type'] = 'QName';
        }

        $filterStr = str_replace($eqExpr, " ##strEqExpr_$ii## ", $filterStr);
        $ii++;
    }

    $parsedFilter['evalFilterStr'] = $filterStr;

    // all that is left are numerical expressions and place holders for the above expressions
    preg_match_all("/\?[a-zA-Z0-9_]+/", $filterStr, $vars);
    foreach ($vars[0] as $var) {
        $parsedFilter['numExprVars'][] = $this->_isDefined($var);
    }

    return $parsedFilter;
}
|
---|
| 585 |
|
---|
| 586 |
|
---|
/**
 * Find all query variables used in the WHERE clause.
 *
 * Collects, without duplicates and in order of first appearance, every value
 * of $this->parsedQuery['patterns'] that starts with '?'. A pattern that
 * contains no variable at all raises an error.
 *
 * @return  array [] = ?VARNAME
 * @throws  PHPError
 * @access  private
 */
function findAllQueryVariables() {

    $vars = array();
    foreach ($this->parsedQuery['patterns'] as $pattern) {
        // number of variables in this pattern
        $count = 0;
        foreach ($pattern as $v) {
            if ($v['value'] && $v['value'][0] == '?') {
                ++$count;
                if (!in_array($v['value'], $vars, true))
                    $vars[] = $v['value'];
            }
        }
        if (!$count)
            trigger_error(RDQL_WHR_ERR .'pattern contains no variables', E_USER_ERROR);
    }

    return $vars;
}
|
---|
| 611 |
|
---|
| 612 |
|
---|
/**
 * Replace all namespace prefixes in the source, pattern and constraint clause of
 * an rdql query with the namespaces declared in the USING clause and the
 * default namespaces.
 *
 * QNames are resolved with _replaceNamespacePrefix(). In all other values
 * (quoted URIs, datatypes of literals) every occurrence of "prefix:" is
 * replaced by the namespace as plain text. After the replacement the helper
 * flags ('is_qname', 'l_dtype_is_qname', 'value_dtype_is_qname') and the 'ns'
 * table are removed from $this->parsedQuery, and each entry of 'sources' is
 * reduced to its URI string.
 *
 * @access  private
 */
function replaceNamespacePrefixes() {

    global $default_prefixes;

    if (!isset($this->parsedQuery['ns']))
        $this->parsedQuery['ns'] = array();

    // add default namespaces
    // if in an rdql query a reserved prefix (e.g. rdf: rdfs:) is used
    // it will be overridden by the default namespace defined in constants.php
    if (is_array($default_prefixes))
        $this->parsedQuery['ns'] = array_merge($this->parsedQuery['ns'], $default_prefixes);

    // replace namespace prefixes in the FROM clause
    if (isset($this->parsedQuery['sources']))
        foreach ($this->parsedQuery['sources'] as $n => $source) {
            if (isset($source['is_qname']))
                $this->parsedQuery['sources'][$n] = $this->_replaceNamespacePrefix($source['value'], RDQL_SRC_ERR);
            else {
                // plain text replacement: "$prefix:" is not a valid regular
                // expression (no delimiters), so preg_replace() cannot be used with it
                foreach ($this->parsedQuery['ns'] as $prefix => $uri)
                    $source['value'] = str_replace("$prefix:", $uri, $source['value']);
                $this->parsedQuery['sources'][$n] = $source['value'];
            }
        }

    // replace namespace prefixes in the where clause
    foreach ($this->parsedQuery['patterns'] as $n => $pattern) {
        foreach ($pattern as $key => $v) {
            // variables are left untouched
            if (!$v['value'] || $v['value'][0] == '?')
                continue;

            if (isset($v['is_qname'])) {
                $this->parsedQuery['patterns'][$n][$key]['value']
                    = $this->_replaceNamespacePrefix($v['value'], RDQL_WHR_ERR);
                unset($this->parsedQuery['patterns'][$n][$key]['is_qname']);
            } elseif (isset($this->parsedQuery['patterns'][$n][$key]['is_literal'])) {
                // literal: only its datatype may carry a prefix
                if (isset($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname'])) {
                    $this->parsedQuery['patterns'][$n][$key]['l_dtype']
                        = $this->_replaceNamespacePrefix($v['l_dtype'], RDQL_WHR_ERR);
                    unset($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname']);
                }else {
                    foreach ($this->parsedQuery['ns'] as $prefix => $uri)
                        $this->parsedQuery['patterns'][$n][$key]['l_dtype']
                            = str_replace("$prefix:", $uri, $this->parsedQuery['patterns'][$n][$key]['l_dtype']);
                }
            }else {
                // quoted URI (== <URI>)
                foreach ($this->parsedQuery['ns'] as $prefix => $uri)
                    $this->parsedQuery['patterns'][$n][$key]['value']
                        = str_replace("$prefix:", $uri, $this->parsedQuery['patterns'][$n][$key]['value']);
            }
        }
    }

    // replace prefixes in the constraint clause
    if (isset($this->parsedQuery['filters']))
        foreach ($this->parsedQuery['filters'] as $n => $filter)
            foreach ($filter['strEqExprs'] as $i => $expr) {
                if ($expr['value_type'] == 'QName') {
                    $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
                        = $this->_replaceNamespacePrefix($expr['value'], RDQL_AND_ERR);
                    $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_type'] = 'URI';
                }
                // $expr is the copy taken before the QName was resolved, so a
                // resolved QName is not processed a second time as 'URI' below
                if ($expr['value_type'] == 'URI')
                    foreach ($this->parsedQuery['ns'] as $prefix => $uri)
                        $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
                            = str_replace("$prefix:", $uri,
                                          $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']);
                elseif ($expr['value_type'] == 'Literal') {
                    if (isset($expr['value_dtype_is_qname'])) {
                        $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
                            = $this->_replaceNamespacePrefix($expr['value_dtype'], RDQL_AND_ERR);
                        unset($this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype_is_qname']);
                    } else {
                        foreach ($this->parsedQuery['ns'] as $prefix => $uri)
                            $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
                                = str_replace("$prefix:", $uri,
                                              $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']);
                    }
                }
            }

    unset($this->parsedQuery['ns']);
}
|
---|
| 702 |
|
---|
| 703 |
|
---|
| 704 | // =============================================================================
|
---|
| 705 | // *************************** helper functions ********************************
|
---|
| 706 | // =============================================================================
|
---|
| 707 |
|
---|
| 708 |
|
---|
/**
 * Drop whitespace tokens from the current position of $this->tokens.
 *
 * As long as the current token is a blank, a line feed, a tab or a carriage
 * return, it is removed from the token array.
 *
 * @access  private
 */
function _clearWhiteSpaces() {

    $blanks = array(' ', "\n", "\t", "\r");

    while (in_array(current($this->tokens), $blanks))
        unset($this->tokens[key($this->tokens)]);
}
|
---|
| 723 |
|
---|
| 724 |
|
---|
| 725 | /**
|
---|
| 726 | * Check if the query string of the given clause contains an undesired ','.
|
---|
| 727 | * If a comma was correctly placed then remove it and clear all whitespaces.
|
---|
| 728 | *
|
---|
| 729 | * @param string $commaExpected
|
---|
| 730 | * @param string $clause_error
|
---|
| 731 | * @throws PHPError
|
---|
| 732 | * @access private
|
---|
| 733 | */
|
---|
function _checkComma($commaExpected, $clause_error) {

    $this->_clearWhiteSpaces();

    // Only a comma at the current position needs any handling.
    if (current($this->tokens) != ',') {
        return;
    }

    // A comma where none is allowed is reported with the clause's error prefix.
    if (!$commaExpected) {
        trigger_error($clause_error ."',' - unexpected comma", E_USER_ERROR);
        return;
    }

    // Consume the expected comma, then re-check with $commaExpected = FALSE so
    // that a second comma directly after it (ignoring whitespace) is reported.
    $commaKey = key($this->tokens);
    unset($this->tokens[$commaKey]);
    $this->_checkComma(FALSE, $clause_error);
}
|
---|
| 746 |
|
---|
| 747 |
|
---|
| 748 | /**
|
---|
| 749 | * Check if the given token is either a variable (?var) or the first token of an URI (<URI>).
|
---|
| 750 | * In case of an URI this function returns the whole URI string.
|
---|
| 751 | *
|
---|
| 752 | * @param string $token
|
---|
| 753 | * @return array ['value'] = string
|
---|
| 754 | * @throws PHPError
|
---|
| 755 | * @access private
|
---|
| 756 | */
|
---|
function _validateVarUri($token) {

    // Classify the token by its first character:
    //   '?'      -> variable, validated by _validateVar()
    //   anything else -> <URI> or QName, validated by _validateUri()
    // Returns array('value' => string) plus 'is_qname' => TRUE when the token
    // did not start with '<'.
    //
    // substr() replaces the former $token{0} access: curly-brace string
    // offsets are no longer valid syntax on PHP 8, and substr() does not warn
    // on an empty token.
    $token_res = array();
    $firstChar = substr((string) $token, 0, 1);

    if ($firstChar == '?') {
        $token_res['value'] = $this->_validateVar($token, RDQL_WHR_ERR);
    } else {
        $token_res['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
        // Anything accepted by _validateUri() that is not bracketed is a QName.
        if ($firstChar != '<')
            $token_res['is_qname'] = TRUE;
    }
    return $token_res;
}
|
---|
| 768 |
|
---|
| 769 |
|
---|
| 770 | /**
|
---|
| 771 | * Check if the given token is either a variable (?var) or the first token
|
---|
| 772 | * of either an URI (<URI>) or a literal ("Literal").
|
---|
| 773 | * In case of a literal return an array with literal properties (value, language, datatype).
|
---|
| 774 | * In case of a variable or an URI return only ['value'] = string.
|
---|
| 775 | *
|
---|
| 776 | * @param string $token
|
---|
| 777 | * @return array ['value'] = string
|
---|
| 778 | * ['is_qname'] = boolean
|
---|
| 779 | * ['is_literal'] = boolean
|
---|
| 780 | * ['l_lang'] = string
|
---|
| 781 | * ['l_dtype'] = string
|
---|
| 782 | * @throws PHPError
|
---|
| 783 | * @access private
|
---|
| 784 | */
|
---|
function _validateVarUriLiteral($token) {

    // Classify the object token of a triple pattern and delegate validation:
    //   '?...'        -> variable          (_validateVar)
    //   '...' / "..." -> literal           (_validateLiteral, returns full array)
    //   '<...'        -> URI               (_validateUri)
    //   contains ':'  -> QName             (_validateUri, flagged 'is_qname')
    // Anything else raises an E_USER_ERROR.
    //
    // Initialised so that the final return never reads an undefined variable
    // when an installed error handler lets execution continue after the error.
    $statement_object = NULL;

    // substr() replaces the former $token{0} access (curly-brace offsets are
    // not valid syntax on PHP 8).
    $firstChar = substr((string) $token, 0, 1);

    if ($firstChar == '?')
        $statement_object = array('value' => $this->_validateVar($token, RDQL_WHR_ERR));
    elseif ($firstChar == "'" || $firstChar == '"')
        $statement_object = $this->_validateLiteral($token);
    elseif ($firstChar == '<')
        $statement_object = array('value' => $this->_validateUri($token, RDQL_WHR_ERR));
    // strpos() !== FALSE replaces ereg(':', $token); the ereg extension was
    // removed in PHP 7. Both test for a ':' anywhere in the token.
    elseif (strpos((string) $token, ':') !== FALSE) {
        $statement_object = array('value' => $this->_validateUri($token, RDQL_WHR_ERR));
        $statement_object['is_qname'] = TRUE;
    }else
        trigger_error(RDQL_WHR_ERR ." '$token' - ?Variable, <URI>, QName, or \"LITERAL\" expected", E_USER_ERROR);
    return $statement_object;
}
|
---|
| 800 |
|
---|
| 801 |
|
---|
| 802 | /**
|
---|
| 803 | * Check if the given token is a valid variable name (?var).
|
---|
| 804 | *
|
---|
| 805 | * @param string $token
|
---|
| 806 | * @param string $clause_error
|
---|
| 807 | * @return string
|
---|
| 808 | * @throws PHPError
|
---|
| 809 | * @access private
|
---|
| 810 | */
|
---|
function _validateVar($token, $clause_error) {

    // The token is a legal variable only if the first regex hit spans the
    // whole token, i.e. '?' followed solely by letters, digits or underscores.
    $found = array();
    preg_match("/\?[a-zA-Z0-9_]+/", $token, $found);
    $wholeTokenMatched = isset($found[0]) && $found[0] == $token;

    if (!$wholeTokenMatched) {
        trigger_error($clause_error ."'" .htmlspecialchars($token)
                      ."' - variable name contains illegal characters", E_USER_ERROR);
    }

    // Consume the current token from the token list and hand the name back.
    $currentKey = key($this->tokens);
    unset($this->tokens[$currentKey]);
    return $token;
}
|
---|
| 820 |
|
---|
| 821 |
|
---|
| 822 | /**
|
---|
| 823 | * Check if $token is the first token of a valid URI (<URI>) and return the whole URI string
|
---|
| 824 | *
|
---|
| 825 | * @param string $token
|
---|
| 826 | * @param string $clause_error
|
---|
| 827 | * @return string
|
---|
| 828 | * @throws PHPError
|
---|
| 829 | * @access private
|
---|
| 830 | */
|
---|
function _validateUri($token, $clause_error) {

    // Two accepted forms:
    //   QName  (no leading '<'): must contain ':' past position 0 and pass
    //          _validateQName(); returned with trailing ':' stripped.
    //   <URI>  (leading '<'): may be split over several tokens; tokens are
    //          consumed and concatenated until one ends with '>'. Returned
    //          without the surrounding angle brackets.
    // Every accepted form removes the consumed token(s) from $this->tokens.
    // Errors are raised with trigger_error(..., E_USER_ERROR).
    //
    // substr() replaces the former $token{...} accesses; curly-brace string
    // offsets are not valid syntax on PHP 8.
    if (substr((string) $token, 0, 1) != '<') {
        if (strpos($token, ':') && $this->_validateQName($token, $clause_error)) {
            unset($this->tokens[key($this->tokens)]);
            return rtrim($token, ':');
        }
        $errmsg = $clause_error .'\'' .htmlspecialchars($token) .'\' ';
        if ($clause_error == RDQL_WHR_ERR)
            $errmsg .= "- ?Variable or <URI> or QName expected";
        else
            $errmsg .= "- <URI> or QName expected";
        trigger_error($errmsg, E_USER_ERROR);
    }else{
        $token_res = $token;
        // The emptiness check now comes first so the last-character test is
        // never applied to an exhausted token list (current() returns FALSE).
        while ($token != NULL && substr($token, -1) != '>') {
            // Delimiters and whitespace cannot occur inside <...>. "\t" is
            // included to match the whitespace set used by _clearWhiteSpaces().
            if ($token == '(' || $token == ')' || $token == ',' ||
                $token == ' ' || $token == "\n" || $token == "\r" || $token == "\t") {
                trigger_error($clause_error .'\'' .htmlspecialchars($token_res)
                              ."' - illegal input: '$token' - '>' missing", E_USER_ERROR);
            }
            unset($this->tokens[key($this->tokens)]);
            $token = current($this->tokens);
            $token_res .= $token;
        }
        // Ran out of tokens before a closing '>' was seen.
        if ($token == NULL)
            trigger_error($clause_error .'\'' .htmlspecialchars($token_res) ."' - '>' missing", E_USER_ERROR);
        unset($this->tokens[key($this->tokens)]);
        return trim($token_res, '<>');
    }
}
|
---|
| 862 |
|
---|
| 863 |
|
---|
| 864 | /**
|
---|
| 865 | * Check if $token is the first token of a valid literal ("LITERAL") and
|
---|
| 866 | * return an array with literal properties (value, language, datatype).
|
---|
| 867 | *
|
---|
| 868 | * @param string $token
|
---|
| 869 | * @return array ['value'] = string
|
---|
| 870 | * ['is_literal'] = boolean
|
---|
| 871 | * ['l_lang'] = string
|
---|
| 872 | * ['l_dtype'] = string
|
---|
| 873 | * ['l_dtype_is_qname'] = boolean
|
---|
| 874 | * @throws PHPError
|
---|
| 875 | * @access private
|
---|
| 876 | */
|
---|
function _validateLiteral($token) {

    // Assemble a quoted literal that may span several tokens and return
    // array('value', 'is_literal', 'l_lang', 'l_dtype'[, 'l_dtype_is_qname']).
    // $token is expected to start with the opening quotation mark (' or ");
    // the caller in this file checks that before delegating here.
    //
    // Only the string-offset accesses were changed: $token{0},
    // $token{strlen($token)-1} and $dtype{0} used curly-brace offsets, which
    // are not valid syntax on PHP 8. Statement order is otherwise untouched
    // because the loop depends on side effects on $this->tokens.
    $quotation_mark = $token[0];
    $statement_object = array ('value' => '',
                               'is_literal' => TRUE,
                               'l_lang' => '',
                               'l_dtype' => '');
    // Strip the opening quotation mark from the current token in place.
    $this->tokens[key($this->tokens)] = substr($token,1);

    $return = FALSE;
    foreach ($this->tokens as $k => $token) {

        // Token ends with the closing quotation mark: plain literal finished.
        if ($token != NULL && substr($token, -1) == $quotation_mark) {
            $token = rtrim($token, $quotation_mark);
            $return = TRUE;

        // parse @language(^^datatype)?
        }elseif (strpos($token, $quotation_mark .'@') || substr($token, 0, 2) == $quotation_mark .'@') {
            $lang = substr($token, strpos($token, $quotation_mark .'@')+2);
            if (strpos($lang, '^^') || substr($lang, 0,2) == '^^') {
                $dtype = substr($lang, strpos($lang, '^^')+2);
                if (!$dtype)
                    trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                                  .$token ." - datatype expected" ,E_USER_ERROR);

                // NOTE(review): _validateUri() also removes the current entry
                // of $this->tokens as a side effect - confirm this is intended
                // while iterating the same array.
                $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
                if (substr($dtype, 0, 1) != '<')
                    $statement_object['l_dtype_is_qname'] = TRUE;

                $lang = substr($lang, 0, strpos($lang, '^^'));
            }
            if (!$lang)
                trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                              .$token ." - language expected" ,E_USER_ERROR);
            $statement_object['l_lang'] = $lang;
            // Keep only the literal text in front of the closing quote.
            $token = substr($token, 0, strpos($token, $quotation_mark .'@'));
            $return = TRUE;

        // parse ^^datatype
        }elseif (strpos($token, $quotation_mark .'^^') || substr($token, 0, 3) == $quotation_mark .'^^') {
            $dtype = substr($token, strpos($token, $quotation_mark .'^^')+3);
            if (!$dtype)
                trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                              .$token ." - datatype expected" ,E_USER_ERROR);

            $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
            if (substr($dtype, 0, 1) != '<')
                $statement_object['l_dtype_is_qname'] = TRUE;

            $token = substr($token, 0, strpos($token, $quotation_mark .'^^'));
            $return = TRUE;
        // A quotation mark inside the token that matched none of the forms above.
        }elseif (strpos($token, $quotation_mark))
            trigger_error(RDQL_WHR_ERR ."'$token' - illegal input", E_USER_ERROR);
        $statement_object['value'] .= $token;
        unset($this->tokens[$k]);
        if ($return)
            return $statement_object;
    }
    // All tokens consumed without finding the closing quotation mark.
    trigger_error(RDQL_WHR_ERR ."quotation end mark: $quotation_mark missing", E_USER_ERROR);
}
|
---|
| 937 |
|
---|
| 938 |
|
---|
| 939 | /**
|
---|
| 940 | * Check if the given token is a valid QName.
|
---|
| 941 | *
|
---|
| 942 | * @param string $token
|
---|
| 943 | * @param string $clause_error
|
---|
| 944 | * @return boolean
|
---|
| 945 | * @throws PHPError
|
---|
| 946 | * @access private
|
---|
| 947 | */
|
---|
function _validateQName($token, $clause_error) {

    // A QName is 'prefix:local' with at most one ':'; the prefix must be an
    // NCName and a non-empty local part must be an NCName as well. Violations
    // raise an E_USER_ERROR prefixed with $clause_error. Always returns TRUE
    // (reached only when no error stopped execution or a handler resumed it).
    $parts = explode(':', $token);
    if (count($parts) > 2)
        trigger_error($clause_error ."illegal QName: '$token'", E_USER_ERROR);
    if (!$this->_validateNCName($parts[0]))
        trigger_error($clause_error ."illegal prefix in QName: '$token'", E_USER_ERROR);

    // The local part is optional ('prefix:' is accepted). It is compared with
    // '' instead of being tested for truthiness so that a local part of "0"
    // is validated like any other, and a token without ':' no longer reads an
    // undefined array index.
    $localPart = isset($parts[1]) ? $parts[1] : '';
    if ($localPart !== '' && !$this->_validateNCName($localPart))
        trigger_error($clause_error ."illegal local part in QName: '$token'", E_USER_ERROR);

    return TRUE;
}
|
---|
| 960 |
|
---|
| 961 |
|
---|
| 962 | /**
|
---|
| 963 | * Check if the given token is a valid NCName.
|
---|
| 964 | *
|
---|
| 965 | * @param string $token
|
---|
| 966 | * @return boolean
|
---|
| 967 | * @access private
|
---|
| 968 | */
|
---|
function _validateNCName($token) {

    // The token is an NCName when the first regex hit covers it completely:
    // one or more letters/underscores, then letters, digits, '_', '.' or '-'.
    $hits = array();
    preg_match("/[a-zA-Z_]+[a-zA-Z_0-9.\-]*/", $token, $hits);

    return (isset($hits[0]) && $hits[0] == $token);
}
|
---|
| 976 |
|
---|
| 977 |
|
---|
| 978 | /**
|
---|
| 979 | * Check if the given token is a valid namespace prefix.
|
---|
| 980 | *
|
---|
| 981 | * @param string $token
|
---|
| 982 | * @return string
|
---|
| 983 | * @throws PHPError
|
---|
| 984 | * @access private
|
---|
| 985 | */
|
---|
function _validatePrefix($token) {

    // A namespace prefix has to be a plain NCName.
    $isNCName = $this->_validateNCName($token);
    if (!$isNCName) {
        $message = RDQL_USG_ERR ."'" .htmlspecialchars($token)
                   ."' - illegal input, namespace prefix expected";
        trigger_error($message, E_USER_ERROR);
    }

    // Consume the current token and return the prefix unchanged.
    $currentKey = key($this->tokens);
    unset($this->tokens[$currentKey]);
    return $token;
}
|
---|
| 994 |
|
---|
| 995 | /**
|
---|
| 996 | * Replace a prefix in a given QName and return a full URI.
|
---|
| 997 | *
|
---|
| 998 | * @param string $qName
|
---|
| 999 | * @param string $clause_error
|
---|
| 1000 | * @return string
|
---|
| 1001 | * @throws PHPError
|
---|
| 1002 | * @access private
|
---|
| 1003 | */
|
---|
function _replaceNamespacePrefix($qName, $clause_error) {

    // Expand 'prefix:local' to '<namespace URI>local' using the prefix table
    // in $this->parsedQuery['ns']. An unknown prefix raises an E_USER_ERROR
    // prefixed with $clause_error.
    $qName_parts = explode(':', $qName);
    $prefix = $qName_parts[0];

    // _validateUri() strips a trailing ':' from QNames, so a prefix-only QName
    // arrives here without a colon; treat the missing local part as empty
    // instead of reading an undefined array index.
    $localPart = isset($qName_parts[1]) ? $qName_parts[1] : '';

    if (!array_key_exists($prefix, $this->parsedQuery['ns'])) {
        trigger_error($clause_error .'undefined prefix: \'' .$prefix .'\' in: \'' .$qName .'\'', E_USER_ERROR);
        // Only reached when an error handler resumes execution: there is no
        // namespace to prepend, so return the bare local part.
        return $localPart;
    }
    return $this->parsedQuery['ns'][$prefix] .$localPart;
}
|
---|
| 1011 |
|
---|
| 1012 |
|
---|
| 1013 | /**
|
---|
| 1014 | * Check if all variables from the SELECT clause are defined in the WHERE clause
|
---|
| 1015 | *
|
---|
| 1016 | * @access private
|
---|
| 1017 | */
|
---|
function _checkSelectVars() {

    // Run the definedness check on each SELECT variable in turn;
    // _isDefined() raises the error for any variable it does not find.
    foreach ($this->parsedQuery['selectVars'] as $selectVar) {
        $this->_isDefined($selectVar);
    }
}
|
---|
| 1023 |
|
---|
| 1024 |
|
---|
| 1025 | /**
|
---|
| 1026 | * Check if the given variable is defined in the WHERE clause.
|
---|
| 1027 | *
|
---|
| 1028 | * @param string $var
|
---|
| 1029 | * @return string
|
---|
| 1030 | * @throws PHPError
|
---|
| 1031 | * @access private
|
---|
| 1032 | */
|
---|
function _isDefined($var) {

    // Collect the variables known to the query and look the given one up
    // (loose in_array comparison).
    $knownVars = $this->findAllQueryVariables();
    $isKnown = in_array($var, $knownVars);

    if (!$isKnown) {
        trigger_error(RDQL_SYN_ERR .": '$var' - variable must be defined in the WHERE clause", E_USER_ERROR);
    }

    // The variable name is handed back unchanged.
    return $var;
}
|
---|
| 1041 |
|
---|
| 1042 |
|
---|
| 1043 | /**
|
---|
| 1044 | * Throw an error if the regular expression from the AND clause is not quoted.
|
---|
| 1045 | *
|
---|
| 1046 | * @param string $filterString
|
---|
| 1047 | * @param string $lQuotMark
|
---|
| 1048 | * @param string $rQuotMark
|
---|
| 1049 | * @throws PHPError
|
---|
| 1050 | * @access private
|
---|
| 1051 | */
|
---|
function _checkRegExQuotation($filterString, $lQuotMark, $rQuotMark) {

    // An empty/falsy opening mark means the expression was not quoted at all.
    $hasOpeningMark = (bool) $lQuotMark;
    if ($hasOpeningMark === FALSE) {
        trigger_error(RDQL_AND_ERR ."'$filterString' - regular expressions must be quoted", E_USER_ERROR);
    }

    // Opening and closing marks have to be equal; this is checked
    // independently of the test above.
    $marksAgree = ($lQuotMark == $rQuotMark);
    if (!$marksAgree) {
        trigger_error(RDQL_AND_ERR ."'$filterString' - quotation end mark in the regular expression missing", E_USER_ERROR);
    }
}
|
---|
| 1060 |
|
---|
| 1061 | } // end: Class RdqlParser
|
---|
| 1062 |
|
---|
| 1063 | ?> |
---|