ILIAS  Release_5_0_x_branch Revision 61816
 All Data Structures Namespaces Files Functions Variables Groups Pages
class.ilLuceneQueryParser.php
Go to the documentation of this file.
1 <?php
2 /*
3  +-----------------------------------------------------------------------------+
4  | ILIAS open source |
5  +-----------------------------------------------------------------------------+
6  | Copyright (c) 1998-2006 ILIAS open source, University of Cologne |
7  | |
8  | This program is free software; you can redistribute it and/or |
9  | modify it under the terms of the GNU General Public License |
10  | as published by the Free Software Foundation; either version 2 |
11  | of the License, or (at your option) any later version. |
12  | |
13  | This program is distributed in the hope that it will be useful, |
14  | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16  | GNU General Public License for more details. |
17  | |
18  | You should have received a copy of the GNU General Public License |
19  | along with this program; if not, write to the Free Software |
20  | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
21  +-----------------------------------------------------------------------------+
22 */
23 
24 include_once './Services/Search/classes/Lucene/class.ilLuceneQueryParserException.php';
25 
36 {
37  protected $query_string;
38  protected $parsed_query;
39 
40 
/**
 * Create a parser for one query.
 *
 * @param mixed $a_query_string raw query (presumably a string as entered by the user);
 *                              it is stored untouched until parse() or parseAutoWildcard() runs
 */
public function __construct($a_query_string)
{
    // Only remember the raw input here; all rewriting happens on demand.
    $this->query_string = $a_query_string;
}
50 
/**
 * Rewrite "owner:<login>" clauses of the raw query and store the result.
 *
 * Every match of the owner pattern is handed to replaceOwnerCallback();
 * the rewritten string is available through getQuery() afterwards.
 *
 * @return void
 */
public function parse()
{
    // "owner:" followed by at most one whitespace character and a login-like token.
    $owner_pattern = '/(owner:)\s?([A-Za-z0-9_\.\+\*\@!\$\%\~\-]+)/';
    $rewrite_owner = array($this, 'replaceOwnerCallback');

    $this->parsed_query = preg_replace_callback(
        $owner_pattern,
        $rewrite_owner,
        $this->query_string
    );
}
59 
/**
 * Store the trimmed query, appending a trailing "*" where appropriate.
 *
 * The asterisk is appended only when the trimmed query contains no "*"
 * anywhere and does not end with a double quote.
 *
 * @return bool always true
 */
public function parseAutoWildcard()
{
    $query = trim($this->query_string);

    // Leave explicit wildcards and queries that end in a quote alone.
    if (!stristr($query, '*') && substr($query, -1) !== '"') {
        $query .= '*';
    }

    $this->parsed_query = $query;
    return true;
}
77 
/**
 * Return the query produced by the last parse() / parseAutoWildcard() call.
 *
 * @return mixed the stored parsed query; null if neither method has been called yet
 */
public function getQuery()
{
    return $this->parsed_query;
}
86 
/**
 * preg_replace_callback() handler used by parse().
 *
 * @param array $matches [0] full match, [1] the "owner:" prefix, [2] the login token
 * @return string the prefix followed by the value returned by ilObjUser::_loginExists()
 *                when that value is truthy, otherwise the untouched full match
 */
protected function replaceOwnerCallback($matches)
{
    // No login captured: nothing to translate.
    if (!isset($matches[2])) {
        return $matches[0];
    }

    $usr_id = ilObjUser::_loginExists($matches[2]);

    return $usr_id ? $matches[1].$usr_id : $matches[0];
}
101 
102 
/**
 * Validate a query string.
 *
 * Currently a stub: no check is executed and every query is accepted.
 *
 * @param mixed $a_query query to validate (unused at the moment)
 * @return bool always true
 */
public static function validateQuery($a_query)
{
    // TODO: first replace all quote characters, then enable the checks.
    // Checks that are switched off at the moment:
    //   checkAllowedCharacters, checkAsterisk, checkAmpersands,
    //   checkExclamationMark, checkQuestionMark, checkPlusMinus,
    //   checkANDORNOT, checkColon
    return true;
}
126 
/**
 * Reject queries that contain a character outside the allowed set.
 *
 * Allowed: Unicode letters, digits, space and the punctuation listed in the
 * character class below, including the backslash.
 *
 * Fix: the class used to be written with '\\@' inside a single-quoted PHP
 * string, which reaches PCRE as '\@' (a literal '@'). The backslash itself
 * was therefore never allowed and every escaped query was rejected. It is
 * now written as '\\\\' so PCRE sees an escaped, literal backslash.
 *
 * @param string $query
 * @return bool true when every character is allowed
 * @throws ilLuceneQueryParserException 'lucene_err_allowed_characters'
 */
protected static function checkAllowedCharacters($query)
{
    // preg_match() yields 1 only when a forbidden character was found.
    if (preg_match('/[^\pL0-9_+\-:.()\"*?&§€|!{}\[\]\^~\\\\@#\/$%\'= ]/u', $query) === 1) {
        throw new ilLuceneQueryParserException('lucene_err_allowed_characters');
    }
    return true;
}
139 
/**
 * Validate the placement of "*" wildcards.
 *
 * A query is rejected when it consists only of asterisks (this includes the
 * empty string), when an asterisk directly follows whitespace, or when the
 * query starts with an asterisk followed by a non-whitespace character.
 *
 * @param string $query
 * @return bool true when no misplaced asterisk was found
 * @throws ilLuceneQueryParserException 'lucene_err_asterisk'
 */
protected static function checkAsterisk($query)
{
    $misplaced = preg_match('/^[\*]*$|[\s]\*|^\*[^\s]/', $query);

    if ($misplaced == 0) {
        return true;
    }
    throw new ilLuceneQueryParserException('lucene_err_asterisk');
}
152 
/**
 * Validate the use of the "&&" operator.
 *
 * Queries without "&&" are accepted as they are. Otherwise the whole query
 * must be a sequence of term pairs, optionally joined by " && " (exactly one
 * space on each side), where the right-hand term contains no "&".
 *
 * Fix: both character classes were written with '\\@' inside a single-quoted
 * PHP string, which PCRE reads as a literal '@'; the backslash was missing
 * from the term alphabet, so any "&&" query using an escape was rejected.
 * They now use '\\\\' (a literal backslash for PCRE).
 *
 * @param string $query
 * @return bool true when the query passes
 * @throws ilLuceneQueryParserException 'lucene_err_ampersand'
 */
protected static function checkAmpersands($query)
{
    // Nothing to verify unless the operator is actually used.
    if (preg_match('/[&]{2}/', $query) > 0) {
        $well_formed = preg_match(
            '/^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+( && )?[\pL0-9_+\-:.()\"*?|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]*)+$/u',
            $query
        );
        // Anything but a clean match (0 or a PCRE failure) counts as invalid.
        if ($well_formed !== 1) {
            throw new ilLuceneQueryParserException('lucene_err_ampersand');
        }
    }
    return true;
}
168 
/**
 * Validate the use of the "^" character.
 *
 * A caret that is not preceded by a backslash is rejected when it ends the
 * query, or when the text following it (up to the next whitespace) contains
 * a character other than a digit or a dot.
 *
 * @param string $query
 * @return bool true when no invalid caret was found
 * @throws ilLuceneQueryParserException 'lucene_err_caret'
 */
protected static function checkCaret($query)
{
    $invalid = preg_match('/[^\\\]\^([^\s]*[^0-9.]+)|[^\\\]\^$/', $query);

    if ($invalid == 0) {
        return true;
    }
    throw new ilLuceneQueryParserException('lucene_err_caret');
}
181 
/**
 * Validate the use of the "~" character.
 *
 * The query is rejected when a tilde is followed, before the next
 * whitespace, by at least one character that is not a digit.
 *
 * @param string $query
 * @return bool true when no invalid tilde was found
 * @throws ilLuceneQueryParserException 'lucene_err_squiggle'
 */
protected static function checkSquiggle($query)
{
    $invalid = preg_match('/[^\\\]*~[^\s]*[^0-9\s]+/', $query);

    if ($invalid == 0) {
        return true;
    }
    throw new ilLuceneQueryParserException('lucene_err_squiggle');
}
194 
/**
 * Validate the use of the "!" operator.
 *
 * A query passes when it contains no "!" at all, or when the whole query is
 * a sequence of term pairs optionally joined by " ! " (one space on each
 * side) and separated by spaces.
 *
 * Fix: the term character classes were written with '\\@' inside a
 * single-quoted PHP string, which PCRE reads as a literal '@'. The backslash
 * was therefore not part of a term and an escaped "\!" made the query
 * invalid. The classes now use '\\\\' (a literal backslash for PCRE). The
 * unused $matches output argument was dropped as well.
 *
 * @param string $query
 * @return bool true when the query passes
 * @throws ilLuceneQueryParserException 'lucene_err_exclamation_mark'
 */
protected static function checkExclamationMark($query)
{
    $well_formed = preg_match(
        '/^[^!]*$|^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+( ! )?[\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]*)+$/u',
        $query
    );
    // Anything but a clean match (0 or a PCRE failure) counts as invalid.
    if ($well_formed !== 1) {
        throw new ilLuceneQueryParserException('lucene_err_exclamation_mark');
    }
    return true;
}
207 
/**
 * Validate the use of the "?" wildcard.
 *
 * The query is rejected when it starts with "?" or when a "?" directly
 * follows a character that is not part of the term alphabet (for example
 * whitespace).
 *
 * Fix: the term alphabet was written with '\\@' inside a single-quoted PHP
 * string, which PCRE reads as a literal '@'. The backslash was therefore
 * outside the alphabet and an escaped "\?" was reported as an error. The
 * class now uses '\\\\' (a literal backslash for PCRE). The unused $matches
 * output argument was dropped as well.
 *
 * @param string $query
 * @return bool true when no misplaced question mark was found
 * @throws ilLuceneQueryParserException 'lucene_err_question_mark'
 */
protected static function checkQuestionMark($query)
{
    $misplaced = preg_match(
        '/^(\?)|([^\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]\?+)/u',
        $query
    );
    if ($misplaced === 1) {
        throw new ilLuceneQueryParserException('lucene_err_question_mark');
    }
    return true;
}
220 
/**
 * Validate the use of parentheses.
 *
 * A query passes when it contains no parentheses, or when the number of "("
 * equals the number of ")" and no pair is empty (nothing but whitespace
 * between "(" and ")").
 *
 * Fix: the early exit used to be taken when EITHER kind of parenthesis was
 * missing (`!$hasLft || !$hasRgt`), so a lone "(" or ")" was accepted and the
 * "not closed" check right after it could never fire. The early exit now
 * requires that BOTH are absent.
 *
 * @param string $a_query
 * @return bool true when the parentheses are consistent
 * @throws ilLuceneQueryParserException 'lucene_err_parenthesis_not_closed' or
 *                                      'lucene_err_parenthesis_empty'
 */
protected static function checkParenthesis($a_query)
{
    $open_count = substr_count($a_query, '(');
    $close_count = substr_count($a_query, ')');

    // No parentheses at all: nothing to validate.
    if ($open_count === 0 && $close_count === 0) {
        return true;
    }

    // Covers both "only one kind present" and "different amounts".
    if ($open_count !== $close_count) {
        throw new ilLuceneQueryParserException('lucene_err_parenthesis_not_closed');
    }

    if (preg_match('/\(\s*\)/', $a_query) > 0) {
        throw new ilLuceneQueryParserException('lucene_err_parenthesis_empty');
    }
    return true;
}
266 
/**
 * Validate the use of the "+" and "-" prefixes.
 *
 * A query passes when it contains no "+", "-" or newline at all, or when
 * the whole query is a sequence of terms, each optionally prefixed by "+"
 * or "-" (optionally followed by whitespace) and followed by at most one
 * space.
 *
 * Fix: the term alphabet was written with '\\@' inside a single-quoted PHP
 * string, which PCRE reads as a literal '@'. The backslash was therefore not
 * part of a term, so prefixed queries containing an escape were rejected.
 * The class now uses '\\\\' (a literal backslash for PCRE).
 *
 * @param string $a_query
 * @return bool true when the query passes
 * @throws ilLuceneQueryParserException 'lucene_err_plus_minus'
 */
protected static function checkPlusMinus($a_query)
{
    $well_formed = preg_match(
        '/^[^\n+\-]*$|^([+-]?\s*[\pL0-9_:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]?)+$/u',
        $a_query
    );
    // Anything but a clean match (0 or a PCRE failure) counts as invalid.
    if ($well_formed !== 1) {
        throw new ilLuceneQueryParserException('lucene_err_plus_minus');
    }
    return true;
}
280 
/**
 * Validate the use of the AND / OR / NOT operators.
 *
 * This check is a no-op: it accepts every query. The previous body contained
 * a regex-based check placed AFTER an unconditional `return true;`, so it
 * never executed; that unreachable code was removed without changing the
 * observable behaviour.
 *
 * @param string $a_query query to validate (currently unused)
 * @return bool always true
 */
protected static function checkANDORNOT($a_query)
{
    return true;
}
296 
/**
 * Validate the use of double quotes.
 *
 * Queries without any double quote pass. Otherwise the number of quotes
 * must be even and no two quotes may enclose only whitespace (or nothing).
 *
 * @param string $a_query
 * @return bool true when the quotes are consistent
 * @throws ilLuceneQueryParserException 'lucene_err_quotes' or
 *                                      'lucene_err_quotes_not_empty'
 */
protected static function checkQuotes($a_query)
{
    $found = array();
    $quote_count = preg_match_all('/"/', $a_query, $found);

    if ($quote_count != 0) {
        // An odd amount means one quote was never closed.
        if (($quote_count % 2) > 0) {
            throw new ilLuceneQueryParserException('lucene_err_quotes');
        }

        if (preg_match('/"\s*"/', $a_query) > 0) {
            throw new ilLuceneQueryParserException('lucene_err_quotes_not_empty');
        }
    }
    return true;
}
322 
323 
/**
 * Validate the use of the ":" field separator.
 *
 * An unescaped colon is rejected when it
 *  - is followed by whitespace,
 *  - ends the query,
 *  - follows whitespace (directly or after a single character), or
 *  - starts the query (directly or after a single character).
 *
 * Fix: the pattern was mis-escaped for a single-quoted PHP string.
 * '[^\\\\s]' reached PCRE as '[^\\s]' ("not a backslash and not the LETTER s")
 * instead of "not a backslash and not whitespace", and '[^\\]' reached PCRE
 * as '[^\]', which left the character class open so that it swallowed the
 * last alternative. As a result queries such as "class: foo" or ":foo" were
 * accepted. The classes are now written so PCRE sees '[^\\\s]' and '[^\\]'.
 *
 * @param string $a_query
 * @return bool true when no misplaced colon was found
 * @throws ilLuceneQueryParserException 'lucene_err_colon'
 */
protected static function checkColon($a_query)
{
    // In this literal '\\\\' is one escaped backslash for PCRE and '\s' stays '\s'.
    $misplaced = preg_match(
        '/[^\\\\\s]:\s|[^\\\\\s]:$|\s[^\\\\]?:|^[^\\\\\s]?:/',
        $a_query
    );
    if ($misplaced === 1) {
        throw new ilLuceneQueryParserException('lucene_err_colon');
    }
    return true;
}
337 }
338 ?>