ILIAS  Release_4_3_x_branch Revision 61807
 All Data Structures Namespaces Files Functions Variables Groups Pages
class.ilLuceneQueryParser.php
Go to the documentation of this file.
1 <?php
2 /*
3  +-----------------------------------------------------------------------------+
4  | ILIAS open source |
5  +-----------------------------------------------------------------------------+
6  | Copyright (c) 1998-2006 ILIAS open source, University of Cologne |
7  | |
8  | This program is free software; you can redistribute it and/or |
9  | modify it under the terms of the GNU General Public License |
10  | as published by the Free Software Foundation; either version 2 |
11  | of the License, or (at your option) any later version. |
12  | |
13  | This program is distributed in the hope that it will be useful, |
14  | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16  | GNU General Public License for more details. |
17  | |
18  | You should have received a copy of the GNU General Public License |
19  | along with this program; if not, write to the Free Software |
20  | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
21  +-----------------------------------------------------------------------------+
22 */
23 
24 include_once './Services/Search/classes/Lucene/class.ilLuceneQueryParserException.php';
25 
36 {
// Query string exactly as it was passed to the constructor; parse() reads it and never modifies it.
37  private $query_string;
// Result written by parse() and returned by getQuery(); it has no initial value.
38  protected $parsed_query;
39 
40 
/**
 * Create a parser for one query string.
 *
 * The string is only stored; nothing is rewritten until parse() is called.
 *
 * @param string $a_query_string query that parse() will work on
 */
public function __construct($a_query_string)
{
    $this->query_string = $a_query_string;
}
50 
/**
 * Rewrite the owner filters of the stored query and keep the result for getQuery().
 *
 * Each "owner:" prefix followed by at most one whitespace character and a login-like token
 * is passed to replaceOwnerCallback(); every other part of the query is copied unchanged.
 *
 * @return void
 */
public function parse()
{
    $owner_pattern = '/(owner:)\s?([A-Za-z0-9_\.\+\*\@!\$\%\~\-]+)/';
    $owner_callback = array($this, 'replaceOwnerCallback');

    $this->parsed_query = preg_replace_callback(
        $owner_pattern,
        $owner_callback,
        $this->query_string
    );
}
59 
/**
 * Return the query produced by parse().
 *
 * @return string|null the rewritten query; null while the property has not been assigned
 *                     (within this class only parse() assigns it)
 */
public function getQuery()
{
    return $this->parsed_query;
}
68 
/**
 * preg_replace_callback() handler used by parse() for "owner:" terms.
 *
 * The captured login ($matches[2]) is looked up with ilObjUser::_loginExists(). When the lookup
 * returns a truthy value (presumably the user id - confirm against ilObjUser), the login is
 * replaced by that value; otherwise the matched text is returned untouched.
 *
 * @param array $matches match groups: [0] whole match, [1] the "owner:" prefix, [2] the login
 * @return string replacement text for the match
 */
protected function replaceOwnerCallback($matches)
{
    // Without a captured login there is nothing to resolve.
    if (!isset($matches[2]))
    {
        return $matches[0];
    }

    $usr_id = ilObjUser::_loginExists($matches[2]);
    if (!$usr_id)
    {
        // Unknown login: leave the term exactly as it was written.
        return $matches[0];
    }

    return $matches[1] . $usr_id;
}
83 
84 
/**
 * Validate a query string.
 *
 * Stub: every individual check is switched off, so each query is reported as valid.
 *
 * @param string $a_query query to validate (currently not inspected)
 * @return bool always true
 */
public static function validateQuery($a_query)
{
    // TODO: first replace all quote characters.
    //
    // Checks that are disabled for now:
    #ilLuceneQueryParser::checkAllowedCharacters($a_query);
    #ilLuceneQueryParser::checkAsterisk($a_query);
    #ilLuceneQueryParser::checkAmpersands($a_query);
    #ilLuceneQueryParser::checkExclamationMark($a_query);
    #ilLuceneQueryParser::checkQuestionMark($a_query);
    #ilLuceneQueryParser::checkPlusMinus($a_query);
    #ilLuceneQueryParser::checkANDORNOT($a_query);
    #ilLuceneQueryParser::checkColon($a_query);

    return true;
}
108 
/**
 * Reject queries that contain characters outside the allowed set.
 *
 * Allowed are Unicode letters, digits, the space character and the punctuation listed in the
 * pattern, including the backslash that Lucene uses to escape special characters.
 *
 * @param string $query query to check
 * @return bool true when every character is allowed
 * @throws ilLuceneQueryParserException 'lucene_err_allowed_characters' when a forbidden character
 *         is found or the query cannot be examined (e.g. malformed UTF-8)
 */
protected static function checkAllowedCharacters($query)
{
    // Inside a single-quoted PHP string "\\" is one backslash, so four are needed to hand "\\"
    // (a literal backslash) to PCRE. With only two, PCRE sees "\@" and the backslash itself is
    // not part of the allowed set.
    $forbidden = '/[^\pL0-9_+\-:.()\"*?&§€|!{}\[\]\^~\\\\@#\/$%\'= ]/u';

    // preg_match() yields false on failure (the /u modifier fails on malformed UTF-8). Only an
    // explicit 0 ("no forbidden character found") counts as valid.
    if (preg_match($forbidden, $query) !== 0)
    {
        throw new ilLuceneQueryParserException('lucene_err_allowed_characters');
    }
    return true;
}
121 
/**
 * Reject misplaced asterisk wildcards.
 *
 * A query is refused when it consists of asterisks only (this includes the empty string), when
 * an asterisk directly follows a whitespace character, or when it starts with an asterisk that
 * is followed by a non-whitespace character.
 *
 * @param string $query query to check
 * @return bool true when no misplaced asterisk is found
 * @throws ilLuceneQueryParserException 'lucene_err_asterisk'
 */
protected static function checkAsterisk($query)
{
    $misplaced_asterisk = '/^[\*]*$|[\s]\*|^\*[^\s]/';

    if (preg_match($misplaced_asterisk, $query) == 0)
    {
        return true;
    }

    throw new ilLuceneQueryParserException('lucene_err_asterisk');
}
134 
/**
 * Check the use of the "&&" operator.
 *
 * Queries without "&&" are always accepted. Otherwise the whole query has to match a sequence of
 * terms in which " && " may appear between two terms.
 *
 * @param string $query query to check
 * @return bool true when the query passes
 * @throws ilLuceneQueryParserException 'lucene_err_ampersand'
 */
protected static function checkAmpersands($query)
{
    if (preg_match('/[&]{2}/', $query) > 0)
    {
        // "\\\\" in a single-quoted PHP string reaches PCRE as "\\", a literal backslash. Two
        // backslashes would reach PCRE as "\@" and leave the Lucene escape character out of the
        // term classes, rejecting every escaped term.
        // The second class deliberately omits "&" so that the right-hand term cannot start with it.
        $valid = '/^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+( && )?[\pL0-9_+\-:.()\"*?|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]*)+$/u';

        if (preg_match($valid, $query) == 0)
        {
            throw new ilLuceneQueryParserException('lucene_err_ampersand');
        }
    }
    return true;
}
150 
/**
 * Check the boost operator "^".
 *
 * Looks at every caret that is preceded by a character other than a backslash. The query is
 * refused when such a caret ends the query, or when the text after it (any run of
 * non-whitespace) is followed by at least one character that is neither a digit nor a dot.
 *
 * @param string $query query to check
 * @return bool true when no invalid caret is found
 * @throws ilLuceneQueryParserException 'lucene_err_caret'
 */
protected static function checkCaret($query)
{
    $invalid_boost = '/[^\\\]\^([^\s]*[^0-9.]+)|[^\\\]\^$/';

    if (preg_match($invalid_boost, $query) == 0)
    {
        return true;
    }

    throw new ilLuceneQueryParserException('lucene_err_caret');
}
163 
/**
 * Check the fuzzy / proximity operator "~".
 *
 * A tilde that is not escaped with a backslash may only be followed by digits and dots up to the
 * next whitespace (e.g. "roam~", "roam~2", "roam~0.8"). Anything else after it is refused.
 *
 * @param string $query query to check
 * @return bool true when no invalid tilde is found
 * @throws ilLuceneQueryParserException 'lucene_err_squiggle'
 */
protected static function checkSquiggle($query)
{
    // The lookbehind skips escaped tildes ("\~") while still covering a tilde at the very start
    // of the query. A "[^\\]*" prefix can match the empty string and therefore filters nothing.
    // The dot is excluded from the offending characters so decimal values such as "~0.8" pass.
    $invalid_tilde = '/(?<!\\\\)~[^\s]*[^0-9.\s]+/';

    if (preg_match($invalid_tilde, $query) != 0)
    {
        throw new ilLuceneQueryParserException('lucene_err_squiggle');
    }
    return true;
}
176 
/**
 * Check the use of the "!" operator.
 *
 * Accepted are queries without any exclamation mark, and queries that consist of a sequence of
 * terms in which " ! " may appear between two terms.
 *
 * @param string $query query to check
 * @return bool true when the query passes
 * @throws ilLuceneQueryParserException 'lucene_err_exclamation_mark'
 */
protected static function checkExclamationMark($query)
{
    // "\\\\" in a single-quoted PHP string reaches PCRE as "\\", a literal backslash. Two
    // backslashes would reach PCRE as "\@" and leave the Lucene escape character out of the
    // term classes, so an escaped "\!" could never be part of a term.
    $valid = '/^[^!]*$|^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+( ! )?[\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]*)+$/u';

    if (preg_match($valid, $query) == 0)
    {
        throw new ilLuceneQueryParserException('lucene_err_exclamation_mark');
    }
    return true;
}
189 
/**
 * Check the single-character wildcard "?".
 *
 * The query is refused when it starts with a question mark, or when a question mark directly
 * follows a character that cannot be part of a term (for example a whitespace character).
 *
 * @param string $query query to check
 * @return bool true when no misplaced question mark is found
 * @throws ilLuceneQueryParserException 'lucene_err_question_mark'
 */
protected static function checkQuestionMark($query)
{
    // "\\\\" in a single-quoted PHP string reaches PCRE as "\\", a literal backslash. It has to
    // belong to the term characters, otherwise the escaped form "\?" is reported as an error.
    $misplaced = '/^(\?)|([^\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]\?+)/u';

    if (preg_match($misplaced, $query) != 0)
    {
        throw new ilLuceneQueryParserException('lucene_err_question_mark');
    }
    return true;
}
202 
/**
 * Check that parentheses are paired and not empty.
 *
 * Only the numbers of "(" and ")" are compared; their order and backslash-escaped parentheses
 * are not taken into account.
 *
 * @param string $a_query query to check
 * @return bool true when the query has no parentheses or they pass the check
 * @throws ilLuceneQueryParserException 'lucene_err_parenthesis_not_closed' when the numbers of
 *         opening and closing parentheses differ, 'lucene_err_parenthesis_empty' when a pair
 *         encloses nothing but optional whitespace
 */
protected static function checkParenthesis($a_query)
{
    $open_count = substr_count($a_query, '(');
    $close_count = substr_count($a_query, ')');

    // Nothing to validate only when BOTH kinds are absent. Returning as soon as one kind is
    // missing would accept an unclosed "(foo" or a stray "foo)".
    if ($open_count === 0 && $close_count === 0)
    {
        return true;
    }

    // Also covers the case where only one kind of parenthesis is present.
    if ($open_count !== $close_count)
    {
        throw new ilLuceneQueryParserException('lucene_err_parenthesis_not_closed');
    }

    if (preg_match('/\(\s*\)/', $a_query) > 0)
    {
        throw new ilLuceneQueryParserException('lucene_err_parenthesis_empty');
    }
    return true;
}
248 
/**
 * Check the use of the "+" (required) and "-" (prohibited) prefixes.
 *
 * Accepted are queries that contain neither "+", "-" nor a newline, and queries made of terms
 * that each may carry one leading "+" or "-" and are separated by at most one space.
 *
 * @param string $a_query query to check
 * @return bool true when the query passes
 * @throws ilLuceneQueryParserException 'lucene_err_plus_minus'
 */
protected static function checkPlusMinus($a_query)
{
    // "\\\\" in a single-quoted PHP string reaches PCRE as "\\", a literal backslash. Two
    // backslashes would reach PCRE as "\@" and leave the Lucene escape character out of the term
    // class, so an escaped sign such as "foo\+bar" would be refused.
    $valid = '/^[^\n+\-]*$|^([+-]?\s*[\pL0-9_:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]?)+$/u';

    if (preg_match($valid, $a_query) == 0)
    {
        throw new ilLuceneQueryParserException('lucene_err_plus_minus');
    }
    return true;
}
262 
/**
 * Check the use of the AND / OR / NOT operators.
 *
 * No operator validation is performed: every query is accepted. Statements placed after the
 * unconditional return could never run and are therefore not kept here.
 *
 * @param string $a_query query to check (not inspected)
 * @return bool always true
 */
protected static function checkANDORNOT($a_query)
{
    return true;
}
278 
/**
 * Check that double quotes are paired and enclose something.
 *
 * @param string $a_query query to check
 * @return bool true when the query has no double quotes or they pass the check
 * @throws ilLuceneQueryParserException 'lucene_err_quotes' for an odd number of double quotes,
 *         'lucene_err_quotes_not_empty' when two quotes enclose nothing but optional whitespace
 */
protected static function checkQuotes($a_query)
{
    $quote_count = substr_count($a_query, '"');

    if ($quote_count === 0)
    {
        return true;
    }

    // An odd number means at least one phrase was never closed.
    if ($quote_count % 2 !== 0)
    {
        throw new ilLuceneQueryParserException('lucene_err_quotes');
    }

    $has_empty_phrase = preg_match('/"\s*"/', $a_query) > 0;
    if ($has_empty_phrase)
    {
        throw new ilLuceneQueryParserException('lucene_err_quotes_not_empty');
    }

    return true;
}
304 
305 
/**
 * Check the field separator ":".
 *
 * The query is refused when
 *  - a colon that is not escaped with a backslash is followed by whitespace or ends the query,
 *  - a colon follows whitespace, either directly or after one character that is not a backslash,
 *  - the query starts with a colon, either directly or after one character that is neither a
 *    backslash nor whitespace (so a one-letter field name at the start is refused as well).
 *
 * @param string $a_query query to check
 * @return bool true when no misplaced colon is found
 * @throws ilLuceneQueryParserException 'lucene_err_colon'
 */
protected static function checkColon($a_query)
{
    // Escaping note: in a single-quoted PHP string "\\\\" becomes "\\" (a literal backslash for
    // PCRE) and "\s" is passed through as the whitespace class. Hence "[^\\\\\s]" means
    // "neither backslash nor whitespace" and "[^\\\\]" means "not a backslash". With fewer
    // backslashes the classes would exclude the letter "s" instead of whitespace, and "[^\\]"
    // would reach PCRE as "[^\]", an unterminated class that swallows the last alternative.
    $misplaced_colon = '/[^\\\\\s]:[\s]|[^\\\\\s]:$|[\s][^\\\\]?:|^[^\\\\\s]?:/';

    if (preg_match($misplaced_colon, $a_query) != 0)
    {
        throw new ilLuceneQueryParserException('lucene_err_colon');
    }
    return true;
}
319 }
320 ?>