ILIAS  release_5-4 Revision v5.4.26-12-gabc799a52e6
class.ilLuceneQueryParser.php
Go to the documentation of this file.
1 <?php
2 /*
3  +-----------------------------------------------------------------------------+
4  | ILIAS open source |
5  +-----------------------------------------------------------------------------+
6  | Copyright (c) 1998-2006 ILIAS open source, University of Cologne |
7  | |
8  | This program is free software; you can redistribute it and/or |
9  | modify it under the terms of the GNU General Public License |
10  | as published by the Free Software Foundation; either version 2 |
11  | of the License, or (at your option) any later version. |
12  | |
13  | This program is distributed in the hope that it will be useful, |
14  | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16  | GNU General Public License for more details. |
17  | |
18  | You should have received a copy of the GNU General Public License |
19  | along with this program; if not, write to the Free Software |
20  | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
21  +-----------------------------------------------------------------------------+
22 */
23 
24 include_once './Services/Search/classes/Lucene/class.ilLuceneQueryParserException.php';
25 
36 {
37  protected $query_string;
38  protected $parsed_query;
39 
40 
/**
 * Constructor.
 *
 * Only stores the raw query string; no parsing happens here. The parsed
 * form is produced later by parse() or parseAutoWildcard().
 *
 * @param string $a_query_string query string as entered (presumably a string; not enforced here)
 */
public function __construct($a_query_string)
{
    $this->query_string = $a_query_string;
}
50 
/**
 * Parse the query string.
 *
 * Every "owner:<login>" term (optionally with one whitespace character
 * after the colon) is passed through replaceOwnerCallback(); the rewritten
 * string is stored as the parsed query. Nothing is returned; use getQuery()
 * to read the result.
 *
 * @return void
 */
public function parse()
{
    $owner_pattern = '/(owner:)\s?([A-Za-z0-9_\.\+\*\@!\$\%\~\-]+)/';
    $owner_callback = array($this, 'replaceOwnerCallback');

    $this->parsed_query = preg_replace_callback(
        $owner_pattern,
        $owner_callback,
        $this->query_string
    );
}
59 
/**
 * Build the parsed query by appending a trailing wildcard where appropriate.
 *
 * The trimmed query string is taken as-is when it already contains an
 * asterisk anywhere or when it ends with a double quote; in every other
 * case a single '*' is appended.
 *
 * @return bool always true
 */
public function parseAutoWildcard()
{
    $trimmed = trim($this->query_string);

    $contains_asterisk = (bool) stristr($trimmed, '*');
    $ends_with_quote = (substr($trimmed, -1) === '"');

    // Only queries without any wildcard and without a closing quote get one appended.
    $this->parsed_query = ($contains_asterisk || $ends_with_quote)
        ? $trimmed
        : $trimmed . '*';

    return true;
}
78 
/**
 * Get the parsed query.
 *
 * @return string|null the value last stored by parse() or parseAutoWildcard();
 *                     unset until one of them has been called
 */
public function getQuery()
{
    return $this->parsed_query;
}
87 
/**
 * Replace owner callback (used with preg_replace_callback).
 *
 * Looks up the captured login via ilObjUser::_loginExists(). When the lookup
 * yields a truthy value, that value is put after the captured "owner:" prefix
 * (presumably it is the user id - verify against ilObjUser). Otherwise the
 * matched text is returned unchanged.
 *
 * @param array $matches match groups: [0] full match, [1] "owner:" prefix, [2] login
 * @return string replacement text for the match
 */
protected function replaceOwnerCallback($matches)
{
    // No login captured: leave the term untouched.
    if (!isset($matches[2])) {
        return $matches[0];
    }

    $owner_id = ilObjUser::_loginExists($matches[2]);
    if (!$owner_id) {
        // Unknown login: leave the term untouched.
        return $matches[0];
    }

    return $matches[1] . $owner_id;
}
100 
101 
/**
 * Validate a query string.
 *
 * Currently a no-op that accepts every query. None of the individual checks
 * is invoked; the following calls are disabled: checkAllowedCharacters,
 * checkAsterisk, checkAmpersands, checkExclamationMark, checkQuestionMark,
 * checkPlusMinus, checkANDORNOT and checkColon.
 *
 * @param string $a_query query string (unused at the moment)
 * @return bool always true
 */
public static function validateQuery($a_query)
{
    // TODO
    // First replace all quote characters, then run the individual checks
    // listed in the method documentation.
    return true;
}
125 
/**
 * Check allowed characters.
 *
 * Accepts Unicode letters, ASCII digits, the space character and the
 * punctuation listed in the pattern, including the backslash that Lucene
 * uses to escape special characters.
 *
 * @param string $query query string
 * @return bool true when every character is allowed
 * @throws ilLuceneQueryParserException when a disallowed character is found or
 *                                      the query cannot be evaluated (e.g. invalid UTF-8)
 */
protected static function checkAllowedCharacters($query)
{
    // In a single-quoted PHP literal "\\\\" is needed to put an escaped
    // backslash into the character class; "\\@" would only yield "\@",
    // i.e. a literal "@", and silently reject every backslash.
    $disallowed = '/[^\pL0-9_+\-:.()\"*?&§€|!{}\[\]\^~\\\\@#\/$%\'= ]/u';

    $result = preg_match($disallowed, $query);

    // 1 = disallowed character found, false = PCRE error (e.g. malformed UTF-8
    // under the "u" modifier). Both must fail validation instead of passing.
    if ($result !== 0) {
        throw new ilLuceneQueryParserException('lucene_err_allowed_characters');
    }
    return true;
}
137 
/**
 * Check asterisk.
 *
 * Rejects a query that consists solely of asterisks (this includes the
 * empty string), that has an asterisk directly after a whitespace
 * character, or that starts with an asterisk followed by a
 * non-whitespace character.
 *
 * @param string $query query string
 * @return bool true when no misplaced asterisk is found
 * @throws ilLuceneQueryParserException on a misplaced asterisk
 */
protected static function checkAsterisk($query)
{
    $misplaced = preg_match('/^[\*]*$|[\s]\*|^\*[^\s]/', $query);

    if ($misplaced === 1) {
        throw new ilLuceneQueryParserException('lucene_err_asterisk');
    }
    return true;
}
149 
/**
 * Check ampersands.
 *
 * Queries without a double ampersand always pass. Otherwise the whole query
 * must be a sequence of terms in which "&&" only appears as " && " between
 * two terms. Terms may contain backslash escapes.
 *
 * @param string $query query string
 * @return bool true when ampersands are used correctly
 * @throws ilLuceneQueryParserException when "&&" is misplaced or the pattern
 *                                      cannot be evaluated
 */
protected static function checkAmpersands($query)
{
    // Nothing to verify unless the query contains "&&".
    if (preg_match('/[&]{2}/', $query) !== 1) {
        return true;
    }

    // "\\\\@" yields an escaped backslash plus "@" in the character class.
    // The previous "\\@" only yielded "@", so any backslash in a term made
    // the query fail. The second class deliberately omits "&".
    $well_formed = '/^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+( && )?[\pL0-9_+\-:.()\"*?|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]*)+$/u';

    if (!preg_match($well_formed, $query)) {
        throw new ilLuceneQueryParserException('lucene_err_ampersand');
    }
    return true;
}
163 
/**
 * Check carets.
 *
 * Rejects a caret that is preceded by a non-backslash character and is
 * either the last character of the query or followed by text containing a
 * character other than a digit or a dot.
 *
 * @param string $query query string
 * @return bool true when no invalid caret is found
 * @throws ilLuceneQueryParserException on an invalid caret
 */
protected static function checkCaret($query)
{
    $invalid_caret = preg_match('/[^\\\]\^([^\s]*[^0-9.]+)|[^\\\]\^$/', $query);

    if ($invalid_caret === 1) {
        throw new ilLuceneQueryParserException('lucene_err_caret');
    }
    return true;
}
175 
/**
 * Check squiggles (tilde).
 *
 * Rejects a tilde that is followed, before the next whitespace, by at least
 * one character that is neither a digit nor whitespace. A dot counts as such
 * a character, so "foo~0.5" is rejected while "foo~" and "foo~2" pass.
 *
 * NOTE(review): the leading "not a backslash" group is optional (zero or more
 * repetitions), so an escaped tilde does not appear to be exempt - confirm
 * whether that is intended.
 *
 * @param string $query query string
 * @return bool true when no invalid tilde is found
 * @throws ilLuceneQueryParserException on an invalid tilde
 */
protected static function checkSquiggle($query)
{
    $invalid_tilde = preg_match('/[^\\\]*~[^\s]*[^0-9\s]+/', $query);

    if ($invalid_tilde === 1) {
        throw new ilLuceneQueryParserException('lucene_err_squiggle');
    }
    return true;
}
187 
/**
 * Check exclamation marks (replacement for NOT).
 *
 * A query passes when it contains no exclamation mark at all, or when the
 * whole query is a sequence of terms where a standalone " ! " only occurs
 * between two terms. Terms may contain backslash escapes.
 *
 * @param string $query query string
 * @return bool true when exclamation marks are used correctly
 * @throws ilLuceneQueryParserException on a misplaced exclamation mark or when
 *                                      the pattern cannot be evaluated
 */
protected static function checkExclamationMark($query)
{
    // "\\\\@" yields an escaped backslash plus "@" in the character class.
    // The previous "\\@" only yielded "@", so a query containing both "!"
    // and a backslash escape was always rejected.
    $well_formed = '/^[^!]*$|^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+( ! )?[\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]*)+$/u';

    if (!preg_match($well_formed, $query)) {
        throw new ilLuceneQueryParserException('lucene_err_exclamation_mark');
    }
    return true;
}
199 
/**
 * Check question mark (wild card single character).
 *
 * Rejects a query that starts with a question mark, or that has one or more
 * question marks directly after a character outside the allowed term
 * characters (for example whitespace). A question mark after a backslash is
 * accepted, so an escaped question mark passes.
 *
 * @param string $query query string
 * @return bool true when no misplaced question mark is found
 * @throws ilLuceneQueryParserException on a misplaced question mark
 */
protected static function checkQuestionMark($query)
{
    // "\\\\@" yields an escaped backslash plus "@" in the character class.
    // The previous "\\@" only yielded "@", so the backslash counted as a
    // disallowed predecessor and "\?" was rejected.
    $misplaced = '/^(\?)|([^\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]\?+)/u';

    if (preg_match($misplaced, $query) === 1) {
        throw new ilLuceneQueryParserException('lucene_err_question_mark');
    }
    return true;
}
211 
/**
 * Check parenthesis.
 *
 * A query without any parenthesis passes. Otherwise the number of opening
 * and closing parentheses must be equal and no pair may be empty (contain
 * nothing or only whitespace).
 *
 * @param string $a_query query string
 * @return bool true when parentheses are balanced by count and not empty
 * @throws ilLuceneQueryParserException 'lucene_err_parenthesis_not_closed' on a
 *                                      count mismatch, 'lucene_err_parenthesis_empty'
 *                                      on an empty pair
 */
protected static function checkParenthesis($a_query)
{
    $open_count = substr_count($a_query, '(');
    $close_count = substr_count($a_query, ')');

    // Only a query with no parenthesis at all may skip the checks. The former
    // guard used "||" and therefore also accepted queries containing just one
    // kind of parenthesis, e.g. "(foo".
    if ($open_count === 0 && $close_count === 0) {
        return true;
    }

    // Covers both "only one kind present" and "different amounts".
    if ($open_count !== $close_count) {
        throw new ilLuceneQueryParserException('lucene_err_parenthesis_not_closed');
    }

    if (preg_match('/\(\s*\)/', $a_query) > 0) {
        throw new ilLuceneQueryParserException('lucene_err_parenthesis_empty');
    }
    return true;
}
251 
/**
 * Check plus minus.
 *
 * A query passes when it contains no plus sign, minus sign or newline at
 * all, or when the whole query is a sequence of terms that are each
 * optionally prefixed by a single "+" or "-" and separated by at most one
 * space. Terms may contain backslash escapes.
 *
 * @param string $a_query query string
 * @return bool true when plus and minus are used correctly
 * @throws ilLuceneQueryParserException on a misplaced plus/minus or when the
 *                                      pattern cannot be evaluated
 */
protected static function checkPlusMinus($a_query)
{
    // "\\\\@" yields an escaped backslash plus "@" in the character class.
    // The previous "\\@" only yielded "@", so a prefixed query containing a
    // backslash escape was always rejected.
    $well_formed = '/^[^\n+\-]*$|^([+-]?\s*[\pL0-9_:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]?)+$/u';

    if (!preg_match($well_formed, $a_query)) {
        throw new ilLuceneQueryParserException('lucene_err_plus_minus');
    }
    return true;
}
264 
/**
 * Check AND OR NOT.
 *
 * This check is disabled and accepts every query. The method previously
 * returned true unconditionally before reaching a pattern test for
 * well-formed AND / OR / AND NOT / NOT operators; that unreachable test has
 * been removed so the disabled state is explicit.
 *
 * NOTE(review): if the check is to be reinstated, the removed pattern needs
 * review first - it required at least two characters per query and did not
 * permit backslash escapes.
 *
 * @param string $a_query query string (currently unused)
 * @return bool always true
 */
protected static function checkANDORNOT($a_query)
{
    return true;
}
279 
/**
 * Check quotes.
 *
 * A query without double quotes passes. Otherwise the number of double
 * quotes must be even, and no two quotes may be adjacent or separated only
 * by whitespace.
 *
 * @param string $a_query query string
 * @return bool true when quotes are paired and not empty
 * @throws ilLuceneQueryParserException 'lucene_err_quotes' on an odd number of
 *                                      quotes, 'lucene_err_quotes_not_empty' on an
 *                                      empty quoted section
 */
protected static function checkQuotes($a_query)
{
    $quote_count = substr_count($a_query, '"');

    if ($quote_count === 0) {
        return true;
    }

    // An odd amount means one quote was never closed.
    if ($quote_count % 2 !== 0) {
        throw new ilLuceneQueryParserException('lucene_err_quotes');
    }

    $has_empty_pair = preg_match('/"\s*"/', $a_query);
    if ($has_empty_pair === 1) {
        throw new ilLuceneQueryParserException('lucene_err_quotes_not_empty');
    }
    return true;
}
302 
303 
/**
 * Check colon.
 *
 * Rejects an unescaped colon that is followed by whitespace or ends the
 * query, a colon that follows whitespace (directly or after one
 * non-backslash character), and a colon at the start of the query (directly
 * or after one character that is neither backslash nor whitespace).
 *
 * @param string $a_query query string
 * @return bool true when no misplaced colon is found
 * @throws ilLuceneQueryParserException on a misplaced colon
 */
protected static function checkColon($a_query)
{
    // Intended regular expression:
    //   [^\\\s]:[\s] | [^\\\s]:$ | [\s][^\\]?: | ^[^\\\s]?:
    // Inside a single-quoted PHP literal every regex backslash-escape "\\"
    // has to be written as "\\\\". The former literal used "[^\\\\s]" (which
    // excluded the letter "s" instead of whitespace) and "[^\\]" (which left
    // the class unterminated and swallowed the last alternative), so a
    // leading colon and e.g. "tags: foo" were not detected.
    $misplaced_colon = '/[^\\\\\s]:[\s]|[^\\\\\s]:$|[\s][^\\\\]?:|^[^\\\\\s]?:/';

    if (preg_match($misplaced_colon, $a_query) === 1) {
        throw new ilLuceneQueryParserException('lucene_err_colon');
    }
    return true;
}
316 }
static checkAllowedCharacters($query)
Check allowed characters.
static checkSquiggle($query)
Check squiggles.
parse()
parse query string
static checkAmpersands($query)
Check ampersands.
static checkCaret($query)
Check carets.
static checkPlusMinus($a_query)
Check plus minus.
parseAutoWildcard()
Append asterisk for remote search from global search form field !!!DIC refactoring-script warning...
static _loginExists($a_login, $a_user_id=0)
Check if a login name already exists. You may exclude a user from the check by giving his user id as 2...
static checkQuestionMark($query)
Check question mark (wild card single character)
static checkQuotes($a_query)
Check quotes.
static checkANDORNOT($a_query)
Check AND OR NOT.
$query
static checkAsterisk($query)
Check asterisk.
static checkColon($a_query)
Check colon.
replaceOwnerCallback($matches)
Replace owner callback (preg_replace_callback)
static checkParenthesis($a_query)
Check parenthesis.
static checkExclamationMark($query)
Check exclamation marks (replacement for NOT)
__construct($a_query_string)
Constructor.