ILIAS  trunk Revision v11.0_alpha-1723-g8e69f309bab
All Data Structures Namespaces Files Functions Variables Enumerations Enumerator Modules Pages
class.ilLuceneQueryParser.php
Go to the documentation of this file.
1 <?php
2 
19 declare(strict_types=1);
29 {
/** Raw query string exactly as handed to the constructor. */
30  protected string $query_string;
/** Query produced by parse() or parseAutoWildcard(); stays empty until one of them has run. */
31  protected string $parsed_query = '';
32 
33 
/**
 * Constructor.
 *
 * The parameter is declared as string because it is assigned to the typed
 * string property $query_string; a wrong argument type is now reported at
 * the call boundary instead of at the property assignment.
 *
 * @param string $a_query_string raw query string; stored unchanged
 */
public function __construct(string $a_query_string)
{
    $this->query_string = $a_query_string;
}
42 
/**
 * Parse the query string.
 *
 * Every "owner:<login>" term of the raw query is handed to
 * replaceOwnerCallback(); the rewritten query is stored and can be read
 * through getQuery().
 */
public function parse(): void
{
    // The raw query must be passed as subject; without it
    // preg_replace_callback() fails with an ArgumentCountError.
    $this->parsed_query = (string) preg_replace_callback(
        '/(owner:)\s?([A-Za-z0-9_\.\+\*\@!\$\%\~\-]+)/',
        [$this, 'replaceOwnerCallback'],
        $this->query_string
    );
}
55 
/**
 * Append an asterisk for remote search from the global search form field.
 *
 * The trimmed query is taken over as parsed query. It is left as is when it
 * already contains an asterisk or ends with a double quote; otherwise a
 * trailing asterisk is added.
 */
public function parseAutoWildcard(): void
{
    $trimmed = trim($this->query_string);
    $this->parsed_query = $trimmed;

    // an explicit wildcard anywhere in the query wins
    $has_wildcard = (bool) stristr($trimmed, '*');
    if ($has_wildcard) {
        return;
    }

    // a query ending with a double quote gets no wildcard appended
    if (substr($trimmed, -1) === '"') {
        return;
    }

    $this->parsed_query = $trimmed . '*';
}
69 
/**
 * Get the parsed query.
 *
 * @return string result of the last parse() / parseAutoWildcard() call,
 *                or an empty string when neither has been called yet
 */
public function getQuery(): string
{
    $query = $this->parsed_query;

    return $query;
}
74 
/**
 * Replace owner callback (preg_replace_callback).
 *
 * @param array $matches match groups: [0] whole match, [1] the "owner:" prefix, [2] the login
 * @return string prefix followed by the value returned by ilObjUser::_loginExists()
 *                when that value is truthy, otherwise the unchanged match
 */
protected function replaceOwnerCallback(array $matches): string
{
    $unchanged = $matches[0];

    if (!isset($matches[2])) {
        return $unchanged;
    }

    $owner_id = ilObjUser::_loginExists($matches[2]);
    if (!$owner_id) {
        // unknown login: keep the term as typed
        return $unchanged;
    }

    return $matches[1] . $owner_id;
}
87 
88 
/**
 * Validate a query string.
 *
 * All individual syntax checks are currently switched off (see the list
 * in the body), so every query is accepted.
 *
 * @param mixed $a_query query to validate; not evaluated at the moment
 * @return bool always true
 */
public static function validateQuery($a_query): bool
{
    // Disabled checks:
    //   ilLuceneQueryParser::checkAllowedCharacters($a_query);
    //   ilLuceneQueryParser::checkAsterisk($a_query);
    //   ilLuceneQueryParser::checkAmpersands($a_query);
    //   ilLuceneQueryParser::checkExclamationMark($a_query);
    //   ilLuceneQueryParser::checkQuestionMark($a_query);
    //   ilLuceneQueryParser::checkPlusMinus($a_query);
    //   ilLuceneQueryParser::checkANDORNOT($a_query);
    //   ilLuceneQueryParser::checkColon($a_query);
    $is_valid = true;

    return $is_valid;
}
109 
/**
 * Check allowed characters.
 *
 * Accepts letters, digits, blanks and the punctuation listed in the
 * pattern, including the backslash that Lucene query syntax uses as escape
 * character.
 *
 * @param string $query query to check
 * @return bool true when the query contains allowed characters only
 * @throws ilLuceneQueryParserException with 'lucene_err_allowed_characters' when a
 *         character outside the whitelist is found or the query is not valid UTF-8
 */
protected static function checkAllowedCharacters(string $query): bool
{
    // '\\\\' is needed inside a single-quoted PHP string to hand a literal
    // backslash (regex "\\") to PCRE; the former '\\@' only produced "\@",
    // i.e. an escaped "@", and left the backslash off the whitelist.
    $forbidden = '/[^\pL0-9_+\-:.()\"*?&§€|!{}\[\]\^~\\\\@#\/$%\'= ]/u';

    // Strict comparison: with the "u" modifier preg_match() returns false on
    // malformed UTF-8, which must be rejected rather than treated as "no match".
    if (preg_match($forbidden, $query) !== 0) {
        throw new ilLuceneQueryParserException('lucene_err_allowed_characters');
    }
    return true;
}
121 
/**
 * Check asterisk.
 *
 * Rejects a query that is empty or consists of asterisks only, an asterisk
 * directly after whitespace, and a leading asterisk followed by a
 * non-whitespace character.
 *
 * @param string $query query to check
 * @return bool true when no invalid asterisk usage was found
 * @throws ilLuceneQueryParserException with 'lucene_err_asterisk'
 */
protected static function checkAsterisk(string $query): bool
{
    $misplaced = preg_match('/^[\*]*$|[\s]\*|^\*[^\s]/', $query);

    if ($misplaced == 0) {
        return true;
    }
    throw new ilLuceneQueryParserException('lucene_err_asterisk');
}
133 
/**
 * Check ampersands.
 *
 * Only queries containing "&&" are inspected; they have to consist of terms
 * that may be joined by " && ". Terms may contain a backslash (Lucene's
 * escape character).
 *
 * @param string $query query to check
 * @return bool true when the query has no "&&" or uses it in the accepted form
 * @throws ilLuceneQueryParserException with 'lucene_err_ampersand'
 */
protected static function checkAmpersands(string $query): bool
{
    if (preg_match('/[&]{2}/', $query) > 0) {
        // '\\\\@' yields the regex "\\@" (literal backslash, then "@"); the
        // former '\\@' yielded "\@" and so rejected every escaped term.
        if (preg_match('/^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+( && )?[\pL0-9_+\-:.()\"*?|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]*)+$/u', $query) == 0) {
            throw new ilLuceneQueryParserException('lucene_err_ampersand');
        }
    }
    return true;
}
147 
/**
 * Check carets.
 *
 * Rejects a caret (not preceded by a backslash) that ends the query or is
 * followed by a token containing anything but digits and dots.
 *
 * @param string $query query to check
 * @return bool true when no invalid caret usage was found
 * @throws ilLuceneQueryParserException with 'lucene_err_caret'
 */
protected static function checkCaret(string $query): bool
{
    $invalid_boost = preg_match('/[^\\\]\^([^\s]*[^0-9.]+)|[^\\\]\^$/', $query);

    if ($invalid_boost == 0) {
        return true;
    }
    throw new ilLuceneQueryParserException('lucene_err_caret');
}
159 
/**
 * Check squiggles.
 *
 * A tilde that is not escaped by a backslash may only be followed by digits
 * and dots up to the next whitespace (e.g. "roam~", "roam~0.8",
 * "\"a b\"~10").
 *
 * @param string $query query to check
 * @return bool true when no invalid tilde usage was found
 * @throws ilLuceneQueryParserException with 'lucene_err_squiggle'
 */
protected static function checkSquiggle(string $query): bool
{
    // (?<!\\) exempts an escaped "\~"; the former "[^\\]*" prefix could match
    // the empty string, so it never exempted anything.
    // The dot is excluded from the trailing class so that fractional
    // similarities such as "~0.8" are accepted, as checkCaret() does for "^2.5".
    if (preg_match('/(?<!\\\\)~[^\s]*[^0-9.\s]+/', $query) != 0) {
        throw new ilLuceneQueryParserException('lucene_err_squiggle');
    }
    return true;
}
171 
/**
 * Check exclamation marks (replacement for NOT).
 *
 * A query without any exclamation mark is accepted. Otherwise it has to
 * consist of terms that may be joined by " ! ". Terms may contain a
 * backslash (Lucene's escape character).
 *
 * @param string $query query to check
 * @return bool true when the exclamation mark usage is accepted
 * @throws ilLuceneQueryParserException with 'lucene_err_exclamation_mark'
 */
protected static function checkExclamationMark(string $query): bool
{
    // '\\\\@' yields the regex "\\@" (literal backslash, then "@"); the
    // former '\\@' yielded "\@" and left the backslash out of the term class.
    if (preg_match('/^[^!]*$|^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+( ! )?[\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]*)+$/u', $query) == 0) {
        throw new ilLuceneQueryParserException('lucene_err_exclamation_mark');
    }
    return true;
}
183 
/**
 * Check question mark (wild card single character).
 *
 * Rejects a question mark at the very beginning of the query and one that
 * follows a character outside the term character class (e.g. a blank).
 * A backslash belongs to the term class, so an escaped "\?" is accepted.
 *
 * @param string $query query to check
 * @return bool true when no invalid question mark usage was found
 * @throws ilLuceneQueryParserException with 'lucene_err_question_mark'
 */
protected static function checkQuestionMark(string $query): bool
{
    // '\\\\@' yields the regex "\\@" (literal backslash, then "@"); the
    // former '\\@' yielded "\@", which made "\?" look like a misplaced wildcard.
    if (preg_match('/^(\?)|([^\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]\?+)/u', $query) != 0) {
        throw new ilLuceneQueryParserException('lucene_err_question_mark');
    }
    return true;
}
195 
/**
 * Check parenthesis.
 *
 * A query without any parenthesis is accepted. Otherwise the number of
 * opening and closing parentheses has to be equal and no pair may be empty.
 * Nesting order is not verified.
 *
 * @param string $a_query query to check
 * @return bool true when the parenthesis usage is accepted
 * @throws ilLuceneQueryParserException with 'lucene_err_parenthesis_not_closed' when the
 *         counts differ, or 'lucene_err_parenthesis_empty' for "()" / "(  )"
 */
protected static function checkParenthesis(string $a_query): bool
{
    $opening = (int) preg_match_all('/[(]/', $a_query);
    $closing = (int) preg_match_all('/[)]/', $a_query);

    // Accept only when NEITHER kind occurs. The former "!left || !right"
    // test also returned here for a lone "(" or ")", which made the
    // not-closed check below unreachable for exactly those inputs.
    if ($opening === 0 && $closing === 0) {
        return true;
    }

    // Covers both a missing counterpart and unequal counts.
    if ($opening !== $closing) {
        throw new ilLuceneQueryParserException('lucene_err_parenthesis_not_closed');
    }

    if (preg_match('/\(\s*\)/', $a_query) > 0) {
        throw new ilLuceneQueryParserException('lucene_err_parenthesis_empty');
    }
    return true;
}
235 
/**
 * Check plus minus.
 *
 * A query without "+" and "-" (and without line breaks) is accepted.
 * Otherwise every term may carry at most one leading "+" or "-". Terms may
 * contain a backslash (Lucene's escape character).
 *
 * @param string $a_query query to check
 * @return bool true when the plus / minus usage is accepted
 * @throws ilLuceneQueryParserException with 'lucene_err_plus_minus'
 */
protected static function checkPlusMinus(string $a_query): bool
{
    // '\\\\@' yields the regex "\\@" (literal backslash, then "@"); the
    // former '\\@' yielded "\@" and left the backslash out of the term class.
    if (preg_match('/^[^\n+\-]*$|^([+-]?\s*[\pL0-9_:.()\"*?&|!{}\[\]\^~\\\\@#\/$%\'=]+[ ]?)+$/u', $a_query) == 0) {
        throw new ilLuceneQueryParserException('lucene_err_plus_minus');
    }
    return true;
}
248 
/**
 * Check AND OR NOT.
 *
 * The whole query has to consist of terms that may be joined by "AND ",
 * "OR ", "AND NOT " or "NOT ". Terms may contain a backslash (Lucene's
 * escape character).
 *
 * @param string $a_query query to check
 * @return bool true when the query matches the accepted form
 * @throws ilLuceneQueryParserException with 'lucene_err_and_or_not'
 */
protected static function checkANDORNOT(string $a_query): bool
{
    // '\\\\@' yields the regex "\\@" (literal backslash, then "@"); the
    // former '\\@' yielded "\@" and left the backslash out of the term class.
    if (preg_match('/^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@\/#$%\'=]+\s*((AND )|(OR )|(AND NOT )|(NOT ))?[\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\\\@\/#$%\'=]+[ ]*)+$/u', $a_query) == 0) {
        throw new ilLuceneQueryParserException('lucene_err_and_or_not');
    }
    return true;
}
261 
/**
 * Check quotes.
 *
 * A query without double quotes is accepted. Otherwise the number of double
 * quotes has to be even, and no two quotes may enclose nothing but whitespace.
 *
 * @param string $a_query query to check
 * @return bool true when the quote usage is accepted
 * @throws ilLuceneQueryParserException with 'lucene_err_quotes' for an odd number of
 *         quotes, or 'lucene_err_quotes_not_empty' for an empty quoted section
 */
protected static function checkQuotes(string $a_query): bool
{
    $quote_count = preg_match_all('/"/', $a_query);

    if ($quote_count == 0) {
        return true;
    }

    $is_odd = ($quote_count % 2) > 0;
    if ($is_odd) {
        throw new ilLuceneQueryParserException('lucene_err_quotes');
    }

    $has_empty_phrase = preg_match('/"\s*"/', $a_query) > 0;
    if ($has_empty_phrase) {
        throw new ilLuceneQueryParserException('lucene_err_quotes_not_empty');
    }

    return true;
}
284 
285 
/**
 * Check colon.
 *
 * Rejects a colon that
 *  - follows a non-whitespace, non-backslash character and is itself followed
 *    by whitespace or the end of the query ("title: foo", "title:"),
 *  - appears at most one character after whitespace ("foo :bar"),
 *  - appears at most one character after the start of the query (":foo").
 * A colon escaped with a backslash is not flagged by the first, second and
 * fourth rule.
 *
 * @param string $a_query query to check
 * @return bool true when no invalid colon usage was found
 * @throws ilLuceneQueryParserException with 'lucene_err_colon'
 */
protected static function checkColon(string $a_query): bool
{
    // Escaping matters here: inside a single-quoted PHP string '\\\\\s'
    // becomes the regex "\\\s" (backslash or whitespace) and '\\\\' becomes
    // "\\". The former '[^\\\\s]' compiled to "not backslash, not the letter
    // s", and '[^\\]' to an unterminated class that swallowed the last
    // alternative.
    if (preg_match('/[^\\\\\s]:[\s]|[^\\\\\s]:$|[\s][^\\\\]?:|^[^\\\\\s]?:/', $a_query) != 0) {
        throw new ilLuceneQueryParserException('lucene_err_colon');
    }
    return true;
}
297 }
replaceOwnerCallback(array $matches)
Replace owner callback (preg_replace_callback)
parse()
parse query string
static checkPlusMinus(string $a_query)
Check plus minus.
static checkQuotes(string $a_query)
Check quotes.
parseAutoWildcard()
Append asterisk for remote search from global search form field.
static checkColon(string $a_query)
Check colon.
static checkSquiggle(string $query)
Check squiggles.
static checkCaret(string $query)
Check carets.
static checkAllowedCharacters(string $query)
Check allowed characters.
static checkAsterisk(string $query)
Check asterisk.
static checkParenthesis(string $a_query)
Check parenthesis.
static _loginExists(string $a_login, int $a_user_id=0)
Check if a login name already exists. You may exclude a user from the check by giving their user id as the second parameter.
static checkQuestionMark(string $query)
Check question mark (wild card single character)
static checkAmpersands(string $query)
Check ampersands.
static checkExclamationMark(string $query)
Check exclamation marks (replacement for NOT)
static checkANDORNOT(string $a_query)
Check AND OR NOT.
__construct($a_query_string)
Constructor.