ILIAS  release_5-3 Revision v5.3.23-19-g915713cf615
class.ilLuceneQueryParser.php
Go to the documentation of this file.
1<?php
2/*
3 +-----------------------------------------------------------------------------+
4 | ILIAS open source |
5 +-----------------------------------------------------------------------------+
6 | Copyright (c) 1998-2006 ILIAS open source, University of Cologne |
7 | |
8 | This program is free software; you can redistribute it and/or |
9 | modify it under the terms of the GNU General Public License |
10 | as published by the Free Software Foundation; either version 2 |
11 | of the License, or (at your option) any later version. |
12 | |
13 | This program is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with this program; if not, write to the Free Software |
20 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
21 +-----------------------------------------------------------------------------+
22*/
23
24include_once './Services/Search/classes/Lucene/class.ilLuceneQueryParserException.php';
25
36{
/**
 * Raw query string exactly as handed to the constructor.
 */
37 protected $query_string;
/**
 * Result of the last parse() / parseAutoWildcard() call.
 */
38 protected $parsed_query;
39
40
/**
 * Constructor.
 *
 * Only remembers the query string; no parsing happens until parse() or
 * parseAutoWildcard() is invoked.
 *
 * @param string $a_query_string query string to be parsed later
 */
public function __construct($a_query_string)
{
    $this->query_string = $a_query_string;
}
50
/**
 * Parse the query string.
 *
 * Each "owner:<login>" term (one optional whitespace character after the
 * colon) is handed to replaceOwnerCallback(); the rewritten string is
 * stored as the parsed query.
 *
 * @return void
 */
public function parse()
{
    $owner_term = '/(owner:)\s?([A-Za-z0-9_\.\+\*\@!\$\%\~\-]+)/';
    $resolver = array($this, 'replaceOwnerCallback');

    $this->parsed_query = preg_replace_callback($owner_term, $resolver, $this->query_string);
}
59
/**
 * Prepare the query with an automatic trailing wildcard.
 *
 * The trimmed query string becomes the parsed query. An asterisk is
 * appended unless the query already contains one or ends with a double
 * quote.
 *
 * @return bool always true
 */
public function parseAutoWildcard()
{
    $trimmed = trim($this->query_string);

    $has_wildcard = strpos($trimmed, '*') !== false;
    $ends_with_quote = substr($trimmed, -1) === '"';

    $this->parsed_query = ($has_wildcard || $ends_with_quote) ? $trimmed : $trimmed . '*';

    return true;
}
75
/**
 * Get the parsed query.
 *
 * @return string|null result of the last parse() / parseAutoWildcard()
 *                     call; null when neither has been called yet
 */
public function getQuery()
{
    // The body was empty, so the getter always returned null and the
    // parsed query could not be read from outside the class.
    return $this->parsed_query;
}
84
/**
 * Replace owner callback (used by preg_replace_callback in parse()).
 *
 * When the captured login is known to ilObjUser::_loginExists(), the
 * login is swapped for the value that call returns (presumably the user
 * id - confirm against ilObjUser). Otherwise the matched text is returned
 * untouched.
 *
 * @param array $matches 0 => full match, 1 => "owner:" prefix, 2 => login
 * @return string replacement for the matched term
 */
protected function replaceOwnerCallback($matches)
{
    if (!isset($matches[2])) {
        return $matches[0];
    }

    $usr_id = ilObjUser::_loginExists($matches[2]);

    return $usr_id ? $matches[1] . $usr_id : $matches[0];
}
97
98
/**
 * Validate a query string.
 *
 * Runs the caret, squiggle, parenthesis and quote checks. The remaining
 * checks (allowed characters, asterisk, ampersands, exclamation mark,
 * question mark, plus/minus, AND/OR/NOT, colon) stay disabled.
 *
 * NOTE(review): the four active calls were missing from this listing,
 * which left the method validating nothing. They are restored here in
 * the order in which the check methods are defined - confirm against the
 * repository version.
 *
 * @param string $a_query query string to validate
 * @return bool true when every active check passes
 * @throws ilLuceneQueryParserException when one of the active checks fails
 */
public static function validateQuery($a_query)
{
    // TODO
    // First replace all quote characters

    #self::checkAllowedCharacters($a_query);
    #self::checkAsterisk($a_query);
    #self::checkAmpersands($a_query);
    self::checkCaret($a_query);
    self::checkSquiggle($a_query);
    #self::checkExclamationMark($a_query);
    #self::checkQuestionMark($a_query);
    self::checkParenthesis($a_query);
    #self::checkPlusMinus($a_query);
    #self::checkANDORNOT($a_query);
    self::checkQuotes($a_query);
    #self::checkColon($a_query);
    return true;
}
122
/**
 * Check allowed characters.
 *
 * Rejects any character outside the whitelist of Unicode letters, digits,
 * blank and the listed punctuation.
 *
 * NOTE(review): inside the single-quoted literal `\\@` reaches PCRE as
 * `\@`, so a backslash itself is not on the whitelist - confirm whether
 * that is intended.
 *
 * @param string $query query string to check
 * @return bool true when only allowed characters are present
 * @throws ilLuceneQueryParserException on a forbidden character or when
 *                                      the query cannot be matched at all
 *                                      (e.g. malformed UTF-8)
 */
protected static function checkAllowedCharacters($query)
{
    // preg_match() returns false when the /u pattern meets malformed UTF-8.
    // The former loose "!= 0" treated that failure like "no forbidden
    // character"; only an explicit 0 may count as clean.
    if (preg_match('/[^\pL0-9_+\-:.()\"*?&§€|!{}\[\]\^~\\@#\/$%\'= ]/u', $query) !== 0) {
        throw new ilLuceneQueryParserException('lucene_err_allowed_characters');
    }
    return true;
}
134
/**
 * Check asterisk.
 *
 * A query is rejected when it consists of asterisks only (this includes
 * the empty string), when an asterisk directly follows whitespace, or
 * when it starts with an asterisk followed by a non-whitespace character.
 *
 * @param string $query query string to check
 * @return bool true when no misplaced asterisk was found
 * @throws ilLuceneQueryParserException on a misplaced asterisk
 */
protected static function checkAsterisk($query)
{
    $misplaced = preg_match('/^[\*]*$|[\s]\*|^\*[^\s]/', $query);

    if ($misplaced == 0) {
        return true;
    }

    throw new ilLuceneQueryParserException('lucene_err_asterisk');
}
146
/**
 * Check ampersands.
 *
 * Only queries containing "&&" are inspected. Such a query must consist
 * of terms that are optionally joined by " && " (with surrounding blanks).
 *
 * @param string $query query string to check
 * @return bool true when there is no "&&" or it is used correctly
 * @throws ilLuceneQueryParserException on a misplaced "&&"
 */
protected static function checkAmpersands($query)
{
    $has_double_ampersand = preg_match('/[&]{2}/', $query) > 0;
    if (!$has_double_ampersand) {
        return true;
    }

    $well_formed = preg_match('/^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#\/$%\'=]+( && )?[\pL0-9_+\-:.()\"*?|!{}\[\]\^~\\@#\/$%\'=]+[ ]*)+$/u', $query);
    if ($well_formed == 0) {
        throw new ilLuceneQueryParserException('lucene_err_ampersand');
    }

    return true;
}
160
/**
 * Check carets (term boosting, e.g. "jakarta^4 apache").
 *
 * An unescaped caret that follows some character must be followed
 * directly by a boost made of digits and dots. The query is rejected
 * when the caret is followed by whitespace, by the end of the string, or
 * by a token containing anything other than digits and dots.
 *
 * @param string $query query string to check
 * @return bool true when every caret carries a numeric boost
 * @throws ilLuceneQueryParserException on a caret without numeric boost
 */
protected static function checkCaret($query)
{
    // The former class [^0-9.]+ also matched whitespace, so a valid boost
    // followed by another term ("a^4 b") was rejected. Whitespace is now
    // excluded from the "bad character" class, and the caret followed by
    // whitespace or end of string is handled by explicit alternatives.
    if (preg_match('/[^\\\]\^([^\s]*[^0-9.\s]|\s|$)/', $query) != 0) {
        throw new ilLuceneQueryParserException('lucene_err_caret');
    }
    return true;
}
172
/**
 * Check squiggles (fuzzy / proximity operator, e.g. roam~0.8 or "a b"~10).
 *
 * The token following a tilde may only consist of digits and dots;
 * anything else is rejected. A tilde followed by nothing or by
 * whitespace is accepted.
 *
 * NOTE(review): the leading `[^\\]*` can match zero characters, so it
 * does not exempt an escaped tilde - confirm whether that was intended.
 *
 * @param string $query query string to check
 * @return bool true when every tilde is followed by a numeric value or nothing
 * @throws ilLuceneQueryParserException on a tilde followed by a non-numeric token
 */
protected static function checkSquiggle($query)
{
    // The dot was missing from the allowed characters, which made the
    // decimal similarity form "roam~0.8" fail validation.
    if (preg_match('/[^\\\]*~[^\s]*[^0-9.\s]+/', $query) != 0) {
        throw new ilLuceneQueryParserException('lucene_err_squiggle');
    }
    return true;
}
184
/**
 * Check exclamation marks (replacement for NOT).
 *
 * Accepts queries without any exclamation mark, and queries built from
 * terms that are optionally joined by " ! " (with surrounding blanks).
 *
 * @param string $query query string to check
 * @return bool true when the query is accepted
 * @throws ilLuceneQueryParserException on a misplaced exclamation mark
 */
protected static function checkExclamationMark($query)
{
    $accepted = preg_match('/^[^!]*$|^([\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#\/$%\'=]+( ! )?[\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#\/$%\'=]+[ ]*)+$/u', $query);

    if ($accepted == 0) {
        throw new ilLuceneQueryParserException('lucene_err_exclamation_mark');
    }

    return true;
}
196
/**
 * Check question mark (wild card for a single character).
 *
 * Rejects a question mark at the very beginning of the query and any
 * question mark that directly follows a character outside the term
 * character set (for instance a blank).
 *
 * @param string $query query string to check
 * @return bool true when no misplaced question mark was found
 * @throws ilLuceneQueryParserException on a misplaced question mark
 */
protected static function checkQuestionMark($query)
{
    $misplaced = preg_match('/^(\?)|([^\pL0-9_+\-:.()\"*?&|!{}\[\]\^~\\@#\/$%\'=]\?+)/u', $query);

    if ($misplaced == 0) {
        return true;
    }

    throw new ilLuceneQueryParserException('lucene_err_question_mark');
}
208
/**
 * Check parenthesis.
 *
 * Opening and closing brackets are counted; the counts must be equal and
 * no pair may be empty or contain only whitespace. Escaped brackets and
 * nesting order are not taken into account.
 *
 * @param string $a_query query string to check
 * @return bool true when brackets are balanced and none is empty
 * @throws ilLuceneQueryParserException on unbalanced or empty brackets
 */
protected static function checkParenthesis($a_query)
{
    $open = substr_count($a_query, '(');
    $close = substr_count($a_query, ')');

    // Nothing to validate without any bracket. The former guard used
    // "||" and therefore also returned true for a lone "(" or ")", which
    // made the "not closed" check below it unreachable.
    if ($open === 0 && $close === 0) {
        return true;
    }

    if ($open !== $close) {
        throw new ilLuceneQueryParserException('lucene_err_parenthesis_not_closed');
    }

    // Plain escaped brackets; the former pattern carried stray zero-width
    // characters after each backslash and so never matched "()".
    if (preg_match('/\(\s*\)/', $a_query) > 0) {
        throw new ilLuceneQueryParserException('lucene_err_parenthesis_empty');
    }
    return true;
}
248
/**
 * Check plus minus.
 *
 * Accepts queries that contain neither "+" nor "-" (nor a line break),
 * and queries in which every term may carry one leading "+" or "-".
 *
 * @param string $a_query query string to check
 * @return bool true when the query is accepted
 * @throws ilLuceneQueryParserException on a misplaced plus or minus
 */
protected static function checkPlusMinus($a_query)
{
    $accepted = preg_match('/^[^\n+\-]*$|^([+-]?\s*[\pL0-9_:.()\"*?&|!{}\[\]\^~\\@#\/$%\'=]+[ ]?)+$/u', $a_query);

    if ($accepted == 0) {
        throw new ilLuceneQueryParserException('lucene_err_plus_minus');
    }

    return true;
}
261
/**
 * Check AND OR NOT.
 *
 * This check is disabled: the method accepts every query. A regex-based
 * validation used to follow an unconditional "return true;" and was
 * therefore never executed; that unreachable code has been removed.
 *
 * @param string $a_query query string (currently not inspected)
 * @return bool always true
 */
protected static function checkANDORNOT($a_query)
{
    return true;
}
276
/**
 * Check quotes.
 *
 * Double quotes have to come in an even number, and two quotes must not
 * enclose nothing or only whitespace.
 *
 * @param string $a_query query string to check
 * @return bool true when quotes are balanced and none is empty
 * @throws ilLuceneQueryParserException on an odd number of quotes or an
 *                                      empty quoted phrase
 */
protected static function checkQuotes($a_query)
{
    $quote_count = substr_count($a_query, '"');

    if ($quote_count === 0) {
        return true;
    }

    if ($quote_count % 2 !== 0) {
        throw new ilLuceneQueryParserException('lucene_err_quotes');
    }

    $has_empty_phrase = preg_match('/"\s*"/', $a_query) > 0;
    if ($has_empty_phrase) {
        throw new ilLuceneQueryParserException('lucene_err_quotes_not_empty');
    }

    return true;
}
299
300
/**
 * Check colon.
 *
 * Throws when the colon pattern below matches, e.g. for a colon that is
 * followed by whitespace or by the end of the query.
 *
 * NOTE(review): inside the single-quoted literal `[^\\\\s]` reaches PCRE
 * as `[^\\s]` (backslash and the letter "s", not whitespace), and
 * `[^\\]` reaches it as `[^\]`, which leaves that class open until the
 * next "]". The pattern probably does not express what was intended -
 * verify before enabling this check in validateQuery().
 *
 * @param string $a_query query string to check
 * @return bool true when the pattern does not match
 * @throws ilLuceneQueryParserException when the pattern matches
 */
protected static function checkColon($a_query)
{
    $misplaced = preg_match('/[^\\\\s]:[\s]|[^\\\\s]:$|[\s][^\\]?:|^[^\\\\s]?:/', $a_query);

    if ($misplaced == 0) {
        return true;
    }

    throw new ilLuceneQueryParserException('lucene_err_colon');
}
313}
An exception for terminating execution or to throw for unit testing.
static checkColon($a_query)
Check colon.
__construct($a_query_string)
Constructor.
replaceOwnerCallback($matches)
Replace owner callback (preg_replace_callback)
static checkAmpersands($query)
Check ampersands.
static checkPlusMinus($a_query)
Check plus minus.
static checkParenthesis($a_query)
Check parenthesis.
parseAutoWildcard()
Append asterisk for remote search from global search form field.
static checkANDORNOT($a_query)
Check AND OR NOT.
static checkSquiggle($query)
Check squiggles.
static checkExclamationMark($query)
Check exclamation marks (replacement for NOT)
static checkQuestionMark($query)
Check question mark (wild card single character)
static checkQuotes($a_query)
Check quotes.
static checkCaret($query)
Check carets.
static checkAsterisk($query)
Check asterisk.
static checkAllowedCharacters($query)
Check allowed characters.
static _loginExists($a_login, $a_user_id=0)
Check if a login name already exists. You may exclude a user from the check by giving his user id as 2...
$query