GDAL
ogr_swq.h
1 /******************************************************************************
2  *
3  * Component: OGDI Driver Support Library
4  * Purpose: Generic SQL WHERE Expression Evaluator Declarations.
5  * Author: Frank Warmerdam <warmerdam@pobox.com>
6  *
7  ******************************************************************************
8  * Copyright (C) 2001 Information Interoperability Institute (3i)
9  * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
10  * Permission to use, copy, modify and distribute this software and
11  * its documentation for any purpose and without fee is hereby granted,
12  * provided that the above copyright notice appear in all copies, that
13  * both the copyright notice and this permission notice appear in
14  * supporting documentation, and that the name of 3i not be used
15  * in advertising or publicity pertaining to distribution of the software
16  * without specific, written prior permission. 3i makes no
17  * representations about the suitability of this software for any purpose.
18  * It is provided "as is" without express or implied warranty.
19  ****************************************************************************/
20 
21 #ifndef SWQ_H_INCLUDED_
22 #define SWQ_H_INCLUDED_
23 
24 #ifndef DOXYGEN_SKIP
25 
26 #include "cpl_conv.h"
27 #include "cpl_string.h"
28 #include "ogr_core.h"
29 
30 #include <list>
31 #include <map>
32 #include <vector>
33 #include <set>
34 
35 #if defined(_WIN32) && !defined(strcasecmp)
36 #define strcasecmp stricmp
37 #endif
38 
/* Marker string used for swq_summary.oSetDistinctValues and
 * swq_summary.oVectorDistinctValues.
 * NOTE(review): by its name this stands in for NULL entries - confirm. */
#define SZ_OGR_NULL "__OGR_NULL__"
41 
/**
 * Operation codes carried by operation nodes (swq_expr_node::nOperation)
 * and by swq_operation::eOperation.
 *
 * Enumerator values are implicit and follow declaration order, so the order
 * below must not change.
 */
typedef enum
{
    /* Operators and scalar functions. */
    SWQ_OR,
    SWQ_AND,
    SWQ_NOT,
    SWQ_EQ,
    SWQ_NE,
    SWQ_GE,
    SWQ_LE,
    SWQ_LT,
    SWQ_GT,
    SWQ_LIKE,
    SWQ_ILIKE,
    SWQ_ISNULL,
    SWQ_IN,
    SWQ_BETWEEN,
    SWQ_ADD,
    SWQ_SUBTRACT,
    SWQ_MULTIPLY,
    SWQ_DIVIDE,
    SWQ_MODULUS,
    SWQ_CONCAT,
    SWQ_SUBSTR,
    SWQ_HSTORE_GET_VALUE,

    /* Aggregate range: SWQ_AGGREGATE_BEGIN and SWQ_AGGREGATE_END alias the
     * first and last enumerators of this run. */
    SWQ_AVG,
    SWQ_AGGREGATE_BEGIN = SWQ_AVG,
    SWQ_MIN,
    SWQ_MAX,
    SWQ_COUNT,
    SWQ_SUM,
    SWQ_STDDEV_POP,
    SWQ_STDDEV_SAMP,
    SWQ_AGGREGATE_END = SWQ_STDDEV_SAMP,

    SWQ_CAST,

    /* Only if parsing is done in bAcceptCustomFuncs mode. */
    SWQ_CUSTOM_FUNC,

    /* Temporary value only set during parsing and replaced by something
     * else at the end. */
    SWQ_ARGUMENT_LIST
} swq_op;
82 
/**
 * Value/field types known to the SQL expression code.
 *
 * Enumerator values are implicit and follow declaration order.
 */
typedef enum
{
    SWQ_INTEGER,
    SWQ_INTEGER64,
    SWQ_FLOAT,
    SWQ_STRING,
    SWQ_BOOLEAN,   /* integer */
    SWQ_DATE,      /* string */
    SWQ_TIME,      /* string */
    SWQ_TIMESTAMP, /* string */
    SWQ_GEOMETRY,
    SWQ_NULL,
    SWQ_OTHER,
    SWQ_ERROR
} swq_field_type;

/* True when x is SWQ_INTEGER or SWQ_INTEGER64.
 * The argument is expanded twice, so it must be free of side effects. */
#define SWQ_IS_INTEGER(x) ((x) == SWQ_INTEGER || (x) == SWQ_INTEGER64)
100 
/**
 * Kind of an expression node, stored in swq_expr_node::eNodeType.
 */
typedef enum
{
    SNT_CONSTANT, /* constant value */
    SNT_COLUMN,   /* column reference */
    SNT_OPERATION /* operation applied to sub-expressions */
} swq_node_type;
107 
/* Forward declarations: only pointers/references to these types are needed
 * before their full definitions. */
class OGRGeometry;
class swq_expr_node;
class swq_field_list;
class swq_select;
112 
113 struct CPL_UNSTABLE_API swq_evaluation_context
114 {
115  bool bUTF8Strings = false;
116 };
117 
118 typedef swq_expr_node *(*swq_field_fetcher)(swq_expr_node *op,
119  void *record_handle);
120 typedef swq_expr_node *(*swq_op_evaluator)(
121  swq_expr_node *op, swq_expr_node **sub_field_values,
122  const swq_evaluation_context &sContext);
123 typedef swq_field_type (*swq_op_checker)(
124  swq_expr_node *op, int bAllowMismatchTypeOnFieldComparison);
125 
126 class swq_custom_func_registrar;
127 
128 class CPL_UNSTABLE_API swq_expr_node
129 {
130  swq_expr_node *Evaluate(swq_field_fetcher pfnFetcher, void *record,
131  const swq_evaluation_context &sContext,
132  int nRecLevel);
133  void reset();
134 
135  public:
136  swq_expr_node();
137  swq_expr_node(const swq_expr_node &);
138  swq_expr_node(swq_expr_node &&);
139 
140  swq_expr_node &operator=(const swq_expr_node &);
141  swq_expr_node &operator=(swq_expr_node &&);
142 
143  bool operator==(const swq_expr_node &) const;
144 
145  explicit swq_expr_node(const char *);
146  explicit swq_expr_node(int);
147  explicit swq_expr_node(GIntBig);
148  explicit swq_expr_node(double);
149  explicit swq_expr_node(OGRGeometry *);
150  explicit swq_expr_node(swq_op);
151 
152  ~swq_expr_node();
153 
154  void MarkAsTimestamp();
155  CPLString UnparseOperationFromUnparsedSubExpr(char **apszSubExpr);
156  char *Unparse(swq_field_list *, char chColumnQuote);
157  void Dump(FILE *fp, int depth);
158  swq_field_type Check(swq_field_list *, int bAllowFieldsInSecondaryTables,
159  int bAllowMismatchTypeOnFieldComparison,
160  swq_custom_func_registrar *poCustomFuncRegistrar,
161  int depth = 0);
162  swq_expr_node *Evaluate(swq_field_fetcher pfnFetcher, void *record,
163  const swq_evaluation_context &sContext);
164  swq_expr_node *Clone();
165 
166  void ReplaceBetweenByGEAndLERecurse();
167  void PushNotOperationDownToStack();
168 
169  swq_node_type eNodeType = SNT_CONSTANT;
170  swq_field_type field_type = SWQ_INTEGER;
171 
172  /* only for SNT_OPERATION */
173  void PushSubExpression(swq_expr_node *);
174  void ReverseSubExpressions();
175  swq_op nOperation = SWQ_OR;
176  int nSubExprCount = 0;
177  swq_expr_node **papoSubExpr = nullptr;
178 
179  /* only for SNT_COLUMN */
180  int field_index = 0;
181  int table_index = 0;
182  char *table_name = nullptr;
183 
184  /* only for SNT_CONSTANT */
185  int is_null = false;
186  GIntBig int_value = 0;
187  double float_value = 0.0;
188  OGRGeometry *geometry_value = nullptr;
189 
190  /* shared by SNT_COLUMN, SNT_CONSTANT and also possibly SNT_OPERATION when
191  */
192  /* nOperation == SWQ_CUSTOM_FUNC */
193  char *string_value = nullptr; /* column name when SNT_COLUMN */
194 
195  // May be transiently used by swq_parser.h, but should not be relied upon
196  // after parsing. swq_col_def.bHidden captures it afterwards.
197  bool bHidden = false;
198 
199  static CPLString QuoteIfNecessary(const CPLString &, char chQuote = '\'');
200  static CPLString Quote(const CPLString &, char chQuote = '\'');
201 };
202 
203 typedef struct
204 {
205  const char *pszName;
206  swq_op eOperation;
207  swq_op_evaluator pfnEvaluator;
208  swq_op_checker pfnChecker;
209 } swq_operation;
210 
211 class CPL_UNSTABLE_API swq_op_registrar
212 {
213  public:
214  static const swq_operation *GetOperator(const char *);
215  static const swq_operation *GetOperator(swq_op eOperation);
216 };
217 
218 class CPL_UNSTABLE_API swq_custom_func_registrar
219 {
220  public:
221  virtual ~swq_custom_func_registrar()
222  {
223  }
224 
225  virtual const swq_operation *GetOperator(const char *) = 0;
226 };
227 
/**
 * Description of one table: data source, table name and alias, all held as
 * plain char pointers.
 */
typedef struct
{
    char *data_source; /* data source of the table */
    char *table_name;  /* table name */
    char *table_alias; /* alias of the table */
} swq_table_def;
234 
235 class CPL_UNSTABLE_API swq_field_list
236 {
237  public:
238  int count;
239  char **names;
240  swq_field_type *types;
241  int *table_ids;
242  int *ids;
243 
244  int table_count;
245  swq_table_def *table_defs;
246 };
247 
248 class CPL_UNSTABLE_API swq_parse_context
249 {
250  public:
251  swq_parse_context()
252  : nStartToken(0), pszInput(nullptr), pszNext(nullptr),
253  pszLastValid(nullptr), bAcceptCustomFuncs(FALSE), poRoot(nullptr),
254  poCurSelect(nullptr)
255  {
256  }
257 
258  int nStartToken;
259  const char *pszInput;
260  const char *pszNext;
261  const char *pszLastValid;
262  int bAcceptCustomFuncs;
263 
264  swq_expr_node *poRoot;
265 
266  swq_select *poCurSelect;
267 };
268 
269 /* Compile an SQL WHERE clause into an internal form. The field_list is
270 ** the list of fields in the target 'table', used to render where into
271 ** field numbers instead of names.
272 */
273 int CPL_UNSTABLE_API swqparse(swq_parse_context *context);
274 int CPL_UNSTABLE_API swqlex(swq_expr_node **ppNode, swq_parse_context *context);
275 void CPL_UNSTABLE_API swqerror(swq_parse_context *context, const char *msg);
276 
277 int CPL_UNSTABLE_API swq_identify_field(const char *table_name,
278  const char *token,
279  swq_field_list *field_list,
280  swq_field_type *this_type,
281  int *table_id);
282 
283 CPLErr CPL_UNSTABLE_API
284 swq_expr_compile(const char *where_clause, int field_count, char **field_list,
285  swq_field_type *field_types, int bCheck,
286  swq_custom_func_registrar *poCustomFuncRegistrar,
287  swq_expr_node **expr_root);
288 
289 CPLErr CPL_UNSTABLE_API
290 swq_expr_compile2(const char *where_clause, swq_field_list *field_list,
291  int bCheck, swq_custom_func_registrar *poCustomFuncRegistrar,
292  swq_expr_node **expr_root);
293 
294 /*
295 ** Evaluation related.
296 */
297 int CPL_UNSTABLE_API swq_test_like(const char *input, const char *pattern);
298 
299 swq_expr_node CPL_UNSTABLE_API *
300 SWQGeneralEvaluator(swq_expr_node *, swq_expr_node **,
301  const swq_evaluation_context &sContext);
302 swq_field_type CPL_UNSTABLE_API
303 SWQGeneralChecker(swq_expr_node *node, int bAllowMismatchTypeOnFieldComparison);
304 swq_expr_node CPL_UNSTABLE_API *
305 SWQCastEvaluator(swq_expr_node *, swq_expr_node **,
306  const swq_evaluation_context &sContext);
307 swq_field_type CPL_UNSTABLE_API
308 SWQCastChecker(swq_expr_node *node, int bAllowMismatchTypeOnFieldComparison);
309 const char CPL_UNSTABLE_API *SWQFieldTypeToString(swq_field_type field_type);
310 
311 /****************************************************************************/
312 
/* Parse flag (bit value). */
#define SWQP_ALLOW_UNDEFINED_COL_FUNCS 0x01

/* Query modes.
 * NOTE(review): presumably the values taken by swq_select::query_mode -
 * confirm against the implementation. */
#define SWQM_SUMMARY_RECORD 1
#define SWQM_RECORDSET 2
#define SWQM_DISTINCT_LIST 3
318 
319 typedef enum
320 {
321  SWQCF_NONE = 0,
322  SWQCF_AVG = SWQ_AVG,
323  SWQCF_MIN = SWQ_MIN,
324  SWQCF_MAX = SWQ_MAX,
325  SWQCF_COUNT = SWQ_COUNT,
326  SWQCF_SUM = SWQ_SUM,
327  SWQCF_STDDEV_POP = SWQ_STDDEV_POP,
328  SWQCF_STDDEV_SAMP = SWQ_STDDEV_SAMP,
329  SWQCF_CUSTOM
330 } swq_col_func;
331 
332 typedef struct
333 {
334  swq_col_func col_func;
335  char *table_name;
336  char *field_name;
337  char *field_alias;
338  int table_index;
339  int field_index;
340  swq_field_type field_type;
341  swq_field_type target_type;
342  OGRFieldSubType target_subtype;
343  int field_length;
344  int field_precision;
345  int distinct_flag;
346  bool bHidden;
347  OGRwkbGeometryType eGeomType;
348  int nSRID;
349  swq_expr_node *expr;
350 } swq_col_def;
351 
352 class CPL_UNSTABLE_API swq_summary
353 {
354  public:
355  struct Comparator
356  {
357  bool bSortAsc;
358  swq_field_type eType;
359 
360  Comparator() : bSortAsc(true), eType(SWQ_STRING)
361  {
362  }
363 
364  bool operator()(const CPLString &, const CPLString &) const;
365  };
366 
368  // Cf cf KahanBabushkaNeumaierSum of https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Further_enhancements
369  double sum() const
370  {
371  return sum_only_finite_terms ? sum_acc + sum_correction : sum_acc;
372  }
373 
374  GIntBig count = 0;
375 
376  std::vector<CPLString> oVectorDistinctValues{};
377  std::set<CPLString, Comparator> oSetDistinctValues{};
378  bool sum_only_finite_terms = true;
379  // Sum accumulator. To get the accurate sum, use the sum() method
380  double sum_acc = 0.0;
381  // Sum correction term.
382  double sum_correction = 0.0;
383  double min = 0.0;
384  double max = 0.0;
385 
386  // Welford's online algorithm for variance:
387  // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
388  double mean_for_variance = 0.0;
389  double sq_dist_from_mean_acc = 0.0; // "M2"
390 
391  CPLString osMin{};
392  CPLString osMax{};
393 };
394 
/**
 * One ordering specification (element type of swq_select::order_defs): the
 * field to sort on and the direction flag.
 */
typedef struct
{
    char *table_name;
    char *field_name;
    int table_index;
    int field_index;
    int ascending_flag; /* int used as a boolean flag */
} swq_order_def;
403 
404 typedef struct
405 {
406  int secondary_table;
407  swq_expr_node *poExpr;
408 } swq_join_def;
409 
410 class CPL_UNSTABLE_API swq_select_parse_options
411 {
412  public:
413  swq_custom_func_registrar *poCustomFuncRegistrar;
414  int bAllowFieldsInSecondaryTablesInWhere;
415  int bAddSecondaryTablesGeometryFields;
416  int bAlwaysPrefixWithTableName;
417  int bAllowDistinctOnGeometryField;
418  int bAllowDistinctOnMultipleFields;
419 
420  swq_select_parse_options()
421  : poCustomFuncRegistrar(nullptr),
422  bAllowFieldsInSecondaryTablesInWhere(FALSE),
423  bAddSecondaryTablesGeometryFields(FALSE),
424  bAlwaysPrefixWithTableName(FALSE),
425  bAllowDistinctOnGeometryField(FALSE),
426  bAllowDistinctOnMultipleFields(FALSE)
427  {
428  }
429 };
430 
431 class CPL_UNSTABLE_API swq_select
432 {
433  void postpreparse();
434 
435  CPL_DISALLOW_COPY_ASSIGN(swq_select)
436 
437  public:
438  swq_select();
439  ~swq_select();
440 
441  int query_mode = 0;
442 
443  char *raw_select = nullptr;
444 
445  int PushField(swq_expr_node *poExpr, const char *pszAlias,
446  bool distinct_flag, bool bHidden);
447 
448  int PushExcludeField(swq_expr_node *poExpr);
449 
450  int result_columns() const
451  {
452  return static_cast<int>(column_defs.size());
453  }
454 
455  std::vector<swq_col_def> column_defs{};
456  std::vector<swq_summary> column_summary{};
457 
458  int PushTableDef(const char *pszDataSource, const char *pszTableName,
459  const char *pszAlias);
460  int table_count = 0;
461  swq_table_def *table_defs = nullptr;
462 
463  void PushJoin(int iSecondaryTable, swq_expr_node *poExpr);
464  int join_count = 0;
465  swq_join_def *join_defs = nullptr;
466 
467  swq_expr_node *where_expr = nullptr;
468 
469  void PushOrderBy(const char *pszTableName, const char *pszFieldName,
470  int bAscending);
471  int order_specs = 0;
472  swq_order_def *order_defs = nullptr;
473 
474  void SetLimit(GIntBig nLimit);
475  GIntBig limit = -1;
476 
477  void SetOffset(GIntBig nOffset);
478  GIntBig offset = 0;
479 
480  swq_select *poOtherSelect = nullptr;
481  void PushUnionAll(swq_select *poOtherSelectIn);
482 
483  CPLErr preparse(const char *select_statement,
484  int bAcceptCustomFuncs = FALSE);
485  CPLErr expand_wildcard(swq_field_list *field_list,
486  int bAlwaysPrefixWithTableName);
487  CPLErr parse(swq_field_list *field_list,
488  swq_select_parse_options *poParseOptions);
489 
490  char *Unparse();
491 
492  bool bExcludedGeometry = false;
493 
494  private:
495  bool IsFieldExcluded(int src_index, const char *table, const char *field);
496 
497  // map of EXCLUDE columns keyed according to the index of the
498  // asterisk with which it should be associated. key of -1 is
499  // used for column lists that have not yet been associated with
500  // an asterisk.
501  std::map<int, std::list<swq_col_def>> m_exclude_fields{};
502 };
503 
504 /* This method should generally be invoked with pszValue set, except when
505  * called on a non-DISTINCT column definition of numeric type (SWQ_BOOLEAN,
506  * SWQ_INTEGER, SWQ_INTEGER64, SWQ_FLOAT), in which case pdfValue should
507  * rather be set.
508  */
509 const char CPL_UNSTABLE_API *swq_select_summarize(swq_select *select_info,
510  int dest_column,
511  const char *pszValue,
512  const double *pdfValue);
513 
514 int CPL_UNSTABLE_API swq_is_reserved_keyword(const char *pszStr);
515 
516 char CPL_UNSTABLE_API *OGRHStoreGetValue(const char *pszHStore,
517  const char *pszSearchedKey);
518 
/* Declarations only visible when GDAL_COMPILATION is defined. */
#ifdef GDAL_COMPILATION

void swq_fixup(swq_parse_context *psParseContext);

/* `op` is expected to be SWQ_AND or SWQ_OR, going by the function name. */
swq_expr_node *swq_create_and_or_or(swq_op op, swq_expr_node *left,
                                    swq_expr_node *right);

/* Overload of swq_test_like() with an explicit escape character, a
 * case-insensitivity switch and a UTF-8 switch. */
int swq_test_like(const char *input, const char *pattern, char chEscape,
                  bool insensitive, bool bUTF8Strings);

#endif
526 
527 #endif /* #ifndef DOXYGEN_SKIP */
528 
529 #endif /* def SWQ_H_INCLUDED_ */
Convenient string class based on std::string.
Definition: cpl_string.h:320
Abstract base class for all geometry classes.
Definition: ogr_geometry.h:377
Various convenience functions for CPL.
CPLErr
Error category.
Definition: cpl_error.h:53
#define CPL_DISALLOW_COPY_ASSIGN(ClassName)
Helper to remove the copy and assignment constructors so that the compiler will not generate the default behavior.
Definition: cpl_port.h:1042
long long GIntBig
Large signed integer type (generally 64-bit integer type).
Definition: cpl_port.h:215
Various convenience functions for working with strings and string lists.
Core portability services for cross-platform OGR code.
OGRFieldSubType
List of field subtypes.
Definition: ogr_core.h:821
OGRwkbGeometryType
List of well known binary geometry types.
Definition: ogr_core.h:416