GDAL
ograrrowarrayhelper.h
1 /******************************************************************************
2  *
3  * Project: OpenGIS Simple Features Reference Implementation
4  * Purpose: Helper to fill ArrowArray
5  * Author: Even Rouault <even dot rouault at spatialys.com>
6  *
7  ******************************************************************************
8  * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included
18  * in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  ****************************************************************************/
28 
29 #pragma once
30 
32 
33 #include <algorithm>
34 #include <limits>
35 
36 #include "cpl_time.h"
37 
38 #include "ogrsf_frmts.h"
39 #include "ogr_recordbatch.h"
40 
41 class CPL_DLL OGRArrowArrayHelper
42 {
43  OGRArrowArrayHelper(const OGRArrowArrayHelper &) = delete;
44  OGRArrowArrayHelper &operator=(const OGRArrowArrayHelper &) = delete;
45 
46  public:
47  bool m_bIncludeFID = false;
48  int m_nMaxBatchSize = 0;
49  int m_nChildren = 0;
50  const int m_nFieldCount = 0;
51  const int m_nGeomFieldCount = 0;
52  std::vector<int> m_mapOGRFieldToArrowField{};
53  std::vector<int> m_mapOGRGeomFieldToArrowField{};
54  std::vector<bool> m_abNullableFields{};
55  std::vector<uint32_t> m_anArrowFieldMaxAlloc{};
56  std::vector<int> m_anTZFlags{};
57  int64_t *m_panFIDValues = nullptr;
58  struct ArrowArray *m_out_array = nullptr;
59 
60  static uint32_t GetMemLimit();
61 
62  static int
63  GetMaxFeaturesInBatch(const CPLStringList &aosArrowArrayStreamOptions);
64 
65  OGRArrowArrayHelper(GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
66  const CPLStringList &aosArrowArrayStreamOptions,
67  struct ArrowArray *out_array);
68 
69  bool SetNull(int iArrowField, int iFeat)
70  {
71  auto psArray = m_out_array->children[iArrowField];
72  ++psArray->null_count;
73  uint8_t *pabyNull =
74  static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[0]));
75  if (psArray->buffers[0] == nullptr)
76  {
77  pabyNull = static_cast<uint8_t *>(
78  VSI_MALLOC_ALIGNED_AUTO_VERBOSE((m_nMaxBatchSize + 7) / 8));
79  if (pabyNull == nullptr)
80  {
81  return false;
82  }
83  memset(pabyNull, 0xFF, (m_nMaxBatchSize + 7) / 8);
84  psArray->buffers[0] = pabyNull;
85  }
86  pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8)));
87 
88  if (psArray->n_buffers == 3)
89  {
90  auto panOffsets =
91  static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
92  panOffsets[iFeat + 1] = panOffsets[iFeat];
93  }
94  return true;
95  }
96 
97  inline static void SetBoolOn(struct ArrowArray *psArray, int iFeat)
98  {
99  static_cast<uint8_t *>(
100  const_cast<void *>(psArray->buffers[1]))[iFeat / 8] |=
101  static_cast<uint8_t>(1 << (iFeat % 8));
102  }
103 
104  inline static void SetInt8(struct ArrowArray *psArray, int iFeat,
105  int8_t nVal)
106  {
107  static_cast<int8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
108  nVal;
109  }
110 
111  inline static void SetUInt8(struct ArrowArray *psArray, int iFeat,
112  uint8_t nVal)
113  {
114  static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
115  nVal;
116  }
117 
118  inline static void SetInt16(struct ArrowArray *psArray, int iFeat,
119  int16_t nVal)
120  {
121  static_cast<int16_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
122  nVal;
123  }
124 
125  inline static void SetUInt16(struct ArrowArray *psArray, int iFeat,
126  uint16_t nVal)
127  {
128  static_cast<uint16_t *>(
129  const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
130  }
131 
132  inline static void SetInt32(struct ArrowArray *psArray, int iFeat,
133  int32_t nVal)
134  {
135  static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
136  nVal;
137  }
138 
139  inline static void SetUInt32(struct ArrowArray *psArray, int iFeat,
140  uint32_t nVal)
141  {
142  static_cast<uint32_t *>(
143  const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
144  }
145 
146  inline static void SetInt64(struct ArrowArray *psArray, int iFeat,
147  int64_t nVal)
148  {
149  static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
150  nVal;
151  }
152 
153  inline static void SetUInt64(struct ArrowArray *psArray, int iFeat,
154  uint64_t nVal)
155  {
156  static_cast<uint64_t *>(
157  const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
158  }
159 
160  inline static void SetFloat(struct ArrowArray *psArray, int iFeat,
161  float fVal)
162  {
163  static_cast<float *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
164  fVal;
165  }
166 
167  inline static void SetDouble(struct ArrowArray *psArray, int iFeat,
168  double dfVal)
169  {
170  static_cast<double *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
171  dfVal;
172  }
173 
174  static void SetDate(struct ArrowArray *psArray, int iFeat,
175  struct tm &brokenDown, const OGRField &ogrField)
176  {
177  brokenDown.tm_year = ogrField.Date.Year - 1900;
178  brokenDown.tm_mon = ogrField.Date.Month - 1;
179  brokenDown.tm_mday = ogrField.Date.Day;
180  brokenDown.tm_hour = 0;
181  brokenDown.tm_min = 0;
182  brokenDown.tm_sec = 0;
183  static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
184  static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
185  }
186 
187  static void SetDateTime(struct ArrowArray *psArray, int iFeat,
188  struct tm &brokenDown, int nFieldTZFlag,
189  const OGRField &ogrField)
190  {
191  brokenDown.tm_year = ogrField.Date.Year - 1900;
192  brokenDown.tm_mon = ogrField.Date.Month - 1;
193  brokenDown.tm_mday = ogrField.Date.Day;
194  brokenDown.tm_hour = ogrField.Date.Hour;
195  brokenDown.tm_min = ogrField.Date.Minute;
196  brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second);
197  auto nVal =
198  CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
199  (static_cast<int>(ogrField.Date.Second * 1000 + 0.5) % 1000);
200  if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
201  ogrField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
202  {
203  // Convert for ogrField.Date.TZFlag to UTC
204  const int TZOffset = (ogrField.Date.TZFlag - OGR_TZFLAG_UTC) * 15;
205  const int TZOffsetMS = TZOffset * 60 * 1000;
206  nVal -= TZOffsetMS;
207  }
208  static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
209  nVal;
210  }
211 
212  GByte *GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen)
213  {
214  auto psArray = m_out_array->children[iArrowField];
215  auto panOffsets =
216  static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
217  const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]);
218  if (nLen > m_anArrowFieldMaxAlloc[iArrowField] - nCurLength)
219  {
220  if (nLen >
221  static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) -
222  nCurLength)
223  {
224  CPLError(CE_Failure, CPLE_AppDefined,
225  "Too large string or binary content");
226  return nullptr;
227  }
228  uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen);
229  if ((m_anArrowFieldMaxAlloc[iArrowField] >> 31) == 0)
230  {
231  const uint32_t nDoubleSize =
232  2U * m_anArrowFieldMaxAlloc[iArrowField];
233  if (nNewSize < nDoubleSize)
234  nNewSize = nDoubleSize;
235  }
236  void *newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize);
237  if (newBuffer == nullptr)
238  return nullptr;
239  m_anArrowFieldMaxAlloc[iArrowField] = nNewSize;
240  memcpy(newBuffer, psArray->buffers[2], nCurLength);
241  VSIFreeAligned(const_cast<void *>(psArray->buffers[2]));
242  psArray->buffers[2] = newBuffer;
243  }
244  GByte *paby =
245  static_cast<GByte *>(const_cast<void *>(psArray->buffers[2])) +
246  nCurLength;
247  panOffsets[iFeat + 1] = panOffsets[iFeat] + static_cast<int32_t>(nLen);
248  return paby;
249  }
250 
251  static void SetEmptyStringOrBinary(struct ArrowArray *psArray, int iFeat)
252  {
253  auto panOffsets =
254  static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
255  panOffsets[iFeat + 1] = panOffsets[iFeat];
256  }
257 
258  void Shrink(int nFeatures)
259  {
260  if (nFeatures < m_nMaxBatchSize)
261  {
262  m_out_array->length = nFeatures;
263  for (int i = 0; i < m_nChildren; i++)
264  {
265  m_out_array->children[i]->length = nFeatures;
266  }
267  }
268  }
269 
270  void ClearArray()
271  {
272  if (m_out_array->release)
273  m_out_array->release(m_out_array);
274  memset(m_out_array, 0, sizeof(*m_out_array));
275  }
276 
277  static bool FillDict(struct ArrowArray *psChild,
278  const OGRCodedFieldDomain *poCodedDomain);
279 };
280 
String list class designed around our use of C "char**" string lists.
Definition: cpl_string.h:449
A set of associated raster bands, usually from one file.
Definition: gdal_priv.h:503
Definition of a coded / enumerated field domain.
Definition: ogr_feature.h:1758
Definition of a feature class or feature layer.
Definition: ogr_feature.h:517
#define CPLE_AppDefined
Application defined error.
Definition: cpl_error.h:100
void CPLError(CPLErr eErrClass, CPLErrorNum err_no, const char *fmt,...)
Report an error.
Definition: cpl_error.cpp:330
unsigned char GByte
Unsigned byte type.
Definition: cpl_port.h:185
#define VSI_MALLOC_ALIGNED_AUTO_VERBOSE(size)
VSIMallocAlignedAutoVerbose() with FILE and LINE reporting.
Definition: cpl_vsi.h:321
void VSIFreeAligned(void *ptr)
Free a buffer allocated with VSIMallocAligned().
Definition: cpl_vsisimple.cpp:995
#define OGR_TZFLAG_UTC
Time zone flag indicating UTC.
Definition: ogr_core.h:899
#define OGR_TZFLAG_MIXED_TZ
Time zone flag only returned by OGRFieldDefn::GetTZFlag() to indicate that all values in the field ha...
Definition: ogr_core.h:891
Classes related to registration of format support, and opening datasets.
OGRFeature field attribute value union.
Definition: ogr_core.h:910