导入具有深度嵌套对象数组的 GeoJSON
·5 分钟阅读
“导入具有深度嵌套对象数组的 GeoJSON”
问题
如何导入具有嵌套对象数组的 GeoJSON?
答案
在本教程中,我们将使用这里提供的公开开放数据;其副本也可以在这里找到。
- 以 GeoJSON 格式下载数据并将文件重命名为 geojson.json。
- 理解结构。
-- Show the column names and types that are inferred for the GeoJSON file.
DESCRIBE TABLE file('geojson.json', 'JSON')
┌─name─────┬─type─────────────────────────────────────────────────────────────────────────────────────────┐
│ type │ Nullable(String) │
│ name │ Nullable(String) │
│ crs │ Tuple( properties Tuple(name Nullable(String)),type Nullable(String)) │
│ features │ Array(Tuple( │
│ │ geometry Tuple(coordinates Array(Array(Array(Array(Nullable(Float64))))), │
│ │ type Nullable(String)), │
│ │ properties Tuple( CODIGOINE Nullable(String), │
│ │ CODNUT1 Nullable(String), │
│ │ CODNUT2 Nullable(String), │
│ │ CODNUT3 Nullable(String), │
│ │ FID Nullable(Int64), │
│ │ INSPIREID Nullable(String), │
│ │ NAMEUNIT Nullable(String), │
│ │ NATCODE Nullable(String), │
│ │ SHAPE_Area Nullable(Float64), │
│ │ SHAPE_Length Nullable(Float64) │
│ │ ), │
│ │ type Nullable(String) │
│ │ ) │
│ │ ) │
└──────────┴──────────────────────────────────────────────────────────────────────────────────────────────┘
- 创建一个表来存储 GeoJSON 行。
这里的要求是为 features 数组中的每个 object 生成一行。字段 geometry 推断出的数据类型表明,它可以转换为 ClickHouse 的 MultiPolygon 数据类型。
-- Target table: holds one row per object of the GeoJSON features array.
CREATE TABLE geojson
(
    -- attributes of the GeoJSON document itself
    type         String,
    name         String,
    crsType      String,
    crsName      String,
    -- attributes of an individual feature
    featureType  String,
    id           Int64,
    inspiredId   String,
    natCode      String,
    nameUnit     String,
    codNut1      String,
    codNut2      String,
    codNut3      String,
    codigoIne    String,
    shapeLength  Float64,
    shapeArea    Float64,
    -- geometry of the feature
    geometryType String,
    geometry     MultiPolygon
)
ENGINE = MergeTree
ORDER BY id;
- 准备数据。
查询的主要目的是验证我们是否为 features 数组中的每个 object 获得一行。
注意:字段 features.geometry.coordinates 已被注释掉,以使结果集更具可读性。
-- Preview query: ARRAY JOIN unfolds the features array so that every object
-- in it is returned as its own row. The file name matches the one the file
-- was renamed to in the download step (geojson.json).
SELECT
    type AS type,
    name AS name,
    crs.type AS crsType,
    crs.properties.name AS crsName,
    features.type AS featureType,
    features.properties.FID AS id,
    features.properties.INSPIREID AS inspiredId,
    features.properties.NATCODE AS natCode,
    features.properties.NAMEUNIT AS nameUnit,
    features.properties.CODNUT1 AS codNut1,
    features.properties.CODNUT2 AS codNut2,
    features.properties.CODNUT3 AS codNut3,
    features.properties.CODIGOINE AS codigoIne,
    features.properties.SHAPE_Length AS shapeLength,
    features.properties.SHAPE_Area AS shapeArea,
    features.geometry.type AS geometryType
    --,features.geometry.coordinates
FROM file('geojson.json', 'JSON')
ARRAY JOIN features
LIMIT 5
┌─type──────────────┬─name───────────┬─crsType─┬─crsName───────────────────────┬─featureType─┬─id─┬─inspiredId───────────────┬─natCode─────┬─nameUnit──────────────┬─codNut1─┬─codNut2─┬─codNut3─┬─codigoIne─┬────────shapeLength─┬─────────────shapeArea─┬─geometryType─┐
│ FeatureCollection │ Municipios_IGN │ name │ urn:ogc:def:crs:OGC:1.3:CRS84 │ Feature │ 1 │ ES.IGN.SIGLIM34081616266 │ 34081616266 │ Villarejo-Periesteban │ ES4 │ ES42 │ ES423 │ 16266 │ 0.2697476997304121 │ 0.0035198414406406673 │ MultiPolygon │
│ FeatureCollection │ Municipios_IGN │ name │ urn:ogc:def:crs:OGC:1.3:CRS84 │ Feature │ 2 │ ES.IGN.SIGLIM34081616269 │ 34081616269 │ Villares del Saz │ ES4 │ ES42 │ ES423 │ 16269 │ 0.4476083901269905 │ 0.00738179315030249 │ MultiPolygon │
│ FeatureCollection │ Municipios_IGN │ name │ urn:ogc:def:crs:OGC:1.3:CRS84 │ Feature │ 3 │ ES.IGN.SIGLIM34081616270 │ 34081616270 │ Villarrubio │ ES4 │ ES42 │ ES423 │ 16270 │ 0.3053942273994179 │ 0.0029777582813496337 │ MultiPolygon │
│ FeatureCollection │ Municipios_IGN │ name │ urn:ogc:def:crs:OGC:1.3:CRS84 │ Feature │ 4 │ ES.IGN.SIGLIM34081616271 │ 34081616271 │ Villarta │ ES4 │ ES42 │ ES423 │ 16271 │ 0.2831226979821184 │ 0.002680273189024594 │ MultiPolygon │
│ FeatureCollection │ Municipios_IGN │ name │ urn:ogc:def:crs:OGC:1.3:CRS84 │ Feature │ 5 │ ES.IGN.SIGLIM34081616272 │ 34081616272 │ Villas de la Ventosa │ ES4 │ ES42 │ ES423 │ 16272 │ 0.5958276749246777 │ 0.015354885085133583 │ MultiPolygon │
└───────────────────┴────────────────┴─────────┴───────────────────────────────┴─────────────┴────┴──────────────────────────┴─────────────┴───────────────────────┴─────────┴─────────┴─────────┴───────────┴────────────────────┴───────────────────────┴──────────────┘
- 插入数据。
-- First attempt: features.geometry.coordinates is selected unchanged as the
-- geometry column. Running this statement produces a TYPE_MISMATCH error.
INSERT INTO geojson
SELECT
    type AS type,
    name AS name,
    crs.type AS crsType,
    crs.properties.name AS crsName,
    features.type AS featureType,
    features.properties.FID AS id,
    features.properties.INSPIREID AS inspiredId,
    features.properties.NATCODE AS natCode,
    features.properties.NAMEUNIT AS nameUnit,
    features.properties.CODNUT1 AS codNut1,
    features.properties.CODNUT2 AS codNut2,
    features.properties.CODNUT3 AS codNut3,
    features.properties.CODIGOINE AS codigoIne,
    features.properties.SHAPE_Length AS shapeLength,
    features.properties.SHAPE_Area AS shapeArea,
    features.geometry.type AS geometryType,
    features.geometry.coordinates AS geometry
FROM file('geojson.json', 'JSON')
ARRAY JOIN features
在这里,我们得到以下错误
Code: 53. DB::Exception: Received from localhost:9000. DB::Exception: ARRAY JOIN requires array or map argument. (TYPE_MISMATCH)
Received exception from server (version 24.1.2):
该错误是由 features.geometry.coordinates 的解析引起的。
- 让我们检查其数据类型。
-- Inspect the type inferred for the coordinates once the features array
-- has been unfolded into rows.
SELECT DISTINCT toTypeName(features.geometry.coordinates) AS geometry
FROM file('geojson.json', 'JSON')
ARRAY JOIN features
┌─geometry──────────────────────────────────────┐
│ Array(Array(Array(Array(Nullable(Float64))))) │
└───────────────────────────────────────────────┘
可以通过将 features.geometry.coordinates 转换为 Array(Array(Array(Tuple(Float64,Float64)))) 来修复它。为此,我们可以使用函数 arrayMap(func, arr1, ...)。
-- Rebuild the coordinates so that each innermost two-element array becomes
-- a tuple, then check the resulting type. The lambda parameters have their
-- own names (polygon / ring / point) instead of shadowing the column name.
SELECT DISTINCT
    toTypeName(
        arrayMap(
            polygon -> arrayMap(
                ring -> arrayMap(
                    point -> (point[1], point[2]),
                    ring
                ),
                polygon
            ),
            features.geometry.coordinates
        )
    ) AS toTypeName
FROM file('geojson.json', 'JSON')
ARRAY JOIN features;
┌─toTypeName───────────────────────────────────────────────────────┐
│ Array(Array(Array(Tuple(Nullable(Float64), Nullable(Float64))))) │
└──────────────────────────────────────────────────────────────────┘
- 插入数据。
-- Load one row per feature. The coordinates are rebuilt so that each
-- innermost two-element array becomes a tuple before being written to the
-- geometry column. The target columns are listed explicitly so the insert
-- does not depend on the column order of the table.
INSERT INTO geojson
(
    type,
    name,
    crsType,
    crsName,
    featureType,
    id,
    inspiredId,
    natCode,
    nameUnit,
    codNut1,
    codNut2,
    codNut3,
    codigoIne,
    shapeLength,
    shapeArea,
    geometryType,
    geometry
)
SELECT
    type AS type,
    name AS name,
    crs.type AS crsType,
    crs.properties.name AS crsName,
    features.type AS featureType,
    features.properties.FID AS id,
    features.properties.INSPIREID AS inspiredId,
    features.properties.NATCODE AS natCode,
    features.properties.NAMEUNIT AS nameUnit,
    features.properties.CODNUT1 AS codNut1,
    features.properties.CODNUT2 AS codNut2,
    features.properties.CODNUT3 AS codNut3,
    features.properties.CODIGOINE AS codigoIne,
    features.properties.SHAPE_Length AS shapeLength,
    features.properties.SHAPE_Area AS shapeArea,
    features.geometry.type AS geometryType,
    arrayMap(
        polygon -> arrayMap(
            ring -> arrayMap(
                point -> (point[1], point[2]),
                ring
            ),
            polygon
        ),
        features.geometry.coordinates
    ) AS geometry
FROM file('geojson.json', 'JSON')
ARRAY JOIN features;
-- Count the rows that were loaded into the table.
SELECT count()
FROM geojson
┌─count()─┐
│ 8205 │
└─────────┘
-- Check the type of the stored geometry column.
SELECT DISTINCT toTypeName(geometry)
FROM geojson
┌─toTypeName(geometry)─┐
│ MultiPolygon │
└──────────────────────┘
结论
处理 JSON 可能是一项复杂的任务。本教程解决了这样一种场景:嵌套的对象数组会使这项任务变得更加困难。
对于任何其他与 JSON 相关的要求,请参阅我们的文档。