amazon-redshift

Reading JSON array in Redshift

Introduction#

Currently, reading all elements in JSON array is not possible in Redshift. For e.g. if you want to read Manufacturer and model as columns from following JSON

Remarks#

dim_idnumberinfomanufacturermodel
2001Nissan~Sentra^Nissan~Maxima^Ford~Taurus^Ford~Escort^NissanSentra
2002Nissan~Sentra^Nissan~Maxima^Ford~Taurus^Ford~Escort^NissanMaxima
2003Nissan~Sentra^Nissan~Maxima^Ford~Taurus^Ford~Escort^FordTaurus
2004Nissan~Sentra^Nissan~Maxima^Ford~Taurus^Ford~Escort^FordEscort
## Reading array elements in JSON

— Create a sample JSON with ARRAY

create table car_sample(dim_id integer, info varchar(2000)); insert into car_sample values (200, ’{“cars”: [ { “Manufacturer”: “Nissan”, “Models”: [{“Name”:“Sentra”, “doors”:4}, {“Name”:“Maxima”, “doors”:4} ]}, {“Manufacturer”: “Ford”, “Models”: [{“Name”:“Taurus”, “doors”:4}, {“Name”:“Escort”, “doors”:4} ]} ] }‘)

— Create a supporting table for CROSS JOIN

create table series1_10 (number integer );
insert into series1_10 values (1);
insert into series1_10 values (2);
insert into series1_10 values (3);
insert into series1_10 values (4);
insert into series1_10 values (5);
insert into series1_10 values (6);
insert into series1_10 values (7);
insert into series1_10 values (8);
insert into series1_10 values (9);
insert into series1_10 values (10);

— UDF for extracting JSON array into one ^ delimited string
CREATE OR REPLACE FUNCTION f_extractJson (jsonVar varchar) RETURNS varchar IMMUTABLE as $$
 def myfunc(myParm):
  import json
  cars=json.loads(jsonVar)
  parsedString=”
  for car in cars[“cars”]:
   for model in car[“Models”]:
    parsedString=parsedString+car[“Manufacturer”]+’~‘+model[“Name”]+’^’
  return parsedString

 return myfunc(jsonVar)
$$ LANGUAGE plpythonu;

— Check the data

select dim_id, f_extractJson(info) from car_sample;

— Pivot rows

WITH w1 AS (select dim_id, f_extractJson(info) info from car_sample)
select dim_id,number, info, split_part(split_part(info,’^‘,number),’’, 1)
Manufacturer, split_part(split_part(info,’^‘,number),’
’, 2) Model
from w1 cross join series1_10
where number <= regexp_count(info,’[=^=]’) ;


This modified text is an extract of the original Stack Overflow Documentation created by the contributors and released under CC BY-SA 3.0 This website is not affiliated with Stack Overflow