Flattening arrays for Elastic Search

Flattening GraphQL data posted on 22nd Dec 2019


The data that comes back from GraphQL is not ideal for input to Elastic Search which is designed for full text searching, which means it loses context when searching arrays. Consider this response from some fictional GraphQL API.

[{
  "franchise": {
    "name": "rambo",
    "episodes": [{
      "name": "first blood",
      "year": 1982,
      "actors": [{
        "dob": "1946-07-06",
        "name": "Sylvester Stallone",
        "nicknames": ["sly"]
      }, {
        "name": "Jack Starrett"
      }, {
        "name": "Richard Crenna",
        "nationality": "American"
      }]
    }, {
      "name": "first blood part 2",
      "year": 1985,
      "actors": [{
        "dob": "1946-07-06",
        "name": "Sylvester Stallone",
        "nicknames": ["sly"]
      }, {
        "name": "Richard Crenna",
        "nationality": "American"
      }]
    }]
  }
}, {
  "franchise": {
    "name": "terminator",
    "episodes": [{
      "name": "the terminator",
      "year": 1984,
      "actors": [{
        "dob": "1947-07-30",
        "name": "Arnold Schwarzenegger",
        "nicknames": ["arnie"]
      }, {
        "name": "Linda Hamilton"
      }]
    }, {
      "name": "terminator 2: judgement day",
      "year": 1985,
      "actors": [{
        "dob": "1947-07-30",
        "name": "Arnold Schwarzenegger",
        "nicknames": ["arnie", "schwarzie"]
      }, {
        "name": "Michael Biehn"
      }]
    }]
  }
}];

To make a success of this in Elastic search we need to turn it back into a tabular format, a little like you might get back from a complex SQL join. Somthing like this.

That's a little more complex than simple object flattening, as we need to create a duplicate row each time there is an array element, and of course these arrays might occur at any level.

The code

const blowup = ({ ob, sep = '_', cloner = item => JSON.parse(JSON.stringify(item)) }) => {

  const isObject = (sob) => typeof (sob) === 'object' && !(sob instanceof Date);
 
  // recursive piece
  const makeRows = (sob, rows = [], currentKey = '', cob = {}) => {

    // ignore undefined or null items
    if (typeof sob === typeof undefined || sob === null) {
      return rows;
    } else if (Array.isArray(sob)) {
      // going to work through an array creating 1 row for each element
      // but without adding to the current key
      // make deep clone of current object
      sob.forEach((f, i) => {
        // make clone of what we have so far to replicate across
        const clob = cloner(cob);
        // the first element updates an existing row
        // subsequent elements add to the number of rows
        if (i) {
          rows.push(clob);
        } else {
          rows[rows.length ? rows.length - 1 : 0] = clob;
        }
        // recurse for each element
        makeRows(f, rows, currentKey, clob);
      });

    } else if (isObject(sob)) {
      // deal with the non object children first so they get cloned
      Object.keys(sob)
       .sort((a,b) => isObject(sob[a]) && isObject(sob[b]) ? 0 : (isObject(sob[b]) ? -1: 1) )
       .forEach((k, i) => {
         // add to the key, but nothing to the accumulating object
         makeRows(sob[k], rows, currentKey ? currentKey + sep + k : k, cob);
      });
    } else {
      // its a natural value
      if (cob.hasOwnProperty(currentKey)) {
        // something has gone wrong here - show should probably be a throw
        console.log('attempt to to overwrite property', cob, currentKey, 'row', rows.length);
      } else {
        cob[currentKey] = sob;
      }
    }
    return rows;
  };

  // do the work - the input data should be an array of objects
  if(!Array.isArray(ob)) ob = [ob];
  return makeRows(ob);
};

That will give this result

[{
"franchise_name": "rambo",
"franchise_episodes_name": "first blood",
"franchise_episodes_year": 1982,
"franchise_episodes_actors_dob": "1946-07-06",
"franchise_episodes_actors_name": "Sylvester Stallone",
"franchise_episodes_actors_nicknames": "sly"
}, {
"franchise_name": "rambo",
"franchise_episodes_name": "first blood",
"franchise_episodes_year": 1982,
"franchise_episodes_actors_name": "Jack Starrett"
}, {
"franchise_name": "rambo",
"franchise_episodes_name": "first blood",
"franchise_episodes_year": 1982,
"franchise_episodes_actors_name": "Richard Crenna",
"franchise_episodes_actors_nationality": "American"
}, {
"franchise_name": "rambo",
"franchise_episodes_name": "first blood part 2",
"franchise_episodes_year": 1985,
"franchise_episodes_actors_dob": "1946-07-06",
"franchise_episodes_actors_name": "Sylvester Stallone",
"franchise_episodes_actors_nicknames": "sly"
}, {
"franchise_name": "rambo",
"franchise_episodes_name": "first blood part 2",
"franchise_episodes_year": 1985,
"franchise_episodes_actors_name": "Richard Crenna",
"franchise_episodes_actors_nationality": "American"
}, {
"franchise_name": "terminator",
"franchise_episodes_name": "the terminator",
"franchise_episodes_year": 1984,
"franchise_episodes_actors_dob": "1947-07-30",
"franchise_episodes_actors_name": "Arnold Schwarzenegger",
"franchise_episodes_actors_nicknames": "arnie"
}, {
"franchise_name": "terminator",
"franchise_episodes_name": "the terminator",
"franchise_episodes_year": 1984,
"franchise_episodes_actors_name": "Linda Hamilton"
}, {
"franchise_name": "terminator",
"franchise_episodes_name": "terminator 2: judgement day",
"franchise_episodes_year": 1985,
"franchise_episodes_actors_dob": "1947-07-30",
"franchise_episodes_actors_name": "Arnold Schwarzenegger",
"franchise_episodes_actors_nicknames": "arnie"
}, {
"franchise_name": "terminator",
"franchise_episodes_name": "terminator 2: judgement day",
"franchise_episodes_year": 1985,
"franchise_episodes_actors_dob": "1947-07-30",
"franchise_episodes_actors_name": "Arnold Schwarzenegger",
"franchise_episodes_actors_nicknames": "schwarzie"
}, {
"franchise_name": "terminator",
"franchise_episodes_name": "terminator 2: judgement day",
"franchise_episodes_year": 1985,
"franchise_episodes_actors_name": "Michael Biehn"
}]

We can take that one stage further and convert the result into tabular format (ideal for writing to a sheet)

const blownupToTable = ({ blownup, sorter = mentions => Object.keys(mentions).sort((a, b) => a - b) }) => {

  // collect all the property names
  const mentions = blownup.reduce((p, c) => {
    Object.keys(c).forEach((k, i) => {
      p[k] = i;
    });
    return p;
  }, {});
  // make that into a header row
  const headerRow = sorter(mentions);
  // now add the rows after the header
  // & we dont really like undefined in sheets, so replace with null.
  return [headerRow]
    .concat(blownup.map(row => headerRow.map(h => typeof row[h] === typeof undefined ? null : row[h])));
}

Which gives this result, with the header row in the first element
[
["franchise_name", "franchise_episodes_name", "franchise_episodes_year", "franchise_episodes_actors_dob", "franchise_episodes_actors_name", "franchise_episodes_actors_nicknames", "franchise_episodes_actors_nationality"],
["rambo", "first blood", 1982, "1946-07-06", "Sylvester Stallone", "sly", null],
["rambo", "first blood", 1982, null, "Jack Starrett", null, null],
["rambo", "first blood", 1982, null, "Richard Crenna", null, "American"],
["rambo", "first blood part 2", 1985, "1946-07-06", "Sylvester Stallone", "sly", null],
["rambo", "first blood part 2", 1985, null, "Richard Crenna", null, "American"],
["terminator", "the terminator", 1984, "1947-07-30", "Arnold Schwarzenegger", "arnie", null],
["terminator", "the terminator", 1984, null, "Linda Hamilton", null, null],
["terminator", "terminator 2: judgement day", 1985, "1947-07-30", "Arnold Schwarzenegger", "arnie", null],
["terminator", "terminator 2: judgement day", 1985, "1947-07-30", "Arnold Schwarzenegger", "schwarzie", null],
["terminator", "terminator 2: judgement day", 1985, null, "Michael Biehn", null, null]
]

Putting it together

Assuming the JSON data earlier in the article is in a variable called films, here's the whole thing
const objectUnnest = ({ ob, sep, cloner, sorter }) => {
 const blownup = blowup({ ob, sep, cloner});
 const result = blownupToTable ({ blownup, sorter });
 return result;
}
console.log(objectUnnest({ ob: films }));

You can find an Apps Script version of this here.


Since G+ is closed, you can now star and follow post announcements and discussions on github, here 

Learning Apps Script, (and transitioning from VBA) are covered comprehensively in my my book, Going Gas - from VBA to Apps script, All formats are available from O'ReillyAmazon and all good bookshops. You can also read a preview on O'Reilly

If you prefer Video style learning I also have two courses available. also published by O'Reilly.
Google Apps Script for Developers and Google Apps Script for Beginners.

Comments