This topic describes how to use a Java user-defined function (UDF) to obtain a JSON string based on the specified path.

UDF used to obtain a JSON string

UDFGetJsonObject(String jsonString, String pathString)
  • Description

    This UDF parses a JSON string specified by the jsonString parameter and returns the JSON object at the path specified by the pathString parameter. If the input JSON string is invalid, null is returned.

  • Parameters
    • jsonString: the JSON string.
    • pathString: the path. A dollar sign $ indicates a root node, a period . indicates a child node, and a pair of brackets [] indicates an array subscript. The value can only contain letters, digits, and underscores (_).

UDF example

  • Upload resources

    Due to sandbox limits, to run the UDF in Alibaba Cloud MaxCompute, you must upload the org.json package and the UDF JAR package to MaxCompute as resources. Reference the two packages when you create the UDF in DataWorks. You can visit MVN to download the org.json package.

  • Register the UDF

    After UDFGetJsonObject.java is tested, register the UDF. In this example, json4.jar is the JAR package of the UDF, and json-20180813.jar is the org.json package.

  • Example
    After the UDF is registered, execute the following statement:
    select UDFGetJsonObject('{"grade":"One","persons":[{"age":"a1","name":"n1"},{"age":"a2","name":"n2"}]}', '$.persons[0].age') from dual;
    The result is a1.

UDF code example

package com.aliyun.odps.examples.udf; // The package name, which can be customized.
import org.json.JSONArray; 
import org.json.JSONException; 
import org.json.JSONObject;

import java.util.ArrayList; 
import java.util.Iterator; 
import java.util.LinkedHashMap; 
import java.util.Map; 
import java.util.regex.Matcher; 
import java.util.regex.Pattern; 
// Alibaba Cloud UDF.
import com.aliyun.odps.udf.UDF;  

public class UDFGetJsonObject extends UDF {
  // Declare a private immutable pattern.
  private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-]+).*");   
  private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); 

  // Use LinkedHashMap to implement a least recently used (LRU) cache.
  // Define the inner static subclasses of HashCache, which are inherited from the LinkedHashMap class.
  static class HashCache<K, V> extends LinkedHashMap<K, V> {  

    private static final int CACHE_SIZE = 16; 
    private static final int INIT_SIZE = 32;
    // Declare LOAD_FACTOR of the private constant HashMap.
    private static final float LOAD_FACTOR = 0.6f; 

    HashCache() {
      super(INIT_SIZE, LOAD_FACTOR);
    }

    private static final long serialVersionUID = 1; 

    // Override the LinkedHashMap method to determine whether the cache size exceeds the limit and clear the cache if necessary.
    @Override   
    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) { 
      return size() > CACHE_SIZE;
    }

  }
  // Use HashCache to create a cache area of the Map type.
  static Map<String, Object> extractObjectCache = new HashCache<String, Object>();  
  static Map<String, String[]> pathExprCache = new HashCache<String, String[]>(); 
  static Map<String, ArrayList<String>> indexListCache = new HashCache<String, ArrayList<String>>(); 
  static Map<String, String> mKeyGroup1Cache = new HashCache<String, String>();
  static Map<String, Boolean> mKeyMatchesCache = new HashCache<String, Boolean>();

  String result = new String(); 

  public UDFGetJsonObject() {
  } 

  /**
   * Extract json object from a json string based on json path specified, and
   * return json string of the extracted json object. It will return null if the
   * input json string is invalid.
   * 
   * A limited version of JSONPath supported: $ : Root object . : Child operator
   * [] : Subscript operator for array * : Wildcard for []
   * 
   * Syntax not supported that's worth noticing: '' : Zero length string as key
   * .. : Recursive descent &amp;#064; : Current object/element () : Script
   * expression ?() : Filter (script) expression. [,] : Union operator
   * [start:end:step] : array slice operator
   * 
   * @param jsonString
   *          the json string.
   * @param pathString
   *          the json path expression.
   * @return json string or null when an error happens.
   */
  public String evaluate(String jsonString, String pathString) {    
    // Create the evaluate method. Like in Hive, UDFs in MaxCompute use the evaluate method, with jsonString as the input JSON string and pathString as the input path.  
    // If the input JSON string or path is empty, null is returned.
    if (jsonString == null || jsonString.equals("") || pathString == null   
        || pathString.equals("")) {
      return null;
    }

    try {
      // Cache pathExpr
      // Obtain the pathExpr parameter of the String Array type, which indicates an array of paths.
      String[] pathExpr = pathExprCache.get(pathString);     
      if (pathExpr == null) {
        // Use a period (.) as a separator to obtain the pathExpr parameter of the String Array type. A value of -1 indicates that separators in the last few characters are also split.
        pathExpr = pathString.split("\\.", -1);  
        // Use the put method to cache pathString and pathExpr to pathExprCache.
        pathExprCache.put(pathString, pathExpr); 
      }
      // If the path does not start with a dollar sign ($), null is returned.
      if (! pathExpr[0].equalsIgnoreCase("$")) {  
        return null;
      }
      // Cache extractObject
      Object extractObject = extractObjectCache.get(jsonString); 
      if (extractObject == null) {
        extractObject = new JSONObject(jsonString); 
        // Use the put method to cache the content of the JSON string.
        extractObjectCache.put(jsonString, extractObject);  
      }
      for (int i = 1; i < pathExpr.length; i++) {
        // Use the extract method to extract a JSON string based on pathExpr.
        extractObject = extract(extractObject, pathExpr[i]);  
      }
      // Provide the obtained JSON string of the Object type as a String-type string.
      return extractObject.toString();  
    } catch (Exception e) {
      // If an exception occurs, null is returned.
      return null;  
    }
  }

  private Object extract(Object json, String path) throws JSONException { 
    // Cache patternkey.matcher(path).matches()
    Matcher mKey = null;  
    Boolean mKeyMatches = mKeyMatchesCache.get(path);  
    if (mKeyMatches == null) {
      // The path does not exist in the cache.            
      mKey = patternKey.matcher(path); 
      // If the path matches a JSON string, True is returned. If the path fails to match a JSON string, False is returned.
      mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;  
      mKeyMatchesCache.put(path, mKeyMatches);
    }
    if (! mKeyMatches.booleanValue()) {
      return null; 
    }

    // Cache mkey.group(1)
    // Obtain the JSON string that corresponds to the path.
    String mKeyGroup1 = mKeyGroup1Cache.get(path);   
    if (mKeyGroup1 == null) {  
      // Check whether the JSON string exists in the cache.      
      if (mKey == null) {
        mKey = patternKey.matcher(path);
      }
      mKeyGroup1 = mKey.group(1);
      // Cache the JSON string based on the path.
      mKeyGroup1Cache.put(path, mKeyGroup1);   
    }
    // Obtain the JSON string.
    json = extract_json_withkey(json, mKeyGroup1);  

    // Cache indexList
    // Obtain the JSON string of the Array type.
    ArrayList<String> indexList = indexListCache.get(path);  
    if (indexList == null) { 
      // Check whether the JSON string of the Array type exists in the cache.            
      Matcher mIndex = patternIndex.matcher(path);
      indexList = new ArrayList<String>();
      while (mIndex.find()) {
        indexList.add(mIndex.group(1));
      }
      indexListCache.put(path, indexList);
    }

    if (indexList.size() > 0) {
      // Extract the JSON string of the Array type based on the path.
      json = extract_json_withindex(json, indexList);    
    }

    return json;
  }
  // Create a JSON object of the Array type.
  ArrayList<Object> jsonList = new ArrayList<Object>();  
  // Create a method to obtain a JSON string of the Array type.
  private Object extract_json_withindex(Object json, ArrayList<String> indexList)  
      throws JSONException {

    jsonList.clear();
    jsonList.add(json);
    Iterator<String> itr = indexList.iterator();
    while (itr.hasNext()) {
      String index = itr.next();
      ArrayList<Object> tmp_jsonList = new ArrayList<Object>();
      // If the path contains a wildcard, all JSON strings that match the wildcard are returned.
      if (index.equalsIgnoreCase("*")) {                
        for (int i = 0; i < (jsonList).size(); i++) {
          try {
            JSONArray array = (JSONArray) (jsonList).get(i);
            for (int j = 0; j < array.length(); j++) {
              tmp_jsonList.add(array.get(j));
            }
          } catch (Exception e) {
            continue;
          }
        }
        jsonList = tmp_jsonList;
      } else { 
        // If no wildcard exists, all JSON data is traversed.                             
        for (int i = 0; i < (jsonList).size(); i++) {
          try {
            tmp_jsonList.add(((JSONArray) (jsonList).get(i)).get(Integer
                .parseInt(index)));
          } catch (ClassCastException e) {
            continue;
          } catch (JSONException e) {
            return null;
          }
          jsonList = tmp_jsonList;
        }
      }
    }
    return (jsonList.size() > 1) ? new JSONArray(jsonList) : jsonList.get(0);
  }
  // Create a method to obtain the JSON string of common types.
  private Object extract_json_withkey(Object json, String path)   
      throws JSONException {
    if (json.getClass() == org.json.JSONArray.class) {
      JSONArray jsonArray = new JSONArray();
      for (int i = 0; i < ((JSONArray) json).length(); i++) {
        Object josn_elem = ((JSONArray) json).get(i);
        try {
          Object json_obj = ((JSONObject) josn_elem).get(path);
          if (json_obj.getClass() == org.json.JSONArray.class) {
            for (int j = 0; j < ((JSONArray) json_obj).length(); j++) {
              jsonArray.put(((JSONArray) json_obj).get(j));
            }
          } else {
            jsonArray.put(json_obj);
          }
        } catch (Exception e) {
          continue;
        }
      }
      return (jsonArray.length() == 0) ? null : jsonArray;
    } else {
      return ((JSONObject) json).get(path);
    }
  }
}