This topic describes how to use a Java UDF to obtain a JSON string based on its path and check whether it has a specific value.

UDF used to check the value of a JSON string

UDFGetJsonObject(String jsonString, String pathString)
  • Description

    This UDF is used to parse jsonString and obtain the value of pathString. It can also be used to determine whether the obtained value contains a specific value.

  • Parameters
    • jsonString: the JSON string.
    • pathString: the path. A dollar sign ($) indicates a root node, a period (.) indicates a child node, and a bracket ([]) indicates an array subscript. pathString can only contain letters, digits, and underscores (_).

UDF example

  • Resource upload

    Due to sandbox limits, to run this function in Alibaba Cloud MaxCompute, you need to upload the org.json and UDF JAR packages to MaxCompute as resources and reference the two packages when you create a function in DataWorks. You can visit MVN to download the org.json package.

  • Function registration

    TestJson.jar is the JAR package generated by packaging the example code of the UDF used to check the value of a JSON string. json-20180813.jar is the org.json package.

  • Examples
    After the UDF is registered, execute the following statements:
    • select TestJson('{"LogA\":[{"id":"11562,20508"}, {"id":"11563,20509"}]}', "LogA[0]") from dual;
      The result is {"id":"11562,20508"}.
    • select bi_udf:bi_get_value('{"LogA\":[{"id":"11562,20508"}, {"id":"11563,20509"}]}', "LogA[0].id", "20508") from dual;                                    
      The result is true.
    • select bi_udf:bi_get_value('{"LogA\":[{"id":"11562,20508"}, {"id":"11563,20509"}]}', "LogA[0].id", "20508", "&") from dual;
      The result is true.

UDF code example

package com.aliyun.odps.examples.udf; // The package name, which can be defined as needed.
import com.aliyun.odps.io.Text;
import com.aliyun.odps.udf.UDF; // Alibaba Cloud UDF.
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class UDFGetValue extends UDF {
    // Declare the mode of a regular expression with the private final pattern.
    private  final  Pattern  patternKey  =  Pattern.compile("^([a-zA-Z0-9_\\-]+).*");  
    private  final  Pattern  patternIndex  =  Pattern.compile("\\[([0-9]+|\\*)\\]");
    // Use HashCache to create a cache region of the Map type.
    private  static  Map<String,  Object>  extractObjectCache  =  new  HashCache<String,  Object>();
    private  static  Map<String,  String[]>  pathExprCache  =  new  HashCache<String,  String[]>();
    private  static  Map<String, ArrayList<String>>  indexListCache  =  new  HashCache<String,  ArrayList<String>>();
    private  static  Map<String,  String>  mKeyGroup1Cache  =  new  HashCache<String,  String>();
    private  static  Map<String,  Boolean>  mKeyMatchesCache  =  new  HashCache<String,  Boolean>();
    private  Text  result  =  new Text();
    private  ArrayList<Object>  jsonList  =  new  ArrayList<Object>();

    // An LRU cache using a linked hash map.
    // Define the inner static subclasses of HashCache, which are inherited from the LinkedHashMap class.
    private  static  class  HashCache<K,  V>  extends LinkedHashMap<K,  V> {
        private  static  final  int  CACHE_SIZE  =  16;
        private  static  final  int  INIT_SIZE  =  32;
        // Declare LOAD_FACTOR of the private constant HashMap.
        private  static  final  float  LOAD_FACTOR  =  0.6f;
        private  static  final  long  serialVersionUID  =  1;
        HashCache()  {
            super(INIT_SIZE,  LOAD_FACTOR);
        }
        // LinkedHashMap overload is used to determine whether the cache size exceeds the limit and clear the cache if necessary.
        @Override
        protected  boolean  removeEldestEntry(Map.Entry<K,  V>  eldest)  {
            return  size()  >  CACHE_SIZE;
        }
    }

    public UDFGetValue()  {
    }

    /**
     *  @param  jsonStr indicates the input JSON string.
     *  @param  path indicates the JSON path.
     *  @return indicates the return value.
     */
    public  String  evaluate(String  jsonStr,  String  path)  {
        // The way to create the EVALUATE method, which is the same as HIVE. MaxCompute UDFs generally use the EVALUATE method, with jsonString as the input JSON string and pathString as the input path.
        // If the path to the input JSON string is empty, null is returned.
        if  (jsonStr  ==  null  ||  jsonStr  ==  ""  ||  path  ==  null  ||  path  ==  "")  {
            return null;
        }
        String  jResult  =  evaluateJson(jsonStr,  path);
        if  (jResult  ==  null  ||  "".equals(jResult))
            return  null;
        result.set(jResult);
        return result.toString();
    }

    /**
     * @param jsonStr indicates the input JSON string.
     * @param path indicates the JSON path.
     * @param value indicates the return value.
     * @return checks whether a specific path in the JSON string contains all content in value and then returns "true" or "false" based on the result.
     */
    public String evaluate(String jsonStr, String path, String value) {
        if (jsonStr == null || jsonStr == "" || path == null
                || path == "" || value == null || "".equals(value)) {
            return null;
        }
        String jResult = evaluateJson(jsonStr, path);
        if (jResult == null || "".equals(jResult)) {
            result.set("false");
            return result.toString();
        }
        String[] sV = value.split(",");
        String[] dV = jResult.split(",");
        int sameCount = 0;
        for (int i = 0; i < sV.length; i++) {
            for (int j = 0; j < dV.length; j++) {
                if (sV[i].equals(dV[j])) {
                    sameCount++;
                    break;
                }
            }
        }
        if (sameCount == sV.length) {
            result.set("true");
        } else {
            result.set("false");
        }
        return result.toString();
    }

    /**
     * @param jsonStr indicates the input JSON string.
     * @param path indicates the JSON path.
     * @param value indicates the return value.
     * @param flag can be set to a vertical bar (|) which indicates that any content in value is contained or an ampersand (&) which indicates that all content in value must be contained.
     * @return checks whether the JSON string contains value and then returns "true" or "false" based on the result.
     */
    public String evaluate(String jsonStr, String path, String value, String flag) {
        if ("&".equals(flag))
            return evaluate(jsonStr, path, value);
        if (jsonStr == null || jsonStr.equals("") || path == null || path.equals("")
                || value == null || "".equals(value) || flag == null
                || !" |".equals(flag)) {
            result.set("false");
            return result.toString();
        }
        String jResult = evaluateJson(jsonStr, path);
        if (jResult == null || "".equals(jResult)) {
            result.set("false");
            return result.toString();
        }
        String[] sV = value.split(",");
        String[] dV = jResult.split(",");
        for (int i = 0; i < sV.length; i++) {
            for (int j = 0; j < dV.length; j++) {
                if (sV[i].equals(dV[j])) {
                    result.set("true");
                    return result.toString();
                }
            }
        }
        result.set("false");
        return result.toString();
    }

    /**
     * Extract json object from a json string based on json path specified, and
     * return json string of the extracted json object. It will return null if
     * the input json string is invalid.
     *
     * A limited version of JSONPath supported: $ : Root object . : Child
     * operator [] : Subscript operator for array * : Wildcard for []
     *
     * Syntax not supported that's worth noticing: '' : Zero length string as
     * key .. : Recursive descent &amp;#064; : Current object/element () :
     * Script expression ?() : Filter (script) expression. [,] : Union operator
     * [start:end:step] : array slice operator
     *
     * @param jsonString the json string.
     * @param pathString the json path expression.
     * @return json string or null when an error happens.
     */
    private String evaluateJson(String jsonString, String pathString) {
        try {
            pathString = "$." + pathString;
            // Cache pathExpr
            // Obtain the input path of the array of the STRING type.
            String[] pathExpr = pathExprCache.get(pathString);
            if (pathExpr == null) {
                // Use a period (.) as a separator to obtain the pathExpr array of the STRING type of the input path. -1 indicates that separators in the last few digits are also split.
                pathExpr = pathString.split("\\.", -1);
                // Use the PUT method to cache pathString and pathExpr to pathExprCache.
                pathExprCache.put(pathString, pathExpr);
            }
            // If the path does not start with a dollar sign ($), null is returned.
            if (! pathExpr[0].equalsIgnoreCase("$")) {
                return null;
            }
            // Cache extractObject
            Object extractObject = extractObjectCache.get(jsonString);
            if (extractObject == null) {
                extractObject = new JSONObject(jsonString);
                Object put = extractObjectCache.put(jsonString, extractObject);
            }
            for (int i = 1; i < pathExpr.length; i++) {
                // Use the EXTRACT method to extract a JSON string based on pathExpr.
                extractObject = extract(extractObject, pathExpr[i]);
            }
            // Provide the obtained JSON string of the OBJECT type as a STRING-type string.
            return extractObject.toString();
        } catch (Exception e) {
            // If an exception occurs, null is returned.
            return null;
        }
    }

    private Object extract(Object json, String path) throws JSONException {
        // Cache patternkey.matcher(path).matches()
        Matcher mKey = null;
        Boolean mKeyMatches = mKeyMatchesCache.get(path);
        // The path does not exist in the cache.
        if (mKeyMatches == null) {
            mKey = patternKey.matcher(path);
            // If the path matches a JSON string, True is returned. If the path fails to match a JSON string, False is returned.
            mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;
            mKeyMatchesCache.put(path, mKeyMatches);
        }
        if (! mKeyMatches.booleanValue()) {
            return null;
        }

        // Cache mkey.group(1)
        // Obtain the JSON string that corresponds to the path.
        String mKeyGroup1 = mKeyGroup1Cache.get(path);
        // Check whether the JSON string exists in the cache.
        if (mKeyGroup1 == null) {
            if (mKey == null) {
                mKey = patternKey.matcher(path);
            }
            mKeyGroup1 = mKey.group(1);
            // Cache the JSON string based on the path.
            mKeyGroup1Cache.put(path, mKeyGroup1);
        }
        // Obtain the JSON string.
        json = extract_json_withkey(json, mKeyGroup1);

        // Cache indexList
        // Obtain the JSON string of the ARRAY type.
        ArrayList<String> indexList = indexListCache.get(path);
        // Check whether the JSON string of the ARRAY type exists in the cache.
        if (indexList == null) {
            Matcher mIndex = patternIndex.matcher(path);
            indexList = new ArrayList<String>();
            while (mIndex.find()) {
                indexList.add(mIndex.group(1));
            }
            indexListCache.put(path, indexList);
        }

        if (indexList.size() > 0) {
            // Extract the JSON string of the ARRAY type based on the path.
            json = extract_json_withindex(json, indexList);
        }

        return json;
    }

    private Object extract_json_withindex(Object json, ArrayList<String> indexList) throws JSONException {

        jsonList.clear();
        jsonList.add(json);
        Iterator<String> itr = indexList.iterator();
        while (itr.hasNext()) {
            String index = itr.next();
            // Create a JSON object of the ARRAY type.
            ArrayList<Object> tmp_jsonList = new ArrayList<Object>();
            // Create a method to obtain a JSON string of the ARRAY type.
            if (index.equalsIgnoreCase("*")) {
                for (int i = 0; i < (jsonList).size(); i++) {
                    try {
                        JSONArray array = (JSONArray) (jsonList).get(i);
                        for (int j = 0; j < array.length(); j++) {
                            tmp_jsonList.add(array.get(j));
                        }
                    } catch (Exception e) {
                        continue;
                    }
                }
                jsonList = tmp_jsonList;
              // If no wildcard exists, all JSON data is traversed.
            } else {
                for (int i = 0; i < (jsonList).size(); i++) {
                    try {
                        tmp_jsonList.add(((JSONArray) (jsonList).get(i))
                                .get(Integer.parseInt(index)));
                    } catch (ClassCastException e) {
                        continue;
                    } catch (JSONException e) {
                        return null;
                    }
                    jsonList = tmp_jsonList;
                }
            }
        }
        return (jsonList.size() > 1) ? new JSONArray(jsonList) : jsonList
                .get(0);
    }
    // Create a method to obtain the JSON string of common types.
    private Object extract_json_withkey(Object json, String path) throws JSONException {
        if (json.getClass() == JSONArray.class) {
            JSONArray jsonArray = new JSONArray();
            for (int i = 0; i < ((JSONArray) json).length(); i++) {
                Object josn_elem = ((JSONArray) json).get(i);
                try {
                    Object json_obj = ((JSONObject) josn_elem).get(path);
                    if (json_obj.getClass() == JSONArray.class) {
                        for (int j = 0; j < ((JSONArray) json_obj).length(); j++) {
                            jsonArray.put(((JSONArray) json_obj).get(j));
                        }
                    } else {
                        jsonArray.put(json_obj);
                    }
                } catch (Exception e) {
                    continue;
                }
            }
            return (jsonArray.length() == 0) ? null : jsonArray;
        } else {
            return ((JSONObject) json).get(path);
        }
    }
}