edit-icon download-icon

Attachment: Sample code

Last Updated: Mar 27, 2018

Table creation statement of ods_log_tracker

  1. CREATE TABLE IF NOT EXISTS ods_log_tracker(
  2. ip STRING COMMENT 'client ip address',
  3. user STRING,
  4. time DATETIME,
  5. request STRING COMMENT 'HTTP request type + requested path without args + HTTP protocol version',
  6. status BIGINT COMMENT 'HTTP reponse code from server',
  7. size BIGINT,
  8. referer STRING,
  9. agent STRING)
  10. COMMENT 'Log from coolshell.cn'
  11. PARTITIONED BY(dt STRING);

Table creation statement of dw_log_parser

  1. CREATE TABLE IF NOT EXISTS dw_log_parser(
  2. ip STRING COMMENT 'client ip address',
  3. user STRING,
  4. time DATETIME,
  5. method STRING COMMENT 'HTTP request type, such as GET POST...',
  6. url STRING,
  7. protocol STRING,
  8. status BIGINT COMMENT 'HTTP reponse code from server',
  9. size BIGINT,
  10. referer STRING,
  11. agent STRING)
  12. PARTITIONED BY(dt STRING);

Table creation statement of dw_log_detail

  1. CREATE TABLE IF NOT EXISTS dw_log_detail(
  2. ip STRING COMMENT 'client ip address',
  3. time DATETIME,
  4. method STRING COMMENT 'HTTP request type, such as GET POST...',
  5. url STRING,
  6. protocol STRING,
  7. status BIGINT COMMENT 'HTTP reponse code from server',
  8. size BIGINT,
  9. referer STRING COMMENT 'referer domain',
  10. agent STRING,
  11. device STRING COMMENT 'android|iphone|ipad...',
  12. identity STRING COMMENT 'identify: user, crawler, feed')
  13. PARTITIONED BY(dt STRING);

Table creation statement of dim_user_info

  1. CREATE TABLE IF NOT EXISTS dim_user_info(
  2. uid STRING COMMENT 'unique user id',
  3. ip STRING COMMENT 'client ip address',
  4. device STRING,
  5. protocol STRING,
  6. identity STRING COMMENT 'user, crawler, feed',
  7. agent STRING)
  8. PARTITIONED BY(dt STRING);

Table creation statement of dw_log_fact

  1. CREATE TABLE IF NOT EXISTS dw_log_fact(
  2. uid STRING COMMENT 'unique user id',
  3. time DATETIME,
  4. method STRING COMMENT 'HTTP request type, such as GET POST...',
  5. url STRING,
  6. status BIGINT COMMENT 'HTTP reponse code from server',
  7. size BIGINT,
  8. referer STRING)
  9. PARTITIONED BY(dt STRING);

Table creation statement of adm_user_measures

  1. CREATE TABLE IF NOT EXISTS adm_user_measures(
  2. device STRING COMMENT 'such as android, iphone, ipad...',
  3. pv BIGINT,
  4. uv BIGINT)
  5. PARTITIONED BY(dt STRING);
  6. adm_refer_info_ddl
  7. CREATE TABLE adm_refer_info(
  8. referer STRING,
  9. count BIGINT)
  10. PARTITIONED BY(dt STRING);

Node code of dw_log_parser

  1. INSERT OVERWRITE TABLE dw_log_parser PARTITION (dt=${bdp.system.bizdate})
  2. SELECT ip
  3. , user
  4. , time
  5. , regexp_substr(request, '(^[^ ]+ )') AS method
  6. , regexp_extract(request, '^[^ ]+ (.*) [^ ]+$') AS url
  7. , regexp_substr(request, '([^ ]+$)') AS protocol
  8. , status
  9. , size
  10. , referer
  11. , agent
  12. FROM ods_log_tracker
  13. WHERE dt = ${bdp.system.bizdate};

Node code of dw_log_detail

  1. INSERT OVERWRITE TABLE dw_log_detail PARTITION (dt=${bdp.system.bizdate})
  2. SELECT ip
  3. , time
  4. , method
  5. , url
  6. , protocol
  7. , status
  8. , size
  9. , regexp_extract(referer, '^[^/]+://([^/]+){1}') AS referer
  10. , agent
  11. , CASE
  12. WHEN TOLOWER(agent) RLIKE 'android' THEN 'android'
  13. WHEN TOLOWER(agent) RLIKE 'iphone' THEN 'iphone'
  14. WHEN TOLOWER(agent) RLIKE 'ipad' THEN 'ipad'
  15. WHEN TOLOWER(agent) RLIKE 'macintosh' THEN 'macintosh'
  16. WHEN TOLOWER(agent) RLIKE 'windows phone' THEN 'windows_phone'
  17. WHEN TOLOWER(agent) RLIKE 'windows' THEN 'windows_pc'
  18. ELSE 'unknown'
  19. END AS device
  20. , CASE
  21. WHEN TOLOWER(agent) RLIKE '(bot|spider|crawler|slurp)' THEN 'crawler'
  22. WHEN TOLOWER(agent) RLIKE 'feed'
  23. OR url RLIKE 'feed' THEN 'feed'
  24. WHEN TOLOWER(agent) NOT RLIKE '(bot|spider|crawler|feed|slurp)'
  25. AND agent RLIKE '^[Mozilla|Opera]'
  26. AND url NOT RLIKE 'feed' THEN 'user'
  27. ELSE 'unknown'
  28. END AS identity
  29. FROM dw_log_parser
  30. WHERE url NOT RLIKE '^[/]+wp-'
  31. AND dt =${bdp.system.bizdate};

Node code of dim_user_info

  1. INSERT OVERWRITE TABLE dim_user_info PARTITION (dt=${bdp.system.bizdate})
  2. SELECT md5(concat(t1.ip, t1.device, t1.protocol, t1.identity, t1.agent))
  3. , t1.ip
  4. , t1.device
  5. , t1.protocol
  6. , t1.identity
  7. , t1.agent
  8. FROM (
  9. SELECT ip
  10. , protocol
  11. , agent
  12. , device
  13. , identity
  14. FROM dw_log_detail
  15. WHERE dt = ${bdp.system.bizdate}
  16. GROUP BY ip,
  17. protocol,
  18. agent,
  19. device,
  20. identity
  21. ) t1;

Node code of dw_log_fact

  1. INSERT OVERWRITE TABLE dw_log_fact PARTITION (dt=${bdp.system.bizdate})
  2. SELECT u.uid
  3. , d.time
  4. , d.method
  5. , d.url
  6. , d.status
  7. , d.size
  8. , d.referer
  9. FROM dw_log_detail d
  10. JOIN dim_user_info u
  11. ON (d.ip = u.ip
  12. AND d.protocol = u.protocol
  13. AND d.agent = u.agent) and d.dt = ${bdp.system.bizdate} AND u.dt =${bdp.system.bizdate};

Node code of adm_user_measures

  1. INSERT OVERWRITE TABLE adm_user_measures PARTITION (dt='${bdp.system.bizdate}')
  2. SELECT u.device
  3. , COUNT(*) AS pv
  4. , COUNT(DISTINCT u.uid) AS uv
  5. FROM dw_log_fact f
  6. JOIN dim_user_info u
  7. ON f.uid = u.uid
  8. AND u.identity = 'user'
  9. AND f.dt = '${bdp.system.bizdate}'
  10. AND u.dt = '${bdp.system.bizdate}'
  11. GROUP BY u.device;

Node code of adm_refer_info

  1. INSERT OVERWRITE TABLE adm_refer_info PARTITION (dt='${bdp.system.bizdate}')
  2. SELECT referer
  3. , COUNT(*) AS cnt
  4. FROM dw_log_fact
  5. WHERE LENGTH(referer) > 1
  6. AND dt = '${bdp.system.bizdate}'
  7. GROUP BY referer;
Thank you! We've received your feedback.