Code:
#!/bin/bash
HADOOP_HOME=/home/hadoop/latest
export HADOOP_HOME
JAVA_HOME=/usr/jdk/latest
export JAVA_HOME
HIVE_OPTS="$HIVE_OPTS -hiveconf mapred.job.queue.name=hdmi-technology"
export HIVE_OPTS
DATE_YEST_FORMAT1=`perl -e 'use POSIX qw(strftime); print strftime "%Y-%m-%d",localtime(time()- 3600*504);'`
echo $DATE_YEST_FORMAT1
DATE_YEST_FORMAT2=`perl -e 'use POSIX qw(strftime); print strftime "%Y%m%d",localtime(time()- 3600*504);'`
echo $DATE_YEST_FORMAT2
# Load partition dt=$DATE_YEST_FORMAT2 of lip_data_quality with one Hive job.
#
# Output is LZO block-compressed.  The job needs the custom UDF `rank`
# from UserDefinedFunction.jar; it presumably emits a per-key row number
# within each distribute/sort group -- TODO confirm against
# com.ebay.hive.udf.Rank before touching the distribute BY / sort BY
# clauses, which the UDF's semantics depend on.
#
# The query joins two derived tables on buyer_id:
#   table2: from dw_checkout_trans, rows whose created_time falls on
#     $DATE_YEST_FORMAT1; buyer_id|item_id pairs are deduplicated
#     (rank(ckey) < 1 keeps the first row per concatenated key), then at
#     most 10 most-recent rows per buyer are kept (rank(buyer_id) < 10)
#     and counted per buyer as count2.
#   table1: the same per-buyer item set, left-joined against the PDS
#     realtime export (pds_attribute_data_realtime.last_items_purchased,
#     exploded into one row per product/timestamp).  A row counts as an
#     ERROR when the PDS millisecond timestamp and the transaction's
#     created_time differ by more than 900 seconds (15 min) -- or when
#     the join found no PDS row at all (the CASE sees NULL and the
#     comparison is not <= 900, so it scores 1).
# count1 is coalesced to 0 for buyers with no PDS mismatch rows.
#
# NOTE: this entire double-quoted string is the program text sent to
# hive; do not insert lines inside it without intending to change the
# query.  $DATE_YEST_FORMAT1 / $DATE_YEST_FORMAT2 are expanded by the
# shell before hive sees the text.
hive -e "
set mapred.job.queue.name=hdmi-technology;
set mapred.output.compress=true;
set mapred.output.compression.type=BLOCK;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.LzoCodec;
add jar UserDefinedFunction.jar;
create temporary function rank as 'com.ebay.hive.udf.Rank';
insert overwrite table lip_data_quality partition (dt='$DATE_YEST_FORMAT2')
SELECT table2.buyer_id
,table2.count2 as count2
,coalesce(table1.error, cast(0 AS BIGINT)) AS count1
FROM (
SELECT buyer_id
,count(1) AS count2
FROM (
SELECT t1 [0] AS buyer_id
,t1 [1] AS item_id
,created_time
FROM (
SELECT split(ckey, '\\\\|') AS t1
,created_time
FROM (
SELECT CONCAT (
buyer_id
,'|'
,item_id
) AS ckey
,created_time
FROM dw_checkout_trans
WHERE to_date(from_unixtime(cast(UNIX_TIMESTAMP(created_time) AS BIGINT))) = '$DATE_YEST_FORMAT1' distribute BY ckey sort BY ckey
,created_time DESC
) a
WHERE rank(ckey) < 1
) X
ORDER BY buyer_id
,created_time DESC
) a
WHERE rank(buyer_id) < 10
GROUP BY buyer_id
) table2
LEFT OUTER JOIN (
SELECT TT1.BUYER_ID
,SUM(CASE
WHEN ABS(cast((TT2.prod_and_ts_split / 1000) - UNIX_TIMESTAMP(TT1.created_time) AS DOUBLE)) <= 900
THEN 0
ELSE 1
END) AS ERROR
FROM (
SELECT buyer_id
,item_id
,rank(buyer_id)
,created_time
FROM (
SELECT t1 [0] AS buyer_id
,t1 [1] AS item_id
,created_time
FROM (
SELECT split(ckey, '\\\\|') AS t1
,created_time
FROM (
SELECT CONCAT (
buyer_id
,'|'
,item_id
) AS ckey
,created_time
FROM dw_checkout_trans
WHERE to_date(from_unixtime(cast(UNIX_TIMESTAMP(created_time) AS BIGINT))) = '$DATE_YEST_FORMAT1' distribute BY ckey sort BY ckey
,created_time DESC
) a
WHERE rank(ckey) < 1
) X
ORDER BY buyer_id
,created_time DESC
) a
WHERE rank(buyer_id) < 10
) TT1
LEFT OUTER JOIN (
SELECT t1 [0] AS uid
,t1 [1] AS product_id
,prod_and_ts_split
FROM (
SELECT split(ckey, '\\\\|') AS t1
,prod_and_ts_split
FROM (
SELECT CONCAT (
uid
,'|'
,product_id
) AS ckey
,prod_and_ts_split
FROM (
SELECT uid
,product_id
,prod_and_ts_split
FROM (
SELECT *
FROM (
SELECT uid
,prod_and_ts.product_id AS product_id
,prod_and_ts.timestamps AS timestamps
FROM pds_attribute_data_realtime LATERAL VIEW explode(last_items_purchased) exploded_table AS prod_and_ts
) prod_and_ts
) tt2 LATERAL VIEW explode(split(timestamps, '#')) exploded_table2 AS prod_and_ts_split
WHERE to_date(from_unixtime(cast(prod_and_ts_split / 1000 AS BIGINT))) = '$DATE_YEST_FORMAT1'
GROUP BY uid
,product_id
,prod_and_ts_split
ORDER BY uid
,prod_and_ts_split DESC
) X
distribute BY ckey sort BY ckey
,prod_and_ts_split DESC
) a
WHERE rank(ckey) < 1
) YY
ORDER BY uid
,prod_and_ts_split DESC
) TT2 ON (
TT1.item_id = TT2.product_id
AND TT1.BUYER_ID = TT2.uid
)
GROUP BY TT1.BUYER_ID
) table1 ON table1.buyer_id = table2.buyer_id
ORDER BY table2.buyer_id;"
# Pause before reading the partition back.
# NOTE(review): a fixed 120s sleep is fragile -- presumably here to let the
# insert's output become visible to the follow-up queries; confirm whether
# it is still needed.
sleep 120

# Totals for the freshly loaded partition: "<purchased>\t<mismatch>".
QUERY1=$(hive -e "
set mapred.job.queue.name=hdmi-technology;
SELECT SUM(total_items_purchased), SUM(total_items_missingormismatch) from lip_data_quality where dt='$DATE_YEST_FORMAT2';")

# Error percentage = 100 * mismatched / purchased (a float).
QUERY2=$(hive -e "
set mapred.job.queue.name=hdmi-technology;
SELECT 100 * SUM(total_items_missingormismatch*1.0) / SUM(total_items_purchased) FROM lip_data_quality where dt='$DATE_YEST_FORMAT2';")

# Clean up any page left behind by earlier runs (-f: no error on first run),
# then build this run's report in a private temp file.  The original
# `mktemp -t chart.html` template has no X placeholders and fails on GNU
# mktemp; chart.XXXXXX works on both GNU and BSD.
rm -f /tmp/chart.html
TEMP=$(mktemp -t chart.XXXXXX) || exit 1

# Shell arithmetic is integer-only and QUERY2 is a float, so subtract in awk.
# (The original NOERROR=100-$QUERY2 produced the literal string "100-<value>"
# and only rendered because it happened to land inside a JavaScript
# expression; it broke outright when QUERY2 was empty.)
NOERROR=$(awk -v e="$QUERY2" 'BEGIN { print 100 - e }')
ERROR=$QUERY2
# Render the pie-chart report page into "$TEMP".  The heredoc delimiter is
# deliberately unquoted so $NOERROR and $ERROR expand into the JavaScript
# data rows below; everything else is literal HTML/JS.
cat > "$TEMP" <<EOF
<html>
<head>
<!--Load the AJAX API-->
<script type="text/javascript" src="https://www.google.com/jsapi"></script>
<script type="text/javascript">
// Load the Visualization API and the piechart package.
google.load('visualization', '1.0', {'packages':['corechart']});
// Set a callback to run when the Google Visualization API is loaded.
google.setOnLoadCallback(drawChart);
// Callback that creates and populates a data table,
// instantiates the pie chart, passes in the data and
// draws it.
function drawChart() {
// Create the data table.
var data = new google.visualization.DataTable();
data.addColumn('string', 'Title');
data.addColumn('number', 'Value');
data.addRows([
['No Error Percentage', $NOERROR],
['Error Percentage', $ERROR]
]);
// Set chart options
var options = {'title':'LIP Data Quality Report',
'width':700,
'height':600};
// Instantiate and draw our chart, passing in some options.
var chart = new google.visualization.PieChart(document.getElementById('chart_div'));
chart.draw(data, options);
}
</script>
</head>
<body>
<!--Div that will hold the pie chart-->
<div id="chart_div" style="width:900px; height: 800px;"></div>
</body>
</html>
EOF
# Mail the summary plus the uuencoded chart page.
# Attach the page that was actually generated: /tmp/chart.html was removed
# earlier in the script and never recreated (the HTML went to "$TEMP"), so
# uuencode'ing /tmp/chart.html always produced a broken/empty attachment.
# QUERY1 is expanded unquoted on purpose: word-splitting flattens hive's
# tab-separated output so awk can pick column 1 and 2.
# NOTE(review): no recipient address is passed to mailx; on most variants
# -r only sets the envelope-From -- confirm where this mail is actually
# expected to be delivered.
mailx -s "LIP Data Quality Report for $DATE_YEST_FORMAT1" -r email_person@sample_email.com <<EOF
Data Successfully loaded into LIP_DATA_QUALITY table
Total Items Purchased: $(echo $QUERY1 | awk '{print $1}')
Total Items MissingorMismatch: $(echo $QUERY1 | awk '{print $2}')
Error Percentage: $QUERY2
********* Before Making Assumptions, please read the below Notes***********************
Above Percentage could be mainly because of below reasons:-
1) Multiple USER's sharing the same browser and export is done basis on LAST_LOGGED_IN_USER.
2) Either the Export Issue.
3) Thirdly, ITEM was not captured properly in PDS.
$(uuencode "$TEMP" percentage_graph.html)
EOF