EZ Study

Actuarial Biology Chemistry Economics Calculators Confucius Engineer
Physics
C.S.

SAS Tutorial in Netezza Analytics Functions
Netezza Analytics Functions

Here are the examples for various Netezza Analytics Functions:
/******************************************************/
proc sql stimer;
connect to netezza (server='server_name' database=database1
execute (create table outputfile as
select round(a.num_var,2) as num_var1, *two decimal points;
select round(a.num_var) as num_var_int, *round to nearest integer;
select floor(a.num_var) as num_var_less,
*return the integer less than thar var;
select ceil(a.num_var) as num_var_greater,
*return the integer greater than thar var;
select sign(a.num_var) as sign_num_var, *negative or positve;
select cast(a.character_var as int) as numerical_var,
/*If out of range, try bigint, numeric, or double */

select int_var1/int_var2 as int_ratio, output only ceiling integers
select cast(int_var1 as double)/int_var2 as int_ratio,
/* output decimal points, instead of integers. */
select to_number(a.character,'9999') as numerical_var,
/*be careful , e.g. '2012M12D26' will show as '2012' for first 4 digits */
select cast(a.numeric_var as varchar(255)) as character_var,
select To_char(current_timestamp,'YYYY-MM-DD'),
select To_Char( DATE '2009-12-23 23:45:58','YYYY-MM-DD'),
select To_char(
To_date( '2009-12-23 23:45:58','YYYY-MM-DD HH24:MI:SS'),
'YYYY-MM-DD'), /* to_char is used for specific format */
select cast(datetime as date) as date,cast(datetime as time) as time
select date(current_Date) as date_today
select (current_timestamp) as time_now
select date_part('year', a.saledate) as year,
select date_part('month' , a.saledate) as month,
select date_part('day', a.saledate) as day,
select extract(hour from a.data_timestamp) as hour,
select extract(minute from a.data_timestamp) as minute,
select ln(avg(value)) as log_value, not use log
select stddev(value) as std_variance
/* In SAS: std function returns standard deviation, */
/* stderr=std/sqrt(n): standard error of the mean. */

/* Get the 2nd word('1580') from the string1= '20130602-1580-2-45'
/* scan(string1,2,'-') from SAS, = instr(string1,'-',1,2) */
substr(str1,instr(str1,'-',1,2)+1,instr(str1,'-',1,3)-instr(str1,'-',1,2)) as word2
substr(str1,10,position('-' in substr(str1,11))) as word2
select substring(var1,1,position('key' in var1)-1) as new_var1,
trim(leading '0' from b.var2) as var2, c.var3
/* remove leading zeros from the text string */

from netezza_database1 as a
left join netezza_database2 as b on a.keyvar1 = b.key2
left join netezza_database3 as c on a.keyvar2 = c.key3
/*by default, to_char() has 200 length, use substr() to truncate */
where a.datetime >= '01/01/2010'
where a.datetime >= '01Jan10'
where a.datetime >= '01Jan2010 08:00:00'
and b.var2 in ('string1', 'string2')
group by a.var1, c.var3
having sum(b.var2)>100
limit 10;
distribute on random ) by netezza; quit;
/******************************************************/

You may see the following error message if you are not joining correctly:
ERROR: CLI error trying to establish connection: [Microsoft][ODBC Driver Manager] Data source name not found and no default driver specified.
This is due to the hox-fix by SAS: http://support.sas.com/kb/20/287.html

In case if you want to use the case statement, use the following:
/******************************************************/
execute (create table outputfile as
select var1, case
when var2='old1' then 'new1'
when var2='old2' then 'new2'
else var3
end as new_var2

from another_netezza_data
where var3 is not null /* missing not working, neither for var3^=.*/
order by var1,var3
having new_var2=max(new_var2)
DISTRIBUTE ON RANDOM
) by netezza; quit;
/******************************************************/

If you don't want to move those two datasets into the same database, especially if both datasets are huge, you can use the next tutorial to create a temporary file in Netezza, do the query by the temporary data.