Moving Medians and Quartiles in Oracle

Image credit: energepic.com

Moving Medians and Quartiles in Oracle

I discovered a neat trick today using the MODEL clause in Oracle.

My problem was that I had a collection of time-based data, and wanted to compute a running median. Not only that, but the data was not a nice evenly spaced time series, and I basically wanted to compute the “end of day” running median.

This seemed impossible to perform in one query until I realised it was actually quite simple.

I will try to summarise this with an example below

-- Input some data
with l_data as (

    select to_date('20170101', 'yyyymmdd') as ref_date, 1 as id, 123 as value from dual union all
    select to_date('20170101', 'yyyymmdd') as ref_date, 2 as id, 125 as value from dual union all
    select to_date('20170101', 'yyyymmdd') as ref_date, 3 as id, 150 as value from dual union all
    select to_date('20170101', 'yyyymmdd') as ref_date, 4 as id, 104 as value from dual union all
    select to_date('20170102', 'yyyymmdd') as ref_date, 5 as id, 121 as value from dual union all
    select to_date('20170102', 'yyyymmdd') as ref_date, 6 as id, 122 as value from dual union all
    select to_date('20170102', 'yyyymmdd') as ref_date, 7 as id, 140 as value from dual union all
    select to_date('20170102', 'yyyymmdd') as ref_date, 8 as id, 130 as value from dual union all
    select to_date('20170102', 'yyyymmdd') as ref_date, 9 as id, 135 as value from dual union all
    select to_date('20170103', 'yyyymmdd') as ref_date, 10 as id, 145 as value from dual union all
    select to_date('20170103', 'yyyymmdd') as ref_date, 11 as id, 143 as value from dual union all
    select to_date('20170103', 'yyyymmdd') as ref_date, 12 as id, 140 as value from dual union all
    select to_date('20170105', 'yyyymmdd') as ref_date, 13 as id, 160 as value from dual union all
    select to_date('20170106', 'yyyymmdd') as ref_date, 14 as id, 133 as value from dual union all
    select to_date('20170107', 'yyyymmdd') as ref_date, 15 as id, 158 as value from dual union all
    select to_date('20170107', 'yyyymmdd') as ref_date, 16 as id, 145 as value from dual union all
    select to_date('20170107', 'yyyymmdd') as ref_date, 17 as id, 135 as value from dual union all
    select to_date('20170107', 'yyyymmdd') as ref_date, 18 as id, 142 as value from dual union all
    select to_date('20170107', 'yyyymmdd') as ref_date, 19 as id, 139 as value from dual
    
-- Get the latest ID per day, as we are computing the "end of day" totals
), l_latest as (

    select max(id) as id
      from l_data
     group by ref_date

-- Compute the moving metrics with a lookback window of 3 observations
), l_mov as (

    select id
         , mov_lqtile
         , mov_median
         , mov_uqtile
      from l_data
     model
       dimension by ( id )
       measures ( value
                , 0 mov_lqtile
                , 0 mov_median
                , 0 mov_uqtile )
       rules
       -- only compute for the last observations each day, saves time
         ( mov_lqtile[ for id in ( select id from l_latest ) ] = percentile_disc(0.25) within group (order by value)[id between cv()-3 and cv()]
         , mov_median[ for id in ( select id from l_latest ) ] = median(value)[id between cv()-3 and cv()]
         , mov_uqtile[ for id in ( select id from l_latest ) ] = percentile_disc(0.75) within group (order by value)[id between cv()-3 and cv()] )
    
-- Work out the date range
), l_date_range as (

    select min(ref_date) as min_date
         , max(ref_date) as max_date
      from l_data
     
-- Use the date range to get a full list of dates    
), l_dates as (

    select min_date + level - 1 as ref_date
      from l_date_range
   connect by min_date + level - 1 <= max_date
   
)

-- Display the data with missing dates filled in
-- Display moving median alongside the daily median

    select d.ref_date
         , median(dt.value) as median
         , max(m.mov_lqtile) keep (dense_rank first order by m.id desc) as mov_lqtile
         , max(m.mov_median) keep (dense_rank first order by m.id desc) as mov_median
         , max(m.mov_uqtile) keep (dense_rank first order by m.id desc) as mov_uqtile
         , count(dt.ref_date) as count
      from l_dates d
      left join l_data dt
        on d.ref_date = dt.ref_date
      left join l_mov m
        on dt.id = m.id
     group by d.ref_date
REF_DATE MEDIAN MOV_LQTILE MOV_MEDIAN MOV_UQTILE COUNT
01/01/2017 124 104 124 125 4
02/01/2017 130 122 132.5 135 5
03/01/2017 143 135 141.5 143 3
04/01/2017         0
05/01/2017 160 140 144 145 1
06/01/2017 133 133 141.5 143 1
07/01/2017 142 135 140.5 142 5
comments powered by Disqus