#![allow(unused_variables, dead_code)]

/// Inclusive upper bound of the linear segment of the compression curve:
/// `compress_normalised` and `compress_u8` scale inputs `<= S_0` by 12.92 and
/// send larger inputs through the power-law segment.
const S_0: f32 = 0.003130668442500564;

// Former value. `benchmark_lut32` still hard-codes this bit pattern as the
// threshold at or below which it uses the linear formula instead of the LUT.
//const START_BITS: u32 = 0x3b50dd8b;
/// `f32` bit pattern of the first sample in the generated LUT. `main` passes it
/// as `start` to the LUT functions and prints it as `MIN_NON_LINEAR_BITS`.
const START_BITS: u32 = 0x3b4f8978;
//const DENSITY_SWITCH_INDEX: usize = 1000;


/// Maps a linear value onto the 0–255 scale in double precision.
///
/// The result already includes a `+0.5` rounding bias, so truncating it yields
/// the nearest integer level.  Inputs that are not strictly positive
/// (including NaN) give `0.5`; inputs of `1.0` or more give `255.5`.
fn compress_normalised(s: f32) -> f64 {
    const LINEAR_SCALE: f64 = 12.92 * 255.0;
    const POWER_SCALE: f64 = 1.055 * 255.0;
    const POWER_OFFSET: f64 = 0.055 * 255.0;

    // Negated comparison on purpose: NaN fails `s > 0.0` and lands here too.
    if !(s > 0.0) {
        return 0.5;
    }
    if s >= 1.0 {
        return 255.5;
    }

    let wide = s as f64;
    if s <= S_0 {
        // Linear segment near zero.
        LINEAR_SCALE.mul_add(wide, 0.5)
    } else {
        // Power-law segment; the rounding bias is folded into the offset.
        POWER_SCALE.mul_add(wide.powf(5.0 / 12.0), -POWER_OFFSET + 0.5)
    }
}


/// Applies only the linear segment of the curve and rounds to an 8-bit level.
///
/// Negative and NaN inputs are treated as zero (`f32::max` discards NaN), and
/// the float-to-`u8` cast saturates at 255.
fn linear_piece(s: f32) -> u8 {
    const SLOPE: f32 = 12.92 * 255.0;

    let non_negative = s.max(0.0);
    // The 0.5 bias turns the truncating cast into round-to-nearest.
    let biased = SLOPE.mul_add(non_negative, 0.5);
    biased as u8
}


/// Single-precision reference conversion of a linear value to an 8-bit level.
///
/// Inputs at or below `S_0` use the linear segment, larger inputs the
/// power-law segment.  Negative inputs clamp to 0 and inputs above 1.0 clamp
/// to 255.  NaN maps to 0, matching `compress_normalised` and the LUT-based
/// path exercised by `benchmark_lut32`.
fn compress_u8(s: f32) -> u8 {
    // `!(s > S_0)` instead of `s <= S_0`: NaN fails every comparison, so the
    // negated form routes it to the linear branch where `max(0.0)` turns it
    // into 0.  With `s <= S_0` NaN fell through to the power branch, where
    // `min(1.0)` turned it into 1.0 and the result was 255.
    let biased = if !(s > S_0) {
        const D: f32 = 12.92 * 255.0;
        D.mul_add(s.max(0.0), 0.5)
    } else {
        const A: f32 = 0.055 * 255.0;
        const D: f32 = 1.055 * 255.0;
        D.mul_add(s.min(1.0).powf(5.0 / 12.0), -A + 0.5)
    };
    // Adding 0.5 above makes this truncating (and saturating) cast round.
    biased as u8
}


/// Per-level boundaries of a compression function, stored as `f32` bit patterns.
///
/// Entry `y` holds the bit pattern of the last scanned input for which the
/// function returned `y`; levels the scan never produced stay 0.
struct Edges(pub [u32; 255]);

impl Edges {
    /// Scans consecutive `f32` bit patterns upwards from `0.0001`, recording
    /// for each returned level the most recent input that produced it, and
    /// stops at the first input for which `compress` returns 255.
    ///
    /// Does not terminate normally if `compress` never returns 255.
    fn new(compress: impl Fn(f32) -> u8) -> Edges {
        let mut table = [0; 255];
        let mut bits = 0.0001f32.to_bits();
        loop {
            match compress(f32::from_bits(bits)) {
                255 => return Edges(table),
                level => table[usize::from(level)] = bits,
            }
            bits += 1;
        }
    }

    /// Bit pattern immediately after the recorded edge of `level`.
    fn bits_after(&self, level: usize) -> u32 {
        self.0[level] + 1
    }

    /// Bit pattern following the last input recorded for level 0.
    fn min_one_bits(&self) -> u32 { self.bits_after(0) }
    /// Bit pattern following the last input recorded for level 10.
    fn min_eleven_bits(&self) -> u32 { self.bits_after(10) }
    /// Bit pattern following the last input recorded for level 254.
    fn min_255_bits(&self) -> u32 { self.bits_after(254) }

    /// `min_one_bits` reinterpreted as an `f32`.
    fn min_one(&self) -> f32 { f32::from_bits(self.min_one_bits()) }
    /// `min_eleven_bits` reinterpreted as an `f32`.
    fn min_eleven(&self) -> f32 { f32::from_bits(self.min_eleven_bits()) }
    /// `min_255_bits` reinterpreted as an `f32`.
    fn min_255(&self) -> f32 { f32::from_bits(self.min_255_bits()) }
}


/// Builds the interpolation table.
///
/// Samples `compress_normalised` at the `f32` bit patterns `start`,
/// `start + 2^shift`, `start + 2 * 2^shift`, ... for as long as the pattern is
/// below `edges.min_255_bits() + 2^shift`.  Each sample is narrowed to `f32`
/// and offset by a constant `0.0114`.
// NOTE(review): the 0.0114 offset looks like an empirically tuned bias — confirm.
fn get_lut32(edges: &Edges, shift: u32, start: u32) -> Vec<f32> {
    let step: u32 = 1 << shift;
    // One extra step past the 255 boundary so the final cell has a right-hand sample.
    let end = edges.min_255_bits() + step;

    (start..end)
        .step_by(step as usize)
        .map(|bits| compress_normalised(f32::from_bits(bits)) as f32 + 0.0114)
        .collect()
}

/// Writes the table to stdout as a `static` item declaration.
///
/// Each entry is printed as the hexadecimal bit pattern of the `f32` value,
/// preceded by a comment with its index and the input it was sampled at
/// (`start + (index << shift)` reinterpreted as `f32`) and followed by a
/// comment with the value in decimal.
///
/// `lut` is taken as a slice; callers holding a `Vec<f32>` can keep passing
/// `&vec` unchanged.
// NOTE(review): the emitted text uses `{ ... }` delimiters and integer hex
// literals under an `[f32; N]` type — confirm the consumer expects this form.
fn print_lut32(name: &str, lut: &[f32], shift: u32, start: u32) {
    println!("static {}: [f32; {}] = {{", name, lut.len());
    for (idx, value) in lut.iter().enumerate() {
        // Input coordinate this entry was sampled at.
        let x = f32::from_bits(start + ((idx as u32) << shift));
        println!("    /* #{:3} {:9.7} */  0x{:08x},  /* {} */",
                 idx, x, value.to_bits(), value);
    }
    println!("}};");
}


/// Compares a LUT-based conversion against the reference `edges` and reports
/// the per-level differences on stderr.
///
/// The conversion under test uses `linear_piece` for inputs at or below a
/// fixed threshold, linear interpolation between adjacent `lut` entries for
/// inputs below `edges.min_255()`, and 255 otherwise.  Its own edges are
/// computed with `Edges::new` and printed next to the reference ones together
/// with the absolute difference and a row of `*` markers that grows with the
/// difference's binary exponent.  The last line reports the largest difference.
///
/// `lut`, `shift` and `start` must describe the same table as produced by
/// `get_lut32`.
///
/// # Panics
///
/// Panics if `start` is above the linear/LUT threshold, since the table index
/// computation `s.to_bits() - start` would then underflow.
fn benchmark_lut32(edges: &Edges, lut: &[f32], shift: u32, start: u32) {
    // Inputs whose value is at or below this bit pattern bypass the table.
    // NOTE(review): same value as the commented-out former `START_BITS`;
    // confirm whether it should track `edges.min_eleven_bits()` instead.
    const LINEAR_LIMIT_BITS: u32 = 0x3b50dd8b;

    // Every input routed to the table is above LINEAR_LIMIT_BITS, so this
    // guarantees the subtraction in the closure cannot underflow.
    assert!(
        start <= LINEAR_LIMIT_BITS,
        "LUT start 0x{:08x} lies above the linear threshold 0x{:08x}",
        start,
        LINEAR_LIMIT_BITS
    );

    let linear_limit = f32::from_bits(LINEAR_LIMIT_BITS);
    let min_255 = edges.min_255();

    let compress = |s: f32| {
        if !(s > linear_limit) {  // Also handles NaN
            linear_piece(s)
        } else if s < min_255 {
            // Table cell containing `s`, and its two bounding samples.
            let cell = ((s.to_bits() - start) >> shift) as usize;
            let lft = lut[cell];
            let rht = lut[cell + 1];

            let lft_x = f32::from_bits(start + ((cell as u32) << shift));
            let rht_x = f32::from_bits(start + (((cell + 1) as u32) << shift));

            let dx = rht_x - lft_x;
            let ox = s - lft_x;

            // Operation order kept as-is: it determines the rounding.
            (lft + (rht - lft) * ox / dx) as u8
        } else {
            255
        }
    };

    eprintln!("=== LUT bits:32 shift:{} start:{:x} ===", shift, start);
    let got = Edges::new(compress);

    let mut max_diff = 0.0f32;
    for (idx, (want_bits, got_bits)) in edges.0.iter().zip(got.0.iter()).enumerate() {
        let relation = match want_bits.cmp(got_bits) {
            std::cmp::Ordering::Less => '<',
            std::cmp::Ordering::Greater => '>',
            std::cmp::Ordering::Equal => '=',
        };
        let a = f32::from_bits(*want_bits);
        let b = f32::from_bits(*got_bits);
        let d = (a - b).abs();

        // Unbiased binary exponent of the difference; one marker per power of
        // two above 2^-20, capped at ten markers.
        let ilog = (d.to_bits() >> 23) as i32 - 127;
        let markers = if ilog < -20 {
            String::from("")
        } else {
            " *".repeat((ilog.min(-10) + 20) as usize)
        };

        eprintln!("{:3}  {:9.7}  {:9.7} {} {:9.7}{}", idx, d, a, relation, b, markers);
        if d > max_diff {
            max_diff = d;
        }
    }

    // Split the largest difference into exponent and a mantissa in [1, 2).
    let e = (max_diff.to_bits() >> 23) as i32 - 127;
    let x = f32::from_bits((max_diff.to_bits() & 0x007fffff) | (127 << 23));

    eprintln!("max  {:9.7}  {:5.3}*2^{}", max_diff, x, e);
}


/// Generates the LUT source on stdout and the accuracy report on stderr.
///
/// Reference edges come from `compress_u8`; the table is sampled every
/// `2^19` bit patterns starting at `START_BITS`.
fn main() {
    const SHIFT: u32 = 19;

    let reference = Edges::new(compress_u8);

    println!("const MIN_NON_LINEAR_BITS: u32 = 0x{:08x};", START_BITS);
    println!("const MIN_255_BITS: u32 = 0x{:08x};", reference.min_255_bits());

    let table = get_lut32(&reference, SHIFT, START_BITS);
    print_lut32("LINEAR_TO_U8_LUT", &table, SHIFT, START_BITS);
    benchmark_lut32(&reference, &table, SHIFT, START_BITS);
}
