use std::{borrow::ToOwned, collections::hash_map::Entry, process};

use foldhash::{HashMap, HashMapExt};
use serde::Deserialize;
use serde_json::Value;
use stats::Frequencies;
use toon_format;

use crate::{Csv, CsvData, qcheck_sized, workdir::Workdir};

/// Builds the fixture shared by most tests in this file.
///
/// Creates a `Workdir` called `name`, writes a two-column `in.csv` (headers `h1` and `h2`
/// followed by seven data rows) into it, and returns the work directory together with a
/// `frequency` command whose first argument is already `in.csv`.
fn setup(name: &str) -> (Workdir, process::Command) {
    let workdir = Workdir::new(name);
    workdir.create(
        "in.csv",
        vec![
            svec!["h1", "h2"],
            svec!["a", "z"],
            svec!["a", "y"],
            svec!["a", "y"],
            svec!["b", "z"],
            svec!["a", "Y"],
            svec!["", "z"],
            svec!["(NULL)", "x"],
        ],
    );

    let mut frequency_cmd = workdir.command("frequency");
    frequency_cmd.arg("in.csv");

    (workdir, frequency_cmd)
}

/// With `--no-headers` the first line of `in.csv` is counted as data (`h1` shows up as a
/// value) and the column is reported under its index, `1`.
#[test]
fn frequency_no_headers() {
    let (wrk, mut cmd) = setup("frequency_no_headers");
    cmd.args(["--limit", "0", "--select", "1", "--no-headers"]);

    let output: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    // Drop the first output row before sorting; only the remaining rows are compared.
    let mut actual: Vec<Vec<String>> = output.into_iter().skip(1).collect();
    actual.sort_unstable();

    let want = vec![
        svec!["1", "(NULL)", "1", "12.5", "2"],
        svec!["1", "(NULL)", "1", "12.5", "2"],
        svec!["1", "a", "4", "50", "1"],
        svec!["1", "b", "1", "12.5", "2"],
        svec!["1", "h1", "1", "12.5", "2"],
    ];
    assert_eq!(actual, want);
}

/// Without `--ignore-case`, `Y` and `y` in column `h2` are counted as distinct values.
#[test]
fn frequency_casesensitive() {
    let (wrk, mut cmd) = setup("frequency_casesensitive");
    cmd.args(["--limit", "0", "--select", "h2"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h2", "Y", "1", "14.28571", "3"],
        svec!["h2", "x", "1", "14.28571", "3"],
        svec!["h2", "y", "2", "28.57143", "2"],
        svec!["h2", "z", "3", "42.85714", "1"],
    ];
    assert_eq!(actual, want);
}

/// With `--ignore-case`, `Y` is folded into `y`, so `y` and `z` tie at rank 1.
#[test]
fn frequency_ignorecase() {
    let (wrk, mut cmd) = setup("frequency_ignorecase");
    cmd.args(["--ignore-case", "--limit", "0", "--select", "h2"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h2", "x", "1", "14.28571", "2"],
        svec!["h2", "y", "3", "42.85714", "1"],
        svec!["h2", "z", "3", "42.85714", "1"],
    ];
    assert_eq!(actual, want);
}

/// By default, values that differ only in surrounding spaces are counted together
/// (for example `" z"` is tallied with `"z"`).
#[test]
fn frequency_trim() {
    let wrk = Workdir::new("frequency_trim");
    wrk.create(
        "in.csv",
        vec![
            svec!["h1", "h2"],
            svec!["a", "z"],
            svec!["a", "y"],
            svec!["a", "y"],
            svec!["b", "z"],
            svec!["a", "Y"],
            svec!["", "z"],
            svec!["(NULL)", "x"],
            svec!["a ", " z"],
            svec!["     A", "  Z   "],
            svec!["  a  ", " Y "],
            svec![" A     ", "y "],
            svec!["a", "y "],
            svec!["b", "y "],
            svec!["b", "  Z   "],
        ],
    );

    let mut cmd = wrk.command("frequency");
    cmd.args(["in.csv", "--limit", "0", "--select", "h2"]);

    wrk.assert_success(&mut cmd);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h2", "Y", "2", "14.28571", "3"],
        svec!["h2", "Z", "2", "14.28571", "3"],
        svec!["h2", "x", "1", "7.14286", "4"],
        svec!["h2", "y", "5", "35.71429", "1"],
        svec!["h2", "z", "4", "28.57143", "2"],
    ];
    assert_eq!(actual, want);
}

/// With `--no-trim`, values that differ only in surrounding spaces stay separate
/// (for example `"y "` and `"y"` get their own rows).
#[test]
fn frequency_no_trim() {
    let wrk = Workdir::new("frequency_no_trim");
    wrk.create(
        "in.csv",
        vec![
            svec!["h1", "h2"],
            svec!["a", "z"],
            svec!["a", "y"],
            svec!["a", "y"],
            svec!["b", "z"],
            svec!["a", "Y"],
            svec!["", "z"],
            svec!["(NULL)", "x"],
            svec!["a ", " z"],
            svec!["     A", "  Z   "],
            svec!["  a  ", " Y "],
            svec![" A     ", "y "],
            svec!["a", "y "],
            svec!["b", "y "],
            svec!["b", "  Z   "],
        ],
    );

    let mut cmd = wrk.command("frequency");
    cmd.args(["in.csv", "--limit", "0", "--select", "h2", "--no-trim"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h2", "  Z   ", "2", "14.28571", "2"],
        svec!["h2", " Y ", "1", "7.14286", "3"],
        svec!["h2", " z", "1", "7.14286", "3"],
        svec!["h2", "Y", "1", "7.14286", "3"],
        svec!["h2", "x", "1", "7.14286", "3"],
        svec!["h2", "y", "2", "14.28571", "2"],
        svec!["h2", "y ", "3", "21.42857", "1"],
        svec!["h2", "z", "3", "21.42857", "1"],
    ];
    assert_eq!(actual, want);
}

/// With `--no-nulls` the empty `h1` cell is left out of the table; the literal `(NULL)`
/// string in the data is still counted as an ordinary value.
#[test]
fn frequency_no_nulls() {
    let (wrk, mut cmd) = setup("frequency_no_nulls");
    cmd.args(["--no-nulls", "--limit", "0", "--select", "h1"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "(NULL)", "1", "16.66667", "2"],
        svec!["h1", "a", "4", "66.66667", "1"],
        svec!["h1", "b", "1", "16.66667", "2"],
    ];
    assert_eq!(actual, want);
}

/// By default the empty `h1` cell is reported as `(NULL)`, alongside (not merged with) the
/// literal `(NULL)` string already present in the data, hence two `(NULL)` rows.
#[test]
fn frequency_nulls() {
    let (wrk, mut cmd) = setup("frequency_nulls");
    cmd.args(["--limit", "0", "--select", "h1"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "(NULL)", "1", "14.28571", "2"],
        svec!["h1", "(NULL)", "1", "14.28571", "2"],
        svec!["h1", "a", "4", "57.14286", "1"],
        svec!["h1", "b", "1", "14.28571", "2"],
    ];
    assert_eq!(actual, want);
}

/// `--limit 1` lists only the top value of each column; the remaining values are lumped
/// into an `Other (N)` row with rank 0.
#[test]
fn frequency_limit() {
    let (wrk, mut cmd) = setup("frequency_limit");
    cmd.arg("--limit").arg("1");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "Other (3)", "3", "42.85714", "0"],
        svec!["h1", "a", "4", "57.14286", "1"],
        svec!["h2", "Other (3)", "4", "57.14286", "0"],
        svec!["h2", "z", "3", "42.85714", "1"],
    ];
    assert_eq!(actual, want);
}

/// `--pct-dec-places 3` yields percentages with three decimal places.
#[test]
fn frequency_pct_dec_places() {
    let (wrk, mut cmd) = setup("frequency_pct_dec_places");
    cmd.args(["--limit", "1", "--pct-dec-places", "3"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "Other (3)", "3", "42.857", "0"],
        svec!["h1", "a", "4", "57.143", "1"],
        svec!["h2", "Other (3)", "4", "57.143", "0"],
        svec!["h2", "z", "3", "42.857", "1"],
    ];
    assert_eq!(actual, want);
}

/// A negative `--pct-dec-places` value (`-4`) is accepted; the expected percentages here
/// carry four decimal places.
#[test]
fn frequency_neg_pct_dec_places() {
    let (wrk, mut cmd) = setup("frequency_neg_pct_dec_places");
    cmd.args(["--limit", "1", "--pct-dec-places", "-4"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "Other (3)", "3", "42.8571", "0"],
        svec!["h1", "a", "4", "57.1429", "1"],
        svec!["h2", "Other (3)", "4", "57.1429", "0"],
        svec!["h2", "z", "3", "42.8571", "1"],
    ];
    assert_eq!(actual, want);
}

/// `--other-text <NONE>` suppresses the `Other` row that `--limit 1` would otherwise add.
#[test]
fn frequency_limit_no_other() {
    let (wrk, mut cmd) = setup("frequency_limit_no_other");
    cmd.args(["--limit", "1", "--other-text", "<NONE>"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "a", "4", "57.14286", "1"],
        svec!["h2", "z", "3", "42.85714", "1"],
    ];
    assert_eq!(actual, want);
}

/// With `--limit -4`, only `a` (count 4) is listed individually; every other value,
/// including all of `h2`, ends up in an `Other` row.
#[test]
fn frequency_negative_limit() {
    let (wrk, mut cmd) = setup("frequency_negative_limit");
    cmd.arg("--limit").arg("-4");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "Other (3)", "3", "42.85714", "0"],
        svec!["h1", "a", "4", "57.14286", "1"],
        svec!["h2", "Other (4)", "7", "100", "0"],
    ];
    assert_eq!(actual, want);
}

/// `--limit -4` combined with `--lmt-threshold 4`: the limit is applied to both columns,
/// giving the same table as the plain negative-limit case.
#[test]
fn frequency_limit_threshold() {
    let (wrk, mut cmd) = setup("frequency_limit_threshold");
    cmd.args(["--limit", "-4", "--lmt-threshold", "4"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "Other (3)", "3", "42.85714", "0"],
        svec!["h1", "a", "4", "57.14286", "1"],
        svec!["h2", "Other (4)", "7", "100", "0"],
    ];
    assert_eq!(actual, want);
}

/// `--limit -2` combined with `--lmt-threshold 3`: the limit is not applied, so every
/// value of both columns is listed and no `Other` row appears.
#[test]
fn frequency_limit_threshold_notmet() {
    let (wrk, mut cmd) = setup("frequency_limit_threshold_notmet");
    cmd.args(["--limit", "-2", "--lmt-threshold", "3"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "(NULL)", "1", "14.28571", "2"],
        svec!["h1", "(NULL)", "1", "14.28571", "2"],
        svec!["h1", "a", "4", "57.14286", "1"],
        svec!["h1", "b", "1", "14.28571", "2"],
        svec!["h2", "Y", "1", "14.28571", "3"],
        svec!["h2", "x", "1", "14.28571", "3"],
        svec!["h2", "y", "2", "28.57143", "2"],
        svec!["h2", "z", "3", "42.85714", "1"],
    ];
    assert_eq!(actual, want);
}
/// With `--asc`, ranks are assigned from the least frequent value upwards.
#[test]
fn frequency_asc() {
    let (wrk, mut cmd) = setup("frequency_asc");
    cmd.args(["--select", "h2", "--asc"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h2", "Y", "1", "14.28571", "1"],
        svec!["h2", "x", "1", "14.28571", "1"],
        svec!["h2", "y", "2", "28.57143", "2"],
        svec!["h2", "z", "3", "42.85714", "3"],
    ];
    assert_eq!(actual, want);
}

/// `--asc` together with `--ignore-case`: `x` ranks first, the tied `y` and `z` share rank 2.
#[test]
fn frequency_asc_ignorecase() {
    let (wrk, mut cmd) = setup("frequency_asc_ignorecase");
    cmd.args(["--ignore-case", "--select", "h2", "--asc"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h2", "x", "1", "14.28571", "1"],
        svec!["h2", "y", "3", "42.85714", "2"],
        svec!["h2", "z", "3", "42.85714", "2"],
    ];
    assert_eq!(actual, want);
}

/// `--other-text` replaces the `Other` label; a non-ASCII label is used here.
#[test]
fn frequency_custom_other_text() {
    let (wrk, mut cmd) = setup("frequency_custom_other_text");
    cmd.args(["--limit", "-4", "--lmt-threshold", "4", "--other-text", "其他"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "a", "4", "57.14286", "1"],
        svec!["h1", "其他 (3)", "3", "42.85714", "0"],
        svec!["h2", "其他 (4)", "7", "100", "0"],
    ];
    assert_eq!(actual, want);
}

/// Custom `--other-text` combined with `--other-sorted`. The output is re-sorted before
/// the comparison, so only row contents are checked, not the emitted order.
#[test]
fn frequency_custom_other_text_sorted() {
    let (wrk, mut cmd) = setup("frequency_custom_other_text_sorted");
    cmd.args([
        "--limit",
        "-4",
        "--lmt-threshold",
        "4",
        "--other-text",
        "Ibang halaga",
        "--other-sorted",
    ]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "Ibang halaga (3)", "3", "42.85714", "0"],
        svec!["h1", "a", "4", "57.14286", "1"],
        svec!["h2", "Ibang halaga (4)", "7", "100", "0"],
    ];
    assert_eq!(actual, want);
}

/// `--other-sorted` with the default `Other` label. The output is re-sorted before the
/// comparison, so only row contents are checked, not the emitted order.
#[test]
fn frequency_other_sorted() {
    let (wrk, mut cmd) = setup("frequency_other_sorted");
    cmd.args(["--limit", "-4", "--lmt-threshold", "4", "--other-sorted"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "Other (3)", "3", "42.85714", "0"],
        svec!["h1", "a", "4", "57.14286", "1"],
        svec!["h2", "Other (4)", "7", "100", "0"],
    ];
    assert_eq!(actual, want);
}

/// With `--other-text <NONE>` and `--limit -4`, the `Other` rows vanish; `h2`, which would
/// consist solely of an `Other` row, produces no output rows at all.
#[test]
fn frequency_other_text_none() {
    let (wrk, mut cmd) = setup("frequency_other_text_none");
    cmd.args(["--limit", "-4", "--lmt-threshold", "4", "--other-text", "<NONE>"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h1", "a", "4", "57.14286", "1"],
    ];
    assert_eq!(actual, want);
}

/// `--select h2` restricts the frequency table to the `h2` column.
#[test]
fn frequency_select() {
    let (wrk, mut cmd) = setup("frequency_select");
    cmd.args(["--limit", "0", "--select", "h2"]);

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["h2", "Y", "1", "14.28571", "3"],
        svec!["h2", "x", "1", "14.28571", "3"],
        svec!["h2", "y", "2", "28.57143", "2"],
        svec!["h2", "z", "3", "42.85714", "1"],
    ];
    assert_eq!(actual, want);
}

/// Column 1 of `boston311-100.csv` (`case_enquiry_id`) is expected to collapse into a
/// single `<ALL_UNIQUE>` summary row.
#[test]
fn frequency_all_unique() {
    let wrk = Workdir::new("frequency_all_unique");
    let input_path = wrk.load_test_file("boston311-100.csv");

    let mut cmd = wrk.command("frequency");
    cmd.arg("--select").arg("1");
    cmd.arg(input_path);

    wrk.assert_success(&mut cmd);

    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["case_enquiry_id", "<ALL_UNIQUE>", "100", "100", "0"],
    ];
    assert_eq!(actual, want);
}

/// Same expectation as the plain all-unique test, but `stats --cardinality --stats-jsonl`
/// is run on the input first so that `frequency` runs after a stats pass.
#[test]
fn frequency_all_unique_with_stats_cache() {
    let wrk = Workdir::new("frequency_all_unique_with_stats_cache");
    let input_path = wrk.load_test_file("boston311-100.csv");

    // Run `stats` first; it must succeed before `frequency` is exercised.
    let mut stats_cmd = wrk.command("stats");
    stats_cmd.arg(&input_path);
    stats_cmd.args(["--cardinality", "--stats-jsonl"]);
    wrk.assert_success(&mut stats_cmd);

    let mut cmd = wrk.command("frequency");
    cmd.arg("--select").arg("1");
    cmd.arg(input_path);

    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["case_enquiry_id", "<ALL_UNIQUE>", "100", "100", "0"],
    ];
    assert_eq!(actual, want);
}

/// `--null-text` replaces the default label used for empty values in the output.
#[test]
fn frequency_custom_null_text() {
    let wrk = Workdir::new("frequency_custom_null_text");
    let input_path = wrk.load_test_file("boston311-100.csv");

    let mut cmd = wrk.command("frequency");
    cmd.args([
        "--select",
        "fire_district",
        "--null-text",
        "<NADA Y MUCHO MAS>",
    ]);
    cmd.arg(input_path);

    // The output is compared in the order the command emits it (no re-sorting).
    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["fire_district", "3", "19", "19", "1"],
        svec!["fire_district", "4", "16", "16", "2"],
        svec!["fire_district", "7", "14", "14", "3"],
        svec!["fire_district", "6", "13", "13", "4"],
        svec!["fire_district", "8", "9", "9", "5"],
        svec!["fire_district", "1", "8", "8", "6"],
        svec!["fire_district", "12", "8", "8", "6"],
        svec!["fire_district", "9", "7", "7", "7"],
        svec!["fire_district", "11", "5", "5", "8"],
        svec!["fire_district", "<NADA Y MUCHO MAS>", "1", "1", "9"],
    ];
    assert_eq!(actual, want);
}

/// `--all-unique-text` replaces the `<ALL_UNIQUE>` label; `stats` is run on the input
/// beforehand.
#[test]
fn frequency_all_unique_with_stats_cache_alt_all_unique_text() {
    let wrk = Workdir::new("frequency_all_unique_with_stats_cache_alt_all_unique_text");
    let input_path = wrk.load_test_file("boston311-100.csv");

    // Run `stats` first; it must succeed before `frequency` is exercised.
    let mut stats_cmd = wrk.command("stats");
    stats_cmd.arg(&input_path);
    stats_cmd.args(["--cardinality", "--stats-jsonl"]);
    wrk.assert_success(&mut stats_cmd);

    let mut cmd = wrk.command("frequency");
    cmd.arg("--select").arg("1");
    // The replacement label is "<ALL_UNIQUE>" in German.
    cmd.arg("--all-unique-text").arg("<ALLE EINZIGARTIG>");
    cmd.arg(input_path);

    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["case_enquiry_id", "<ALLE EINZIGARTIG>", "100", "100", "0"],
    ];
    assert_eq!(actual, want);
}

/// Runs `frequency` on column 1 without running `stats` first and expects the
/// `<ALL_UNIQUE>` summary row.
#[test]
fn frequency_all_unique_stats_cache_default() {
    let wrk = Workdir::new("frequency_all_unique_stats_cache_default");
    let input_path = wrk.load_test_file("boston311-100.csv");

    let mut cmd = wrk.command("frequency");
    cmd.arg("--select").arg("1");
    cmd.arg(input_path);

    wrk.assert_success(&mut cmd);

    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["case_enquiry_id", "<ALL_UNIQUE>", "100", "100", "0"],
    ];
    assert_eq!(actual, want);
}

/// With `QSV_STATSCACHE_MODE=None`, the all-unique column is not collapsed: ten individual
/// ids are listed and the remaining 90 go into an `Other` row, even though `stats` was run
/// beforehand.
#[test]
fn frequency_all_unique_stats_mode_none() {
    let wrk = Workdir::new("frequency_all_unique_stats_mode_none");
    let input_path = wrk.load_test_file("boston311-100.csv");

    // Step 1: create the stats cache.
    let mut stats_cmd = wrk.command("stats");
    stats_cmd.arg(&input_path);
    stats_cmd.args(["--cardinality", "--stats-jsonl"]);
    wrk.assert_success(&mut stats_cmd);

    // Step 2: run frequency with stats-mode none, ignoring the stats cache.
    let mut cmd = wrk.command("frequency");
    cmd.env("QSV_STATSCACHE_MODE", "None");
    cmd.arg("--select").arg("1");
    cmd.arg(input_path);

    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["case_enquiry_id", "101004113298", "1", "1", "1"],
        svec!["case_enquiry_id", "101004113313", "1", "1", "1"],
        svec!["case_enquiry_id", "101004113348", "1", "1", "1"],
        svec!["case_enquiry_id", "101004113363", "1", "1", "1"],
        svec!["case_enquiry_id", "101004113371", "1", "1", "1"],
        svec!["case_enquiry_id", "101004113385", "1", "1", "1"],
        svec!["case_enquiry_id", "101004113386", "1", "1", "1"],
        svec!["case_enquiry_id", "101004113391", "1", "1", "1"],
        svec!["case_enquiry_id", "101004113394", "1", "1", "1"],
        svec!["case_enquiry_id", "101004113403", "1", "1", "1"],
        svec!["case_enquiry_id", "Other (90)", "90", "90", "0"],
    ];
    assert_eq!(actual, want);
}

/// Regression test named after issue 1962: runs `frequency` on `data1962.csv` with
/// `--limit 15` and then `--limit 5`, checking the listed values and the `Other` row
/// (distinct-value count, row count and percentage) in both cases.
#[test]
fn frequency_issue1962() {
    let wrk = Workdir::new("frequency_1962");
    let input_path = wrk.load_test_file("data1962.csv");

    // First pass: the top 15 values are listed individually.
    let mut top15_cmd = wrk.command("frequency");
    top15_cmd.arg("--limit").arg("15");
    top15_cmd.arg(&input_path);

    let top15_actual: Vec<Vec<String>> = wrk.read_stdout(&mut top15_cmd);
    let top15_want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["year", "2024", "24", "8", "1"],
        svec!["year", "2023", "23", "7.66667", "2"],
        svec!["year", "2022", "22", "7.33333", "3"],
        svec!["year", "2021", "21", "7", "4"],
        svec!["year", "2020", "20", "6.66667", "5"],
        svec!["year", "2019", "19", "6.33333", "6"],
        svec!["year", "2018", "18", "6", "7"],
        svec!["year", "2017", "17", "5.66667", "8"],
        svec!["year", "2016", "16", "5.33333", "9"],
        svec!["year", "2015", "15", "5", "10"],
        svec!["year", "2014", "14", "4.66667", "11"],
        svec!["year", "2013", "13", "4.33333", "12"],
        svec!["year", "2012", "12", "4", "13"],
        svec!["year", "2011", "11", "3.66667", "14"],
        svec!["year", "2010", "10", "3.33333", "15"],
        svec!["year", "Other (9)", "45", "15", "0"],
    ];
    assert_eq!(top15_actual, top15_want);

    // Second pass: only the top 5 values are listed individually.
    let mut top5_cmd = wrk.command("frequency");
    top5_cmd.arg("--limit").arg("5");
    top5_cmd.arg(input_path);

    let top5_actual: Vec<Vec<String>> = wrk.read_stdout(&mut top5_cmd);
    let top5_want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["year", "2024", "24", "8", "1"],
        svec!["year", "2023", "23", "7.66667", "2"],
        svec!["year", "2022", "22", "7.33333", "3"],
        svec!["year", "2021", "21", "7", "4"],
        svec!["year", "2020", "20", "6.66667", "5"],
        svec!["year", "Other (19)", "190", "63.33333", "0"],
    ];
    assert_eq!(top5_actual, top5_want);
}

// Property test: the frequency table produced by `qsv` must always match the
// frequency table computed in memory (input is not indexed).
#[test]
fn prop_frequency() {
    fn property(rows: CsvData) -> bool {
        param_prop_frequency("prop_frequency", rows, false)
    }
    // Keep the generated inputs really small because we are incredibly careless
    // with allocation.
    let prop: fn(CsvData) -> bool = property;
    qcheck_sized(prop, 5);
}

// This tests that running the frequency command on a CSV file with these two
// rows does not burst into flames:
//
//     \u{FEFF}
//     ""
//
// In this case, `param_prop_frequency` just ignores this particular input.
// Namely, \u{FEFF} is the UTF-8 BOM, which is ignored by the underlying CSV
// reader.
#[test]
fn frequency_bom() {
    let bom_record = crate::CsvRecord(vec!["\u{FEFF}".to_string()]);
    let empty_record = crate::CsvRecord(vec![String::new()]);
    let rows = CsvData {
        data: vec![bom_record, empty_record],
    };

    let passed = param_prop_frequency("prop_frequency", rows, false);
    assert!(passed)
}

// Property test: the frequency table produced by `qsv` (with an index) must
// always match the frequency table computed in memory.
#[test]
fn prop_frequency_indexed() {
    fn property(rows: CsvData) -> bool {
        param_prop_frequency("prop_frequency_indexed", rows, true)
    }
    // Keep the generated inputs really small because we are incredibly careless
    // with allocation.
    let prop: fn(CsvData) -> bool = property;
    qcheck_sized(prop, 5);
}

/// Shared driver for the `frequency` property tests.
///
/// Writes `rows` to `in.csv` inside a work directory called `name` (via `create_indexed`
/// when `idx` is true, `create` otherwise), runs `frequency` on it with `--limit 0` and
/// `--unq-limit 0`, and compares the frequency tables parsed from the command's output
/// against the tables computed in memory from the same rows.
///
/// Returns `true` without running the command when `rows` is empty or when any field
/// contains `\u{FEFF}`. Otherwise it returns `true` if the tables match and panics (through
/// `assert_eq!` in `assert_eq_ftables`) if they do not.
fn param_prop_frequency(name: &str, rows: CsvData, idx: bool) -> bool {
    // Nothing to compare for empty input. The previous guard was inverted
    // (`!rows.is_empty()`), which skipped every non-empty input and made the property
    // tests vacuous.
    if rows.is_empty() {
        return true;
    }

    // Skip inputs containing a BOM: per the note on `frequency_bom`, the CSV reader
    // ignores it, so the in-memory tables would not line up with the command's output.
    let has_bom = rows
        .clone()
        .into_iter()
        .any(|row| row.into_iter().any(|field| field.contains("\u{FEFF}")));
    if has_bom {
        return true;
    }

    let wrk = Workdir::new(name);
    if idx {
        wrk.create_indexed("in.csv", rows.clone());
    } else {
        wrk.create("in.csv", rows.clone());
    }

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["-j", "4"])
        .args(["--limit", "0"])
        .args(["--unq-limit", "0"]);

    let stdout = wrk.stdout::<String>(&mut cmd);
    let got_ftables = ftables_from_csv_string(stdout);
    let expected_ftables = ftables_from_rows(rows);
    assert_eq_ftables(&got_ftables, &expected_ftables)
}

/// Frequency tables keyed by field (column) name; each entry tallies the values seen for
/// that field.
type FTables = HashMap<String, Frequencies<String>>;

/// One row of `frequency` CSV output as deserialized by `ftables_from_csv_string`.
/// Only the `field`, `value` and `count` columns are captured.
#[derive(Deserialize)]
struct FRow {
    // Name of the column the value was counted in.
    field: String,
    // The value being counted.
    value: String,
    // Number of occurrences reported for `value`.
    count: usize,
}

/// Computes the expected frequency tables in memory.
///
/// The first row is treated as the header; every header field gets its own table. Each
/// remaining cell is trimmed, replaced by `(NULL)` when empty after trimming, and added to
/// the table of its column. Inputs with fewer than two rows yield an empty map.
///
/// Panics if a data row has more fields than the header.
fn ftables_from_rows<T: Csv>(rows: T) -> FTables {
    let mut records = rows.to_vecs();
    if records.len() < 2 {
        return HashMap::new();
    }
    let header = records.remove(0);

    let mut tables: FTables = HashMap::new();
    for column in header.iter() {
        tables.insert(column.clone(), Frequencies::new());
    }

    for record in records {
        for (col_idx, raw) in record.into_iter().enumerate() {
            let trimmed = raw.trim();
            let value = if trimmed.is_empty() {
                "(NULL)".to_owned()
            } else {
                trimmed.to_owned()
            };
            tables.get_mut(&header[col_idx]).unwrap().add(value);
        }
    }
    tables
}

fn ftables_from_csv_string(data: String) -> FTables {
    let mut rdr = csv::Reader::from_reader(data.as_bytes());
    let mut ftables = HashMap::new();
    for frow in rdr.deserialize() {
        let frow: FRow = frow.unwrap();
        match ftables.entry(frow.field) {
            Entry::Vacant(v) => {
                let mut ftable = Frequencies::new();
                for _ in 0..frow.count {
                    ftable.add(frow.value.clone());
                }
                v.insert(ftable);
            },
            Entry::Occupied(mut v) => {
                for _ in 0..frow.count {
                    v.get_mut().add(frow.value.clone());
                }
            },
        }
    }
    ftables
}

/// Returns the `(value, count)` pairs reported by `most_frequent`, sorted so that two
/// tables can be compared independently of the order they were produced in.
fn freq_data<T>(ftable: &Frequencies<T>) -> Vec<(&T, u64)>
where
    T: ::std::hash::Hash + Ord + Clone,
{
    // The second tuple element returned by `most_frequent` is not needed here.
    let mut pairs = ftable.most_frequent().0;
    pairs.sort_unstable();
    pairs
}

/// Asserts that `got` and `expected` hold the same fields with the same frequency data.
///
/// Both directions are checked so that a field missing from either side is caught.
/// Panics on any mismatch or missing field; returns `true` otherwise.
fn assert_eq_ftables(got: &FTables, expected: &FTables) -> bool {
    for (field, got_table) in got {
        let expected_table = expected.get(field).unwrap();
        assert_eq!(freq_data(got_table), freq_data(expected_table));
    }
    for (field, expected_table) in expected {
        let got_table = got.get(field).unwrap();
        assert_eq!(freq_data(got_table), freq_data(expected_table));
    }
    true
}

/// `--vis-whitespace` with default trimming: values wrapped in tab/CR/LF/space collapse
/// into plain `value`, while the remaining whitespace characters are rendered as visible
/// `《…》` markers.
#[test]
fn frequency_vis_whitespace() {
    let wrk = Workdir::new("frequency_vis_whitespace");

    // Input covering various types of whitespace.
    wrk.create(
        "in.csv",
        vec![
            svec!["header"],
            svec!["value\t"],       // trailing tab
            svec!["\tvalue"],       // leading tab
            svec!["value\r"],       // trailing CR
            svec!["\rvalue"],       // leading CR
            svec!["value\n"],       // trailing LF
            svec!["\nvalue"],       // leading LF
            svec!["value "],        // trailing space
            svec![" value"],        // leading space
            svec!["      "],        // all spaces
            svec!["value\u{00A0}"], // trailing non-breaking space
            svec!["\u{00A0}value"], // leading non-breaking space
            svec!["value\u{2003}"], // trailing em space
            svec!["\u{2003}value"], // leading em space
            svec!["value\u{2007}"], // trailing figure space
            svec!["\u{2007}value"], // leading figure space
            svec!["value\u{200B}"], // trailing zero width space
            svec!["\u{200B}value"], // leading zero width space
            svec!["no_whitespace"],
        ],
    );

    let mut cmd = wrk.command("frequency");
    cmd.env("QSV_STATSCACHE_MODE", "none");
    cmd.args(["in.csv", "--limit", "0", "--vis-whitespace"]);

    wrk.assert_success(&mut cmd);

    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["header", "value", "8", "44.44444", "1"],
        svec!["header", "(NULL)", "1", "5.55556", "2"],
        svec!["header", "no_whitespace", "1", "5.55556", "2"],
        svec!["header", "value《⍽》", "1", "5.55556", "2"],
        svec!["header", "value《emsp》", "1", "5.55556", "2"],
        svec!["header", "value《figsp》", "1", "5.55556", "2"],
        svec!["header", "value《zwsp》", "1", "5.55556", "2"],
        svec!["header", "《⍽》value", "1", "5.55556", "2"],
        svec!["header", "《emsp》value", "1", "5.55556", "2"],
        svec!["header", "《figsp》value", "1", "5.55556", "2"],
        svec!["header", "《zwsp》value", "1", "5.55556", "2"],
    ];

    assert_eq!(actual, want);
}

/// `--vis-whitespace` together with `--no-trim`: whitespace is kept, and tabs and
/// all-space values are rendered with visible `《…》` markers.
#[test]
fn frequency_vis_whitespace_no_trim() {
    let wrk = Workdir::new("frequency_vis_whitespace_no_trim");

    // Input with multiple occurrences of the same whitespace patterns.
    wrk.create(
        "in.csv",
        vec![
            svec!["header"],
            svec!["value\t"], // trailing tab
            svec!["value\t"], // trailing tab (duplicate)
            svec!["\tvalue"], // leading tab
            svec!["\tvalue"], // leading tab (duplicate)
            svec!["value "],  // trailing space
            svec!["value "],  // trailing space (duplicate)
            svec![" value"],  // leading space
            svec![" value"],  // leading space (duplicate)
            svec!["      "],  // all spaces
            svec!["      "],  // all spaces (duplicate)
            svec!["no_whitespace"],
        ],
    );

    let mut cmd = wrk.command("frequency");
    cmd.args(["in.csv", "--limit", "0", "--vis-whitespace", "--no-trim"]);

    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["header", "《→》value", "2", "18.18182", "1"],
        svec![
            "header",
            "《_》《_》《_》《_》《_》《_》",
            "2",
            "18.18182",
            "1"
        ],
        svec!["header", " value", "2", "18.18182", "1"],
        svec!["header", "value《→》", "2", "18.18182", "1"],
        svec!["header", "value ", "2", "18.18182", "1"],
        svec!["header", "no_whitespace", "1", "9.09091", "2"],
    ];

    assert_eq!(actual, want);
}

/// `--vis-whitespace` together with `--ignore-case`: case variants and most
/// whitespace-wrapped variants collapse into `value`; the zero width space and the
/// directional marks stay visible as `《…》` markers.
#[test]
fn frequency_vis_whitespace_ignore_case() {
    let wrk = Workdir::new("frequency_vis_whitespace_ignore_case");

    // Input mixing whitespace variants with mixed case.
    wrk.create(
        "in.csv",
        vec![
            svec!["header"],
            svec!["Value\t"],       // trailing tab
            svec!["\tVALUE"],       // leading tab
            svec!["value "],        // trailing space
            svec!["value\u{000B}"], // vertical tab
            svec!["value\u{000C}"], // form feed
            svec!["value\u{0085}"], // next line
            svec!["value\u{200E}"], // left-to-right mark
            svec!["value\u{200F}"], // right-to-left mark
            svec!["value\u{2028}"], // line separator
            svec!["value\u{2029}"], // paragraph separator
            svec!["value\u{00A0}"], // non-breaking space
            svec!["value\u{2003}"], // em space
            svec!["value\u{2007}"], // figure space
            svec!["value\u{200B}"], // zero width space
            svec![" VALUE"],        // leading space
            svec!["no_whitespace"],
            svec!["      "], // all spaces
        ],
    );

    let mut cmd = wrk.command("frequency");
    cmd.env("QSV_STATSCACHE_MODE", "none");
    cmd.args([
        "in.csv",
        "--limit",
        "0",
        "--vis-whitespace",
        "--ignore-case",
    ]);

    let actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let want = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["header", "value", "12", "70.58824", "1"],
        svec!["header", "(NULL)", "1", "5.88235", "2"],
        svec!["header", "no_whitespace", "1", "5.88235", "2"],
        svec!["header", "value《zwsp》", "1", "5.88235", "2"],
        svec!["header", "value《␎》", "1", "5.88235", "2"],
        svec!["header", "value《␏》", "1", "5.88235", "2"],
    ];

    assert_eq!(actual, want);
}

/// `--json` output for column `h2`: checks the top-level metadata (`input`, `rowcount`,
/// `fieldcount`), the field's cardinality, and each frequency entry in emitted order.
#[test]
fn frequency_json() {
    let (wrk, mut cmd) = setup("frequency_json");
    cmd.args(["--limit", "0", "--select", "h2", "--json"]);

    let raw: String = wrk.stdout(&mut cmd);
    let v: Value = serde_json::from_str(&raw).unwrap();

    // Top-level metadata.
    let input = v["input"].as_str().unwrap();
    assert!(input.ends_with("in.csv"));
    assert_eq!(v["rowcount"], 7);
    assert_eq!(v["fieldcount"], 1);

    // Exactly one field is expected, describing `h2`.
    let fields = v["fields"].as_array().unwrap();
    assert_eq!(fields.len(), 1);
    let h2 = &fields[0];
    assert_eq!(h2["field"], "h2");
    assert_eq!(h2["cardinality"], 4);

    let freqs = h2["frequencies"].as_array().unwrap();
    let expected = [
        ("z", 3, 42.85714, 1.0),
        ("y", 2, 28.57143, 2.0),
        ("Y", 1, 14.28571, 3.0),
        ("x", 1, 14.28571, 3.0),
    ];
    for (i, &(val, count, pct, rank)) in expected.iter().enumerate() {
        let entry = &freqs[i];
        assert_eq!(entry["value"], val);
        assert_eq!(entry["count"], count);
        // Percentages are floats, so compare within a small tolerance.
        let got_pct = entry["percentage"].as_f64().unwrap();
        assert!((got_pct - pct).abs() < 1e-5);
        assert_eq!(entry["rank"], rank);
    }
}

/// `--json` output with `--no-headers`: the header line is counted as data (row count 8)
/// and the field is reported under its index, `1`.
#[test]
fn frequency_json_no_headers() {
    let (wrk, mut cmd) = setup("frequency_json_no_headers");
    cmd.args(["--limit", "0", "--select", "1", "--no-headers", "--json"]);

    let raw: String = wrk.stdout(&mut cmd);
    let v: Value = serde_json::from_str(&raw).unwrap();

    // Top-level metadata.
    let input = v["input"].as_str().unwrap();
    assert!(input.ends_with("in.csv"));
    assert_eq!(v["rowcount"], 8);
    assert_eq!(v["fieldcount"], 1);

    // Exactly one field is expected, named after its column index.
    let fields = v["fields"].as_array().unwrap();
    assert_eq!(fields.len(), 1);
    let first = &fields[0];
    assert_eq!(first["field"], "1");
    assert_eq!(first["cardinality"], 5);

    let freqs = first["frequencies"].as_array().unwrap();
    let expected = [
        ("a", 4, 50.0, 1.0),
        ("(NULL)", 1, 12.5, 2.0),
        ("(NULL)", 1, 12.5, 2.0),
        ("b", 1, 12.5, 2.0),
        ("h1", 1, 12.5, 2.0),
    ];
    for (i, &(val, count, pct, rank)) in expected.iter().enumerate() {
        let entry = &freqs[i];
        assert_eq!(entry["value"], val);
        assert_eq!(entry["count"], count);
        // Percentages are floats, so compare within a small tolerance.
        let got_pct = entry["percentage"].as_f64().unwrap();
        assert!((got_pct - pct).abs() < 1e-5);
        assert_eq!(entry["rank"], rank);
    }
}

/// `--json` combined with `--ignore-case`: `y` and `Y` are expected to be merged into a
/// single `y` entry, leaving three distinct values for `h2`.
#[test]
fn frequency_json_ignore_case() {
    let (wrk, mut cmd) = setup("frequency_json_ignore_case");
    cmd.arg("--ignore-case");
    cmd.arg("--limit").arg("0");
    cmd.arg("--select").arg("h2");
    cmd.arg("--json");

    let stdout: String = wrk.stdout(&mut cmd);
    let parsed: Value = serde_json::from_str(&stdout).unwrap();

    // Top-level metadata
    let input = parsed["input"].as_str().unwrap();
    assert!(input.ends_with("in.csv"));
    assert_eq!(parsed["rowcount"], 7);
    assert_eq!(parsed["fieldcount"], 1);

    let fields = parsed["fields"].as_array().unwrap();
    assert_eq!(fields.len(), 1);
    let h2 = &fields[0];
    assert_eq!(h2["field"], "h2");
    assert_eq!(h2["cardinality"], 3);

    // (value, count, percentage), in output order
    let freqs = h2["frequencies"].as_array().unwrap();
    let expected = [("y", 3, 42.85714), ("z", 3, 42.85714), ("x", 1, 14.28571)];
    for (idx, &(value, count, percentage)) in expected.iter().enumerate() {
        let entry = &freqs[idx];
        assert_eq!(entry["value"], value);
        assert_eq!(entry["count"], count);
        let got_pct = entry["percentage"].as_f64().unwrap();
        assert!((got_pct - percentage).abs() < 1e-5);
    }
}

/// `--json` combined with `--limit 1`: each field keeps only its top value and the
/// remaining values are expected to be folded into an `Other (3)` entry.
#[test]
fn frequency_json_limit() {
    let (wrk, mut cmd) = setup("frequency_json_limit");
    cmd.arg("--limit").arg("1");
    cmd.arg("--json");

    let stdout: String = wrk.stdout(&mut cmd);
    let parsed: Value = serde_json::from_str(&stdout).unwrap();

    // Top-level metadata
    let input = parsed["input"].as_str().unwrap();
    assert!(input.ends_with("in.csv"));
    assert_eq!(parsed["rowcount"], 7);
    assert_eq!(parsed["fieldcount"], 2);

    let fields = parsed["fields"].as_array().unwrap();
    assert_eq!(fields.len(), 2);

    // The two fields may be emitted in either order; figure out which one is h1
    let h1_is_first = fields[0]["field"] == "h1";
    let (h1, h2) = if h1_is_first {
        (&fields[0], &fields[1])
    } else {
        (&fields[1], &fields[0])
    };
    assert_eq!(h1["cardinality"], 4);
    assert_eq!(h2["cardinality"], 4);

    // Per field: (value, count, percentage), in output order
    let cases = [
        (h1, [("a", 4, 57.14286), ("Other (3)", 3, 42.85714)]),
        (h2, [("z", 3, 42.85714), ("Other (3)", 4, 57.14286)]),
    ];
    for (field, expected) in cases.iter() {
        let freqs = field["frequencies"].as_array().unwrap();
        for (idx, &(value, count, percentage)) in expected.iter().enumerate() {
            let entry = &freqs[idx];
            assert_eq!(entry["value"], value);
            assert_eq!(entry["count"], count);
            let got_pct = entry["percentage"].as_f64().unwrap();
            assert!((got_pct - percentage).abs() < 1e-5);
        }
    }
}

/// `--json` on a column where every value is distinct: the frequencies are expected to
/// collapse into a single `<ALL_UNIQUE>` entry covering 100% of the rows.
#[test]
fn frequency_json_all_unique() {
    let wrk = Workdir::new("frequency_json_all_unique");
    let csv_path = wrk.load_test_file("boston311-100.csv");

    let mut cmd = wrk.command("frequency");
    cmd.arg("--select").arg("1");
    cmd.arg(&csv_path);
    cmd.arg("--json");
    wrk.assert_success(&mut cmd);

    let stdout: String = wrk.stdout(&mut cmd);
    let parsed: Value = serde_json::from_str(&stdout).unwrap();

    // The input may be reported as a full path or just the file name
    assert!(
        parsed["input"]
            .as_str()
            .unwrap()
            .ends_with("boston311-100.csv")
    );
    assert_eq!(parsed["rowcount"], 100);
    assert_eq!(parsed["fieldcount"], 1);

    let fields = parsed["fields"].as_array().unwrap();
    assert_eq!(fields.len(), 1);
    let id_field = &fields[0];
    assert_eq!(id_field["field"], "case_enquiry_id");
    assert_eq!(id_field["cardinality"], 100);

    // A single synthetic entry stands in for all 100 unique values
    let freqs = id_field["frequencies"].as_array().unwrap();
    assert_eq!(freqs.len(), 1);
    let only = &freqs[0];
    assert_eq!(only["value"], "<ALL_UNIQUE>");
    assert_eq!(only["count"], 100);
    let got_pct = only["percentage"].as_f64().unwrap();
    assert!((got_pct - 100.0).abs() < 1e-5);
}

/// `--json` combined with `--vis-whitespace` on values padded with tabs and spaces.
/// The expected output has three entries: `value` (4), `(NULL)` (1) and `no_whitespace` (1).
#[test]
fn frequency_json_vis_whitespace() {
    let wrk = Workdir::new("frequency_json_vis_whitespace");

    // Header followed by one cell per row
    let cells = [
        "header",
        "value\t",
        "\tvalue",
        "value ",
        " value",
        "      ",
        "no_whitespace",
    ];
    let rows: Vec<Vec<String>> = cells
        .iter()
        .map(|cell| vec![(*cell).to_owned()])
        .collect();
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.env("QSV_STATSCACHE_MODE", "none");
    cmd.arg("in.csv");
    cmd.arg("--limit").arg("0");
    cmd.arg("--vis-whitespace");
    cmd.arg("--json");
    wrk.assert_success(&mut cmd);

    let stdout: String = wrk.stdout(&mut cmd);
    let parsed: Value = serde_json::from_str(&stdout).unwrap();

    // Top-level metadata
    let input = parsed["input"].as_str().unwrap();
    assert!(input.ends_with("in.csv"));
    assert_eq!(parsed["rowcount"], 6);
    assert_eq!(parsed["fieldcount"], 1);

    let fields = parsed["fields"].as_array().unwrap();
    assert_eq!(fields.len(), 1);
    let header_field = &fields[0];
    assert_eq!(header_field["field"], "header");
    assert_eq!(header_field["cardinality"], 3);

    // (value, count, percentage), in output order
    let freqs = header_field["frequencies"].as_array().unwrap();
    let expected = [
        ("value", 4, 66.66667),
        ("(NULL)", 1, 16.66667),
        ("no_whitespace", 1, 16.66667),
    ];
    for (idx, &(value, count, percentage)) in expected.iter().enumerate() {
        let entry = &freqs[idx];
        assert_eq!(entry["value"], value);
        assert_eq!(entry["count"], count);
        let got_pct = entry["percentage"].as_f64().unwrap();
        assert!((got_pct - percentage).abs() < 1e-5);
    }
}

/// `--toon` output for column `h2`, compared verbatim against the expected TOON document
/// (metadata, per-field stats, frequencies and the rank strategy).
#[test]
fn frequency_toon() {
    let (wrk, mut cmd) = setup("frequency_toon");
    // Argument order matters: it is echoed in the `description` line of the output
    cmd.arg("--limit").arg("0");
    cmd.arg("--select").arg("h2");
    cmd.arg("--toon");

    let actual: String = wrk.stdout(&mut cmd);
    let expected = r#"input: in.csv
description: "Generated with `qsv frequency in.csv --limit 0 --select h2 --toon`"
rowcount: 7
fieldcount: 1
fields[1]:
  - field: h2
    type: String
    cardinality: 4
    nullcount: 0
    sparsity: 0
    uniqueness_ratio: 0.5714
    stats[10]{name,value}:
    min,Y
    max,z
    sort_order,Unsorted
    min_length,1
    max_length,1
    sum_length,7
    avg_length,1
    stddev_length,0
    variance_length,0
    cv_length,0
    frequencies[4]{value,count,percentage,rank}:
    z,3,42.85714,1
    y,2,28.57143,2
    Y,1,14.28571,3
    x,1,14.28571,3
rank_strategy: dense"#;
    assert_eq!(actual, expected);
}

/// `--toon` combined with `--no-headers`, compared verbatim: the field is named `"1"`,
/// its type is empty, and no `stats` table appears in the expected document.
#[test]
fn frequency_toon_no_headers() {
    let (wrk, mut cmd) = setup("frequency_toon_no_headers");
    // Argument order matters: it is echoed in the `description` line of the output
    cmd.arg("--limit").arg("0");
    cmd.arg("--select").arg("1");
    cmd.arg("--no-headers");
    cmd.arg("--toon");

    let actual: String = wrk.stdout(&mut cmd);
    let expected = r#"input: in.csv
description: "Generated with `qsv frequency in.csv --limit 0 --select 1 --no-headers --toon`"
rowcount: 8
fieldcount: 1
fields[1]:
  - field: "1"
    type: ""
    cardinality: 5
    nullcount: 0
    sparsity: 0
    uniqueness_ratio: 0.625
    frequencies[5]{value,count,percentage,rank}:
    a,4,50,1
    (NULL),1,12.5,2
    (NULL),1,12.5,2
    b,1,12.5,2
    h1,1,12.5,2
rank_strategy: dense"#;
    assert_eq!(actual, expected);
}

/// `--toon` combined with `--ignore-case`, compared verbatim: `y`/`Y` are merged, so the
/// expected document lists three frequency rows for `h2`.
#[test]
fn frequency_toon_ignore_case() {
    let (wrk, mut cmd) = setup("frequency_toon_ignore_case");
    // Argument order matters: it is echoed in the `description` line of the output
    cmd.arg("--ignore-case");
    cmd.arg("--limit").arg("0");
    cmd.arg("--select").arg("h2");
    cmd.arg("--toon");

    let actual: String = wrk.stdout(&mut cmd);
    let expected = r#"input: in.csv
description: "Generated with `qsv frequency in.csv --ignore-case --limit 0 --select h2 --toon`"
rowcount: 7
fieldcount: 1
fields[1]:
  - field: h2
    type: String
    cardinality: 3
    nullcount: 0
    sparsity: 0
    uniqueness_ratio: 0.4286
    stats[10]{name,value}:
    min,Y
    max,z
    sort_order,Unsorted
    min_length,1
    max_length,1
    sum_length,7
    avg_length,1
    stddev_length,0
    variance_length,0
    cv_length,0
    frequencies[3]{value,count,percentage,rank}:
    y,3,42.85714,1
    z,3,42.85714,1
    x,1,14.28571,2
rank_strategy: dense"#;
    assert_eq!(actual, expected);
}

/// `--toon` combined with `--limit 1`, compared verbatim: both fields keep their top
/// value and an `Other (3)` row, which carries rank 0 in the expected document.
#[test]
fn frequency_toon_limit() {
    let (wrk, mut cmd) = setup("frequency_toon_limit");
    // Argument order matters: it is echoed in the `description` line of the output
    cmd.arg("--limit").arg("1");
    cmd.arg("--toon");

    let actual: String = wrk.stdout(&mut cmd);
    let expected = r#"input: in.csv
description: "Generated with `qsv frequency in.csv --limit 1 --toon`"
rowcount: 7
fieldcount: 2
fields[2]:
  - field: h1
    type: String
    cardinality: 4
    nullcount: 1
    sparsity: 0.1429
    uniqueness_ratio: 0.5714
    stats[10]{name,value}:
    min,(NULL)
    max,b
    sort_order,Unsorted
    min_length,0
    max_length,6
    sum_length,11
    avg_length,1.5714
    stddev_length,1.8406
    variance_length,3.3878
    cv_length,1.1713
    frequencies[2]{value,count,percentage,rank}:
    a,4,57.14286,1
    Other (3),3,42.85714,0
  - field: h2
    type: String
    cardinality: 4
    nullcount: 0
    sparsity: 0
    uniqueness_ratio: 0.5714
    stats[10]{name,value}:
    min,Y
    max,z
    sort_order,Unsorted
    min_length,1
    max_length,1
    sum_length,7
    avg_length,1
    stddev_length,0
    variance_length,0
    cv_length,0
    frequencies[2]{value,count,percentage,rank}:
    z,3,42.85714,1
    Other (3),4,57.14286,0
rank_strategy: dense"#;
    assert_eq!(actual, expected);
}

/// `--toon` on an all-unique column. Only the tail of the document is compared (via
/// `ends_with`), so the leading `input`/`description` lines are not checked.
#[test]
fn frequency_toon_all_unique() {
    let wrk = Workdir::new("frequency_toon_all_unique");
    let csv_path = wrk.load_test_file("boston311-100.csv");

    let mut cmd = wrk.command("frequency");
    cmd.arg("--select").arg("1");
    cmd.arg(&csv_path);
    cmd.arg("--toon");
    wrk.assert_success(&mut cmd);

    let actual: String = wrk.stdout(&mut cmd);
    let expected_tail = r#"rowcount: 100
fieldcount: 1
fields[1]:
  - field: case_enquiry_id
    type: Integer
    cardinality: 100
    nullcount: 0
    sparsity: 0
    uniqueness_ratio: 1
    stats[10]{name,value}:
    sum,10100411645180
    min,101004113298
    max,101004155594
    range,42296
    sort_order,Unsorted
    mean,101004116451.8
    sem,790.552
    stddev,7905.5202
    variance,62497248.9352
    cv,0
    frequencies[1]{value,count,percentage,rank}:
    <ALL_UNIQUE>,100,100,0
rank_strategy: dense"#;
    assert!(actual.ends_with(expected_tail));
}

/// `--toon` combined with `--vis-whitespace`: the TOON output is decoded (non-strict)
/// into a JSON value and checked structurally rather than compared as text.
///
/// Expected: one field `header` with cardinality 3 and the entries `value` (4),
/// `(NULL)` (1) and `no_whitespace` (1).
#[test]
fn frequency_toon_vis_whitespace() {
    let wrk = Workdir::new("frequency_toon_vis_whitespace");
    let rows = vec![
        svec!["header"],
        svec!["value\t"],
        svec!["\tvalue"],
        svec!["value "],
        svec![" value"],
        svec!["      "],
        svec!["no_whitespace"],
    ];
    wrk.create("in.csv", rows);
    let mut cmd = wrk.command("frequency");
    cmd.env("QSV_STATSCACHE_MODE", "none")
        .arg("in.csv")
        .args(["--limit", "0"])
        .arg("--vis-whitespace")
        .arg("--toon");
    wrk.assert_success(&mut cmd);
    let got: String = wrk.stdout(&mut cmd);
    let v: Value = toon_format::decode(
        &got,
        &toon_format::DecodeOptions {
            strict: false,
            ..Default::default()
        },
    )
    .unwrap_or_else(|e| {
        // Truncate by characters, not bytes: slicing `got` at a fixed byte offset could
        // land inside a multi-byte character and panic, masking the real decode error.
        let preview: String = got.chars().take(500).collect();
        panic!("Failed to decode TOON output: {e}. Output: {preview}")
    });
    assert!(v["input"].as_str().unwrap().ends_with("in.csv"));
    assert_eq!(v["rowcount"], 6);
    assert_eq!(v["fieldcount"], 1);
    let fields = v["fields"].as_array().expect("fields should be an array");
    assert_eq!(fields.len(), 1);
    let field = &fields[0];
    assert_eq!(field["field"], "header");
    assert_eq!(field["cardinality"], 3);
    // Build the diagnostic message lazily so nothing is formatted on the success path
    let freqs = field["frequencies"].as_array().unwrap_or_else(|| {
        panic!(
            "frequencies should be an array. Field keys: {:?}",
            field.as_object().map(|o| o.keys().collect::<Vec<_>>())
        )
    });
    // (value, count, percentage), in output order
    let expected = vec![
        ("value", 4, 66.66667),
        ("(NULL)", 1, 16.66667),
        ("no_whitespace", 1, 16.66667),
    ];
    for (i, (val, count, pct)) in expected.iter().enumerate() {
        assert_eq!(freqs[i]["value"], *val);
        assert_eq!(freqs[i]["count"], *count);
        assert!((freqs[i]["percentage"].as_f64().unwrap() - *pct).abs() < 1e-5);
    }
}

// Test ranking strategies
/// Creates a workdir whose `in.csv` has a single `value` column with the values grouped
/// together, and returns it with a `frequency in.csv --limit 0` command.
///
/// The counts are chosen to exercise rank ties:
/// - `a` appears 5 times (rank 1)
/// - `b` and `c` appear 3 times each (tied for ranks 2/3)
/// - `d` and `e` appear 2 times each (tied for ranks 4/5)
/// - `f` appears once (rank 6)
fn setup_rank_test_sorted(name: &str) -> (Workdir, process::Command) {
    // (value, number of occurrences), in the order the rows are written
    let groups = [("a", 5), ("b", 3), ("c", 3), ("d", 2), ("e", 2), ("f", 1)];

    let mut rows: Vec<Vec<String>> = vec![vec!["value".to_owned()]];
    for &(value, occurrences) in groups.iter() {
        rows.extend((0..occurrences).map(|_| vec![value.to_owned()]));
    }

    let wrk = Workdir::new(name);
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv");
    cmd.arg("--limit").arg("0");

    (wrk, cmd)
}

/// Same data as `setup_rank_test_sorted` (a×5, b×3, c×3, d×2, e×2, f×1) but with the rows
/// shuffled, so tests can check that tied values still come out in alphabetical order.
/// Returns the workdir with a `frequency in.csv --limit 0` command.
fn setup_rank_test_unsorted(name: &str) -> (Workdir, process::Command) {
    // Header first, then the values in deliberately unsorted order
    let cells = [
        "value", "c", "d", "a", "a", "b", "a", "e", "a", "b", "c", "c", "b", "e", "d", "f", "a",
    ];
    let rows: Vec<Vec<String>> = cells
        .iter()
        .map(|cell| vec![(*cell).to_owned()])
        .collect();

    let wrk = Workdir::new(name);
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv");
    cmd.arg("--limit").arg("0");

    (wrk, cmd)
}

/// Creates a small rank fixture: a single `value` column with a×3, b×2, c×2, d×1.
/// Returns the workdir with a `frequency in.csv --limit 0` command.
fn setup_rank_test_simple(name: &str) -> (Workdir, process::Command) {
    // (value, number of occurrences), in the order the rows are written
    let groups = [("a", 3), ("b", 2), ("c", 2), ("d", 1)];

    let mut rows: Vec<Vec<String>> = vec![vec!["value".to_owned()]];
    for &(value, occurrences) in groups.iter() {
        rows.extend((0..occurrences).map(|_| vec![value.to_owned()]));
    }

    let wrk = Workdir::new(name);
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv");
    cmd.arg("--limit").arg("0");

    (wrk, cmd)
}

/// `--rank-strategy min` on the sorted fixture: tied values are expected to share the
/// lowest rank of their group (1, 2, 2, 4, 4, 6).
#[test]
fn frequency_rank_ties_min() {
    let (wrk, mut cmd) = setup_rank_test_sorted("frequency_rank_ties_min");
    cmd.arg("--rank-strategy").arg("min");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "5", "31.25", "1"],
        ["value", "b", "3", "18.75", "2"],
        ["value", "c", "3", "18.75", "2"],
        ["value", "d", "2", "12.5", "4"],
        ["value", "e", "2", "12.5", "4"],
        ["value", "f", "1", "6.25", "6"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy min` on the simple fixture: expected ranks are 1, 2, 2, 4.
#[test]
fn frequency_rank_ties_min_simple() {
    let (wrk, mut cmd) = setup_rank_test_simple("frequency_rank_ties_min_simple");
    cmd.arg("--rank-strategy").arg("min");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "3", "37.5", "1"],
        ["value", "b", "2", "25", "2"],
        ["value", "c", "2", "25", "2"],
        ["value", "d", "1", "12.5", "4"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `-r max` (short flag) on the sorted fixture: tied values are expected to share the
/// highest rank of their group (1, 3, 3, 5, 5, 6).
#[test]
fn frequency_rank_ties_max() {
    let (wrk, mut cmd) = setup_rank_test_sorted("frequency_rank_ties_max");
    cmd.arg("-r").arg("max");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "5", "31.25", "1"],
        ["value", "b", "3", "18.75", "3"],
        ["value", "c", "3", "18.75", "3"],
        ["value", "d", "2", "12.5", "5"],
        ["value", "e", "2", "12.5", "5"],
        ["value", "f", "1", "6.25", "6"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `-r max` on the simple fixture: expected ranks are 1, 3, 3, 4.
#[test]
fn frequency_rank_ties_max_simple() {
    let (wrk, mut cmd) = setup_rank_test_simple("frequency_rank_ties_max_simple");
    cmd.arg("-r").arg("max");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "3", "37.5", "1"],
        ["value", "b", "2", "25", "3"],
        ["value", "c", "2", "25", "3"],
        ["value", "d", "1", "12.5", "4"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy dense` on the sorted fixture: tied values share a rank and no ranks
/// are skipped afterwards (1, 2, 2, 3, 3, 4).
#[test]
fn frequency_rank_ties_dense() {
    let (wrk, mut cmd) = setup_rank_test_sorted("frequency_rank_ties_dense");
    cmd.arg("--rank-strategy").arg("dense");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "5", "31.25", "1"],
        ["value", "b", "3", "18.75", "2"],
        ["value", "c", "3", "18.75", "2"],
        ["value", "d", "2", "12.5", "3"],
        ["value", "e", "2", "12.5", "3"],
        ["value", "f", "1", "6.25", "4"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy dense` on the unsorted fixture: the expected result is the same as
/// for the sorted fixture (1, 2, 2, 3, 3, 4).
#[test]
fn frequency_rank_ties_dense_complex() {
    let (wrk, mut cmd) = setup_rank_test_unsorted("frequency_rank_ties_dense_complex");
    cmd.arg("--rank-strategy").arg("dense");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "5", "31.25", "1"],
        ["value", "b", "3", "18.75", "2"],
        ["value", "c", "3", "18.75", "2"],
        ["value", "d", "2", "12.5", "3"],
        ["value", "e", "2", "12.5", "3"],
        ["value", "f", "1", "6.25", "4"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy dense` on the simple fixture: expected ranks are 1, 2, 2, 3.
#[test]
fn frequency_rank_ties_dense_simple() {
    let (wrk, mut cmd) = setup_rank_test_simple("frequency_rank_ties_dense_simple");
    cmd.arg("--rank-strategy").arg("dense");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "3", "37.5", "1"],
        ["value", "b", "2", "25", "2"],
        ["value", "c", "2", "25", "2"],
        ["value", "d", "1", "12.5", "3"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy ordinal` on the sorted fixture: every row gets its own rank, even
/// when counts tie (1 through 6).
#[test]
fn frequency_rank_ties_ordinal() {
    let (wrk, mut cmd) = setup_rank_test_sorted("frequency_rank_ties_ordinal");
    cmd.arg("--rank-strategy").arg("ordinal");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "5", "31.25", "1"],
        ["value", "b", "3", "18.75", "2"],
        ["value", "c", "3", "18.75", "3"],
        ["value", "d", "2", "12.5", "4"],
        ["value", "e", "2", "12.5", "5"],
        ["value", "f", "1", "6.25", "6"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy ordinal` on the unsorted fixture: tied values are expected to be
/// ranked in alphabetical order, giving the same 1 through 6 as the sorted fixture.
#[test]
fn frequency_rank_ties_ordinal_complex() {
    let (wrk, mut cmd) = setup_rank_test_unsorted("frequency_rank_ties_ordinal_complex");
    cmd.arg("--rank-strategy").arg("ordinal");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "5", "31.25", "1"],
        ["value", "b", "3", "18.75", "2"],
        ["value", "c", "3", "18.75", "3"],
        ["value", "d", "2", "12.5", "4"],
        ["value", "e", "2", "12.5", "5"],
        ["value", "f", "1", "6.25", "6"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy ordinal` on the simple fixture: expected ranks are 1, 2, 3, 4.
#[test]
fn frequency_rank_ties_ordinal_simple() {
    let (wrk, mut cmd) = setup_rank_test_simple("frequency_rank_ties_ordinal_simple");
    cmd.arg("--rank-strategy").arg("ordinal");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "3", "37.5", "1"],
        ["value", "b", "2", "25", "2"],
        ["value", "c", "2", "25", "3"],
        ["value", "d", "1", "12.5", "4"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy average` on the sorted fixture: tied values are expected to get the
/// mean of the ranks they span (1, 2.5, 2.5, 4.5, 4.5, 6).
#[test]
fn frequency_rank_ties_average() {
    let (wrk, mut cmd) = setup_rank_test_sorted("frequency_rank_ties_average");
    cmd.arg("--rank-strategy").arg("average");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "5", "31.25", "1"],
        ["value", "b", "3", "18.75", "2.5"],
        ["value", "c", "3", "18.75", "2.5"],
        ["value", "d", "2", "12.5", "4.5"],
        ["value", "e", "2", "12.5", "4.5"],
        ["value", "f", "1", "6.25", "6"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy average` on the unsorted fixture: the expected result matches the
/// sorted fixture (1, 2.5, 2.5, 4.5, 4.5, 6).
#[test]
fn frequency_rank_ties_average_complex() {
    let (wrk, mut cmd) = setup_rank_test_unsorted("frequency_rank_ties_average_complex");
    cmd.arg("--rank-strategy").arg("average");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "5", "31.25", "1"],
        ["value", "b", "3", "18.75", "2.5"],
        ["value", "c", "3", "18.75", "2.5"],
        ["value", "d", "2", "12.5", "4.5"],
        ["value", "e", "2", "12.5", "4.5"],
        ["value", "f", "1", "6.25", "6"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy average` on the simple fixture: expected ranks are 1, 2.5, 2.5, 4.
#[test]
fn frequency_rank_ties_average_simple() {
    let (wrk, mut cmd) = setup_rank_test_simple("frequency_rank_ties_average_simple");
    cmd.arg("--rank-strategy").arg("average");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "3", "37.5", "1"],
        ["value", "b", "2", "25", "2.5"],
        ["value", "c", "2", "25", "2.5"],
        ["value", "d", "1", "12.5", "4"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy average` combined with `--asc`: the ranking is expected to run from
/// the least frequent value (`f`, rank 1) to the most frequent (`a`, rank 6), with tied
/// values still sharing averaged ranks.
#[test]
fn frequency_rank_ties_with_asc() {
    let (wrk, mut cmd) = setup_rank_test_sorted("frequency_rank_ties_with_asc");
    cmd.arg("--rank-strategy").arg("average");
    cmd.arg("--asc");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    let expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "5", "31.25", "6"],
        ["value", "b", "3", "18.75", "4.5"],
        ["value", "c", "3", "18.75", "4.5"],
        ["value", "d", "2", "12.5", "2.5"],
        ["value", "e", "2", "12.5", "2.5"],
        ["value", "f", "1", "6.25", "1"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    assert_eq!(actual, expected);
}

/// `--rank-strategy average` combined with `--json`: the strategy is echoed in
/// `rank_strategy` and fractional ranks are expected to appear as JSON numbers.
#[test]
fn frequency_rank_ties_json() {
    let (wrk, mut cmd) = setup_rank_test_sorted("frequency_rank_ties_json");
    cmd.arg("--rank-strategy").arg("average");
    cmd.arg("--json");

    let stdout: String = wrk.stdout(&mut cmd);
    let parsed: Value = serde_json::from_str(&stdout).unwrap();

    // Top-level metadata
    let input = parsed["input"].as_str().unwrap();
    assert!(input.ends_with("in.csv"));
    assert_eq!(parsed["rowcount"], 16);
    assert_eq!(parsed["fieldcount"], 1);
    assert_eq!(parsed["rank_strategy"], "average");

    let fields = parsed["fields"].as_array().unwrap();
    assert_eq!(fields.len(), 1);
    let value_field = &fields[0];
    assert_eq!(value_field["field"], "value");
    let freqs = value_field["frequencies"].as_array().unwrap();

    // (value, rank) in output order; ties carry fractional ranks
    let expected = [
        ("a", 1.0),
        ("b", 2.5),
        ("c", 2.5),
        ("d", 4.5),
        ("e", 4.5),
        ("f", 6.0),
    ];
    for (idx, &(value, rank)) in expected.iter().enumerate() {
        let entry = &freqs[idx];
        assert_eq!(entry["value"], value);
        assert_eq!(entry["rank"], rank);
    }
}

/// An unknown `--rank-strategy` value is expected to produce an error on stderr that
/// contains both "Could not match" and "allowed variants".
#[test]
fn frequency_rank_ties_invalid_strategy() {
    let (wrk, mut cmd) = setup_rank_test_sorted("frequency_rank_ties_invalid_strategy");
    cmd.arg("--rank-strategy").arg("invalid");

    let stderr = wrk.output_stderr(&mut cmd);
    for needle in ["Could not match", "allowed variants"].iter() {
        assert!(stderr.contains(*needle));
    }
}

// Weighted frequency tests
/// `--weight weight`: counts are expected to be the sum of each value's weights
/// (a = 2 + 3, b = 1 + 1, c = 5), with `a` and `c` tied at rank 1.
#[test]
fn frequency_weight_basic() {
    let wrk = Workdir::new("frequency_weight_basic");
    let rows = vec![
        svec!["value", "weight"],
        svec!["a", "2.0"],
        svec!["a", "3.0"],
        svec!["b", "1.0"],
        svec!["b", "1.0"],
        svec!["c", "5.0"],
    ];
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv");
    cmd.arg("--limit").arg("0");
    cmd.arg("--select").arg("value");
    cmd.arg("--weight").arg("weight");

    wrk.assert_success(&mut cmd);

    // Orders rows by their second column (the value); rows with fewer than two
    // columns compare as equal. Applied to both sides for a stable comparison.
    let by_value_column = |a: &Vec<String>, b: &Vec<String>| match (a.get(1), b.get(1)) {
        (Some(left), Some(right)) => left.cmp(right),
        _ => std::cmp::Ordering::Equal,
    };

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_by(by_value_column);

    let mut expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "5", "41.66667", "1"],
        ["value", "b", "2", "16.66667", "2"],
        ["value", "c", "5", "41.66667", "1"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    expected.sort_by(by_value_column);

    assert_eq!(actual, expected);
}

/// Even when the weight column is part of `--select`, the output is expected to contain
/// frequencies for `value` only and none for `weight`.
#[test]
fn frequency_weight_excludes_weight_column() {
    let wrk = Workdir::new("frequency_weight_excludes_weight_column");
    let rows = vec![
        svec!["value", "weight"],
        svec!["a", "2.0"],
        svec!["a", "3.0"],
        svec!["b", "1.0"],
    ];
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv");
    cmd.arg("--limit").arg("0");
    cmd.arg("--select").arg("value,weight");
    cmd.arg("--weight").arg("weight");

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_unstable();

    // Number of output rows whose first column (the field name) equals `field`
    let rows_for_field = |field: &str| {
        actual
            .iter()
            .filter(|row| row.first().map(String::as_str) == Some(field))
            .count()
    };

    // Two rows for the "value" field (a and b); the header row starts with "field"
    // and is therefore not counted
    assert_eq!(rows_for_field("value"), 2);
    // No frequencies at all for the weight column
    assert_eq!(rows_for_field("weight"), 0);
}

/// Missing or non-numeric weights are expected to count as 1.0:
/// `a` = 2.0 + 1.0 (empty weight) = 3, `b` = 1.0 (invalid weight) + 3.0 = 4.
#[test]
fn frequency_weight_missing_weights_default_to_one() {
    let wrk = Workdir::new("frequency_weight_missing_weights_default_to_one");
    let rows = vec![
        svec!["value", "weight"],
        svec!["a", "2.0"],
        svec!["a", ""],        // weight left empty
        svec!["b", "invalid"], // weight is not a number
        svec!["b", "3.0"],
    ];
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv");
    cmd.arg("--limit").arg("0");
    cmd.arg("--select").arg("value");
    cmd.arg("--weight").arg("weight");

    // Orders rows by their second column (the value); rows with fewer than two
    // columns compare as equal. Applied to both sides for a stable comparison.
    let by_value_column = |a: &Vec<String>, b: &Vec<String>| match (a.get(1), b.get(1)) {
        (Some(left), Some(right)) => left.cmp(right),
        _ => std::cmp::Ordering::Equal,
    };

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_by(by_value_column);

    let mut expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "3", "42.85714", "2"],
        ["value", "b", "4", "57.14286", "1"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    expected.sort_by(by_value_column);

    assert_eq!(actual, expected);
}

/// Rows with a zero or negative weight are expected to contribute nothing:
/// `a` = 2.0 (0.0 ignored), `b` = 3.0 (-1.0 ignored), `c` = 1.0.
#[test]
fn frequency_weight_zero_and_negative_ignored() {
    let wrk = Workdir::new("frequency_weight_zero_and_negative_ignored");
    let rows = vec![
        svec!["value", "weight"],
        svec!["a", "2.0"],
        svec!["a", "0.0"],  // zero weight: expected to be skipped
        svec!["b", "-1.0"], // negative weight: expected to be skipped
        svec!["b", "3.0"],
        svec!["c", "1.0"],
    ];
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv");
    cmd.arg("--limit").arg("0");
    cmd.arg("--select").arg("value");
    cmd.arg("--weight").arg("weight");

    // Orders rows by their second column (the value); rows with fewer than two
    // columns compare as equal. Applied to both sides for a stable comparison.
    let by_value_column = |a: &Vec<String>, b: &Vec<String>| match (a.get(1), b.get(1)) {
        (Some(left), Some(right)) => left.cmp(right),
        _ => std::cmp::Ordering::Equal,
    };

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_by(by_value_column);

    let mut expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "2", "33.33333", "2"],
        ["value", "b", "3", "50", "1"],
        ["value", "c", "1", "16.66667", "3"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    expected.sort_by(by_value_column);

    assert_eq!(actual, expected);
}

/// `--weight` combined with `--limit 2`: the two heaviest values are kept
/// (`a` = 10 + 2 = 12, `b` = 5) and the rest (`c` = 3, `d` = 2) are expected to be
/// folded into `Other (2)` with rank 0.
#[test]
fn frequency_weight_with_limit() {
    let wrk = Workdir::new("frequency_weight_with_limit");
    // "a" occurs twice so the column is not all-unique and the limit logic is exercised
    let rows = vec![
        svec!["value", "weight"],
        svec!["a", "10.0"],
        svec!["a", "2.0"], // second "a" keeps the column from being all-unique
        svec!["b", "5.0"],
        svec!["c", "3.0"],
        svec!["d", "2.0"],
    ];
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv");
    cmd.arg("--limit").arg("2");
    cmd.arg("--select").arg("value");
    cmd.arg("--weight").arg("weight");

    // Orders rows by their second column (the value); rows with fewer than two
    // columns compare as equal. Applied to both sides for a stable comparison.
    let by_value_column = |a: &Vec<String>, b: &Vec<String>| match (a.get(1), b.get(1)) {
        (Some(left), Some(right)) => left.cmp(right),
        _ => std::cmp::Ordering::Equal,
    };

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_by(by_value_column);

    let mut expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "a", "12", "54.54545", "1"],
        ["value", "b", "5", "22.72727", "2"],
        ["value", "Other (2)", "5", "22.72727", "0"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    expected.sort_by(by_value_column);

    assert_eq!(actual, expected);
}

/// `--weight` combined with `--asc`: ranks are expected to run from the lightest value
/// to the heaviest (`c` = 3 → rank 1, `b` = 5 → rank 2, `a` = 10 + 5 = 15 → rank 3).
#[test]
fn frequency_weight_with_asc() {
    let wrk = Workdir::new("frequency_weight_with_asc");
    // "a" occurs twice so the column is not all-unique and the sorting is exercised
    let rows = vec![
        svec!["value", "weight"],
        svec!["a", "10.0"],
        svec!["a", "5.0"], // second "a" keeps the column from being all-unique
        svec!["b", "5.0"],
        svec!["c", "3.0"],
    ];
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv");
    cmd.arg("--limit").arg("0");
    cmd.arg("--select").arg("value");
    cmd.arg("--weight").arg("weight");
    cmd.arg("--asc");

    // Orders rows by their second column (the value); rows with fewer than two
    // columns compare as equal. Applied to both sides for a stable comparison.
    let by_value_column = |a: &Vec<String>, b: &Vec<String>| match (a.get(1), b.get(1)) {
        (Some(left), Some(right)) => left.cmp(right),
        _ => std::cmp::Ordering::Equal,
    };

    let mut actual: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    actual.sort_by(by_value_column);

    let mut expected: Vec<Vec<String>> = [
        ["field", "value", "count", "percentage", "rank"],
        ["value", "c", "3", "13.04348", "1"],
        ["value", "b", "5", "21.73913", "2"],
        ["value", "a", "15", "65.21739", "3"],
    ]
    .iter()
    .map(|row| row.iter().map(|cell| (*cell).to_owned()).collect())
    .collect();
    expected.sort_by(by_value_column);

    assert_eq!(actual, expected);
}

#[test]
fn frequency_weight_with_rank_strategy() {
    // Orders rows by the `value` column (index 1); rows lacking that column compare equal.
    let by_value = |a: &Vec<String>, b: &Vec<String>| match (a.get(1), b.get(1)) {
        (Some(x), Some(y)) => x.cmp(y),
        _ => std::cmp::Ordering::Equal,
    };

    let wrk = Workdir::new("frequency_weight_with_rank_strategy");
    // "a" occurs twice so the column is not all-unique; "b" and "c" tie on weight.
    let input = vec![
        svec!["value", "weight"],
        svec!["a", "5.0"],
        svec!["a", "2.0"],
        svec!["b", "3.0"],
        svec!["c", "3.0"],
        svec!["d", "2.0"],
    ];
    wrk.create("in.csv", input);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--limit", "0"])
        .args(["--select", "value"])
        .args(["--weight", "weight"])
        .args(["--rank-strategy", "average"]);

    // Aggregated weights: a = 7 (rank 1), b = c = 3 (tied, average rank 2.5), d = 2 (rank 4).
    let mut expected = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["value", "a", "7", "46.66667", "1"],
        svec!["value", "b", "3", "20", "2.5"],
        svec!["value", "c", "3", "20", "2.5"],
        svec!["value", "d", "2", "13.33333", "4"],
    ];

    let mut got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // Normalise both sides to value order before comparing.
    got.sort_by(by_value);
    expected.sort_by(by_value);
    assert_eq!(got, expected);
}

#[test]
fn frequency_weight_json() {
    let wrk = Workdir::new("frequency_weight_json");
    let input = vec![
        svec!["value", "weight"],
        svec!["a", "2.0"],
        svec!["a", "3.0"],
        svec!["b", "1.0"],
    ];
    wrk.create("in.csv", input);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--limit", "0"])
        .args(["--select", "value"])
        .args(["--weight", "weight"])
        .arg("--json");

    let stdout: String = wrk.stdout(&mut cmd);
    let report: Value = serde_json::from_str(&stdout).unwrap();

    // Top-level summary: three data rows, one selected field.
    assert_eq!(report["rowcount"], 3);
    assert_eq!(report["fieldcount"], 1);

    let fields = report["fields"].as_array().unwrap();
    assert_eq!(fields.len(), 1);
    let value_field = &fields[0];
    assert_eq!(value_field["field"], "value");

    // Two distinct values are expected: "a" and "b".
    let freqs = value_field["frequencies"].as_array().unwrap();
    assert_eq!(freqs.len(), 2);

    // Counts carry the summed weights: a = 2.0 + 3.0 = 5, b = 1.0 = 1.
    let entry_for = |value: &str| freqs.iter().find(|f| f["value"] == value).unwrap();
    assert_eq!(entry_for("a")["count"], 5);
    assert_eq!(entry_for("b")["count"], 1);
}

#[test]
fn frequency_weight_fractional_weights() {
    let wrk = Workdir::new("frequency_weight_fractional_weights");
    let input = vec![
        svec!["value", "weight"],
        svec!["a", "1.5"],
        svec!["a", "2.5"],
        svec!["b", "0.5"],
        svec!["b", "0.5"],
    ];
    wrk.create("in.csv", input);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--limit", "0"])
        .args(["--select", "value"])
        .args(["--weight", "weight"]);

    // Fractional weights sum to whole numbers here: a = 1.5 + 2.5 = 4, b = 0.5 + 0.5 = 1.
    let expected = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["value", "a", "4", "80", "1"],
        svec!["value", "b", "1", "20", "2"],
    ];

    let mut got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    // `expected` is already in full lexicographic row order, so only `got` needs sorting.
    got.sort_unstable();
    assert_eq!(got, expected);
}

#[test]
fn frequency_weight_all_unique() {
    let wrk = Workdir::new("frequency_weight_all_unique");
    // Every id is distinct, so the selected column is all-unique.
    let input = vec![
        svec!["id", "weight"],
        svec!["1", "2.0"],
        svec!["2", "3.0"],
        svec!["3", "1.0"],
    ];
    wrk.create("in.csv", input);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--select", "id"])
        .args(["--weight", "weight"]);

    // An all-unique column collapses to one <ALL_UNIQUE> row whose count is the
    // total weight: 2.0 + 3.0 + 1.0 = 6.
    let expected = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["id", "<ALL_UNIQUE>", "6", "100", "0"],
    ];

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    assert_eq!(got, expected);
}

#[test]
fn frequency_weight_column_not_found() {
    let wrk = Workdir::new("frequency_weight_column_not_found");
    let rows = vec![svec!["value", "weight"], svec!["a", "2.0"]];
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    // The fixture must be passed explicitly; without it the command falls back to stdin
    // and the weight-column lookup is never run against the header row created above.
    cmd.arg("in.csv").args(["--weight", "nonexistent"]);

    // Naming a weight column that is absent from the header must be reported on stderr.
    let output = wrk.output_stderr(&mut cmd);
    assert!(
        output.contains("Weight column 'nonexistent' not found"),
        "unexpected stderr: {}",
        output
    );
}

#[test]
fn frequency_weight_with_ignore_case() {
    // Orders rows by the `value` column (index 1); rows lacking that column compare equal.
    let by_value = |a: &Vec<String>, b: &Vec<String>| match (a.get(1), b.get(1)) {
        (Some(x), Some(y)) => x.cmp(y),
        _ => std::cmp::Ordering::Equal,
    };

    let wrk = Workdir::new("frequency_weight_with_ignore_case");
    // Mixed-case values that should merge under --ignore-case. The extra "a" row keeps
    // the column from being all-unique even before case folding.
    let input = vec![
        svec!["value", "weight"],
        svec!["A", "2.0"],
        svec!["a", "3.0"],
        svec!["B", "1.0"],
        svec!["b", "2.0"],
        svec!["a", "1.0"],
    ];
    wrk.create("in.csv", input);

    let mut cmd = wrk.command("frequency");
    // The stats cache is disabled so that pre-computed stats cannot flag the column as
    // all-unique.
    cmd.env("QSV_STATSCACHE_MODE", "none")
        .arg("in.csv")
        .args(["--limit", "0"])
        .args(["--select", "value"])
        .args(["--weight", "weight"])
        .arg("--ignore-case");

    // Case-folded totals: "A"/"a" = 2.0 + 3.0 + 1.0 = 6, "B"/"b" = 1.0 + 2.0 = 3.
    let mut expected = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["value", "a", "6", "66.66667", "1"],
        svec!["value", "b", "3", "33.33333", "2"],
    ];

    let mut got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // Normalise both sides to value order before comparing.
    got.sort_by(by_value);
    expected.sort_by(by_value);
    assert_eq!(got, expected);
}

#[test]
fn frequency_weight_with_no_nulls() {
    // Orders rows by the `value` column (index 1); rows lacking that column compare equal.
    let by_value = |a: &Vec<String>, b: &Vec<String>| match (a.get(1), b.get(1)) {
        (Some(x), Some(y)) => x.cmp(y),
        _ => std::cmp::Ordering::Equal,
    };

    let wrk = Workdir::new("frequency_weight_with_no_nulls");
    // "a" occurs twice so the column is not all-unique; one row has an empty value.
    let input = vec![
        svec!["value", "weight"],
        svec!["a", "2.0"],
        svec!["a", "1.0"],
        svec!["", "3.0"],
        svec!["b", "1.0"],
    ];
    wrk.create("in.csv", input);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--limit", "0"])
        .args(["--select", "value"])
        .args(["--weight", "weight"])
        .arg("--no-nulls");

    // --no-nulls drops the empty value together with its weight, leaving
    // a = 2.0 + 1.0 = 3 and b = 1 (total 4).
    let mut expected = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["value", "a", "3", "75", "1"],
        svec!["value", "b", "1", "25", "2"],
    ];

    let mut got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // Normalise both sides to value order before comparing.
    got.sort_by(by_value);
    expected.sort_by(by_value);
    assert_eq!(got, expected);
}

#[test]
fn frequency_weight_parallel_merge() {
    // Value assigned to row `i` is VALUE_CYCLE[i % 10]; "a", "b" and "c" each occupy two slots.
    const VALUE_CYCLE: [&str; 10] = ["a", "a", "b", "b", "c", "c", "d", "e", "f", "g"];

    // Orders rows by the `value` column (index 1); rows lacking that column compare equal.
    let by_value = |a: &Vec<String>, b: &Vec<String>| match (a.get(1), b.get(1)) {
        (Some(x), Some(y)) => x.cmp(y),
        _ => std::cmp::Ordering::Equal,
    };

    let wrk = Workdir::new("frequency_weight_parallel_merge");

    // 1000 data rows so the input can be split into several chunks, with every value
    // recurring throughout the file so per-chunk weights have to be merged.
    // Row `i` gets weight (i % 5 + 1) * 1.5, written with one decimal place.
    let mut rows = vec![svec!["value", "weight"]];
    rows.extend((0..1000_usize).map(|i| {
        let weight = (i % 5 + 1) as f64 * 1.5;
        vec![VALUE_CYCLE[i % 10].to_string(), format!("{:.1}", weight)]
    }));

    // The index is what enables parallel processing.
    wrk.create_indexed("in.csv", rows);

    // Builds the weighted frequency command for the given --jobs setting.
    let weighted_cmd = |jobs: &str| {
        let mut cmd = wrk.command("frequency");
        cmd.arg("in.csv")
            .args(["--limit", "0"])
            .args(["--select", "value"])
            .args(["--weight", "weight"])
            .args(["--jobs", jobs]);
        cmd
    };

    // First run only checks that the parallel invocation succeeds.
    let mut cmd = weighted_cmd("4");
    wrk.assert_success(&mut cmd);

    // Second, identical run captures the output.
    let mut cmd = weighted_cmd("4");
    let mut got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    got.sort_by(by_value);

    // Expected totals (each i % 10 residue occurs 100 times):
    //   a: residues 0, 1 -> 1.5 and 3.0 per row -> 150 + 300 = 450
    //   b: residues 2, 3 -> 4.5 and 6.0 per row -> 450 + 600 = 1050
    //   c: residues 4, 5 -> 7.5 and 1.5 per row -> 750 + 150 = 900
    //   d: residue 6     -> 3.0 per row         -> 300
    //   e: residue 7     -> 4.5 per row         -> 450
    //   f: residue 8     -> 6.0 per row         -> 600
    //   g: residue 9     -> 7.5 per row         -> 750

    // Keep only frequency rows for the `value` field (this drops the header).
    let freq_rows: Vec<_> = got
        .iter()
        .filter(|r| matches!(r.as_slice(), [field, _, ..] if field == "value"))
        .collect();

    assert_eq!(freq_rows.len(), 7, "Should have 7 unique values");

    let find_freq = |value: &str| freq_rows.iter().copied().find(|r| r[1] == value);

    // Spot-check the values that occupy two slots in the cycle.
    let a_row = find_freq("a").expect("Should find 'a'");
    assert_eq!(
        a_row[2], "450",
        "Value 'a' should have aggregated weight 450"
    );

    let b_row = find_freq("b").expect("Should find 'b'");
    assert_eq!(
        b_row[2], "1050",
        "Value 'b' should have aggregated weight 1050"
    );

    let c_row = find_freq("c").expect("Should find 'c'");
    assert_eq!(
        c_row[2], "900",
        "Value 'c' should have aggregated weight 900"
    );

    // Cross-check against a single-job run: both must report the same frequencies.
    let mut cmd_seq = weighted_cmd("1");
    let mut got_seq: Vec<Vec<String>> = wrk.read_stdout(&mut cmd_seq);
    got_seq.sort_by(by_value);

    let freq_rows_seq: Vec<_> = got_seq
        .iter()
        .filter(|r| matches!(r.as_slice(), [field, _, ..] if field == "value"))
        .collect();
    assert_eq!(
        freq_rows.len(),
        freq_rows_seq.len(),
        "Parallel and sequential should have same number of frequencies"
    );

    // Both lists are sorted by value, so rows can be compared pairwise.
    for (par_row, seq_row) in freq_rows.iter().zip(&freq_rows_seq) {
        assert_eq!(par_row[1], seq_row[1], "Values should match");
        assert_eq!(
            par_row[2], seq_row[2],
            "Weights should match between parallel and sequential"
        );
    }
}

#[test]
fn frequency_weight_with_unq_limit_all_unique() {
    let wrk = Workdir::new("frequency_weight_with_unq_limit_all_unique");

    // 100 distinct ids (like an ID column); id_N carries weight N, so the total
    // weight is 1 + 2 + ... + 100 = 5050.
    let mut rows = vec![svec!["id", "weight"]];
    rows.extend((1..=100_u32).map(|i| vec![format!("id_{}", i), i.to_string()]));
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--select", "id"])
        .args(["--weight", "weight"])
        .args(["--limit", "0"])
        // Expected to have no effect on an all-unique column.
        .args(["--unq-limit", "10"]);

    wrk.assert_success(&mut cmd);

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // Only rows for the `id` field are of interest (this drops the header).
    let freq_rows: Vec<_> = got
        .iter()
        .filter(|r| matches!(r.as_slice(), [field, _, ..] if field == "id"))
        .collect();

    // The whole column must collapse into one summary row.
    assert_eq!(
        freq_rows.len(),
        1,
        "All-unique columns with --weight should show a single <ALL_UNIQUE> entry"
    );

    let summary = freq_rows[0];
    assert_eq!(summary[1], "<ALL_UNIQUE>", "Value should be <ALL_UNIQUE>");
    assert_eq!(
        summary[2], "5050",
        "Count should be sum of all weights (1+2+...+100 = 5050)"
    );
    assert_eq!(summary[4], "0", "Rank should be 0 for all-unique entries");
}

#[test]
fn frequency_weight_with_unq_limit_and_limit() {
    let wrk = Workdir::new("frequency_weight_with_unq_limit_and_limit");

    // 50 distinct ids; id_N carries weight 51 - N, so higher ids are lighter and the
    // total weight is 50 + 49 + ... + 1 = 1275.
    let mut rows = vec![svec!["id", "weight"]];
    rows.extend((1..=50_u32).map(|i| vec![format!("id_{}", i), (51 - i).to_string()]));
    wrk.create("in.csv", rows);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--select", "id"])
        .args(["--weight", "weight"])
        // Neither limit is expected to affect an all-unique column.
        .args(["--limit", "5"])
        .args(["--unq-limit", "10"]);

    wrk.assert_success(&mut cmd);

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // Only rows for the `id` field are of interest (this drops the header).
    let freq_rows: Vec<_> = got
        .iter()
        .filter(|r| matches!(r.as_slice(), [field, _, ..] if field == "id"))
        .collect();

    // One summary row, not five rows capped by --limit.
    assert_eq!(
        freq_rows.len(),
        1,
        "All-unique columns with --weight should show a single <ALL_UNIQUE> entry regardless of \
         --limit"
    );

    let summary = freq_rows[0];
    assert_eq!(summary[1], "<ALL_UNIQUE>", "Value should be <ALL_UNIQUE>");
    assert_eq!(
        summary[2], "1275",
        "Count should be sum of all weights (1+2+...+50 = 1275)"
    );
    assert_eq!(summary[4], "0", "Rank should be 0 for all-unique entries");
}

#[test]
fn frequency_weight_unq_limit_vs_unweighted() {
    // Orders rows by the `value` column (index 1); rows lacking that column compare equal.
    let by_value = |a: &Vec<String>, b: &Vec<String>| match (a.get(1), b.get(1)) {
        (Some(x), Some(y)) => x.cmp(y),
        _ => std::cmp::Ordering::Equal,
    };

    let wrk = Workdir::new("frequency_weight_unq_limit_vs_unweighted");

    // 20 distinct ids, each with weight 1.0 (total weight 20).
    let mut rows = vec![svec!["id", "weight"]];
    rows.extend((1..=20_u32).map(|i| vec![format!("id_{}", i), "1.0".to_owned()]));
    wrk.create("in.csv", rows);

    // --- Unweighted run -------------------------------------------------------------
    // The stats cache is disabled to force the frequencies to be computed.
    // --unq-limit only applies when --limit > 0 and --limit != --unq-limit.
    let mut cmd_unweighted = wrk.command("frequency");
    cmd_unweighted
        .env("QSV_STATSCACHE_MODE", "none")
        .arg("in.csv")
        .args(["--select", "id"])
        .args(["--limit", "10"])
        .args(["--unq-limit", "5"]);

    let mut got_unweighted: Vec<Vec<String>> = wrk.read_stdout(&mut cmd_unweighted);
    got_unweighted.sort_by(by_value);

    // Count only real per-value rows: skip the header, any <ALL_UNIQUE> summary and
    // any "Other ..." bucket.
    let freq_rows_unweighted: Vec<_> = got_unweighted
        .iter()
        .filter(|r| {
            matches!(
                r.as_slice(),
                [field, value, ..]
                    if field == "id" && value != "<ALL_UNIQUE>" && !value.starts_with("Other")
            )
        })
        .collect();

    // --- Weighted run ---------------------------------------------------------------
    // Here --unq-limit is expected to be ignored.
    let mut cmd_weighted = wrk.command("frequency");
    cmd_weighted
        .arg("in.csv")
        .args(["--select", "id"])
        .args(["--weight", "weight"])
        .args(["--limit", "0"])
        .args(["--unq-limit", "5"]);

    let mut got_weighted: Vec<Vec<String>> = wrk.read_stdout(&mut cmd_weighted);
    got_weighted.sort_by(by_value);

    let freq_rows_weighted: Vec<_> = got_weighted
        .iter()
        .filter(|r| matches!(r.as_slice(), [field, _, ..] if field == "id"))
        .collect();

    // Unweighted output is capped at 5 values by --unq-limit, whereas the weighted output
    // collapses to a single <ALL_UNIQUE> summary row.
    assert_eq!(
        freq_rows_unweighted.len(),
        5,
        "Unweighted frequency should be limited by --unq-limit"
    );
    assert_eq!(
        freq_rows_weighted.len(),
        1,
        "Weighted frequency with all-unique columns should show a single <ALL_UNIQUE> entry"
    );

    let summary = freq_rows_weighted[0];
    assert_eq!(
        summary[1], "<ALL_UNIQUE>",
        "Weighted frequency should show <ALL_UNIQUE> for all-unique columns"
    );
    // Total weight: 20 rows * 1.0 = 20.
    assert_eq!(
        summary[2], "20",
        "Count should be sum of all weights (20 * 1.0 = 20)"
    );
}

#[test]
fn frequency_weight_unq_limit_with_limit_zero() {
    let wrk = Workdir::new("frequency_weight_unq_limit_with_limit_zero");

    // Ten distinct values ("a" through "j"); only "a" is repeated, so the column is
    // not all-unique.
    let input = vec![
        svec!["value", "weight"],
        svec!["a", "10.0"],
        svec!["a", "5.0"],
        svec!["b", "3.0"],
        svec!["c", "2.0"],
        svec!["d", "1.0"],
        svec!["e", "1.0"],
        svec!["f", "1.0"],
        svec!["g", "1.0"],
        svec!["h", "1.0"],
        svec!["i", "1.0"],
        svec!["j", "1.0"],
    ];
    wrk.create("in.csv", input);

    // --unq-limit should not matter for a column that is not all-unique; the point
    // of this test is that --limit 0 still lists every value.
    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--select", "value"])
        .args(["--weight", "weight"])
        .args(["--limit", "0"])
        .args(["--unq-limit", "5"]);

    wrk.assert_success(&mut cmd);

    let mut got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    // Order by the `value` column (index 1); rows lacking that column compare equal.
    got.sort_by(|a, b| match (a.get(1), b.get(1)) {
        (Some(x), Some(y)) => x.cmp(y),
        _ => std::cmp::Ordering::Equal,
    });

    // Keep only frequency rows for the `value` field (this drops the header).
    let freq_rows: Vec<_> = got
        .iter()
        .filter(|r| matches!(r.as_slice(), [field, _, ..] if field == "value"))
        .collect();

    // a, b, c, d, e, f, g, h, i, j
    assert_eq!(
        freq_rows.len(),
        10,
        "With --limit 0, all unique values should be shown"
    );
}

#[test]
fn frequency_weight_nan_values() {
    let wrk = Workdir::new("frequency_weight_nan_values");

    // "a" and "b" each have one valid weight and one NaN weight (in two spellings);
    // "c" has only a valid weight.
    let input = vec![
        svec!["value", "weight"],
        svec!["a", "1.0"],
        svec!["a", "NaN"],
        svec!["b", "2.0"],
        svec!["b", "nan"],
        svec!["c", "3.0"],
    ];
    wrk.create("in.csv", input);

    // Builds the weighted frequency command used for both runs below.
    let weighted_cmd = || {
        let mut cmd = wrk.command("frequency");
        cmd.arg("in.csv")
            .args(["--limit", "0"])
            .args(["--select", "value"])
            .args(["--weight", "weight"]);
        cmd
    };

    // First run: the command must succeed despite the NaN weights.
    let mut cmd = weighted_cmd();
    wrk.assert_success(&mut cmd);

    // Second run, with a freshly built command, captures the output.
    let mut cmd = weighted_cmd();
    let mut got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    // Order by the `value` column (index 1); rows lacking that column compare equal.
    got.sort_by(|a, b| match (a.get(1), b.get(1)) {
        (Some(x), Some(y)) => x.cmp(y),
        _ => std::cmp::Ordering::Equal,
    });

    // Keep only frequency rows for the `value` field, excluding anything header-like.
    let freq_rows: Vec<_> = got
        .iter()
        .filter(|r| {
            matches!(r.as_slice(), [field, value, ..] if field == "value" && value != "value")
        })
        .collect();

    let find_freq = |value: &str| freq_rows.iter().copied().find(|r| r[1] == value);

    // This test is primarily about graceful handling (no panic). What happens to the
    // NaN rows depends on how the weight parser treats "NaN":
    // - parsed as a non-finite number: the NaN-weighted entries get filtered out
    // - rejected by the parser: the weight defaults to 1.0 and the entries appear

    // "c" has no NaN weights, so it must be present with its exact weight.
    let c_row = find_freq("c");
    assert!(c_row.is_some(), "Value 'c' should appear (no NaN weights)");
    if let Some(c_freq) = c_row {
        assert_eq!(c_freq[2], "3", "Value 'c' should have weight 3");
    }

    // "a" and "b" may or may not be present; when they are, their count must be a
    // positive integer (an unparsable count is treated as 0 and fails the check).
    for (value, msg) in [
        ("a", "Value 'a' should have positive weight if it appears"),
        ("b", "Value 'b' should have positive weight if it appears"),
    ] {
        if let Some(row) = find_freq(value) {
            let weight: u64 = row[2].parse().unwrap_or(0);
            assert!(weight > 0, "{}", msg);
        }
    }
}

#[test]
fn frequency_weight_infinity_values() {
    let wrk = Workdir::new("frequency_weight_infinity_values");

    // Finite weights mixed with positive infinity (two spellings) and negative infinity.
    let input = vec![
        svec!["value", "weight"],
        svec!["a", "1.0"],
        svec!["a", "Inf"],
        svec!["b", "2.0"],
        svec!["b", "inf"],
        svec!["c", "3.0"],
        svec!["d", "-Inf"],
    ];
    wrk.create("in.csv", input);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--limit", "0"])
        .args(["--select", "value"])
        .args(["--weight", "weight"]);

    wrk.assert_success(&mut cmd);

    // Only the finite weights are expected to count: c = 3, b = 2, a = 1 (total 6).
    // "d" has nothing but an infinite weight and is expected to be absent altogether.
    // The output is compared unsorted, so the row order (heaviest first) matters too.
    let expected_values = vec![
        svec!["field", "value", "count", "percentage", "rank"],
        svec!["value", "c", "3", "50", "1"],
        svec!["value", "b", "2", "33.33333", "2"],
        svec!["value", "a", "1", "16.66667", "3"],
    ];

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    assert_eq!(got, expected_values);
}

#[test]
fn frequency_weight_extremely_large_values() {
    let wrk = Workdir::new("frequency_weight_extremely_large_values");

    // A finite weight of twice u64::MAX, rendered as a decimal string.
    let huge_weight = (u64::MAX as f64 * 2.0).to_string();
    // "a" occurs twice so the column is not all-unique; "b", "c" and "d" carry
    // increasingly large finite weights (1e20, 2 * u64::MAX, 1e308).
    let input = vec![
        svec!["value", "weight"],
        svec!["a", "1.0"],
        svec!["a", "1.0"],
        svec!["b", "1e20"],
        vec!["c".to_string(), huge_weight],
        svec!["d", "1e308"],
    ];
    wrk.create("in.csv", input);

    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--limit", "0"])
        .args(["--select", "value"])
        .args(["--weight", "weight"]);

    wrk.assert_success(&mut cmd);

    let mut got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    // Order by the `value` column (index 1); rows lacking that column compare equal.
    got.sort_by(|a, b| match (a.get(1), b.get(1)) {
        (Some(x), Some(y)) => x.cmp(y),
        _ => std::cmp::Ordering::Equal,
    });

    // Keep only frequency rows for the `value` field (this drops the header).
    let freq_rows: Vec<_> = got
        .iter()
        .filter(|r| matches!(r.as_slice(), [field, _, ..] if field == "value"))
        .collect();

    // None of the large weights may cause a value to be dropped: a, b, c, d.
    assert_eq!(freq_rows.len(), 4, "Should have 4 values");

    let find_freq = |value: &str| freq_rows.iter().copied().find(|r| r[1] == value);

    // A weight beyond the u64 range must be reported as exactly u64::MAX.
    let c_row = find_freq("c").expect("Should find 'c'");
    let c_count: u64 = c_row[2].parse().expect("Should parse count");
    assert_eq!(
        c_count,
        u64::MAX,
        "Extremely large weight should be clamped to u64::MAX"
    );

    // Ordinary weights are unaffected: a = 1.0 + 1.0 = 2.
    let a_row = find_freq("a").expect("Should find 'a'");
    assert_eq!(a_row[2], "2", "Value 'a' should have weight 2 (1.0 + 1.0)");
}

#[test]
fn frequency_weight_mixed_invalid_values() {
    let wrk = Workdir::new("frequency_weight_mixed_invalid_values");

    // Very large but finite; used instead of f64::MAX to sidestep potential parsing issues.
    let huge_weight_str = "1e100";
    // One row per kind of weight: valid, NaN, infinite, zero, negative and huge.
    let input = vec![
        svec!["value", "weight"],
        svec!["valid1", "5.0"],
        svec!["valid2", "10.0"],
        svec!["nan1", "NaN"],
        svec!["nan2", "nan"],
        svec!["inf1", "Inf"],
        svec!["inf2", "infinity"],
        svec!["neginf", "-Inf"],
        svec!["zero", "0.0"],
        svec!["negative", "-5.0"],
        svec!["huge", huge_weight_str],
    ];
    wrk.create("in.csv", input);

    // Builds the weighted frequency command used for both runs below.
    let weighted_cmd = || {
        let mut cmd = wrk.command("frequency");
        cmd.arg("in.csv")
            .args(["--limit", "0"])
            .args(["--select", "value"])
            .args(["--weight", "weight"]);
        cmd
    };

    // First run: the command must succeed despite the invalid weights.
    let mut cmd = weighted_cmd();
    wrk.assert_success(&mut cmd);

    // Second run, with a freshly built command, captures the output.
    let mut cmd = weighted_cmd();
    let mut got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    // Order by the `value` column (index 1); rows lacking that column compare equal.
    got.sort_by(|a, b| match (a.get(1), b.get(1)) {
        (Some(x), Some(y)) => x.cmp(y),
        _ => std::cmp::Ordering::Equal,
    });

    // Keep only frequency rows for the `value` field, excluding anything header-like.
    let freq_rows: Vec<_> = got
        .iter()
        .filter(|r| {
            matches!(r.as_slice(), [field, value, ..] if field == "value" && value != "value")
        })
        .collect();

    let find_freq = |value: &str| freq_rows.iter().copied().find(|r| r[1] == value);

    // This test is primarily about graceful handling (no panic). Exact results depend
    // on how the weight parser treats each spelling and on how weights are aggregated,
    // so most checks below are conditional on the value being present at all.

    // Something must be reported.
    assert!(
        !freq_rows.is_empty(),
        "Should have at least some frequency values"
    );

    // Valid weights: if the value is listed, its count must be a positive integer
    // (an unparsable count is treated as 0 and fails the check).
    for (value, msg) in [
        ("valid1", "valid1 should have positive weight if it appears"),
        ("valid2", "valid2 should have positive weight if it appears"),
    ] {
        if let Some(row) = find_freq(value) {
            let weight: u64 = row[2].parse().unwrap_or(0);
            assert!(weight > 0, "{}", msg);
        }
    }

    // Weights that are <= 0.0 must never produce a row.
    assert!(
        find_freq("neginf").is_none(),
        "Negative infinity should be skipped (weight <= 0.0)"
    );
    assert!(
        find_freq("zero").is_none(),
        "Zero weights should be skipped (weight <= 0.0)"
    );
    assert!(
        find_freq("negative").is_none(),
        "Negative weights should be skipped (weight <= 0.0)"
    );

    // NaN / +infinity rows are deliberately not asserted on: they are filtered out if
    // parsed as non-finite, or appear with a default weight of 1.0 if parsing fails.

    // "huge" is finite but far beyond the u64 range; if listed, it must be clamped.
    if let Some(huge_row) = find_freq("huge") {
        let huge_count: u64 = huge_row[2].parse().expect("Should parse count");
        assert_eq!(
            huge_count,
            u64::MAX,
            "Extremely large weight should be clamped to u64::MAX"
        );
    }
}

// Regression test for issue #3223: with `--weight`, no "Other (0)" entry should appear when
// `--limit 0` already includes every value, while a positive `--limit` must still fold the
// remaining values into a single "Other" entry carrying their combined weight.
#[test]
fn frequency_weight_no_other_zero() {
    // Orders rows by their value column (index 1). `get` returns `None` for rows that are too
    // short and `Option` orders `None` before any `Some`, so the comparator is a total order,
    // which `sort_by` requires.
    fn sort_by_value(rows: &mut [Vec<String>]) {
        rows.sort_by(|a, b| a.get(1).cmp(&b.get(1)));
    }

    // Keeps only the frequency rows reported for the "value" field, dropping the header row.
    fn data_rows(rows: &[Vec<String>]) -> Vec<&Vec<String>> {
        rows.iter()
            .filter(|r| r.len() > 1 && r[0] == "value" && r[1] != "value")
            .collect()
    }

    let wrk = Workdir::new("frequency_weight_no_other_zero");

    // Create a dataset with multiple values and weights.
    // "a" is duplicated so the column is not detected as all-unique.
    let rows = vec![
        svec!["value", "weight"],
        svec!["a", "5.0"],
        svec!["a", "2.0"], // duplicate to make it not all-unique
        svec!["b", "3.0"],
        svec!["c", "2.0"],
        svec!["d", "1.0"],
    ];
    wrk.create("in.csv", rows);

    // --- Part 1: --limit 0 => all values are included, no "Other" entry ---
    let mut cmd = wrk.command("frequency");
    cmd.arg("in.csv")
        .args(["--limit", "0"])
        .args(["--unq-limit", "0"])
        .args(["--select", "value"])
        .args(["--weight", "weight"]);

    wrk.assert_success(&mut cmd);

    let mut got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    sort_by_value(&mut got);
    let freq_rows = data_rows(&got);

    // Should have exactly 4 values (a, b, c, d), no "Other" entry
    assert_eq!(
        freq_rows.len(),
        4,
        "With --limit 0, all values should be included, no 'Other' entry"
    );

    // Verify no "Other" entry exists
    let has_other = freq_rows
        .iter()
        .any(|r| r.len() > 1 && r[1].starts_with("Other"));
    assert!(
        !has_other,
        "Should not have 'Other' entry when all values are included (--limit 0)"
    );

    // Looks up the frequency row for a given value, if present.
    let find_freq = |value: &str| -> Option<&Vec<String>> {
        freq_rows.iter().find(|r| r[1] == value).copied()
    };

    // Every value must be present with its summed weight; "a" is 5.0 + 2.0 = 7.
    for (value, weight) in [("a", "7"), ("b", "3"), ("c", "2"), ("d", "1")] {
        let row = find_freq(value).unwrap_or_else(|| panic!("Should find '{}'", value));
        assert_eq!(
            row[2], weight,
            "Value '{}' should have weight {}",
            value, weight
        );
    }

    // --- Part 2: --limit 2 => top 2 values plus one "Other" entry for the rest ---
    let mut cmd_limit = wrk.command("frequency");
    cmd_limit
        .arg("in.csv")
        .args(["--limit", "2"])
        .args(["--select", "value"])
        .args(["--weight", "weight"]);

    wrk.assert_success(&mut cmd_limit);

    let mut got_limit: Vec<Vec<String>> = wrk.read_stdout(&mut cmd_limit);
    sort_by_value(&mut got_limit);
    let freq_rows_limit = data_rows(&got_limit);

    // Should have 2 top values + 1 "Other" entry = 3 total
    assert_eq!(
        freq_rows_limit.len(),
        3,
        "With --limit 2, should have 2 top values + 1 'Other' entry"
    );

    // Verify "Other" entry exists and has correct count
    let other_entry = freq_rows_limit
        .iter()
        .find(|r| r.len() > 1 && r[1].starts_with("Other"))
        .expect("Should have 'Other' entry when --limit is set");

    // "a" has weight 7.0 and "b" has 3.0, so they are the top 2.
    // Remaining: "c" (2.0) + "d" (1.0) = 3.0
    assert_eq!(
        other_entry[2], "3",
        "Other entry should have weight 3 (c + d)"
    );
}
