use anyhow::{Context, Result, bail};
use log::*;

mod nom_parsing;
use nom_parsing::{parse_file, read_file_contents};

mod checker;
mod detector;

#[macro_use]
mod messages;
use messages::{explains, explains_all};

mod args;
mod linkcheck;
mod utils;
use clap::{CommandFactory, Parser};
use clap_complete::CompleteEnv;

use args::{Args, Shell};
mod recode;
use recode::{wrong_line_endings2crlf, wrong_line_endings2lf};

use checker::{Issue, check_file};
use detector::{DetectResult, LineEnding, detect};

use linkcheck::LinkCheck;
use std::fmt;
use std::fmt::Display;
use std::sync::LazyLock;

use std::str;
use utils::*;

use scoped_threadpool::Pool;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::os::unix::fs::MetadataExt;

use tempfile::Builder;

use colored::Colorize;

use regex::Regex;

use std::fs::File;
use std::fs::Metadata;
use std::io::BufReader;
use std::io::prelude::*;
use std::os::unix::fs::FileTypeExt;
use std::os::unix::fs::PermissionsExt;
use std::{fs, process};

use std::ffi::OsStr;
use std::io::Read;
use std::path::Path;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};

use rustc_hash::FxHashMap as HashMap;
use rustc_hash::FxHashSet as HashSet;
use std::time::SystemTime;

use std::fmt::Arguments;

use std::sync::mpsc::{Sender, channel};

#[cfg(unix)]
use walkdir::{DirEntry, WalkDir};

/// Size of a file in bytes (the value of `Metadata::len()`).
#[derive(Hash, Clone, Copy, Eq, PartialEq)]
struct FileSize(u64);

/// Number of the device a file lives on (the value of `MetadataExt::dev()`).
#[derive(Hash, Clone, Copy, Eq, PartialEq)]
struct Devno(u64);
/// Inode number of a file (the value of `MetadataExt::ino()`).
#[derive(Hash, Clone, Copy, Eq, PartialEq)]
struct Inode(u64);

impl fmt::Display for FileSize {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
        self.0.fmt(f)
    }
}

/// Returns `true` when the modification time `mtime` lies strictly after `now`.
///
/// No tolerance is applied: any instant later than `now` counts as "future".
fn is_future_mtime(now: SystemTime, mtime: SystemTime) -> bool {
    now < mtime
}

/// Renders a log message and, unless `no_color` is set, colorizes its first
/// word (the message id) according to the id's first character:
///
/// - `E` / `F`: bright red, bold
/// - `W`: bright red
/// - `I`: bright yellow, bold
///
/// Messages that start with a space (continuation lines), messages without
/// any space, and messages whose id starts with another character are
/// returned unchanged. Such messages never cause a panic.
fn format_message(message: &Arguments, no_color: bool) -> Cow<'static, str> {
    let msg_str = message.to_string();
    if no_color || msg_str.starts_with(' ') {
        return msg_str.into();
    }
    // A message consisting of a single word (or an empty message) has no id
    // that could be colorized separately from the rest.
    let Some((left, right)) = msg_str.split_once(' ') else {
        return msg_str.into();
    };
    let colored_left = match left.chars().next() {
        Some('E' | 'F') => left.bright_red().bold(),
        Some('W') => left.bright_red(),
        Some('I') => left.bright_yellow().bold(),
        _ => return msg_str.into(),
    };
    format!("{colored_left} {right}").into()
}

/// One entry of the `tds_path_exceptions` list in the YAML config file.
///
/// `read_yaml_config` stores `tpkg` as the replacement for the package
/// name `pkg`. Unknown YAML fields are rejected.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct TdsException {
    /// Package name that is looked up.
    pub pkg: String,
    /// Name used instead of `pkg`.
    pub tpkg: String,
}

/// Top-level structure of the YAML config file. Unknown YAML fields are rejected.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PathExceptions {
    /// User-defined package name replacements.
    pub tds_path_exceptions: Vec<TdsException>,
}

/// Determines which config file to use.
///
/// Returns the file given on the command line (`ARGS.config_file`) if any.
/// Otherwise the first existing file of `~/.ctan/pkgcheck.yml` and
/// `~/.config/ctan/pkgcheck.yml` is returned, or `None` if neither exists.
///
/// # Errors
///
/// Fails if no explicit config file was given and the home directory
/// cannot be determined.
fn get_config_file_name() -> Result<Option<String>> {
    if let Some(config_file) = &ARGS.config_file {
        return Ok(Some(config_file.clone()));
    }

    // Report a missing home directory as an error instead of panicking.
    let home_dir = home::home_dir()
        .context("Impossible to get your home dir!")?
        .display()
        .to_string();

    let found = [".ctan/pkgcheck.yml", ".config/ctan/pkgcheck.yml"]
        .into_iter()
        .map(|f| format!("{home_dir}/{f}"))
        .find(|candidate| Path::new(candidate).exists());
    Ok(found)
}

/// Builds the map of package name replacements.
///
/// The map starts with the built-in replacements and is then extended (or
/// overridden) with the `tds_path_exceptions` of the YAML config file, if
/// a config file is found by `get_config_file_name`.
///
/// In verbose mode every entry of the config file which hits an already
/// known package name is reported: `w0009` if it repeats the known value,
/// `i0009` if it overrides it.
///
/// # Errors
///
/// Fails if the config file cannot be determined, read, or parsed.
fn read_yaml_config() -> Result<HashMap<String, String>> {
    // Built-in replacements: (package name, replacement)
    const BUILTIN: &[(&str, &str)] = &[
        ("armtex", "armenian"),
        ("babel-base", "babel"),
        ("l3backend-dev", "latex-dev/l3backend"),
        ("l3kernel-dev", "latex-dev/l3kernel"),
        ("latex-amsmath", "latex"),
        ("latex-amsmath-dev", "latex-dev"),
        ("latex-base", "latex"),
        ("latex-base-dev", "latex-dev"),
        ("latex-cyrillic", "cyrillic"),
        ("latex-firstaid", "latex/firstaid"),
        ("latex-firstaid-dev", "latex-dev/firstaid"),
        ("latex-graphics", "latex"),
        ("latex-graphics-dev", "latex-dev"),
        ("latex-lab", "latex"),
        ("latex-lab-dev", "latex-dev"),
        ("latex-tools", "latex"),
        ("latex-tools-dev", "latex-dev"),
        ("vntex-nonfree", "vntex"),
    ];

    let mut replacements: HashMap<String, String> = HashMap::default();
    for &(pkg, replacement) in BUILTIN {
        replacements.insert(pkg.to_string(), replacement.to_string());
    }

    // Without a config file the built-in table is all there is.
    let Some(config_filename) = get_config_file_name()? else {
        return Ok(replacements);
    };
    i0008!(config_filename);

    let data = fs::read_to_string(&config_filename)
        .with_context(|| format!("Config file {config_filename} could not be read"))?;

    let path_exceptions = serde_yaml::from_str::<PathExceptions>(&data)
        .with_context(|| format!("Problem with YAML content of config file {config_filename}"))?;

    for exception in &path_exceptions.tds_path_exceptions {
        // tell the user if the package name is already known
        if ARGS.verbose {
            match replacements.get(&exception.pkg) {
                Some(previous) if *previous == exception.tpkg => {
                    w0009!(exception.pkg, exception.tpkg);
                }
                Some(previous) => {
                    i0009!(exception.pkg, previous, exception.tpkg);
                }
                None => {}
            }
        }
        replacements.insert(exception.pkg.clone(), exception.tpkg.clone());
    }

    Ok(replacements)
}

/// Installs the global logger.
///
/// Every message is passed through `format_message` (which colorizes the
/// message id unless `no_color` is set) and written to stdout. The default
/// level is `Info`; messages of the `lopdf` target are limited to `Error`.
fn setup_logger(no_color: bool) -> Result<(), fern::InitError> {
    let dispatch = fern::Dispatch::new()
        .format(move |out, message, _record| {
            out.finish(format_args!("{}", format_message(message, no_color)))
        })
        .level(log::LevelFilter::Info)
        .level_for("lopdf", log::LevelFilter::Error)
        .chain(std::io::stdout());

    dispatch.apply()?;
    Ok(())
}

/// Sending half of the channel on which a hashing job reports
/// `(file size, path, hash bytes)` of the file it has hashed.
type HashSender = Sender<(FileSize, PathBuf, Vec<u8>)>;

/// SizesHashMap contains
/// - file sizes
/// - and a vector of file names having that size
type SizesHashMap = HashMap<FileSize, Vec<PathBuf>>;

/// key: generated file
/// value: the generator, i.e. the .ins or .dtx file
type GeneratedHashMap = HashMap<String, String>;

/// All file names of a package.
///
/// key: the full path starting at the directory specified at the command line
/// value: the meta data of the file, the file name without any directory
/// part, and the kind of README the file is (if any)
type FileNamesHashMap = HashMap<PathBuf, (Metadata, String, ReadmeKind)>;

/// Size in bytes of the buffer used for reading a file which gets hashed.
const BLOCKSIZE: usize = 4096;

/// Computes the BLAKE3 hash of the file `path`, reading it in chunks of
/// `BLOCKSIZE` bytes.
///
/// # Errors
///
/// Fails if the file cannot be opened or read.
fn hash_file_inner(path: &Path) -> Result<Vec<u8>> {
    let mut file = File::open(path)?;
    let mut hasher = blake3::Hasher::new();
    let mut chunk = [0u8; BLOCKSIZE];

    loop {
        let n = file.read(&mut chunk)?;
        if n == 0 {
            // end of file
            break;
        }
        hasher.update(&chunk[..n]);
    }

    let digest = hasher.finalize();
    Ok(digest.as_bytes().to_vec())
}

/// Hashes the file `path` and sends `(fsize, path, hash)` over `tx`.
///
/// # Errors
///
/// Fails if the file cannot be hashed; the error context is the path.
///
/// # Panics
///
/// Panics if the receiving side of `tx` is gone.
fn hash_file(fsize: FileSize, path: PathBuf, tx: &HashSender) -> Result<()> {
    let digest = hash_file_inner(&path).with_context(|| path.display().to_string())?;
    let message = (fsize, path, digest);
    tx.send(message).unwrap();
    Ok(())
}

/// Converts the line endings of file `fname` to LF.
///
/// Returns `false` if an error occurred (which is reported as `e0027`).
fn fix_inconsistent_le(fname: &str) -> bool {
    i0004!(fname);
    if let Err(e) = wrong_line_endings2lf(fname) {
        e0027!(fname, e);
        return false;
    }
    i0007!(fname, "LF");
    true
}

/// Converts the line endings of file `fname` to CRLF.
///
/// Returns `false` if an error occurred (which is reported as `e0027`).
fn make_crlf(fname: &str) -> bool {
    i0004!(fname);
    if let Err(e) = wrong_line_endings2crlf(fname) {
        e0027!(fname, e);
        return false;
    }
    i0007!(fname, "CRLF");
    true
}

/// Checks that a README file is a proper text file.
///
/// - archives, zip files and ELF binaries are rejected (`e0003`)
/// - files with a byte order mark are rejected (`e0029`)
/// - text files are opened and checked by `check_readme_inner`
/// - every other detected type is accepted
///
/// If the README is a symlink, messages name the symlink target as well.
/// Returns `false` if a problem was found.
fn check_readme(dir_entry: &str, is_readme: &ReadmeKind, ft: &DetectResult) -> bool {
    let msg_name = match is_readme {
        ReadmeKind::Symlink(s) => format!("{dir_entry} (symlinked from {s})"),
        _ => dir_entry.to_string(),
    };

    match ft {
        DetectResult::Archive | DetectResult::Zip | DetectResult::Elf => {
            e0003!(msg_name);
            false
        }
        DetectResult::Bom(b) => {
            e0029!(msg_name, b);
            false
        }
        DetectResult::Text(_le) => match File::open(dir_entry) {
            Ok(f) => check_readme_inner(&msg_name, &f),
            Err(e) => {
                e0027!(msg_name, e);
                false
            }
        },
        _ => true,
    }
}

fn check_readme_inner(fname: &str, f: &std::fs::File) -> bool {
    let reader = BufReader::new(f);

    let lines = reader.split(b'\n').map(|l| l.unwrap());
    let mut result = true;

    for (lineno, line) in lines.enumerate() {
        if let Err(e) = String::from_utf8(line.clone()) {
            e0021!(fname, lineno + 1, e);
            result = false;
        }
    }
    result
}

/// Returns `true` if `entry` is exactly one of the accepted README file
/// names (the comparison is case-sensitive).
fn is_readme(entry: &str) -> bool {
    const README_NAMES: [&str; 3] = ["README", "README.txt", "README.md"];
    README_NAMES.contains(&entry)
}

/// Returns the number of the device the file described by `meta` lives on.
fn get_devno(meta: &Metadata) -> Devno {
    let device_id = MetadataExt::dev(meta);
    Devno(device_id)
}

/// Returns the device number of the file `entry` refers to, or `Devno(0)`
/// if the meta data of the file cannot be read.
///
/// The path is passed on as it is, so file names which are not valid
/// UTF-8 are handled, too.
fn _get_devno(entry: &DirEntry) -> Devno {
    fs::metadata(entry.path()).map_or(Devno(0), |m| Devno(m.dev()))
}

/// Records `filename` under the (device number, inode) pair of its file.
///
/// In the past we took care to avoid visiting a single inode twice, which
/// takes care of (false positive) hardlinks. Now we want to know if there
/// is a hardlink in the package directory: an entry of `set` holding more
/// than one file name means those names share one inode.
#[cfg(unix)]
fn check_inode(set: &mut HashMap<(Devno, Inode), Vec<String>>, filename: &str, meta: &Metadata) {
    let key = (get_devno(meta), Inode(meta.ino()));
    let names = set.entry(key).or_default();
    names.push(filename.to_string());
}

/// Variant of `check_inode` for platforms without inodes: records nothing.
///
/// The signature matches the unix variant so that callers compile
/// unchanged on every platform.
#[cfg(not(unix))]
fn check_inode(_: &mut HashMap<(Devno, Inode), Vec<String>>, _: &str, _: &Metadata) {}

/// Command line arguments, parsed on first access.
static ARGS: LazyLock<Args> = LazyLock::new(Args::parse);
/// Read by `main` to choose the exit code: if set, the program exits with 1.
// NOTE(review): presumably set by the error message macros -- confirm in `messages`.
static ERROR_OCCURRED: AtomicBool = AtomicBool::new(false);
/// Read by `main` to choose the exit code: if set, the program exits with 1
/// unless `ARGS.warnings_no_errors` is given.
// NOTE(review): presumably set by the warning message macros -- confirm in `messages`.
static WARNING_OCCURRED: AtomicBool = AtomicBool::new(false);
/// The current time, taken on first access.
static NOW: LazyLock<SystemTime> = LazyLock::new(SystemTime::now);

/// A path stored in a `DupPath`, tagged by `DupPath::push`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DPath {
    /// Any path which does not end with `.tfm`; counted in `DupPath::plen`
    /// and listed by `print_duplicates`.
    Both(PathBuf),
    /// A path ending with `.tfm`; neither counted in `DupPath::plen` nor
    /// listed by `print_duplicates`.
    Tds(PathBuf),
}

/// A `DPath` is displayed as the wrapped path, whatever the variant.
impl fmt::Display for DPath {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let (DPath::Both(path) | DPath::Tds(path)) = self;
        write!(f, "{}", path.display())
    }
}

/// The paths of all files sharing the same size and hash.
#[derive(Default)]
pub struct DupPath {
    /// Number of paths pushed so far.
    len: usize,
    /// Number of pushed paths which do not end with `.tfm`.
    plen: usize,
    /// The pushed paths in insertion order.
    dupes: Vec<DPath>,
}

impl DupPath {
    pub fn new() -> DupPath {
        DupPath {
            len: 0,
            plen: 0,
            dupes: Vec::new(),
        }
    }

    pub fn push(&mut self, pb: PathBuf) {
        let pname = pb.to_string_lossy();
        self.len += 1;
        if pname.ends_with(".tfm") {
            self.dupes.push(DPath::Tds(pb.clone()));
        } else {
            self.plen += 1;
            self.dupes.push(DPath::Both(pb.clone()));
        }
    }
}

/// key: file size and hash of the file content
/// value: the paths of all files having that size and hash
type DupHashes = HashMap<(FileSize, Vec<u8>), DupPath>;

fn main() -> Result<()> {
    CompleteEnv::with_factory(Args::command).complete();
    let _ = setup_logger(ARGS.no_colors);

    match &ARGS.explain {
        None => (),
        Some(e) => {
            explains(e);
            process::exit(0);
        }
    }

    if let Some(shell) = &ARGS.generate_completion {
        match shell {
            Shell::Nushell => print!("{}", args::nushell_completion()),
            other => println!("{}", other.usage("pkgcheck")),
        }
        process::exit(0);
    }

    if ARGS.explain_all {
        explains_all();
        process::exit(0);
    }

    if ARGS.show_tmp_endings {
        show_tmp_endings();
        process::exit(0);
    }

    // read yaml config file if one is given explicitly or implicitly
    let pkg_replace: HashMap<String, String> = read_yaml_config()?;

    let pkg_dir = match &ARGS.pkg_dir {
        None => {
            bail!("Specify a directory to check (use option -d)");
        }
        Some(d) => {
            // make sure the given directory ends with a '/' (slash)
            let ds: String = if d.ends_with('/') {
                d.clone()
            } else {
                let d_s = d.clone();
                d_s + "/"
            };
            exists_dir(&ds).with_context(|| format!("Specified package directory {}", &ds))?;
            ds
        }
    };

    let tds_zip = &ARGS.tds_zip;

    // let's check if the specified TDS archive
    // - does exist?
    // - is a zip archive?
    if let Some(tds_zip) = tds_zip {
        exists_file(tds_zip).with_context(|| format!("Specified TDS zip archive {}", &tds_zip))?;

        let pkg_name = get_package_name_from_tds_archive_name(tds_zip)?;

        let p = Path::new(tds_zip);

        let result = detect(p)?;
        let cr = check_file(p, &result)?;

        if let Some(Issue::MimeMismatch { .. }) = &cr.mime_issue {
            //            println!("{}: expected {expected}, got {got}", tds_zip);
            bail!("TDS archive {tds_zip} is not a zip archive")
        }

        if let Some(hashes) = check_package(&pkg_dir, &Some(tds_zip))? {
            check_tds_archive(tds_zip, &hashes, &pkg_replace, &pkg_name)?;
        }
    } else {
        let _ = check_package(&pkg_dir, &None)?;
    }

    if ARGS.correct_perms || ARGS.correct_le {
        process::exit(0);
    }
    if ERROR_OCCURRED.load(Ordering::Relaxed)
        || (WARNING_OCCURRED.load(Ordering::Relaxed) && !ARGS.warnings_no_errors)
    {
        process::exit(1);
    }

    process::exit(0);
}

/// Reports duplicate files.
///
/// A group of files (same size and hash) is reported if it contains more
/// than one path counted in `plen`; only the `DPath::Both` paths of such a
/// group are listed. The header `w0002` is printed once before the first
/// group. In verbose mode a short statistic follows if duplicates exist.
///
/// # Panics
///
/// Panics if a listed path is not valid UTF-8.
// NOTE(review): `total_files` also counts the groups without duplicates
// although the statistic labels it "duplicate files" -- confirm intent.
fn print_duplicates(hashes: &DupHashes) {
    let mut total_dupes: u64 = 0;
    let mut total_files: usize = 0;
    let mut total_size: u64 = 0;
    let mut header_printed = false;

    for ((size, _hash), group) in hashes {
        if group.plen <= 1 {
            total_files += 1;
            total_size += size.0;
            continue;
        }
        if !header_printed {
            w0002!();
            header_printed = true;
        }

        // all files of the group but one are superfluous
        let surplus = (group.plen - 1) as u64;
        total_files += group.plen;
        total_size += size.0 * surplus;
        total_dupes += surplus;

        info!("Size: {}", size.0);
        let listed = group.dupes.iter().filter_map(|dupe| match dupe {
            DPath::Both(path) => Some(path),
            DPath::Tds(_) => None,
        });
        for path in listed {
            let ps = path.as_path().to_str().unwrap();
            info!("   >>> {ps}");
        }
    }

    if ARGS.verbose && total_dupes > 0 {
        info!("Duplicate statistics");
        info!("  Found {total_files} duplicate files");
        info!("  Size of duplicate files: {total_size}");
    }
}

/// The type of a directory entry as determined by `get_filetype`.
#[derive(Debug, PartialEq, Eq)]
pub enum FType {
    /// None of the other types.
    Regular,
    Directory,
    Symlink,
    BlockDevice,
    CharDevice,
    Fifo,
    Socket,
    /// The meta data of the entry could not be read; holds the error message.
    Error(String),
}

fn get_filetype(entry: &DirEntry) -> FType {
    match entry.metadata() {
        Ok(mt) => {
            let ft = mt.file_type();
            if ft.is_symlink() {
                return FType::Symlink;
            }
            if ft.is_dir() {
                return FType::Directory;
            }
            if ft.is_block_device() {
                return FType::BlockDevice;
            }
            if ft.is_char_device() {
                return FType::CharDevice;
            }
            if ft.is_fifo() {
                return FType::Fifo;
            }
            if ft.is_socket() {
                return FType::Socket;
            }
            FType::Regular
        }
        Err(e) => FType::Error(format!("{e}")),
    }
}

/// entry: relative full path name
/// generated: store found file names which are generated
fn check_generated_files(entry: &str, generated: &mut GeneratedHashMap) {
    match read_file_contents(Path::new(entry)) {
        Ok(content) => {
            let file_path = Path::new(entry);
            for fname in parse_file(file_path, &content) {
                // If the filename in the generate statement contains a path component
                // we ignore it so that a generated file will be reported even if it is
                // in a different place in the package directory which sometimes
                // happens in uploaded packages
                let fname_opt = utils::filename(&fname);
                if fname_opt.is_none() {
                    continue;
                }
                let filename = fname_opt.unwrap();

                // As we request a README in the top level directory of
                // a package we ignore if a README was generated by an
                // .ins or .dtx file
                // CAVEAT: If this happens in a subdirectory it could be an error!!!!
                if is_readme(filename) {
                    continue;
                }

                // Ignore generated pdf, html, and css files
                if fname.ends_with(".pdf") || fname.ends_with(".html") || fname.ends_with(".css") {
                    continue;
                }

                generated
                    .entry(filename.to_string())
                    .or_insert_with(|| entry.to_string());

                //generated.entry(target).or_insert_with(|| entry.to_string());
            }
        }
        Err(e) => error!("Could not read {entry}: {e}"),
    }
}

/// Returns `true` if at least one of the execute bits (owner, group,
/// others) is set in the mode `p`. File type bits of `p` are ignored.
fn x_bit_set(p: u32) -> bool {
    const ANY_EXECUTE: u32 = 0o111;
    (p & ANY_EXECUTE) != 0
}

/// Derives the package name from the name of a TDS zip archive: the base
/// name of `tds_zip` without its `.tds.zip` suffix.
///
/// # Errors
///
/// Fails if the name does not end with `.tds.zip` or if nothing is left
/// of the base name after removing the suffix (e.g. `dir/.tds.zip`).
fn get_package_name_from_tds_archive_name(tds_zip: &str) -> Result<String> {
    let base = String::from(utils::basename(tds_zip));
    // The suffix is checked against the base name, not the full path, so a
    // directory part can never stand in for the package name.
    match base.strip_suffix(".tds.zip") {
        Some(pname) if !pname.is_empty() => Ok(pname.to_string()),
        _ => {
            bail!("Bad file name {tds_zip} for the TDS zip archive");
        }
    }
}

/// Checks the TDS zip archive `tds_zip` against the package.
///
/// - checks the permissions of the archive file itself (`e0024`) and sets
///   them to `0o664` if `ARGS.correct_perms` is given
/// - unpacks the archive into a temporary directory and walks the unpacked
///   tree, checking the top level layout, directories, file names, file
///   sizes, the location of .dtx/.ins files, the presence of the package
///   name in each path, and font map files
/// - hashes all files of the archive and reports every entry of `hashes`
///   (the package files) whose size and hash is not found in the archive
///   (`e0026`)
/// - reports names differing only in case via `print_casefolding_tds`
///
/// `pkg_replace` maps a package name to the name expected in the paths of
/// the archive instead; `pkg_name` is the package name derived from the
/// archive name.
///
/// # Errors
///
/// Fails if the permissions of the archive cannot be read or set, if the
/// temporary directory cannot be created, or if unpacking fails.
///
/// # Panics
///
/// Panics if the path of the temporary directory is not valid UTF-8, if
/// the archive contains an entry which is neither file, directory, nor
/// symlink, or if hashing a file fails.
fn check_tds_archive(
    tds_zip: &str,
    hashes: &DupHashes,
    pkg_replace: &HashMap<String, String>,
    pkg_name: &str,
) -> Result<()> {
    i0003!(tds_zip);

    // key: lowercased path, value: all paths (and their kind) folding to the key
    let mut lcnames: HashMap<PathBuf, Vec<(PathBuf, FileKind)>> = HashMap::default();

    let dir_entry = Path::new(tds_zip);
    let p = get_perms(dir_entry)?;
    // NOTE(review): presumably "readable by owner and others, not executable" --
    // confirm the meaning of `owner_has`/`others_have` in `utils`.
    if !owner_has(p, 4) || !others_have(p, 4) || x_bit_set(p) {
        e0024!(tds_zip, perms_to_string(p));
        if ARGS.correct_perms {
            i0005!(&tds_zip);
            set_perms(tds_zip, 0o664)?;
        }
    }

    let ut = Utils::new(utils::CheckType::Tds);

    let tmp_dir = Builder::new()
        .prefix("pkgcheck")
        .tempdir()
        .with_context(|| "creating tempdir")?;

    // length of the temporary directory name plus the following '/';
    // used to strip that prefix from the paths of the unpacked files
    let tmp_dir_offset = tmp_dir.path().to_str().unwrap().len() + 1;
    let tmp_dir_str = tmp_dir.path().to_str().unwrap();
    // unzip the TDS zip archive into a temporary directory
    ut.unzip(tds_zip, tmp_dir_str)
        // was E0033 error message
        .with_context(|| format!("TDS zip archive {tds_zip}"))?;

    // in order to compare the package files with the content of the
    // tds zip archive we need to checksum the files in the tds zip
    // archive.

    let mut sizes: SizesHashMap = HashMap::default();
    let mut pool = Pool::new(num_cpus::get() as u32 + 1);
    {
        // Processing a single file entry: the path of the file is stored in
        // the "sizes" hashmap under the size of the file. Hashing is done
        // later for all collected files.
        let mut process = |fsize, dir_entry: &DirEntry| {
            let path = dir_entry.path().to_path_buf();
            let sizeref = &mut sizes;

            sizeref.entry(fsize).or_default().push(path);
        };

        // a .map file was found anywhere in the archive
        let mut map_files_found = false;
        // a .map file was found below fonts/map/dvips/
        let mut map_dvips_found = false;

        // those top level directories are the directories found in the
        // texmf-dist/ directory of a texlive installation
        let tds_toplevel_dirs: HashSet<String> = [
            "asymptote",
            "bibtex",
            "chktex",
            "context",
            "doc",
            "dvipdfmx",
            "dvips",
            "fonts",
            "hbf2gf",
            "makeindex",
            "metafont",
            "metapost",
            "mft",
            "omega",
            "pbibtex",
            "psutils",
            "scripts",
            "source",
            "tex",
            "tex4ht",
            "texconfig",
            "texdoc",
            "texdoctk",
            "ttf2pk",
            "web2c",
            "xdvi",
            "xindy",
        ]
        .iter()
        .map(|&s| s.to_string())
        .collect();

        // set to true if the TDS zip archive contains a top level directory doc/
        let mut doc_found = false;
        // we track the number of toplevel directories which must at least be 2
        let mut number_of_toplevel_dirs = 0;
        let re: Regex = Regex::new(r"fonts[/]map[/]dvips[/]").unwrap();
        for dir_entry in WalkDir::new(tmp_dir.path().to_str().unwrap()).follow_links(false) {
            match dir_entry {
                Ok(dir_entry) => {
                    // full path of the entry including the temporary directory;
                    // entries with invalid UTF-8 in their path are reported and skipped
                    let dir_entry_str = if let Some(d) = dir_entry.path().to_str() {
                        d
                    } else {
                        e0031!(dir_entry.path().to_string_lossy());
                        continue;
                    };

                    // this is the file_name without the directory part
                    // unwrap() is ok here as we covered potential UTF-8 related errors
                    // above in the definition of dir_entry_str
                    let file_name = dir_entry.file_name().to_str().unwrap().to_string();

                    let meta = match dir_entry.metadata() {
                        Ok(meta) => meta,
                        Err(e) => {
                            e0027!(dir_entry.path().display(), e);
                            continue;
                        }
                    };

                    // let mtime = meta.modified().unwrap();
                    // if is_future_mtime(*NOW, mtime) {
                    //     let diff = mtime.duration_since(*NOW).unwrap();
                    //     println!(
                    //         "{} has an mtime in the future by {} seconds",
                    //         &file_name,
                    //         diff.as_secs()
                    //     );
                    // }
                    let ft = get_filetype(&dir_entry);

                    if let FType::Error(e) = ft {
                        e0023!(e);
                        continue;
                    }

                    // this is the path name without the temporary part
                    // from unpacking the TDS zip archive
                    // (the root of the walk has no trailing '/' to skip)
                    let dir_entry_display = if dir_entry.depth() == 0 {
                        &dir_entry_str[tmp_dir_offset - 1..]
                    } else {
                        &dir_entry_str[tmp_dir_offset..]
                    };

                    // symlinks are reported and skipped; any other special
                    // file is not expected in an unpacked zip archive
                    let filetype = match ft {
                        FType::Directory => FileKind::Directory,
                        FType::Regular => FileKind::File,
                        FType::Symlink => {
                            e0043!(dir_entry_display);
                            continue;
                        }
                        _ => panic!(
                            "Unexpected file type for {} in zip archive",
                            dir_entry_display
                        ),
                    };
                    register_duplicate_filename(&mut lcnames, dir_entry_display, filetype);
                    ut.check_for_temporary_file(dir_entry_display);

                    // In the top level directory of a TDS zip archive
                    // ... no files are allowed
                    // ... only specific directories are allowed
                    if dir_entry.depth() == 1 {
                        if ft == FType::Regular {
                            e0034!(dir_entry_display);
                            continue;
                        }

                        if tds_toplevel_dirs.contains(&file_name) {
                            number_of_toplevel_dirs += 1;
                            if &file_name == "doc" {
                                doc_found = true;
                            }
                        } else {
                            e0020!(&file_name);
                        }

                        continue;
                    }

                    if ft == FType::Directory {
                        ut.check_for_empty_directory(dir_entry_str, dir_entry_display);
                        ut.check_for_hidden_directory(&file_name, dir_entry_display);
                        ut.is_unwanted_directory(&file_name, dir_entry_str);

                        continue;
                    }

                    // From here on the entry is a regular file below the top level.

                    // The LaTeX team provides the file `.tex` as a file with an empty name
                    // in order to make `\input\relax` work (explained by David Carlisle)
                    // Therefore, we don't call check_for_hidden_file() in this case
                    match (pkg_name, dir_entry_display) {
                        ("latex-tools", "tex/latex/tools/.tex")
                        | ("latex-tools-dev", "tex/latex-dev/tools/.tex") => (),
                        (_, _) => ut.check_for_hidden_file(&file_name, dir_entry_display),
                    }

                    let fsize = meta.len();
                    process(FileSize(fsize), &dir_entry);
                    ut.check_filesize(fsize, dir_entry_display);

                    // if we encounter a .dtx or .ins file we check
                    // that it is in a subdirectory of either source/ or doc/
                    if (dir_entry_str.ends_with(".dtx") || dir_entry_str.ends_with(".ins"))
                        && !(dir_entry_display.starts_with("source/")
                            || dir_entry_display.starts_with("doc/"))
                    {
                        e0036!(dir_entry_display);
                        continue;
                    }

                    // if the path doesn't contain a man page...
                    // ...it must contain the package name or, if a replacement
                    // is defined for the package, the replacement as a path component
                    if !dir_entry_str.contains("/man/") && !dir_entry_str.contains(pkg_name) {
                        if let Some(real_name) = pkg_replace.get(pkg_name) {
                            let pkg_name_s = format!("/{real_name}/");
                            if !dir_entry_str.contains(&pkg_name_s) {
                                e0028!(pkg_name_s, dir_entry_display);
                            }
                        } else {
                            e0028!(pkg_name, dir_entry_display);
                        }
                    }

                    if dir_entry_str.ends_with(".map") {
                        map_files_found = true;
                        if re.is_match(dir_entry_str) {
                            map_dvips_found = true;
                        }
                    }
                }
                Err(e) => {
                    error!("{e}");
                }
            }
        }

        if !doc_found {
            e0039!();
        }
        if number_of_toplevel_dirs < 2 {
            e0040!();
        }
        // map files exist but none of them is below fonts/map/dvips/
        if map_files_found && !map_dvips_found {
            e0041!();
        }
    };

    // key: size and hash of a file of the archive, value: the paths having them
    let mut tds_hashes: HashMap<(FileSize, Vec<u8>), Vec<PathBuf>> = HashMap::default();
    pool.scoped(|scope| {
        let (tx, rx) = channel();

        // one job collects the results which the hashing jobs send over the channel
        let hashref = &mut tds_hashes;
        scope.execute(move || {
            for (size, path, hash) in rx {
                hashref.entry((size, hash)).or_default().push(path);
            }
        });

        // one hashing job per file of the archive
        for size in sizes.keys() {
            for p in &sizes[size] {
                let txc = tx.clone();
                scope.execute(move || {
                    hash_file(*size, p.clone(), &txc)
                        .unwrap_or_else(|_| panic!("error hashing file {}", p.display()));
                });
            }
        }
    });

    // now check if each package file is in the tds archive
    for (k, paths) in hashes {
        if !tds_hashes.contains_key(k) {
            let p = &paths.dupes[0];
            e0026!(p);
        }
    }
    print_casefolding_tds(&lcnames);
    Ok(())
}

/// Returns the extension of `filename` (the part after the last dot of
/// the final path component), or `None` if there is none.
fn get_extension_from_filename(filename: &str) -> Option<&str> {
    let extension: &OsStr = Path::new(filename).extension()?;
    extension.to_str()
}

/// Reports `fname` if its file type `ft` is not wanted in a package.
///
/// Sockets (`e0013`), fifos (`e0014`), block devices (`e0015`), character
/// devices (`e0016`), and entries whose type could not be determined
/// (`e0023`) are reported. Returns `true` if something was reported.
fn found_unwanted_filetype(fname: &str, ft: &FType) -> bool {
    match ft {
        FType::Socket => {
            e0013!(fname);
        }
        FType::Fifo => {
            e0014!(fname);
        }
        FType::BlockDevice => {
            e0015!(fname);
        }
        FType::CharDevice => {
            e0016!(fname);
        }
        FType::Error(e) => {
            e0023!(e);
        }
        FType::Regular | FType::Directory | FType::Symlink => return false,
    }
    true
}

// Very important
// The permissions we get back for a file or directory contain the file
// type bits, e.g.
//   #define S_IFREG  0100000 which means `regular file`
// which is defined in `/usr/include/linux/stat.h`
//
// This means that, e.g. instead of 0o644 we have to use 0o100644
//
/// Reports `dir_entry` (`e0002`) if its mode `p` does not pass
/// `check_perms4`. If `ARGS.correct_perms` is given the mode is set
/// to `0o644`.
///
/// # Errors
///
/// Fails if the mode cannot be changed.
fn check_and_correct_perms4(dir_entry: &str, p: u32) -> Result<()> {
    if check_perms4(p) {
        return Ok(());
    }

    e0002!(dir_entry, perms_to_string(p));
    if ARGS.correct_perms {
        i0005!(&dir_entry);
        set_perms(dir_entry, 0o644)?;
    }
    Ok(())
}

/// Reports `dir_entry` (`e0002`) if its mode `p` does not pass
/// `check_perms5`. If `ARGS.correct_perms` is given the mode is set
/// to `0o755`.
///
/// # Errors
///
/// Fails if the mode cannot be changed.
fn check_and_correct_perms5(dir_entry: &str, p: u32) -> Result<()> {
    if check_perms5(p) {
        return Ok(());
    }

    e0002!(dir_entry, perms_to_string(p));
    if ARGS.correct_perms {
        i0005!(&dir_entry);
        set_perms(dir_entry, 0o755)?;
    }
    Ok(())
}

// Sets permissions for a file or directory
// Sample invocation: set_perms("somfile", 0o644);
fn set_perms(entry: &str, p: u32) -> Result<()> {
    let f = File::open(entry)?;
    let attr = f.metadata()?;
    let mut perms = attr.permissions();
    let ps = &format!("{:o}", perms.mode());
    perms.set_mode(p);
    let ps1 = &format!("{p:o}");
    f.set_permissions(perms)?;
    info!("mode of '{entry}' changed from {ps} to {ps1} ");

    Ok(())
}

/// The kind of a directory entry, stored together with its path when
/// looking for names which differ only in case.
#[derive(Debug, Clone, PartialEq)]
enum FileKind {
    File,
    Directory,
    // NOTE(review): the string is presumably the target of the symlink -- confirm at the use site.
    Symlink(String),
}

/// A `FileKind` is displayed as `file`, `directory`, or `symlink`.
impl Display for FileKind {
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> Result<(), ::std::fmt::Error> {
        let label = match self {
            FileKind::File => "file",
            FileKind::Directory => "directory",
            FileKind::Symlink(_) => "symlink",
        };
        f.write_str(label)
    }
}

/// Tells if a file is a README file.
#[derive(Debug, Clone, PartialEq)]
enum ReadmeKind {
    /// Not a README file.
    No,
    /// A README file.
    Yes,
    /// A file a README symlink points to, e.g. README --> README.rst.
    /// `check_readme` prints the string as the name the file is symlinked from.
    Symlink(String),
}

/// Registers the path `dir_entry` with its kind `fk` under the lowercased
/// path in `lcnames`.
///
/// An entry of `lcnames` holding more than one path means those paths
/// differ only in case. Nothing is registered if `filename` yields no
/// file name for `dir_entry`.
fn register_duplicate_filename(
    lcnames: &mut HashMap<PathBuf, Vec<(PathBuf, FileKind)>>,
    dir_entry: &str,
    fk: FileKind,
) {
    if filename(dir_entry).is_none() {
        return;
    }

    let folded = PathBuf::from(dir_entry.to_lowercase());
    let original = PathBuf::from(dir_entry);
    lcnames.entry(folded).or_default().push((original, fk));
}

/// Checks the package directory tree rooted at `root`.
///
/// The work is done in three phases:
///
/// 1. The tree is walked (symlinks are not followed). Every entry is checked
///    for hard links, future modification times, unwanted file types and bad
///    characters in its name. Symlinks and directories are handled on the
///    spot; regular files are collected in `file_names` for phase 2.
/// 2. Every collected file is inspected: file size, README checks, MIME and
///    permission mismatches, generated files and line endings. Files are
///    grouped by size as preparation for duplicate detection.
/// 3. Unless duplicate detection is switched off, the files are hashed on a
///    thread pool and grouped by `(size, hash)`.
///
/// Returns `Ok(None)` if `ARGS.ignore_dupes()` is set and `tds_zip` is `None`,
/// otherwise `Ok(Some(..))` with the `(size, hash)` -> paths map.
///
/// # Errors
///
/// Fails if `root` or a symlink target cannot be canonicalized, or if reading
/// permissions, detecting or checking a file, or correcting permissions
/// returns an error.
///
/// # Panics
///
/// Panics if hashing a file fails on a worker thread.
fn check_package(root: &str, tds_zip: &Option<&str>) -> Result<Option<DupHashes>> {
    let mut lcnames: HashMap<PathBuf, Vec<(PathBuf, FileKind)>> = HashMap::default();

    let mut doublenames: HashMap<PathBuf, Vec<PathBuf>> = HashMap::default();

    let mut inodes = HashMap::default();

    let ut = Utils::new(utils::CheckType::Package);

    i0002!(root);
    // This hash contains all package file names.
    //
    //  PathBuf: the full path starting at the directory specified at the command line
    //  Metadata: the meta data of the file
    //  String: the file name without any directory part
    //  ReadmeKind: is it a certain README, file or symlink?
    //      A special case of a README file is a file which has a different name but
    //      was pointed to by a symlink. Example: README --> README.rst
    let mut file_names: FileNamesHashMap = HashMap::default();
    let mut readme_found = false;

    // `root` comes from the command line, so a missing or unreadable
    // directory must be reported as an error instead of panicking.
    let root_absolute = PathBuf::from(root)
        .canonicalize()
        .with_context(|| format!("cannot resolve package directory '{root}'"))?;

    for dir_entry in WalkDir::new(root).follow_links(false) {
        match dir_entry {
            Ok(dir_entry) => {
                let Some(dir_entry_str) = dir_entry.path().to_str() else {
                    // invalid UTF-8 character(s) in filename
                    e0031!(dir_entry.path().to_string_lossy());
                    continue;
                };

                let meta = match dir_entry.metadata() {
                    Ok(meta) => meta,
                    Err(e) => {
                        // insufficient permission to read directory
                        e0023!(e);
                        continue;
                    }
                };

                check_inode(&mut inodes, dir_entry_str, &meta);

                // this is the file_name without the directory part
                // unwrap() is ok here as we covered potential UTF-8 related errors
                // above in the definition of dir_entry_str
                let file_name = dir_entry.file_name().to_str().unwrap().to_string();

                let mtime = meta.modified().unwrap();
                if is_future_mtime(*NOW, mtime) {
                    // unwrap() is ok: `mtime` is later than `NOW` in this branch
                    let diff = mtime.duration_since(*NOW).unwrap();
                    w0011!(&file_name, diff.as_secs(), &utils::format_duration(&diff));
                }

                // we check for weird stuff like socket files aso.
                let ft = get_filetype(&dir_entry);
                if found_unwanted_filetype(dir_entry_str, &ft) {
                    continue;
                }

                ut.filename_has_bad_chars(&dir_entry, dir_entry_str);

                // 1. dealing with symlinks
                if ft == FType::Symlink {
                    match get_symlink(&dir_entry) {
                        // broken symlink
                        Ok(None) => {
                            e0010!(&dir_entry_str);
                            continue;
                        }
                        Err(e) => {
                            e0027!(&dir_entry_str, e);
                            continue;
                        }
                        Ok(Some(p)) => {
                            let target = p.canonicalize().with_context(|| {
                                format!("cannot resolve target of symlink '{dir_entry_str}'")
                            })?;
                            // symlink pointing to outside of the package directory tree
                            //
                            // The comparison is done on path components: a plain string
                            // prefix test would accept a sibling directory such as
                            // `<root>-old` as being inside `<root>`.
                            if !target.starts_with(&root_absolute) {
                                e0030!(&dir_entry_str, p.display());
                                continue;
                            }
                            let pd: String = target.to_string_lossy().to_string();

                            if filename(dir_entry_str).is_some() {
                                register_duplicate_filename(
                                    &mut lcnames,
                                    dir_entry_str,
                                    FileKind::Symlink(pd.clone()),
                                );
                            }
                            if is_readme(&file_name) {
                                readme_found = true;
                                // The entry is stored under the link target; the path
                                // of the symlink itself travels in `ReadmeKind::Symlink`.
                                file_names.insert(
                                    p,
                                    (
                                        meta,
                                        file_name,
                                        ReadmeKind::Symlink(dir_entry_str.to_string()),
                                    ),
                                );
                            }
                            continue;
                        }
                    }
                }

                let p = get_perms(dir_entry.path())?;

                // 2. dealing with directories
                if ft == FType::Directory {
                    if filename(dir_entry_str).is_some() {
                        register_duplicate_filename(
                            &mut lcnames,
                            dir_entry_str,
                            FileKind::Directory,
                        );
                    }

                    if !owner_has(p, 5) || !others_have(p, 5) {
                        e0011!(&dir_entry_str, perms_to_string(p));
                        if ARGS.correct_perms {
                            i0005!(&dir_entry_str);
                            set_perms(dir_entry_str, 0o775)?;
                        }
                    }

                    ut.check_for_empty_directory(dir_entry_str, dir_entry_str);
                    ut.check_for_hidden_directory(&file_name, dir_entry_str);

                    ut.is_unwanted_directory(&file_name, dir_entry_str);
                    continue;
                }

                // 3. dealing with regular files
                ut.check_for_hidden_file(&file_name, dir_entry_str);
                if !ARGS.ignore_tmpfiles() {
                    ut.check_for_temporary_file(dir_entry_str);
                }

                if let Some(file_name) = filename(dir_entry_str) {
                    doublenames
                        .entry(PathBuf::from(file_name))
                        .or_default()
                        .push(PathBuf::from(&dir_entry_str));
                }

                if is_readme(&file_name) {
                    // We want to deal with README files only if they are
                    // in the root directory of the package.
                    let f = format!(
                        "{}{}{}",
                        root,
                        // we have to pay attention if `root` ends already with '/'
                        if root.ends_with('/') { "" } else { "/" },
                        &file_name
                    );

                    if dir_entry_str == f {
                        readme_found = true;
                        file_names.insert(
                            dir_entry.path().to_path_buf(),
                            (meta, file_name.clone(), ReadmeKind::Yes),
                        );
                    } else {
                        file_names.entry(dir_entry.path().to_path_buf()).or_insert((
                            meta,
                            file_name.clone(),
                            ReadmeKind::No,
                        ));
                    }
                } else {
                    file_names.entry(dir_entry.path().to_path_buf()).or_insert((
                        meta,
                        file_name.clone(),
                        ReadmeKind::No,
                    ));
                }

                register_duplicate_filename(&mut lcnames, dir_entry_str, FileKind::File);
            }

            Err(e) => {
                error!("{e}");
            }
        }
    }

    if !readme_found {
        e0009!();
    }

    let lc = LinkCheck::new(4, false);

    let mut sizes: SizesHashMap = HashMap::default();
    let mut generated: GeneratedHashMap = HashMap::default();

    // Groups the paths of the processed files by file size. Only files
    // sharing a size with another file can be duplicates, so this grouping
    // decides later on which files have to be hashed.
    let mut process = |fsize, path: &PathBuf| {
        sizes.entry(fsize).or_default().push(path.clone());
    };
    for (path, (meta, _file_name, is_readme)) in &file_names {
        let Some(dir_entry_str) = path.to_str() else {
            e0031!(&path.to_string_lossy());
            continue;
        };

        let fsize = meta.len();
        ut.check_filesize(fsize, dir_entry_str);

        let perms = get_perms(path)?;

        let p = Path::new(dir_entry_str);

        let ft = detect(p)?;

        if ReadmeKind::No != *is_readme {
            if !check_readme(dir_entry_str, is_readme, &ft) {
                continue;
            }
            if ARGS.urlcheck {
                lc.check_urls(dir_entry_str);
            }
        }

        let cr = check_file(Path::new(dir_entry_str), &ft)?;

        // Here we check mime mismatches
        // Example: a `.png` file which is not an image
        if let Some(Issue::MimeMismatch { expected, got }) = &cr.mime_issue {
            if !matches!(&ft, DetectResult::Zerofile) {
                e0032!(dir_entry_str, expected, got);
            }
        }

        // Here we check permission issues
        if let Some(Issue::PermMismatch { expected, got }) = &cr.perm_issue {
            match &ft {
                DetectResult::Text(_) => {
                    if is_windows_batchfile(dir_entry_str) {
                        e0002!(dir_entry_str, perms_to_string(*got));
                        if ARGS.correct_perms {
                            i0005!(&dir_entry_str);
                            set_perms(dir_entry_str, 0o755)?;
                        }
                    } else {
                        check_and_correct_perms4(dir_entry_str, perms)?;
                    }
                }

                DetectResult::Script(_, _) => {
                    e0002!(dir_entry_str, perms_to_string(*got));
                    if ARGS.correct_perms {
                        i0005!(&dir_entry_str);
                        set_perms(dir_entry_str, 0o755)?;
                    }
                }
                DetectResult::Bom(_b) => {
                    check_and_correct_perms4(dir_entry_str, *got)?;
                }
                DetectResult::Elf => {
                    if !dir_entry_str.ends_with(".dll") {
                        check_and_correct_perms5(dir_entry_str, perms)?;
                    }
                }
                DetectResult::Pdf | DetectResult::Archive | DetectResult::Zip => {
                    check_and_correct_perms4(dir_entry_str, perms)?;
                }
                DetectResult::Zerofile => check_and_correct_perms4(dir_entry_str, *got)?,
                dr => {
                    eprintln!(
                        "Unexpected error: {} permission mismatch — expected {expected}, got {got:03o}",
                        dr
                    );
                }
            }
        }

        if matches!(ft, DetectResult::Text(_)) {
            let fext = get_extension_from_filename(dir_entry_str);
            if fext == Some("ins") || fext == Some("dtx") {
                check_generated_files(dir_entry_str, &mut generated);
            }
        }
        match ft {
            DetectResult::Text(LineEnding::CrLf) => {
                if !is_windows_batchfile(dir_entry_str) {
                    e0012!(&dir_entry_str);
                    if ARGS.correct_le {
                        fix_inconsistent_le(dir_entry_str);
                    }
                }
            }
            DetectResult::Text(LineEnding::Cr) => {
                e0037!(&dir_entry_str);
                if ARGS.correct_le {
                    fix_inconsistent_le(dir_entry_str);
                }
            }
            DetectResult::Text(LineEnding::Mixed(cr, lf, crlf)) => {
                e0038!(&dir_entry_str, cr, lf, crlf);
                if ARGS.correct_le {
                    if is_windows_batchfile(dir_entry_str) {
                        make_crlf(dir_entry_str);
                    } else {
                        fix_inconsistent_le(dir_entry_str);
                    }
                }
            }
            DetectResult::Text(LineEnding::Lf) => {
                if is_windows_batchfile(dir_entry_str) {
                    w0008!(&dir_entry_str);
                }
            }
            DetectResult::Bom(b) => {
                w0004!(&dir_entry_str, b);
            }
            DetectResult::Pdf => {
                is_pdf_ok(dir_entry_str);
            }

            DetectResult::Archive | DetectResult::Zip => {
                if dir_entry_str.ends_with(".tds.zip") {
                    e0035!(&dir_entry_str);
                } else {
                    w0001!(&dir_entry_str);
                }
            }

            // NOTE(review): all remaining file types skip the size registration
            // below and therefore never take part in duplicate detection;
            // confirm that this is intended.
            _ => continue,
        }

        if !(ARGS.ignore_dupes() && tds_zip.is_none()) {
            process(FileSize(fsize), path);
        }
    }

    print_casefolding(&lcnames);
    print_generated(&doublenames, &generated);
    print_hardlinks(&inodes);
    if !ARGS.ignore_same_named() {
        print_doublenames(&doublenames);
    }

    if ARGS.ignore_dupes() && tds_zip.is_none() {
        return Ok(None);
    }

    // Set up thread pool for the task to hash a file.  Number of CPUs + 1 has been
    // found to be a good pool size, likely since the walker thread should be
    // doing mostly IO.
    let mut pool = Pool::new(num_cpus::get() as u32 + 1);

    let mut hashes: HashMap<(FileSize, Vec<u8>), DupPath> = HashMap::default();
    pool.scoped(|scope| {
        let (tx, rx) = channel();

        // Collector job: gathers the results of the hashing jobs. Its loop ends
        // once `tx` and all of its clones have been dropped.
        let hashref = &mut hashes;
        scope.execute(move || {
            for (size, path, hash) in rx {
                hashref.entry((size, hash)).or_default().push(path);
            }
        });

        for (size, paths) in &sizes {
            // A file with a unique size cannot have a duplicate; it is hashed
            // nevertheless when a TDS zip archive was given.
            if paths.len() == 1 && tds_zip.is_none() {
                continue;
            }

            for p in paths {
                let txc = tx.clone();
                scope.execute(move || {
                    hash_file(*size, p.clone(), &txc)
                        .unwrap_or_else(|_| panic!("error hashing file {}", p.display()));
                });
            }
        }
    });

    if !ARGS.ignore_dupes() {
        print_duplicates(&hashes);
    }
    Ok(Some(hashes))
}

/// Reports every hard link found.
///
/// `hashes` maps a `(device, inode)` pair to the names recorded for it; more
/// than one name for the same pair means the names are hard links to the
/// same file.
fn print_hardlinks(hashes: &HashMap<(Devno, Inode), Vec<String>>) {
    let linked = hashes.iter().filter(|(_, names)| names.len() > 1);

    for ((_devid, inode), names) in linked {
        w0010!(inode.0);
        for hfile in names {
            info!("   >>> {}", &hfile);
        }
    }
}

/// Reports, via `e0042!`, every key of `hashes` whose entry list does not
/// consist of exactly one element, followed by the paths and kinds of all
/// entries recorded for that key.
fn print_casefolding_tds(hashes: &HashMap<PathBuf, Vec<(PathBuf, FileKind)>>) {
    for (folded, entries) in hashes {
        if entries.len() != 1 {
            e0042!(folded.display());

            for (path, kind) in entries {
                info!("   >>> {} ({})", path.display(), kind);
            }
        }
    }
}

/// Names which become equal when converted to lower case are not wanted.
///
/// Reports, via `e0025!`, every key of `hashes` whose entry list does not
/// consist of exactly one element, followed by the paths and kinds of all
/// entries recorded for that key.
fn print_casefolding(hashes: &HashMap<PathBuf, Vec<(PathBuf, FileKind)>>) {
    for (folded, entries) in hashes {
        if entries.len() != 1 {
            e0025!(folded.display());

            for (path, kind) in entries {
                info!("   >>> {} ({})", path.display(), kind);
            }
        }
    }
}

/// Reports files of the package which are generated by another file.
///
/// Each key of `generated` is the name of a generated file, its value the
/// generator. If such a name is also a key of `doublenames`, every path
/// recorded there is reported via `e0019!`, except for names ending in
/// `.ins` or `.pdf`.
fn print_generated(doublenames: &HashMap<PathBuf, Vec<PathBuf>>, generated: &GeneratedHashMap) {
    for (k, generator) in generated {
        // Only generated files which are actually present are of interest.
        let Some(present) = doublenames.get(&PathBuf::from(k)) else {
            continue;
        };

        if k.ends_with(".ins") || k.ends_with(".pdf") {
            continue;
        }

        for fname in present {
            e0019!(fname.to_str().unwrap(), generator.as_str());
        }
    }
}

/// Warns, via `w0003!`, about every name in `hashes` that does not have
/// exactly one path recorded, and lists those paths.
///
/// README and Makefile variants are not reported.
fn print_doublenames(hashes: &HashMap<PathBuf, Vec<PathBuf>>) {
    // Names which are not reported even if they occur more than once.
    const IGNORED: [&str; 7] = [
        "README",
        "README.txt",
        "README.md",
        "Makefile",
        "Makefile.am",
        "Makefile.in",
        "makefile",
    ];

    for (k, paths) in hashes {
        if paths.len() == 1 {
            continue;
        }

        let ks = k.to_str().unwrap();
        if IGNORED.contains(&ks) {
            continue;
        }

        w0003!(ks);

        for p in paths {
            info!("   >>> {}", p.display());
        }
    }
}

/// Emits message `i0006!` and then logs one line for each pair yielded by
/// `temp_file_endings()`: the first element padded to 23 columns, followed by
/// the second element.
fn show_tmp_endings() {
    i0006!();
    temp_file_endings().into_iter().for_each(|(first, second)| {
        info!("{first:23} {second}");
    });
}
