package edu.uky.ai.data;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Function;

import edu.uky.ai.Settings;
import edu.uky.ai.util.ImmutableArray;
import edu.uky.ai.util.Utilities;

/**
 * A data set is a collection of {@link DataPoint}s. This class includes
 * methods to read a data set in from a file and perform various summary
 * operations on the features in the data.
 * 
 * @author Stephen G. Ware
 */
public class DataSet implements Serializable {

	/**
	 * Reads a data set in from a CSV file.
	 * <p>
	 * The first line of the file is a header with one comma-separated cell
	 * per feature. Each cell starts with one of the keywords {@code nominal},
	 * {@code ordinal}, or {@code interval}, followed by one separator
	 * character and then the name of the feature. An ordinal cell also lists,
	 * after the name, every value the feature can have, each preceded by a
	 * semicolon (for example {@code ordinal size;small;medium;large}); the
	 * zero-based position of a value in that list is passed to the
	 * {@link Ordinal} constructor. Every line after the header is one data
	 * point with exactly one comma-separated value per feature.
	 * 
	 * @param file the data set in CSV format
	 * @return a data set object
	 * @throws IOException if an error occurs while reading or if the file is
	 * not formatted correctly (empty file, unknown feature type, missing
	 * feature name, wrong number of values on a line, an interval value that
	 * is not a number, or an ordinal value that was not declared in the
	 * header)
	 */
	public static DataSet read(File file) throws IOException {
		try(Scanner scanner = new Scanner(file)) {
			// An empty file has no header; report it as a format error
			// instead of letting Scanner throw NoSuchElementException.
			if(!scanner.hasNextLine())
				throw new IOException("The file \"" + file + "\" is empty, but it should start with a line declaring the features.");
			String[] line = scanner.nextLine().split(",");
			Feature<?>[] features = new Feature[line.length];
			@SuppressWarnings("unchecked")
			Function<String, Value>[] parsers = new Function[line.length];
			for(int i=0; i<features.length; i++) {
				if(line[i].startsWith("nominal")) {
					features[i] = new Feature<>(featureName(line[i], "nominal"), Nominal.class);
					// Reuse one Nominal object per distinct string in this column.
					HashMap<String, Value> map = new HashMap<>();
					parsers[i] = s -> map.computeIfAbsent(s, key -> new Nominal(key));
				}
				else if(line[i].startsWith("ordinal")) {
					String[] keys = line[i].split(";");
					// Only the part before the first ';' is the declaration of
					// the feature; everything after it is the list of values.
					features[i] = new Feature<>(featureName(keys[0], "ordinal"), Ordinal.class);
					HashMap<String, Value> map = new HashMap<>();
					for(int j=1; j<keys.length; j++)
						map.put(keys[j], new Ordinal(keys[j], j - 1));
					// Yields null for a value that was not declared in the
					// header; the reading loop below turns that into an error.
					parsers[i] = s -> map.get(s);
				}
				else if(line[i].startsWith("interval")) {
					features[i] = new Feature<>(featureName(line[i], "interval"), Interval.class);
					parsers[i] = s -> new Interval(Double.parseDouble(s));
				}
				else
					throw new IOException("The feature \"" + line[i] + "\" should start with one of \"nominal\", \"ordinal\", or \"interval\".");
			}
			ArrayList<DataPoint> points = new ArrayList<>();
			// Counts data lines, starting at 1 for the first line after the header.
			int i = 1;
			while(scanner.hasNextLine()) {
				line = scanner.nextLine().split(",");
				if(line.length != features.length)
					throw new IOException("Line " + i + " should have exactly " + features.length + " values, but it has " + line.length + ".");
				Value[] values = new Value[features.length];
				for(int j=0; j<values.length; j++) {
					try {
						values[j] = parsers[j].apply(line[j]);
					}
					catch(NumberFormatException e) {
						throw new IOException("Line " + i + " has the value \"" + line[j] + "\" in column " + (j + 1) + ", which should be a number.", e);
					}
					if(values[j] == null)
						throw new IOException("Line " + i + " has the value \"" + line[j] + "\" in column " + (j + 1) + ", which is not one of the values declared for that feature.");
				}
				// The point is created without a data set; the DataSet
				// constructor associates every point with the new data set.
				points.add(new DataPoint(null, new ImmutableArray<>(values)));
				i++;
			}
			return new DataSet(Utilities.getFileName(file), new ImmutableArray<>(features), points);
		}
	}
	
	/**
	 * Extracts the name of a feature from its declaration in the header line.
	 * The name is everything after the type keyword and the one character
	 * which separates the keyword from the name.
	 * 
	 * @param declaration the declaration, which starts with the type keyword
	 * @param type the type keyword the declaration starts with
	 * @return the name of the feature
	 * @throws IOException if the declaration is too short to contain a name
	 */
	private static String featureName(String declaration, String type) throws IOException {
		int start = type.length() + 1;
		if(declaration.length() < start)
			throw new IOException("The feature \"" + declaration + "\" should have a name after \"" + type + "\".");
		return declaration.substring(start);
	}
	
	/** Serial version UID */
	private static final long serialVersionUID = Settings.VERSION_UID;

	/** The name of the data set */
	public final String name;
	
	/** All features (column labels) in this data set */
	public final ImmutableArray<Feature<?>> features;
	
	/** All {@link DataPoint}s in this data set */
	public final ImmutableArray<DataPoint> points;
	
	/**
	 * The main constructor which associates this data set with each individual
	 * data point object.
	 * 
	 * @param name the name of the data set
	 * @param features the features (column labels) of the data set
	 * @param points the data points, already checked against the features;
	 * each element of this array is replaced by the result of calling
	 * {@link DataPoint#associate} on it with this data set
	 */
	private DataSet(String name, ImmutableArray<Feature<?>> features, DataPoint[] points) {
		this.name = name;
		this.features = features;
		// NOTE(review): the array is modified after it has been wrapped, so
		// this presumably relies on ImmutableArray using the given array
		// directly rather than copying it -- confirm before reordering.
		this.points = new ImmutableArray<>(points);
		for(int i=0; i<points.length; i++)
			points[i] = points[i].associate(this);
	}
	
	/**
	 * Creates a new data set with the given name, features, and data points.
	 * The number and type of features given must match the number and types
	 * of features in each data point.
	 * 
	 * @param name the name
	 * @param features the features
	 * @param points the data points
	 * @throws IllegalArgumentException if some point does not have exactly
	 * one value of the correct type for each feature
	 */
	public DataSet(String name, ImmutableArray<Feature<?>> features, Iterable<DataPoint> points) {
		this(name, features, check(features, points));
	}
	
	/**
	 * Check that each data point has the correct number of features of the
	 * correct type.
	 * 
	 * @param features the features to check
	 * @param points the data points to check
	 * @return the data points, as an array
	 * @throws IllegalArgumentException if some point has the wrong number of
	 * values or a value whose class does not match its feature's type
	 */
	private static final DataPoint[] check(ImmutableArray<Feature<?>> features, Iterable<DataPoint> points) {
		ArrayList<DataPoint> list = new ArrayList<>();
		for(DataPoint point : points) {
			if(point.values.size() != features.size())
				throw new IllegalArgumentException("All points must have " + features.size() + " features.");
			for(int i=0; i<features.size(); i++)
				if(!features.get(i).type.isAssignableFrom(point.values.get(i).getClass()))
					throw new IllegalArgumentException("The value \"" + point.values.get(i) + "\" is not " + features.get(i).type.getSimpleName().toLowerCase() + ".");
			list.add(point);
		}
		if(list.isEmpty())
			return new DataPoint[0];
		// The component type of the array is the runtime class of the first
		// point, so every other point must be an instance of that class or
		// toArray throws an ArrayStoreException.
		// NOTE(review): presumably all points of one data set share a class -- confirm.
		return list.toArray((DataPoint[]) Array.newInstance(list.get(0).getClass(), list.size()));
	}
	
	/**
	 * Returns the name of this data set.
	 * 
	 * @return the name
	 */
	@Override
	public String toString() {
		return name;
	}
	
	/**
	 * Returns a list of each point's value for the given feature, in the same
	 * order as the points in this data set.
	 * 
	 * @param <V> the value type
	 * @param feature a feature from the data set
	 * @return the value of this feature for every point in the data set
	 */
	protected final <V extends Value> ArrayList<V> values(Feature<V> feature) {
		ArrayList<V> list = new ArrayList<>(points.size());
		for(DataPoint point : points)
			list.add(point.get(feature));
		return list;
	}
	
	/**
	 * Returns a set of all unique values observed for this feature among all
	 * points in this data set. Note this method returns only values observed
	 * in this data set, not all possible values the feature could
	 * theoretically have. For a nominal feature the set keeps the values in
	 * the order they are first observed; for any other feature the set is
	 * sorted.
	 * 
	 * @param <V> the value type of the feature
	 * @param feature a feature from this data set
	 * @return a set of unique values observed among all data points
	 */
	public <V extends Value> Set<V> getValues(Feature<V> feature) {
		Set<V> set = feature.isNominal() ? new LinkedHashSet<>() : new TreeSet<>();
		set.addAll(values(feature));
		return set;
	}
	
	/**
	 * Returns the maximum value observed for this feature among all data
	 * points in this data set. Note this method returns only the max value
	 * observed in this data, not the theoretical max.
	 * 
	 * @param feature a {@link Numeric} feature from this data set
	 * @return the maximum value of that feature among all points in the data
	 * set, or null if the data set has no points
	 * @throws UnsupportedOperationException if the feature is not {@link Numeric}
	 */
	@SuppressWarnings("unchecked")
	public Numeric getMax(Feature<?> feature) {
		if(!feature.isNumeric())
			throw new UnsupportedOperationException("Max is only defined for numeric features.");
		Numeric max = null;
		for(Numeric value : values((Feature<Numeric>) feature))
			if(max == null || value.comesAfter(max))
				max = value;
		return max;
	}
	
	/**
	 * Returns the minimum value observed for this feature among all data
	 * points in this data set. Note this method returns only the min value
	 * observed in this data, not the theoretical min.
	 * 
	 * @param feature a {@link Numeric} feature from this data set
	 * @return the minimum value of that feature among all points in the data
	 * set, or null if the data set has no points
	 * @throws UnsupportedOperationException if the feature is not {@link Numeric}
	 */
	@SuppressWarnings("unchecked")
	public Numeric getMin(Feature<?> feature) {
		if(!feature.isNumeric())
			throw new UnsupportedOperationException("Min is only defined for numeric features.");
		Numeric min = null;
		for(Numeric value : values((Feature<Numeric>) feature))
			if(min == null || value.comesBefore(min))
				min = value;
		return min;
	}
	
	/**
	 * Returns the mean (average) value observed for this feature among all
	 * data points in this data set.
	 * 
	 * @param feature an {@link Interval} feature from this data set
	 * @return the mean value of that feature for all points in the data set,
	 * or an interval of 0 if the data set has no points
	 * @throws UnsupportedOperationException if the feature is not {@link Interval}
	 */
	public Interval getMean(Feature<?> feature) {
		if(!feature.isInterval())
			throw new UnsupportedOperationException("Mean is only defined for interval features.");
		double total = 0;
		double count = 0;
		for(Value value : values(feature)) {
			total += Interval.class.cast(value).number;
			count++;
		}
		return new Interval(count == 0 ? 0 : total / count);
	}
	
	/**
	 * Returns the median (middle) value observed for this feature among all
	 * data points in this data set. When the number of points is even, the
	 * higher of the two middle values is returned.
	 * 
	 * @param <V> the value type of the feature
	 * @param feature a {@link Numeric} feature from this data set
	 * @return the median value of that feature for all points in the data
	 * set, or null if the data set has no points
	 * @throws UnsupportedOperationException if the feature is not {@link Numeric}
	 */
	@SuppressWarnings("unchecked")
	public <V extends Value> V getMedian(Feature<V> feature) {
		if(!feature.isNumeric())
			throw new UnsupportedOperationException("Median is only defined for numeric features.");
		ArrayList<V> values = values(feature);
		if(values.isEmpty())
			return null;
		// The feature is numeric, so its values can be sorted as Numeric.
		Collections.sort((ArrayList<Numeric>) values);
		return values.get(values.size() / 2);
	}
	
	/**
	 * Returns the mode (most common) value observed for this feature among all
	 * data points in this data set. If there are multiple modes, one is chosen
	 * arbitrarily.
	 * 
	 * @param <V> the value type of the feature
	 * @param feature a feature from this data set
	 * @return the mode value of that feature for all points in the data set,
	 * or null if the data set has no points
	 */
	public <V extends Value> V getMode(Feature<V> feature) {
		HashMap<V, Integer> counts = new HashMap<>();
		V mode = null;
		int max = -1;
		for(V value : values(feature)) {
			// Add one to the number of times this value has been seen.
			int count = counts.merge(value, 1, Integer::sum);
			if(count > max) {
				mode = value;
				max = count;
			}
		}
		return mode;
	}
	
	/**
	 * Returns a new data set with the given feature removed. Note this method
	 * returns a new data set and does not modify this object.
	 * 
	 * @param feature the feature to remove
	 * @return the new data set
	 */
	public DataSet remove(Feature<?> feature) {
		return new DataSet(name, removeFeatureFromFeatures(feature), removeFeatureFromPoints(feature));
	}
	
	/**
	 * Removes a feature from the list of features. Every feature equal to the
	 * given feature is left out; the other features keep their order.
	 * 
	 * @param feature the feature to remove
	 * @return the shorter list of features
	 */
	protected ImmutableArray<Feature<?>> removeFeatureFromFeatures(Feature<?> feature) {
		ArrayList<Feature<?>> features = new ArrayList<>();
		for(Feature<?> f : this.features)
			if(!f.equals(feature))
				features.add(f);
		return new ImmutableArray<>(features.toArray(new Feature[features.size()]));
	}
	
	/**
	 * Removes a feature from every data point.
	 * 
	 * @param feature the feature to remove
	 * @return the data points with that feature removed
	 */
	protected ArrayList<DataPoint> removeFeatureFromPoints(Feature<?> feature) {
		ArrayList<DataPoint> points = new ArrayList<>(this.points.size());
		for(DataPoint point : this.points)
			points.add(point.remove(feature));
		return points;
	}
}