bit dojo labs : file explorer

Inspecting: home > java > machine-learning > bmp2arff > Bmp2Arff.java (download)
/*
* Copyright (C) 2008 Alan Huan-Chun Peng Hsu (http://bitdojo.net)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package machineLearning;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.Set;
import javax.imageio.ImageIO;

/**
 * Converts a bitmap image to an ARFF dataset. Every pixel whose color differs
 * from the background becomes one instance with the attributes [x,y,class],
 * where class is the color of the pixel in integer form (the packed value
 * returned by {@link BufferedImage#getRGB(int, int)}).
 * <p>
 * The background color is the color of the pixel at (0,0); background pixels
 * produce no instance and the background color is not listed as a class.
 * <p>
 * This class keeps the set of colors seen in static state and is therefore
 * not safe for concurrent use.
 *
 * @version 1.02 (2008-08-28)
 * @author Alan Huan-Chun Peng Hsu
 * @see <a href="http://bitdojo.net/bmp2arff">http://bitdojo.net/bmp2arff</a>
 */
public class Bmp2Arff {

	/** Input image used when no file name is given on the command line. */
	private static final String DEFAULT_INPUT_FILE = "dataset-1.bmp";

	/** Output file used when no second file name is given on the command line. */
	private static final String DEFAULT_OUTPUT_FILE = "dataset-1.arff";

	/** ARFF header; %CLASSES% is replaced by the comma-separated class list. */
	private static final String ARFF_HEADER_TEMPLATE = 
		"@RELATION bitmap-data \n\n" +
		"@ATTRIBUTE x REAL \n" +
		"@ATTRIBUTE y REAL \n" +
		"@ATTRIBUTE class {%CLASSES%} \n\n" +
		"@DATA\n";

	// Originally, each color was converted to a simple sequential number like
	// 1,2,3. That proved to be slow when using large bitmaps, so now the
	// integer representation of the color itself is used as the class.

	/**
	 * Distinct colors in the order they were first registered. An
	 * insertion-ordered set gives constant-time membership checks (this is
	 * queried once per pixel) while preserving the "first color registered is
	 * the background" convention relied on by {@link #generateClassesString()}.
	 */
	private static final Set<Integer> classList = new LinkedHashSet<Integer>();

	/**
	 * Reads the input image, converts it and writes the ARFF file.
	 *
	 * @param args optional: args[0] is the input image file name (default
	 *             "dataset-1.bmp"), args[1] is the output ARFF file name
	 *             (default "dataset-1.arff")
	 * @throws Exception if the image cannot be read or the output cannot be written
	 */
	public static void main(String[] args) throws Exception {

		String inputFileName = (args.length > 0) ? args[0] : DEFAULT_INPUT_FILE;
		String outputFileName = (args.length > 1) ? args[1] : DEFAULT_OUTPUT_FILE;

		int[][] pixelData = processImg(inputFileName);
		String arffString = dataToArffString(pixelData);

		// PrintWriter(File) creates the file or truncates an existing one,
		// so no separate createNewFile() call is needed.
		File outFile = new File(outputFileName);
		PrintWriter pw = new PrintWriter(outFile);
		try {
			pw.print(arffString);
			// PrintWriter never throws on write failures; checkError() flushes
			// and reports them so a truncated file is not reported as success.
			if (pw.checkError()) {
				throw new IOException("Failed to write " + outFile.getAbsolutePath());
			}
		} finally {
			pw.close();
		}

		System.out.println("Data saved to " + outFile.getAbsolutePath());
	}

	/**
	 * Builds the complete ARFF document for the given pixel classes.
	 * <p>
	 * The value at pixelData[0][0] is treated as the background; every pixel
	 * with a different value is emitted as one "x,y,class" line. The class
	 * declaration in the header is taken from the colors registered so far
	 * through {@link #getClassByColor(int)}.
	 *
	 * @param pixelData pixel classes indexed as [x][y]; all columns are expected
	 *                  to have the same length as the first one
	 * @return the ARFF header followed by one data line per non-background pixel
	 */
	private static String dataToArffString(int[][] pixelData){

		StringBuilder arff = new StringBuilder(
				ARFF_HEADER_TEMPLATE.replace("%CLASSES%", generateClassesString()));

		// Nothing to scan: return the header only instead of failing on [0][0].
		if (pixelData.length == 0 || pixelData[0].length == 0) {
			System.out.println("Total instances: 0");
			return arff.toString();
		}

		int width = pixelData.length;
		int height = pixelData[0].length;
		int background = pixelData[0][0];
		int count = 0;
		int progress = 0;
		int totalSize = width * height;

		// Start at x = 0 so that the first column of the image is included.
		for (int x = 0; x < width; x++) {
			for (int y = 0; y < height; y++) {

				int thisClass = pixelData[x][y];

				if (thisClass != background) {
					arff.append(x).append(',').append(y).append(',').append(thisClass).append('\n');
					count++;
				}

				if (++progress % 1000 == 0) {
					System.out.println("Processing (" + progress + "/" +  totalSize + ")...");
				}
			}
		}

		System.out.println("Total instances: " + count);

		return arff.toString();
	}

	/**
	 * Loads an image and returns the class of every pixel.
	 * <p>
	 * Pixels are visited column by column starting at (0,0), so the color of
	 * pixel (0,0) is always the first color registered. The set of registered
	 * colors is reset first so that it describes this image only.
	 *
	 * @param input_filename path of the image file to read
	 * @return pixel classes indexed as [x][y]
	 * @throws Exception if the file cannot be read or is not in a format that
	 *                   ImageIO can decode
	 */
	private static int[][] processImg(String input_filename) throws Exception {

		BufferedImage image = ImageIO.read(new File(input_filename));

		// ImageIO.read returns null when no registered reader understands the file.
		if (image == null) {
			throw new IOException("Unsupported or unreadable image format: " + input_filename);
		}

		int sizex = image.getWidth();
		int sizey = image.getHeight();

		System.out.println(input_filename + " is " + sizex + "x" + sizey);

		classList.clear();

		int[][] pixelData = new int[sizex][sizey];

		for (int x = 0; x < sizex; x++) {
			for (int y = 0; y < sizey; y++) {
				pixelData[x][y] = getClassByColor(image.getRGB(x, y));
			}
		}

		return pixelData;
	}

	/**
	 * Registers a color as a class (if it has not been seen before) and returns
	 * the class for that color, which is the color value itself.
	 *
	 * @param rgb color in integer form
	 * @return the class of the color, identical to rgb
	 */
	private static int getClassByColor(int rgb){

		// Set.add returns true only when the color was not present yet.
		if (classList.add(rgb)) {
			System.out.println("Added new class: " + rgb);
		}

		return rgb;
	}

	/**
	 * Builds the comma-separated list of classes for the ARFF header, e.g.
	 * "1,2,3". The first registered color is skipped: when colors are
	 * registered by {@link #processImg(String)} it is the background color,
	 * which never appears in the data section.
	 *
	 * @return the class list without the first registered color; empty if fewer
	 *         than two colors have been registered
	 */
	private static String generateClassesString(){

		StringBuilder classes = new StringBuilder();
		int index = 0;

		for (Integer color : classList) {
			if (index++ == 0) {
				continue; // first registered color is the background
			}
			if (classes.length() > 0) {
				classes.append(',');
			}
			classes.append(color);
		}

		return classes.toString();
	}

}