// Sample code to find unsupported character conversions

/*
    http://en.wikipedia.org/wiki/Code_page_1252
    http://en.wikipedia.org/wiki/EBCDIC_037
  
    http://stackoverflow.com/questions/13389182/in-net-how-do-i-determine-what-characters-existing-in-the-windows-1252-codepag
 
 
    Notes:
    (1) There is a problem in the code: the .NET conversion from Windows-1252 to IBM 37 is faulty.  It converts Ÿ (159 in Windows-1252) to "Y" (232 in IBM 37) instead of "Ÿ" (255 in IBM 37).
    (2) The Console cannot display all the characters on the terminal.
 */

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace test_code_page
{
    /// <summary>
    /// Console utility that lists every Windows-1252 byte value whose decoded
    /// character cannot be encoded in the IBM037 code page.
    /// </summary>
    class find_unsupported_conversions
    {
        /// <summary>
        /// Program entry point. Redirects console output to a newly created file and
        /// runs the code page test. If the file cannot be created, the report is
        /// written to the console instead.
        /// </summary>
        /// <param name="args">
        /// When exactly one argument is supplied it is used as the output path;
        /// otherwise the path is read interactively from the console.
        /// </param>
        static void Main(string[] args)
        {
            TextWriter console_streamwriter = Console.Out;
            FileStream stream = null;
            StreamWriter streamwriter = null;
            string path;

            if (args.Length == 1)
            {
                path = args[0];
            }
            else
            {
                Console.Write("Enter path: ");
                path = Console.ReadLine();
            }

            try
            {
                try
                {
                    // CreateNew fails if the file already exists, so an existing
                    // report is never overwritten.
                    stream = new FileStream(path, FileMode.CreateNew, FileAccess.Write);
                    streamwriter = new StreamWriter(stream);
                    Console.SetOut(streamwriter);
                }
                catch (Exception e)
                {
                    // Best effort: report the failure and fall through so the
                    // results still go to the console.
                    Console.WriteLine("Cannot open '" + path + "' for writing");
                    Console.WriteLine(e.Message);
                }

                test_code_pages();
            }
            finally
            {
                // Always restore the console and flush/close the file, even when
                // test_code_pages() throws; otherwise buffered output is lost and
                // the file handle stays open.
                Console.SetOut(console_streamwriter);
                if (streamwriter != null)
                {
                    streamwriter.Close();
                }
                if (stream != null)
                {
                    stream.Close();
                }
            }
        }

        /// <summary>
        /// Decodes each byte value 0..255 as Windows-1252 and writes to
        /// <see cref="Console.Out"/> one line per character that IBM037 cannot
        /// encode (the character, a space, and the byte value), followed by a
        /// summary line with the total count.
        /// </summary>
        /// <remarks>
        /// The target encoding is created with an exception fallback. With the
        /// default fallback, an unmappable character may be silently replaced by
        /// "?" or by a look-alike character (the header note reports Ÿ becoming Y),
        /// so comparing the converted byte against "?" misses the latter case.
        /// </remarks>
        private static void test_code_pages()
        {
            Encoding source_encoding = Encoding.GetEncoding("Windows-1252");
            Encoding target_encoding = Encoding.GetEncoding(
                "IBM037",
                EncoderFallback.ExceptionFallback,
                DecoderFallback.ExceptionFallback);

            int count = 0;
            byte[] b = new byte[1];
            for (int i = 0; i < 256; i++)
            {
                b[0] = (byte)i;
                string text = source_encoding.GetString(b);
                if (!can_encode(target_encoding, text))
                {
                    count++;
                    Console.WriteLine(text + " " + i.ToString());
                }
            }

            Console.WriteLine(count.ToString() + " unsupported.");
        }

        /// <summary>
        /// Reports whether <paramref name="text"/> can be encoded by
        /// <paramref name="encoding"/> without invoking its encoder fallback.
        /// </summary>
        /// <param name="encoding">
        /// Encoding to test; it must use <see cref="EncoderFallback.ExceptionFallback"/>
        /// for unmappable characters to be detected.
        /// </param>
        /// <param name="text">Text to encode.</param>
        /// <returns>
        /// <c>false</c> if encoding raised <see cref="EncoderFallbackException"/>;
        /// otherwise <c>true</c>.
        /// </returns>
        private static bool can_encode(Encoding encoding, string text)
        {
            try
            {
                encoding.GetBytes(text);
                return true;
            }
            catch (EncoderFallbackException)
            {
                return false;
            }
        }
    }
}
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s