보관물

Archive for 10월, 2008

UTF-8 UTF-16

 
/* utf.js - UTF-8 <=> UTF-16 conversion
*
* Copyright (C) 1999 Masanao Izumo <mo@goice.co.jp>
* Version: 1.0
* LastModified: Dec 25 1999
* This library is free.  You can redistribute it and/or modify it.
*/

/*
* Interfaces:
* utf8 = utf16to8(utf16);
* utf16 = utf8to16(utf8);
*/

/**
 * Encode a JavaScript (UTF-16) string as a UTF-8 "byte string": a string in
 * which every character has a code in the range 0x00-0xFF and stands for one
 * encoded byte.
 *
 * Each UTF-16 code unit is encoded on its own, so a surrogate pair becomes
 * two 3-byte sequences rather than one 4-byte sequence. U+0000 is written as
 * the two-byte form C0 80 instead of a single zero byte.
 *
 * @param {string} str - Text to encode.
 * @returns {string} One character per encoded byte.
 */
function utf16to8(str) {
    var out, i, len, c;

    out = "";
    len = str.length;
    for (i = 0; i < len; i++) {
        c = str.charCodeAt(i);
        if ((c >= 0x0001) && (c <= 0x007F)) {
            // 0xxxxxxx - ASCII is copied through unchanged.
            out += str.charAt(i);
        } else if (c > 0x07FF) {
            // 1110 xxxx  10xx xxxx  10xx xxxx
            out += String.fromCharCode(0xE0 | ((c >> 12) & 0x0F));
            out += String.fromCharCode(0x80 | ((c >> 6) & 0x3F));
            out += String.fromCharCode(0x80 | (c & 0x3F));
        } else {
            // 110x xxxx  10xx xxxx - also reached for c === 0, giving C0 80.
            out += String.fromCharCode(0xC0 | ((c >> 6) & 0x1F));
            out += String.fromCharCode(0x80 | (c & 0x3F));
        }
    }
    return out;
}

/**
 * Decode a UTF-8 "byte string" (one character per byte, codes 0x00-0xFF)
 * into a regular JavaScript (UTF-16) string.
 *
 * One-, two- and three-byte sequences are decoded without validating their
 * continuation bytes. A four-byte sequence is decoded to a surrogate pair
 * only when it is well formed and encodes U+10000..U+10FFFF. Anything else
 * (stray continuation bytes, unsupported lead bytes, characters with a code
 * above 0xFF) is skipped without producing output.
 *
 * @param {string} str - Byte string to decode.
 * @returns {string} The decoded text.
 */
function utf8to16(str) {
    var out, i, len, c;
    var char2, char3, char4, cp;

    out = "";
    len = str.length;
    i = 0;
    while (i < len) {
        c = str.charCodeAt(i++);
        switch (c >> 4) {
        case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
            // 0xxxxxxx
            out += str.charAt(i - 1);
            break;
        case 12: case 13:
            // 110x xxxx   10xx xxxx
            char2 = str.charCodeAt(i++);
            out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
            break;
        case 14:
            // 1110 xxxx  10xx xxxx  10xx xxxx
            char2 = str.charCodeAt(i++);
            char3 = str.charCodeAt(i++);
            out += String.fromCharCode(((c & 0x0F) << 12) |
                ((char2 & 0x3F) << 6) |
                (char3 & 0x3F));
            break;
        case 15:
            // 1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
            // Peek first; the bytes are consumed only if the whole sequence
            // is valid, so malformed input is skipped byte by byte.
            char2 = str.charCodeAt(i);
            char3 = str.charCodeAt(i + 1);
            char4 = str.charCodeAt(i + 2);
            if ((c & 0x08) === 0 &&
                    char2 >= 0x80 && char2 <= 0xBF &&
                    char3 >= 0x80 && char3 <= 0xBF &&
                    char4 >= 0x80 && char4 <= 0xBF) {
                cp = ((c & 0x07) << 18) |
                    ((char2 & 0x3F) << 12) |
                    ((char3 & 0x3F) << 6) |
                    (char4 & 0x3F);
                if (cp >= 0x10000 && cp <= 0x10FFFF) {
                    i += 3;
                    cp -= 0x10000;
                    out += String.fromCharCode(0xD800 + (cp >> 10),
                        0xDC00 + (cp & 0x3FF));
                }
            }
            break;
        }
    }

    return out;
}

카테고리:Javascript