X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=libjava%2Fgnu%2Fgcj%2Fconvert%2FInput_UTF8.java;fp=libjava%2Fgnu%2Fgcj%2Fconvert%2FInput_UTF8.java;h=0000000000000000000000000000000000000000;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=433a0d1e3a62cea96464256a994e55045f5efdad;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/libjava/gnu/gcj/convert/Input_UTF8.java b/libjava/gnu/gcj/convert/Input_UTF8.java deleted file mode 100644 index 433a0d1e..00000000 --- a/libjava/gnu/gcj/convert/Input_UTF8.java +++ /dev/null @@ -1,114 +0,0 @@ -/* Copyright (C) 1999, 2000 Free Software Foundation - - This file is part of libgcj. - -This software is copyrighted work licensed under the terms of the -Libgcj License. Please consult the file "LIBGCJ_LICENSE" for -details. */ - -package gnu.gcj.convert; - -/** - * Convert UTF8 to Unicode. - * @author Per Bothner - * @date March 1999. - */ - -public class Input_UTF8 extends BytesToUnicode -{ - public String getName() { return "UTF8"; } - - int partial = 0; - int partial_bytes_expected = 0; - //int suggogate_second = -1; - - public int read (char[] outbuffer, int outpos, int count) - { - int origpos = outpos; - for (;;) - { - if (outpos - origpos >= count) - break; - if (inpos >= inlength) - break; - int b = inbuffer[inpos++]; - if (b >= 0) - outbuffer[outpos++] = (char) b; - else - { - if ((b & 0xC0) == 0x80) // Continuation byte - { - partial = (partial << 6) | (b & 0x3F); - --partial_bytes_expected; - if (partial_bytes_expected == 1) - { - if (partial > (0xFFFF>>6)) - { - // The next continuation byte will cause the result - // to exceed 0xFFFF, so we must use a surrogate pair. - // The "Unicode scalar value" (see D28 in section 3.7 - // of the Unicode Standard 2.0) is defined as: - // value == (hi-0xD800)*0x400+(lo-0xDC00)+0x10000, - // where (hi, lo) is the Unicode surrogate pair. - // After reading the first three bytes, we have: - // partial == (value >> 6). - // Substituting and simplifying, we get: - // partial == (hi-0xD800)*0x10+((lo-0xDC00)>>6)+0x400. - // The definition lo>=0xDC00 && lo<=0xDFFF implies - // that (lo-0xDC00)>>6 is in the range 0..15. - // Hence we can solve for `hi' and we can emit - // the high-surrogate without waiting for the - // final byte: - outbuffer[outpos++] - = (char) (0xD800 + ((partial - 0x400) >> 4)); - - // Now we want to set it up so that when we read - // the final byte on the next iteration, we will - // get the low-surrogate without special handling. - // I.e. we want: - // lo == (next_partial << 6) | (next & 0x3F) - // where next is the next input byte and next_partial - // is the value of partial at the end of this - // iteration. This implies: next_partial == lo >> 6. - // We can simplify the previous: - // partial == (hi-0xD800)*0x10+((lo-0xDC00)>>6)+0x400, - // to: partial == (hi-0xD800)*0x10+(lo>>6)+0x90. - // Inserting the values of hi and next_partial, - // and simplifying, we get: partial == - // ( (partial-0x400)&~0xF) + next_partial + 0x90. - // Solving for next_partial, we get: - // next_partial = partial+0x400-0x90-(partial&~0xF): - // or: next_partial = (partial&0xF) + 0x370. Hence: - partial = (partial & 0xF) + 0x370; - } - } - else if (partial_bytes_expected == 0) - { - outbuffer[outpos++] = (char) partial; - partial = 0; - partial_bytes_expected = 0; - } - } - else // prefix byte - { - if ((b & 0xE0) == 0xC0) - { - partial = b & 0x1F; - partial_bytes_expected = 1; - } - else if ((b & 0xF0) == 0xE0) - { - partial = b & 0xF; - partial_bytes_expected = 2; - } - else - { - partial = b & 7; - partial_bytes_expected = 3; - } - } - } - } - return outpos - origpos; - } -}