Merge pull request #3 from saurabhb17/master

secondary files
author: saurabhb17 2020-02-26 16:14:17 +0530
committer: GitHub 2020-02-26 16:14:17 +0530
commit: 003d02608917e7a69d1a98438837e94ccf68352a (patch)
tree: 1392c90227aeea231c1d86371131e04c40382918 /lib_dxf/intern/drw_textcodec.cpp
parent: 886d9cb772e81d2e5262284bc3082664f084337f (diff)
parent: e255d0622297488c1c52755be670733418c994cf (diff)
download: KiCad-eSim-003d02608917e7a69d1a98438837e94ccf68352a.tar.gz
KiCad-eSim-003d02608917e7a69d1a98438837e94ccf68352a.tar.bz2
KiCad-eSim-003d02608917e7a69d1a98438837e94ccf68352a.zip
1 files changed, 693 insertions, 0 deletions
diff --git a/lib_dxf/intern/drw_textcodec.cpp b/lib_dxf/intern/drw_textcodec.cpp
new file mode 100644
index 00000000..b640bc95
--- /dev/null
+++ b/lib_dxf/intern/drw_textcodec.cpp
@@ -0,0 +1,693 @@
+#include "drw_textcodec.h"
+#include <sstream>
+#include <iomanip>
+#include <algorithm>
+#include "../drw_base.h"
+#include "drw_cptables.h"
+#include "drw_cptable932.h"
+#include "drw_cptable936.h"
+#include "drw_cptable949.h"
+#include "drw_cptable950.h"
+
+DRW_TextCodec::DRW_TextCodec()
+{
+    // Start with the base converter (constructed without a code page table)
+    // and default the version to DRW::AC1021 until setVersion() is called.
+    conv    = new DRW_Converter( NULL, 0 );
+    version = DRW::AC1021;
+}
+
+
+DRW_TextCodec::~DRW_TextCodec()
+{
+    delete conv;    // release the converter allocated by the constructor or by setCodePage()
+}
+
+
+/**
+ * Select the DXF version group that decides how text is converted.
+ *
+ * - "AC1006" / "AC1009" select DRW::AC1009 and force code page "ANSI_1252".
+ * - "AC1012", "AC1014", "AC1015", "AC1018" select DRW::AC1015 and keep a code
+ *   page that was already chosen ("ANSI_1252" if none was).
+ * - Any other string selects DRW::AC1021 with "ANSI_1252".
+ *
+ * The converter is rebuilt in every case, so it always matches the current
+ * (version, code page) pair even when the code page was set before the
+ * version or when the version is changed on an already used codec.
+ *
+ * @param v version string; must not be NULL.
+ */
+void DRW_TextCodec::setVersion( std::string* v )
+{
+    const std::string& versionStr = *v;
+
+    if( versionStr == "AC1009" || versionStr == "AC1006" )
+    {
+        version = DRW::AC1009;
+        cp = "ANSI_1252";
+    }
+    else if( versionStr == "AC1012" || versionStr == "AC1014"
+             || versionStr == "AC1015" || versionStr == "AC1018" )
+    {
+        version = DRW::AC1015;
+
+        if( cp.empty() )    // codepage not set, initialize
+            cp = "ANSI_1252";
+    }
+    else
+    {
+        version = DRW::AC1021;
+        cp = "ANSI_1252";
+    }
+
+    // setCodePage() picks the converter from both 'version' and 'cp'; calling
+    // it here prevents a converter built for a previous version from being
+    // kept by mistake.
+    setCodePage( &cp );
+}
+
+
+/**
+ * Select the code page and install the matching converter.
+ *
+ * Table based converters are only used when the version is DRW::AC1009 or
+ * DRW::AC1015; for any other version the base DRW_Converter is installed.
+ * For those two versions "UTF-8" is replaced by "ANSI_1252" with the base
+ * converter, and an unrecognised name uses the 1252 table.
+ *
+ * The new converter is created before the old one is destroyed, so if an
+ * allocation throws the codec keeps its previous, still valid, state.
+ *
+ * @param c code page name or alias, normalised with correctCodePage();
+ *          must not be NULL.
+ */
+void DRW_TextCodec::setCodePage( std::string* c )
+{
+    std::string     name  = correctCodePage( *c );
+    DRW_Converter*  fresh = NULL;
+
+    if( version == DRW::AC1009 || version == DRW::AC1015 )
+    {
+        if( name == "ANSI_874" )
+            fresh = new DRW_ConvTable( DRW_Table874, CPLENGHTCOMMON );
+        else if( name == "ANSI_932" )
+            fresh = new DRW_Conv932Table( DRW_Table932, DRW_LeadTable932,
+                    DRW_DoubleTable932, CPLENGHT932 );
+        else if( name == "ANSI_936" )
+            fresh = new DRW_ConvDBCSTable( DRW_Table936, DRW_LeadTable936,
+                    DRW_DoubleTable936, CPLENGHT936 );
+        else if( name == "ANSI_949" )
+            fresh = new DRW_ConvDBCSTable( DRW_Table949, DRW_LeadTable949,
+                    DRW_DoubleTable949, CPLENGHT949 );
+        else if( name == "ANSI_950" )
+            fresh = new DRW_ConvDBCSTable( DRW_Table950, DRW_LeadTable950,
+                    DRW_DoubleTable950, CPLENGHT950 );
+        else if( name == "ANSI_1250" )
+            fresh = new DRW_ConvTable( DRW_Table1250, CPLENGHTCOMMON );
+        else if( name == "ANSI_1251" )
+            fresh = new DRW_ConvTable( DRW_Table1251, CPLENGHTCOMMON );
+        else if( name == "ANSI_1253" )
+            fresh = new DRW_ConvTable( DRW_Table1253, CPLENGHTCOMMON );
+        else if( name == "ANSI_1254" )
+            fresh = new DRW_ConvTable( DRW_Table1254, CPLENGHTCOMMON );
+        else if( name == "ANSI_1255" )
+            fresh = new DRW_ConvTable( DRW_Table1255, CPLENGHTCOMMON );
+        else if( name == "ANSI_1256" )
+            fresh = new DRW_ConvTable( DRW_Table1256, CPLENGHTCOMMON );
+        else if( name == "ANSI_1257" )
+            fresh = new DRW_ConvTable( DRW_Table1257, CPLENGHTCOMMON );
+        else if( name == "ANSI_1258" )
+            fresh = new DRW_ConvTable( DRW_Table1258, CPLENGHTCOMMON );
+        else if( name == "UTF-8" )    // DXF older than 2007 are written in Windows code pages
+        {
+            name  = "ANSI_1252";
+            fresh = new DRW_Converter( NULL, 0 );
+        }
+        else
+            fresh = new DRW_ConvTable( DRW_Table1252, CPLENGHTCOMMON );
+    }
+    else
+    {
+        fresh = new DRW_Converter( NULL, 0 );
+    }
+
+    // Commit only after every allocation succeeded: 'conv' never dangles.
+    delete conv;
+    conv = fresh;
+    cp   = name;
+}
+
+
+std::string DRW_TextCodec::toUtf8( std::string s )
+{
+    // Hand the (by-value) argument to the currently installed converter.
+    std::string* input = &s;
+
+    return conv->toUtf8( input );
+}
+
+
+std::string DRW_TextCodec::fromUtf8( std::string s )
+{
+    // Hand the (by-value) argument to the currently installed converter.
+    std::string* input = &s;
+
+    return conv->fromUtf8( input );
+}
+
+
+/**
+ * Copy the input, replacing each "\U+XXXX" escape with the result of
+ * encodeText(). Bytes >= 0x80 are stepped over according to their lead byte
+ * (2, 3 or 4 bytes; 1 byte for values >= 0xF8) and copied unchanged.
+ */
+std::string DRW_Converter::toUtf8( std::string* s )
+{
+    const std::string&              in  = *s;
+    const std::string::size_type    len = in.length();
+
+    std::string             out;
+    std::string::size_type  copied  = 0;    // first input byte not yet written to 'out'
+    std::string::size_type  pos     = 0;
+
+    while( pos < len )
+    {
+        const unsigned char     ch      = in[pos];
+        std::string::size_type  step    = 1;
+
+        if( ch >= 0xF8 )
+        {
+            step = 1;
+        }
+        else if( ch >= 0xF0 )       // 4 byte sequence
+        {
+            step = 4;
+        }
+        else if( ch >= 0xE0 )       // 3 byte sequence
+        {
+            step = 3;
+        }
+        else if( ch >= 0x80 )       // 2 byte sequence
+        {
+            step = 2;
+        }
+        else if( ch == '\\' && pos + 6 < len && in[pos + 1] == 'U' && in[pos + 2] == '+' )
+        {
+            // flush the plain text seen so far, then the decoded escape
+            out.append( in, copied, pos - copied );
+            out    += encodeText( in.substr( pos, 7 ) );
+            step    = 7;
+            copied  = pos + 7;
+        }
+
+        pos += step;
+    }
+
+    out.append( in, copied, std::string::npos );
+
+    return out;
+}
+
+
+/**
+ * Convert UTF-8 text to the single byte code page described by 'table'.
+ *
+ * ASCII bytes are copied unchanged. Every other character is decoded with
+ * decodeNum() and looked up in 'table'; a match is written as the byte
+ * CPOFFSET + index, anything else as the escape produced by decodeText().
+ *
+ * Malformed input is handled without undefined behaviour: the decoded length
+ * defaults to one byte (decodeNum() may leave it untouched for an invalid
+ * lead byte), the bytes handed to decodeNum() are padded to four, and the
+ * read position is clamped to the end of the input.
+ */
+std::string DRW_ConvTable::fromUtf8( std::string* s )
+{
+    std::string result;
+    const std::string::size_type len = s->length();
+    std::string::size_type j = 0;    // first input byte not yet copied to result
+
+    for( std::string::size_type i = 0; i < len; i++ )
+    {
+        const unsigned char c = s->at( i );
+
+        if( c <= 0x7F )    // plain ASCII, copied in bulk later
+            continue;
+
+        result.append( *s, j, i - j );
+
+        // decodeNum() reads up to 4 bytes; pad a sequence cut off by the end
+        // of the string so it never reads out of range.
+        std::string part1 = s->substr( i, 4 );
+        part1.resize( 4, '\0' );
+
+        int l = 1;
+        const int code = decodeNum( part1, &l );
+
+        if( l < 1 )    // always make progress
+            l = 1;
+
+        j   = std::min( i + l, len );
+        i   = j - 1;
+
+        bool notFound = true;
+
+        for( int k = 0; k<cpLenght; k++ )
+        {
+            if( table[k] == code )
+            {
+                result += CPOFFSET + k;    // translate from table
+                notFound = false;
+                break;
+            }
+        }
+
+        if( notFound )
+            result += decodeText( code );
+    }
+
+    result.append( *s, j, std::string::npos );
+
+    return result;
+}
+
+
+/**
+ * Convert text in the single byte code page described by 'table' to UTF-8.
+ *
+ * ASCII bytes are copied, "\U+XXXX" escapes are expanded with encodeText()
+ * and bytes >= 0x80 are translated through table[byte - 0x80].
+ *
+ * The escape test compares the number of remaining bytes instead of forming
+ * 'it + 6', which would be an iterator past end() near the end of the string.
+ */
+std::string DRW_ConvTable::toUtf8( std::string* s )
+{
+    std::string res;
+
+    for( std::string::iterator it = s->begin(); it != s->end(); ++it )
+    {
+        const unsigned char c = *it;
+
+        if( c >= 0x80 )
+        {
+            res += encodeNum( table[c - 0x80] );    // translate from table
+            continue;
+        }
+
+        // check for \U+ encoded text: needs 7 bytes starting at 'it'
+        if( c == '\\' && s->end() - it > 6 && *(it + 1) == 'U' && *(it + 2) == '+' )
+        {
+            res += encodeText( std::string( it, it + 7 ) );
+            it  += 6;
+        }
+        else
+        {
+            res += c;    // ordinary ascii char, or a '\' that starts no escape
+        }
+    }
+
+    return res;
+}
+
+
+/**
+ * Turn a "\U+XXXX" escape into the UTF-8 encoding of code point XXXX.
+ *
+ * @param stmp the 7 character escape; characters 3..6 are the hex digits.
+ * @return encodeNum() of the parsed value, or 'stmp' unchanged when it is
+ *         shorter than 7 characters or the four characters are not all
+ *         hexadecimal digits. Text that merely looks like an escape is thus
+ *         preserved instead of being dropped or partially parsed.
+ *
+ * The digits are parsed by hand, so the result is the same on every platform
+ * and does not depend on stream or sscanf behaviour.
+ */
+std::string DRW_Converter::encodeText( std::string stmp )
+{
+    if( stmp.length() < 7 )
+        return stmp;
+
+    int code = 0;
+
+    for( std::string::size_type k = 3; k < 7; ++k )
+    {
+        const char  ch = stmp[k];
+        int         digit;
+
+        if( ch >= '0' && ch <= '9' )
+            digit = ch - '0';
+        else if( ch >= 'A' && ch <= 'F' )
+            digit = ch - 'A' + 10;
+        else if( ch >= 'a' && ch <= 'f' )
+            digit = ch - 'a' + 10;
+        else
+            return stmp;    // not an escape after all
+
+        code = (code << 4) | digit;
+    }
+
+    return encodeNum( code );
+}
+
+
+/**
+ * Build the escape "\U+XXXX" for a code point.
+ *
+ * @param c code point; written as upper case hexadecimal, zero padded to at
+ *          least 4 digits.
+ * @return the escape text.
+ *
+ * The Apple branch formats into a plain char buffer: building the number in a
+ * 16 character std::string appended its unused '\0' padding to the result.
+ */
+std::string DRW_Converter::decodeText( int c )
+{
+    std::string res = "\\U+";
+
+#if defined(__APPLE__)
+    char buf[16];
+    snprintf( buf, sizeof( buf ), "%04X", static_cast<unsigned int>( c ) );
+    res += buf;    // stops at the terminating NUL
+#else
+    std::stringstream ss;
+    ss << std::uppercase << std::setfill( '0' ) << std::setw( 4 ) << std::hex << c;
+    res += ss.str();
+#endif
+    return res;
+}
+
+
+/**
+ * Encode a code point as a UTF-8 byte sequence of 1 to 4 bytes.
+ *
+ * The bytes are assembled in a NUL terminated buffer and returned as a C
+ * string, so a value whose single byte is 0 yields an empty string.
+ */
+std::string DRW_Converter::encodeNum( int c )
+{
+    // zero filled: whatever is written below is always NUL terminated
+    unsigned char utf8[5] = { 0, 0, 0, 0, 0 };
+
+    if( c >= 0x10000 )      // 10000-10FFFF 4 bytes
+    {
+        utf8[0] = 0xf0 | (c >> 18);
+        utf8[1] = 0x80 | ( (c >> 12) & 0x3f );
+        utf8[2] = 0x80 | ( (c >> 6) & 0x3f );
+        utf8[3] = 0x80 | (c & 0x3f);
+    }
+    else if( c >= 0x800 )   // 800-FFFF 3 bytes
+    {
+        utf8[0] = 0xe0 | (c >> 12);
+        utf8[1] = 0x80 | ( (c >> 6) & 0x3f );
+        utf8[2] = 0x80 | (c & 0x3f);
+    }
+    else if( c >= 128 )     // 80-07FF 2 bytes
+    {
+        utf8[0] = 0xC0 | (c >> 6);
+        utf8[1] = 0x80 | (c & 0x3f);
+    }
+    else                    // 0-7F US-ASCII 7 bits
+    {
+        utf8[0] = c;
+    }
+
+    return std::string( (char*) utf8 );
+}
+
+
+/**
+ * Decode the UTF-8 character at the start of 's'.
+ *
+ * @param s bytes beginning at the character to decode (callers pass up to
+ *          4 bytes).
+ * @param b receives the number of bytes consumed: 1 for ASCII, 2, 3 or 4 for
+ *          a multi-byte character, 1 for a malformed sequence (so the caller
+ *          can resynchronise on the next byte) and 0 when 's' is empty.
+ *          It is always written.
+ * @return the decoded code point, or 0xFFFD (the Unicode replacement
+ *         character) when 's' is empty, starts with an invalid lead byte,
+ *         is shorter than the sequence its lead byte announces, or contains
+ *         a byte that is not a continuation byte (10xxxxxx).
+ */
+int DRW_Converter::decodeNum( std::string s, int* b )
+{
+    const int replacement = 0xFFFD;
+
+    if( s.empty() )
+    {
+        *b = 0;
+        return replacement;
+    }
+
+    const unsigned char lead = s[0];
+    int length;
+    int code;
+
+    if( lead < 0x80 )                   // 1 byte, plain ASCII
+    {
+        *b = 1;
+        return lead;
+    }
+    else if( (lead & 0xE0) == 0xC0 )    // 2 bytes
+    {
+        length  = 2;
+        code    = lead & 0x1F;
+    }
+    else if( (lead & 0xF0) == 0xE0 )    // 3 bytes
+    {
+        length  = 3;
+        code    = lead & 0x0F;
+    }
+    else if( (lead & 0xF8) == 0xF0 )    // 4 bytes
+    {
+        length  = 4;
+        code    = lead & 0x07;
+    }
+    else                                // stray continuation byte or 0xF8..0xFF
+    {
+        *b = 1;
+        return replacement;
+    }
+
+    if( s.length() < static_cast<std::string::size_type>( length ) )
+    {
+        *b = 1;                         // sequence cut off by the end of the input
+        return replacement;
+    }
+
+    for( int k = 1; k < length; ++k )
+    {
+        const unsigned char cont = s[k];
+
+        if( (cont & 0xC0) != 0x80 )
+        {
+            *b = 1;
+            return replacement;
+        }
+
+        code = (code << 6) | (cont & 0x3F);
+    }
+
+    *b = length;
+    return code;
+}
+
+
+/**
+ * Convert UTF-8 text to the double byte code page described by 'doubleTable'.
+ *
+ * ASCII bytes are copied unchanged. Every other character is decoded with
+ * decodeNum() and searched in column 1 of 'doubleTable'; a match is written
+ * as the two bytes of column 0, anything else as the escape produced by
+ * decodeText().
+ *
+ * Malformed input is handled without undefined behaviour: the decoded length
+ * defaults to one byte (decodeNum() may leave it untouched for an invalid
+ * lead byte), the bytes handed to decodeNum() are padded to four, and the
+ * read position is clamped to the end of the input.
+ */
+std::string DRW_ConvDBCSTable::fromUtf8( std::string* s )
+{
+    std::string result;
+    const std::string::size_type len = s->length();
+    std::string::size_type j = 0;    // first input byte not yet copied to result
+
+    for( std::string::size_type i = 0; i < len; i++ )
+    {
+        const unsigned char c = s->at( i );
+
+        if( c <= 0x7F )    // direct conversion, copied in bulk later
+            continue;
+
+        result.append( *s, j, i - j );
+
+        // decodeNum() reads up to 4 bytes; pad a sequence cut off by the end
+        // of the string so it never reads out of range.
+        std::string part1 = s->substr( i, 4 );
+        part1.resize( 4, '\0' );
+
+        int l = 1;
+        const int code = decodeNum( part1, &l );
+
+        if( l < 1 )    // always make progress
+            l = 1;
+
+        j   = std::min( i + l, len );
+        i   = j - 1;
+
+        bool notFound = true;
+
+        for( int k = 0; k<cpLenght; k++ )
+        {
+            if( doubleTable[k][1] == code )
+            {
+                int data = doubleTable[k][0];
+                char d[3];
+                d[0]    = data >> 8;
+                d[1]    = data & 0xFF;
+                d[2]    = '\0';
+                result += d;     // translate from table
+                notFound = false;
+                break;
+            }
+        }
+
+        if( notFound )
+            result += decodeText( code );
+    }
+
+    result.append( *s, j, std::string::npos );
+
+    return result;
+}
+
+
+/**
+ * Convert text in the double byte code page described by 'leadTable' and
+ * 'doubleTable' to UTF-8.
+ *
+ * ASCII bytes are copied and "\U+XXXX" escapes expanded with encodeText().
+ * Byte 0x80 becomes the euro sign. Any other byte is a lead byte: it is
+ * combined with the following byte and searched in the slice of
+ * 'doubleTable' that 'leadTable' gives for that lead byte. Characters that
+ * cannot be translated are written as NOTFOUND936.
+ *
+ * A lead byte that is the last byte of the input has no trail byte; it is
+ * reported as not found instead of reading the element at end(). The escape
+ * test compares the number of remaining bytes instead of forming 'it + 6'
+ * past end().
+ */
+std::string DRW_ConvDBCSTable::toUtf8( std::string* s )
+{
+    std::string res;
+
+    for( std::string::iterator it = s->begin(); it != s->end(); ++it )
+    {
+        bool notFound = true;
+        const unsigned char c = *it;
+
+        if( c < 0x80 )
+        {
+            notFound = false;
+
+            // check for \U+ encoded text: needs 7 bytes starting at 'it'
+            if( c == '\\' && s->end() - it > 6 && *(it + 1) == 'U' && *(it + 2) == '+' )
+            {
+                res += encodeText( std::string( it, it + 7 ) );
+                it  += 6;
+            }
+            else
+            {
+                res += c;       // ordinary ascii char, or a '\' that starts no escape
+            }
+        }
+        else if( c == 0x80 )    // 1 byte table
+        {
+            notFound = false;
+            res += encodeNum( 0x20AC ); // euro sign
+        }
+        else if( it + 1 == s->end() )
+        {
+            // truncated 2 byte character: leave notFound set
+        }
+        else                            // 2 bytes
+        {
+            ++it;
+            int code    = (c << 8) | (unsigned char) (*it);
+            int sta     = leadTable[c - 0x81];
+            int end     = leadTable[c - 0x80];
+
+            for( int k = sta; k<end; k++ )
+            {
+                if( doubleTable[k][0] == code )
+                {
+                    res += encodeNum( doubleTable[k][1] );    // translate from table
+                    notFound = false;
+                    break;
+                }
+            }
+        }
+
+        // not found
+        if( notFound )
+            res += encodeNum( NOTFOUND936 );
+    }    // end for
+
+    return res;
+}
+
+
+/**
+ * Convert UTF-8 text to code page 932.
+ *
+ * ASCII bytes are copied unchanged. Every other character is decoded with
+ * decodeNum(). Code points strictly between 0xFF60 and 0xFFA0 map to the
+ * single byte 'code - CPOFFSET932'. Code points inside the ranges tested
+ * below are searched in column 1 of 'doubleTable' and written as the two
+ * bytes of column 0. Everything else is written as the escape produced by
+ * decodeText().
+ *
+ * Malformed input is handled without undefined behaviour: the decoded length
+ * defaults to one byte (decodeNum() may leave it untouched for an invalid
+ * lead byte), the bytes handed to decodeNum() are padded to four, and the
+ * read position is clamped to the end of the input.
+ */
+std::string DRW_Conv932Table::fromUtf8( std::string* s )
+{
+    std::string result;
+    const std::string::size_type len = s->length();
+    std::string::size_type j = 0;    // first input byte not yet copied to result
+
+    for( std::string::size_type i = 0; i < len; i++ )
+    {
+        const unsigned char c = s->at( i );
+
+        if( c <= 0x7F )    // direct conversion, copied in bulk later
+            continue;
+
+        result.append( *s, j, i - j );
+
+        // decodeNum() reads up to 4 bytes; pad a sequence cut off by the end
+        // of the string so it never reads out of range.
+        std::string part1 = s->substr( i, 4 );
+        part1.resize( 4, '\0' );
+
+        int l = 1;
+        const int code = decodeNum( part1, &l );
+
+        if( l < 1 )    // always make progress
+            l = 1;
+
+        j   = std::min( i + l, len );
+        i   = j - 1;
+
+        bool notFound = true;
+
+        // 1 byte table
+        if( code > 0xff60 && code < 0xFFA0 )
+        {
+            result += code - CPOFFSET932;    // translate from table
+            notFound = false;
+        }
+
+        // only these ranges are looked up in the double byte table
+        if( notFound && ( code<0xF8 || (code>0x390 && code<0x542)
+                          || (code>0x200F && code<0x9FA1) || code>0xF928 ) )
+        {
+            for( int k = 0; k<cpLenght; k++ )
+            {
+                if( doubleTable[k][1] == code )
+                {
+                    int data = doubleTable[k][0];
+                    char d[3];
+                    d[0]    = data >> 8;
+                    d[1]    = data & 0xFF;
+                    d[2]    = '\0';
+                    result += d;    // translate from table
+                    notFound = false;
+                    break;
+                }
+            }
+        }
+
+        if( notFound )
+            result += decodeText( code );
+    }
+
+    result.append( *s, j, std::string::npos );
+
+    return result;
+}
+
+
+/**
+ * Convert code page 932 text to UTF-8.
+ *
+ * ASCII bytes are copied and "\U+XXXX" escapes expanded with encodeText().
+ * Bytes strictly between 0xA0 and 0xE0 are single byte characters mapped to
+ * 'byte + CPOFFSET932'. Any other byte is combined with the following byte
+ * and, when it lies strictly between 0x80 and 0xA0 or between 0xDF and 0xFD,
+ * searched in the slice of DRW_DoubleTable932 selected through
+ * DRW_LeadTable932. Characters that cannot be translated are written as
+ * NOTFOUND932.
+ *
+ * A lead byte that is the last byte of the input has no trail byte; it is
+ * reported as not found instead of reading the element at end(). The escape
+ * test compares the number of remaining bytes instead of forming 'it + 6'
+ * past end().
+ */
+std::string DRW_Conv932Table::toUtf8( std::string* s )
+{
+    std::string res;
+
+    for( std::string::iterator it = s->begin(); it != s->end(); ++it )
+    {
+        bool notFound = true;
+        const unsigned char c = *it;
+
+        if( c < 0x80 )
+        {
+            notFound = false;
+
+            // check for \U+ encoded text: needs 7 bytes starting at 'it'
+            if( c == '\\' && s->end() - it > 6 && *(it + 1) == 'U' && *(it + 2) == '+' )
+            {
+                res += encodeText( std::string( it, it + 7 ) );
+                it  += 6;
+            }
+            else
+            {
+                res += c;               // ordinary ascii char, or a '\' that starts no escape
+            }
+        }
+        else if( c > 0xA0 && c < 0xE0 ) // 1 byte table
+        {
+            notFound = false;
+            res += encodeNum( c + CPOFFSET932 );    // translate from table
+        }
+        else if( it + 1 == s->end() )
+        {
+            // truncated 2 byte character: leave notFound set
+        }
+        else                                        // 2 bytes
+        {
+            ++it;
+            int code = (c << 8) | (unsigned char) (*it);
+            int sta = 0;
+            int end = 0;
+
+            if( c > 0x80 && c < 0xA0 )
+            {
+                sta = DRW_LeadTable932[c - 0x81];
+                end = DRW_LeadTable932[c - 0x80];
+            }
+            else if( c > 0xDF && c < 0xFD )
+            {
+                sta = DRW_LeadTable932[c - 0xC1];
+                end = DRW_LeadTable932[c - 0xC0];
+            }
+
+            // end stays 0 for bytes outside both lead byte ranges
+            if( end > 0 )
+            {
+                for( int k = sta; k<end; k++ )
+                {
+                    if( DRW_DoubleTable932[k][0] == code )
+                    {
+                        res += encodeNum( DRW_DoubleTable932[k][1] );    // translate from table
+                        notFound = false;
+                        break;
+                    }
+                }
+            }
+        }
+
+        // not found
+        if( notFound )
+            res += encodeNum( NOTFOUND932 );
+    }    // end for
+
+    return res;
+}
+
+
+/**
+ * Map a code page name or alias to the canonical name tested by
+ * setCodePage() ("ANSI_xxx" or "UTF-8").
+ *
+ * The comparison is case-insensitive: the input is upper-cased first, so
+ * every alias in the table below must be written in upper case.
+ *
+ * @param s code page name as found in the file.
+ * @return the canonical name; "ANSI_1252" for any unknown name. ISO8859-14
+ *         (Celtic), TSCII (Tamil) and UTF16 have no entry and therefore also
+ *         yield "ANSI_1252".
+ */
+std::string DRW_TextCodec::correctCodePage( const std::string& s )
+{
+    struct CodePageAlias
+    {
+        const char* alias;
+        const char* canonical;
+    };
+
+    static const CodePageAlias aliases[] =
+    {
+        // Latin/Thai
+        { "ANSI_874", "ANSI_874" }, { "CP874", "ANSI_874" },
+        { "ISO8859-11", "ANSI_874" }, { "TIS-620", "ANSI_874" },
+        // Central Europe and Eastern Europe
+        { "ANSI_1250", "ANSI_1250" }, { "CP1250", "ANSI_1250" },
+        { "ISO8859-2", "ANSI_1250" },
+        // Cyrillic script
+        { "ANSI_1251", "ANSI_1251" }, { "CP1251", "ANSI_1251" },
+        { "ISO8859-5", "ANSI_1251" }, { "KOI8-R", "ANSI_1251" },
+        { "KOI8-U", "ANSI_1251" }, { "IBM 866", "ANSI_1251" },
+        // Western Europe
+        { "ANSI_1252", "ANSI_1252" }, { "CP1252", "ANSI_1252" },
+        { "LATIN1", "ANSI_1252" }, { "ISO-8859-1", "ANSI_1252" },
+        { "CP819", "ANSI_1252" }, { "CSISO", "ANSI_1252" },
+        { "IBM819", "ANSI_1252" }, { "ISO_8859-1", "ANSI_1252" },
+        { "APPLE ROMAN", "ANSI_1252" }, { "ISO8859-1", "ANSI_1252" },
+        { "ISO8859-15", "ANSI_1252" }, { "ISO-IR-100", "ANSI_1252" },
+        { "L1", "ANSI_1252" }, { "IBM 850", "ANSI_1252" },
+        // Greek
+        { "ANSI_1253", "ANSI_1253" }, { "CP1253", "ANSI_1253" },
+        { "ISO8859-7", "ANSI_1253" },
+        // Turkish
+        { "ANSI_1254", "ANSI_1254" }, { "CP1254", "ANSI_1254" },
+        { "ISO8859-9", "ANSI_1254" }, { "ISO8859-3", "ANSI_1254" },
+        // Hebrew
+        { "ANSI_1255", "ANSI_1255" }, { "CP1255", "ANSI_1255" },
+        { "ISO8859-8", "ANSI_1255" },
+        // Arabic
+        { "ANSI_1256", "ANSI_1256" }, { "CP1256", "ANSI_1256" },
+        { "ISO8859-6", "ANSI_1256" },
+        // Baltic
+        { "ANSI_1257", "ANSI_1257" }, { "CP1257", "ANSI_1257" },
+        { "ISO8859-4", "ANSI_1257" }, { "ISO8859-10", "ANSI_1257" },
+        { "ISO8859-13", "ANSI_1257" },
+        // Vietnamese
+        { "ANSI_1258", "ANSI_1258" }, { "CP1258", "ANSI_1258" },
+        // Japanese
+        { "ANSI_932", "ANSI_932" }, { "SHIFT-JIS", "ANSI_932" },
+        { "SHIFT_JIS", "ANSI_932" }, { "CSSHIFTJIS", "ANSI_932" },
+        { "CSWINDOWS31J", "ANSI_932" }, { "MS_KANJI", "ANSI_932" },
+        { "X-MS-CP932", "ANSI_932" }, { "X-SJIS", "ANSI_932" },
+        { "EUCJP", "ANSI_932" }, { "EUC-JP", "ANSI_932" },
+        { "CSEUCPKDFMTJAPANESE", "ANSI_932" }, { "X-EUC", "ANSI_932" },
+        { "X-EUC-JP", "ANSI_932" }, { "JIS7", "ANSI_932" },
+        // Chinese PRC GBK (XGB) simplified
+        { "ANSI_936", "ANSI_936" }, { "GBK", "ANSI_936" },
+        { "GB2312", "ANSI_936" }, { "CHINESE", "ANSI_936" },
+        { "CN-GB", "ANSI_936" }, { "CSGB2312", "ANSI_936" },
+        { "CSGB231280", "ANSI_936" },
+        { "CSISO58BG231280", "ANSI_936" },     // historical spelling, kept for compatibility
+        { "CSISO58GB231280", "ANSI_936" },     // registered spelling of the alias above
+        { "GB_2312-80", "ANSI_936" }, { "GB231280", "ANSI_936" },
+        { "GB2312-80", "ANSI_936" }, { "ISO-IR-58", "ANSI_936" },
+        { "GB18030", "ANSI_936" },
+        // Korean
+        { "ANSI_949", "ANSI_949" }, { "EUCKR", "ANSI_949" },
+        // Chinese Big5 (Taiwan, Hong Kong SAR)
+        { "ANSI_950", "ANSI_950" }, { "BIG5", "ANSI_950" },
+        { "CN-BIG5", "ANSI_950" }, { "CSBIG5", "ANSI_950" },
+        { "X-X-BIG5", "ANSI_950" }, { "BIG5-HKSCS", "ANSI_950" },
+        // Unicode
+        { "UTF-8", "UTF-8" }, { "UTF8", "UTF-8" }, { "UTF88-BIT", "UTF-8" }
+    };
+
+    // stringstream cause crash in OS/X, bug#3597944
+    std::string cp = s;
+
+    // toupper() is only defined for unsigned char values (and EOF), so go
+    // through unsigned char to stay safe with non ASCII bytes.
+    for( std::string::size_type k = 0; k < cp.length(); ++k )
+        cp[k] = static_cast<char>( toupper( static_cast<unsigned char>( cp[k] ) ) );
+
+    const unsigned int count = sizeof( aliases ) / sizeof( aliases[0] );
+
+    for( unsigned int k = 0; k < count; ++k )
+    {
+        if( cp == aliases[k].alias )
+            return aliases[k].canonical;
+    }
+
+    return "ANSI_1252";
+}
author	saurabhb17	2020-02-26 16:14:17 +0530
committer	GitHub	2020-02-26 16:14:17 +0530
commit	003d02608917e7a69d1a98438837e94ccf68352a (patch)
tree	1392c90227aeea231c1d86371131e04c40382918 /lib_dxf/intern/drw_textcodec.cpp
parent	886d9cb772e81d2e5262284bc3082664f084337f (diff)
parent	e255d0622297488c1c52755be670733418c994cf (diff)
download	KiCad-eSim-003d02608917e7a69d1a98438837e94ccf68352a.tar.gz KiCad-eSim-003d02608917e7a69d1a98438837e94ccf68352a.tar.bz2 KiCad-eSim-003d02608917e7a69d1a98438837e94ccf68352a.zip