// UTF-8 coding unit with friendly support created by Daniel Berghold.
// contact: berdac02@htl-kaindorf.ac.at
unit StrAdapterUTF_8;

interface

uses
  StrAdapterBase;

  // "UTF-8"
Type
  TUTF_8Adapter = Class(TBaseStringAdapter)
  Public
    Class Function EncodingID : String; Override;

    Function    Decode(Const s: String): String; Override;
    Function    Encode(Const s: String): String; Override;
  End;

implementation

{ TUTF_8Adapter }

Uses
  StrAdapter,
  SysUtils;

//******************************************************************************
// Description: Convert a ASCII-String into a UTF-8-String                     *
// Input:       A normal ASCII-String without UTF-8 Characters                 *
// Output:      A string in the UTF-8 format                                   *
// Example:     Input:  Fe                                                   *
//              Output: Füße                                                 *
// Date:        2003-08-09                                                     *
//******************************************************************************
function TUTF_8Adapter.Encode(const s: String):String;
var
  i,j: Integer;          // Counters
  AsciiChar: Integer;    // Actual Char of the Input-String
  AsciiStrLen, NewLen: Integer;  // Length of Ascii-String, Length of UTF-String
  Buffer: PChar;         // Buffer to copy String
begin
  result := StringReplace(s, '&', '&amp;',  [rfReplaceAll]);
  result := StringReplace(result, '"', '&quot;', [rfReplaceAll]);
  result := StringReplace(result, '<', '&lt;',   [rfReplaceAll]);
  result := StringReplace(result, '>', '&gt;',   [rfReplaceAll]);

  AsciiStrLen:=length(result);    // Old length
  NewLen:=0;                 // New length

  // Calculate the new lenth
  For i:=1 to AsciiStrLen do
    If Ord(result[i])>127 then
      inc(NewLen,2)
    else
      inc(NewLen);

  // Get Memory for Buffer
  GetMem(Buffer,NewLen+1);

  i:=1; // Position of the ASCII-String
  j:=0; // Position of the UTF-String
  while (j<NewLen) do
  begin
    AsciiChar:=Ord(result[i]);  // The Decimalvalue of the actual ASCII-Char
    If AsciiChar>127 then  // If the Value is higher then 127 then the UTF
    begin                  // Character must have 2 Bytes else it dosen't change
      // First Byte: 110x.xxxx
      Buffer[j]:=Chr((AsciiChar SHR 6) OR 192);
      inc(j);
      // Second Byte: 10xx.xxxx
      Buffer[j]:=Chr((AsciiChar AND 63) OR 128)
      // Example:
      //  in ASCII: 1111.1100
      //  in UTF:   1100.0011 1011.1100
    end
    else
      Buffer[j]:=Chr(AsciiChar);
    inc(i); // increase Counters
    inc(j);
  end;
  Buffer[NewLen]:=#0; // complete String
  result:=Buffer;     // return String
  FreeMem(Buffer)     // Free Memory
end;

//******************************************************************************
// Description: Convert a UTF-8-String into a ASCII-String                     *
// Input:       A string with the UTF-8 foramt  Characters                     *
// Output:      A normal ASCII-String without UTF-8                            *
// Example:     Input:  Füße                                                 *
//              Output: Fe                                                   *
// Date:        2003-08-09                                                     *
//******************************************************************************
function TUTF_8Adapter.Decode(const s: String):String;
var
  i,j, Temp: Integer;  // Counters
  UtfChar: Byte;       // Actual Char of the Input-String
  UtfStrLen, NewLen: Integer;    // Length of UTF-String, Length of ASCII-String
  Buffer: PChar;       // Buffer to copy String
begin
  result := StringReplace(s,        '&quot;', '"', [rfReplaceAll]);
  result := StringReplace(result, '&#34;',  '"', [rfReplaceAll]);
  result := StringReplace(result, '&amp;',  '&', [rfReplaceAll]);
  result := StringReplace(result, '&#38;',  '&', [rfReplaceAll]);
  result := StringReplace(result, '&lt;',   '<', [rfReplaceAll]);
  result := StringReplace(result, '&#60;',  '<', [rfReplaceAll]);
  result := StringReplace(result, '&gt;',   '>', [rfReplaceAll]);
  result := StringReplace(result, '&#62;',  '>', [rfReplaceAll]);

  UtfStrLen:=length(result); // Old Len
  NewLen:=0;            // New Len
  i:=1;                 // Initalize Counter

  // Calculate Length of the New String
  while (i<=UtfStrLen) do
  begin
    // If 0xxx.xxx then add 1 byte
    If (Ord(result[i]) SHR 7) = 0 then
    begin
      inc(i);
      inc(NewLen)
    end
    else
      // If 110x.xxxx 10xx.xxxx then add 2 byte
      If (Ord(result[i]) SHR 5) = 6 then
      begin
        inc(i,2);
        inc(NewLen)
      end
      else
        // If 1110.xxxx 10xx.xxxx 10xx.xxxx then add 3 Bytes
        If (Ord(result[i]) SHR 4) = 14 then
        begin
          inc(i,3);
          inc(NewLen)
        end
        else
          // If 1111.0xxx 10xx.xxx 10xx.xxxx 10xx.xxx then add 4 Bytes
          If (Ord(s[1]) SHR 3) = 30 then
          begin
            inc(i,4);
            inc(NewLen)
          end
          else  // Invalid UTF-String!
            inc(i);
  end;

  // Get Memory for the Buffer
  GetMem(Buffer,NewLen+1);

  i:=1;  // Position of UTF-String
  j:=0;  // Position of ASCII-String
  while (i<=UtfStrLen) do
  begin
    UtfChar:=Ord(result[i]);  // The Decimalvalue of the actual ASCII-Char
    If (UtfChar SHR 7) = 0 then  // 0xxx.xxx --> 1 Byte, no change
    begin
      inc(i);
      Buffer[j]:=Chr(UtfChar);
    end
    else
      If (UtfChar SHR 5) = 6 then  // 2 Bytes --> 110x.xxxx 10xx.xxxx
      begin
        inc(i);
        Temp:=((Ord(result[i]) AND 63)+(UtfChar AND 31) SHL 6);
        inc(i);
        If Temp<256 then           // If valid write
          Buffer[j]:=Chr(Temp)
        else                       // else write Space
          Buffer[j]:=' ';
      end
      else
        If (UtfChar SHR 4) = 14 then // 3 Bytes --> Not Vaild in ASCII!
        begin
          inc(i,3);
          Buffer[j]:=' '   // Write Space
        end
        else
          If (Ord(s[1]) SHR 3) = 30 then  // 4 Bytes --> Not Vaild in ASCII!
          begin
            inc(i,4);
            Buffer[j]:=' '
          end
          else  // Invalid UTF-String!
            inc(i);
    inc(j);   // Increase Counter of ASCII-String
  end;

  Buffer[NewLen]:=#0; // Complete String
  result:=Buffer;     // return String
  FreeMem(Buffer);    // Free Memory
end;


Class Function TUTF_8Adapter.EncodingID: String;
Begin
  Result := 'UTF-8';
End;

Initialization
    // registering the string adapter to the converter
  TStringAdapter.RegisterAdapter(TUTF_8Adapter);

end.
