Assignment operator that calls a constructor is broken

Posted by Delan Azabani on Stack Overflow See other posts from Stack Overflow or by Delan Azabani
Published on 2010-05-02T09:15:00Z Indexed on 2010/05/02 9:27 UTC
Read the original article Hit count: 285

Filed under:
|
|
|
|

I've implemented some of the changes suggested in this question, and (thanks very much) it works quite well, however... in the process I've seemed to break the post-declaration assignment operator. With the following code:

#include <cstdio>
#include "ucpp"
main() {
  ustring a = "test";
  ustring b = "ing";
  ustring c = "- -";
  ustring d = "cafe\xcc\x81";
  printf("%s\n", (a + b + c[1] + d).encode());
}

I get a nice "testing cafe´" message. However, if I modify the code slightly so that the const char * conversion is done separately, post-declaration:

#include <cstdio>
#include "ucpp"
main() {
  ustring a = "test";
  ustring b = "ing";
  ustring c = "- -";
  ustring d;
  d = "cafe\xcc\x81";
  printf("%s\n", (a + b + c[1] + d).encode());
}

the ustring named d becomes blank, and all that is output is "testing ". My new code has three constructors, one void (which is probably the one being incorrectly used, and is used in the operator+ function), one that takes a const ustring &, and one that takes a const char *. The following is my new library code:

#include <cstdlib>
#include <cstring>
class ustring {
  int * values;
  long len;
  public:
  long length() {
    return len;
  }
  ustring() {
    len = 0;
    values = (int *) malloc(0);
  }
  ustring(const ustring &input) {
    len = input.len;
    values = (int *) malloc(sizeof(int) * len);
    for (long i = 0; i < len; i++)
      values[i] = input.values[i];
  }
  ustring operator=(ustring input) {
    ustring result(input);
    return result;
  }
  ustring(const char * input) {
    values = (int *) malloc(0);
    long s = 0;                                                                 // s = number of parsed chars
    int a, b, c, d, contNeed = 0, cont = 0;
    for (long i = 0; input[i]; i++)
      if (input[i] < 0x80) {                                                    // ASCII, direct copy (00-7f)
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = input[i];
      } else if (input[i] < 0xc0) {                                             // this is a continuation (80-bf)
        if (cont == contNeed) {                                                 // no need for continuation, use U+fffd
          values = (int *) realloc(values, sizeof(int) * ++s);
          values[s - 1] = 0xfffd;
        }
        cont = cont + 1;
        values[s - 1] = values[s - 1] | ((input[i] & 0x3f) << ((contNeed - cont) * 6));
        if (cont == contNeed) cont = contNeed = 0;
      } else if (input[i] < 0xc2) {                                             // invalid byte, use U+fffd (c0-c1)
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = 0xfffd;
      } else if (input[i] < 0xe0) {                                             // start of 2-byte sequence (c2-df)
        contNeed = 1;
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = (input[i] & 0x1f) << 6;
      } else if (input[i] < 0xf0) {                                             // start of 3-byte sequence (e0-ef)
        contNeed = 2;
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = (input[i] & 0x0f) << 12;
      } else if (input[i] < 0xf5) {                                             // start of 4-byte sequence (f0-f4)
        contNeed = 3;
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = (input[i] & 0x07) << 18;
      } else {                                                                  // restricted or invalid (f5-ff)
        values = (int *) realloc(values, sizeof(int) * ++s);
        values[s - 1] = 0xfffd;
      }
    len = s;
  }
  ustring operator=(const char * input) {
    ustring result(input);
    return result;
  }
  ustring operator+(ustring input) {
    ustring result;
    result.len = len + input.len;
    result.values = (int *) malloc(sizeof(int) * result.len);
    for (long i = 0; i < len; i++)
      result.values[i] = values[i];
    for (long i = 0; i < input.len; i++)
      result.values[i + len] = input.values[i];
    return result;
  }
  ustring operator[](long index) {
    ustring result;
    result.len = 1;
    result.values = (int *) malloc(sizeof(int));
    result.values[0] = values[index];
    return result;
  }
  char * encode() {
    char * r = (char *) malloc(0);
    long s = 0;
    for (long i = 0; i < len; i++) {
      if (values[i] < 0x80)
        r = (char *) realloc(r, s + 1),
        r[s + 0] = char(values[i]),
        s += 1;
      else if (values[i] < 0x800)
        r = (char *) realloc(r, s + 2),
        r[s + 0] = char(values[i] >> 6 | 0x60),
        r[s + 1] = char(values[i] & 0x3f | 0x80),
        s += 2;
      else if (values[i] < 0x10000)
        r = (char *) realloc(r, s + 3),
        r[s + 0] = char(values[i] >> 12 | 0xe0),
        r[s + 1] = char(values[i] >> 6 & 0x3f | 0x80),
        r[s + 2] = char(values[i] & 0x3f | 0x80),
        s += 3;
      else
        r = (char *) realloc(r, s + 4),
        r[s + 0] = char(values[i] >> 18 | 0xf0),
        r[s + 1] = char(values[i] >> 12 & 0x3f | 0x80),
        r[s + 2] = char(values[i] >> 6 & 0x3f | 0x80),
        r[s + 3] = char(values[i] & 0x3f | 0x80),
        s += 4;
    }
    return r;
  }
};

© Stack Overflow or respective owner

Related posts about c++

Related posts about constructor