PHP und Multibyte-Strings oder: Meine kleine String-Klasse

Tags:
Seite:
1
2

Mein Ansatz

In Ruby sind alle Strings Objekte. Ich mag diesen Ansatz sehr, weil sich der Code dadurch sehr gut lesbar schreiben lässt (Stichwort: Method-Chaining). Deshalb habe ich mich bei der Umsetzung etwas an Ruby orientiert.

Die Version 1.0 (wird bestimmt noch erweitert) wird folgendermassen angewendet:

// Usage example for the String class. The trailing comments show the string
// value at each step; dump() itself prints it together with its length.

// Default encoder for all future String objects.
// Default is String::ENCODER_MBSTRING.
String::setDefaultEncoder(String::ENCODER_ICONV);

// Default output encoding for all future String objects.
// Default is UTF-8.
String::setDefaultOutputEncoding('utf-8');

// dump() and concat() return the same object, so calls can be chained.
// reverse() and capitalize() return a NEW String, so $s itself still holds
// 'Hänschen Klein, ging allein ...' after this chain.
$s = new String('Hänschen Klein');
$s->dump() // Hänschen Klein
  ->concat(', ging allein ...')
  ->dump() // Hänschen Klein, ging allein ...
  ->reverse()
  ->dump() // ... niella gnig ,nielK nehcsnäH
  ->reverse()
  ->capitalize()
  ->dump(); // HÄNSCHEN KLEIN, GING ALLEIN ...

// Character count versus byte count of $s.
echo $s->length; // 31 (magic property, same as getLength())
echo $s->getLength(); // 31
echo count($s); // 31 (Countable)
echo strlen($s); // 32 (bytes: 'ä' takes two bytes in UTF-8)
echo mb_strlen($s, 'UTF-8'); // 31

// format() applies sprintf() with the string as the format; sub() cuts out
// the part before ' hat', whose position is looked up on the original $s.
$s = new String('Hans hat %d %s');
$s->format(12, 'Eier')
  ->dump() // Hans hat 12 Eier
  ->sub(0, $s->pos(' hat'))
  ->dump(); // Hans

Und hier nun der komplette Code:

/**
 * Exception type raised by the String class, e.g. when an unsupported
 * encoder is requested.
 */
class StringException extends Exception {
}

/**
 * Multibyte-aware string object with a fluent (chainable) interface.
 *
 * The value is stored in an internal encoding (UTF-8 unless overridden) and
 * converted to the output encoding whenever it is read through get(),
 * __toString() or dump(). Character-level operations are delegated to either
 * the mbstring or the iconv extension, depending on the configured encoder.
 *
 * Mutating methods (set, concat, clear, offsetSet, offsetUnset) change this
 * instance and return it. Transforming methods (reverse, capitalize,
 * downcase, sub, format, offsetGet) leave this instance untouched and return
 * a new one.
 *
 * NOTE: "string" is a reserved class name as of PHP 7.0, so this class can
 * only be declared under this name on PHP 5.x.
 */
class String implements ArrayAccess, Countable
{
    const ENCODER_ICONV    = 'iconv';
    const ENCODER_MBSTRING = 'mbstring';


    /**
     * The default multibyte extension used by new instances
     *
     * @var string
     */
    protected static $_defaultEncoder = self::ENCODER_MBSTRING;

    /**
     * The default output encoding used by new instances
     *
     * @var string
     */
    protected static $_defaultOutputEncoding = 'UTF-8';


    /**
     * The string value, stored in the internal encoding
     *
     * @var string
     */
    protected $_string = '';


    /**
     * The extension used to handle multibyte strings
     * (one of the ENCODER_* constants)
     *
     * @var string
     */
    protected $_encoder = null;

    /**
     * The encoding of the output string
     *
     * @var string
     */
    protected $_outputEncoding = null;

    /**
     * The internal encoding to handle strings with
     *
     * @var string
     */
    protected $_internalEncoding = 'UTF-8';


    /**
     * Set the default output encoding for all future instances
     *
     * @param string $v
     */
    public static function setDefaultOutputEncoding($v)
    {
        self::$_defaultOutputEncoding = $v;
    }


    /**
     * Set the default encoder (multibyte extension) for all future instances
     *
     * @param string $v One of the ENCODER_* constants
     * @throws StringException If the encoder is not supported
     */
    public static function setDefaultEncoder($v)
    {
        // Strict comparison: a loose one would let values such as 0 or true
        // pass as a "supported" encoder.
        if (!in_array($v, array(self::ENCODER_ICONV, self::ENCODER_MBSTRING), true)) {
            throw new StringException('Not supported encoder: ' . $v);
        }

        self::$_defaultEncoder = $v;
    }


    /**
     * Constructor
     *
     * Every argument left at null falls back to the class-wide default
     * (or, for the internal encoding, to UTF-8).
     *
     * @param string $v                Initial value
     * @param string $outputEncoding   Encoding used when the value is read
     * @param string $internalEncoding Encoding used to store the value
     * @param string $encoder          One of the ENCODER_* constants
     */
    public function __construct($v = null, $outputEncoding = null, $internalEncoding = null, $encoder = null)
    {
        if (null === $outputEncoding) {
            $outputEncoding = self::$_defaultOutputEncoding;
        }
        $this->setOutputEncoding($outputEncoding);

        if (null !== $internalEncoding) {
            $this->_internalEncoding = $internalEncoding;
        }

        if (null === $encoder) {
            $encoder = self::$_defaultEncoder;
        }

        $this->_encoder = $encoder;

        if (null !== $v) {
            $this->set($v);
        }
    }


    /**
     * Replace the current value
     *
     * @param string $v
     * @return String This instance
     */
    public function set($v)
    {
        $this->clear()
             ->concat($v);

        return $this;
    }


    /**
     * Append a value to the string
     *
     * The encoding of the value is detected with mb_detect_encoding() and
     * the value is converted to the internal encoding before it is appended.
     *
     * @param string $v
     * @return String This instance
     */
    public function concat($v)
    {
        $v = (string)$v;

        if ('' === $v) {
            return $this;
        }

        // mb_detect_encoding() is used for both encoders because iconv has
        // no detection facility. It returns false when nothing matches; in
        // that case the value is assumed to be in the internal encoding.
        $fromEncoding = mb_detect_encoding($v);
        if (false === $fromEncoding) {
            $fromEncoding = $this->_internalEncoding;
        }

        $this->_string .= $this->_convertString($v, $this->_internalEncoding, $fromEncoding);

        return $this;
    }


    /**
     * Get the string value in the given (or the instance's) output encoding
     *
     * @param string $outputEncoding
     * @return string
     */
    public function get($outputEncoding = null)
    {
        if (null === $outputEncoding) {
            $outputEncoding = $this->_outputEncoding;
        }

        return $this->convert($outputEncoding);
    }


    /**
     * Clear the string value
     *
     * @return String This instance
     */
    public function clear()
    {
        $this->_string = '';
        return $this;
    }


    /**
     * Print the string together with its character count,
     * rendered in a given encoding
     *
     * @param string $outputEncoding
     * @return String This instance
     */
    public function dump($outputEncoding = null)
    {
        //dump the var in xdebug style
        if (extension_loaded('xdebug'))
        {
            $o = sprintf(
                "<pre class='xdebug-var-dump' dir='ltr'><small>string</small> <font color='#cc0000'>'%s'</font> <i>(length=%d)</i>%s</pre>",
                $this->get($outputEncoding),
                $this->getLength(),
                PHP_EOL
            );
        }

        //dump the var in php's default way
        else
        {
            // %d, not %i: PHP's sprintf() has no "i" conversion specifier.
            $o = sprintf(
                "string '%s' (length=%d)",
                $this->get($outputEncoding),
                $this->getLength()
            );
        }

        echo $o;
        return $this;
    }


    /**
     * Dump the string with PHP's var_dump()
     * function
     *
     * @param string $outputEncoding
     * @return String This instance
     */
    public function dumpRaw($outputEncoding = null)
    {
        var_dump($this->get($outputEncoding));
        return $this;
    }


    /**
     * Set the output encoding
     *
     * @param string $v
     * @return String This instance
     */
    public function setOutputEncoding($v)
    {
        $this->_outputEncoding = (string)$v;
        return $this;
    }


    /**
     * Create a new instance of String for a given value, carrying over
     * this instance's output encoding, internal encoding and encoder
     *
     * @param string $v
     * @return String A new instance
     */
    public function populate($v)
    {
        // The encoder is passed along as well; otherwise derived objects
        // would silently fall back to the class-wide default encoder.
        return new self($v, $this->_outputEncoding, $this->_internalEncoding, $this->_encoder);
    }


    /**
     * Convert the stored string from one to
     * another encoding
     *
     * @param string $toEncoding   Defaults to the output encoding
     * @param string $fromEncoding Defaults to the internal encoding
     * @return string The converted value; the instance is not modified
     */
    public function convert($toEncoding = null, $fromEncoding = null)
    {
        if (null === $toEncoding) {
            $toEncoding = $this->_outputEncoding;
        }

        if (null === $fromEncoding) {
            $fromEncoding = $this->_internalEncoding;
        }

        return $this->_convertString($this->_string, $toEncoding, $fromEncoding);
    }


    /**
     * Return the count of characters
     * in the string (not the count of bytes)
     *
     * @param string $outputEncoding Encoding to render and measure the string in
     * @return int
     */
    public function getLength($outputEncoding = null)
    {
        if (null === $outputEncoding) {
            $outputEncoding = $this->_outputEncoding;
        }

        // Render the string in the same encoding it is measured in.
        $rendered = $this->get($outputEncoding);

        switch ($this->_encoder)
        {
            case self::ENCODER_MBSTRING:
                return mb_strlen($rendered, $outputEncoding);

            default:
                return iconv_strlen($rendered, $outputEncoding);
        }
    }


    /**
     * Reverse the string character by character
     *
     * PHP has no multibyte version of strrev(), and strrev() itself works
     * on bytes, which would tear multibyte characters apart. The string is
     * therefore split into UTF-8 code points, which are then reversed.
     * Combining sequences are not kept together.
     *
     * @return String A new instance
     * @throws StringException If the value cannot be split into characters
     */
    public function reverse()
    {
        $utf8  = (string)$this->convert('UTF-8');
        $chars = preg_split('//u', $utf8, -1, PREG_SPLIT_NO_EMPTY);

        if (false === $chars) {
            throw new StringException('Unable to split string into characters');
        }

        $reversed = implode('', array_reverse($chars));

        return $this->populate(
            $this->_convertString($reversed, $this->_internalEncoding, 'UTF-8')
        );
    }


    /**
     * Convert the whole string to upper case
     *
     * There is no iconv() feature to change the case of a
     * string, so mb_strtoupper() is used for both encoders.
     *
     * @return String A new instance
     */
    public function capitalize()
    {
        return $this->populate(mb_strtoupper($this->_string, $this->_internalEncoding));
    }


    /**
     * Convert the whole string to lower case
     *
     * There is no iconv() feature to change the case of a
     * string, so mb_strtolower() is used for both encoders.
     *
     * @return String A new instance
     */
    public function downcase()
    {
        return $this->populate(mb_strtolower($this->_string, $this->_internalEncoding));
    }


    /**
     * Return a subset of the string
     *
     * @param int $offset Character offset
     * @param int $length Number of characters; null means "up to the end"
     * @return String A new instance
     */
    public function sub($offset, $length = null)
    {
        // An explicit length is always passed on: older PHP versions treat
        // a null length as 0 instead of "until the end of the string".
        if (null === $length) {
            $length = $this->_internalLength();
        }

        return $this->populate($this->_substr((int)$offset, (int)$length));
    }


    /**
     * Return the character position of a substring
     * inside the actual string
     *
     * @param string $needle Substring to look for
     * @param int    $offset Character offset to start searching at
     * @return int|false Position of the first match, false if not found
     */
    public function pos($needle, $offset = 0)
    {
        switch ($this->_encoder)
        {
            case self::ENCODER_MBSTRING:
                return mb_strpos($this->_string, $needle, (int)$offset, $this->_internalEncoding);

            default:
                return iconv_strpos($this->_string, $needle, (int)$offset, $this->_internalEncoding);
        }
    }


    /**
     * Use the string as a sprintf() format and apply the given arguments
     *
     * @return String A new instance holding the formatted result
     */
    public function format()
    {
        $args = func_get_args();
        array_unshift($args, $this->get());

        $v = call_user_func_array('sprintf', $args);

        return $this->populate($v);
    }


    /**
     * ArrayAccess implementation: read the character at an offset
     *
     * Negative offsets count from the end of the string. An offset outside
     * the string yields an empty String.
     *
     * @param int $offset Character offset
     * @return String A new instance
     */
    public function offsetGet($offset)
    {
        $index = $this->_resolveOffset($offset, $this->_internalLength());

        if (false === $index) {
            return $this->populate('');
        }

        return $this->populate($this->_substr($index, 1));
    }


    /**
     * ArrayAccess implementation: replace the character at an offset
     *
     * The character at the offset is replaced by the whole value, which
     * may be longer than one character. A null offset ($s[] = ...) or an
     * offset equal to the current length appends the value.
     *
     * @param int $offset Character offset; negative counts from the end
     * @param string $v
     * @return String This instance
     * @throws StringException If the offset lies outside the string
     */
    public function offsetSet($offset, $v)
    {
        if (null === $offset) {
            return $this->concat($v);
        }

        $length = $this->_internalLength();
        $index  = $this->_resolveOffset($offset, $length);

        if (false === $index) {
            if ((int)$offset !== $length) {
                throw new StringException('Offset out of range: ' . $offset);
            }

            return $this->concat($v);
        }

        // Offsets are counted in characters, so the tail starts exactly one
        // character after the replaced one, regardless of its byte width.
        $head = $this->_substr(0, $index);
        $tail = $this->_substr($index + 1, $length);

        // Only the new value runs through concat() and its encoding
        // detection; head and tail already are in the internal encoding.
        $this->_string = $head;
        $this->concat($v);
        $this->_string .= $tail;

        return $this;
    }


    /**
     * ArrayAccess implementation: remove the character at an offset
     *
     * PHP cannot unset() a string offset, so the string is rebuilt without
     * the character. An offset outside the string is ignored.
     *
     * @param int $offset Character offset; negative counts from the end
     * @return String This instance
     */
    public function offsetUnset($offset)
    {
        $length = $this->_internalLength();
        $index  = $this->_resolveOffset($offset, $length);

        if (false !== $index) {
            $this->_string = $this->_substr(0, $index)
                           . $this->_substr($index + 1, $length);
        }

        return $this;
    }


    /**
     * ArrayAccess implementation: check whether a character exists
     * at an offset
     *
     * @param int $offset Character offset; negative counts from the end
     * @return bool
     */
    public function offsetExists($offset)
    {
        return false !== $this->_resolveOffset($offset, $this->_internalLength());
    }


    /**
     * Countable implementation
     *
     * @return int Number of characters
     */
    public function count()
    {
        return $this->getLength();
    }


    /**
     * @see String::get()
     */
    public function __toString()
    {
        return $this->get();
    }


    /**
     * Magic read access
     *
     * "length" maps to getLength(). Any other name is read from the
     * instance as is, which also exposes the protected properties.
     *
     * @param string $k
     * @return mixed
     */
    public function __get($k)
    {
        switch ((string)$k)
        {
            case 'length':
                return $this->getLength();

            default:
                return $this->$k;
        }
    }


    /**
     * Convert an arbitrary string between two encodings
     * with the configured encoder
     *
     * @param string $string
     * @param string $toEncoding
     * @param string $fromEncoding
     * @return string
     */
    protected function _convertString($string, $toEncoding, $fromEncoding)
    {
        switch ($this->_encoder)
        {
            case self::ENCODER_MBSTRING:
                return mb_convert_encoding($string, $toEncoding, $fromEncoding);

            default:
                return iconv($fromEncoding, $toEncoding, $string);
        }
    }


    /**
     * Count the characters of the stored string in the internal encoding
     *
     * @return int
     */
    protected function _internalLength()
    {
        switch ($this->_encoder)
        {
            case self::ENCODER_MBSTRING:
                return (int)mb_strlen($this->_string, $this->_internalEncoding);

            default:
                return (int)iconv_strlen($this->_string, $this->_internalEncoding);
        }
    }


    /**
     * Cut a character range out of the stored string
     *
     * @param int $offset Character offset
     * @param int $length Number of characters
     * @return string Raw value in the internal encoding
     */
    protected function _substr($offset, $length)
    {
        // The (string) cast turns a false result into an empty string.
        switch ($this->_encoder)
        {
            case self::ENCODER_MBSTRING:
                return (string)mb_substr($this->_string, $offset, $length, $this->_internalEncoding);

            default:
                return (string)iconv_substr($this->_string, $offset, $length, $this->_internalEncoding);
        }
    }


    /**
     * Translate a possibly negative character offset into an index
     *
     * @param int $offset Character offset; negative counts from the end
     * @param int $length Character count of the stored string
     * @return int|false Index between 0 and $length - 1, false if outside
     */
    protected function _resolveOffset($offset, $length)
    {
        $index = (int)$offset;

        if ($index < 0) {
            $index += $length;
        }

        if ($index < 0 || $index >= $length) {
            return false;
        }

        return $index;
    }
}
Seite:
1
2

Ähnliche Artikel

Kommentare