use strict;
use warnings;
use utf8;
use Encode;

# Encode the leading characters of $str with $encoding, stopping before the
# first character that would push the result past $limit bytes.
#
# Arguments:
#   $encoding - encoding name (or object) accepted by Encode::find_encoding
#   $str      - character string to encode
#   $limit    - maximum number of bytes allowed in the result
#
# Returns the encoded byte string; it is '' when not even the first
# character fits. Croaks if the encoding is unknown.
#
# NOTE(review): every character is encoded on its own, so this presumably
# only suits encodings where characters encode independently of each other
# (e.g. UTF-8) -- confirm before using it with stateful encodings.
sub encode_with_limit {
	my ($encoding, $str, $limit) = @_;

	my $codec = Encode::find_encoding($encoding);
	if (!defined $codec) {
		# Fail with a readable message instead of a method call on undef.
		require Carp;
		Carp::croak("Unknown encoding '$encoding'");
	}

	my $encoded = '';
	for my $index (0 .. length($str) - 1) {
		my $char  = substr($str, $index, 1);
		my $bytes = $codec->encode($char);

		# Compare lengths directly rather than building a temporary
		# concatenation just to measure it.
		last if length($encoded) + length($bytes) > $limit;

		$encoded .= $bytes;
	}

	return $encoded;
}

use Test::More;

# Table of [byte limit, expected surviving prefix]. Each character of the
# input is 3 bytes in UTF-8, so a character only survives once the limit
# reaches the next multiple of 3.
for my $case (
	[0,   ''],
	[1,   ''],
	[2,   ''],
	[3,   'あ'],
	[4,   'あ'],
	[5,   'あ'],
	[6,   'あい'],
	[9,   'あいう'],
	[15,  'あいうえお'],
	[100, 'あいうえお'],
) {
	my ($limit, $expected) = @$case;

	# The description avoids the wide characters themselves so the TAP
	# output does not trigger "Wide character" warnings.
	is encode_with_limit('UTF-8', 'あいうえお', $limit), encode_utf8($expected),
		"limit of $limit bytes keeps " . length($expected) . ' character(s)';
}

done_testing;

こうしたんだけど、もっと簡単にできないんだろうか…

#!/usr/bin/env perl

use strict;
use warnings;
use utf8;
use Encode;

# Encode $str with $encoding and return at most $limit bytes, cut so that
# the result does not end in a partial character.
#
# Arguments:
#   $encoding - encoding name (or object) accepted by Encode::find_encoding
#   $str      - character string to encode
#   $limit    - maximum number of bytes allowed in the result
#
# Returns the re-encoded form of whatever decodes cleanly from the first
# $limit bytes of the encoded string; '' when $limit is zero or negative.
# Croaks if the encoding is unknown.
sub encode_with_limit {
	my ($encoding, $str, $limit) = @_;

	my $codec = Encode::find_encoding($encoding);
	if (!defined $codec) {
		# Fail with a readable message instead of a method call on undef.
		require Carp;
		Carp::croak("Unknown encoding '$encoding'");
	}

	# substr() reads a negative length as "leave that many bytes off the
	# end", which would return almost the whole string instead of nothing.
	return '' if $limit <= 0;

	# Cut the fully encoded string at the byte limit; the cut may land in
	# the middle of a multi-byte character.
	my $prefix = substr($codec->encode($str), 0, $limit);

	# FB_QUIET makes decode() stop silently at the truncated trailing
	# character and overwrite its source argument with the unprocessed
	# remainder, so it gets a private copy rather than a substr() lvalue.
	my $short = $codec->decode($prefix, Encode::FB_QUIET);

	return $codec->encode($short);
}

use Test::More;

# Table of [byte limit, expected surviving prefix]. Each character of the
# input is 3 bytes in UTF-8, so a limit that falls inside a character must
# drop that character entirely.
for my $case (
	[0,   ''],
	[1,   ''],
	[2,   ''],
	[3,   'あ'],
	[4,   'あ'],
	[5,   'あ'],
	[6,   'あい'],
	[9,   'あいう'],
	[15,  'あいうえお'],
	[100, 'あいうえお'],
) {
	my ($limit, $expected) = @$case;

	# The description avoids the wide characters themselves so the TAP
	# output does not trigger "Wide character" warnings.
	is encode_with_limit('UTF-8', 'あいうえお', $limit), encode_utf8($expected),
		"limit of $limit bytes keeps " . length($expected) . ' character(s)';
}

done_testing;

もっと簡単に書けたけど、文字列全体をいったんエンコードしてから切り詰め、デコードして再度エンコードしているので、効率は悪そう。

  1. トップ
  2. tech
  3. Perl でバイト数を制限しつつ、文字列を妥当なバイト列に変換したい
▲ この日のエントリ