use strict;
use warnings;
use utf8;
use Encode;
# Encode $str into $encoding one character at a time, stopping before the
# first character whose bytes would push the result past $limit bytes.
#
#   $encoding - encoding name (or object) accepted by Encode::find_encoding
#   $str      - character string to encode
#   $limit    - maximum length of the result, in bytes
#
# Returns the encoded byte string. Only whole characters are appended, so the
# result never ends in the middle of a character.
# Croaks if Encode does not know $encoding.
#
# NOTE(review): every character goes through its own encode() call, so
# encodings that emit a BOM or shift sequences per call (e.g. UTF-16,
# ISO-2022-JP) presumably repeat them for each character -- confirm before
# relying on this with anything other than stateless encodings.
sub encode_with_limit {
    my ($encoding, $str, $limit) = @_;

    my $encoder = Encode::find_encoding($encoding);
    if (!defined $encoder) {
        # Fail with a message that names the bad argument instead of the
        # generic "Can't call method on an undefined value".
        require Carp;
        Carp::croak("Unknown encoding: $encoding");
    }

    my $encoded = '';
    for my $i (0 .. length($str) - 1) {
        my $char  = substr($str, $i, 1);
        my $bytes = $encoder->encode($char);

        # Compare lengths arithmetically rather than building a throwaway
        # concatenation of the whole result on every iteration.
        last if length($encoded) + length($bytes) > $limit;

        $encoded .= $bytes;
    }

    return $encoded;
}
use Test::More;
# Table of [byte limit, characters expected to survive] for the UTF-8
# encoding of 'あいうえお'. The expectations show each character costs
# 3 bytes, so a character is kept only when the limit leaves room for all
# of its bytes.
for my $case (
    [ 0,  ''           ],
    [ 1,  ''           ],
    [ 2,  ''           ],
    [ 3,  'あ'         ],
    [ 4,  'あ'         ],
    [ 5,  'あ'         ],
    [ 6,  'あい'       ],
    [ 9,  'あいう'     ],
    [ 15, 'あいうえお' ],
) {
    my ($limit, $expected_chars) = @{$case};
    is(
        encode_with_limit('UTF-8', 'あいうえお', $limit),
        encode_utf8($expected_chars),
        "UTF-8 output limited to $limit byte(s)",
    );
}
done_testing;
こうしたんだけど、もっと簡単にできないんだろうか…
use strict;
use warnings;
use utf8;
use Encode;
# Encode $str into $encoding and trim the result to at most $limit bytes
# without leaving a partial character at the end.
#
#   $encoding - encoding name (or object) accepted by Encode::find_encoding
#   $str      - character string to encode
#   $limit    - maximum number of encoded bytes to keep; negative values are
#               treated as 0
#
# Returns the byte string obtained by re-encoding the characters that decode
# cleanly from the first $limit bytes of the full encoding.
# Croaks if Encode does not know $encoding.
sub encode_with_limit {
    my ($encoding, $str, $limit) = @_;

    my $encoder = Encode::find_encoding($encoding);
    if (!defined $encoder) {
        # Fail with a message that names the bad argument instead of the
        # generic "Can't call method on an undefined value".
        require Carp;
        Carp::croak("Unknown encoding: $encoding");
    }

    # A negative length makes substr() count from the END of the string,
    # which would keep nearly everything; clamp it to "no room at all".
    $limit = 0 if $limit < 0;

    # Hard cut at the byte limit; this may slice through the last character.
    # Keep it in a scratch variable because decode() with FB_QUIET rewrites
    # its argument in place.
    my $prefix = substr($encoder->encode($str), 0, $limit);

    # FB_QUIET makes decode() stop silently at the truncated trailing
    # character and return only the characters decoded before it.
    my $whole_chars = $encoder->decode($prefix, Encode::FB_QUIET);

    return $encoder->encode($whole_chars);
}
use Test::More;
# Table of [byte limit, characters expected to survive] for the UTF-8
# encoding of 'あいうえお'. The expectations show each character costs
# 3 bytes, so a character is kept only when the limit leaves room for all
# of its bytes.
for my $case (
    [ 0,  ''           ],
    [ 1,  ''           ],
    [ 2,  ''           ],
    [ 3,  'あ'         ],
    [ 4,  'あ'         ],
    [ 5,  'あ'         ],
    [ 6,  'あい'       ],
    [ 9,  'あいう'     ],
    [ 15, 'あいうえお' ],
) {
    my ($limit, $expected_chars) = @{$case};
    is(
        encode_with_limit('UTF-8', 'あいうえお', $limit),
        encode_utf8($expected_chars),
        "UTF-8 output limited to $limit byte(s)",
    );
}
done_testing;
もっと簡単に書けたけど、効率は悪そう。
- トップ
-
tech
-
Perl でバイト数を制限しつつ、文字列を妥当なバイト列に変換したい