huf [he/him]

  • 12 Posts
  • 1.19K Comments
Joined 4 years ago
cake
Cake day: November 11th, 2021

help-circle







  • extremely similar tool if you dont want to open a browser:

    #!/usr/bin/perl
    use strict;
    use warnings;
    
    our $VERSION = '0.4';
    use Encode qw/decode_utf8 encode_utf8/;
    use Unicode::UCD qw/charinfo prop_invmap/;
    use List::Util qw/max sum/;
    use Getopt::Long qw/:config no_ignore_case/;
    
    binmode STDOUT, ':encoding(UTF-8)';
    
    GetOptions
        'char|c' => \my $opt_char,
        'decimal|d!' => \my $opt_decimal,
        'hex|x!' => \my $opt_hex,
        'hex-escape|X!' => \my $opt_hex_escaped,
        'name|n!' => \my $opt_name,
        'ascii|a!' => \my $opt_ascii,
        'help|h!' => \my $opt_help,
        'version|v!' => \my $opt_version
            or die usage();
    
    die __FILE__ =~ s{.*/}{}r . " v$VERSION\n" if $opt_version;
    
    my $set_opt_count = sum map { $_ // 0 } $opt_char, $opt_decimal, $opt_hex, $opt_hex_escaped, $opt_name, $opt_help;
    
    die usage("Given options exclude each other") if $set_opt_count > 1;
    
    die usage() if $opt_help || !@ARGV;
    
    $opt_char = 1 if $set_opt_count == 0;
    
    my @args;
    if ($opt_decimal) {
        @args = [ map { chr } @ARGV ];
    }
    elsif ($opt_hex) {
        @args = [ map { chr hex $_ } @ARGV ];
    }
    elsif ($opt_hex_escaped) {
        @args = map { [ split //, decode_utf8_or_not($opt_ascii, pack 'H*', join '', /(?:\\x)?([0-9a-f]+)/gi) ] } @ARGV;
    }
    elsif ($opt_name) {
        @args = lookup_by_name(@ARGV);
    }
    elsif ($opt_char) {
        @args = map { [ split //, decode_utf8_or_not($opt_ascii, $_) ] } @ARGV;
    }
    else {
        die usage("No option passed, do not know what to do");
    }
    
    my @data;
    my $codepoint_maxlen = 4;
    my $utf8_maxlen = 2;
    my $name_maxlen = 1;
    
    my $nrun = 0;
    for my $run (@args) {
        push @data, undef if $nrun++ > 0;
    
        for my $char (@$run) {
            my $codepoint = ord $char;
            my $charinfo = charinfo $codepoint;
            my $name = $charinfo->{name} || "NONEXISTENT CHAR";
            my $utf8 = join ' ', map { sprintf "%x", ord $_ } split //, encode_utf8 $char;
            $codepoint_maxlen = max $codepoint_maxlen, length sprintf '%X', $codepoint;
            $utf8_maxlen = max $utf8_maxlen, length $utf8;
            $name_maxlen = max $name_maxlen, length $name;
    
            push @data, {
                char => $name eq '<control>'
                    ? join '', map "\\x$_", split ' ', $utf8
                    : $char,
                codepoint => $codepoint,
                utf8 => $utf8,
                name => $name,
            };
        }
    }
    
    for my $line (@data) {
        if ($line) {
            printf
                "U+%0${codepoint_maxlen}X (%-${utf8_maxlen}s): %-${name_maxlen}s [%s]\n",
                    $line->{codepoint},
                    $line->{utf8},
                    $line->{name},
                    $line->{char};
        }
        else {
            print "\n";
        }
    }
    
    sub usage {
        my $name = __FILE__ =~ s{.*/}{}r;
        print "@_\n" if @_;
        <<~"EOS";
        $name [options] [mode] ...
            modes:
            -c <literal string> ... (this is the default)
            -d <decimal code point> ...
            -x <hexadecimal code point> ...
            -X <string containing only hexadecimal escapes \\xHH> ...
            -n <character name fragment> ...
            options:
            -a - treat input as bytes instead of utf8
            -h - this help
            -v - version
        v$VERSION
        EOS
    }
    
    sub lookup_by_name {
        my @search_terms = @_;
    
        my %cp;
        # All codepoints
        for my $cat (qw(Name Name_Alias)) {
            my ($codepoints, $names, $format, $default) = prop_invmap($cat);
            # $format => "n", $default => ""
            for my $i (0 .. @$codepoints - 2) {
                my ($cp, $n) = ($codepoints->[$i], $names->[$i]);
                # If $n is a ref, the same codepoint has multiple names
                for my $name (ref $n ? @$n : $n) {
                    $cp{$name} //= $cp;
                }
            }
        }
    
        my @names = keys %cp;
        for my $term (@search_terms) {
            @names = grep { /$term/i } @names;
        }
    
        return [ map chr, sort { $a <=> $b } @cp{@names} ];
    }
    
    sub decode_utf8_or_not {
        my ($no_dont_do_it, $string) = @_;
        return $no_dont_do_it ? $string : decode_utf8($string);
    }