feat: add kang program which interpret english number and translate to integer value
This commit is contained in:
parent
7b9fca8d83
commit
25a84aedf9
197
Makefile
Normal file
197
Makefile
Normal file
@ -0,0 +1,197 @@
|
||||
#!/bin/make
|
||||
# 2001 makefile
|
||||
#
|
||||
# Copyright (C) Simon Cooper, Landon Curt Noll, Peter Seebach, 2001.
|
||||
# All Rights Reserved. Permission for personal, educational or non-profit
|
||||
# use is granted provided that this copyright and notice are included in its
|
||||
# entirety and remains unaltered. All other uses must receive prior permission
|
||||
# from the contest judges.
|
||||
|
||||
# Shell and helper commands for the recipes in this file.
SHELL = /bin/sh
CAT = cat
RM = rm
SED = sed
TRUE = true
|
||||
|
||||
# optimization
|
||||
#
|
||||
# Most compiles will safely use -O2. Some can use only -O.
|
||||
# A few compilers have broken optimizers and thus you may
|
||||
# not want anything.
|
||||
#
|
||||
# Exactly one OPT line should be active; -O2 is the current choice.
#OPT =
#OPT = -O
OPT = -O2
|
||||
|
||||
# flags for ANSI compiles
|
||||
#
|
||||
# NOTE: Some ANSI compilers make use of -Xa to turn on ANSI mode,
|
||||
# others such as gcc may want -ansi, others (Compaq Tru64 Unix)
|
||||
# want -std1, and others may want nothing at all.
|
||||
# Adjust the CFLAGS line as needed.
|
||||
#
|
||||
# NOTE: Some compilers cannot optimize, or optimize some entries
|
||||
# incorrectly. You might want to turn on -O to see if your
|
||||
# compiler can handle them.
|
||||
#
|
||||
#
|
||||
# Exactly one CFLAGS line should be active; -ansi is the current choice.
#CFLAGS = -Xa $(OPT)
#CFLAGS = -std1 $(OPT)
CFLAGS = -ansi $(OPT)
|
||||
|
||||
# NOTE: On some systems, the library path does not include the location
|
||||
# of the X11 libraries. You might need to use a -L/path/to/X11/lib/dir
|
||||
# in order to compile with these libs.
|
||||
#
|
||||
# Include and library search paths for the X11 entries.
# Switch to the empty variants if X11 is already on the default paths.
X11CCFLAGS = -I/usr/X11R6/include -I/usr/X11R6/include/X11 -I/usr/include/X11
#X11CCFLAGS =
X11LDFLAGS = -L/usr/X11R6/lib -Wl,-R/usr/X11R6/lib
#X11LDFLAGS =
|
||||
|
||||
# ANSI compiler
|
||||
#
|
||||
# Set CC to the name of your ANSI compiler.
|
||||
#
|
||||
# Some entries seem to need gcc. If you have gcc, set
|
||||
# both CC and MAY_NEED_GCC to gcc.
|
||||
#
|
||||
# If you do not have gcc, set CC to the name of your ANSI compiler, and
|
||||
# set MAY_NEED_GCC to either ${CC} (and hope for the best) or to just :
|
||||
# to disable such programs.
|
||||
#
|
||||
# CC builds most entries; MAY_NEED_GCC builds the ones that seem to need gcc.
CC = cc
MAY_NEED_GCC = gcc
|
||||
|
||||
# winners that compile under ANSI C
|
||||
#
|
||||
# Programs built by `all` and removed by `clean`.
# kang is the entry added together with this Makefile (see hint.text, which
# tells the user to run `make kang`).
WINNERS= anonymous bellard bellard.otccex cheong coupard \
	ctk dgbeards herrmann1 herrmann2 jason kev ollinger rosten \
	schweikh westley williams kang
ALT_NAMES= westley.orig
DATA_FILES= anonymous_files bellard_files herrmann1_files herrmann2_files

# Default goal: build every program listed in WINNERS.
all: ${WINNERS}

# `all` names a command, not a file; keep it working even if a file named
# `all` appears in the directory.
.PHONY: all

# Best short program (see hint.text)
#
# kang.c declares `long long` variables, which are not part of C89, so it is
# compiled with ${OPT} only rather than with the -ansi flag carried by CFLAGS.
kang: kang.c
	${CC} ${OPT} kang.c -o kang
|
||||
|
||||
# Most likely to amaze
|
||||
#
|
||||
# The recipe passes the compiler command itself to the program through the
# macro X (stringified via A(X)=#X), so the quoting must stay exactly as
# written. Each line ends in a backslash so that the shell variables are
# still set when the final command runs.
anonymous: anonymous.c
	magic='${MAY_NEED_GCC} -O1'; \
	X='-DA(X)=#X'; \
	warning='-pedantic -Dprocessor'; \
	$$magic $$warning -Dmagic= $$X "-DX=A($$magic \"$$X\")" \
	    -o $@ anonymous.c
|
||||
|
||||
# Aggregate target naming the extra data files of the anonymous entry.
# It is not a file itself, so declare it phony.
.PHONY: anonymous_files
anonymous_files: anonymous.ten.c
|
||||
|
||||
# Best abuse of the rules
|
||||
#
|
||||
# Built with -rdynamic and linked against libdl.
bellard: bellard.c
	$(CC) $(CFLAGS) -rdynamic -o $@ bellard.c -ldl
|
||||
|
||||
# Aggregate target naming the extra data files of the bellard entry.
# It is not a file itself, so declare it phony.
.PHONY: bellard_files
bellard_files: bellard.otccex.c
|
||||
|
||||
# Best short program
|
||||
#
|
||||
# Single-source build with the common ANSI flags.
cheong: cheong.c
	$(CC) $(CFLAGS) -o $@ cheong.c
|
||||
|
||||
# Most obfuscated sound
|
||||
#
|
||||
# Single-source build with the common ANSI flags.
coupard: coupard.c
	$(CC) $(CFLAGS) coupard.c -o $@
|
||||
|
||||
# Worst Driver
|
||||
#
|
||||
# Single-source build with the common ANSI flags.
ctk: ctk.c
	$(CC) $(CFLAGS) ctk.c -o $@
|
||||
|
||||
# Worst AI
|
||||
#
|
||||
# The source relies on one-letter macros supplied on the command line.
dgbeards: dgbeards.c
	$(CC) $(CFLAGS) \
	    -DE=break -DF=char -DK=case -DP=int -DR=return -DI=0xFFFF \
	    dgbeards.c -o $@
|
||||
|
||||
# Best abuse of the C preprocessor
|
||||
#
|
||||
# This entry is driven entirely by its companion shell script.
herrmann1: herrmann1.c herrmann1.sh
	@echo "NOTE: herrmann1 must be built/run using the script herrmann1.sh"
	$(SHELL) ./$@.sh
|
||||
|
||||
# Aggregate target naming the extra data files of the herrmann1 entry.
# It is not a file itself, so declare it phony.
.PHONY: herrmann1_files
herrmann1_files: herrmann1.gcd herrmann1.sh herrmann1.times2
|
||||
|
||||
# Most eye-crossing
|
||||
#
|
||||
# This entry requires the GCC -include feature.
|
||||
#
|
||||
# Built with MAY_NEED_GCC because the headers are injected with -include.
herrmann2: herrmann2.c
	$(MAY_NEED_GCC) $(CFLAGS) \
	    -include /usr/include/stdlib.h \
	    -include /usr/include/stdio.h \
	    -include /usr/include/time.h \
	    -include /usr/include/unistd.h \
	    herrmann2.c -o $@
|
||||
|
||||
# Aggregate target naming the extra data files of the herrmann2 entry.
# It is not a file itself, so declare it phony.
.PHONY: herrmann2_files
herrmann2_files: herrmann2.cup herrmann2.ioccc
|
||||
|
||||
# Best Of Show
|
||||
#
|
||||
# Single-source build with the common ANSI flags.
jason: jason.c
	$(CC) $(CFLAGS) jason.c -o $@
|
||||
|
||||
# Best Curses Game
|
||||
#
|
||||
# PORT, SPEED and the l_ macro are supplied on the command line; the
# program links against curses and termcap.
kev: kev.c
	$(CC) $(CFLAGS) -DPORT=5455 -DSPEED=50 \
	    -Dl_="socket(il.sin_family=AF_INET,SOCK_STREAM,0)" \
	    -o $@ kev.c -lcurses -ltermcap
|
||||
|
||||
# Best primal ASCII graphics
|
||||
#
|
||||
# Single-source build with the common ANSI flags.
ollinger: ollinger.c
	$(CC) $(CFLAGS) -o $@ ollinger.c
|
||||
|
||||
# Best abuse of the user
|
||||
#
|
||||
# X11 program: needs the X11 include/library paths and links against libX11.
rosten: rosten.c
	$(CC) $(CFLAGS) -pedantic $(X11CCFLAGS) rosten.c \
	    $(X11LDFLAGS) -lX11 -o $@
|
||||
|
||||
# Best one-liner
|
||||
#
|
||||
# Single-source build with the common ANSI flags.
schweikh: schweikh.c
	$(CC) $(CFLAGS) -o $@ schweikh.c
|
||||
|
||||
# Best position-independent code
|
||||
#
|
||||
# Single-source build with the common ANSI flags.
westley: westley.c
	$(CC) $(CFLAGS) -o $@ westley.c
|
||||
|
||||
# Alternate version of westley; listed in ALT_NAMES, not in WINNERS.
westley.orig: westley.orig.c
	$(CC) $(CFLAGS) -o $@ westley.orig.c
|
||||
|
||||
# Best position-independent code
|
||||
#
|
||||
# X11 program. Unlike the other entries it is compiled without CFLAGS.
williams: williams.c
	$(CC) $(X11CCFLAGS) -o $@ williams.c $(X11LDFLAGS) -lX11
|
||||
|
||||
# Build the winners, the alternate versions and the data-file groups.
# `everything` names a command, not a file, so declare it phony.
.PHONY: everything
everything: ${WINNERS} ${ALT_NAMES} ${DATA_FILES}
|
||||
|
||||
# Joke target; declared phony so a file named `love` cannot disable it.
.PHONY: love
love:
	@echo 'not war?'
|
||||
|
||||
# Joke target: re-invokes make on `waste`. Declared phony so a file named
# `haste` cannot disable it.
.PHONY: haste
haste:
	${MAKE} waste
|
||||
|
||||
# Joke target; declared phony so a file named `waste` cannot disable it.
.PHONY: waste
waste:
	@echo 'waste'
|
||||
|
||||
# Remove object files, stray a.out/core files and the built winners.
# Declared phony: without this, a file named `clean` in the directory would
# make the target look up to date and the recipe would never run.
.PHONY: clean
clean:
	${RM} -f *.o a.out core ${WINNERS}
|
||||
|
||||
# Everything `clean` removes, plus the alternate-name binaries.
# ${WINNERS} is listed again so this recipe removes them on its own as well.
# Declared phony so a file named `clobber` cannot disable it.
.PHONY: clobber
clobber: clean
	${RM} -f ${WINNERS} ${ALT_NAMES}
|
||||
|
||||
# Alias for clobber; the recipe itself is a no-op.
# Declared phony so a file named `nuke` cannot disable it.
.PHONY: nuke
nuke: clobber
	@${TRUE}
|
||||
|
||||
# "Install" only reads the built programs and discards the output.
#
# The recipe reads ${ALT_NAMES} as well as ${WINNERS}, but `all` only builds
# ${WINNERS}. ${ALT_NAMES} is therefore listed as a prerequisite so that cat
# does not fail on a missing file in a fresh tree.
# Declared phony so a file named `install` cannot disable it.
.PHONY: install
install: all ${ALT_NAMES}
	${CAT} ${WINNERS} ${ALT_NAMES} > /dev/null
|
160
hint.text
Normal file
160
hint.text
Normal file
@ -0,0 +1,160 @@
|
||||
# Best short program
|
||||
|
||||
Seonghoon Kang
|
||||
<kang.seonghoon@mearie.org>
|
||||
|
||||
|
||||
## Judges' comments:
|
||||
### To build:
|
||||
|
||||
make kang
|
||||
|
||||
### To run:
|
||||
|
||||
echo "full spelling of an English cardinal numeral less than a quadrillion" | ./kang
|
||||
|
||||
### Try:
|
||||
|
||||
echo Nineteen hundred and eighty-four | ./kang
|
||||
echo uno | ./kang
|
||||
echo trois | ./kang
|
||||
echo fier | ./kang
|
||||
echo "shest'" | ./kang
|
||||
|
||||
### Selected Judges Remarks:
|
||||
|
||||
The judges were able to appreciate the Indo-European language family
|
||||
relation by making this entry successfully recognize *some* French,
|
||||
German, Italian, Russian, and Spanish numerals.
|
||||
|
||||
Also worth mentioning is this entry's ability to understand the
|
||||
colloquial year numbers of the last millennium.
|
||||
|
||||
We've added a linefeed to the print format for convenience.
|
||||
|
||||
|
||||
## Author's comments:
|
||||
## Synopsis
|
||||
|
||||
This short program reads a spelt number (e.g. `forty-two`) and writes a
|
||||
corresponding decimal number (e.g. `42`). Too long for one-liners, alas,
|
||||
but it still qualifies as a *short* program as it has less than 0x100 bytes.
|
||||
|
||||
It accepts a variety of spelt numbers:
|
||||
|
||||
* It correctly handles `zero`.
|
||||
* Hyphen does not make a difference: `forty-two` and `forty two` are same.
|
||||
So does period or comma.
|
||||
* Cases do not make a difference either: `TWO`, `Two`, `two` are same.
|
||||
* `one` and `a` are interchangeable: `one hundred` and `a hundred` are same.
|
||||
* `and` is optional: `one hundred twenty-three` and `one hundred and twenty-three`
|
||||
are same.
|
||||
* It supports every non-negative integer up to 10<sup>15</sup>-1. It uses
|
||||
the small scale (i.e. American): `billion` is 10<sup>9</sup> and `trillion` is
|
||||
10<sup>12</sup>.
|
||||
* Sometimes, it can magically correct typos.
|
||||
|
||||
It does *not* accept some spelt numbers, which I found mostly irrelevant:
|
||||
|
||||
* A bare `hundred`, `thousand` etc. do not work.
|
||||
* `one million million` does not work. Get used to `one trillion`!
|
||||
|
||||
## Requirements
|
||||
|
||||
This program is quite portable, only requiring the following:
|
||||
|
||||
* The signature `int main(int, int)` should be accepted by the linker. (Original
|
||||
version only)
|
||||
* `char` should be at least 8 bits long (as dictated by the standard), `int`
|
||||
should be at least 32 bits long, `long long` should be at least 64 bits long.
|
||||
* Both the compiler and execution environment should use an ASCII-compatible
|
||||
character set and two's complement representation.
|
||||
* Overflow and underflow on `char` should wrap around, if your `char` is unsigned.
|
||||
* [A trustworthy compiler][trustingtrust].
|
||||
|
||||
[trustingtrust]: http://cm.bell-labs.com/who/ken/trust.html
|
||||
|
||||
The design of the program explicitly allows for an `EOF` that is not equal to -1
|
||||
(it has to be negative per the standard) and both signed and unsigned `char`,
|
||||
for example. Furthermore it is endian-independent.
|
||||
|
||||
## Obfuscations (SPOILERS!)
|
||||
|
||||
Many obfuscations used are typical for standard IOCCC entries:
|
||||
|
||||
* Two arguments from `main` function are reused as normal variables.
|
||||
* Every conditional has been replaced with `?:` ternary operator and `||`
|
||||
short-circuiting operator.
|
||||
* It has exactly three nested `for` loops and nothing else.
|
||||
* Common two's complement tricks: `~-a` instead of `a-1`,
|
||||
`~a?...:...` instead of `a!=-1?...:...`, etc.
|
||||
* Comma operators (`,`) for multiple statements. The number of them is
|
||||
minimized, however, as it is too easy to (ab)use them.
|
||||
* It lacks most parentheses around bitwise and arithmetic operators. It was
|
||||
originally written for shortness so parentheses were **EVIL**.
|
||||
* `n["string"]` instead of `"string"[n]`. Both are fine for this program but
|
||||
I went to the former just for fun.
|
||||
* Utter lack of any kind of layouts. (Oh, except for the first column.)
|
||||
|
||||
Other obfuscations are more subtle:
|
||||
|
||||
* The string `"1+DIY/.K430x9G(kC["` is 18 bytes long, but actually 19 bytes
|
||||
including the final null character are used.
|
||||
* It internally represents numbers as hexadecimal. When the input is `two
|
||||
hundred and three`, it actually writes 0x203 as hexadecimal.
|
||||
* Some variables (notably, `n`) have dual uses.
|
||||
* The magic number [42][hhgg] makes an appearance.
|
||||
* It has a long long numb-`main`-er within it!
|
||||
|
||||
[hhgg]: http://en.wikipedia.org/wiki/Answer_to_The_Ultimate_Question_of_Life,_the_Universe,_and_Everything
|
||||
|
||||
But the most important obfuscation is the clever construction of lookup table.
|
||||
The program uses 11 different characters required for recognizing 22 lexemes:
|
||||
|
||||
zero one tw- th(i)r- fo(u)r- fi- six-
|
||||
seven- eigh- nin- ten eleven twelve
|
||||
hundred(s) thousand(s) million(s) billion(s) trillion(s)
|
||||
a and -teen -ty
|
||||
|
||||
They are internally represented like this:
|
||||
|
||||
r n tw- tr- fr- f- s-
|
||||
sn- g- nn- tn ln twl
|
||||
nr(s) tsan(s) lln(s) blln(s) trlln(s)
|
||||
a an -tn -ty
|
||||
|
||||
The stemmer recognizes the longest matching prefix, so every lexeme can be
|
||||
recognized by at most three characters (e.g. `trl` instead of `trlln`). This is
|
||||
also handy for ignoring plurals. But that would mean that the table does not fit
|
||||
in the printable byte---11<sup>2</sup> is already almost 2<sup>7</sup>!
|
||||
|
||||
The trick is to use octal; three characters (`a`, `b` and `g`) are interpreted
|
||||
as sequences of two characters (`ny`, `nn` and `nw` respectively). Aside from
|
||||
a smaller lookup table, it has many good consequences:
|
||||
|
||||
* Both `a` and `and` share the common prefix, `ny`, and can be discarded
|
||||
altogether. Note that `ny` itself does not appear from other entries.
|
||||
* `thousand` is interpreted as `tsan`, which is equivalent to `tfyn` in the
|
||||
octal scheme. As it is the only entry with `tf` prefix, it can be shortened by
|
||||
one character.
|
||||
|
||||
Having said this important trick, other details should be relatively easier to
|
||||
follow. The order of lookup table, for example, is very important,
|
||||
and the biggest constant 6177 is not arbitrarily chosen.
|
||||
|
||||
## Acknowledgement
|
||||
|
||||
The cleaner (size-optimized) version of this program was originally published
|
||||
on my website in July 2011. Sun Park and others have reviewed it and made me
|
||||
aware of possible improvements. I'd also like to thank Seo Sanghyeon for
|
||||
proof-reading remarks.
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
<!--
|
||||
(c) Copyright 1984-2015, [Leo Broukhis, Simon Cooper, Landon Curt Noll][judges] - All rights reserved
|
||||
This work is licensed under a [Creative Commons Attribution-ShareAlike 3.0 Unported License][cc].
|
||||
|
||||
[judges]: http://www.ioccc.org/judges.html
|
||||
[cc]: http://creativecommons.org/licenses/by-sa/3.0/
|
||||
-->
|
11
kang.c
Normal file
11
kang.c
Normal file
@ -0,0 +1,11 @@
|
||||
#include <stdio.h>
|
||||
/*
 * Obfuscated contest entry: the layout and statement order are part of the
 * program, so only this header comment has been added.
 *
 * Per the author's notes in hint.text, the program reads a spelt-out English
 * number (e.g. "forty-two") from standard input and prints its value
 * (e.g. 42).
 *
 * What the code below shows directly:
 *   - main uses an old-style parameter list and reuses its two parameters,
 *     e and r, as ordinary working variables;
 *   - input is read one character at a time with getchar(), and each value
 *     is OR-ed with 32 before use;
 *   - "ynwtsflrabg" is the 11-character alphabet the current character is
 *     compared against, with the index n reduced modulo 11;
 *   - b collects matched indices three bits at a time (b*8+n) and is later
 *     consumed three bits at a time (b/=8);
 *   - "1+DIY/.K430x9G(kC[" is the lookup table indexed by n; the string
 *     literal is split across two source lines with a backslash-newline;
 *   - the accumulated result m is printed with "%llx" followed by a newline,
 *     i.e. as hexadecimal digits.
 */
long long n,u,m,b;main(e,r)char **r;
|
||||
{for(;n++||(e=getchar()|32)>=0;
|
||||
b="ynwtsflrabg"[n%=11]-e?b:b*8+n)
|
||||
for(r=b%64-25;e<47&&b;b/=8)for(n=19
|
||||
;n;n["1+DIY/.K430x9\
|
||||
G(kC["]-42&255^b||(m+=n>15?n:n>9
|
||||
?m%u*~-u:~(int)r?n+
|
||||
!(int)r*16:n*16,b=0))u=1ll<<6177%n
|
||||
--*4;
|
||||
printf("%llx\n",m);}
|
Loading…
Reference in New Issue
Block a user