feat: add kang program which interpret english number and translate to integer value
This commit is contained in:
parent
7b9fca8d83
commit
25a84aedf9
197
Makefile
Normal file
197
Makefile
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
#!/bin/make
|
||||||
|
# 2001 makefile
|
||||||
|
#
|
||||||
|
# Copyright (C) Simon Cooper, Landon Curt Noll, Peter Seebach, 2001.
|
||||||
|
# All Rights Reserved. Permission for personal, educational or non-profit
|
||||||
|
# use is granted provided this copyright and notice are included in its
|
||||||
|
# entirety and remains unaltered. All other uses must receive prior permission
|
||||||
|
# from the contest judges.
|
||||||
|
|
||||||
|
SHELL= /bin/sh
|
||||||
|
CAT= cat
|
||||||
|
RM= rm
|
||||||
|
SED= sed
|
||||||
|
TRUE= true
|
||||||
|
|
||||||
|
# optimization
|
||||||
|
#
|
||||||
|
# Most compiles will safely use -O2. Some can use only -O.
|
||||||
|
# A few compilers have broken optimizers and thus you may
|
||||||
|
# not want anything.
|
||||||
|
#
|
||||||
|
#OPT=
|
||||||
|
#OPT= -O
|
||||||
|
OPT= -O2
|
||||||
|
|
||||||
|
# flags for ANSI compiles
|
||||||
|
#
|
||||||
|
# NOTE: Some ANSI compilers make use of -Xa to turn on ANSI mode,
|
||||||
|
# others such as gcc may want -ansi, others (Compaq Tru64 Unix)
|
||||||
|
# want -std1, and others may want nothing at all.
|
||||||
|
# Adjust the CFLAGS line as needed.
|
||||||
|
#
|
||||||
|
# NOTE: Some compilers cannot optimize, or optimize some entries
|
||||||
|
# incorrectly. You might want to turn on -O to see if your
|
||||||
|
# compiler can handle them.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#CFLAGS= -Xa ${OPT}
|
||||||
|
#CFLAGS= -std1 $(OPT)
|
||||||
|
CFLAGS= -ansi ${OPT}
|
||||||
|
|
||||||
|
# NOTE: On some systems, the library path does not include the location
|
||||||
|
# of the X11 libraries. You might need to use a -L/path/to/X11/lib/dir
|
||||||
|
# in order to compile with these libs.
|
||||||
|
#
|
||||||
|
X11CCFLAGS= -I/usr/X11R6/include -I/usr/X11R6/include/X11 -I/usr/include/X11
|
||||||
|
#X11CCFLAGS=
|
||||||
|
X11LDFLAGS= -L/usr/X11R6/lib -Wl,-R/usr/X11R6/lib
|
||||||
|
#X11LDFLAGS=
|
||||||
|
|
||||||
|
# ANSI compiler
|
||||||
|
#
|
||||||
|
# Set CC to the name of your ANSI compiler.
|
||||||
|
#
|
||||||
|
# Some entries seem to need gcc. If you have gcc, set
|
||||||
|
# both CC and MAY_NEED_GCC to gcc.
|
||||||
|
#
|
||||||
|
# If you do not have gcc, set CC to the name of your ANSI compiler, and
|
||||||
|
# set MAY_NEED_GCC to either ${CC} (and hope for the best) or to just :
|
||||||
|
# to disable such programs.
|
||||||
|
#
|
||||||
|
CC= cc
|
||||||
|
MAY_NEED_GCC= gcc
|
||||||
|
|
||||||
|
# winners that compile under ANSI C
|
||||||
|
#
|
||||||
|
WINNERS= anonymous bellard bellard.otccex cheong coupard \
|
||||||
|
ctk dgbeards herrmann1 herrmann2 jason kev ollinger rosten \
|
||||||
|
schweikh westley williams
|
||||||
|
ALT_NAMES= westley.orig
|
||||||
|
DATA_FILES= anonymous_files bellard_files herrmann1_files herrmann2_files
|
||||||
|
|
||||||
|
|
||||||
|
all: ${WINNERS}
|
||||||
|
|
||||||
|
# Most likely to amaze
|
||||||
|
#
|
||||||
|
anonymous: anonymous.c
|
||||||
|
magic='${MAY_NEED_GCC} -O1'; \
|
||||||
|
X='-DA(X)=#X'; \
|
||||||
|
warning='-pedantic -Dprocessor'; \
|
||||||
|
$$magic $$warning -Dmagic= $$X "-DX=A($$magic \"$$X\")" \
|
||||||
|
-o anonymous anonymous.c
|
||||||
|
|
||||||
|
anonymous_files: anonymous.ten.c
|
||||||
|
|
||||||
|
# Best abuse of the rules
|
||||||
|
#
|
||||||
|
bellard: bellard.c
|
||||||
|
${CC} -rdynamic ${CFLAGS} bellard.c -o bellard -ldl
|
||||||
|
|
||||||
|
bellard_files: bellard.otccex.c
|
||||||
|
|
||||||
|
# Best short program
|
||||||
|
#
|
||||||
|
cheong: cheong.c
|
||||||
|
${CC} ${CFLAGS} cheong.c -o cheong
|
||||||
|
|
||||||
|
# Most obfuscated sound
|
||||||
|
#
|
||||||
|
coupard: coupard.c
|
||||||
|
${CC} ${CFLAGS} -o coupard coupard.c
|
||||||
|
|
||||||
|
# Worst Driver
|
||||||
|
#
|
||||||
|
ctk: ctk.c
|
||||||
|
${CC} ${CFLAGS} -o ctk ctk.c
|
||||||
|
|
||||||
|
# Worst AI
|
||||||
|
#
|
||||||
|
dgbeards: dgbeards.c
|
||||||
|
${CC} ${CFLAGS} -DE=break -DF=char -DK=case -DP=int -DR=return \
|
||||||
|
-DI=0xFFFF dgbeards.c -o dgbeards
|
||||||
|
|
||||||
|
# Best abuse of the C preprocessor
|
||||||
|
#
|
||||||
|
herrmann1: herrmann1.c herrmann1.sh
|
||||||
|
@echo "NOTE: $@ must be built/run using the script $@.sh"
|
||||||
|
${SHELL} ./herrmann1.sh
|
||||||
|
|
||||||
|
herrmann1_files: herrmann1.gcd herrmann1.sh herrmann1.times2
|
||||||
|
|
||||||
|
# Most eye-crossing
|
||||||
|
#
|
||||||
|
# This entry requires the GCC -include feature.
|
||||||
|
#
|
||||||
|
herrmann2: herrmann2.c
|
||||||
|
${MAY_NEED_GCC} ${CFLAGS} herrmann2.c -o herrmann2 \
|
||||||
|
-include /usr/include/stdlib.h -include /usr/include/stdio.h \
|
||||||
|
-include /usr/include/time.h -include /usr/include/unistd.h
|
||||||
|
|
||||||
|
herrmann2_files: herrmann2.cup herrmann2.ioccc
|
||||||
|
|
||||||
|
# Best Of Show
|
||||||
|
#
|
||||||
|
jason: jason.c
|
||||||
|
${CC} ${CFLAGS} -o jason jason.c
|
||||||
|
|
||||||
|
# Best Curses Game
|
||||||
|
#
|
||||||
|
kev: kev.c
|
||||||
|
${CC} ${CFLAGS} -DPORT=5455 -DSPEED=50 -o kev kev.c \
|
||||||
|
-lcurses -ltermcap \
|
||||||
|
-Dl_="socket(il.sin_family=AF_INET,SOCK_STREAM,0)"
|
||||||
|
|
||||||
|
# Best primal ASCII graphics
|
||||||
|
#
|
||||||
|
ollinger: ollinger.c
|
||||||
|
${CC} ${CFLAGS} ollinger.c -o ollinger
|
||||||
|
|
||||||
|
# Best abuse of the user
|
||||||
|
#
|
||||||
|
rosten: rosten.c
|
||||||
|
${CC} ${CFLAGS} rosten.c ${X11CCFLAGS} ${X11LDFLAGS} -pedantic -lX11 \
|
||||||
|
-o rosten
|
||||||
|
|
||||||
|
# Best one-liner
|
||||||
|
#
|
||||||
|
schweikh: schweikh.c
|
||||||
|
${CC} ${CFLAGS} schweikh.c -o schweikh
|
||||||
|
|
||||||
|
# Best position-independent code
|
||||||
|
#
|
||||||
|
westley: westley.c
|
||||||
|
${CC} ${CFLAGS} westley.c -o westley
|
||||||
|
|
||||||
|
westley.orig: westley.orig.c
|
||||||
|
${CC} ${CFLAGS} westley.orig.c -o westley.orig
|
||||||
|
|
||||||
|
# Best position-independent code
|
||||||
|
#
|
||||||
|
williams: williams.c
|
||||||
|
${CC} williams.c ${X11CCFLAGS} ${X11LDFLAGS} -lX11 -o williams
|
||||||
|
|
||||||
|
everything: ${WINNERS} ${ALT_NAMES} ${DATA_FILES}
|
||||||
|
|
||||||
|
love:
|
||||||
|
@echo 'not war?'
|
||||||
|
|
||||||
|
haste:
|
||||||
|
${MAKE} waste
|
||||||
|
|
||||||
|
waste:
|
||||||
|
@echo 'waste'
|
||||||
|
|
||||||
|
clean:
|
||||||
|
${RM} -f *.o a.out core ${WINNERS}
|
||||||
|
|
||||||
|
clobber: clean
|
||||||
|
${RM} -f ${WINNERS}
|
||||||
|
${RM} -f ${ALT_NAMES}
|
||||||
|
|
||||||
|
nuke: clobber
|
||||||
|
@${TRUE}
|
||||||
|
|
||||||
|
install: all
|
||||||
|
${CAT} ${WINNERS} ${ALT_NAMES} > /dev/null
|
160
hint.text
Normal file
160
hint.text
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
# Best short program
|
||||||
|
|
||||||
|
Seonghoon Kang
|
||||||
|
<kang.seonghoon@mearie.org>
|
||||||
|
|
||||||
|
|
||||||
|
## Judges' comments:
|
||||||
|
### To build:
|
||||||
|
|
||||||
|
make kang
|
||||||
|
|
||||||
|
### To run:
|
||||||
|
|
||||||
|
echo "full spelling of an English cardinal numeral less than a quadrillion" | ./kang
|
||||||
|
|
||||||
|
### Try:
|
||||||
|
|
||||||
|
echo Nineteen hundred and eighty-four | ./kang
|
||||||
|
echo uno | ./kang
|
||||||
|
echo trois | ./kang
|
||||||
|
echo fier | ./kang
|
||||||
|
echo "shest'" | ./kang
|
||||||
|
|
||||||
|
### Selected Judges Remarks:
|
||||||
|
|
||||||
|
The judges were able to appreciate the Indo-European language family
|
||||||
|
relation by making this entry successfully recognize *some* French,
|
||||||
|
German, Italian, Russian, and Spanish numerals.
|
||||||
|
|
||||||
|
Also worth mentioning is this entry's ability to understand the
|
||||||
|
colloquial year numbers of the last millennium.
|
||||||
|
|
||||||
|
We've added a linefeed to the print format for convenience.
|
||||||
|
|
||||||
|
|
||||||
|
## Author's comments:
|
||||||
|
## Synopsis
|
||||||
|
|
||||||
|
This short program reads a spelt number (e.g. `forty-two`) and writes a
|
||||||
|
corresponding decimal number (e.g. `42`). Too long for one-liners, alas,
|
||||||
|
but it still qualifies as a *short* program as it has less than 0x100 bytes.
|
||||||
|
|
||||||
|
It accepts a variety of spelt numbers:
|
||||||
|
|
||||||
|
* It correctly handles `zero`.
|
||||||
|
* Hyphen does not make a difference: `forty-two` and `forty two` are same.
|
||||||
|
So does period or comma.
|
||||||
|
* Cases do not make a difference either: `TWO`, `Two`, `two` are same.
|
||||||
|
* `one` and `a` are interchangeable: `one hundred` and `a hundred` are same.
|
||||||
|
* `and` is optional: `one hundred twenty-three` and `one hundred and twenty-three`
|
||||||
|
are same.
|
||||||
|
* It supports every non-negative integer less than 10<sup>15</sup>-1. It uses
|
||||||
|
the small scale (i.e. American): `billion` is 10<sup>9</sup> and `trillion` is
|
||||||
|
10<sup>12</sup>.
|
||||||
|
* Sometimes, it can magically correct typos.
|
||||||
|
|
||||||
|
It does *not* accept some spelt numbers, which I found mostly irrelevant:
|
||||||
|
|
||||||
|
* A bare `hundred`, `thousand` etc. do not work.
|
||||||
|
* `one million million` does not work. Get used to `one trillion`!
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
This program is quite portable, only requiring the following:
|
||||||
|
|
||||||
|
* The signature `int main(int, int)` should be accepted by the linker. (Original
|
||||||
|
version only)
|
||||||
|
* `char` should be at least 8 bits long (as dictated by the standard), `int`
|
||||||
|
should be at least 32 bits long, `long long` should be at least 64 bits long.
|
||||||
|
* Both the compiler and execution environment should use an ASCII-compatible
|
||||||
|
character set and two's complement representation.
|
||||||
|
* Overflow and underflow on `char` should wrap around, if your `char` is unsigned.
|
||||||
|
* [A trustworthy compiler][trustingtrust].
|
||||||
|
|
||||||
|
[trustingtrust]: http://cm.bell-labs.com/who/ken/trust.html
|
||||||
|
|
||||||
|
The design of the program explicitly allows for `EOF` which does not equal -1
|
||||||
|
(it has to be negative per the standard) and both signed and unsigned `char`,
|
||||||
|
for example. Furthermore it is endian-independent.
|
||||||
|
|
||||||
|
## Obfuscations (SPOILERS!)
|
||||||
|
|
||||||
|
Many obfuscations used are typical for standard IOCCC entries:
|
||||||
|
|
||||||
|
* Two arguments from `main` function are reused as normal variables.
|
||||||
|
* Every conditional has been replaced with `?:` ternary operator and `||`
|
||||||
|
short-circuiting operator.
|
||||||
|
* It has exactly three nested `for` loops and nothing else.
|
||||||
|
* Common two's complement tricks: `~-a` instead of `a-1`,
|
||||||
|
`~a?...:...` instead of `a!=-1?...:...`, etc.
|
||||||
|
* Comma operators (`,`) for multiple statements. The number of them is
|
||||||
|
minimized, however, as it is too easy to (ab)use them.
|
||||||
|
* It lacks most parentheses around bitwise and arithmetic operators. It was
|
||||||
|
originally written for shortness so parentheses were **EVIL**.
|
||||||
|
* `n["string"]` instead of `"string"[n]`. Both are fine for this program but
|
||||||
|
I went to the former just for fun.
|
||||||
|
* Utter lack of any kind of layouts. (Oh, except for the first column.)
|
||||||
|
|
||||||
|
Other obfuscations are more subtle:
|
||||||
|
|
||||||
|
* The string `"1+DIY/.K430x9G(kC["` is 18 bytes long, but actually 19 bytes
|
||||||
|
including the final null character are used.
|
||||||
|
* It internally represents numbers as hexadecimal. When the input is `two
|
||||||
|
hundred and three`, it actually writes 0x203 as hexadecimal.
|
||||||
|
* Some variables (notably, `n`) have dual uses.
|
||||||
|
* The magic number [42][hhgg] makes an appearance.
|
||||||
|
* It has a long long numb-`main`-er within it!
|
||||||
|
|
||||||
|
[hhgg]: http://en.wikipedia.org/wiki/Answer_to_The_Ultimate_Question_of_Life,_the_Universe,_and_Everything
|
||||||
|
|
||||||
|
But the most important obfuscation is the clever construction of lookup table.
|
||||||
|
The program uses 11 different characters required for recognizing 22 lexemes:
|
||||||
|
|
||||||
|
zero one tw- th(i)r- fo(u)r- fi- six-
|
||||||
|
seven- eigh- nin- ten eleven twelve
|
||||||
|
hundred(s) thousand(s) million(s) billion(s) trillion(s)
|
||||||
|
a and -teen -ty
|
||||||
|
|
||||||
|
So that they are internally represented as like:
|
||||||
|
|
||||||
|
r n tw- tr- fr- f- s-
|
||||||
|
sn- g- nn- tn ln twl
|
||||||
|
nr(s) tsan(s) lln(s) blln(s) trlln(s)
|
||||||
|
a an -tn -ty
|
||||||
|
|
||||||
|
The stemmer recognizes the longest matching prefix, so every lexeme can be
|
||||||
|
recognized by at most three characters (e.g. `trl` instead of `trlln`). This is
|
||||||
|
also handy for ignoring plurals. But that would mean that the table does not fit
|
||||||
|
in the printable byte---11<sup>2</sup> is already almost 2<sup>7</sup>!
|
||||||
|
|
||||||
|
The trick is to use octal; three characters (`a`, `b` and `g`) are interpreted
|
||||||
|
as sequences of two characters (`ny`, `nn` and `nw` respectively). Aside from
|
||||||
|
a smaller lookup table, it has many good consequences:
|
||||||
|
|
||||||
|
* Both `a` and `and` share the common prefix, `ny`, and can be discarded
|
||||||
|
altogether. Note that `ny` itself does not appear from other entries.
|
||||||
|
* `thousand` is interpreted as `tsan`, which is equivalent to `tfyn` in the
|
||||||
|
octal scheme. As it is the only entry with `tf` prefix, it can be shortened by
|
||||||
|
one character.
|
||||||
|
|
||||||
|
Having explained this important trick, other details should be relatively easier to
|
||||||
|
follow. The order of lookup table, for example, is very important,
|
||||||
|
and the biggest constant 6177 is not arbitrarily chosen.
|
||||||
|
|
||||||
|
## Acknowledgement
|
||||||
|
|
||||||
|
The cleaner (size-optimized) version of this program was originally published
|
||||||
|
on my website in July 2011. Sun Park and others have reviewed it and made me
|
||||||
|
aware of possible improvements. I'd also like to thank Seo Sanghyeon for
|
||||||
|
proof-reading remarks.
|
||||||
|
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
<!--
|
||||||
|
(c) Copyright 1984-2015, [Leo Broukhis, Simon Cooper, Landon Curt Noll][judges] - All rights reserved
|
||||||
|
This work is licensed under a [Creative Commons Attribution-ShareAlike 3.0 Unported License][cc].
|
||||||
|
|
||||||
|
[judges]: http://www.ioccc.org/judges.html
|
||||||
|
[cc]: http://creativecommons.org/licenses/by-sa/3.0/
|
||||||
|
-->
|
11
kang.c
Normal file
11
kang.c
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
/*
 * kang: read a spelt-out English number from stdin and print its value.
 *
 * State lives in the file-scope long long variables n, u, m and b (all
 * start at zero); main's two parameters e and r are reused as ordinary
 * scratch variables rather than as argc/argv.
 *
 * Outer loop: whenever n is 0 a new character is fetched with getchar()
 * and OR-ed with 32 (which folds ASCII upper case onto lower case); the
 * loop stops once that value is negative, i.e. at EOF.  The character is
 * compared against the 11-character alphabet "ynwtsflrabg", and on a
 * match its index is shifted into b in base 8 (b = b*8 + n).
 *
 * Inner loops: entered only when e < 47 and b is non-zero; they scan the
 * 19-byte table "1+DIY/.K430x9G(kC[" (terminating NUL included) for an
 * entry equal to b and, on a hit, update the accumulator m and reset b.
 * NOTE(review): per the accompanying hint text, b holds the stemmed word
 * and the table order / the constant 6177 encode each lexeme's value and
 * scale -- confirm against the author's notes before changing anything.
 *
 * Output: m is printed with "%llx", so m is built up as hexadecimal
 * digits that read as the decimal result.
 */
long long n,u,m,b;main(e,r)char **r;
|
||||||
|
{for(;n++||(e=getchar()|32)>=0;
|
||||||
|
b="ynwtsflrabg"[n%=11]-e?b:b*8+n)
|
||||||
|
for(r=b%64-25;e<47&&b;b/=8)for(n=19
|
||||||
|
;n;n["1+DIY/.K430x9\
|
||||||
|
G(kC["]-42&255^b||(m+=n>15?n:n>9
|
||||||
|
?m%u*~-u:~(int)r?n+
|
||||||
|
!(int)r*16:n*16,b=0))u=1ll<<6177%n
|
||||||
|
--*4;
|
||||||
|
printf("%llx\n",m);}
|
Loading…
Reference in New Issue
Block a user