add prev files

main
onebigear 2 years ago
commit 591d2d4ae5

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,33 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e76a2d1f-88b4-434a-bdbc-0e4fca523be8",
"metadata": {},
"source": [
"https://danielmiessler.com/study/encoding-encryption-hashing-obfuscation/"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

@ -0,0 +1,646 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"id": "03589aa7-7fc4-4fe1-92ac-9f523e2ba90c",
"metadata": {},
"outputs": [],
"source": [
"# print ascii encoding of a character"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "e068c988-0a8a-45d8-ac64-ea07cf37b275",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"'\\\\u4f60'\""
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ascii(\"你\")"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "0760b0c9-17f7-4fca-87a1-2c3423aaf61f",
"metadata": {},
"outputs": [],
"source": [
"# what is this ascii standard?"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "44976d3e-c6a0-4d2e-8e3a-9d21ac254803",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"'a'\""
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ascii(\"a\")"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "8ffd857f-e182-49d0-9722-f8d2204f9054",
"metadata": {},
"outputs": [],
"source": [
"# try ord "
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "97c7c7d1-b79f-42ac-8d00-81c12b428281",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"97"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ord(\"a\")"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "9477f26f-3e9c-4a4c-be1b-1c135cd3688e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"65"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ord(\"A\")"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "f0bb5070-d333-46c2-bdea-da6404989ba3",
"metadata": {},
"outputs": [],
"source": [
"#Print alphabet lower and upper case\n",
"alphabet = [\"a\",\"b\",\"c\",\"d\",\"e\",\"f\",\"g\",\"h\",\"i\",\"j\",\"k\",\"l\",\"m\",\"n\",\"o\",\"p\",\"q\",\"r\",\"s\",\"t\",\"u\",\"v\",\"w\",\"x\",\"y\",\"z\"]\n",
"ALPHABET = []\n",
"for letter in alphabet: ALPHABET.append(letter.upper())"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "b53660cc-cee1-4d69-8498-c94edd84e5aa",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Printing corresponding ASCII numerical representation :\n",
"a :97 b :98 c :99 d :100 e :101 f :102 g :103 h :104 i :105 j :106 k :107 l :108 m :109 n :110 o :111 p :112 q :113 r :114 s :115 t :116 u :117 v :118 w :119 x :120 y :121 z :122 "
]
}
],
"source": [
"print(\"Printing corresponding ASCII numerical representation :\")\n",
"for letter in alphabet: \n",
" print(\"{} :\".format (letter) + str (ord(letter)), end = \" \")"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "4f7ec4ff-b6e6-4cda-b8a3-6e2a3521d9c8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Printing corresponding ASCII numerical representation :\n",
"A :65 B :66 C :67 D :68 E :69 F :70 G :71 H :72 I :73 J :74 K :75 L :76 M :77 N :78 O :79 P :80 Q :81 R :82 S :83 T :84 U :85 V :86 W :87 X :88 Y :89 Z :90 "
]
}
],
"source": [
"print(\"Printing corresponding ASCII numerical representation :\")\n",
"for LETTER in ALPHABET: \n",
" print(\"{} :\".format (LETTER) + str (ord(LETTER)), end = \" \")"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "87963ac8-050c-44a5-9f5a-af94e5ba40ce",
"metadata": {},
"outputs": [],
"source": [
"# the code point of a lower case letter is 32 greater than that of its capital letter, hence the following hack\n",
"# without using the upper() method in Python"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "6318993c-1f92-4aa5-befd-2e01963bda15",
"metadata": {},
"outputs": [],
"source": [
"# takes input of a lower case char\n",
"def upperHack(c):\n",
" # get number of the lower case letter\n",
" l_num = ord(c)\n",
" u_num = l_num - 32\n",
" # return the upper case letter from u_num with chr()method\n",
" return chr(u_num)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "b47bae87-4e21-4a9c-a90a-6f999df2ac95",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'C'"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"upperHack(\"c\")"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "f0c628f3-e92c-446d-81c4-9cc7149b3a1c",
"metadata": {},
"outputs": [],
"source": [
"# the reverse direction: takes input of an upper case char and returns the lower case char\n",
"def lowerHack(C):\n",
" # get number of the upper case letter\n",
" u_num = ord(C)\n",
" l_num = u_num + 32\n",
" # return the lower case letter from l_num with the chr() method\n",
" return chr(l_num)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "46a9f58f-5428-4901-85f4-73542d8110fd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'c'"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lowerHack(\"C\")"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "a467b7a2-1ea5-4148-bfa4-4424ec110465",
"metadata": {},
"outputs": [],
"source": [
"# 好,现在我们把西文的方法应用到中文里去\n",
"# 拉丁语系下的西文中的词由字母构成;中文里的字由笔划构成。\n",
"# 我们先从系统层面思考:笔划是构造单个汉字的材料,在python中可以实现对笔划的探索性编程(exploratory programming)。\n",
"# 或者,我们可以在“语料库”的系统层面思考。\n",
"#《千字文》中包含了一千个不重复的汉字,我们可以理解为这是古人的“识字课本”。\n",
"# 汉字和千字文之间个体与系统的关系,好比“永字八法”中,笔划和“永”字之间的关系。\n",
"# 以王羲之的永字为范本,学童反复摹习,掌握“永”字中的“侧、勒、弩、趯、策、掠、啄、磔”,以期掌握写好每一个汉字。\n",
"# 同样,以千字文为范本,学童反复背诵研习,为了识字,并以“字”为单位行文。"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "92840cd9-b859-4c27-9016-80608168e448",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"天地玄黄宇宙洪荒。日月盈昃辰宿列张。4\n",
"寒来暑往秋收冬藏。闰余成岁律吕调阳。8\n",
"云腾致雨露结为霜。金生丽水玉出昆冈。12\n",
"剑号巨阙珠称夜光。果珍李柰菜重芥姜。16\n",
"海咸河淡鳞潜羽翔。龙师火帝鸟官人皇。20\n",
"始制文字乃服衣裳。推位让国有虞陶唐。24\n",
"吊民伐罪周发殷汤。坐朝问道垂拱平章。28\n",
"爱育黎首臣伏戎羌。遐迩壹体率宾归王。32\n",
"鸣凤在树白驹食场。化被草木赖及万方。36\n",
"盖此身发四大五常。恭惟鞠养岂敢毁伤。40\n",
"女慕贞洁男效才良。知过必改得能莫忘。44\n",
"罔谈彼短靡恃己长。信使可覆器欲难量。48\n",
"墨悲丝染诗赞羔羊。景行维贤克念作圣。52\n",
"德建名立形端表正。空谷传声虚堂习听。56\n",
"祸因恶积福缘善庆。尺璧非宝寸阴是竞。60\n",
"资父事君曰严与敬。孝当竭力忠则尽命。64\n",
"临深履薄夙兴温凊。似兰斯馨如松之盛。68\n",
"川流不息渊澄取映。容止若思言辞安定。72\n",
"笃初诚美慎终宜令。荣业所基藉甚无竟。76\n",
"学优登仕摄职从政。存以甘棠去而益咏。80\n",
"乐殊贵贱礼别尊卑。上和下睦夫唱妇随。84\n",
"外受傅训入奉母仪。诸姑伯叔犹子比儿。88\n",
"孔怀兄弟同气连枝。交友投分切磨箴规。92\n",
"仁慈隐恻造次弗离。节义廉退颠沛匪亏。96\n",
"性静情逸心动神疲。守真志满逐物意移。100\n",
"坚持雅操好爵自縻。都邑华夏东西二京。104\n",
"背邙面洛浮渭据泾。宫殿盘郁楼观飞惊。108\n",
"图写禽兽画彩仙灵。丙舍傍启甲帐对楹。112\n",
"肆筵设席鼓瑟吹笙。升阶纳陛弁转疑星。116\n",
"右通广内左达承明。既集坟典亦聚群英。120\n",
"杜稿钟隶漆书壁经。府罗将相路侠槐卿。124\n",
"户封八县家给千兵。高冠陪辇驱毂振缨。128\n",
"世禄侈富车驾肥轻。策功茂实勒碑刻铭。132\n",
"磻溪伊尹佐时阿衡。奄宅曲阜微旦孰营。136\n",
"桓公匡合济弱扶倾。绮回汉惠说感武丁。140\n",
"俊乂密勿多士实宁。晋楚更霸赵魏困横。144\n",
"假途灭虢践土会盟。何遵约法韩弊烦刑。148\n",
"起翦颇牧用军最精。宣威沙漠驰誉丹青。152\n",
"九州禹迹百郡秦并。岳宗恒岱禅主云亭。156\n",
"雁门紫塞鸡田赤城。昆池碣石巨野洞庭。160\n",
"旷远绵邈岩岫杳冥。治本于农务兹稼穑。164\n",
"俶载南亩我艺黍稷。税熟贡新劝赏黜陟。168\n",
"孟轲敦素史鱼秉直。庶几中庸劳谦谨敕。172\n",
"聆音察理鉴貌辨色。贻厥嘉猷勉其祗植。176\n",
"省躬讥诫宠增抗极。殆辱近耻林皋幸即。180\n",
"两疏见机解组谁逼。索居闲处沉默寂寥。184\n",
"求古寻论散虑逍遥。欣奏累遣戚谢欢招。188\n",
"渠荷的历园莽抽条。枇杷晚翠梧桐早凋。192\n",
"陈根委翳落叶飘摇。游鹍独运凌摩绛霄。196\n",
"耽读玩市寓目囊箱。易輶攸畏属耳垣墙。200\n",
"具膳餐饭适口充肠。饱饫烹宰饥厌糟糠。204\n",
"亲戚故旧老少异粮。妾御绩纺侍巾帷房。208\n",
"纨扇圆洁银烛炜煌。昼眠夕寐蓝笋象床。212\n",
"弦歌酒宴接杯举觞。矫手顿足悦豫且康。216\n",
"嫡后嗣续祭祀烝尝。稽颡再拜悚惧恐惶。220\n",
"笺牒简要顾答审详。骸垢想浴执热愿凉。224\n",
"驴骡犊特骇跃超骧。诛斩贼盗捕获叛亡。228\n",
"布射辽丸嵇琴阮啸。恬笔伦纸钧巧任钓。232\n",
"释纷利俗并皆佳妙。毛施淑姿工颦妍笑。236\n",
"年矢每催曦晖朗曜。璇玑悬斡晦魄环照。240\n",
"指薪修祜永绥吉劭。矩步引领俯仰廊庙。244\n",
"束带矜庄徘徊瞻眺。孤陋寡闻愚蒙等诮。248\n",
"谓语助者焉哉乎也。250\n"
]
}
],
"source": [
"# read in \"A Thousand Character Essay\"\n",
"thousand_w = open(\"files/thousand_char_essay.txt\",\"r\")\n",
"corpus = thousand_w.read(None)\n",
"print(corpus)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "23892e53-9d2e-4d93-b98f-e4fb08ad910c",
"metadata": {},
"outputs": [],
"source": [
"# remove counter numbers from corpus \n",
"clean_corpus=[]\n",
"for word in corpus:\n",
" if word.isdigit() is not True:\n",
" clean_corpus.append(word)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "b6b6892e-6b4f-4d54-a466-c2c5ec04f4c9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"天地玄黄,宇宙洪荒。日月盈昃,辰宿列张。\n",
"寒来暑往,秋收冬藏。闰余成岁,律吕调阳。\n",
"云腾致雨,露结为霜。金生丽水,玉出昆冈。\n",
"剑号巨阙,珠称夜光。果珍李柰,菜重芥姜。\n",
"海咸河淡,鳞潜羽翔。龙师火帝,鸟官人皇。\n",
"始制文字,乃服衣裳。推位让国,有虞陶唐。\n",
"吊民伐罪,周发殷汤。坐朝问道,垂拱平章。\n",
"爱育黎首,臣伏戎羌。遐迩壹体,率宾归王。\n",
"鸣凤在树,白驹食场。化被草木,赖及万方。\n",
"盖此身发,四大五常。恭惟鞠养,岂敢毁伤。\n",
"女慕贞洁,男效才良。知过必改,得能莫忘。\n",
"罔谈彼短,靡恃己长。信使可覆,器欲难量。\n",
"墨悲丝染,诗赞羔羊。景行维贤,克念作圣。\n",
"德建名立,形端表正。空谷传声,虚堂习听。\n",
"祸因恶积,福缘善庆。尺璧非宝,寸阴是竞。\n",
"资父事君,曰严与敬。孝当竭力,忠则尽命。\n",
"临深履薄,夙兴温凊。似兰斯馨,如松之盛。\n",
"川流不息,渊澄取映。容止若思,言辞安定。\n",
"笃初诚美,慎终宜令。荣业所基,藉甚无竟。\n",
"学优登仕,摄职从政。存以甘棠,去而益咏。\n",
"乐殊贵贱,礼别尊卑。上和下睦,夫唱妇随。\n",
"外受傅训,入奉母仪。诸姑伯叔,犹子比儿。\n",
"孔怀兄弟,同气连枝。交友投分,切磨箴规。\n",
"仁慈隐恻,造次弗离。节义廉退,颠沛匪亏。\n",
"性静情逸,心动神疲。守真志满,逐物意移。\n",
"坚持雅操,好爵自縻。都邑华夏,东西二京。\n",
"背邙面洛,浮渭据泾。宫殿盘郁,楼观飞惊。\n",
"图写禽兽,画彩仙灵。丙舍傍启,甲帐对楹。\n",
"肆筵设席,鼓瑟吹笙。升阶纳陛,弁转疑星。\n",
"右通广内,左达承明。既集坟典,亦聚群英。\n",
"杜稿钟隶,漆书壁经。府罗将相,路侠槐卿。\n",
"户封八县,家给千兵。高冠陪辇,驱毂振缨。\n",
"世禄侈富,车驾肥轻。策功茂实,勒碑刻铭。\n",
"磻溪伊尹,佐时阿衡。奄宅曲阜,微旦孰营。\n",
"桓公匡合,济弱扶倾。绮回汉惠,说感武丁。\n",
"俊乂密勿,多士实宁。晋楚更霸,赵魏困横。\n",
"假途灭虢,践土会盟。何遵约法,韩弊烦刑。\n",
"起翦颇牧,用军最精。宣威沙漠,驰誉丹青。\n",
"九州禹迹,百郡秦并。岳宗恒岱,禅主云亭。\n",
"雁门紫塞,鸡田赤城。昆池碣石,巨野洞庭。\n",
"旷远绵邈,岩岫杳冥。治本于农,务兹稼穑。\n",
"俶载南亩,我艺黍稷。税熟贡新,劝赏黜陟。\n",
"孟轲敦素,史鱼秉直。庶几中庸,劳谦谨敕。\n",
"聆音察理,鉴貌辨色。贻厥嘉猷,勉其祗植。\n",
"省躬讥诫,宠增抗极。殆辱近耻,林皋幸即。\n",
"两疏见机,解组谁逼。索居闲处,沉默寂寥。\n",
"求古寻论,散虑逍遥。欣奏累遣,戚谢欢招。\n",
"渠荷的历,园莽抽条。枇杷晚翠,梧桐早凋。\n",
"陈根委翳,落叶飘摇。游鹍独运,凌摩绛霄。\n",
"耽读玩市,寓目囊箱。易輶攸畏,属耳垣墙。\n",
"具膳餐饭,适口充肠。饱饫烹宰,饥厌糟糠。\n",
"亲戚故旧,老少异粮。妾御绩纺,侍巾帷房。\n",
"纨扇圆洁,银烛炜煌。昼眠夕寐,蓝笋象床。\n",
"弦歌酒宴,接杯举觞。矫手顿足,悦豫且康。\n",
"嫡后嗣续,祭祀烝尝。稽颡再拜,悚惧恐惶。\n",
"笺牒简要,顾答审详。骸垢想浴,执热愿凉。\n",
"驴骡犊特,骇跃超骧。诛斩贼盗,捕获叛亡。\n",
"布射辽丸,嵇琴阮啸。恬笔伦纸,钧巧任钓。\n",
"释纷利俗,并皆佳妙。毛施淑姿,工颦妍笑。\n",
"年矢每催,曦晖朗曜。璇玑悬斡,晦魄环照。\n",
"指薪修祜,永绥吉劭。矩步引领,俯仰廊庙。\n",
"束带矜庄,徘徊瞻眺。孤陋寡闻,愚蒙等诮。\n",
"谓语助者,焉哉乎也。\n"
]
}
],
"source": [
"# print(clean_corpus)\n",
"clean_corpus_s = \" \"\n",
"clean_corpus_s = \"\".join(str(x) for x in clean_corpus)\n",
"print(clean_corpus_s)"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "34dfbc8f-df1b-4969-a5fb-d073a08c9a32",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"'\\u5929''\\u5730''\\u7384''\\u9ec4''\\u5b87''\\u5b99''\\u6d2a''\\u8352'。'\\u65e5''\\u6708''\\u76c8''\\u6603''\\u8fb0''\\u5bbf''\\u5217''\\u5f20'。\n",
"'\\u5bd2''\\u6765''\\u6691''\\u5f80''\\u79cb''\\u6536''\\u51ac''\\u85cf'。'\\u95f0''\\u4f59''\\u6210''\\u5c81''\\u5f8b''\\u5415''\\u8c03''\\u9633'。\n",
"'\\u4e91''\\u817e''\\u81f4''\\u96e8''\\u9732''\\u7ed3''\\u4e3a''\\u971c'。'\\u91d1''\\u751f''\\u4e3d''\\u6c34''\\u7389''\\u51fa''\\u6606''\\u5188'。\n",
"'\\u5251''\\u53f7''\\u5de8''\\u9619''\\u73e0''\\u79f0''\\u591c''\\u5149'。'\\u679c''\\u73cd''\\u674e''\\u67f0''\\u83dc''\\u91cd''\\u82a5''\\u59dc'。\n",
"'\\u6d77''\\u54b8''\\u6cb3''\\u6de1''\\u9cde''\\u6f5c''\\u7fbd''\\u7fd4'。'\\u9f99''\\u5e08''\\u706b''\\u5e1d''\\u9e1f''\\u5b98''\\u4eba''\\u7687'。\n",
"'\\u59cb''\\u5236''\\u6587''\\u5b57''\\u4e43''\\u670d''\\u8863''\\u88f3'。'\\u63a8''\\u4f4d''\\u8ba9''\\u56fd''\\u6709''\\u865e''\\u9676''\\u5510'。\n",
"'\\u540a''\\u6c11''\\u4f10''\\u7f6a''\\u5468''\\u53d1''\\u6bb7''\\u6c64'。'\\u5750''\\u671d''\\u95ee''\\u9053''\\u5782''\\u62f1''\\u5e73''\\u7ae0'。\n",
"'\\u7231''\\u80b2''\\u9ece''\\u9996''\\u81e3''\\u4f0f''\\u620e''\\u7f8c'。'\\u9050''\\u8fe9''\\u58f9''\\u4f53''\\u7387''\\u5bbe''\\u5f52''\\u738b'。\n",
"'\\u9e23''\\u51e4''\\u5728''\\u6811''\\u767d''\\u9a79''\\u98df''\\u573a'。'\\u5316''\\u88ab''\\u8349''\\u6728''\\u8d56''\\u53ca''\\u4e07''\\u65b9'。\n",
"'\\u76d6''\\u6b64''\\u8eab''\\u53d1''\\u56db''\\u5927''\\u4e94''\\u5e38'。'\\u606d''\\u60df''\\u97a0''\\u517b''\\u5c82''\\u6562''\\u6bc1''\\u4f24'。\n",
"'\\u5973''\\u6155''\\u8d1e''\\u6d01''\\u7537''\\u6548''\\u624d''\\u826f'。'\\u77e5''\\u8fc7''\\u5fc5''\\u6539''\\u5f97''\\u80fd''\\u83ab''\\u5fd8'。\n",
"'\\u7f54''\\u8c08''\\u5f7c''\\u77ed''\\u9761''\\u6043''\\u5df1''\\u957f'。'\\u4fe1''\\u4f7f''\\u53ef''\\u8986''\\u5668''\\u6b32''\\u96be''\\u91cf'。\n",
"'\\u58a8''\\u60b2''\\u4e1d''\\u67d3''\\u8bd7''\\u8d5e''\\u7f94''\\u7f8a'。'\\u666f''\\u884c''\\u7ef4''\\u8d24''\\u514b''\\u5ff5''\\u4f5c''\\u5723'。\n",
"'\\u5fb7''\\u5efa''\\u540d''\\u7acb''\\u5f62''\\u7aef''\\u8868''\\u6b63'。'\\u7a7a''\\u8c37''\\u4f20''\\u58f0''\\u865a''\\u5802''\\u4e60''\\u542c'。\n",
"'\\u7978''\\u56e0''\\u6076''\\u79ef''\\u798f''\\u7f18''\\u5584''\\u5e86'。'\\u5c3a''\\u74a7''\\u975e''\\u5b9d''\\u5bf8''\\u9634''\\u662f''\\u7ade'。\n",
"'\\u8d44''\\u7236''\\u4e8b''\\u541b''\\u66f0''\\u4e25''\\u4e0e''\\u656c'。'\\u5b5d''\\u5f53''\\u7aed''\\u529b''\\u5fe0''\\u5219''\\u5c3d''\\u547d'。\n",
"'\\u4e34''\\u6df1''\\u5c65''\\u8584''\\u5919''\\u5174''\\u6e29''\\u51ca'。'\\u4f3c''\\u5170''\\u65af''\\u99a8''\\u5982''\\u677e''\\u4e4b''\\u76db'。\n",
"'\\u5ddd''\\u6d41''\\u4e0d''\\u606f''\\u6e0a''\\u6f84''\\u53d6''\\u6620'。'\\u5bb9''\\u6b62''\\u82e5''\\u601d''\\u8a00''\\u8f9e''\\u5b89''\\u5b9a'。\n",
"'\\u7b03''\\u521d''\\u8bda''\\u7f8e''\\u614e''\\u7ec8''\\u5b9c''\\u4ee4'。'\\u8363''\\u4e1a''\\u6240''\\u57fa''\\u85c9''\\u751a''\\u65e0''\\u7adf'。\n",
"'\\u5b66''\\u4f18''\\u767b''\\u4ed5''\\u6444''\\u804c''\\u4ece''\\u653f'。'\\u5b58''\\u4ee5''\\u7518''\\u68e0''\\u53bb''\\u800c''\\u76ca''\\u548f'。\n",
"'\\u4e50''\\u6b8a''\\u8d35''\\u8d31''\\u793c''\\u522b''\\u5c0a''\\u5351'。'\\u4e0a''\\u548c''\\u4e0b''\\u7766''\\u592b''\\u5531''\\u5987''\\u968f'。\n",
"'\\u5916''\\u53d7''\\u5085''\\u8bad''\\u5165''\\u5949''\\u6bcd''\\u4eea'。'\\u8bf8''\\u59d1''\\u4f2f''\\u53d4''\\u72b9''\\u5b50''\\u6bd4''\\u513f'。\n",
"'\\u5b54''\\u6000''\\u5144''\\u5f1f''\\u540c''\\u6c14''\\u8fde''\\u679d'。'\\u4ea4''\\u53cb''\\u6295''\\u5206''\\u5207''\\u78e8''\\u7bb4''\\u89c4'。\n",
"'\\u4ec1''\\u6148''\\u9690''\\u607b''\\u9020''\\u6b21''\\u5f17''\\u79bb'。'\\u8282''\\u4e49''\\u5ec9''\\u9000''\\u98a0''\\u6c9b''\\u532a''\\u4e8f'。\n",
"'\\u6027''\\u9759''\\u60c5''\\u9038''\\u5fc3''\\u52a8''\\u795e''\\u75b2'。'\\u5b88''\\u771f''\\u5fd7''\\u6ee1''\\u9010''\\u7269''\\u610f''\\u79fb'。\n",
"'\\u575a''\\u6301''\\u96c5''\\u64cd''\\u597d''\\u7235''\\u81ea''\\u7e3b'。'\\u90fd''\\u9091''\\u534e''\\u590f''\\u4e1c''\\u897f''\\u4e8c''\\u4eac'。\n",
"'\\u80cc''\\u9099''\\u9762''\\u6d1b''\\u6d6e''\\u6e2d''\\u636e''\\u6cfe'。'\\u5bab''\\u6bbf''\\u76d8''\\u90c1''\\u697c''\\u89c2''\\u98de''\\u60ca'。\n",
"'\\u56fe''\\u5199''\\u79bd''\\u517d''\\u753b''\\u5f69''\\u4ed9''\\u7075'。'\\u4e19''\\u820d''\\u508d''\\u542f''\\u7532''\\u5e10''\\u5bf9''\\u6979'。\n",
"'\\u8086''\\u7b75''\\u8bbe''\\u5e2d''\\u9f13''\\u745f''\\u5439''\\u7b19'。'\\u5347''\\u9636''\\u7eb3''\\u965b''\\u5f01''\\u8f6c''\\u7591''\\u661f'。\n",
"'\\u53f3''\\u901a''\\u5e7f''\\u5185''\\u5de6''\\u8fbe''\\u627f''\\u660e'。'\\u65e2''\\u96c6''\\u575f''\\u5178''\\u4ea6''\\u805a''\\u7fa4''\\u82f1'。\n",
"'\\u675c''\\u7a3f''\\u949f''\\u96b6''\\u6f06''\\u4e66''\\u58c1''\\u7ecf'。'\\u5e9c''\\u7f57''\\u5c06''\\u76f8''\\u8def''\\u4fa0''\\u69d0''\\u537f'。\n",
"'\\u6237''\\u5c01''\\u516b''\\u53bf''\\u5bb6''\\u7ed9''\\u5343''\\u5175'。'\\u9ad8''\\u51a0''\\u966a''\\u8f87''\\u9a71''\\u6bc2''\\u632f''\\u7f28'。\n",
"'\\u4e16''\\u7984''\\u4f88''\\u5bcc''\\u8f66''\\u9a7e''\\u80a5''\\u8f7b'。'\\u7b56''\\u529f''\\u8302''\\u5b9e''\\u52d2''\\u7891''\\u523b''\\u94ed'。\n",
"'\\u78fb''\\u6eaa''\\u4f0a''\\u5c39''\\u4f50''\\u65f6''\\u963f''\\u8861'。'\\u5944''\\u5b85''\\u66f2''\\u961c''\\u5fae''\\u65e6''\\u5b70''\\u8425'。\n",
"'\\u6853''\\u516c''\\u5321''\\u5408''\\u6d4e''\\u5f31''\\u6276''\\u503e'。'\\u7eee''\\u56de''\\u6c49''\\u60e0''\\u8bf4''\\u611f''\\u6b66''\\u4e01'。\n",
"'\\u4fca''\\u4e42''\\u5bc6''\\u52ff''\\u591a''\\u58eb''\\u5b9e''\\u5b81'。'\\u664b''\\u695a''\\u66f4''\\u9738''\\u8d75''\\u9b4f''\\u56f0''\\u6a2a'。\n",
"'\\u5047''\\u9014''\\u706d''\\u8662''\\u8df5''\\u571f''\\u4f1a''\\u76df'。'\\u4f55''\\u9075''\\u7ea6''\\u6cd5''\\u97e9''\\u5f0a''\\u70e6''\\u5211'。\n",
"'\\u8d77''\\u7fe6''\\u9887''\\u7267''\\u7528''\\u519b''\\u6700''\\u7cbe'。'\\u5ba3''\\u5a01''\\u6c99''\\u6f20''\\u9a70''\\u8a89''\\u4e39''\\u9752'。\n",
"'\\u4e5d''\\u5dde''\\u79b9''\\u8ff9''\\u767e''\\u90e1''\\u79e6''\\u5e76'。'\\u5cb3''\\u5b97''\\u6052''\\u5cb1''\\u7985''\\u4e3b''\\u4e91''\\u4ead'。\n",
"'\\u96c1''\\u95e8''\\u7d2b''\\u585e''\\u9e21''\\u7530''\\u8d64''\\u57ce'。'\\u6606''\\u6c60''\\u78a3''\\u77f3''\\u5de8''\\u91ce''\\u6d1e''\\u5ead'。\n",
"'\\u65f7''\\u8fdc''\\u7ef5''\\u9088''\\u5ca9''\\u5cab''\\u6773''\\u51a5'。'\\u6cbb''\\u672c''\\u4e8e''\\u519c''\\u52a1''\\u5179''\\u7a3c''\\u7a51'。\n",
"'\\u4ff6''\\u8f7d''\\u5357''\\u4ea9''\\u6211''\\u827a''\\u9ecd''\\u7a37'。'\\u7a0e''\\u719f''\\u8d21''\\u65b0''\\u529d''\\u8d4f''\\u9edc''\\u965f'。\n",
"'\\u5b5f''\\u8f72''\\u6566''\\u7d20''\\u53f2''\\u9c7c''\\u79c9''\\u76f4'。'\\u5eb6''\\u51e0''\\u4e2d''\\u5eb8''\\u52b3''\\u8c26''\\u8c28''\\u6555'。\n",
"'\\u8046''\\u97f3''\\u5bdf''\\u7406''\\u9274''\\u8c8c''\\u8fa8''\\u8272'。'\\u8d3b''\\u53a5''\\u5609''\\u7337''\\u52c9''\\u5176''\\u7957''\\u690d'。\n",
"'\\u7701''\\u8eac''\\u8ba5''\\u8beb''\\u5ba0''\\u589e''\\u6297''\\u6781'。'\\u6b86''\\u8fb1''\\u8fd1''\\u803b''\\u6797''\\u768b''\\u5e78''\\u5373'。\n",
"'\\u4e24''\\u758f''\\u89c1''\\u673a''\\u89e3''\\u7ec4''\\u8c01''\\u903c'。'\\u7d22''\\u5c45''\\u95f2''\\u5904''\\u6c89''\\u9ed8''\\u5bc2''\\u5be5'。\n",
"'\\u6c42''\\u53e4''\\u5bfb''\\u8bba''\\u6563''\\u8651''\\u900d''\\u9065'。'\\u6b23''\\u594f''\\u7d2f''\\u9063''\\u621a''\\u8c22''\\u6b22''\\u62db'。\n",
"'\\u6e20''\\u8377''\\u7684''\\u5386''\\u56ed''\\u83bd''\\u62bd''\\u6761'。'\\u6787''\\u6777''\\u665a''\\u7fe0''\\u68a7''\\u6850''\\u65e9''\\u51cb'。\n",
"'\\u9648''\\u6839''\\u59d4''\\u7ff3''\\u843d''\\u53f6''\\u98d8''\\u6447'。'\\u6e38''\\u9e4d''\\u72ec''\\u8fd0''\\u51cc''\\u6469''\\u7edb''\\u9704'。\n",
"'\\u803d''\\u8bfb''\\u73a9''\\u5e02''\\u5bd3''\\u76ee''\\u56ca''\\u7bb1'。'\\u6613''\\u8f36''\\u6538''\\u754f''\\u5c5e''\\u8033''\\u57a3''\\u5899'。\n",
"'\\u5177''\\u81b3''\\u9910''\\u996d''\\u9002''\\u53e3''\\u5145''\\u80a0'。'\\u9971''\\u996b''\\u70f9''\\u5bb0''\\u9965''\\u538c''\\u7cdf''\\u7ce0'。\n",
"'\\u4eb2''\\u621a''\\u6545''\\u65e7''\\u8001''\\u5c11''\\u5f02''\\u7cae'。'\\u59be''\\u5fa1''\\u7ee9''\\u7eba''\\u4f8d''\\u5dfe''\\u5e37''\\u623f'。\n",
"'\\u7ea8''\\u6247''\\u5706''\\u6d01''\\u94f6''\\u70db''\\u709c''\\u714c'。'\\u663c''\\u7720''\\u5915''\\u5bd0''\\u84dd''\\u7b0b''\\u8c61''\\u5e8a'。\n",
"'\\u5f26''\\u6b4c''\\u9152''\\u5bb4''\\u63a5''\\u676f''\\u4e3e''\\u89de'。'\\u77eb''\\u624b''\\u987f''\\u8db3''\\u60a6''\\u8c6b''\\u4e14''\\u5eb7'。\n",
"'\\u5ae1''\\u540e''\\u55e3''\\u7eed''\\u796d''\\u7940''\\u70dd''\\u5c1d'。'\\u7a3d''\\u98a1''\\u518d''\\u62dc''\\u609a''\\u60e7''\\u6050''\\u60f6'。\n",
"'\\u7b3a''\\u7252''\\u7b80''\\u8981''\\u987e''\\u7b54''\\u5ba1''\\u8be6'。'\\u9ab8''\\u57a2''\\u60f3''\\u6d74''\\u6267''\\u70ed''\\u613f''\\u51c9'。\n",
"'\\u9a74''\\u9aa1''\\u728a''\\u7279''\\u9a87''\\u8dc3''\\u8d85''\\u9aa7'。'\\u8bdb''\\u65a9''\\u8d3c''\\u76d7''\\u6355''\\u83b7''\\u53db''\\u4ea1'。\n",
"'\\u5e03''\\u5c04''\\u8fbd''\\u4e38''\\u5d47''\\u7434''\\u962e''\\u5578'。'\\u606c''\\u7b14''\\u4f26''\\u7eb8''\\u94a7''\\u5de7''\\u4efb''\\u9493'。\n",
"'\\u91ca''\\u7eb7''\\u5229''\\u4fd7''\\u5e76''\\u7686''\\u4f73''\\u5999'。'\\u6bdb''\\u65bd''\\u6dd1''\\u59ff''\\u5de5''\\u98a6''\\u598d''\\u7b11'。\n",
"'\\u5e74''\\u77e2''\\u6bcf''\\u50ac''\\u66e6''\\u6656''\\u6717''\\u66dc'。'\\u7487''\\u7391''\\u60ac''\\u65a1''\\u6666''\\u9b44''\\u73af''\\u7167'。\n",
"'\\u6307''\\u85aa''\\u4fee''\\u795c''\\u6c38''\\u7ee5''\\u5409''\\u52ad'。'\\u77e9''\\u6b65''\\u5f15''\\u9886''\\u4fef''\\u4ef0''\\u5eca''\\u5e99'。\n",
"'\\u675f''\\u5e26''\\u77dc''\\u5e84''\\u5f98''\\u5f8a''\\u77bb''\\u773a'。'\\u5b64''\\u964b''\\u5be1''\\u95fb''\\u611a''\\u8499''\\u7b49''\\u8bee'。\n",
"'\\u8c13''\\u8bed''\\u52a9''\\u8005''\\u7109''\\u54c9''\\u4e4e''\\u4e5f'。\n"
]
}
],
"source": [
"import re \n",
"# build the ascii()-escaped representation of each character, then print it\n",
"punctuation = [\"\",\"。\"]\n",
"line_break = \"\\n\"\n",
"ascii_corpus = \"\"\n",
"ascii_num = \"\"\n",
"\n",
"#ascii_corpus = \"\".join(x for x in clean_corpus)\n",
"\n",
"\n",
"for character in clean_corpus_s:\n",
" # keep punctuation as is\n",
" if character in punctuation: \n",
" #print(character)\n",
" # TODO remove unicode \\u\n",
" ascii_corpus += character\n",
" elif character == line_break:\n",
" pass\n",
" ascii_corpus += \"\\n\"\n",
" else:\n",
" ascii_num = ascii(character)\n",
" #print(ascii_num)\n",
" ascii_corpus += ascii_num\n",
"print(ascii_corpus)\n"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "78e14928-2ac6-48ca-be8d-1ecc73f6603d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8312"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# write corpus to a new file\n",
"ascii_output = open(\"files/ascii_output.txt\",\"w\")\n",
"ascii_output.write(ascii_corpus)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "b930f8f8-33d8-4856-9704-064bda5e88a9",
"metadata": {},
"outputs": [],
"source": [
"# more unicode howto\n",
"# https://docs.python.org/3/howto/unicode.html"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "ee668d04-d1b5-4c6e-8868-f500015c8934",
"metadata": {},
"outputs": [],
"source": [
"# preserve the original punctuation and layout\n",
"# need to turn the list back into a string to preserve string layout? "
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "862091df-0d0b-449a-a402-e3ecf007c252",
"metadata": {},
"outputs": [],
"source": [
"thousand_w.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3736d5e5-c332-4ab7-8333-9c1efd4e62c8",
"metadata": {},
"outputs": [],
"source": [
"# https://github.com/callmefeifei/baby-names\n",
"# https://blog.csdn.net/anmo9499/article/details/101646224\n",
"# https://www.cnblogs.com/zhongbin/p/3273086.html\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,646 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"id": "03589aa7-7fc4-4fe1-92ac-9f523e2ba90c",
"metadata": {},
"outputs": [],
"source": [
"# print ascii encoding of a character"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "e068c988-0a8a-45d8-ac64-ea07cf37b275",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"'\\\\u4f60'\""
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ascii(\"你\")"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "0760b0c9-17f7-4fca-87a1-2c3423aaf61f",
"metadata": {},
"outputs": [],
"source": [
"# what is this ascii standard?"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "44976d3e-c6a0-4d2e-8e3a-9d21ac254803",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"'a'\""
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ascii(\"a\")"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "8ffd857f-e182-49d0-9722-f8d2204f9054",
"metadata": {},
"outputs": [],
"source": [
"# try ord "
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "97c7c7d1-b79f-42ac-8d00-81c12b428281",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"97"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ord(\"a\")"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "9477f26f-3e9c-4a4c-be1b-1c135cd3688e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"65"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ord(\"A\")"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "f0bb5070-d333-46c2-bdea-da6404989ba3",
"metadata": {},
"outputs": [],
"source": [
"#Print alphabet lower and upper case\n",
"alphabet = [\"a\",\"b\",\"c\",\"d\",\"e\",\"f\",\"g\",\"h\",\"i\",\"j\",\"k\",\"l\",\"m\",\"n\",\"o\",\"p\",\"q\",\"r\",\"s\",\"t\",\"u\",\"v\",\"w\",\"x\",\"y\",\"z\"]\n",
"ALPHABET = []\n",
"for letter in alphabet: ALPHABET.append(letter.upper())"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "b53660cc-cee1-4d69-8498-c94edd84e5aa",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Printing corresponding ASCII numerical representation :\n",
"a :97 b :98 c :99 d :100 e :101 f :102 g :103 h :104 i :105 j :106 k :107 l :108 m :109 n :110 o :111 p :112 q :113 r :114 s :115 t :116 u :117 v :118 w :119 x :120 y :121 z :122 "
]
}
],
"source": [
"print(\"Printing corresponding ASCII numerical representation :\")\n",
"for letter in alphabet: \n",
" print(\"{} :\".format (letter) + str (ord(letter)), end = \" \")"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "4f7ec4ff-b6e6-4cda-b8a3-6e2a3521d9c8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Printing corresponding ASCII numerical representation :\n",
"A :65 B :66 C :67 D :68 E :69 F :70 G :71 H :72 I :73 J :74 K :75 L :76 M :77 N :78 O :79 P :80 Q :81 R :82 S :83 T :84 U :85 V :86 W :87 X :88 Y :89 Z :90 "
]
}
],
"source": [
"print(\"Printing corresponding ASCII numerical representation :\")\n",
"for LETTER in ALPHABET: \n",
" print(\"{} :\".format (LETTER) + str (ord(LETTER)), end = \" \")"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "87963ac8-050c-44a5-9f5a-af94e5ba40ce",
"metadata": {},
"outputs": [],
"source": [
"# the code point of a lower case letter is 32 greater than that of its capital letter, hence the following hack\n",
"# without using the upper() method in Python"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "6318993c-1f92-4aa5-befd-2e01963bda15",
"metadata": {},
"outputs": [],
"source": [
"# takes input of a lower case char\n",
"def upperHack(c):\n",
" # get number of the lower case letter\n",
" l_num = ord(c)\n",
" u_num = l_num - 32\n",
" # return the upper case letter from u_num with chr()method\n",
" return chr(u_num)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "b47bae87-4e21-4a9c-a90a-6f999df2ac95",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'C'"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"upperHack(\"c\")"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "f0c628f3-e92c-446d-81c4-9cc7149b3a1c",
"metadata": {},
"outputs": [],
"source": [
"# the reverse direction: takes input of an upper case char and returns the lower case char\n",
"def lowerHack(C):\n",
" # get number of the upper case letter\n",
" u_num = ord(C)\n",
" l_num = u_num + 32\n",
" # return the lower case letter from l_num with the chr() method\n",
" return chr(l_num)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "46a9f58f-5428-4901-85f4-73542d8110fd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'c'"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lowerHack(\"C\")"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "a467b7a2-1ea5-4148-bfa4-4424ec110465",
"metadata": {},
"outputs": [],
"source": [
"# 好,现在我们把西文的方法应用到中文里去\n",
"# 拉丁语系下的西文中的词由字母构成;中文里的字由笔划构成。\n",
"# 我们先从系统层面思考:笔划是构造单个汉字的材料,在python中可以实现对笔划的探索性编程(exploratory programming)。\n",
"# 或者,我们可以在“语料库”的系统层面思考。\n",
"#《千字文》中包含了一千个不重复的汉字,我们可以理解为这是古人的“识字课本”。\n",
"# 汉字和千字文之间个体与系统的关系,好比“永字八法”中,笔划和“永”字之间的关系。\n",
"# 以王羲之的永字为范本,学童反复摹习,掌握“永”字中的“侧、勒、弩、趯、策、掠、啄、磔”,以期掌握写好每一个汉字。\n",
"# 同样,以千字文为范本,学童反复背诵研习,为了识字,并以“字”为单位行文。"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "92840cd9-b859-4c27-9016-80608168e448",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"天地玄黄宇宙洪荒。日月盈昃辰宿列张。4\n",
"寒来暑往秋收冬藏。闰余成岁律吕调阳。8\n",
"云腾致雨露结为霜。金生丽水玉出昆冈。12\n",
"剑号巨阙珠称夜光。果珍李柰菜重芥姜。16\n",
"海咸河淡鳞潜羽翔。龙师火帝鸟官人皇。20\n",
"始制文字乃服衣裳。推位让国有虞陶唐。24\n",
"吊民伐罪周发殷汤。坐朝问道垂拱平章。28\n",
"爱育黎首臣伏戎羌。遐迩壹体率宾归王。32\n",
"鸣凤在树白驹食场。化被草木赖及万方。36\n",
"盖此身发四大五常。恭惟鞠养岂敢毁伤。40\n",
"女慕贞洁男效才良。知过必改得能莫忘。44\n",
"罔谈彼短靡恃己长。信使可覆器欲难量。48\n",
"墨悲丝染诗赞羔羊。景行维贤克念作圣。52\n",
"德建名立形端表正。空谷传声虚堂习听。56\n",
"祸因恶积福缘善庆。尺璧非宝寸阴是竞。60\n",
"资父事君曰严与敬。孝当竭力忠则尽命。64\n",
"临深履薄夙兴温凊。似兰斯馨如松之盛。68\n",
"川流不息渊澄取映。容止若思言辞安定。72\n",
"笃初诚美慎终宜令。荣业所基藉甚无竟。76\n",
"学优登仕摄职从政。存以甘棠去而益咏。80\n",
"乐殊贵贱礼别尊卑。上和下睦夫唱妇随。84\n",
"外受傅训入奉母仪。诸姑伯叔犹子比儿。88\n",
"孔怀兄弟同气连枝。交友投分切磨箴规。92\n",
"仁慈隐恻造次弗离。节义廉退颠沛匪亏。96\n",
"性静情逸心动神疲。守真志满逐物意移。100\n",
"坚持雅操好爵自縻。都邑华夏东西二京。104\n",
"背邙面洛浮渭据泾。宫殿盘郁楼观飞惊。108\n",
"图写禽兽画彩仙灵。丙舍傍启甲帐对楹。112\n",
"肆筵设席鼓瑟吹笙。升阶纳陛弁转疑星。116\n",
"右通广内左达承明。既集坟典亦聚群英。120\n",
"杜稿钟隶漆书壁经。府罗将相路侠槐卿。124\n",
"户封八县家给千兵。高冠陪辇驱毂振缨。128\n",
"世禄侈富车驾肥轻。策功茂实勒碑刻铭。132\n",
"磻溪伊尹佐时阿衡。奄宅曲阜微旦孰营。136\n",
"桓公匡合济弱扶倾。绮回汉惠说感武丁。140\n",
"俊乂密勿多士实宁。晋楚更霸赵魏困横。144\n",
"假途灭虢践土会盟。何遵约法韩弊烦刑。148\n",
"起翦颇牧用军最精。宣威沙漠驰誉丹青。152\n",
"九州禹迹百郡秦并。岳宗恒岱禅主云亭。156\n",
"雁门紫塞鸡田赤城。昆池碣石巨野洞庭。160\n",
"旷远绵邈岩岫杳冥。治本于农务兹稼穑。164\n",
"俶载南亩我艺黍稷。税熟贡新劝赏黜陟。168\n",
"孟轲敦素史鱼秉直。庶几中庸劳谦谨敕。172\n",
"聆音察理鉴貌辨色。贻厥嘉猷勉其祗植。176\n",
"省躬讥诫宠增抗极。殆辱近耻林皋幸即。180\n",
"两疏见机解组谁逼。索居闲处沉默寂寥。184\n",
"求古寻论散虑逍遥。欣奏累遣戚谢欢招。188\n",
"渠荷的历园莽抽条。枇杷晚翠梧桐早凋。192\n",
"陈根委翳落叶飘摇。游鹍独运凌摩绛霄。196\n",
"耽读玩市寓目囊箱。易輶攸畏属耳垣墙。200\n",
"具膳餐饭适口充肠。饱饫烹宰饥厌糟糠。204\n",
"亲戚故旧老少异粮。妾御绩纺侍巾帷房。208\n",
"纨扇圆洁银烛炜煌。昼眠夕寐蓝笋象床。212\n",
"弦歌酒宴接杯举觞。矫手顿足悦豫且康。216\n",
"嫡后嗣续祭祀烝尝。稽颡再拜悚惧恐惶。220\n",
"笺牒简要顾答审详。骸垢想浴执热愿凉。224\n",
"驴骡犊特骇跃超骧。诛斩贼盗捕获叛亡。228\n",
"布射辽丸嵇琴阮啸。恬笔伦纸钧巧任钓。232\n",
"释纷利俗并皆佳妙。毛施淑姿工颦妍笑。236\n",
"年矢每催曦晖朗曜。璇玑悬斡晦魄环照。240\n",
"指薪修祜永绥吉劭。矩步引领俯仰廊庙。244\n",
"束带矜庄徘徊瞻眺。孤陋寡闻愚蒙等诮。248\n",
"谓语助者焉哉乎也。250\n"
]
}
],
"source": [
"# read in \"A Thousand Character Essay\"\n",
"thousand_w = open(\"files/thousand_char_essay.txt\",\"r\")\n",
"corpus = thousand_w.read(None)\n",
"print(corpus)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "23892e53-9d2e-4d93-b98f-e4fb08ad910c",
"metadata": {},
"outputs": [],
"source": [
"# remove counter numbers from corpus \n",
"clean_corpus=[]\n",
"for word in corpus:\n",
" if word.isdigit() is not True:\n",
" clean_corpus.append(word)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "b6b6892e-6b4f-4d54-a466-c2c5ec04f4c9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"天地玄黄,宇宙洪荒。日月盈昃,辰宿列张。\n",
"寒来暑往,秋收冬藏。闰余成岁,律吕调阳。\n",
"云腾致雨,露结为霜。金生丽水,玉出昆冈。\n",
"剑号巨阙,珠称夜光。果珍李柰,菜重芥姜。\n",
"海咸河淡,鳞潜羽翔。龙师火帝,鸟官人皇。\n",
"始制文字,乃服衣裳。推位让国,有虞陶唐。\n",
"吊民伐罪,周发殷汤。坐朝问道,垂拱平章。\n",
"爱育黎首,臣伏戎羌。遐迩壹体,率宾归王。\n",
"鸣凤在树,白驹食场。化被草木,赖及万方。\n",
"盖此身发,四大五常。恭惟鞠养,岂敢毁伤。\n",
"女慕贞洁,男效才良。知过必改,得能莫忘。\n",
"罔谈彼短,靡恃己长。信使可覆,器欲难量。\n",
"墨悲丝染,诗赞羔羊。景行维贤,克念作圣。\n",
"德建名立,形端表正。空谷传声,虚堂习听。\n",
"祸因恶积,福缘善庆。尺璧非宝,寸阴是竞。\n",
"资父事君,曰严与敬。孝当竭力,忠则尽命。\n",
"临深履薄,夙兴温凊。似兰斯馨,如松之盛。\n",
"川流不息,渊澄取映。容止若思,言辞安定。\n",
"笃初诚美,慎终宜令。荣业所基,藉甚无竟。\n",
"学优登仕,摄职从政。存以甘棠,去而益咏。\n",
"乐殊贵贱,礼别尊卑。上和下睦,夫唱妇随。\n",
"外受傅训,入奉母仪。诸姑伯叔,犹子比儿。\n",
"孔怀兄弟,同气连枝。交友投分,切磨箴规。\n",
"仁慈隐恻,造次弗离。节义廉退,颠沛匪亏。\n",
"性静情逸,心动神疲。守真志满,逐物意移。\n",
"坚持雅操,好爵自縻。都邑华夏,东西二京。\n",
"背邙面洛,浮渭据泾。宫殿盘郁,楼观飞惊。\n",
"图写禽兽,画彩仙灵。丙舍傍启,甲帐对楹。\n",
"肆筵设席,鼓瑟吹笙。升阶纳陛,弁转疑星。\n",
"右通广内,左达承明。既集坟典,亦聚群英。\n",
"杜稿钟隶,漆书壁经。府罗将相,路侠槐卿。\n",
"户封八县,家给千兵。高冠陪辇,驱毂振缨。\n",
"世禄侈富,车驾肥轻。策功茂实,勒碑刻铭。\n",
"磻溪伊尹,佐时阿衡。奄宅曲阜,微旦孰营。\n",
"桓公匡合,济弱扶倾。绮回汉惠,说感武丁。\n",
"俊乂密勿,多士实宁。晋楚更霸,赵魏困横。\n",
"假途灭虢,践土会盟。何遵约法,韩弊烦刑。\n",
"起翦颇牧,用军最精。宣威沙漠,驰誉丹青。\n",
"九州禹迹,百郡秦并。岳宗恒岱,禅主云亭。\n",
"雁门紫塞,鸡田赤城。昆池碣石,巨野洞庭。\n",
"旷远绵邈,岩岫杳冥。治本于农,务兹稼穑。\n",
"俶载南亩,我艺黍稷。税熟贡新,劝赏黜陟。\n",
"孟轲敦素,史鱼秉直。庶几中庸,劳谦谨敕。\n",
"聆音察理,鉴貌辨色。贻厥嘉猷,勉其祗植。\n",
"省躬讥诫,宠增抗极。殆辱近耻,林皋幸即。\n",
"两疏见机,解组谁逼。索居闲处,沉默寂寥。\n",
"求古寻论,散虑逍遥。欣奏累遣,戚谢欢招。\n",
"渠荷的历,园莽抽条。枇杷晚翠,梧桐早凋。\n",
"陈根委翳,落叶飘摇。游鹍独运,凌摩绛霄。\n",
"耽读玩市,寓目囊箱。易輶攸畏,属耳垣墙。\n",
"具膳餐饭,适口充肠。饱饫烹宰,饥厌糟糠。\n",
"亲戚故旧,老少异粮。妾御绩纺,侍巾帷房。\n",
"纨扇圆洁,银烛炜煌。昼眠夕寐,蓝笋象床。\n",
"弦歌酒宴,接杯举觞。矫手顿足,悦豫且康。\n",
"嫡后嗣续,祭祀烝尝。稽颡再拜,悚惧恐惶。\n",
"笺牒简要,顾答审详。骸垢想浴,执热愿凉。\n",
"驴骡犊特,骇跃超骧。诛斩贼盗,捕获叛亡。\n",
"布射辽丸,嵇琴阮啸。恬笔伦纸,钧巧任钓。\n",
"释纷利俗,并皆佳妙。毛施淑姿,工颦妍笑。\n",
"年矢每催,曦晖朗曜。璇玑悬斡,晦魄环照。\n",
"指薪修祜,永绥吉劭。矩步引领,俯仰廊庙。\n",
"束带矜庄,徘徊瞻眺。孤陋寡闻,愚蒙等诮。\n",
"谓语助者,焉哉乎也。\n"
]
}
],
"source": [
"# glue the cleaned elements back into a single string and show it\n",
"clean_corpus_s = \"\".join(str(x) for x in clean_corpus)\n",
"print(clean_corpus_s)"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "34dfbc8f-df1b-4969-a5fb-d073a08c9a32",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"'\\u5929''\\u5730''\\u7384''\\u9ec4''\\u5b87''\\u5b99''\\u6d2a''\\u8352'。'\\u65e5''\\u6708''\\u76c8''\\u6603''\\u8fb0''\\u5bbf''\\u5217''\\u5f20'。\n",
"'\\u5bd2''\\u6765''\\u6691''\\u5f80''\\u79cb''\\u6536''\\u51ac''\\u85cf'。'\\u95f0''\\u4f59''\\u6210''\\u5c81''\\u5f8b''\\u5415''\\u8c03''\\u9633'。\n",
"'\\u4e91''\\u817e''\\u81f4''\\u96e8''\\u9732''\\u7ed3''\\u4e3a''\\u971c'。'\\u91d1''\\u751f''\\u4e3d''\\u6c34''\\u7389''\\u51fa''\\u6606''\\u5188'。\n",
"'\\u5251''\\u53f7''\\u5de8''\\u9619''\\u73e0''\\u79f0''\\u591c''\\u5149'。'\\u679c''\\u73cd''\\u674e''\\u67f0''\\u83dc''\\u91cd''\\u82a5''\\u59dc'。\n",
"'\\u6d77''\\u54b8''\\u6cb3''\\u6de1''\\u9cde''\\u6f5c''\\u7fbd''\\u7fd4'。'\\u9f99''\\u5e08''\\u706b''\\u5e1d''\\u9e1f''\\u5b98''\\u4eba''\\u7687'。\n",
"'\\u59cb''\\u5236''\\u6587''\\u5b57''\\u4e43''\\u670d''\\u8863''\\u88f3'。'\\u63a8''\\u4f4d''\\u8ba9''\\u56fd''\\u6709''\\u865e''\\u9676''\\u5510'。\n",
"'\\u540a''\\u6c11''\\u4f10''\\u7f6a''\\u5468''\\u53d1''\\u6bb7''\\u6c64'。'\\u5750''\\u671d''\\u95ee''\\u9053''\\u5782''\\u62f1''\\u5e73''\\u7ae0'。\n",
"'\\u7231''\\u80b2''\\u9ece''\\u9996''\\u81e3''\\u4f0f''\\u620e''\\u7f8c'。'\\u9050''\\u8fe9''\\u58f9''\\u4f53''\\u7387''\\u5bbe''\\u5f52''\\u738b'。\n",
"'\\u9e23''\\u51e4''\\u5728''\\u6811''\\u767d''\\u9a79''\\u98df''\\u573a'。'\\u5316''\\u88ab''\\u8349''\\u6728''\\u8d56''\\u53ca''\\u4e07''\\u65b9'。\n",
"'\\u76d6''\\u6b64''\\u8eab''\\u53d1''\\u56db''\\u5927''\\u4e94''\\u5e38'。'\\u606d''\\u60df''\\u97a0''\\u517b''\\u5c82''\\u6562''\\u6bc1''\\u4f24'。\n",
"'\\u5973''\\u6155''\\u8d1e''\\u6d01''\\u7537''\\u6548''\\u624d''\\u826f'。'\\u77e5''\\u8fc7''\\u5fc5''\\u6539''\\u5f97''\\u80fd''\\u83ab''\\u5fd8'。\n",
"'\\u7f54''\\u8c08''\\u5f7c''\\u77ed''\\u9761''\\u6043''\\u5df1''\\u957f'。'\\u4fe1''\\u4f7f''\\u53ef''\\u8986''\\u5668''\\u6b32''\\u96be''\\u91cf'。\n",
"'\\u58a8''\\u60b2''\\u4e1d''\\u67d3''\\u8bd7''\\u8d5e''\\u7f94''\\u7f8a'。'\\u666f''\\u884c''\\u7ef4''\\u8d24''\\u514b''\\u5ff5''\\u4f5c''\\u5723'。\n",
"'\\u5fb7''\\u5efa''\\u540d''\\u7acb''\\u5f62''\\u7aef''\\u8868''\\u6b63'。'\\u7a7a''\\u8c37''\\u4f20''\\u58f0''\\u865a''\\u5802''\\u4e60''\\u542c'。\n",
"'\\u7978''\\u56e0''\\u6076''\\u79ef''\\u798f''\\u7f18''\\u5584''\\u5e86'。'\\u5c3a''\\u74a7''\\u975e''\\u5b9d''\\u5bf8''\\u9634''\\u662f''\\u7ade'。\n",
"'\\u8d44''\\u7236''\\u4e8b''\\u541b''\\u66f0''\\u4e25''\\u4e0e''\\u656c'。'\\u5b5d''\\u5f53''\\u7aed''\\u529b''\\u5fe0''\\u5219''\\u5c3d''\\u547d'。\n",
"'\\u4e34''\\u6df1''\\u5c65''\\u8584''\\u5919''\\u5174''\\u6e29''\\u51ca'。'\\u4f3c''\\u5170''\\u65af''\\u99a8''\\u5982''\\u677e''\\u4e4b''\\u76db'。\n",
"'\\u5ddd''\\u6d41''\\u4e0d''\\u606f''\\u6e0a''\\u6f84''\\u53d6''\\u6620'。'\\u5bb9''\\u6b62''\\u82e5''\\u601d''\\u8a00''\\u8f9e''\\u5b89''\\u5b9a'。\n",
"'\\u7b03''\\u521d''\\u8bda''\\u7f8e''\\u614e''\\u7ec8''\\u5b9c''\\u4ee4'。'\\u8363''\\u4e1a''\\u6240''\\u57fa''\\u85c9''\\u751a''\\u65e0''\\u7adf'。\n",
"'\\u5b66''\\u4f18''\\u767b''\\u4ed5''\\u6444''\\u804c''\\u4ece''\\u653f'。'\\u5b58''\\u4ee5''\\u7518''\\u68e0''\\u53bb''\\u800c''\\u76ca''\\u548f'。\n",
"'\\u4e50''\\u6b8a''\\u8d35''\\u8d31''\\u793c''\\u522b''\\u5c0a''\\u5351'。'\\u4e0a''\\u548c''\\u4e0b''\\u7766''\\u592b''\\u5531''\\u5987''\\u968f'。\n",
"'\\u5916''\\u53d7''\\u5085''\\u8bad''\\u5165''\\u5949''\\u6bcd''\\u4eea'。'\\u8bf8''\\u59d1''\\u4f2f''\\u53d4''\\u72b9''\\u5b50''\\u6bd4''\\u513f'。\n",
"'\\u5b54''\\u6000''\\u5144''\\u5f1f''\\u540c''\\u6c14''\\u8fde''\\u679d'。'\\u4ea4''\\u53cb''\\u6295''\\u5206''\\u5207''\\u78e8''\\u7bb4''\\u89c4'。\n",
"'\\u4ec1''\\u6148''\\u9690''\\u607b''\\u9020''\\u6b21''\\u5f17''\\u79bb'。'\\u8282''\\u4e49''\\u5ec9''\\u9000''\\u98a0''\\u6c9b''\\u532a''\\u4e8f'。\n",
"'\\u6027''\\u9759''\\u60c5''\\u9038''\\u5fc3''\\u52a8''\\u795e''\\u75b2'。'\\u5b88''\\u771f''\\u5fd7''\\u6ee1''\\u9010''\\u7269''\\u610f''\\u79fb'。\n",
"'\\u575a''\\u6301''\\u96c5''\\u64cd''\\u597d''\\u7235''\\u81ea''\\u7e3b'。'\\u90fd''\\u9091''\\u534e''\\u590f''\\u4e1c''\\u897f''\\u4e8c''\\u4eac'。\n",
"'\\u80cc''\\u9099''\\u9762''\\u6d1b''\\u6d6e''\\u6e2d''\\u636e''\\u6cfe'。'\\u5bab''\\u6bbf''\\u76d8''\\u90c1''\\u697c''\\u89c2''\\u98de''\\u60ca'。\n",
"'\\u56fe''\\u5199''\\u79bd''\\u517d''\\u753b''\\u5f69''\\u4ed9''\\u7075'。'\\u4e19''\\u820d''\\u508d''\\u542f''\\u7532''\\u5e10''\\u5bf9''\\u6979'。\n",
"'\\u8086''\\u7b75''\\u8bbe''\\u5e2d''\\u9f13''\\u745f''\\u5439''\\u7b19'。'\\u5347''\\u9636''\\u7eb3''\\u965b''\\u5f01''\\u8f6c''\\u7591''\\u661f'。\n",
"'\\u53f3''\\u901a''\\u5e7f''\\u5185''\\u5de6''\\u8fbe''\\u627f''\\u660e'。'\\u65e2''\\u96c6''\\u575f''\\u5178''\\u4ea6''\\u805a''\\u7fa4''\\u82f1'。\n",
"'\\u675c''\\u7a3f''\\u949f''\\u96b6''\\u6f06''\\u4e66''\\u58c1''\\u7ecf'。'\\u5e9c''\\u7f57''\\u5c06''\\u76f8''\\u8def''\\u4fa0''\\u69d0''\\u537f'。\n",
"'\\u6237''\\u5c01''\\u516b''\\u53bf''\\u5bb6''\\u7ed9''\\u5343''\\u5175'。'\\u9ad8''\\u51a0''\\u966a''\\u8f87''\\u9a71''\\u6bc2''\\u632f''\\u7f28'。\n",
"'\\u4e16''\\u7984''\\u4f88''\\u5bcc''\\u8f66''\\u9a7e''\\u80a5''\\u8f7b'。'\\u7b56''\\u529f''\\u8302''\\u5b9e''\\u52d2''\\u7891''\\u523b''\\u94ed'。\n",
"'\\u78fb''\\u6eaa''\\u4f0a''\\u5c39''\\u4f50''\\u65f6''\\u963f''\\u8861'。'\\u5944''\\u5b85''\\u66f2''\\u961c''\\u5fae''\\u65e6''\\u5b70''\\u8425'。\n",
"'\\u6853''\\u516c''\\u5321''\\u5408''\\u6d4e''\\u5f31''\\u6276''\\u503e'。'\\u7eee''\\u56de''\\u6c49''\\u60e0''\\u8bf4''\\u611f''\\u6b66''\\u4e01'。\n",
"'\\u4fca''\\u4e42''\\u5bc6''\\u52ff''\\u591a''\\u58eb''\\u5b9e''\\u5b81'。'\\u664b''\\u695a''\\u66f4''\\u9738''\\u8d75''\\u9b4f''\\u56f0''\\u6a2a'。\n",
"'\\u5047''\\u9014''\\u706d''\\u8662''\\u8df5''\\u571f''\\u4f1a''\\u76df'。'\\u4f55''\\u9075''\\u7ea6''\\u6cd5''\\u97e9''\\u5f0a''\\u70e6''\\u5211'。\n",
"'\\u8d77''\\u7fe6''\\u9887''\\u7267''\\u7528''\\u519b''\\u6700''\\u7cbe'。'\\u5ba3''\\u5a01''\\u6c99''\\u6f20''\\u9a70''\\u8a89''\\u4e39''\\u9752'。\n",
"'\\u4e5d''\\u5dde''\\u79b9''\\u8ff9''\\u767e''\\u90e1''\\u79e6''\\u5e76'。'\\u5cb3''\\u5b97''\\u6052''\\u5cb1''\\u7985''\\u4e3b''\\u4e91''\\u4ead'。\n",
"'\\u96c1''\\u95e8''\\u7d2b''\\u585e''\\u9e21''\\u7530''\\u8d64''\\u57ce'。'\\u6606''\\u6c60''\\u78a3''\\u77f3''\\u5de8''\\u91ce''\\u6d1e''\\u5ead'。\n",
"'\\u65f7''\\u8fdc''\\u7ef5''\\u9088''\\u5ca9''\\u5cab''\\u6773''\\u51a5'。'\\u6cbb''\\u672c''\\u4e8e''\\u519c''\\u52a1''\\u5179''\\u7a3c''\\u7a51'。\n",
"'\\u4ff6''\\u8f7d''\\u5357''\\u4ea9''\\u6211''\\u827a''\\u9ecd''\\u7a37'。'\\u7a0e''\\u719f''\\u8d21''\\u65b0''\\u529d''\\u8d4f''\\u9edc''\\u965f'。\n",
"'\\u5b5f''\\u8f72''\\u6566''\\u7d20''\\u53f2''\\u9c7c''\\u79c9''\\u76f4'。'\\u5eb6''\\u51e0''\\u4e2d''\\u5eb8''\\u52b3''\\u8c26''\\u8c28''\\u6555'。\n",
"'\\u8046''\\u97f3''\\u5bdf''\\u7406''\\u9274''\\u8c8c''\\u8fa8''\\u8272'。'\\u8d3b''\\u53a5''\\u5609''\\u7337''\\u52c9''\\u5176''\\u7957''\\u690d'。\n",
"'\\u7701''\\u8eac''\\u8ba5''\\u8beb''\\u5ba0''\\u589e''\\u6297''\\u6781'。'\\u6b86''\\u8fb1''\\u8fd1''\\u803b''\\u6797''\\u768b''\\u5e78''\\u5373'。\n",
"'\\u4e24''\\u758f''\\u89c1''\\u673a''\\u89e3''\\u7ec4''\\u8c01''\\u903c'。'\\u7d22''\\u5c45''\\u95f2''\\u5904''\\u6c89''\\u9ed8''\\u5bc2''\\u5be5'。\n",
"'\\u6c42''\\u53e4''\\u5bfb''\\u8bba''\\u6563''\\u8651''\\u900d''\\u9065'。'\\u6b23''\\u594f''\\u7d2f''\\u9063''\\u621a''\\u8c22''\\u6b22''\\u62db'。\n",
"'\\u6e20''\\u8377''\\u7684''\\u5386''\\u56ed''\\u83bd''\\u62bd''\\u6761'。'\\u6787''\\u6777''\\u665a''\\u7fe0''\\u68a7''\\u6850''\\u65e9''\\u51cb'。\n",
"'\\u9648''\\u6839''\\u59d4''\\u7ff3''\\u843d''\\u53f6''\\u98d8''\\u6447'。'\\u6e38''\\u9e4d''\\u72ec''\\u8fd0''\\u51cc''\\u6469''\\u7edb''\\u9704'。\n",
"'\\u803d''\\u8bfb''\\u73a9''\\u5e02''\\u5bd3''\\u76ee''\\u56ca''\\u7bb1'。'\\u6613''\\u8f36''\\u6538''\\u754f''\\u5c5e''\\u8033''\\u57a3''\\u5899'。\n",
"'\\u5177''\\u81b3''\\u9910''\\u996d''\\u9002''\\u53e3''\\u5145''\\u80a0'。'\\u9971''\\u996b''\\u70f9''\\u5bb0''\\u9965''\\u538c''\\u7cdf''\\u7ce0'。\n",
"'\\u4eb2''\\u621a''\\u6545''\\u65e7''\\u8001''\\u5c11''\\u5f02''\\u7cae'。'\\u59be''\\u5fa1''\\u7ee9''\\u7eba''\\u4f8d''\\u5dfe''\\u5e37''\\u623f'。\n",
"'\\u7ea8''\\u6247''\\u5706''\\u6d01''\\u94f6''\\u70db''\\u709c''\\u714c'。'\\u663c''\\u7720''\\u5915''\\u5bd0''\\u84dd''\\u7b0b''\\u8c61''\\u5e8a'。\n",
"'\\u5f26''\\u6b4c''\\u9152''\\u5bb4''\\u63a5''\\u676f''\\u4e3e''\\u89de'。'\\u77eb''\\u624b''\\u987f''\\u8db3''\\u60a6''\\u8c6b''\\u4e14''\\u5eb7'。\n",
"'\\u5ae1''\\u540e''\\u55e3''\\u7eed''\\u796d''\\u7940''\\u70dd''\\u5c1d'。'\\u7a3d''\\u98a1''\\u518d''\\u62dc''\\u609a''\\u60e7''\\u6050''\\u60f6'。\n",
"'\\u7b3a''\\u7252''\\u7b80''\\u8981''\\u987e''\\u7b54''\\u5ba1''\\u8be6'。'\\u9ab8''\\u57a2''\\u60f3''\\u6d74''\\u6267''\\u70ed''\\u613f''\\u51c9'。\n",
"'\\u9a74''\\u9aa1''\\u728a''\\u7279''\\u9a87''\\u8dc3''\\u8d85''\\u9aa7'。'\\u8bdb''\\u65a9''\\u8d3c''\\u76d7''\\u6355''\\u83b7''\\u53db''\\u4ea1'。\n",
"'\\u5e03''\\u5c04''\\u8fbd''\\u4e38''\\u5d47''\\u7434''\\u962e''\\u5578'。'\\u606c''\\u7b14''\\u4f26''\\u7eb8''\\u94a7''\\u5de7''\\u4efb''\\u9493'。\n",
"'\\u91ca''\\u7eb7''\\u5229''\\u4fd7''\\u5e76''\\u7686''\\u4f73''\\u5999'。'\\u6bdb''\\u65bd''\\u6dd1''\\u59ff''\\u5de5''\\u98a6''\\u598d''\\u7b11'。\n",
"'\\u5e74''\\u77e2''\\u6bcf''\\u50ac''\\u66e6''\\u6656''\\u6717''\\u66dc'。'\\u7487''\\u7391''\\u60ac''\\u65a1''\\u6666''\\u9b44''\\u73af''\\u7167'。\n",
"'\\u6307''\\u85aa''\\u4fee''\\u795c''\\u6c38''\\u7ee5''\\u5409''\\u52ad'。'\\u77e9''\\u6b65''\\u5f15''\\u9886''\\u4fef''\\u4ef0''\\u5eca''\\u5e99'。\n",
"'\\u675f''\\u5e26''\\u77dc''\\u5e84''\\u5f98''\\u5f8a''\\u77bb''\\u773a'。'\\u5b64''\\u964b''\\u5be1''\\u95fb''\\u611a''\\u8499''\\u7b49''\\u8bee'。\n",
"'\\u8c13''\\u8bed''\\u52a9''\\u8005''\\u7109''\\u54c9''\\u4e4e''\\u4e5f'。\n"
]
}
],
"source": [
"import re \n",
"# print the ascii number of each text\n",
"punctuation = [\"\",\"。\"]\n",
"line_break = \"\\n\"\n",
"ascii_corpus = \"\"\n",
"ascii_num = \"\"\n",
"\n",
"#ascii_corpus = \"\".join(x for x in clean_corpus)\n",
"\n",
"\n",
"# collect the converted pieces in a list and join them once at the end,\n",
"# instead of growing the result string one character at a time\n",
"ascii_pieces = []\n",
"for character in clean_corpus_s:\n",
"    if character in punctuation or character == line_break:\n",
"        # punctuation and line breaks are copied through as is\n",
"        ascii_pieces.append(character)\n",
"    else:\n",
"        # ascii() gives the quoted repr with non-ASCII characters escaped, e.g. '\\u5929'\n",
"        # TODO remove unicode \\u\n",
"        ascii_num = ascii(character)\n",
"        ascii_pieces.append(ascii_num)\n",
"ascii_corpus += \"\".join(ascii_pieces)\n",
"print(ascii_corpus)\n"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "78e14928-2ac6-48ca-be8d-1ecc73f6603d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8312"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# write corpus to a new file\n",
"# the with-block closes the handle when it ends, so the text is flushed to disk\n",
"with open(\"files/ascii_output.txt\",\"w\") as ascii_output:\n",
"    chars_written = ascii_output.write(ascii_corpus)\n",
"# keep showing the number of characters written as the cell result\n",
"chars_written"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "b930f8f8-33d8-4856-9704-064bda5e88a9",
"metadata": {},
"outputs": [],
"source": [
"# more unicode howto\n",
"# https://docs.python.org/3/howto/unicode.html"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "ee668d04-d1b5-4c6e-8868-f500015c8934",
"metadata": {},
"outputs": [],
"source": [
"# preserve the original punctuation and layout\n",
"# need to turn the list back into a string to preserve string layout? "
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "862091df-0d0b-449a-a402-e3ecf007c252",
"metadata": {},
"outputs": [],
"source": [
"thousand_w.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3736d5e5-c332-4ab7-8333-9c1efd4e62c8",
"metadata": {},
"outputs": [],
"source": [
"# https://github.com/callmefeifei/baby-names\n",
"# https://blog.csdn.net/anmo9499/article/details/101646224\n",
"# https://www.cnblogs.com/zhongbin/p/3273086.html\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,63 @@
天地玄黄宇宙洪荒。日月盈昃辰宿列张。4
寒来暑往秋收冬藏。闰余成岁律吕调阳。8
云腾致雨露结为霜。金生丽水玉出昆冈。12
剑号巨阙珠称夜光。果珍李柰菜重芥姜。16
海咸河淡鳞潜羽翔。龙师火帝鸟官人皇。20
始制文字乃服衣裳。推位让国有虞陶唐。24
吊民伐罪周发殷汤。坐朝问道垂拱平章。28
爱育黎首臣伏戎羌。遐迩壹体率宾归王。32
鸣凤在树白驹食场。化被草木赖及万方。36
盖此身发四大五常。恭惟鞠养岂敢毁伤。40
女慕贞洁男效才良。知过必改得能莫忘。44
罔谈彼短靡恃己长。信使可覆器欲难量。48
墨悲丝染诗赞羔羊。景行维贤克念作圣。52
德建名立形端表正。空谷传声虚堂习听。56
祸因恶积福缘善庆。尺璧非宝寸阴是竞。60
资父事君曰严与敬。孝当竭力忠则尽命。64
临深履薄夙兴温凊。似兰斯馨如松之盛。68
川流不息渊澄取映。容止若思言辞安定。72
笃初诚美慎终宜令。荣业所基藉甚无竟。76
学优登仕摄职从政。存以甘棠去而益咏。80
乐殊贵贱礼别尊卑。上和下睦夫唱妇随。84
外受傅训入奉母仪。诸姑伯叔犹子比儿。88
孔怀兄弟同气连枝。交友投分切磨箴规。92
仁慈隐恻造次弗离。节义廉退颠沛匪亏。96
性静情逸心动神疲。守真志满逐物意移。100
坚持雅操好爵自縻。都邑华夏东西二京。104
背邙面洛浮渭据泾。宫殿盘郁楼观飞惊。108
图写禽兽画彩仙灵。丙舍傍启甲帐对楹。112
肆筵设席鼓瑟吹笙。升阶纳陛弁转疑星。116
右通广内左达承明。既集坟典亦聚群英。120
杜稿钟隶漆书壁经。府罗将相路侠槐卿。124
户封八县家给千兵。高冠陪辇驱毂振缨。128
世禄侈富车驾肥轻。策功茂实勒碑刻铭。132
磻溪伊尹佐时阿衡。奄宅曲阜微旦孰营。136
桓公匡合济弱扶倾。绮回汉惠说感武丁。140
俊乂密勿多士实宁。晋楚更霸赵魏困横。144
假途灭虢践土会盟。何遵约法韩弊烦刑。148
起翦颇牧用军最精。宣威沙漠驰誉丹青。152
九州禹迹百郡秦并。岳宗恒岱禅主云亭。156
雁门紫塞鸡田赤城。昆池碣石巨野洞庭。160
旷远绵邈岩岫杳冥。治本于农务兹稼穑。164
俶载南亩我艺黍稷。税熟贡新劝赏黜陟。168
孟轲敦素史鱼秉直。庶几中庸劳谦谨敕。172
聆音察理鉴貌辨色。贻厥嘉猷勉其祗植。176
省躬讥诫宠增抗极。殆辱近耻林皋幸即。180
两疏见机解组谁逼。索居闲处沉默寂寥。184
求古寻论散虑逍遥。欣奏累遣戚谢欢招。188
渠荷的历园莽抽条。枇杷晚翠梧桐早凋。192
陈根委翳落叶飘摇。游鹍独运凌摩绛霄。196
耽读玩市寓目囊箱。易輶攸畏属耳垣墙。200
具膳餐饭适口充肠。饱饫烹宰饥厌糟糠。204
亲戚故旧老少异粮。妾御绩纺侍巾帷房。208
纨扇圆洁银烛炜煌。昼眠夕寐蓝笋象床。212
弦歌酒宴接杯举觞。矫手顿足悦豫且康。216
嫡后嗣续祭祀烝尝。稽颡再拜悚惧恐惶。220
笺牒简要顾答审详。骸垢想浴执热愿凉。224
驴骡犊特骇跃超骧。诛斩贼盗捕获叛亡。228
布射辽丸嵇琴阮啸。恬笔伦纸钧巧任钓。232
释纷利俗并皆佳妙。毛施淑姿工颦妍笑。236
年矢每催曦晖朗曜。璇玑悬斡晦魄环照。240
指薪修祜永绥吉劭。矩步引领俯仰廊庙。244
束带矜庄徘徊瞻眺。孤陋寡闻愚蒙等诮。248
谓语助者焉哉乎也。250

@ -0,0 +1,63 @@
'\u5929''\u5730''\u7384''\u9ec4''\u5b87''\u5b99''\u6d2a''\u8352'。'\u65e5''\u6708''\u76c8''\u6603''\u8fb0''\u5bbf''\u5217''\u5f20'。
'\u5bd2''\u6765''\u6691''\u5f80''\u79cb''\u6536''\u51ac''\u85cf'。'\u95f0''\u4f59''\u6210''\u5c81''\u5f8b''\u5415''\u8c03''\u9633'。
'\u4e91''\u817e''\u81f4''\u96e8''\u9732''\u7ed3''\u4e3a''\u971c'。'\u91d1''\u751f''\u4e3d''\u6c34''\u7389''\u51fa''\u6606''\u5188'。
'\u5251''\u53f7''\u5de8''\u9619''\u73e0''\u79f0''\u591c''\u5149'。'\u679c''\u73cd''\u674e''\u67f0''\u83dc''\u91cd''\u82a5''\u59dc'。
'\u6d77''\u54b8''\u6cb3''\u6de1''\u9cde''\u6f5c''\u7fbd''\u7fd4'。'\u9f99''\u5e08''\u706b''\u5e1d''\u9e1f''\u5b98''\u4eba''\u7687'。
'\u59cb''\u5236''\u6587''\u5b57''\u4e43''\u670d''\u8863''\u88f3'。'\u63a8''\u4f4d''\u8ba9''\u56fd''\u6709''\u865e''\u9676''\u5510'。
'\u540a''\u6c11''\u4f10''\u7f6a''\u5468''\u53d1''\u6bb7''\u6c64'。'\u5750''\u671d''\u95ee''\u9053''\u5782''\u62f1''\u5e73''\u7ae0'。
'\u7231''\u80b2''\u9ece''\u9996''\u81e3''\u4f0f''\u620e''\u7f8c'。'\u9050''\u8fe9''\u58f9''\u4f53''\u7387''\u5bbe''\u5f52''\u738b'。
'\u9e23''\u51e4''\u5728''\u6811''\u767d''\u9a79''\u98df''\u573a'。'\u5316''\u88ab''\u8349''\u6728''\u8d56''\u53ca''\u4e07''\u65b9'。
'\u76d6''\u6b64''\u8eab''\u53d1''\u56db''\u5927''\u4e94''\u5e38'。'\u606d''\u60df''\u97a0''\u517b''\u5c82''\u6562''\u6bc1''\u4f24'。
'\u5973''\u6155''\u8d1e''\u6d01''\u7537''\u6548''\u624d''\u826f'。'\u77e5''\u8fc7''\u5fc5''\u6539''\u5f97''\u80fd''\u83ab''\u5fd8'。
'\u7f54''\u8c08''\u5f7c''\u77ed''\u9761''\u6043''\u5df1''\u957f'。'\u4fe1''\u4f7f''\u53ef''\u8986''\u5668''\u6b32''\u96be''\u91cf'。
'\u58a8''\u60b2''\u4e1d''\u67d3''\u8bd7''\u8d5e''\u7f94''\u7f8a'。'\u666f''\u884c''\u7ef4''\u8d24''\u514b''\u5ff5''\u4f5c''\u5723'。
'\u5fb7''\u5efa''\u540d''\u7acb''\u5f62''\u7aef''\u8868''\u6b63'。'\u7a7a''\u8c37''\u4f20''\u58f0''\u865a''\u5802''\u4e60''\u542c'。
'\u7978''\u56e0''\u6076''\u79ef''\u798f''\u7f18''\u5584''\u5e86'。'\u5c3a''\u74a7''\u975e''\u5b9d''\u5bf8''\u9634''\u662f''\u7ade'。
'\u8d44''\u7236''\u4e8b''\u541b''\u66f0''\u4e25''\u4e0e''\u656c'。'\u5b5d''\u5f53''\u7aed''\u529b''\u5fe0''\u5219''\u5c3d''\u547d'。
'\u4e34''\u6df1''\u5c65''\u8584''\u5919''\u5174''\u6e29''\u51ca'。'\u4f3c''\u5170''\u65af''\u99a8''\u5982''\u677e''\u4e4b''\u76db'。
'\u5ddd''\u6d41''\u4e0d''\u606f''\u6e0a''\u6f84''\u53d6''\u6620'。'\u5bb9''\u6b62''\u82e5''\u601d''\u8a00''\u8f9e''\u5b89''\u5b9a'。
'\u7b03''\u521d''\u8bda''\u7f8e''\u614e''\u7ec8''\u5b9c''\u4ee4'。'\u8363''\u4e1a''\u6240''\u57fa''\u85c9''\u751a''\u65e0''\u7adf'。
'\u5b66''\u4f18''\u767b''\u4ed5''\u6444''\u804c''\u4ece''\u653f'。'\u5b58''\u4ee5''\u7518''\u68e0''\u53bb''\u800c''\u76ca''\u548f'。
'\u4e50''\u6b8a''\u8d35''\u8d31''\u793c''\u522b''\u5c0a''\u5351'。'\u4e0a''\u548c''\u4e0b''\u7766''\u592b''\u5531''\u5987''\u968f'。
'\u5916''\u53d7''\u5085''\u8bad''\u5165''\u5949''\u6bcd''\u4eea'。'\u8bf8''\u59d1''\u4f2f''\u53d4''\u72b9''\u5b50''\u6bd4''\u513f'。
'\u5b54''\u6000''\u5144''\u5f1f''\u540c''\u6c14''\u8fde''\u679d'。'\u4ea4''\u53cb''\u6295''\u5206''\u5207''\u78e8''\u7bb4''\u89c4'。
'\u4ec1''\u6148''\u9690''\u607b''\u9020''\u6b21''\u5f17''\u79bb'。'\u8282''\u4e49''\u5ec9''\u9000''\u98a0''\u6c9b''\u532a''\u4e8f'。
'\u6027''\u9759''\u60c5''\u9038''\u5fc3''\u52a8''\u795e''\u75b2'。'\u5b88''\u771f''\u5fd7''\u6ee1''\u9010''\u7269''\u610f''\u79fb'。
'\u575a''\u6301''\u96c5''\u64cd''\u597d''\u7235''\u81ea''\u7e3b'。'\u90fd''\u9091''\u534e''\u590f''\u4e1c''\u897f''\u4e8c''\u4eac'。
'\u80cc''\u9099''\u9762''\u6d1b''\u6d6e''\u6e2d''\u636e''\u6cfe'。'\u5bab''\u6bbf''\u76d8''\u90c1''\u697c''\u89c2''\u98de''\u60ca'。
'\u56fe''\u5199''\u79bd''\u517d''\u753b''\u5f69''\u4ed9''\u7075'。'\u4e19''\u820d''\u508d''\u542f''\u7532''\u5e10''\u5bf9''\u6979'。
'\u8086''\u7b75''\u8bbe''\u5e2d''\u9f13''\u745f''\u5439''\u7b19'。'\u5347''\u9636''\u7eb3''\u965b''\u5f01''\u8f6c''\u7591''\u661f'。
'\u53f3''\u901a''\u5e7f''\u5185''\u5de6''\u8fbe''\u627f''\u660e'。'\u65e2''\u96c6''\u575f''\u5178''\u4ea6''\u805a''\u7fa4''\u82f1'。
'\u675c''\u7a3f''\u949f''\u96b6''\u6f06''\u4e66''\u58c1''\u7ecf'。'\u5e9c''\u7f57''\u5c06''\u76f8''\u8def''\u4fa0''\u69d0''\u537f'。
'\u6237''\u5c01''\u516b''\u53bf''\u5bb6''\u7ed9''\u5343''\u5175'。'\u9ad8''\u51a0''\u966a''\u8f87''\u9a71''\u6bc2''\u632f''\u7f28'。
'\u4e16''\u7984''\u4f88''\u5bcc''\u8f66''\u9a7e''\u80a5''\u8f7b'。'\u7b56''\u529f''\u8302''\u5b9e''\u52d2''\u7891''\u523b''\u94ed'。
'\u78fb''\u6eaa''\u4f0a''\u5c39''\u4f50''\u65f6''\u963f''\u8861'。'\u5944''\u5b85''\u66f2''\u961c''\u5fae''\u65e6''\u5b70''\u8425'。
'\u6853''\u516c''\u5321''\u5408''\u6d4e''\u5f31''\u6276''\u503e'。'\u7eee''\u56de''\u6c49''\u60e0''\u8bf4''\u611f''\u6b66''\u4e01'。
'\u4fca''\u4e42''\u5bc6''\u52ff''\u591a''\u58eb''\u5b9e''\u5b81'。'\u664b''\u695a''\u66f4''\u9738''\u8d75''\u9b4f''\u56f0''\u6a2a'。
'\u5047''\u9014''\u706d''\u8662''\u8df5''\u571f''\u4f1a''\u76df'。'\u4f55''\u9075''\u7ea6''\u6cd5''\u97e9''\u5f0a''\u70e6''\u5211'。
'\u8d77''\u7fe6''\u9887''\u7267''\u7528''\u519b''\u6700''\u7cbe'。'\u5ba3''\u5a01''\u6c99''\u6f20''\u9a70''\u8a89''\u4e39''\u9752'。
'\u4e5d''\u5dde''\u79b9''\u8ff9''\u767e''\u90e1''\u79e6''\u5e76'。'\u5cb3''\u5b97''\u6052''\u5cb1''\u7985''\u4e3b''\u4e91''\u4ead'。
'\u96c1''\u95e8''\u7d2b''\u585e''\u9e21''\u7530''\u8d64''\u57ce'。'\u6606''\u6c60''\u78a3''\u77f3''\u5de8''\u91ce''\u6d1e''\u5ead'。
'\u65f7''\u8fdc''\u7ef5''\u9088''\u5ca9''\u5cab''\u6773''\u51a5'。'\u6cbb''\u672c''\u4e8e''\u519c''\u52a1''\u5179''\u7a3c''\u7a51'。
'\u4ff6''\u8f7d''\u5357''\u4ea9''\u6211''\u827a''\u9ecd''\u7a37'。'\u7a0e''\u719f''\u8d21''\u65b0''\u529d''\u8d4f''\u9edc''\u965f'。
'\u5b5f''\u8f72''\u6566''\u7d20''\u53f2''\u9c7c''\u79c9''\u76f4'。'\u5eb6''\u51e0''\u4e2d''\u5eb8''\u52b3''\u8c26''\u8c28''\u6555'。
'\u8046''\u97f3''\u5bdf''\u7406''\u9274''\u8c8c''\u8fa8''\u8272'。'\u8d3b''\u53a5''\u5609''\u7337''\u52c9''\u5176''\u7957''\u690d'。
'\u7701''\u8eac''\u8ba5''\u8beb''\u5ba0''\u589e''\u6297''\u6781'。'\u6b86''\u8fb1''\u8fd1''\u803b''\u6797''\u768b''\u5e78''\u5373'。
'\u4e24''\u758f''\u89c1''\u673a''\u89e3''\u7ec4''\u8c01''\u903c'。'\u7d22''\u5c45''\u95f2''\u5904''\u6c89''\u9ed8''\u5bc2''\u5be5'。
'\u6c42''\u53e4''\u5bfb''\u8bba''\u6563''\u8651''\u900d''\u9065'。'\u6b23''\u594f''\u7d2f''\u9063''\u621a''\u8c22''\u6b22''\u62db'。
'\u6e20''\u8377''\u7684''\u5386''\u56ed''\u83bd''\u62bd''\u6761'。'\u6787''\u6777''\u665a''\u7fe0''\u68a7''\u6850''\u65e9''\u51cb'。
'\u9648''\u6839''\u59d4''\u7ff3''\u843d''\u53f6''\u98d8''\u6447'。'\u6e38''\u9e4d''\u72ec''\u8fd0''\u51cc''\u6469''\u7edb''\u9704'。
'\u803d''\u8bfb''\u73a9''\u5e02''\u5bd3''\u76ee''\u56ca''\u7bb1'。'\u6613''\u8f36''\u6538''\u754f''\u5c5e''\u8033''\u57a3''\u5899'。
'\u5177''\u81b3''\u9910''\u996d''\u9002''\u53e3''\u5145''\u80a0'。'\u9971''\u996b''\u70f9''\u5bb0''\u9965''\u538c''\u7cdf''\u7ce0'。
'\u4eb2''\u621a''\u6545''\u65e7''\u8001''\u5c11''\u5f02''\u7cae'。'\u59be''\u5fa1''\u7ee9''\u7eba''\u4f8d''\u5dfe''\u5e37''\u623f'。
'\u7ea8''\u6247''\u5706''\u6d01''\u94f6''\u70db''\u709c''\u714c'。'\u663c''\u7720''\u5915''\u5bd0''\u84dd''\u7b0b''\u8c61''\u5e8a'。
'\u5f26''\u6b4c''\u9152''\u5bb4''\u63a5''\u676f''\u4e3e''\u89de'。'\u77eb''\u624b''\u987f''\u8db3''\u60a6''\u8c6b''\u4e14''\u5eb7'。
'\u5ae1''\u540e''\u55e3''\u7eed''\u796d''\u7940''\u70dd''\u5c1d'。'\u7a3d''\u98a1''\u518d''\u62dc''\u609a''\u60e7''\u6050''\u60f6'。
'\u7b3a''\u7252''\u7b80''\u8981''\u987e''\u7b54''\u5ba1''\u8be6'。'\u9ab8''\u57a2''\u60f3''\u6d74''\u6267''\u70ed''\u613f''\u51c9'。
'\u9a74''\u9aa1''\u728a''\u7279''\u9a87''\u8dc3''\u8d85''\u9aa7'。'\u8bdb''\u65a9''\u8d3c''\u76d7''\u6355''\u83b7''\u53db''\u4ea1'。
'\u5e03''\u5c04''\u8fbd''\u4e38''\u5d47''\u7434''\u962e''\u5578'。'\u606c''\u7b14''\u4f26''\u7eb8''\u94a7''\u5de7''\u4efb''\u9493'。
'\u91ca''\u7eb7''\u5229''\u4fd7''\u5e76''\u7686''\u4f73''\u5999'。'\u6bdb''\u65bd''\u6dd1''\u59ff''\u5de5''\u98a6''\u598d''\u7b11'。
'\u5e74''\u77e2''\u6bcf''\u50ac''\u66e6''\u6656''\u6717''\u66dc'。'\u7487''\u7391''\u60ac''\u65a1''\u6666''\u9b44''\u73af''\u7167'。
'\u6307''\u85aa''\u4fee''\u795c''\u6c38''\u7ee5''\u5409''\u52ad'。'\u77e9''\u6b65''\u5f15''\u9886''\u4fef''\u4ef0''\u5eca''\u5e99'。
'\u675f''\u5e26''\u77dc''\u5e84''\u5f98''\u5f8a''\u77bb''\u773a'。'\u5b64''\u964b''\u5be1''\u95fb''\u611a''\u8499''\u7b49''\u8bee'。
'\u8c13''\u8bed''\u52a9''\u8005''\u7109''\u54c9''\u4e4e''\u4e5f'。

@ -0,0 +1,63 @@
天地玄黄宇宙洪荒。日月盈昃辰宿列张。4
寒来暑往秋收冬藏。闰余成岁律吕调阳。8
云腾致雨露结为霜。金生丽水玉出昆冈。12
剑号巨阙珠称夜光。果珍李柰菜重芥姜。16
海咸河淡鳞潜羽翔。龙师火帝鸟官人皇。20
始制文字乃服衣裳。推位让国有虞陶唐。24
吊民伐罪周发殷汤。坐朝问道垂拱平章。28
爱育黎首臣伏戎羌。遐迩壹体率宾归王。32
鸣凤在树白驹食场。化被草木赖及万方。36
盖此身发四大五常。恭惟鞠养岂敢毁伤。40
女慕贞洁男效才良。知过必改得能莫忘。44
罔谈彼短靡恃己长。信使可覆器欲难量。48
墨悲丝染诗赞羔羊。景行维贤克念作圣。52
德建名立形端表正。空谷传声虚堂习听。56
祸因恶积福缘善庆。尺璧非宝寸阴是竞。60
资父事君曰严与敬。孝当竭力忠则尽命。64
临深履薄夙兴温凊。似兰斯馨如松之盛。68
川流不息渊澄取映。容止若思言辞安定。72
笃初诚美慎终宜令。荣业所基藉甚无竟。76
学优登仕摄职从政。存以甘棠去而益咏。80
乐殊贵贱礼别尊卑。上和下睦夫唱妇随。84
外受傅训入奉母仪。诸姑伯叔犹子比儿。88
孔怀兄弟同气连枝。交友投分切磨箴规。92
仁慈隐恻造次弗离。节义廉退颠沛匪亏。96
性静情逸心动神疲。守真志满逐物意移。100
坚持雅操好爵自縻。都邑华夏东西二京。104
背邙面洛浮渭据泾。宫殿盘郁楼观飞惊。108
图写禽兽画彩仙灵。丙舍傍启甲帐对楹。112
肆筵设席鼓瑟吹笙。升阶纳陛弁转疑星。116
右通广内左达承明。既集坟典亦聚群英。120
杜稿钟隶漆书壁经。府罗将相路侠槐卿。124
户封八县家给千兵。高冠陪辇驱毂振缨。128
世禄侈富车驾肥轻。策功茂实勒碑刻铭。132
磻溪伊尹佐时阿衡。奄宅曲阜微旦孰营。136
桓公匡合济弱扶倾。绮回汉惠说感武丁。140
俊乂密勿多士实宁。晋楚更霸赵魏困横。144
假途灭虢践土会盟。何遵约法韩弊烦刑。148
起翦颇牧用军最精。宣威沙漠驰誉丹青。152
九州禹迹百郡秦并。岳宗恒岱禅主云亭。156
雁门紫塞鸡田赤城。昆池碣石巨野洞庭。160
旷远绵邈岩岫杳冥。治本于农务兹稼穑。164
俶载南亩我艺黍稷。税熟贡新劝赏黜陟。168
孟轲敦素史鱼秉直。庶几中庸劳谦谨敕。172
聆音察理鉴貌辨色。贻厥嘉猷勉其祗植。176
省躬讥诫宠增抗极。殆辱近耻林皋幸即。180
两疏见机解组谁逼。索居闲处沉默寂寥。184
求古寻论散虑逍遥。欣奏累遣戚谢欢招。188
渠荷的历园莽抽条。枇杷晚翠梧桐早凋。192
陈根委翳落叶飘摇。游鹍独运凌摩绛霄。196
耽读玩市寓目囊箱。易輶攸畏属耳垣墙。200
具膳餐饭适口充肠。饱饫烹宰饥厌糟糠。204
亲戚故旧老少异粮。妾御绩纺侍巾帷房。208
纨扇圆洁银烛炜煌。昼眠夕寐蓝笋象床。212
弦歌酒宴接杯举觞。矫手顿足悦豫且康。216
嫡后嗣续祭祀烝尝。稽颡再拜悚惧恐惶。220
笺牒简要顾答审详。骸垢想浴执热愿凉。224
驴骡犊特骇跃超骧。诛斩贼盗捕获叛亡。228
布射辽丸嵇琴阮啸。恬笔伦纸钧巧任钓。232
释纷利俗并皆佳妙。毛施淑姿工颦妍笑。236
年矢每催曦晖朗曜。璇玑悬斡晦魄环照。240
指薪修祜永绥吉劭。矩步引领俯仰廊庙。244
束带矜庄徘徊瞻眺。孤陋寡闻愚蒙等诮。248
谓语助者焉哉乎也。250

@ -0,0 +1,68 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "4bd349d3-17e2-4f65-87e1-8468d6388da3",
"metadata": {},
"outputs": [],
"source": [
"# given an acrostic poem, the word to be revealed is n letters long: its letters are the first letters of the 1st to nth words on successive lines (line k supplies the first letter of its k-th word)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f316e70-4d4d-4571-8dd4-ad8bca805079",
"metadata": {},
"outputs": [],
"source": [
"# write a function to extract the letters into the full word"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51dbc6a9-dfca-4cf6-a811-cf8ce7326068",
"metadata": {},
"outputs": [],
"source": [
"# use grepFirstletter\n",
"# use , to separate input program \n",
"def diagonal_acrostic_join(input):\n",
"    \"\"\"Join the diagonal letters of an acrostic poem into one word.\n",
"\n",
"    Line k of the poem (counting from 1) contributes the first letter of\n",
"    its k-th word.\n",
"\n",
"    NOTE(review): the note above says to separate the input with \",\"; this\n",
"    is read here as one string whose lines are comma-separated -- confirm.\n",
"\n",
"    Args:\n",
"        input: the poem as a single string, lines separated by \",\".\n",
"\n",
"    Returns:\n",
"        The collected letters joined into one string. A line with fewer\n",
"        than k words contributes nothing.\n",
"    \"\"\"\n",
"    letters = []\n",
"    for position, line in enumerate(input.split(\",\")):\n",
"        words = line.split()\n",
"        if position < len(words):\n",
"            letters.append(words[position][0])\n",
"    return \"\".join(letters)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "101f9659-522d-4a10-93e5-0b4ae528c755",
"metadata": {},
"outputs": [],
"source": [
"sample_poem = \"\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,68 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "4bd349d3-17e2-4f65-87e1-8468d6388da3",
"metadata": {},
"outputs": [],
"source": [
"# given an acrostic poem, the word to be revealed is n letters long: its letters are the first letters of the 1st to nth words on successive lines (line k supplies the first letter of its k-th word)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f316e70-4d4d-4571-8dd4-ad8bca805079",
"metadata": {},
"outputs": [],
"source": [
"# write a function to extract the letters into the full word"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51dbc6a9-dfca-4cf6-a811-cf8ce7326068",
"metadata": {},
"outputs": [],
"source": [
"# use grepFirstletter\n",
"# use , to separate input program \n",
"def diagonal_acrostic_join(input):\n",
"    \"\"\"Join the diagonal letters of an acrostic poem into one word.\n",
"\n",
"    Line k of the poem (counting from 1) contributes the first letter of\n",
"    its k-th word.\n",
"\n",
"    NOTE(review): the note above says to separate the input with \",\"; this\n",
"    is read here as one string whose lines are comma-separated -- confirm.\n",
"\n",
"    Args:\n",
"        input: the poem as a single string, lines separated by \",\".\n",
"\n",
"    Returns:\n",
"        The collected letters joined into one string. A line with fewer\n",
"        than k words contributes nothing.\n",
"    \"\"\"\n",
"    letters = []\n",
"    for position, line in enumerate(input.split(\",\")):\n",
"        words = line.split()\n",
"        if position < len(words):\n",
"            letters.append(words[position][0])\n",
"    return \"\".join(letters)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "101f9659-522d-4a10-93e5-0b4ae528c755",
"metadata": {},
"outputs": [],
"source": [
"sample_poem = \"\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -0,0 +1,334 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# balanced emoticons: a playful ~~critical~~ code study\n",
"\n",
"this is written for the doggies and kitties in the anthropo-sphere <br>\n",
"methods implemented were referred with caution to the following:<br>\n",
"https://www.geeksforgeeks.org/python-program-check-string-palindrome-not/ <br>\n",
"more cat emoticons: https://www.copyandpastesymbols.net/emoticon/cat-emoticons.html"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0_0 is a balanced emoticon!\n"
]
}
],
"source": [
"# palindrome method \n",
"# slice the string backwards \n",
"\n",
"def isBalanced(s):\n",
"    \"\"\"Print whether the emoticon s reads the same forwards and backwards.\"\"\"\n",
"    template = (\n",
"        \"{} is a balanced emoticon!\"\n",
"        if s == s[::-1]\n",
"        else \"{} is not a balanced emoticon, try another one?\"\n",
"    )\n",
"    print(template.format(s))\n",
"\n",
"# try a simple emoticon\n",
"suprised_f = \"0_0\"\n",
"isBalanced(suprised_f)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(*♥^•ﻌ•^♥*) is not a balanced emoticon, try another one?\n"
]
}
],
"source": [
"cat_hearty = \"(*♥^•ﻌ•^♥*)\"\n",
"isBalanced(cat_hearty)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"*♥^•ﻌ•^♥* is a balanced emoticon!\n"
]
}
],
"source": [
"#strip the opening brackets and closing brackets \n",
"s_cat_hearty = cat_hearty.strip(\"()\")\n",
"isBalanced(s_cat_hearty)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#update the isBalanced method \n",
"def isBalanced(s):\n",
"    \"\"\"Print whether s is a palindrome, either as given or after any\n",
"    leading/trailing round brackets have been stripped off.\"\"\"\n",
"    def reads_both_ways(text):\n",
"        return text == text[::-1]\n",
"\n",
"    # the stripped variant is only tried when the raw string fails\n",
"    if reads_both_ways(s) or reads_both_ways(s.strip(\"()\")):\n",
"        print(\"{} is a balanced emoticon!\".format(s))\n",
"        return\n",
"    print(\"{} is not a balanced emoticon, try another one?\".format(s))\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(*♥^•ﻌ•^♥*) is a balanced emoticon!\n"
]
}
],
"source": [
"#try with the updated method \n",
"isBalanced(cat_hearty)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ლ(●ↀωↀ●)ლ is not a balanced emoticon, try another one?\n",
"\n",
"(゚∀゚) is a balanced emoticon!\n",
"\n",
"(=^・ω・^=) is a balanced emoticon!\n",
"\n",
"(^・x・^) is a balanced emoticon!\n",
"\n"
]
}
],
"source": [
"# try a list of kitty emoji strings\n",
"c_strings_l = [\"ლ(●ↀωↀ●)ლ\",\"(゚∀゚)\",\"(=^・ω・^=)\",\"(^・x・^)\"]\n",
"for string in c_strings_l:\n",
" isBalanced(string)\n",
" print()\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"▼・ᴥ・▼ is a balanced emoticon!\n",
"\n",
"U・ﻌ・U is a balanced emoticon!\n",
"\n",
" υ´• ﻌ •`υ is not a balanced emoticon, try another one?\n",
"\n",
"ᐡ ・ ﻌ ・ ᐡ is a balanced emoticon!\n",
"\n"
]
}
],
"source": [
"d_strings_l = [\"▼・ᴥ・▼\",\"U・ﻌ・U\",\" υ´• ﻌ •`υ\",\"ᐡ ・ ﻌ ・ ᐡ\"]\n",
"for string in d_strings_l:\n",
" isBalanced(string)\n",
" print()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ლ)●ↀωↀ●(ლ\n"
]
}
],
"source": [
"#why would it not be? \n",
"wide_eyes = \"ლ(●ↀωↀ●)ლ\"\n",
"r_wide_eyes = wide_eyes[::-1]\n",
"s_r_wide_eyes = r_wide_eyes.strip(\"()\")\n",
"print(s_r_wide_eyes)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"in the kitty emoji, the strip method doesn't help, since it only strips leading or trailing chars and the brackets sit inside the string. therefore the method is buggy: once the string is reversed, the pair of brackets no longer match. similarly, in the doggie emoji, the pair of ticks that make up the eyebrows are not treated as a matching pair. "
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Half loop comparison says failing test\n",
"it's going to print half a doggie tree watch out\n",
"υ\n",
"´υ\n",
"•´υ\n",
" •´υ\n",
"ﻌ •´υ\n",
" ﻌ •´υ\n",
"• ﻌ •´υ\n",
"`• ﻌ •´υ\n",
"υ`• ﻌ •´υ\n"
]
}
],
"source": [
"# bonus program\n",
"# printing emoticon trees\n",
"# the program will fail \n",
"\n",
"# *******************************\n",
"# * run a half loop comparison * \n",
"# *******************************\n",
"\n",
"def isPalindrome(str):\n",
"    \"\"\"Return True when str reads the same from both ends, else False.\n",
"\n",
"    Only the first half is walked; each character is compared with the\n",
"    one at the mirrored position counted from the end.\n",
"    \"\"\"\n",
"    half = len(str) // 2\n",
"    return all(str[left] == str[-1 - left] for left in range(half))\n",
" \n",
"# main function\n",
"s = \" υ´• ﻌ •`υ\"\n",
"ans = isPalindrome(s)\n",
" \n",
"if (ans):\n",
" print(\"Half loop comparison says passing test\")\n",
"else:\n",
" print(\"Half loop comparison says failing test\")\n",
" \n",
"# *******************************\n",
"# * move emoticon to a new var * \n",
"# *******************************\n",
"\n",
"# Python program to check\n",
"# if a string is palindrome\n",
"# or not\n",
"\n",
"x = \"υ´• ﻌ •`υ\"\n",
"\n",
"w = \"\"\n",
"z = \"\"\n",
"y = \"\"\n",
"\n",
"print(\"it's going to print half a doggie tree watch out\")\n",
"for i in x:\n",
" w = i + w\n",
" print(w)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"now the eyebrows are flipped and doggie's mood changed, the ticks didn't match the original and it failed the test. "
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" υ\n",
" `υ\n",
" •`υ\n",
" •`υ\n",
" ﻌ •`υ\n",
" ﻌ •`υ\n",
" • ﻌ •`υ\n",
" ´• ﻌ •`υ\n",
" υ´• ﻌ •`υ\n"
]
}
],
"source": [
"#add indent to make the tree's other half \n",
"indent = \" \"\n",
"counter = len(w)\n",
"# start from an empty string on every run, so re-running this cell does not\n",
"# keep prepending to the z left over from an earlier run\n",
"z = \"\"\n",
"\n",
"for i in w:\n",
"    z = i + z\n",
"    # the padding shrinks by one space per row\n",
"    print(indent * counter + z)\n",
"    counter -= 1"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,341 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# balanced emoticons: a playful ~~critical~~ code study\n",
"\n",
"this is written for the doggies and kitties in the anthropo-sphere <br>\n",
"the methods implemented here refer, with caution, to the following:<br>\n",
"https://www.geeksforgeeks.org/python-program-check-string-palindrome-not/<br>\n",
"more cat emoticons: https://www.copyandpastesymbols.net/emoticon/cat-emoticons.html"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0_0 is a balanced emoticon!\n"
]
}
],
"source": [
"# palindrome method \n",
"# slice the string backwards \n",
"\n",
"def isBalanced(s):\n",
" if s == s[::-1]:\n",
" print(\"{} is a balanced emoticon!\".format(s))\n",
" else:\n",
" print(\"{} is not a balanced emoticon, try another one?\".format(s))\n",
"\n",
"# try a simple emoticon\n",
"suprised_f = \"0_0\"\n",
"isBalanced(suprised_f)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(*♥^•ﻌ•^♥*) is not a balanced emoticon, try another one?\n"
]
}
],
"source": [
"cat_hearty = \"(*♥^•ﻌ•^♥*)\"\n",
"isBalanced(cat_hearty)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"*♥^•ﻌ•^♥* is a balanced emoticon!\n"
]
}
],
"source": [
"#strip the opening brackets and closing brackets \n",
"s_cat_hearty = cat_hearty.strip(\"()\")\n",
"isBalanced(s_cat_hearty)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"#update the isBalanced method \n",
"def isBalanced(s):\n",
" if s == s[::-1]:\n",
" print(\"{} is a balanced emoticon!\".format(s))\n",
" else:\n",
" s_strip = s.strip(\"()\")\n",
" if s_strip == s_strip[::-1]:\n",
" print(\"{} is a balanced emoticon!\".format(s))\n",
" else:\n",
" print(\"{} is not a balanced emoticon, try another one?\".format(s))\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(*♥^•ﻌ•^♥*) is a balanced emoticon!\n"
]
}
],
"source": [
"#try with the updated method \n",
"isBalanced(cat_hearty)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ლ(●ↀωↀ●)ლ is not a balanced emoticon, try another one?\n",
"\n",
"(゚∀゚) is a balanced emoticon!\n",
"\n",
"(=^・ω・^=) is a balanced emoticon!\n",
"\n",
"(^・x・^) is a balanced emoticon!\n",
"\n"
]
}
],
"source": [
"# try a list of kitty emoji strings\n",
"c_strings_l = [\"ლ(●ↀωↀ●)ლ\",\"(゚∀゚)\",\"(=^・ω・^=)\",\"(^・x・^)\"]\n",
"for string in c_strings_l:\n",
" isBalanced(string)\n",
" print()\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"▼・ᴥ・▼ is a balanced emoticon!\n",
"\n",
"U・ﻌ・U is a balanced emoticon!\n",
"\n",
" υ´• ﻌ •`υ is not a balanced emoticon, try another one?\n",
"\n",
"ᐡ ・ ﻌ ・ ᐡ is a balanced emoticon!\n",
"\n"
]
}
],
"source": [
"d_strings_l = [\"▼・ᴥ・▼\",\"U・ﻌ・U\",\" υ´• ﻌ •`υ\",\"ᐡ ・ ﻌ ・ ᐡ\"]\n",
"for string in d_strings_l:\n",
" isBalanced(string)\n",
" print()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ლ)●ↀωↀ●(ლ\n"
]
}
],
"source": [
"#why would it not be? \n",
"wide_eyes = \"ლ(●ↀωↀ●)ლ\"\n",
"r_wide_eyes = wide_eyes[::-1]\n",
"s_r_wide_eyes = r_wide_eyes.strip(\"()\")\n",
"print(s_r_wide_eyes)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"in the kitty emoji, the strip method doesn't help, since it only strips leading or trailing chars and the brackets here sit inside the paws (ლ). therefore the method is buggy: once the string is reversed, the pair of brackets no longer matches. similarly, in the doggie emoji, the pair of ticks that make up the eyebrows is not treated as matching. "
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Half loop comparison says failing test\n",
"it's going to print half a doggie tree watch out\n",
"υ\n",
"´υ\n",
"•´υ\n",
" •´υ\n",
"ﻌ •´υ\n",
" ﻌ •´υ\n",
"• ﻌ •´υ\n",
"`• ﻌ •´υ\n",
"υ`• ﻌ •´υ\n"
]
}
],
"source": [
"# bonus program\n",
"# printing emoticon trees\n",
"# the program will fail \n",
"\n",
"# *******************************\n",
"# * run a half loop comparison * \n",
"# *******************************\n",
"\n",
"def isPalindrome(str):\n",
" \n",
" # Run loop from 0 to len/2\n",
" for i in range(0, int(len(str)/2)):\n",
" if str[i] != str[len(str)-i-1]:\n",
" return False\n",
" return True\n",
" \n",
"# main function\n",
"s = \" υ´• ﻌ •`υ\"\n",
"ans = isPalindrome(s)\n",
" \n",
"if (ans):\n",
" print(\"Half loop comparison says passing test\")\n",
"else:\n",
" print(\"Half loop comparison says failing test\")\n",
" \n",
"# *******************************\n",
"# * move emoticon to a new var * \n",
"# *******************************\n",
"\n",
"# Python program to check\n",
"# if a string is palindrome\n",
"# or not\n",
"\n",
"x = \"υ´• ﻌ •`υ\"\n",
"\n",
"w = \"\"\n",
"z = \"\"\n",
"y = \"\"\n",
"\n",
"print(\"it's going to print half a doggie tree watch out\")\n",
"for i in x:\n",
" w = i + w\n",
" print(w)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"now the eyebrows are flipped and the doggie's mood has changed: the ticks no longer match the original, so it failed the test. "
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" υ\n",
" `υ\n",
" •`υ\n",
" •`υ\n",
" ﻌ •`υ\n",
" ﻌ •`υ\n",
" • ﻌ •`υ\n",
" ´• ﻌ •`υ\n",
" υ´• ﻌ •`υ\n"
]
}
],
"source": [
"#add indent to make the tree's other half \n",
"indent = \" \"\n",
"counter = len(w)\n",
"\n",
"for i in w:\n",
" z = i + z\n",
" indent = indent * counter \n",
" print(indent + z)\n",
" counter = counter - 1\n",
" indent = \" \""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

@ -0,0 +1,17 @@
pulpit tulip
pul pit tul ips
pu l p it tu l i ps
p u l p i tt u l i p s
p u l p t iu t l i p s
p u l t u pl i t i p s
p u t u l li p i t p s
p t u l i up l p i t s
t u l i p ps u l p i t
t u l i p sp u l p i t
tu l i ps pul pit
tulips pulpit
# **************************** #
# select the text to see #
# indentations #
#******************************#

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,39 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "5a6bf879-ef10-48f8-ae8e-8fe39e314516",
"metadata": {},
"outputs": [],
"source": [
"# find json structure of the medication \n",
"# relevant ones \n",
"# use networkx to draw network \n",
"# store json structure of the medication\n",
"# visualize \n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 159 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save