From e8c1c0eb37720233e3fe54c81732d454aeefbc67 Mon Sep 17 00:00:00 2001 From: Tr0n Date: Sat, 6 Apr 2024 00:12:15 +0200 Subject: [PATCH] initial commit --- .gitignore | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 37 +++++++++++ bun.lockb | Bin 0 -> 3139 bytes index.ts | 27 ++++++++ package.json | 11 ++++ test.cpp | 4 ++ tsconfig.json | 27 ++++++++ 7 files changed, 281 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100755 bun.lockb create mode 100644 index.ts create mode 100644 package.json create mode 100644 test.cpp create mode 100644 tsconfig.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9b1ee42 --- /dev/null +++ b/.gitignore @@ -0,0 +1,175 @@ +# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore + +# Logs + +logs +_.log +npm-debug.log_ +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Caches + +.cache + +# Diagnostic reports (https://nodejs.org/api/report.html) + +report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json + +# Runtime data + +pids +_.pid +_.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover + +lib-cov + +# Coverage directory used by tools like istanbul + +coverage +*.lcov + +# nyc test coverage + +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) + +.grunt + +# Bower dependency directory (https://bower.io/) + +bower_components + +# node-waf configuration + +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) + +build/Release + +# Dependency directories + +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) + +web_modules/ + +# TypeScript cache + +*.tsbuildinfo + +# Optional npm cache directory + +.npm + +# Optional eslint cache + +.eslintcache + +# Optional stylelint cache + +.stylelintcache + +# Microbundle cache + +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ 
+.rts2_cache_umd/ + +# Optional REPL history + +.node_repl_history + +# Output of 'npm pack' + +*.tgz + +# Yarn Integrity file + +.yarn-integrity + +# dotenv environment variable files + +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) + +.parcel-cache + +# Next.js build output + +.next +out + +# Nuxt.js build / generate output + +.nuxt +dist + +# Gatsby files + +# Comment in the public line in if your project uses Gatsby and not Next.js + +# https://nextjs.org/blog/next-9-1#public-directory-support + +# public + +# vuepress build output + +.vuepress/dist + +# vuepress v2.x temp and cache directory + +.temp + +# Docusaurus cache and generated files + +.docusaurus + +# Serverless directories + +.serverless/ + +# FuseBox cache + +.fusebox/ + +# DynamoDB Local files + +.dynamodb/ + +# TernJS port file + +.tern-port + +# Stores VSCode versions used for testing VSCode extensions + +.vscode-test + +# yarn v2 + +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store diff --git a/README.md b/README.md new file mode 100644 index 0000000..aea18cc --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# Encoding Fix Tool + +Prepares source files for conversion of encoding from EUC-KR to UTF-8. + +## Background + +Most files in the source were originally written using the EUC-KR encoding. This would be fine if only comments were using characters that only exist in that encoding. + +However, the original devs used EUC-KR also in string literals, which in turn are sent to the client or localized directly on the server and act as a lookup key. + +If we simply convert the whole file from EUC-KR to UTF-8, these lookups will break since not all references are server-side and we want to keep compatibility with existing systems (client, quests, etc). 
import { Glob } from 'bun';

// Adjust the pattern to point at the source tree that should be rewritten.
// It is resolved relative to the directory handed to `glob.scan()` below.
const glob = new Glob('../../src/**/*.cpp');

// ASCII byte values the scanner reacts to.
const LINE_FEED = 0x0a;
const DOUBLE_QUOTE = 0x22;
const SINGLE_QUOTE = 0x27;
const ASTERISK = 0x2a;
const SLASH = 0x2f;
const BACKSLASH = 0x5c;
const LOWER_X = 0x78;

/** Lexical context the scanner is currently in. */
type ScanState = 'code' | 'string' | 'char' | 'lineComment' | 'blockComment';

/**
 * Tells whether `byte` is an ASCII hexadecimal digit (0-9, A-F, a-f).
 *
 * @param byte - A single byte value.
 * @returns `true` when the byte is a hex digit.
 */
function isHexDigit(byte: number): boolean {
  return (
    (byte >= 0x30 && byte <= 0x39) ||
    (byte >= 0x41 && byte <= 0x46) ||
    (byte >= 0x61 && byte <= 0x66)
  );
}

/**
 * Replaces every byte above 0x7F that sits inside a C++ string literal with
 * its `\xNN` escape sequence. Bytes in code, character literals and comments
 * are copied through unchanged so comments can still be converted in bulk
 * (e.g. with `iconv`) afterwards.
 *
 * The scanner tracks just enough C++ lexical structure to keep the string
 * state reliable: backslash escapes (so `\"` does not close a literal),
 * character literals (so `'"'` does not open one) and `//` / block comments
 * (so quotes in comments are ignored). Raw string literals (`R"(...)"`) are
 * not recognised and are scanned like ordinary strings.
 *
 * @param bytes - Raw file content in its original single/multi-byte encoding.
 * @returns The rewritten content. Its length equals `bytes.length` exactly
 *   when nothing had to be escaped, because every rewrite adds bytes.
 */
function escapeHighBytesInStringLiterals(bytes: Uint8Array): Uint8Array {
  const out: number[] = [];
  let state: ScanState = 'code';
  // True when the previous byte in a literal was a backslash that still
  // "owns" the current byte.
  let escaped = false;
  // True when the last thing written was a `\xNN` generated by this function.
  let afterGeneratedEscape = false;

  for (let i = 0; i < bytes.length; i++) {
    const byte = bytes[i];
    const next = bytes[i + 1];

    switch (state) {
      case 'code':
        if (byte === SLASH && next === ASTERISK) {
          // Consume both bytes so that `/*/` is not read as open + close.
          out.push(byte, next);
          i++;
          state = 'blockComment';
          continue;
        }
        if (byte === SLASH && next === SLASH) {
          state = 'lineComment';
        } else if (byte === DOUBLE_QUOTE) {
          state = 'string';
          escaped = false;
          afterGeneratedEscape = false;
        } else if (byte === SINGLE_QUOTE) {
          state = 'char';
          escaped = false;
        }
        out.push(byte);
        break;

      case 'lineComment':
        if (byte === LINE_FEED) {
          state = 'code';
        }
        out.push(byte);
        break;

      case 'blockComment':
        if (byte === ASTERISK && next === SLASH) {
          out.push(byte, next);
          i++;
          state = 'code';
          continue;
        }
        out.push(byte);
        break;

      case 'char':
        if (escaped) {
          escaped = false;
        } else if (byte === BACKSLASH) {
          escaped = true;
        } else if (byte === SINGLE_QUOTE || byte === LINE_FEED) {
          // A character literal never spans lines; bailing out at the line
          // end keeps a stray apostrophe from swallowing the rest of the file.
          state = 'code';
        }
        out.push(byte);
        break;

      case 'string':
        if (byte > 127) {
          // 0x80-0xFF always formats as exactly two hex digits.
          const hex = byte.toString(16).toUpperCase();
          out.push(BACKSLASH, LOWER_X, hex.charCodeAt(0), hex.charCodeAt(1));
          afterGeneratedEscape = true;
          escaped = false;
          break;
        }
        if (afterGeneratedEscape && isHexDigit(byte)) {
          // C++ hex escapes have no length limit: `\xEB8` would be parsed as
          // one (out-of-range) escape. Splitting the literal with `""` ends
          // the escape; adjacent literals are concatenated by the compiler.
          out.push(DOUBLE_QUOTE, DOUBLE_QUOTE);
        }
        afterGeneratedEscape = false;
        if (escaped) {
          escaped = false;
        } else if (byte === BACKSLASH) {
          escaped = true;
        } else if (byte === DOUBLE_QUOTE) {
          state = 'code';
        }
        out.push(byte);
        break;
    }
  }

  return new Uint8Array(out);
}

for await (const fileName of glob.scan('.')) {
  const file = Bun.file(fileName);
  const original = new Uint8Array(await file.arrayBuffer());
  const converted = escapeHighBytesInStringLiterals(original);

  // Every rewrite makes the content longer, so an unchanged length means the
  // file needs no update; skip the write to leave it untouched on disk.
  if (converted.length === original.length) {
    continue;
  }
  await Bun.write(file, converted);
}