Refactor to external scraper and update dependencies (#113)
This commit is contained in:
@@ -1,54 +0,0 @@
|
||||
import languagesJson from "./languages.json";
|
||||
const { languages, exceptions, mappings } = languagesJson;
|
||||
|
||||
export type LangCode = keyof typeof languages;
|
||||
|
||||
const checkTypes = {
|
||||
exception: exceptions,
|
||||
mapping: mappings
|
||||
};
|
||||
|
||||
export type CheckType = keyof typeof checkTypes;
|
||||
|
||||
const langTypes = [
|
||||
"source",
|
||||
"target"
|
||||
] as const;
|
||||
|
||||
export type LangType = typeof langTypes[number];
|
||||
|
||||
/**
 * Builds a type guard that tells whether a key is one of `obj`'s own properties.
 *
 * Only own properties are considered: the `in` operator would also accept
 * names inherited from `Object.prototype` (such as "toString" or
 * "constructor"), which must never be treated as valid keys of a data object.
 *
 * @param obj - Object whose own keys define the accepted set.
 * @returns A predicate narrowing `key` to `keyof T` when it is an own property of `obj`.
 */
const isKeyOf = <T extends object>(obj: T) => (key: keyof any): key is keyof T =>
    Object.prototype.hasOwnProperty.call(obj, key);
|
||||
|
||||
/**
 * Applies the replacement table selected by `checkType` to both language codes.
 *
 * For each side ("source" and "target") the code is looked up in
 * `checkTypes[checkType][side]`; when it appears there as a key, the
 * associated value is returned, otherwise the code is kept unchanged.
 *
 * @param checkType - Which table of `checkTypes` to use.
 * @param langs - The source and target codes to process.
 * @returns The source and target codes after replacement.
 */
export function replaceBoth(
    checkType: CheckType,
    langs: {
        [key in LangType]: LangCode
    }
): {
    [key in LangType]: LangCode
} {
    // Looks up one side in its own replacement table.
    const resolve = (side: LangType) => {
        const replacements = checkTypes[checkType][side];
        const code = langs[side];
        return isKeyOf(replacements)(code) ? replacements[code] : code;
    };

    const source = resolve("source");
    const target = resolve("target");
    return { source, target };
}
|
||||
|
||||
/**
 * Lists the known languages as `[code, name]` pairs.
 *
 * Without a `type`, every entry of `languages` is returned. With a `type`,
 * the codes that appear as keys of `exceptions[type]` are left out.
 *
 * @param type - Optional side ("source" or "target") whose excepted codes are removed.
 * @returns The `[code, name]` pairs, in the iteration order of `languages`.
 */
export function retrieveFromType(type?: LangType): [LangCode, string][] {
    const langEntries = Object.entries(languages) as [LangCode, string][];

    if (!type)
        return langEntries;

    // Built once up front so the key list is not recomputed for every entry.
    const excepted = new Set<string>(Object.keys(exceptions[type]));

    return langEntries.filter(([code]) => !excepted.has(code));
}
|
||||
|
||||
/**
 * Type guard telling whether `code` is a key of `languages`.
 *
 * @param code - Candidate code; `null`, `undefined` and the empty string are rejected.
 * @returns `true` (narrowing to `LangCode`) when `isKeyOf(languages)` accepts the code.
 */
export function isValid(code: string | null | undefined): code is LangCode {
    if (!code)
        return false;

    return isKeyOf(languages)(code);
}
|
||||
|
||||
/**
 * Looks up the display name registered in `languages` for a code.
 *
 * @param code - Language code to look up.
 * @returns The name for `code`, or `null` when `isValid` rejects the code.
 */
export function getName(code: string): string | null {
    if (!isValid(code))
        return null;

    return languages[code];
}
|
||||
@@ -1,130 +0,0 @@
|
||||
{
|
||||
"languages": {
|
||||
"auto": "Detect",
|
||||
"af": "Afrikaans",
|
||||
"sq": "Albanian",
|
||||
"am": "Amharic",
|
||||
"ar": "Arabic",
|
||||
"hy": "Armenian",
|
||||
"az": "Azerbaijani",
|
||||
"eu": "Basque",
|
||||
"be": "Belarusian",
|
||||
"bn": "Bengali",
|
||||
"bs": "Bosnian",
|
||||
"bg": "Bulgarian",
|
||||
"ca": "Catalan",
|
||||
"ceb": "Cebuano",
|
||||
"ny": "Chichewa",
|
||||
"zh": "Chinese",
|
||||
"zh_HANT": "Chinese (Traditional)",
|
||||
"co": "Corsican",
|
||||
"hr": "Croatian",
|
||||
"cs": "Czech",
|
||||
"da": "Danish",
|
||||
"nl": "Dutch",
|
||||
"en": "English",
|
||||
"eo": "Esperanto",
|
||||
"et": "Estonian",
|
||||
"tl": "Filipino",
|
||||
"fi": "Finnish",
|
||||
"fr": "French",
|
||||
"fy": "Frisian",
|
||||
"gl": "Galician",
|
||||
"ka": "Georgian",
|
||||
"de": "German",
|
||||
"el": "Greek",
|
||||
"gu": "Gujarati",
|
||||
"ht": "Haitian Creole",
|
||||
"ha": "Hausa",
|
||||
"haw": "Hawaiian",
|
||||
"iw": "Hebrew",
|
||||
"hi": "Hindi",
|
||||
"hmn": "Hmong",
|
||||
"hu": "Hungarian",
|
||||
"is": "Icelandic",
|
||||
"ig": "Igbo",
|
||||
"id": "Indonesian",
|
||||
"ga": "Irish",
|
||||
"it": "Italian",
|
||||
"ja": "Japanese",
|
||||
"jw": "Javanese",
|
||||
"kn": "Kannada",
|
||||
"kk": "Kazakh",
|
||||
"km": "Khmer",
|
||||
"rw": "Kinyarwanda",
|
||||
"ko": "Korean",
|
||||
"ku": "Kurdish (Kurmanji)",
|
||||
"ky": "Kyrgyz",
|
||||
"lo": "Lao",
|
||||
"la": "Latin",
|
||||
"lv": "Latvian",
|
||||
"lt": "Lithuanian",
|
||||
"lb": "Luxembourgish",
|
||||
"mk": "Macedonian",
|
||||
"mg": "Malagasy",
|
||||
"ms": "Malay",
|
||||
"ml": "Malayalam",
|
||||
"mt": "Maltese",
|
||||
"mi": "Maori",
|
||||
"mr": "Marathi",
|
||||
"mn": "Mongolian",
|
||||
"my": "Myanmar (Burmese)",
|
||||
"ne": "Nepali",
|
||||
"no": "Norwegian",
|
||||
"or": "Odia (Oriya)",
|
||||
"ps": "Pashto",
|
||||
"fa": "Persian",
|
||||
"pl": "Polish",
|
||||
"pt": "Portuguese",
|
||||
"pa": "Punjabi",
|
||||
"ro": "Romanian",
|
||||
"ru": "Russian",
|
||||
"sm": "Samoan",
|
||||
"gd": "Scots Gaelic",
|
||||
"sr": "Serbian",
|
||||
"st": "Sesotho",
|
||||
"sn": "Shona",
|
||||
"sd": "Sindhi",
|
||||
"si": "Sinhala",
|
||||
"sk": "Slovak",
|
||||
"sl": "Slovenian",
|
||||
"so": "Somali",
|
||||
"es": "Spanish",
|
||||
"su": "Sundanese",
|
||||
"sw": "Swahili",
|
||||
"sv": "Swedish",
|
||||
"tg": "Tajik",
|
||||
"ta": "Tamil",
|
||||
"tt": "Tatar",
|
||||
"te": "Telugu",
|
||||
"th": "Thai",
|
||||
"tr": "Turkish",
|
||||
"tk": "Turkmen",
|
||||
"uk": "Ukrainian",
|
||||
"ur": "Urdu",
|
||||
"ug": "Uyghur",
|
||||
"uz": "Uzbek",
|
||||
"vi": "Vietnamese",
|
||||
"cy": "Welsh",
|
||||
"xh": "Xhosa",
|
||||
"yi": "Yiddish",
|
||||
"yo": "Yoruba",
|
||||
"zu": "Zulu"
|
||||
},
|
||||
"exceptions": {
|
||||
"source": {
|
||||
"zh_HANT": "zh"
|
||||
},
|
||||
"target": {
|
||||
"auto": "en"
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"source": {},
|
||||
"target": {
|
||||
"zh": "zh-CN",
|
||||
"zh_HANT": "zh-TW",
|
||||
"auto": "en"
|
||||
}
|
||||
}
|
||||
}
|
||||
125
utils/reducer.ts
125
utils/reducer.ts
@@ -1,37 +1,54 @@
|
||||
import { replaceBoth, isValid, LangCode } from "./language";
|
||||
import { replaceExceptedCode, isValidCode, LanguageType, LangCode } from "lingva-scraper";
|
||||
|
||||
const defaultSourceLang = process.env["NEXT_PUBLIC_DEFAULT_SOURCE_LANG"];
|
||||
const defaultTargetLang = process.env["NEXT_PUBLIC_DEFAULT_TARGET_LANG"];
|
||||
|
||||
type State = {
|
||||
source: LangCode,
|
||||
target: LangCode,
|
||||
export type State = {
|
||||
source: LangCode<"source">,
|
||||
target: LangCode<"target">,
|
||||
query: string,
|
||||
delayedQuery: string,
|
||||
translation: string,
|
||||
isLoading: boolean
|
||||
isLoading: boolean,
|
||||
pronunciation: {
|
||||
query?: string,
|
||||
translation?: string
|
||||
},
|
||||
audio: {
|
||||
query?: number[],
|
||||
translation?: number[]
|
||||
}
|
||||
}
|
||||
|
||||
export const initialState: State = {
|
||||
source: isValid(defaultSourceLang) ? defaultSourceLang : "auto",
|
||||
target: isValid(defaultTargetLang) ? defaultTargetLang : "en",
|
||||
source: isValidCode(defaultSourceLang, LanguageType.SOURCE) ? defaultSourceLang : "auto",
|
||||
target: isValidCode(defaultTargetLang, LanguageType.TARGET) ? defaultTargetLang : "en",
|
||||
query: "",
|
||||
delayedQuery: "",
|
||||
translation: "",
|
||||
isLoading: true
|
||||
isLoading: true,
|
||||
pronunciation: {},
|
||||
audio: {}
|
||||
}
|
||||
|
||||
export enum Actions {
|
||||
SET_FIELD,
|
||||
SET_SOURCE,
|
||||
SET_TARGET,
|
||||
SET_ALL,
|
||||
SWITCH_LANGS
|
||||
}
|
||||
|
||||
type Action = {
|
||||
type Action<T extends keyof State = keyof State> = {
|
||||
type: Actions.SET_FIELD,
|
||||
payload: {
|
||||
key: string,
|
||||
value: any
|
||||
key: T,
|
||||
value: State[T]
|
||||
}
|
||||
} | {
|
||||
type: Actions.SET_SOURCE | Actions.SET_TARGET,
|
||||
payload: {
|
||||
code: string
|
||||
}
|
||||
} | {
|
||||
type: Actions.SET_ALL,
|
||||
@@ -39,36 +56,84 @@ type Action = {
|
||||
state: State
|
||||
}
|
||||
} | {
|
||||
type: Actions.SWITCH_LANGS
|
||||
type: Actions.SWITCH_LANGS,
|
||||
payload: {
|
||||
detectedSource?: LangCode<"source">
|
||||
}
|
||||
}
|
||||
|
||||
export default function reducer(state: State, action: Action): State {
|
||||
const { source, target } = replaceBoth("exception", {
|
||||
source: state.target,
|
||||
target: state.source
|
||||
});
|
||||
|
||||
switch (action.type) {
|
||||
case Actions.SET_FIELD:
|
||||
case Actions.SET_FIELD: {
|
||||
const { key, value } = action.payload;
|
||||
if (key === "source" && value === state.target)
|
||||
return { ...state, [key]: value, target: target !== value ? target : "eo" };
|
||||
if (key === "target" && value === state.source)
|
||||
return { ...state, [key]: value, source };
|
||||
return { ...state, [key]: value };
|
||||
case Actions.SET_ALL:
|
||||
return { ...state, ...action.payload.state };
|
||||
case Actions.SWITCH_LANGS:
|
||||
}
|
||||
case Actions.SET_SOURCE: {
|
||||
const { code } = action.payload;
|
||||
if (!isValidCode(code, LanguageType.SOURCE))
|
||||
return state;
|
||||
|
||||
if (code !== state.target)
|
||||
return { ...state, source: code };
|
||||
|
||||
const sourceAsTarget = replaceExceptedCode(LanguageType.TARGET, state.source);
|
||||
return {
|
||||
...state,
|
||||
source: source !== target
|
||||
? source
|
||||
: initialState.source,
|
||||
target,
|
||||
source: code,
|
||||
target: sourceAsTarget !== code
|
||||
? sourceAsTarget
|
||||
: "eo"
|
||||
};
|
||||
}
|
||||
case Actions.SET_TARGET: {
|
||||
const { code } = action.payload;
|
||||
if (!isValidCode(code, LanguageType.TARGET))
|
||||
return state;
|
||||
|
||||
if (code !== state.source)
|
||||
return { ...state, target: code };
|
||||
|
||||
const targetAsSource = replaceExceptedCode(LanguageType.SOURCE, state.target);
|
||||
return {
|
||||
...state,
|
||||
target: code,
|
||||
source: targetAsSource !== code
|
||||
? targetAsSource
|
||||
: "auto"
|
||||
};
|
||||
}
|
||||
case Actions.SET_ALL: {
|
||||
return { ...state, ...action.payload.state };
|
||||
}
|
||||
case Actions.SWITCH_LANGS: {
|
||||
const { detectedSource } = action.payload;
|
||||
|
||||
const newTarget = state.source === "auto" && detectedSource
|
||||
? detectedSource
|
||||
: state.source;
|
||||
const parsedNewTarget = replaceExceptedCode(LanguageType.TARGET, newTarget);
|
||||
|
||||
const parsedNewSource = parsedNewTarget === state.target
|
||||
? initialState.source
|
||||
: replaceExceptedCode(LanguageType.SOURCE, state.target);
|
||||
|
||||
return {
|
||||
...state,
|
||||
source: parsedNewSource,
|
||||
target: parsedNewTarget,
|
||||
query: state.translation,
|
||||
delayedQuery: state.translation,
|
||||
translation: state.query
|
||||
translation: state.query,
|
||||
pronunciation: {
|
||||
query: state.pronunciation.translation,
|
||||
translation: state.pronunciation.query
|
||||
},
|
||||
audio: {
|
||||
query: state.audio.translation,
|
||||
translation: state.audio.query
|
||||
}
|
||||
};
|
||||
}
|
||||
default:
|
||||
return state;
|
||||
}
|
||||
|
||||
19
utils/slug.ts
Normal file
19
utils/slug.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
 * Splits slug segments into their translation parts based on how many there are.
 *
 * - 1 segment: `[query]`
 * - 2 segments: `[target, query]`
 * - 3 segments: `[source, target, query]`
 * - any other count: an empty object
 *
 * The input is only read, so readonly arrays are accepted as well.
 *
 * @param slug - The slug segments.
 * @returns An object holding only the parts that the segment count provides.
 */
export const extractSlug = (
    slug: readonly string[]
): {
    source?: string,
    target?: string,
    query?: string
} => {
    const [p1, p2, p3] = slug;

    switch (slug.length) {
        case 1:
            return { query: p1 };
        case 2:
            return { target: p1, query: p2 };
        case 3:
            return { source: p1, target: p2, query: p3 };
        default:
            return {};
    }
};
|
||||
@@ -1,88 +0,0 @@
|
||||
import UserAgent from "user-agents";
|
||||
import cheerio from "cheerio";
|
||||
import { replaceBoth, LangCode } from "./language";
|
||||
|
||||
/**
 * Fetches a translation by requesting the `translate.google.com/m` page and
 * reading the text of its `.result-container` element.
 *
 * The language codes are first passed through `replaceBoth("mapping", ...)`.
 * The request is not sent when the URI-encoded query is longer than 7500
 * characters.
 *
 * @param source - Source language code.
 * @param target - Target language code.
 * @param query - Text to translate.
 * @returns `{ translationRes }` on success, or `{ errorMsg }` when the query
 * is too long, the request fails or is not OK, or no usable result text is found.
 */
export async function googleScrape(
    source: LangCode,
    target: LangCode,
    query: string
): Promise<{
    translationRes: string
} | {
    errorMsg: string
}> {
    const { source: sl, target: tl } = replaceBoth("mapping", { source, target });
    const q = encodeURIComponent(query);

    if (q.length > 7500)
        return { errorMsg: "The translation query is too long" };

    const url = `https://translate.google.com/m?sl=${sl}&tl=${tl}&q=${q}`;
    const headers = { "User-Agent": new UserAgent().toString() };

    // A rejected fetch is folded into `null` so it takes the same path as a non-OK response.
    const response = await fetch(url, { headers }).catch(() => null);

    if (!response?.ok)
        return { errorMsg: "An error occurred while retrieving the translation" };

    const $ = cheerio.load(await response.text());
    const translationRes = $(".result-container").text().trim();

    if (!translationRes || translationRes.includes("#af-error-page"))
        return { errorMsg: "An error occurred while parsing the translation" };

    return { translationRes };
}
|
||||
|
||||
/**
 * Splits slug segments into their translation parts based on how many there are.
 *
 * - 1 segment: `[query]`
 * - 2 segments: `[target, query]`
 * - 3 segments: `[source, target, query]`
 * - any other count: an empty object
 *
 * The input is only read, so readonly arrays are accepted as well.
 *
 * @param slug - The slug segments.
 * @returns An object holding only the parts that the segment count provides.
 */
export function extractSlug(slug: readonly string[]): {
    source?: string,
    target?: string,
    query?: string
} {
    const [p1, p2, p3] = slug;

    switch (slug.length) {
        case 1:
            return { query: p1 };
        case 2:
            return { target: p1, query: p2 };
        case 3:
            return { source: p1, target: p2, query: p3 };
        default:
            return {};
    }
}
|
||||
|
||||
/**
 * Requests speech audio for a text from the `translate.google.com/translate_tts`
 * endpoint.
 *
 * The language code is passed through `replaceBoth("mapping", ...)` as the
 * target. The text is cut to at most 200 characters; when it is longer than
 * that and contains a space at or before index 200, the cut is made at the
 * last such space.
 *
 * @param lang - Language code of the text.
 * @param text - Text to synthesize.
 * @returns The response body as an array of byte values, or `null` when the
 * request fails or the response is not OK.
 */
export async function textToSpeechScrape(lang: LangCode, text: string) {
    const { target: parsedLang } = replaceBoth("mapping", { source: "auto", target: lang });

    const lastSpace = text.lastIndexOf(" ", 200);
    const cutAtSpace = text.length > 200 && lastSpace !== -1;
    const slicedText = text.slice(0, cutAtSpace ? lastSpace : 200);

    const url = `https://translate.google.com/translate_tts?tl=${parsedLang}&q=${encodeURIComponent(slicedText)}&textlen=${slicedText.length}&client=tw-ob`;
    const headers = { "User-Agent": new UserAgent().toString() };

    // A rejected fetch is folded into `null` so it takes the same path as a non-OK response.
    const res = await fetch(url, { headers }).catch(() => null);

    if (!res?.ok)
        return null;

    const blob = await res.blob();
    const buffer = await blob.arrayBuffer();
    return Array.from(new Uint8Array(buffer));
}
|
||||
|
||||
Reference in New Issue
Block a user