Refactor to external scraper and update dependencies (#113)

This commit is contained in:
David
2022-06-15 23:37:15 +02:00
committed by GitHub
parent ff1ad202ae
commit 274e7f1a4b
49 changed files with 6952 additions and 4414 deletions

View File

@@ -1,54 +0,0 @@
import languagesJson from "./languages.json";
// Split the bundled JSON into its three tables: code -> display name,
// per-side exception replacements, and per-side code mappings.
const { languages, exceptions, mappings } = languagesJson;
// Any key of the `languages` table.
export type LangCode = keyof typeof languages;
// Replacement tables that `replaceBoth` can select by name.
const checkTypes = {
exception: exceptions,
mapping: mappings
};
// Name of a replacement table: "exception" | "mapping".
export type CheckType = keyof typeof checkTypes;
// The two sides of a translation, in the order `replaceBoth` processes them.
const langTypes = [
"source",
"target"
] as const;
// Side of a translation: "source" | "target".
export type LangType = typeof langTypes[number];
/**
 * Builds a type guard that reports whether `key` is one of `obj`'s OWN properties.
 *
 * An own-property check is used instead of the `in` operator because `in` also
 * matches names inherited from `Object.prototype` ("constructor", "toString",
 * "__proto__", ...), which would let such strings pass as valid keys.
 *
 * @param obj - Object whose own keys define the accepted set.
 * @returns A predicate narrowing `key` to `keyof T` when it is an own property of `obj`.
 */
const isKeyOf = <T extends object>(obj: T) => (key: keyof any): key is keyof T =>
    Object.prototype.hasOwnProperty.call(obj, key);
/**
 * Runs the source and target codes through the replacement table selected by
 * `checkType`.
 *
 * For each side, a code that appears as a key in that side's table is swapped
 * for the table's value; any other code is returned unchanged.
 *
 * @param checkType - Which replacement table to consult ("exception" or "mapping").
 * @param langs - The current source and target codes.
 * @returns The source and target codes after replacement.
 */
export function replaceBoth(
    checkType: CheckType,
    langs: {
        [key in LangType]: LangCode
    }
): {
    [key in LangType]: LangCode
} {
    const tables = checkTypes[checkType];

    // Resolves one side: table hit -> replacement, otherwise the original code.
    const resolveSide = (side: LangType) => {
        const table = tables[side];
        const current = langs[side];
        if (isKeyOf(table)(current))
            return table[current];
        return current;
    };

    return {
        source: resolveSide("source"),
        target: resolveSide("target")
    };
}
/**
 * Lists the known languages as `[code, name]` pairs.
 *
 * @param type - Optional side of the translation. When given, codes that are
 *   keys of that side's exception table are left out of the result.
 * @returns Every language entry, or only those not excepted for `type`.
 */
export function retrieveFromType(type?: LangType) {
    const allEntries = Object.entries(languages) as [LangCode, string][];

    if (type) {
        const exceptedCodes = Object.keys(exceptions[type]);
        return allEntries.filter(([code]) => !exceptedCodes.includes(code));
    }

    return allEntries;
}
/**
 * Type guard telling whether `code` is a key of the `languages` table.
 *
 * @param code - Candidate code; `null`, `undefined` and the empty string are rejected.
 * @returns `true` when `code` is a known language code.
 */
export function isValid(code: string | null | undefined): code is LangCode {
    if (!code)
        return false;
    return isKeyOf(languages)(code);
}
/**
 * Looks up the display name registered for a language code.
 *
 * @param code - Code to look up.
 * @returns The name from the `languages` table, or `null` when the code is not valid.
 */
export function getName(code: string): string | null {
    if (!isValid(code))
        return null;
    return languages[code];
}

View File

@@ -1,130 +0,0 @@
{
"languages": {
"auto": "Detect",
"af": "Afrikaans",
"sq": "Albanian",
"am": "Amharic",
"ar": "Arabic",
"hy": "Armenian",
"az": "Azerbaijani",
"eu": "Basque",
"be": "Belarusian",
"bn": "Bengali",
"bs": "Bosnian",
"bg": "Bulgarian",
"ca": "Catalan",
"ceb": "Cebuano",
"ny": "Chichewa",
"zh": "Chinese",
"zh_HANT": "Chinese (Traditional)",
"co": "Corsican",
"hr": "Croatian",
"cs": "Czech",
"da": "Danish",
"nl": "Dutch",
"en": "English",
"eo": "Esperanto",
"et": "Estonian",
"tl": "Filipino",
"fi": "Finnish",
"fr": "French",
"fy": "Frisian",
"gl": "Galician",
"ka": "Georgian",
"de": "German",
"el": "Greek",
"gu": "Gujarati",
"ht": "Haitian Creole",
"ha": "Hausa",
"haw": "Hawaiian",
"iw": "Hebrew",
"hi": "Hindi",
"hmn": "Hmong",
"hu": "Hungarian",
"is": "Icelandic",
"ig": "Igbo",
"id": "Indonesian",
"ga": "Irish",
"it": "Italian",
"ja": "Japanese",
"jw": "Javanese",
"kn": "Kannada",
"kk": "Kazakh",
"km": "Khmer",
"rw": "Kinyarwanda",
"ko": "Korean",
"ku": "Kurdish (Kurmanji)",
"ky": "Kyrgyz",
"lo": "Lao",
"la": "Latin",
"lv": "Latvian",
"lt": "Lithuanian",
"lb": "Luxembourgish",
"mk": "Macedonian",
"mg": "Malagasy",
"ms": "Malay",
"ml": "Malayalam",
"mt": "Maltese",
"mi": "Maori",
"mr": "Marathi",
"mn": "Mongolian",
"my": "Myanmar (Burmese)",
"ne": "Nepali",
"no": "Norwegian",
"or": "Odia (Oriya)",
"ps": "Pashto",
"fa": "Persian",
"pl": "Polish",
"pt": "Portuguese",
"pa": "Punjabi",
"ro": "Romanian",
"ru": "Russian",
"sm": "Samoan",
"gd": "Scots Gaelic",
"sr": "Serbian",
"st": "Sesotho",
"sn": "Shona",
"sd": "Sindhi",
"si": "Sinhala",
"sk": "Slovak",
"sl": "Slovenian",
"so": "Somali",
"es": "Spanish",
"su": "Sundanese",
"sw": "Swahili",
"sv": "Swedish",
"tg": "Tajik",
"ta": "Tamil",
"tt": "Tatar",
"te": "Telugu",
"th": "Thai",
"tr": "Turkish",
"tk": "Turkmen",
"uk": "Ukrainian",
"ur": "Urdu",
"ug": "Uyghur",
"uz": "Uzbek",
"vi": "Vietnamese",
"cy": "Welsh",
"xh": "Xhosa",
"yi": "Yiddish",
"yo": "Yoruba",
"zu": "Zulu"
},
"exceptions": {
"source": {
"zh_HANT": "zh"
},
"target": {
"auto": "en"
}
},
"mappings": {
"source": {},
"target": {
"zh": "zh-CN",
"zh_HANT": "zh-TW",
"auto": "en"
}
}
}

View File

@@ -1,37 +1,54 @@
import { replaceBoth, isValid, LangCode } from "./language";
import { replaceExceptedCode, isValidCode, LanguageType, LangCode } from "lingva-scraper";
const defaultSourceLang = process.env["NEXT_PUBLIC_DEFAULT_SOURCE_LANG"];
const defaultTargetLang = process.env["NEXT_PUBLIC_DEFAULT_TARGET_LANG"];
type State = {
source: LangCode,
target: LangCode,
export type State = {
source: LangCode<"source">,
target: LangCode<"target">,
query: string,
delayedQuery: string,
translation: string,
isLoading: boolean
isLoading: boolean,
pronunciation: {
query?: string,
translation?: string
},
audio: {
query?: number[],
translation?: number[]
}
}
export const initialState: State = {
source: isValid(defaultSourceLang) ? defaultSourceLang : "auto",
target: isValid(defaultTargetLang) ? defaultTargetLang : "en",
source: isValidCode(defaultSourceLang, LanguageType.SOURCE) ? defaultSourceLang : "auto",
target: isValidCode(defaultTargetLang, LanguageType.TARGET) ? defaultTargetLang : "en",
query: "",
delayedQuery: "",
translation: "",
isLoading: true
isLoading: true,
pronunciation: {},
audio: {}
}
/**
 * Action kinds handled by the reducer in this file.
 */
export enum Actions {
// Overwrite a single state field with the payload's value.
SET_FIELD,
// Change the source language; the reducer returns the state unchanged when the code fails source validation.
SET_SOURCE,
// Change the target language; the reducer returns the state unchanged when the code fails target validation.
SET_TARGET,
// Spread the payload's state object over the current state.
SET_ALL,
// Swap source and target, together with query/translation, pronunciation and audio.
SWITCH_LANGS
}
type Action = {
type Action<T extends keyof State = keyof State> = {
type: Actions.SET_FIELD,
payload: {
key: string,
value: any
key: T,
value: State[T]
}
} | {
type: Actions.SET_SOURCE | Actions.SET_TARGET,
payload: {
code: string
}
} | {
type: Actions.SET_ALL,
@@ -39,36 +56,84 @@ type Action = {
state: State
}
} | {
type: Actions.SWITCH_LANGS
type: Actions.SWITCH_LANGS,
payload: {
detectedSource?: LangCode<"source">
}
}
export default function reducer(state: State, action: Action): State {
const { source, target } = replaceBoth("exception", {
source: state.target,
target: state.source
});
switch (action.type) {
case Actions.SET_FIELD:
case Actions.SET_FIELD: {
const { key, value } = action.payload;
if (key === "source" && value === state.target)
return { ...state, [key]: value, target: target !== value ? target : "eo" };
if (key === "target" && value === state.source)
return { ...state, [key]: value, source };
return { ...state, [key]: value };
case Actions.SET_ALL:
return { ...state, ...action.payload.state };
case Actions.SWITCH_LANGS:
}
case Actions.SET_SOURCE: {
const { code } = action.payload;
if (!isValidCode(code, LanguageType.SOURCE))
return state;
if (code !== state.target)
return { ...state, source: code };
const sourceAsTarget = replaceExceptedCode(LanguageType.TARGET, state.source);
return {
...state,
source: source !== target
? source
: initialState.source,
target,
source: code,
target: sourceAsTarget !== code
? sourceAsTarget
: "eo"
};
}
case Actions.SET_TARGET: {
const { code } = action.payload;
if (!isValidCode(code, LanguageType.TARGET))
return state;
if (code !== state.source)
return { ...state, target: code };
const targetAsSource = replaceExceptedCode(LanguageType.SOURCE, state.target);
return {
...state,
target: code,
source: targetAsSource !== code
? targetAsSource
: "auto"
};
}
case Actions.SET_ALL: {
return { ...state, ...action.payload.state };
}
case Actions.SWITCH_LANGS: {
const { detectedSource } = action.payload;
const newTarget = state.source === "auto" && detectedSource
? detectedSource
: state.source;
const parsedNewTarget = replaceExceptedCode(LanguageType.TARGET, newTarget);
const parsedNewSource = parsedNewTarget === state.target
? initialState.source
: replaceExceptedCode(LanguageType.SOURCE, state.target);
return {
...state,
source: parsedNewSource,
target: parsedNewTarget,
query: state.translation,
delayedQuery: state.translation,
translation: state.query
translation: state.query,
pronunciation: {
query: state.pronunciation.translation,
translation: state.pronunciation.query
},
audio: {
query: state.audio.translation,
translation: state.audio.query
}
};
}
default:
return state;
}

19
utils/slug.ts Normal file
View File

@@ -0,0 +1,19 @@
/**
 * Interprets a URL slug by its number of segments:
 *
 * - 1 segment:  `[query]`
 * - 2 segments: `[target, query]`
 * - 3 segments: `[source, target, query]`
 *
 * Any other length yields an empty object.
 *
 * @param slug - Path segments to interpret.
 * @returns The fields that could be read from the slug.
 */
export const extractSlug = (
    slug: string[]
): {
    source?: string,
    target?: string,
    query?: string
} => {
    if (slug.length === 1) {
        const [query] = slug;
        return { query };
    }
    if (slug.length === 2) {
        const [target, query] = slug;
        return { target, query };
    }
    if (slug.length === 3) {
        const [source, target, query] = slug;
        return { source, target, query };
    }
    return {};
};

View File

@@ -1,88 +0,0 @@
import UserAgent from "user-agents";
import cheerio from "cheerio";
import { replaceBoth, LangCode } from "./language";
/**
 * Requests a translation from Google Translate's mobile page and extracts the
 * result text from the returned HTML.
 *
 * Every failure is reported through the `errorMsg` variant of the result. This
 * includes a failure while reading the response body, which is caught here so
 * it cannot escape as a rejected promise.
 *
 * @param source - Source language code.
 * @param target - Target language code.
 * @param query - Text to translate.
 * @returns `{ translationRes }` on success, otherwise `{ errorMsg }`.
 */
export async function googleScrape(
    source: LangCode,
    target: LangCode,
    query: string
): Promise<{
    translationRes: string
} | {
    errorMsg: string
}> {
    // Convert the codes with the "mapping" table before building the URL.
    const parsed = replaceBoth("mapping", { source, target });

    const encodedQuery = encodeURIComponent(query);

    if (encodedQuery.length > 7500)
        return {
            errorMsg: "The translation query is too long"
        };

    const headers = {
        "User-Agent": new UserAgent().toString()
    };

    let html: string;
    try {
        const res = await fetch(
            `https://translate.google.com/m?sl=${parsed.source}&tl=${parsed.target}&q=${encodedQuery}`,
            { headers }
        );

        if (!res.ok)
            return {
                errorMsg: "An error occurred while retrieving the translation"
            };

        // Reading the body can reject too (e.g. connection dropped mid-stream).
        html = await res.text();
    } catch {
        return {
            errorMsg: "An error occurred while retrieving the translation"
        };
    }

    const translationRes = cheerio.load(html)(".result-container").text().trim();

    return translationRes && !translationRes.includes("#af-error-page")
        ? {
            translationRes
        } : {
            errorMsg: "An error occurred while parsing the translation"
        };
}
/**
 * Reads source, target and query out of a URL slug according to how many
 * segments it has: `[query]`, `[target, query]` or `[source, target, query]`.
 *
 * @param slug - Path segments to interpret.
 * @returns The recognised fields, or an empty object for any other length.
 */
export function extractSlug(slug: string[]): {
    source?: string,
    target?: string,
    query?: string
} {
    const [first, second, third] = slug;
    const segmentCount = slug.length;

    if (segmentCount === 3)
        return { source: first, target: second, query: third };
    if (segmentCount === 2)
        return { target: first, query: second };
    if (segmentCount === 1)
        return { query: first };

    return {};
}
/**
 * Fetches synthesized speech for `text` from Google Translate's TTS endpoint.
 *
 * At most the first 200 characters are sent. Longer text is cut at the last
 * space found at or before index 200, so the request does not end mid-word.
 *
 * @param lang - Language code of the text; converted with the "mapping" table.
 * @param text - Text to synthesize.
 * @returns The audio bytes as a plain number array, or `null` when the request
 *   fails, the response is not OK, or the body cannot be read.
 */
export async function textToSpeechScrape(lang: LangCode, text: string): Promise<number[] | null> {
    const { target: parsedLang } = replaceBoth("mapping", { source: "auto", target: lang });

    const lastSpace = text.lastIndexOf(" ", 200);
    // `lastSpace > 0` rather than `!== -1`: a space at index 0 would otherwise
    // cut the text down to an empty string.
    const cutAt = text.length > 200 && lastSpace > 0 ? lastSpace : 200;
    const slicedText = text.slice(0, cutAt);

    const headers = {
        "User-Agent": new UserAgent().toString()
    };

    try {
        const res = await fetch(
            `https://translate.google.com/translate_tts?tl=${parsedLang}&q=${encodeURIComponent(slicedText)}&textlen=${slicedText.length}&client=tw-ob`,
            { headers }
        );

        if (!res.ok)
            return null;

        // Reading the body can reject as well; treat it like any other failure.
        const buffer = await res.arrayBuffer();
        return Array.from(new Uint8Array(buffer));
    } catch {
        return null;
    }
}