import React, { FunctionComponent } from 'react'

import FrontPageLayout from '../../components/layouts/FrontPageLayout'
import bg from '../../images/front_page/bg-svg/wave-header.svg'
import chineseLanguageIcon from '../../images/front_page/icon/chinese-language-icon.svg'
import downChevronIcon from '../../images/front_page/icon/down-chevron.svg'
import englishLanguageIcon from '../../images/front_page/icon/english-language-icon.svg'

/**
 * Introduction page of the CELL Corpus front site.
 *
 * Takes no props and renders static content only, wrapped in `FrontPageLayout`:
 * a full-height hero banner, followed by the "About the Corpus",
 * "About the Corpus functions" and "About the Corpus data" sections. The data
 * section also contains the spoken data archive, research team and
 * acknowledgements blocks.
 *
 * Several wrappers carry `id` attributes (`about`, `about-function`,
 * `about-data`, `about-team`, `about-acknowledgements`) that can be used as
 * in-page anchor targets.
 */
const Intro: FunctionComponent = () => {
    return (
        <FrontPageLayout>
            {/* Hero banner: full viewport height with the wave SVG as background. */}
            <div
                className="flex flex-col justify-between w-full h-screen relative px-10px sm:px-140px bg-no-repeat bg-cover sm:bg-contain"
                style={{
                    backgroundImage: `url(${bg})`,
                }}
            >
                {/* Empty first child so `justify-between` vertically centres the title. */}
                <div />
                <div className="mx-auto sm:text-center lg:text-left">
                    <h1 className="text-4xl tracking-tight font-extrabold text-center text-gray-900 sm:text-5xl md:text-6xl">
                        <span className="block xl:inline text-white sm:text-black">Introduction</span>
                    </h1>
                </div>

                {/* Decorative chevron; the empty alt text keeps it out of the accessibility tree. */}
                <div className="flex justify-center w-full">
                    <img className="w-12" src={downChevronIcon} alt="" />
                </div>
            </div>

            {/* Section: About the Corpus */}
            <div
                id="about"
                className="flex justify-center items-start w-full px-10px sm:px-140px mt-20px sm:mt-48px"
            >
                <div className="w-full md:w-4/5 xl:w-2/3">
                    <h2 className="heading text-page-heading font-bold text-3xl">
                        About the Corpus
                    </h2>

                    <p>
                        Welcome to the Chinese and English Learner Language (CELL) Corpus, a
                        web-based corpus chiefly composed of Chinese and English academic essays
                        written by undergraduate students at the Open University of Hong Kong. The
                        establishment of the CELL Corpus was part of the Institutional Development
                        Scheme project (project code: UGC/IDS16/17) funded by the Research Grants
                        Council, Hong Kong SAR. One of the important objectives of the project,
                        which is served by the CELL Corpus, is to allow researchers, teachers and
                        learners of the Chinese and English languages to investigate university
                        students’ patterns of language use in Chinese and English academic essays.
                    </p>
                    <br />
                    <p>
                        There are three components in the CELL Corpus: 1) English Corpus; 2) Chinese
                        Corpus; and 3) Spoken data archive (see ‘About the Corpus data’ for details
                        of each component). Qualtrics surveys were administered to the undergraduate
                        students whose academic essays were collected as data for the Corpus. The
                        survey aimed at collecting the undergraduate students’ meta-data such as
                        gender, place of birth, first language and public examination results. This
                        enables the data in the CELL Corpus to be matched with the meta-data for
                        further analysis.
                    </p>
                    <br />
                    <p>
                        The project started in January 2018 and it took three years for the research
                        team to establish the Corpus. The CELL Corpus was first released to the
                        public in January 2021.
                    </p>
                </div>
            </div>

            {/* Section: About the Corpus functions */}
            <div
                id="about-function"
                className="flex justify-center items-start w-full px-10px sm:px-140px mt-20px sm:mt-48px"
            >
                <div className="w-full md:w-4/5 xl:w-2/3">
                    <h2 className="heading text-page-heading font-bold text-3xl">
                        About the Corpus functions
                    </h2>

                    <p>
                        The CELL Corpus has a concordance feature and a word list feature, both of
                        which allow web-based searches of the Corpus data as part-of-speech (POS)
                        tagged texts. Concordance lines can be exported to an Excel file for further
                        analysis. For the Chinese Corpus and the English Corpus, filter functions
                        are available for searches of the Corpus data with regard to: i) categories
                        of courses where the academic essays come from; ii) sections of the academic
                        essays; and iii) meta-data of the undergraduate students. See below for the
                        options provided in the filter functions:
                    </p>
                </div>
            </div>

            {/* Section: About the Corpus data (also hosts archive, team and acknowledgements) */}
            <div
                id="about-data"
                className="flex justify-center items-start w-full px-10px sm:px-140px mt-20px sm:mt-48px"
            >
                <section className="flex flex-col w-full md:w-4/5 xl:w-2/3">
                    <h2 className="heading text-page-heading font-bold text-3xl">
                        About the Corpus data
                    </h2>

                    <p>
                        The CELL Corpus is composed of three subparts, which are: 1) English Corpus;
                        2) Chinese Corpus; and 3) Spoken data archive. All data is anonymized to
                        ensure the identities of the writers and of those being referred to in the
                        essays and recordings are not disclosed.
                    </p>

                    {/* Two summary cards: stacked on small screens, side by side from `sm` up. */}
                    <div className="flex flex-col sm:flex-row w-full">
                        {/* Card: English Corpus */}
                        <div className="w-full sm:w-1/2 relative rounded-lg shadow-lg sm:mx-10px">
                            <div className="p-48px">
                                <div className="text-2xl font-bold text-page-heading">
                                    <p className="inline">
                                        <img
                                            className="inline w-12 mr-3"
                                            src={englishLanguageIcon}
                                            alt=""
                                        />
                                        English Corpus
                                    </p>
                                </div>
                                <div className="text-gray-800 mt-10px">
                                    The English Corpus contains English academic essays written by
                                    L2 English learners who were, at the time when the essays were
                                    collected, studying in one of the undergraduate programmes
                                    offered by the School of Education and Languages at the Open
                                    University of Hong Kong. The English academic essays collected
                                    were the written assignments of the following five categories of
                                    courses:
                                </div>
                                <div className="mt-10px">
                                    <ul className="list-decimal list-inside">
                                        <li>English Language Education</li>
                                        <li>English Language Studies</li>
                                        <li>Education Theories</li>
                                        <li>General Education</li>
                                        <li>English Language Proficiency</li>
                                    </ul>
                                </div>
                                <div className="italic mt-10px">
                                    Size of the English Corpus:{' '}
                                    <span className="text-red-900 underline font-bold">
                                        4,186,653
                                    </span>{' '}
                                    words
                                </div>
                            </div>
                        </div>

                        {/* Card: Chinese Corpus */}
                        <div className="w-full sm:w-1/2 rounded-lg shadow-lg sm:mx-10px">
                            <div className="p-48px">
                                <div className="inline-block text-2xl font-bold text-page-heading">
                                    <p className="inline">
                                        <img
                                            className="inline w-12 mr-3"
                                            src={chineseLanguageIcon}
                                            alt=""
                                        />
                                        Chinese Corpus
                                    </p>
                                </div>
                                <div className="text-gray-800 mt-10px">
                                    The Chinese Corpus contains Chinese academic essays written by
                                    L1 Chinese learners who were, at the time when the essays were
                                    collected, studying in one of the undergraduate programmes
                                    offered by the School of Education and Languages at the Open
                                    University of Hong Kong. The Chinese academic essays collected
                                    were the written assignments of the following five categories of
                                    courses:
                                </div>
                                <div className="mt-10px">
                                    <ul className="list-decimal list-inside">
                                        <li>Chinese Language Education</li>
                                        <li>Chinese Language Studies</li>
                                        <li>Education Theories</li>
                                        <li>General Education</li>
                                        <li>Early Childhood Education</li>
                                    </ul>
                                </div>
                                <div className="italic mt-10px">
                                    Size of the Chinese Corpus:{' '}
                                    <span className="text-red-900 underline font-bold">
                                        18,029,899
                                    </span>{' '}
                                    characters
                                </div>
                            </div>
                        </div>
                    </div>

                    {/* Spoken data archive: description plus summary figures. */}
                    <div className="flex flex-col w-full my-48px">
                        <div>
                            <h2 className="heading text-page-heading font-bold text-3xl">
                                Spoken data archive
                            </h2>

                            <p>
                                The spoken data archive comprises seven transcriptions of primary
                                Mathematics lessons. The research team visited a local primary
                                school seven times in total in three academic years between 2018 and
                                2020 to record the Mathematics lessons where the seven target pupils
                                were in. Three visits were made when the target pupils were in
                                primary one, and two were made when they were in primary two and in
                                primary three. The recordings feature on the one hand the English
                                language used by the Mathematics teachers in teacher talk and
                                teacher-pupil talk and on the other the English and Chinese
                                languages used by the pupils in their responses to the teachers’
                                questions and in pupil-pupil talk.
                            </p>
                        </div>

                        <div className="w-full my-48px">
                            <div className="flex flex-col border border-gray-500 rounded-lg shadow-lg">
                                <div className="flex justify-center items-center w-full p-10px">
                                    <p>
                                        Duration of seven recordings in total:{' '}
                                        <span className="text-red-900 underline font-bold">
                                            476
                                        </span>{' '}
                                        minutes
                                    </p>
                                </div>
                                <div className="flex justify-center items-center w-full p-10px">
                                    <p>
                                        Word count of seven transcriptions in total:{' '}
                                        {/* Styled like the other highlighted figures; width utilities do not belong on an inline span. */}
                                        <span className="text-red-900 underline font-bold">
                                            50,666
                                        </span>{' '}
                                        words
                                    </p>
                                </div>
                            </div>
                        </div>
                    </div>

                    {/* Research team */}
                    <div id="about-team" className="flex flex-col w-full my-48px">
                        <h2 className="heading text-page-heading font-bold text-3xl">
                            About the Research Team
                        </h2>

                        <p>
                            The Research Team of the CELL Corpus is composed of academics from the
                            School of Education and Languages at the Open University of Hong Kong
                            and colleagues working for the Research Institute for Bilingual Learning
                            and Teaching (RIBiLT).
                        </p>

                        <br />

                        <ul className="list-decimal list-inside">
                            <li>Dr. Danny LEUNG (Supervisor)</li>
                            <li>Dr. Vanliza CHOW (Supervisor)</li>
                            <li>Dr. Emily GE (Supervisor)</li>
                            <li>Suman RAI (Back-end Programmer)</li>
                            <li>Vincent CHUNG (Front-end Developer)</li>
                            <li>Alex CHAN (Research Assistant, 2019-20)</li>
                            <li>Wai Ming CHUNG (Research Assistant, 2019-20)</li>
                            <li>Jasmine HUNG (Research Assistant, 2019-20)</li>
                            <li>Kelly CHAN (Research Assistant, 2018-2019)</li>
                            <li>Alan AU (Research Assistant, 2018-2019)</li>
                            <li>Vincent AU (Research Assistant, 2018-2019)</li>
                            <li>Alan CHAN (Research Assistant, part-time, 2019-20)</li>
                            <li>Jason HUNG (Research Assistant, part-time, 2019-20)</li>
                            <li>Sally CHAN (Research Assistant, part-time, 2018-2019)</li>
                            <li>Raphael CHIM (Research Assistant, part-time, 2018-2019)</li>
                        </ul>
                    </div>

                    {/* Acknowledgements */}
                    <div id="about-acknowledgements" className="flex flex-col w-full my-48px">
                        <h2 className="heading text-page-heading font-bold text-3xl">
                            Acknowledgements
                        </h2>

                        <p>
                            The CELL Corpus is one of the major outcomes of the Institutional
                            Development Scheme (IDS) project (project code: UGC/IDS16/17) entitled
                            ‘Expertise and Infrastructure Development for Advancing Research
                            Capacity in Selected Strategic Areas’. The IDS project was funded by the
                            Research Grants Council (RGC), Hong Kong SAR. The establishment of the
                            CELL Corpus is indebted to the financial support of the RGC and to the
                            valuable professional advice by Prof. Laurence Anthony. The CELL Corpus
                            would not have taken shape without the support of the management team of
                            the University, the supervision by the academics in the School of
                            Education and Languages and the hard work by the RIBiLT colleagues.
                        </p>
                    </div>
                </section>
            </div>
        </FrontPageLayout>
    )
}

// Default export of this module: the Intro page component defined above.
export default Intro
