본문 바로가기
자료/Python

[한국기술교육대학교] 스크립트프로그래밍 과제4

by cjw.git 2021. 11. 20.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
import stop_words_eng
 
 
class Solve:
    """Answers to Script Programming assignment 4.

    Each public static method prints a banner line and then runs (or merely
    documents) the answer to one problem.  Underscore-prefixed static methods
    are side-effect-light helpers shared by those answers.
    """

    # File that stores one "[id]: [sha1], [name], [school]" record per line.
    ACCOUNT_FILE = 'access.txt'
    # Korean stop-word list, one word per line.
    STOP_WORDS_KO_URL = ('https://raw.githubusercontent.com/stopwords-iso/'
                         'stopwords-ko/master/stopwords-ko.txt')
    # Seconds to wait for an HTTP response before giving up.
    HTTP_TIMEOUT = 10

    # ------------------------------------------------------------------ 1 --
    @staticmethod
    def problem1():
        """Write s.txt and derive s1.txt, s2.txt and s3.txt from it.

        * s1.txt - the lines of s.txt sorted by their full text.
        * s2.txt - the lines sorted by everything after their first character.
        * s3.txt - all words of s.txt regrouped three per line.

        Written answer kept from the original (translated): the error shown is
        "TypeError: can only assign an iterable".  Only an iterable may be
        assigned to a slice; 100 or (100) is not an iterable object such as a
        tuple or list, hence the type error.
        NOTE(review): that answer is about slice assignment and does not
        describe the file handling below - confirm which question it answers.
        """
        print('-' * 30 + ' 1 ' + '-' * 30)

        # region 1: create the input file.
        with open('s.txt', 'w') as src:
            src.write('pig ham\n')
            src.write('cat dog\n')
            src.write('ham bird\n')
            src.write('dog pig\n')
        # endregion

        # region 2: sort by the whole line.
        lines = Solve._read_lines('s.txt')
        lines.sort()
        Solve._write_lines('s1.txt', lines)
        # endregion

        # region 3: sort by the text following the first character.
        # NOTE(review): if "sort by the second word" was intended, the key
        # should be line.split()[1]; for the data above both give the same
        # order - confirm against the assignment text.
        lines = Solve._read_lines('s.txt')
        lines.sort(key=lambda line: line[1:])
        Solve._write_lines('s2.txt', lines)
        # endregion

        # region 4: regroup every word, three words per output line.
        with open('s.txt', 'r') as src:
            words = src.read().split()
        Solve._write_lines(
            's3.txt',
            [' '.join(words[i:i + 3]) for i in range(0, len(words), 3)])
        # endregion

    @staticmethod
    def _read_lines(path):
        """Return the non-empty lines of *path* without their newlines."""
        with open(path, 'r') as fp:
            return [line for line in fp.read().split('\n') if line != '']

    @staticmethod
    def _write_lines(path, lines):
        """Write every item of *lines* to *path*, each followed by a newline."""
        with open(path, 'w') as fp:
            fp.writelines(line + '\n' for line in lines)

    # ------------------------------------------------------------------ 2 --
    @staticmethod
    def problem2():
        """Run a console sign-up / sign-in loop backed by ``ACCOUNT_FILE``.

        The menu is labelled A/B/C; both the letter (any case) and the
        numbers 1/2/3 are accepted.  Unknown commands are ignored and the
        prompt is shown again.  The loop ends on the quit command.
        """
        print('-' * 30 + ' 2 ' + '-' * 30)

        print('Welcome to Our Service')
        print('A. Sign Up')
        print('B. Sign In')
        print('C. Quit')
        while True:
            command = input('User: ').strip().upper()

            if command in ('1', 'A'):
                Solve._sign_up()
            elif command in ('2', 'B'):
                Solve._sign_in()
            elif command in ('3', 'C'):
                break

    @staticmethod
    def _hash_password(password):
        """Return the hex SHA-1 digest of *password* encoded as UTF-8.

        NOTE(security): unsalted SHA-1 is not a safe way to store passwords.
        It is kept so that existing account files still verify; prefer
        hashlib.scrypt / pbkdf2_hmac if the stored format may change.
        """
        import hashlib
        return hashlib.sha1(password.encode('utf-8')).hexdigest()

    @staticmethod
    def _load_accounts():
        """Return the stored accounts as ``[id, pw_hash, name, school]`` lists.

        A missing account file yields an empty list.  Fields are taken from
        the bracketed groups of each line, so values may contain spaces,
        colons or commas (but not ``]``).  Lines that do not hold exactly four
        bracketed fields are skipped.
        """
        import os
        import re
        if not os.path.isfile(Solve.ACCOUNT_FILE):
            return []
        accounts = []
        with open(Solve.ACCOUNT_FILE, 'r') as fp:
            for line in fp:
                fields = re.findall(r'\[(.*?)\]', line)
                if len(fields) == 4:
                    accounts.append(fields)
        return accounts

    @staticmethod
    def _sign_up():
        """Ask for a new, unused id plus profile data and store the account."""
        accounts = Solve._load_accounts()
        taken_ids = {account[0] for account in accounts}

        # Keep asking until the id is not already registered.
        while True:
            user_id = input('id : ')
            if user_id not in taken_ids:
                break
            print('Sorry, the entered ID is already used.')

        user_pw = input('pw : ')
        user_name = input('name : ')
        user_school = input('school : ')
        accounts.append(
            [user_id, Solve._hash_password(user_pw), user_name, user_school])

        # The whole file is rewritten; records are newline separated and the
        # last record has no trailing newline.
        with open(Solve.ACCOUNT_FILE, 'w') as fp:
            fp.write('\n'.join('[%s]: [%s], [%s], [%s]' % tuple(account)
                               for account in accounts))

    @staticmethod
    def _sign_in():
        """Ask for a registered id and its password, then greet the user.

        When no account exists at all the message is printed once and control
        returns to the menu; prompting for an id could never succeed.
        """
        accounts = Solve._load_accounts()
        if not accounts:
            print('Sorry, you are not a registered member.')
            return

        user_data = None
        while user_data is None:
            user_id = input('user_id : ')
            user_data = next(
                (account for account in accounts if account[0] == user_id),
                None)
            if user_data is None:
                print('Sorry, you are not a registered member.')

        while True:
            user_pw = input('user_pw : ')
            if Solve._hash_password(user_pw) == user_data[1]:
                break
            print('Sorry, the entered password is not correct.')

        print("Hello [%s]" % (user_data[2]))

    # ------------------------------------------------------------------ 3 --
    @staticmethod
    def problem3():
        """Print three sample ``frange`` calls (1, 2 and 3 arguments)."""
        print('-' * 30 + ' 3 ' + '-' * 30)

        frange = Solve._frange
        print(frange(0.5))
        print(frange(1.0, 2.0))
        print(frange(2.2, 4.0, 0.5))

    @staticmethod
    def _frange(*args):
        """Return a list of floats, a float counterpart of ``range``.

        Call as ``frange(stop)``, ``frange(start, stop)`` or
        ``frange(start, stop, step)``; ``start`` defaults to 0.0 and ``step``
        to 0.1.  ``stop`` is excluded.  Values are rounded to 10 decimals.

        Raises:
            TypeError: if called with no argument or more than three.
            ValueError: if ``step`` is zero.
        """
        if not 1 <= len(args) <= 3:
            raise TypeError(
                'frange expected 1 to 3 arguments, got %d' % len(args))
        start, stop, step = 0.0, 0.0, 0.1
        if len(args) == 1:
            stop = args[0]
        elif len(args) == 2:
            start, stop = args
        else:
            start, stop, step = args
        if step == 0:
            raise ValueError('frange() step must not be zero')

        result = []
        index = 0
        while True:
            # Multiply instead of repeatedly adding so rounding error does not
            # accumulate and push an extra value just below ``stop``.
            value = round(start + index * step, 10)
            if (step > 0 and value >= stop) or (step < 0 and value <= stop):
                break
            result.append(value)
            index += 1
        return result

    # ------------------------------------------------------------------ 4 --
    @staticmethod
    def problem4():
        """Print the variadic sum for zero, two and five arguments."""
        print('-' * 30 + ' 4 ' + '-' * 30)

        add_all = Solve._sum_all
        print(add_all())
        print(add_all(1, 2))
        print(add_all(1, 2, 3, 4, 5))
        print(add_all(1, 5, 7, 2, -10))

    @staticmethod
    def _sum_all(*args):
        """Return the sum of all positional arguments (0 when none given)."""
        return sum(args)

    # ------------------------------------------------------------------ 5 --
    @staticmethod
    def problem5():
        """Print the acronym of 'as soon as possible'."""
        print('-' * 30 + ' 5 ' + '-' * 30)

        print(Solve._initials('as soon as possible'))

    @staticmethod
    def _initials(body: str):
        """Return the upper-cased first letters of the words in *body*."""
        return ''.join(token[0].upper() for token in body.split())

    # ------------------------------------------------------------------ 6 --
    @staticmethod
    def problem6():
        """Print 5 factorial."""
        print('-' * 30 + ' 6 ' + '-' * 30)

        print(Solve._factorial(5))

    @staticmethod
    def _factorial(n: int):
        """Return n! for a non-negative integer *n* (0! is 1).

        Raises:
            ValueError: if *n* is negative.
        """
        if n < 0:
            raise ValueError('factorial is not defined for negative values')
        product = 1
        for factor in range(2, n + 1):
            product *= factor
        return product

    # ------------------------------------------------------------------ 7 --
    @staticmethod
    def problem7():
        """Print the banner only; the answer to this problem is prose.

        Written answer kept from the original (translated): ``import string``
        loads the string module and its contents must be accessed through the
        module name (``string.name``), whereas ``from string import *`` reads
        in everything inside string, so the functions can be called directly
        without the module name.
        """
        print('-' * 30 + ' 7 ' + '-' * 30)

    # ------------------------------------------------ incremental project --
    @staticmethod
    def incrementalProject():
        """Rank the saved web pages against a search phrase typed by the user.

        Loads the word-frequency pickles of five pages, prints the three most
        frequent words (and the vocabulary size) of each, reads a search
        phrase, drops stop words from it and prints every page with the summed
        frequency of the remaining search words, best match first.

        The pickle files must already exist in the working directory; create
        them with ``Solve._save_page(url)`` for:
            http://www.president.go.kr          (Blue House)
            https://wikidocs.net/book/2         (Python)
            https://ko.wikipedia.org/wiki/군대   (military)
            https://ko.wikipedia.org/wiki/주식   (stocks)
            https://www.coupang.com             (shopping)
        """
        import pickle
        print('-' * 30 + ' Incremental Project ' + '-' * 30)

        web_data = [('www.president.go.kr.html', 'www.president.go.kr.words_frequency.pickle'),
                    ('wikidocs.net.book.2.html', 'wikidocs.net.book.2.words_frequency.pickle'),
                    ('ko.wikipedia.org.wiki.군대.html', 'ko.wikipedia.org.wiki.군대.words_frequency.pickle'),
                    ('ko.wikipedia.org.wiki.주식.html', 'ko.wikipedia.org.wiki.주식.words_frequency.pickle'),
                    ('www.coupang.com.html', 'www.coupang.com.words_frequency.pickle')]

        dict_data = []
        for _html_name, pickle_name in web_data:
            # NOTE(security): unpickling can execute arbitrary code; only load
            # files that were produced locally by _save_page.
            with open(pickle_name, 'rb') as fp:
                dict_data.append(pickle.load(fp))

        # Show the three heaviest keywords of every page.
        for keywords in dict_data:
            by_weight = sorted(keywords.items(), key=lambda kv: kv[1], reverse=True)
            print(by_weight[:3], len(by_weight))

        input_str = input('search : ')

        # Drop stop words as whole tokens - the same rule _save_page applies
        # to the stored dictionaries - rather than cutting them out of the
        # middle of other words.
        stop_words = Solve._load_stop_words()
        search_list = [word for word in input_str.split() if word not in stop_words]

        for page_index, score in Solve._rank_pages(search_list, dict_data):
            print(web_data[page_index][0], score)

    @staticmethod
    def _rank_pages(search_words, dictionaries):
        """Score each word-frequency dict against *search_words*.

        Returns ``(index, score)`` pairs sorted by descending score, where the
        score is the summed frequency of every search word found in the
        dictionary.  Ties keep their original order.
        """
        scores = [(index, sum(keywords.get(word, 0) for word in search_words))
                  for index, keywords in enumerate(dictionaries)]
        scores.sort(key=lambda pair: pair[1], reverse=True)
        return scores

    @staticmethod
    def _load_stop_words():
        """Download the Korean stop words and merge in the English ones.

        Returns a set of words.  ``stop_words_eng.stop_words_eng`` comes from
        a module imported at file level that is not shown here - presumably an
        iterable of English stop words; verify.

        Raises:
            requests.RequestException: on network failure, timeout or a
                non-success HTTP status.
        """
        import requests
        response = requests.get(Solve.STOP_WORDS_KO_URL, timeout=Solve.HTTP_TIMEOUT)
        response.raise_for_status()
        words = set(response.text.split())
        words.update(stop_words_eng.stop_words_eng)
        return words

    @staticmethod
    def _file_stem(url):
        """Turn *url* into a file-name stem: scheme removed, '/' -> '.'."""
        return url.split('://', 1)[-1].replace('/', '.')

    @staticmethod
    def _save_page(url):
        """Fetch *url* and store its HTML plus its keyword frequencies.

        Writes ``<stem>.html`` (UTF-8) and ``<stem>.words_frequency.pickle``
        into the working directory, where ``<stem>`` is ``_file_stem(url)``.
        Prints the vocabulary size before stop words are removed.
        """
        import pickle
        import requests
        response = requests.get(url, timeout=Solve.HTTP_TIMEOUT)
        word_dict = Solve._extract_keywords(response.text)
        print(len(word_dict))

        # Remove every stop word from the dictionary.
        for token in Solve._load_stop_words():
            word_dict.pop(token, None)

        filename = Solve._file_stem(url)
        with open(filename + '.html', 'w', encoding='utf-8') as fp:
            fp.write(response.text)
        with open(filename + '.words_frequency.pickle', 'wb') as fp:
            pickle.dump(word_dict, fp)

    @staticmethod
    def _strip_sections(source, opener, closer):
        """Remove every ``opener ... closer`` span from *source*.

        A span whose closer is missing is removed up to the end of the text.
        """
        kept = []
        cursor = 0
        while True:
            start = source.find(opener, cursor)
            if start == -1:
                kept.append(source[cursor:])
                break
            kept.append(source[cursor:start])
            end = source.find(closer, start + len(opener))
            if end == -1:
                # Unterminated section: nothing after it is visible text.
                break
            cursor = end + len(closer)
        return ''.join(kept)

    @staticmethod
    def _extract_keywords(web_source):
        """Return a ``{word: count}`` dict of the visible text of an HTML page.

        Scripts, styles and comments are dropped, character references are
        decoded, ASCII punctuation is deleted and the rest is split on
        whitespace.  Matching is case-sensitive.
        """
        import html
        import string
        from collections import Counter

        source = web_source
        # Line breaks and tabs become spaces so that words on neighbouring
        # lines are not fused together.
        for blank in ('\r', '\t', '\n'):
            source = source.replace(blank, ' ')
        for doctype in ('<!doctype html>', '<!DOCTYPE html>'):
            source = source.replace(doctype, '')
        source = source.strip()

        # Remove script blocks, style blocks and comments.
        for opener, closer in (('<script', '</script>'),
                               ('<style', '</style>'),
                               ('<!--', '-->')):
            source = Solve._strip_sections(source, opener, closer)

        # After splitting on '>', a chunk holds optional text followed by the
        # start of the next tag; keep only that leading text.
        texts = []
        for chunk in source.split('>'):
            body = chunk.strip()
            tag_start = body.find('<')
            if tag_start > 0:
                text = html.unescape(body[:tag_start]).strip()
                if text:
                    texts.append(text)

        all_word = ' '.join(texts)
        # Delete punctuation in a single pass.
        all_word = all_word.translate(str.maketrans('', '', string.punctuation))

        return dict(Counter(all_word.split()))
 
 
# Run every answer in assignment order, ending with the incremental project.
for run_answer in (
    Solve.problem1,
    Solve.problem2,
    Solve.problem3,
    Solve.problem4,
    Solve.problem5,
    Solve.problem6,
    Solve.problem7,
    Solve.incrementalProject,
):
    run_answer()
 
cs

/Users/jinwoo/PycharmProjects/pythonProject/venv/bin/python /Users/jinwoo/PycharmProjects/pythonProject/problem1.py
------------------------------ 1 ------------------------------
------------------------------ 2 ------------------------------
Welcome to Our Service
A. Sign Up
B. Sign In
C. Quit
User: 2
user_id : test1@email.com
Sorry, you are not a registered member.
user_id : test1
Sorry, you are not a registered member.
user_id : admin
Sorry, you are not a registered member.
user_id : userid1
user_pw : userpw1
Hello [username1]
User: 3
------------------------------ 3 ------------------------------
[0.0, 0.1, 0.2, 0.3, 0.4]
[1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9]
[2.2, 2.7, 3.2, 3.7]
------------------------------ 4 ------------------------------
0
3
15
5
------------------------------ 5 ------------------------------
ASAP
------------------------------ 6 ------------------------------
120
------------------------------ 7 ------------------------------
------------------------------ Incremental Project ------------------------------
[('뉴딜', 50), ('한국판', 48), ('있습니다', 31)] 2429
[('문제', 59), ('연습', 56), ('2021년', 31)] 817
[('편집', 16), ('005', 15), ('문서', 14)] 832
[('주식의', 28), ('↑', 21), ('편집', 20)] 1441
[('더보기', 47), ('건강식품', 15), ('바로가기', 14)] 2089
search : 문재인 주식
search : ko.wikipedia.org.wiki.주식.html 16
www.president.go.kr.html 6
wikidocs.net.book.2.html 0
ko.wikipedia.org.wiki.군대.html 0
www.coupang.com.html 0

Process finished with exit code 0

댓글